pax_global_header00006660000000000000000000000064130150506620014510gustar00rootroot0000000000000052 comment=c96d457a286e422bf45269a82afb33409b69140d lpeg_patterns-0.4/000077500000000000000000000000001301505066200142225ustar00rootroot00000000000000lpeg_patterns-0.4/.busted000066400000000000000000000000611301505066200155060ustar00rootroot00000000000000return { default = { lpath = "./?.lua"; }; } lpeg_patterns-0.4/.luacheckrc000066400000000000000000000000561301505066200163300ustar00rootroot00000000000000std = "min" files["spec"] = {std = "+busted"} lpeg_patterns-0.4/LICENSE.md000066400000000000000000000020731301505066200156300ustar00rootroot00000000000000The MIT License (MIT) Copyright (c) 2012-2016 Daurnimator Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
lpeg_patterns-0.4/NEWS000066400000000000000000000021261301505066200147220ustar00rootroot000000000000000.4 - 2016-11-23 - Reduce memory usage by refactoring IPv6 pattern Also fixes lpeg 0.10 compatability - http: case-normalise cookie attribute names - http: fix captures of Via header - http: fixes some whitespace rules - uri: fix missing case-normalisation for percent encoded characters in hostnames - uri: export IP_literal and sub_delims patterns 0.3 - 2016-08-21 - New http module Includes parsers for almost every HTTP header. Expect this API to be unstable, it's a significant amount of new code - New language module that parses language codes e.g. "zh-Hans-CN" - New email.mailbox pattern (name + email like: "Bob ") - New uri.absolute_uri pattern that does not allow fragments - Expose some previously internal uri patterns - Fix: Don't percent decode in URIs when it could introduce ambiguity (thanks @torhve) 0.2 - 2015-12-14 - Fixed parsing of IPv6 addresses (thanks Sean Conner) - IPv6 zone support - Stricter uri matching (scheme is now compulsory) - "reference" (i.e. relative) URI matching 0.1 - 2015-01-29 - First release lpeg_patterns-0.4/README.md000066400000000000000000000257461301505066200155170ustar00rootroot00000000000000A collection of [LPEG](http://www.inf.puc-rio.br/~roberto/lpeg/lpeg.html) patterns ## Use cases - Strict validation of user input - Searching free-form input ## Modules ### `core` A small module implementing commonly used rules from [RFC-5234 appendix B.1](https://tools.ietf.org/html/rfc5234#appendix-B.1) - `ALPHA` (pattern) - `BIT` (pattern) - `CHAR` (pattern) - `CR` (pattern) - `CRLF` (pattern) - `CTL` (pattern) - `DIGIT` (pattern) - `DQUOTE` (pattern) - `HEXDIG` (pattern) - `HTAB` (pattern) - `LF` (pattern) - `LWSP` (pattern) - `OCTET` (pattern) - `SP` (pattern) - `VCHAR` (pattern) - `WSP` (pattern) ### `IPv4` - `IPv4address` (pattern): parses an IPv4 address in dotted decimal notation. 
on success, returns addresses as an IPv4 object - `IPv4_methods` (table): - `unpack` (function): the IPv4 address as a series of 4 8 bit numbers - `binary` (function): the IPv4 address as a 4 byte binary string - `IPv4_mt` (table): metatable given to IPv4 objects - `__index` (table): `IPv4_methods` - `__tostring` (function): returns the IPv4 address in dotted decimal notation IPv4 "dotted decimal notation" in this document refers to "strict" form (see [RFC-6943 section 3.1.1](https://tools.ietf.org/html/rfc6943#section-3.1.1)) unless otherwise noted. ### `IPv6` - `IPv6address` (pattern): parses an IPv6 address - `IPv6addrz` (pattern): parses an IPv6 address with optional "ZoneID" (see [RFC-6874](https://tools.ietf.org/html/rfc6874)) - `IPv6_methods` (table): methods available on IPv6 objects - `unpack` (function): the IPv6 address as a series of 8 16bit numbers, optionally followed by zoneid - `binary` (function): the IPv6 address as a 16 byte binary string - `setzoneid` (function): set the zoneid of this IPv6 address - `IPv6_mt` (table): metatable given to IPv6 objects - `__tostring` (function): will return the IPv6 address as a valid IPv6 string ### `uri` Parses URIs as described in [RFC-3986](https://tools.ietf.org/html/rfc3986). 
- `uri` (pattern): on success, returns a table with fields: (similar to [luasocket](http://w3.impa.br/~diego/software/luasocket/url.html#parse)) - `scheme` - `userinfo` - `host` - `port` - `path` - `query` - `fragment` - `absolute_uri` (pattern): similar to `uri`, but does not permit fragments - `uri_reference` (pattern): similar to `uri`, but permits relative URIs - `relative_part` (pattern): matches a relative uri not including query and fragment; data is held in named group captures `"userinfo"`, `"host"`, `"port"`, `"path"` - `scheme` (pattern): matches the scheme portion of a URI - `userinfo` (pattern): matches the userinfo portion of a URI - `host` (pattern): matches the host portion of a URI - `IP_literal` (pattern): matches an IP based host portion of a URI. Capture is an [IPv4](#IPv4), [IPv6](#IPv6) or IPvFuture object - `port` (pattern): matches the port portion of a URI - `authority` (pattern): matches the authority portion of a URI; data is held in named group captures of `"userinfo"`, `"host"`, `"port"` - `path` (pattern): matches the path portion of a URI. Captures `nil` for the empty path. - `segment` (pattern): matches a path segment (a piece of a path without a `/`) - `query` (pattern): matches the query portion of a URI - `fragment` (pattern): matches the fragment portion of a URI - `sane_uri` (pattern): a variant that shouldn't match things that people would not normally consider URIs. e.g. uris without a hostname - `sane_host` (pattern): a variant that shouldn't match things that people would not normally consider valid hosts. - `sane_authority` (pattern): a variant that shouldn't match things that people would not normally consider valid hosts. - `pct_encoded` (pattern): matches a percent encoded octet, produces a capture of the normalised form. - `sub_delims` (pattern): the set of subcomponent delimeters ### `email` - `mailbox` (pattern): the mailbox format: matches either `name_addr` or an addr-spec. 
- `name_addr` (pattern): the name and address format i.e. `Display Name` Has captures of the local_part and the domain. Captures the display name in the named capture `"display"` - `email` (pattern): also known as an "addr-spec"; follows [RFC-5322 section 3.4.1](http://tools.ietf.org/html/rfc5322#section-3.4.1) Has captures of the local_part and the domain Be careful trying to reconstruct the email address from the captures; you may need escaping - `local_part` (pattern): the bit before the `@` in an email address - `domain` (pattern): the bit after the `@` in an email address - `email_nocfws` (pattern): a variant that doesn't allow for comments or folding whitespace - `local_part_nocfws` (pattern): the bit before the `@` in an email address; no comments or folding whitespace allowed. - `domain_nocfws` (pattern): the bit after the `@` in an email address; no comments or folding whitespace allowed. ### `http` These patterns should be considered to have non stable APIs. #### [RFC 4918](https://tools.ietf.org/html/rfc4918) - `DAV` (pattern) - `Depth` (pattern) - `Destination` (pattern) - `If` (pattern) - `Lock_Token` (pattern) - `Overwrite` (pattern) - `TimeOut` (pattern) #### [RFC 5023](https://tools.ietf.org/html/rfc5023) - `SLUG` (pattern) #### [RFC 5323](https://tools.ietf.org/html/rfc5323) - `DASL` (pattern) #### [RFC 5789](https://tools.ietf.org/html/rfc5789) - `Accept_Patch` (pattern) #### [RFC 5988](https://tools.ietf.org/html/rfc5988) - `Link` (pattern) #### [RFC 6265](https://tools.ietf.org/html/rfc6265) - `Set_Cookie` (pattern) - `Cookie` (pattern) #### [RFC 6266](https://tools.ietf.org/html/rfc6266) - `Content_Disposition` (pattern) #### [RFC 6454](https://tools.ietf.org/html/rfc6454) - `Origin` (pattern) #### [RFC 6455](https://tools.ietf.org/html/rfc6455) - `Sec_WebSocket_Accept` (pattern) - `Sec_WebSocket_Key` (pattern) - `Sec_WebSocket_Extensions` (pattern) - `Sec_WebSocket_Protocol_Client` (pattern) - `Sec_WebSocket_Protocol_Server` (pattern) - 
`Sec_WebSocket_Version_Client` (pattern) - `Sec_WebSocket_Version_Server` (pattern) #### [RFC 6638](https://tools.ietf.org/html/rfc6638) - `Schedule_Reply` (pattern) - `Schedule_Tag` (pattern) - `If_Schedule_Tag_Match` (pattern) #### [RFC 6797](https://tools.ietf.org/html/rfc6797) - `Strict_Transport_Security` (pattern) #### [RFC 7034](https://tools.ietf.org/html/rfc7034) - `X_Frame_Options` (pattern) #### [RFC 7089](https://tools.ietf.org/html/rfc7089) - `Accept_Datetime` (pattern) - `Memento_Datetime` (pattern) #### [RFC 7230](https://tools.ietf.org/html/rfc7230) - `request_line` (pattern) - `field_name` (pattern) - `field_value` (pattern) - `header_field` (pattern) - `OWS` (pattern) - `RWS` (pattern) - `BWS` (pattern) - `token` (pattern) - `qdtext` (pattern) - `quoted_string` (pattern) - `comment` (pattern) - `Content_Length` (pattern) - `Transfer_Encoding` (pattern) - `chunk_ext` (pattern) - `TE` (pattern) - `Trailer` (pattern) - `request_target` (pattern) - `Host` (pattern) - `Via` (pattern): captures are a list of tables with fields `.protocol`, `.by` and `.comment` - `Connection` (pattern) - `Upgrade` (pattern): captures are a list of strings containing *protocol* or *protocol/version* #### [RFC 7231](https://tools.ietf.org/html/rfc7231) - `IMF_fixdate` (pattern) - `Content_Encoding` (pattern) - `Content_Type` (pattern) - `Content_Language` (pattern) - `Content_Location` (pattern) - `Expect` (pattern) - `Max_Forwards` (pattern) - `Accept` (pattern) - `Accept_Charset` (pattern) - `Accept_Encoding` (pattern) - `Accept_Language` (pattern) - `From` (pattern) - `Referer` (pattern) - `User_Agent` (pattern) - `Date` (pattern): capture is a table in the same format as used by [`os.time`](http://www.lua.org/manual/5.3/manual.html#pdf-os.time) - `Location` (pattern) - `Retry_After` (pattern): capture is either a table describing an absolute time in the same format as used by [`os.time`](http://www.lua.org/manual/5.3/manual.html#pdf-os.time), or a relative time as a 
number of seconds - `Vary` (pattern) - `Allow` (pattern) - `Server` (pattern) #### [RFC 7232](https://tools.ietf.org/html/rfc7232) - `Last_Modified` (pattern): capture is a table in the same format as used by [`os.time`](http://www.lua.org/manual/5.3/manual.html#pdf-os.time) - `ETag` (pattern) - `If_Match` (pattern) - `If_None_Match` (pattern) - `If_Modified_Since` (pattern): capture is a table in the same format as used by [`os.time`](http://www.lua.org/manual/5.3/manual.html#pdf-os.time) - `If_Unmodified_Since` (pattern): capture is a table in the same format as used by [`os.time`](http://www.lua.org/manual/5.3/manual.html#pdf-os.time) #### [RFC 7233](https://tools.ietf.org/html/rfc7233) - `Accept_Ranges` (pattern) - `Range` (pattern) - `If_Range` (pattern): capture is either an `entity_tag` or a table in the same format as used by [`os.time`](http://www.lua.org/manual/5.3/manual.html#pdf-os.time) - `Content_Range` (pattern) #### [RFC 7234](https://tools.ietf.org/html/rfc7234) - `Age` (pattern) - `Cache_Control` (pattern) - `Expires` (pattern): capture is a table in the same format as used by [`os.time`](http://www.lua.org/manual/5.3/manual.html#pdf-os.time) - `Pragma` (pattern) - `Warning` (pattern) #### [RFC 7235](https://tools.ietf.org/html/rfc7235) - `WWW_Authenticate` (pattern) - `Authorization` (pattern) - `Proxy_Authenticate` (pattern) - `Proxy_Authorization` (pattern) #### [RFC 7239](https://tools.ietf.org/html/rfc7239) - `Forwarded` (pattern) #### [RFC 7469](https://tools.ietf.org/html/rfc7469) - `Public_Key_Pins` (pattern) - `Public_Key_Pins_Report_Only` (pattern) #### [RFC 7486](https://tools.ietf.org/html/rfc7486) - `Hobareg` (pattern) #### [RFC 7615](https://tools.ietf.org/html/rfc7615) - `Authentication_Info` (pattern) - `Proxy_Authentication_Info` (pattern) #### [RFC 7639](https://tools.ietf.org/html/rfc7639) - `ALPN` (pattern) #### [RFC 7809](https://tools.ietf.org/html/rfc7809) - `CalDAV_Timezones` (pattern) #### [RFC 
7838](https://tools.ietf.org/html/rfc7838) - `Alt_Svc` (pattern) - `Alt_Used` (pattern) ### `phone` - `phone` (pattern): includes detailed checking for: - USA phone numbers using the [NANP](https://en.wikipedia.org/wiki/North_American_Numbering_Plan) ### `language` Patterns for definitions from [RFC-4646 Section 2.1](https://tools.ietf.org/html/rfc4646#section-2.1) - `langtag` (pattern): Capture is a table with the language tag decomposed into components: - `language` - `extlang` (optional) - `script` (optional) - `region` (optional) - `variant` (optional): an array - `extension` (optional): a dictionary from singleton to value - `privateuse` (optional): an array - `privateuse` (pattern): captures an array - `Language_Tag` (pattern): captures the whole language tag lpeg_patterns-0.4/lpeg_patterns-0.4-0.rockspec000066400000000000000000000014531301505066200212630ustar00rootroot00000000000000package = "lpeg_patterns" version = "0.4-0" description= { summary = "a collection of LPEG patterns"; license = "MIT"; } dependencies = { "lua"; "lpeg"; } source = { url = "https://github.com/daurnimator/lpeg_patterns/archive/v0.4.zip"; dir = "lpeg_patterns-0.4"; } build = { type = "builtin"; modules = { ["lpeg_patterns.util"] = "lpeg_patterns/util.lua"; ["lpeg_patterns.core"] = "lpeg_patterns/core.lua"; ["lpeg_patterns.IPv4"] = "lpeg_patterns/IPv4.lua"; ["lpeg_patterns.IPv6"] = "lpeg_patterns/IPv6.lua"; ["lpeg_patterns.uri"] = "lpeg_patterns/uri.lua"; ["lpeg_patterns.email"] = "lpeg_patterns/email.lua"; ["lpeg_patterns.http"] = "lpeg_patterns/http.lua"; ["lpeg_patterns.phone"] = "lpeg_patterns/phone.lua"; ["lpeg_patterns.language"] = "lpeg_patterns/language.lua"; }; } lpeg_patterns-0.4/lpeg_patterns/000077500000000000000000000000001301505066200170715ustar00rootroot00000000000000lpeg_patterns-0.4/lpeg_patterns/IPv4.lua000066400000000000000000000016211301505066200203560ustar00rootroot00000000000000-- IPv4 local lpeg = require "lpeg" local P = lpeg.P local R = lpeg.R local Cg = 
lpeg.Cg local core = require "lpeg_patterns.core" local DIGIT = core.DIGIT local dec_octet = ( P"1" * DIGIT * DIGIT + P"2" * (R"04"*DIGIT + P"5"*R"05") + DIGIT * DIGIT^-1 ) / tonumber local IPv4_methods = {} local IPv4_mt = { __name = "lpeg_patterns.IPv4"; __index = IPv4_methods; } local function new_IPv4 ( o1 , o2 , o3 , o4 ) return setmetatable({o1, o2, o3, o4}, IPv4_mt) end function IPv4_methods:unpack() return self[1], self[2], self[3], self[4] end function IPv4_methods:binary() return string.char(self:unpack()) end function IPv4_mt:__tostring ( ) return string.format("%d.%d.%d.%d", self:unpack()) end local IPv4address = Cg ( dec_octet * P"." * dec_octet * P"." * dec_octet * P"." * dec_octet ) / new_IPv4 return { IPv4_methods = IPv4_methods; IPv4_mt = IPv4_mt; IPv4address = IPv4address; } lpeg_patterns-0.4/lpeg_patterns/IPv6.lua000066400000000000000000000100321301505066200203540ustar00rootroot00000000000000-- IPv6 local unpack = table.unpack or unpack -- luacheck: ignore 113 local lpeg = require "lpeg" local P = lpeg.P local V = lpeg.V local Cc = lpeg.Cc local Cg = lpeg.Cg local util = require "lpeg_patterns.util" local core = require "lpeg_patterns.core" local HEXDIG = core.HEXDIG local IPv4address = require "lpeg_patterns.IPv4".IPv4address local IPv6_methods = {} local IPv6_mt = { __name = "lpeg_patterns.IPv6"; __index = IPv6_methods; } local function new_IPv6(o1, o2, o3, o4, o5, o6, o7, o8, zoneid) return setmetatable({ o1, o2, o3, o4, o5, o6, o7, o8, zoneid = zoneid; }, IPv6_mt) end function IPv6_methods:unpack() return self[1], self[2], self[3], self[4], self[5], self[6], self[7], self[8], self.zoneid end function IPv6_methods:binary() local t = {} for i=1, 8 do local lo = self[i] % 256 t[i*2-1] = (self[i] - lo) / 256 t[i*2] = lo end -- TODO: append zoneid. -- In a struct sockaddr_in6 it is the numeric index of the scope, so need to lookup? 
return string.char(unpack(t, 1, 16)) end function IPv6_methods:setzoneid(zoneid) self.zoneid = zoneid end function IPv6_mt:__tostring() local fmt_str if self.zoneid then fmt_str = "%x:%x:%x:%x:%x:%x:%x:%x%%%s" else fmt_str = "%x:%x:%x:%x:%x:%x:%x:%x" end return string.format(fmt_str, self:unpack()) end -- RFC 3986 Section 3.2.2 -- This is written as a grammar to reduce memory usage local raw_IPv6address = Cg(P{ h16 = HEXDIG * HEXDIG^-3 / util.read_hex; h16c = V"h16" * P":"; ls32 = ( V"h16c" * V"h16" ) + IPv4address / function ( ipv4 ) local o1, o2, o3, o4 = ipv4:unpack() return o1*2^8 + o2 , o3*2^8 + o4 end; mh16c_1 = V"h16c"; mh16c_2 = V"h16c" * V"h16c"; mh16c_3 = V"h16c" * V"h16c" * V"h16c"; mh16c_4 = V"h16c" * V"h16c" * V"h16c" * V"h16c"; mh16c_5 = V"h16c" * V"h16c" * V"h16c" * V"h16c" * V"h16c"; mh16c_6 = V"h16c" * V"h16c" * V"h16c" * V"h16c" * V"h16c" * V"h16c"; mcc_1 = P"::" * Cc(0); mcc_2 = P"::" * Cc(0, 0); mcc_3 = P"::" * Cc(0, 0, 0); mcc_4 = P"::" * Cc(0, 0, 0, 0); mcc_5 = P"::" * Cc(0, 0, 0, 0, 0); mcc_6 = P"::" * Cc(0, 0, 0, 0, 0, 0); mcc_7 = P"::" * Cc(0, 0, 0, 0, 0, 0, 0); mcc_8 = P"::" * Cc(0, 0, 0, 0, 0, 0, 0, 0); mh16_1 = V"h16"; mh16_2 = V"mh16c_1" * V"h16"; mh16_3 = V"mh16c_2" * V"h16"; mh16_4 = V"mh16c_3" * V"h16"; mh16_5 = V"mh16c_4" * V"h16"; mh16_6 = V"mh16c_5" * V"h16"; mh16_7 = V"mh16c_6" * V"h16"; V"mh16c_6" * V"ls32" + V"mcc_1" * V"mh16c_5" * V"ls32" + V"mcc_2" * V"mh16c_4" * V"ls32" + V"h16" * V"mcc_1" * V"mh16c_4" * V"ls32" + V"mcc_3" * V"mh16c_3" * V"ls32" + V"h16" * V"mcc_2" * V"mh16c_3" * V"ls32" + V"mh16_2" * V"mcc_1" * V"mh16c_3" * V"ls32" + V"mcc_4" * V"mh16c_2" * V"ls32" + V"h16" * V"mcc_3" * V"mh16c_2" * V"ls32" + V"mh16_2" * V"mcc_2" * V"mh16c_2" * V"ls32" + V"mh16_3" * V"mcc_1" * V"mh16c_2" * V"ls32" + V"mcc_5" * V"h16c" * V"ls32" + V"h16" * V"mcc_4" * V"h16c" * V"ls32" + V"mh16_2" * V"mcc_3" * V"h16c" * V"ls32" + V"mh16_3" * V"mcc_2" * V"h16c" * V"ls32" + V"mh16_4" * V"mcc_1" * V"h16c" * V"ls32" + V"mcc_6" * V"ls32" + V"h16" 
* V"mcc_5" * V"ls32" + V"mh16_2" * V"mcc_4" * V"ls32" + V"mh16_3" * V"mcc_3" * V"ls32" + V"mh16_4" * V"mcc_2" * V"ls32" + V"mh16_5" * V"mcc_1" * V"ls32" + V"mcc_7" * V"h16" + V"h16" * V"mcc_6" * V"h16" + V"mh16_2" * V"mcc_5" * V"h16" + V"mh16_3" * V"mcc_4" * V"h16" + V"mh16_4" * V"mcc_3" * V"h16" + V"mh16_5" * V"mcc_2" * V"h16" + V"mh16_6" * V"mcc_1" * V"h16" + V"mcc_8" + V"mh16_1" * V"mcc_7" + V"mh16_2" * V"mcc_6" + V"mh16_3" * V"mcc_5" + V"mh16_4" * V"mcc_4" + V"mh16_5" * V"mcc_3" + V"mh16_6" * V"mcc_2" + V"mh16_7" * V"mcc_1" }) local IPv6address = raw_IPv6address / new_IPv6 local ZoneID = P(1)^1 -- ZoneIDs can be any character local IPv6addrz = raw_IPv6address * (P"%" * ZoneID)^-1 / new_IPv6 return { IPv6_methods = IPv6_methods; IPv6_mt = IPv6_mt; IPv6address = IPv6address; IPv6addrz = IPv6addrz; } lpeg_patterns-0.4/lpeg_patterns/core.lua000066400000000000000000000010101301505066200205140ustar00rootroot00000000000000-- Core Rules -- https://tools.ietf.org/html/rfc5234#appendix-B.1 local lpeg = require "lpeg" local P = lpeg.P local R = lpeg.R local S = lpeg.S local _M = { } _M.ALPHA = R("AZ","az") _M.BIT = S"01" _M.CHAR = R"\1\127" _M.CR = P"\r" _M.CRLF = P"\r\n" _M.CTL = R"\0\31" + P"\127" _M.DIGIT = R"09" _M.DQUOTE= P'"' _M.HEXDIG= _M.DIGIT + S"ABCDEFabcdef" _M.HTAB = P"\t" _M.LF = P"\n" _M.OCTET = P(1) _M.SP = P" " _M.VCHAR = R"\33\126" _M.WSP = S" \t" _M.LWSP = (_M.WSP + _M.CRLF*_M.WSP)^0 return _M lpeg_patterns-0.4/lpeg_patterns/email.lua000066400000000000000000000061611301505066200206670ustar00rootroot00000000000000-- Email Addresses -- RFC 5322 Section 3.4.1 local lpeg = require "lpeg" local P = lpeg.P local R = lpeg.R local S = lpeg.S local V = lpeg.V local C = lpeg.C local Cg = lpeg.Cg local Ct = lpeg.Ct local Cs = lpeg.Cs local core = require "lpeg_patterns.core" local CHAR = core.CHAR local CRLF = core.CRLF local CTL = core.CTL local DQUOTE = core.DQUOTE local WSP = core.WSP local VCHAR = core.VCHAR local obs_NO_WS_CTL = R("\1\8", "\11\12", "\14\31") + 
P"\127" local obs_qp = Cg(P"\\" * C(P"\0" + obs_NO_WS_CTL + core.LF + core.CR)) local quoted_pair = Cg(P"\\" * C(VCHAR + WSP)) + obs_qp -- Folding White Space local FWS = (WSP^0 * CRLF)^-1 * WSP^1 / " " -- Fold whitespace into a single " " -- Comments local ctext = R"\33\39" + R"\42\91" + R"\93\126" local comment = P { V"comment" ; ccontent = ctext + quoted_pair + V"comment" ; comment = P"("* (FWS^-1 * V"ccontent")^0 * FWS^-1 * P")"; } local CFWS = ((FWS^-1 * comment)^1 * FWS^-1 + FWS ) / function() end -- Atom local specials = S[=[()<>@,;:\".[]]=] local atext = CHAR-specials-P" "-CTL local atom = CFWS^-1 * C(atext^1) * CFWS^-1 local dot_atom_text = C(atext^1 * ( P"." * atext^1 )^0) local dot_atom = CFWS^-1 * dot_atom_text * CFWS^-1 -- Quoted Strings local qtext = S"\33"+R("\35\91","\93\126") local qcontent = qtext + quoted_pair local quoted_string_text = DQUOTE * Cs((FWS^-1 * qcontent)^0 * FWS^-1) * DQUOTE local quoted_string = CFWS^-1 * quoted_string_text * CFWS^-1 -- Miscellaneous Tokens local word = atom + quoted_string local obs_phrase = C(word * (word + P"." + CFWS)^0 / function() end) local phrase = obs_phrase -- obs_phrase is more broad than `word^1`, it's really the same but allows "." -- Addr-spec local obs_dtext = obs_NO_WS_CTL + quoted_pair local dtext = R("\33\90", "\94\126") + obs_dtext local domain_literal_text = P"[" * Cs((FWS^-1 * dtext)^0 * FWS^-1) * P"]" local domain_text = dot_atom_text + domain_literal_text local local_part_text = dot_atom_text + quoted_string_text local addr_spec_text = local_part_text * P"@" * domain_text local domain_literal = CFWS^-1 * domain_literal_text * CFWS^-1 local obs_domain = Ct(atom * (C"." * atom)^0) / table.concat local domain = obs_domain + dot_atom + domain_literal local obs_local_part = Ct(word * (C"." 
* word)^0) / table.concat local local_part = obs_local_part + dot_atom + quoted_string local addr_spec = local_part * P"@" * domain local display_name = phrase local obs_domain_list = (CFWS + P",")^0 * P"@" * domain * (P"," * CFWS^-1 * (P"@" * domain)^-1)^0 local obs_route = Cg(Ct(obs_domain_list) * P":", "route") local obs_angle_addr = CFWS^-1 * P"<" * obs_route * addr_spec * P">" * CFWS^-1 local angle_addr = CFWS^-1 * P"<" * addr_spec * P">" * CFWS^-1 + obs_angle_addr local name_addr = Cg(display_name, "display")^-1 * angle_addr local mailbox = name_addr + addr_spec return { local_part = local_part; domain = domain; email = addr_spec; name_addr = name_addr; mailbox = mailbox; -- A variant that does not allow comments or folding whitespace local_part_nocfws = local_part_text; domain_nocfws = domain_text; email_nocfws = addr_spec_text; } lpeg_patterns-0.4/lpeg_patterns/http.lua000066400000000000000000000517231301505066200205630ustar00rootroot00000000000000--[[ https://tools.ietf.org/html/rfc7230 https://tools.ietf.org/html/rfc7231 ]] local lpeg = require "lpeg" local core = require "lpeg_patterns.core" local email = require "lpeg_patterns.email" local language = require "lpeg_patterns.language" local uri = require "lpeg_patterns.uri" local util = require "lpeg_patterns.util" local C = lpeg.C local Cc = lpeg.Cc local Cf = lpeg.Cf local Cg = lpeg.Cg local Cs = lpeg.Cs local Ct = lpeg.Ct local Cmt = lpeg.Cmt local P = lpeg.P local R = lpeg.R local S = lpeg.S local V = lpeg.V local _M = {} local T_F = S"Tt" * Cc(true) + S"Ff" * Cc(false) local function no_rich_capture(patt) return C(patt) / function(a) return a end end local function case_insensitive(str) local patt = P(true) for i=1, #str do local c = str:sub(i, i) patt = patt * S(c:upper() .. 
c:lower()) end return patt end -- RFC 7230 Section 3.2.3 _M.OWS = (core.SP + core.HTAB)^0 _M.RWS = (core.SP + core.HTAB)^1 _M.BWS = _M.OWS -- RFC 5023 local slugtext = _M.RWS / " " + P"%" * (core.HEXDIG * core.HEXDIG / util.read_hex) / string.char + R"\32\126" _M.SLUG = Cs(slugtext^0) -- RFC 6454 -- discard captures from scheme, host, port and just get whole string local serialized_origin = C(uri.scheme * P"://" * uri.host * (P":" * uri.port)^-1/function() end) local origin_list = serialized_origin * (core.SP * serialized_origin)^0 local origin_list_or_null = P"null" + origin_list _M.Origin = _M.OWS * origin_list_or_null * _M.OWS -- Analogue to RFC 7230 Section 7's ABNF extension of '#' -- Also documented as `#rule` under RFC 2616 Section 2.1 local comma_sep, comma_sep_trim do local sep = _M.OWS * lpeg.P "," * _M.OWS local optional_sep = (lpeg.P"," + core.SP + core.HTAB)^0 comma_sep = function(element, min, max) local extra = sep * optional_sep * element local patt = element if min then for _=2, min do patt = patt * extra end else min = 0 patt = patt^-1 end if max then local more = max-min-1 patt = patt * extra^-more else patt = patt * extra^0 end return patt end -- allows leading + trailing comma_sep_trim = function (...) return optional_sep * comma_sep(...) * optional_sep end end -- RFC 7034 _M.X_Frame_Options = case_insensitive "deny" * Cc("deny") + case_insensitive "sameorigin" * Cc("sameorigin") + case_insensitive "allow-from" * _M.RWS * serialized_origin -- RFC 7230 Section 2.6 local HTTP_name = P"HTTP" local HTTP_version = HTTP_name * P"/" * (core.DIGIT * P"." * core.DIGIT / util.safe_tonumber) -- RFC 7230 Section 2.7 local absolute_path = (P"/" * uri.segment )^1 local partial_uri = Ct(uri.relative_part * (P"?" 
* uri.query)^-1) -- RFC 7230 Section 3.2.6 local tchar = S "!#$%&'*+-.^_`|~" + core.DIGIT + core.ALPHA _M.token = C(tchar^1) local obs_text = R("\128\255") _M.qdtext = core.HTAB + core.SP + P"\33" + R("\35\91", "\93\126") + obs_text local quoted_pair = Cs(P"\\" * C(core.HTAB + core.SP + core.VCHAR + obs_text) / "%1") _M.quoted_string = core.DQUOTE * Cs((_M.qdtext + quoted_pair)^0) * core.DQUOTE local ctext = core.HTAB + core.SP + R("\33\39", "\42\91", "\93\126") + obs_text _M.comment = P { P"(" * ( ctext + quoted_pair + V(1) )^0 * P")" } -- RFC 7230 Section 3.2 _M.field_name = _M.token / string.lower -- case insensitive local field_vchar = core.VCHAR + obs_text local field_content = field_vchar * (( core.SP + core.HTAB )^1 * field_vchar)^-1 local obs_fold = ( core.SP + core.HTAB )^0 * core.CRLF * ( core.SP + core.HTAB )^1 / " " -- field_value is not correct, see Errata: https://www.rfc-editor.org/errata_search.php?rfc=7230&eid=4189 _M.field_value = Cs((field_content + obs_fold)^0) _M.header_field = _M.field_name * P":" * _M.OWS * _M.field_value * _M.OWS -- RFC 7230 Section 3.3.2 _M.Content_Length = core.DIGIT^1 -- RFC 7230 Section 4 -- See https://www.rfc-editor.org/errata_search.php?rfc=7230&eid=4683 local transfer_parameter = (_M.token - S"qQ" * _M.BWS * P"=") * _M.BWS * P"=" * _M.BWS * ( _M.token + _M.quoted_string ) local transfer_extension = Cf(Ct(_M.token / string.lower) -- case insensitive * ( _M.OWS * P";" * _M.OWS * Cg(transfer_parameter) )^0, rawset) local transfer_coding = transfer_extension -- RFC 7230 Section 3.3.1 _M.Transfer_Encoding = comma_sep_trim(transfer_coding, 1) -- RFC 7230 Section 4.1.1 local chunk_ext_name = _M.token local chunk_ext_val = _M.token + _M.quoted_string -- See https://www.rfc-editor.org/errata_search.php?rfc=7230&eid=4667 _M.chunk_ext = ( P";" * chunk_ext_name * ( P"=" * chunk_ext_val)^-1 )^0 -- RFC 7230 Section 4.3 local rank = (P"0" * ((P"." * core.DIGIT^-3) / util.safe_tonumber + Cc(0)) + P"1" * ("." 
* (P"0")^-3)^-1) * Cc(1) local t_ranking = _M.OWS * P";" * _M.OWS * S"qQ" * P"=" * rank -- q is case insensitive local t_codings = (transfer_coding * t_ranking^-1) / function(t, q) if q then t["q"] = q end return t end _M.TE = comma_sep_trim(t_codings) -- RFC 7230 Section 4.4 _M.Trailer = comma_sep_trim(_M.field_name, 1) -- RFC 7230 Section 5.3 local origin_form = Cs(absolute_path * (P"?" * uri.query)^-1) local absolute_form = no_rich_capture(uri.absolute_uri) local authority_form = no_rich_capture(uri.authority) local asterisk_form = C"*" _M.request_target = asterisk_form + origin_form + absolute_form + authority_form -- RFC 7230 Section 3.1.1 local method = _M.token _M.request_line = method * core.SP * _M.request_target * core.SP * HTTP_version * core.CRLF -- RFC 7230 Section 5.4 _M.Host = uri.host * (P":" * uri.port)^-1 -- RFC 7230 Section 6.7 local protocol_name = _M.token local protocol_version = _M.token local protocol = protocol_name * (P"/" * protocol_version)^-1 / "%0" _M.Upgrade = comma_sep_trim(protocol) -- RFC 7230 Section 5.7.1 local received_protocol = (protocol_name * P"/" + Cc("HTTP")) * protocol_version / "%1/%2" local pseudonym = _M.token -- workaround for https://lists.w3.org/Archives/Public/ietf-http-wg/2016OctDec/0527.html local received_by = uri.host * ((P":" * uri.port) + -lpeg.B(",")) / "%0" + pseudonym _M.Via = comma_sep_trim(Ct(Cg(received_protocol, "protocol") * _M.RWS * Cg(received_by, "by") * (_M.RWS * Cg(_M.comment, "comment"))^-1), 1) -- RFC 7230 Section 6.1 local connection_option = _M.token / string.lower -- case insensitive _M.Connection = comma_sep_trim(connection_option) -- RFC 7231 Section 3.1.1 local content_coding = _M.token / string.lower -- case insensitive _M.Content_Encoding = comma_sep_trim(content_coding, 1) -- RFC 7231 Section 3.1.2 local type = _M.token / string.lower -- case insensitive local subtype = _M.token / string.lower -- case insensitive local parameter = _M.token / string.lower -- case insensitive * P"=" * 
(_M.token + _M.quoted_string) local media_type = Cg(type, "type") * P"/" * Cg(subtype, "subtype") * Cg(Cf(Ct(true) * (_M.OWS * P";" * _M.OWS * Cg(parameter))^0, rawset), "parameters") local charset = _M.token / string.lower -- case insensitive _M.Content_Type = Ct(media_type) -- RFC 7231 Section 3.1.3 _M.Content_Language = comma_sep_trim(language.Language_Tag, 1) -- RFC 7231 Section 3.1.4.2 _M.Content_Location = uri.absolute_uri + partial_uri -- RFC 7231 Section 5.1.1 _M.Expect = P"100-"*S"cC"*S"oO"*S"nN"*S"tT"*S"iI"*S"nN"*S"uU"*S"eE" * Cc("100-continue") -- RFC 7231 Section 5.1.2 _M.Max_Forwards = core.DIGIT^1 / tonumber -- RFC 7231 Section 5.3.1 local qvalue = rank -- luacheck: ignore 211 local weight = t_ranking -- RFC 7231 Section 5.3.2 local media_range = (P"*/*" + (Cg(type, "type") * P"/*") + (Cg(type, "type") * P"/" * Cg(subtype, "subtype")) ) * Cg(Cf(Ct(true) * (_M.OWS * ";" * _M.OWS * Cg(parameter) - weight)^0, rawset), "parameters") local accept_ext = _M.OWS * P";" * _M.OWS * _M.token * (P"=" * (_M.token + _M.quoted_string))^-1 local accept_params = Cg(weight, "q") * Cg(Cf(Ct(true) * Cg(accept_ext)^0, rawset), "extensions") _M.Accept = comma_sep_trim(Ct(media_range * (accept_params+Cg(Ct(true), "extensions")))) -- RFC 7231 Section 5.3.3 _M.Accept_Charset = comma_sep_trim((charset + P"*") * weight^-1, 1) -- RFC 7231 Section 5.3.4 local codings = content_coding + "*" _M.Accept_Encoding = comma_sep_trim(codings * weight^-1) -- RFC 4647 Section 2.1 local alphanum = core.ALPHA + core.DIGIT local language_range = (core.ALPHA * core.ALPHA^-7 * (P"-" * alphanum * alphanum^-7)^0) + P"*" -- RFC 7231 Section 5.3.5 _M.Accept_Language = comma_sep_trim(language_range * weight^-1, 1) -- RFC 7231 Section 5.5.1 _M.From = email.mailbox -- RFC 7231 Section 5.5.2 _M.Referer = uri.absolute_uri + partial_uri -- RFC 7231 Section 5.5.3 local product_version = _M.token local product = _M.token * (P"/" * product_version)^-1 _M.User_Agent = product * (_M.RWS * (product + 
_M.comment))^0 -- RFC 7231 Section 7.1.1.1 -- Uses os.date field names local day_name = Cg(P"Mon"*Cc(2) + P"Tue"*Cc(3) + P"Wed"*Cc(4) + P"Thu"*Cc(5) + P"Fri"*Cc(6) + P"Sat"*Cc(7) + P"Sun"*Cc(1), "wday") local day = Cg(core.DIGIT * core.DIGIT / tonumber, "day") local month = Cg(P"Jan"*Cc(1) + P"Feb"*Cc(2) + P"Mar"*Cc(3) + P"Apr"*Cc(4) + P"May"*Cc(5) + P"Jun"*Cc(6) + P"Jul"*Cc(7) + P"Aug"*Cc(8) + P"Sep"*Cc(9) + P"Oct"*Cc(10) + P"Nov"*Cc(11) + P"Dec"*Cc(12), "month") local year = Cg(core.DIGIT * core.DIGIT * core.DIGIT * core.DIGIT / tonumber, "year") local date1 = day * core.SP * month * core.SP * year local GMT = P"GMT" local minute = Cg(core.DIGIT * core.DIGIT / tonumber, "min") local second = Cg(core.DIGIT * core.DIGIT / tonumber, "sec") local hour = Cg(core.DIGIT * core.DIGIT / tonumber, "hour") -- XXX only match 00:00:00 - 23:59:60 (leap second)? local time_of_day = hour * P":" * minute * P":" * second _M.IMF_fixdate = Ct(day_name * P"," * core.SP * date1 * core.SP * time_of_day * core.SP * GMT) local date2 do local year_barrier = 70 local twodayyear = Cg(core.DIGIT * core.DIGIT / function(y) y = tonumber(y, 10) if y < year_barrier then return 2000+y else return 1900+y end end, "year") date2 = day * P"-" * month * P"-" * twodayyear end local day_name_l = Cg(P"Monday"*Cc(2) + P"Tuesday"*Cc(3) + P"Wednesday"*Cc(4) + P"Thursday"*Cc(5) + P"Friday"*Cc(6) + P"Saturday"*Cc(7) + P"Sunday"*Cc(1), "wday") local rfc850_date = Ct(day_name_l * P"," * core.SP * date2 * core.SP * time_of_day * core.SP * GMT) local date3 = month * core.SP * (day + Cg(core.SP * core.DIGIT / tonumber, "day")) local asctime_date = Ct(day_name * core.SP * date3 * core.SP * time_of_day * core.SP * year) local obs_date = rfc850_date + asctime_date local HTTP_date = _M.IMF_fixdate + obs_date _M.Date = HTTP_date -- RFC 7231 Section 7.1.2 _M.Location = uri.uri_reference -- RFC 7231 Section 7.1.3 local delay_seconds = core.DIGIT^1 / tonumber _M.Retry_After = HTTP_date + delay_seconds -- RFC 7231 Section 
7.1.4 _M.Vary = P"*" + comma_sep(_M.field_name, 1) -- RFC 7231 Section 7.4.1 _M.Allow = comma_sep_trim(method) -- RFC 7231 Section 7.4.2 _M.Server = product * (_M.RWS * (product + _M.comment))^0 -- RFC 5789 _M.Accept_Patch = comma_sep_trim(media_type, 1) -- RFC 5987 local attr_char = core.ALPHA + core.DIGIT + S"!#$&+-.^_`|~" -- can't use uri.pct_encoded, as it doesn't decode all characters local pct_encoded = P"%" * (core.HEXDIG * core.HEXDIG / util.read_hex) / string.char local value_chars = Cs((pct_encoded + attr_char)^0) local parmname = C(attr_char^1) local ext_value do -- ext-value uses charset from RFC 5987 instead local mime_charsetc = core.ALPHA + core.DIGIT + S"!#$%&+-^_`{}~" local mime_charset = C(mime_charsetc^1) ext_value = Cg(mime_charset, "charset") * P"'" * Cg(language.Language_Tag, "language")^-1 * P"'" * value_chars end do -- RFC 5988 local ptokenchar = S"!#$%&'()*+-./:<=>?@[]^_`{|}~" + core.DIGIT + core.ALPHA local ptoken = ptokenchar^1 local ext_name_star = parmname * P"*" local link_extension = ext_name_star * P"=" * ext_value + parmname * (P"=" * (ptoken + _M.quoted_string))^-1 -- See https://www.rfc-editor.org/errata_search.php?rfc=5988&eid=3158 local link_param = link_extension local link_value = Cf(Ct(P"<" * uri.uri_reference * P">") * (_M.OWS * P";" * _M.OWS * Cg(link_param))^0, rawset) -- TODO: handle multiple ext_value variants... -- e.g. server might provide one title in english, one in chinese, client should be able to pick which one to display _M.Link = comma_sep_trim(link_value) end do -- RFC 6265 local cookie_name = _M.token local cookie_octet = S"!" 
+ R("\35\43", "\45\58", "\60\91", "\93\126") local cookie_value = core.DQUOTE * C(cookie_octet^0) * core.DQUOTE + C(cookie_octet^0) local cookie_pair = cookie_name * _M.BWS * P"=" * _M.BWS * cookie_value * _M.BWS local ext_char = core.CHAR - core.CTL - S";" ext_char = ext_char - core.WSP + core.WSP * #(core.WSP^0 * ext_char) -- No trailing whitespace -- Complexity is to make sure whitespace before an `=` isn't captured local extension_av = ((ext_char - S"=" - core.WSP) + core.WSP^1 * #(1-S"="))^0 / string.lower * _M.BWS * P"=" * _M.BWS * C(ext_char^0) + (ext_char)^0 / string.lower * Cc(true) local cookie_av = extension_av local set_cookie_string = cookie_pair * Cf(Ct(true) * (P";" * _M.OWS * Cg(cookie_av))^0, rawset) _M.Set_Cookie = set_cookie_string local cookie_string = Cf(Ct(true) * Cg(cookie_pair) * (P";" * _M.OWS * Cg(cookie_pair))^0, rawset) _M.Cookie = cookie_string end do -- RFC 6266 local disp_ext_type = _M.token / string.lower local disposition_type = disp_ext_type local ext_token = C((tchar-P"*"*(-tchar))^1) * P"*" -- can't use 'token' here as we need to not include the "*" at the end local value = _M.token + _M.quoted_string local disp_ext_parm = ext_token * _M.OWS * P"=" * _M.OWS * ext_value + _M.token * _M.OWS * P"=" * _M.OWS * value local disposition_parm = disp_ext_parm _M.Content_Disposition = disposition_type * Cf(Ct(true) * (_M.OWS * P";" * _M.OWS * Cg(disposition_parm))^0, rawset) end -- RFC 6455 local base64_character = core.ALPHA + core.DIGIT + S"+/" local base64_data = base64_character * base64_character * base64_character * base64_character local base64_padding = base64_character * base64_character * P"==" + base64_character * base64_character * base64_character * P"=" local base64_value_non_empty = (base64_data^1 * base64_padding^-1) + base64_padding _M.Sec_WebSocket_Accept = base64_value_non_empty _M.Sec_WebSocket_Key = base64_value_non_empty local registered_token = _M.token local extension_token = registered_token local extension_param 
-- Sec-WebSocket-Extensions parameters (RFC 6455 Section 9.1).
-- A parameter is `token` optionally followed by `=value`; when the value is
-- given as a quoted-string its content must itself be a valid token.
do
	local EOF = P(-1)
	-- Matches a string that is a token from start to end; the leading
	-- Cc(true) makes the match-time function below report success.
	local token_then_EOF = Cc(true) * _M.token * EOF
	-- the quoted-string must be a valid token
	local quoted_token = Cmt(_M.quoted_string, function(_, _, q)
		-- re-parse the quoted-string capture `q`; a nil result fails the match
		return token_then_EOF:match(q)
	end)
	-- a parameter without "=value" is given the value `true`
	extension_param = _M.token * ((P"=" * (_M.token + quoted_token)) + Cc(true))
end
-- each extension: its name, then a "parameters" table folded together with rawset
local extension = extension_token * Cg(Cf(Ct(true) * (P";" * Cg(extension_param))^0, rawset), "parameters")
local extension_list = comma_sep_trim(Ct(extension))
_M.Sec_WebSocket_Extensions = extension_list

_M.Sec_WebSocket_Protocol_Client = comma_sep_trim(_M.token)
_M.Sec_WebSocket_Protocol_Server = _M.token

local NZDIGIT = S"123456789"
-- Limited to 0-255 range, with no leading zeros
-- RFC 6455: version = DIGIT | (NZDIGIT DIGIT) | ("1" DIGIT DIGIT) | ("2" DIGIT DIGIT)
local version = (
	P"2" * (S"01234" * core.DIGIT + P"5" * S"012345") -- 200-255
	+ (P"1") * core.DIGIT * core.DIGIT -- 100-199
	+ NZDIGIT * core.DIGIT^-1 -- 1-99
	+ P"0" -- a lone zero is a valid DIGIT; previously rejected
) / tonumber
_M.Sec_WebSocket_Version_Client = version
_M.Sec_WebSocket_Version_Server = comma_sep_trim(version)

-- RFC 6797
local directive_name = _M.token / string.lower
local directive_value = _M.token + _M.quoted_string
-- a directive without "=value" is given the value `true`
local directive = Cg(directive_name * ((_M.OWS * P"=" * _M.OWS * directive_value) + Cc(true)))
_M.Strict_Transport_Security = directive^-1 * (_M.OWS * P";" * _M.OWS * directive^-1)^0

-- RFC 7089
_M.Accept_Datetime = _M.IMF_fixdate
_M.Memento_Datetime = _M.IMF_fixdate

-- RFC 7232 Section 2.2
_M.Last_Modified = HTTP_date

-- RFC 7232 Section 2.3
local weak = P"W/" -- case sensitive
-- etagc = %x21 / %x23-7E / obs-text
-- Lua string escapes are decimal: \33 = 0x21, \35 = 0x23, \126 = 0x7E.
-- (The upper bound used to be \115 = "s", which rejected "t".."~" in tags.)
local etagc = P"\33" + R"\35\126" + obs_text
local opaque_tag = core.DQUOTE * etagc^0 * core.DQUOTE
-- captures: named boolean "weak", then the opaque tag including its quotes
local entity_tag = Cg(weak*Cc(true) + Cc(false), "weak") * C(opaque_tag)
_M.ETag = entity_tag

-- RFC 7232 Section 3.1
_M.If_Match = P"*" + comma_sep(entity_tag, 1)

-- RFC 7232 Section 3.2
_M.If_None_Match = P"*" + comma_sep(entity_tag, 1)

-- RFC 7232 Section 3.3
_M.If_Modified_Since = HTTP_date

-- RFC 7232 Section 3.4
_M.If_Unmodified_Since = HTTP_date

-- RFC 4918
local Coded_URL = P"<" * uri.absolute_uri * P">"
local extend = Coded_URL + _M.token
local compliance_class = P"1" + P"2" + P"3" + extend
_M.DAV = comma_sep_trim(compliance_class)
-- Depth yields a number; "infinity" (any case) becomes math.huge
_M.Depth = P"0" * Cc(0)
	+ P"1" * Cc(1)
	+ case_insensitive "infinity" * Cc(math.huge)
local Simple_ref = uri.absolute_uri + partial_uri
_M.Destination = Simple_ref
local State_token = Coded_URL
-- an optional "Not" (any case) is normalised to the capture "not"
local Condition = (case_insensitive("not") * Cc("not"))^-1 * _M.OWS * (State_token + P"[" * entity_tag * P"]")
local List = P"(" * _M.OWS * (Condition * _M.OWS)^1 * P")"
local No_tag_list = List
local Resource_Tag = P"<" * Simple_ref * P">"
local Tagged_list = Resource_Tag * _M.OWS * (List * _M.OWS)^1
_M.If = (Tagged_list * _M.OWS)^1 + (No_tag_list * _M.OWS)^1
_M.Lock_Token = Coded_URL
_M.Overwrite = T_F
local DAVTimeOutVal = core.DIGIT^1 / tonumber
-- "Second-<n>" yields n; "Infinite" yields math.huge
local TimeType = case_insensitive "Second-" * DAVTimeOutVal
	+ case_insensitive "Infinite" * Cc(math.huge)
_M.TimeOut = comma_sep_trim(TimeType)

-- RFC 5323
_M.DASL = comma_sep_trim(Coded_URL, 1)

-- RFC 6638
_M.Schedule_Reply = T_F
_M.Schedule_Tag = opaque_tag
_M.If_Schedule_Tag_Match = opaque_tag

-- RFC 7233
local bytes_unit = P"bytes"
local other_range_unit = _M.token
local range_unit = C(bytes_unit) + other_range_unit
local first_byte_pos = core.DIGIT^1 / tonumber
local last_byte_pos = core.DIGIT^1 / tonumber
local byte_range_spec = first_byte_pos * P"-" * last_byte_pos^-1
local suffix_length = core.DIGIT^1 / tonumber
-- a suffix range ("-500") has no first position: capture nil in its place
local suffix_byte_range_spec = Cc(nil) * P"-" * suffix_length
local byte_range_set = comma_sep(byte_range_spec + suffix_byte_range_spec, 1)
local byte_ranges_specifier = bytes_unit * P"=" * byte_range_set

-- RFC 7233 Section 2.3
local acceptable_ranges = comma_sep_trim(range_unit, 1) + P"none"
_M.Accept_Ranges = acceptable_ranges

-- RFC 7233 Section 3.1
local other_range_set = core.VCHAR^1
local other_ranges_specifier = other_range_unit * P"=" * other_range_set
_M.Range = byte_ranges_specifier + other_ranges_specifier

-- RFC 7233 Section 3.2
_M.If_Range = entity_tag + HTTP_date

-- RFC 7233 Section 4.2
local complete_length = core.DIGIT^1 / tonumber
local unsatisfied_range = P"*/" * complete_length
local byte_range = first_byte_pos * P"-" * last_byte_pos
local byte_range_resp = byte_range * P"/" * (complete_length + P"*")
local byte_content_range = bytes_unit * core.SP * (byte_range_resp + unsatisfied_range)
local other_range_resp = core.CHAR^0
local other_content_range = other_range_unit * core.SP * other_range_resp
_M.Content_Range = byte_content_range + other_content_range

-- RFC 7234 Section 1.2.1
local delta_seconds = core.DIGIT^1 / tonumber

-- RFC 7234 Section 5.1
_M.Age = delta_seconds

-- RFC 7234 Section 5.2
local cache_directive = _M.token * (P"=" * (_M.token + _M.quoted_string))^-1
_M.Cache_Control = comma_sep_trim(cache_directive, 1)

-- RFC 7234 Section 5.3
_M.Expires = HTTP_date

-- RFC 7234 Section 5.4
local extension_pragma = _M.token * (P"=" * (_M.token + _M.quoted_string))^-1
-- NOTE(review): the RFC spells this directive "no-cache"; the literal below
-- uses an underscore. "no-cache" is still accepted through extension_pragma,
-- so the literal is left untouched here -- confirm before changing it.
local pragma_directive = "no_cache" + extension_pragma
_M.Pragma = comma_sep_trim(pragma_directive, 1)

-- RFC 7234 Section 5.5
local warn_code = core.DIGIT * core.DIGIT * core.DIGIT
local warn_agent = (uri.host * (P":" * uri.port)^-1) + pseudonym
local warn_text = _M.quoted_string
local warn_date = core.DQUOTE * HTTP_date * core.DQUOTE
local warning_value = warn_code * core.SP * warn_agent * core.SP * warn_text * (core.SP * warn_date)^-1
_M.Warning = comma_sep_trim(warning_value, 1)

-- RFC 7235 Section 2
local auth_scheme = _M.token
-- parameter names are lower-cased; name and value are grouped for the rawset fold
local auth_param = Cg(_M.token / string.lower * _M.BWS * P"=" * _M.BWS * (_M.token + _M.quoted_string))
local token68 = C((core.ALPHA + core.DIGIT + P"-" + P"." + P"_" + P"~" + P"+" + P"/" )^1 * (P"=")^0)
-- TODO: each parameter name MUST only occur once per challenge
-- a scheme, optionally followed by either a table of auth-params or a token68
local challenge = auth_scheme * (core.SP^1 * (Cf(Ct(true) * comma_sep(auth_param), rawset) + token68))^-1
local credentials = challenge

-- RFC 7235 Section 4
_M.WWW_Authenticate = comma_sep_trim(Ct(challenge), 1)
_M.Authorization = credentials
_M.Proxy_Authenticate = _M.WWW_Authenticate
-- Proxy-Authorization = credentials, i.e. the same grammar as Authorization.
-- (This used to assign the still-unset field to itself, leaving it nil.)
_M.Proxy_Authorization = _M.Authorization

-- RFC 7239 Section 4
local value = _M.token + _M.quoted_string
local forwarded_pair = _M.token * P"=" * value
local forwarded_element = forwarded_pair^-1 * (P";" * forwarded_pair^-1)^0
_M.Forwarded = comma_sep_trim(forwarded_element)

-- RFC 7469
local Public_Key_Directives = directive * (_M.OWS * P";" * _M.OWS * directive)^0
_M.Public_Key_Pins = Public_Key_Directives
_M.Public_Key_Pins_Report_Only = Public_Key_Directives

-- RFC 7486
_M.Hobareg = C"regok" + C"reginwork"

-- RFC 7615
_M.Authentication_Info = comma_sep_trim(auth_param)
_M.Proxy_Authentication_Info = comma_sep_trim(auth_param)

-- RFC 7639
local protocol_id = _M.token
_M.ALPN = comma_sep_trim(protocol_id, 1)

-- RFC 7809
_M.CalDAV_Timezones = T_F

-- RFC 7838
local clear = C"clear" -- case-sensitive
local alt_authority = _M.quoted_string -- containing [ uri_host ] ":" port
local alternative = protocol_id * P"=" * alt_authority
local alt_value = alternative * (_M.OWS * P";" * _M.OWS * parameter)^0
_M.Alt_Svc = clear + comma_sep_trim(alt_value, 1)
_M.Alt_Used = uri.host * (P":" * uri.port)^-1

return _M
lpeg_patterns-0.4/lpeg_patterns/language.lua000066400000000000000000000042471301505066200213660ustar00rootroot00000000000000-- RFC 5646 Section 2.1

local lpeg = require "lpeg"
local core = require "lpeg_patterns.core"

local C = lpeg.C
local P = lpeg.P
local R = lpeg.R
local Cg = lpeg.Cg
local Ct = lpeg.Ct
local Cmt = lpeg.Cmt

local M = {}

local alphanum = core.ALPHA + core.DIGIT

local extlang = core.ALPHA * core.ALPHA * core.ALPHA * -#alphanum
	* (P"-" * core.ALPHA *
core.ALPHA * core.ALPHA * -#alphanum)^-2 local language = Cg(core.ALPHA * core.ALPHA * core.ALPHA * core.ALPHA * core.ALPHA * core.ALPHA^-3, "language") + Cg(core.ALPHA * core.ALPHA * core.ALPHA * core.ALPHA, "language") + Cg(core.ALPHA * core.ALPHA * core.ALPHA^-1, "language") * (P"-" * Cg(extlang, "extlang"))^-1 local script = core.ALPHA * core.ALPHA * core.ALPHA * core.ALPHA * -#alphanum -- Prevent intepretation of a 'variant' local region = ( core.ALPHA * core.ALPHA + core.DIGIT * core.DIGIT * core.DIGIT ) * -#alphanum -- Prevent intepretation of a 'variant' local variant = core.DIGIT * alphanum * alphanum * alphanum + alphanum * alphanum * alphanum * alphanum * alphanum * alphanum^-3 local singleton = core.DIGIT + R("AW", "YZ", "aw", "yz") local extension = C(singleton) * Ct((P"-" * (alphanum*alphanum*alphanum^-6 / string.lower))^1) M.privateuse = P"x" * Ct((P"-" * C(alphanum*alphanum^-7))^1) M.langtag = language * (P"-" * Cg(script, "script"))^-1 * (P"-" * Cg(region, "region"))^-1 * Cg(Ct((P"-" * C(variant))^1), "variant")^-1 * Cg(Cmt(Ct((P"-" * Ct(extension))^1), function(_, _, c) -- Can't use a fold with rawset as we want the pattern to not match if there is a duplicate extension local r = {} for _, v in ipairs(c) do local a, b = v[1], v[2] if r[a] then -- duplicate extension return false end r[a] = b end return true, r end), "extension")^-1 * (P"-" * Cg(M.privateuse, "privateuse"))^-1 local irregular = P"en-GB-oed" + P"i-ami" + P"i-bnn" + P"i-default" + P"i-enochian" + P"i-hak" + P"i-klingon" + P"i-lux" + P"i-mingo" + P"i-navajo" + P"i-pwn" + P"i-tao" + P"i-tay" + P"i-tsu" + P"sgn-BE-FR" + P"sgn-BE-NL" + P"sgn-CH-DE" M.Language_Tag = C((M.langtag + M.privateuse + irregular) / function() end) -- capture the whole tag. 
throws away decomposition return M lpeg_patterns-0.4/lpeg_patterns/phone.lua000066400000000000000000000064051301505066200207120ustar00rootroot00000000000000-- Phone numbers local lpeg = require "lpeg" local P = lpeg.P local R = lpeg.R local S = lpeg.S local digit = R"09" local seperator = S"- ,." local function optional_parens(patt) return P"(" * patt * P")" + patt end local _M = {} local extension = P"e" * (P"xt")^-1 * seperator^-1 * digit^1 local optional_extension = (seperator^-1 * extension)^-1 _M.Australia = ( -- Normal landlines optional_parens((P"0")^-1*S"2378") * seperator^-1 * digit*digit*digit*digit * seperator^-1 * digit*digit*digit*digit -- Mobile numbers + (optional_parens(P"0"*S"45"*digit*digit) + S"45"*digit*digit) * seperator^-1 * digit*digit*digit * seperator^-1 * digit*digit*digit -- Local rate calls + P"1300" * seperator^-1 * digit*digit*digit * seperator^-1 * digit*digit*digit + P"1345" * seperator^-1 * digit*digit * seperator^-1 * digit*digit --only used for back-to-base monitored alarm systems + P"13" * seperator^-1 * digit*digit * seperator^-1 * digit*digit + (P"0")^-1*P"198" * seperator^-1 * digit*digit*digit * seperator^-1 * digit*digit*digit -- data calls -- Free calls + P"1800" * seperator^-1 * digit*digit*digit * seperator^-1 * digit*digit*digit + P"180" * seperator^-1 * digit*digit*digit*digit ) * optional_extension local NPA = (digit-S"01")*digit*digit local NXX = ((digit-S"01")*(digit-P"9")-P"37"-P"96")*digit-P(1)*P"11" local USSubscriber = digit*digit*digit*digit _M.USA = ((P"1" * seperator^-1)^-1 * optional_parens(NPA) * seperator^-1)^-1 * NXX * seperator^-1 * USSubscriber * optional_extension local international = ( P"1" * seperator^-1 * #(-P"1") * _M.USA + P"61" * seperator^-1 * #(digit-P"0") * _M.Australia -- Other countries we haven't made specific patterns for yet +(P"20"+P"212"+P"213"+P"216"+P"218"+P"220"+P"221" +P"222"+P"223"+P"224"+P"225"+P"226"+P"227"+P"228"+P"229" +P"230"+P"231"+P"232"+P"233"+P"234"+P"235"+P"236"+P"237" 
+P"238"+P"239"+P"240"+P"241"+P"242"+P"243"+P"244"+P"245" +P"246"+P"247"+P"248"+P"249"+P"250"+P"251"+P"252"+P"253" +P"254"+P"255"+P"256"+P"257"+P"258"+P"260"+P"261"+P"262" +P"263"+P"264"+P"265"+P"266"+P"267"+P"268"+P"269"+P"27" +P"290"+P"291"+P"297"+P"298"+P"299"+P"30" +P"31" +P"32" +P"33" +P"34" +P"350"+P"351"+P"352"+P"353"+P"354"+P"355" +P"356"+P"357"+P"358"+P"359"+P"36" +P"370"+P"371"+P"372" +P"373"+P"374"+P"375"+P"376"+P"377"+P"378"+P"380"+P"381" +P"385"+P"386"+P"387"+P"389"+P"39" +P"40" +P"41" +P"420" +P"421"+P"423"+P"43" +P"44" +P"45" +P"46" +P"47" +P"48" +P"49" +P"500"+P"501"+P"502"+P"503"+P"504"+P"505"+P"506" +P"507"+P"508"+P"509"+P"51" +P"52" +P"53" +P"54" +P"55" +P"56" +P"57" +P"58" +P"590"+P"591"+P"592"+P"593"+P"594" +P"595"+P"596"+P"597"+P"598"+P"599"+P"60" +P"62" +P"63" +P"64" +P"65" +P"66" +P"670"+P"672"+P"673"+P"674" +P"675"+P"676"+P"677"+P"678"+P"679"+P"680"+P"681"+P"682" +P"683"+P"684"+P"685"+P"686"+P"687"+P"688"+P"689"+P"690" +P"691"+P"692"+P"7" +P"808"+P"81" +P"82" +P"84" +P"850" +P"852"+P"853"+P"855"+P"856"+P"86" +P"870"+P"871"+P"872" +P"873"+P"874"+P"878"+P"880"+P"881"+P"886"+P"90" +P"91" +P"92" +P"93" +P"94" +P"95" +P"960"+P"961"+P"962"+P"963" +P"964"+P"965"+P"966"+P"967"+P"968"+P"970"+P"971"+P"972" +P"973"+P"974"+P"975"+P"976"+P"977"+P"98" +P"992"+P"993" +P"994"+P"995"+P"996"+P"998" ) * (seperator^-1*digit)^6 -- At least 6 digits ) _M.phone = P"+" * seperator^-1 * international return _M lpeg_patterns-0.4/lpeg_patterns/uri.lua000066400000000000000000000102451301505066200203750ustar00rootroot00000000000000-- URI -- RFC 3986 local lpeg = require "lpeg" local P = lpeg.P local S = lpeg.S local C = lpeg.C local Cc = lpeg.Cc local Cg = lpeg.Cg local Cs = lpeg.Cs local Ct = lpeg.Ct local util = require "lpeg_patterns.util" local core = require "lpeg_patterns.core" local ALPHA = core.ALPHA local DIGIT = core.DIGIT local HEXDIG = core.HEXDIG local IPv4address = require "lpeg_patterns.IPv4".IPv4address local IPv6address = require 
"lpeg_patterns.IPv6".IPv6address local _M = {} _M.sub_delims = S"!$&'()*+,;=" -- 2.2 local unreserved = ALPHA + DIGIT + S"-._~" -- 2.3 _M.pct_encoded = P"%" * (HEXDIG * HEXDIG / util.read_hex) / function(n) local c = string.char(n) if unreserved:match(c) then -- always decode unreserved characters (2.3) return c else -- normalise to upper-case (6.2.2.1) return string.format("%%%02X", n) end end -- 2.1 _M.scheme = ALPHA * (ALPHA + DIGIT + S"+-.")^0 / string.lower -- 3.1 _M.userinfo = Cs((unreserved + _M.pct_encoded + _M.sub_delims + P":")^0) -- 3.2.1 -- Host 3.2.2 local IPvFuture_mt = { __name = "lpeg_patterns.IPvFuture"; } function IPvFuture_mt:__tostring() return string.format("v%x.%s", self.version, self.string) end local function new_IPvFuture(version, string) return setmetatable({version=version, string=string}, IPvFuture_mt) end local IPvFuture = S"vV" * (HEXDIG^1/util.read_hex) * P"." * C((unreserved+_M.sub_delims+P":")^1) / new_IPvFuture -- RFC 6874 local ZoneID = Cs((unreserved + _M.pct_encoded)^1) local IPv6addrz = IPv6address * (P"%25" * ZoneID)^-1 / function(IPv6, zoneid) IPv6:setzoneid(zoneid) return IPv6 end _M.IP_literal = P"[" * (IPv6addrz + IPvFuture) * P"]" local IP_host = (_M.IP_literal + IPv4address) / tostring local reg_name = Cs(( unreserved / string.lower + _M.pct_encoded / function(s) return s:sub(1,1) == "%" and s or string.lower(s) end + _M.sub_delims )^1) + Cc(nil) _M.host = IP_host + reg_name _M.port = DIGIT^0 / tonumber -- 3.2.3 -- Path 3.3 local pchar = unreserved + _M.pct_encoded + _M.sub_delims + S":@" local segment = pchar^0 _M.segment = Cs(segment) local segment_nz = pchar^1 local segment_nz_nc = (pchar - P":")^1 -- an empty path is nil instead of the empty string local path_empty = Cc(nil) local path_abempty = Cs((P"/" * segment)^1) + path_empty local path_rootless = Cs(segment_nz * (P"/" * segment)^0) local path_noscheme = Cs(segment_nz_nc * (P"/" * segment)^0) local path_absolute = Cs(P"/" * (segment_nz * (P"/" * segment)^0)^-1) 
_M.query = Cs( ( pchar + S"/?" )^0 ) -- 3.4 _M.fragment = _M.query -- 3.5 -- Put together with named captures _M.authority = ( Cg(_M.userinfo, "userinfo") * P"@" )^-1 * Cg(_M.host, "host") * ( P":" * Cg(_M.port, "port") )^-1 local hier_part = P"//" * _M.authority * Cg (path_abempty, "path") + Cg(path_absolute + path_rootless + path_empty, "path") _M.absolute_uri = Ct ( ( Cg(_M.scheme, "scheme") * P":" ) * hier_part * ( P"?" * Cg(_M.query, "query"))^-1 ) _M.uri = Ct ( ( Cg(_M.scheme, "scheme") * P":" ) * hier_part * ( P"?" * Cg(_M.query, "query"))^-1 * ( P"#" * Cg(_M.fragment, "fragment"))^-1 ) _M.relative_part = P"//" * _M.authority * Cg(path_abempty, "path") + Cg(path_absolute + path_noscheme + path_empty, "path") local relative_ref = Ct ( _M.relative_part * ( P"?" * Cg(_M.query, "query"))^-1 * ( P"#" * Cg(_M.fragment, "fragment"))^-1 ) _M.uri_reference = _M.uri + relative_ref _M.path = path_abempty + path_absolute + path_noscheme + path_rootless + path_empty -- Create a slightly more sane host pattern -- scheme is optional -- the "//" isn't required -- if missing, the host needs to at least have a "." and end in two alpha characters -- an authority is always required local sane_host_char = unreserved / string.lower local hostsegment = (sane_host_char - P".")^1 local dns_entry = Cs ( ( hostsegment * P"." )^1 * ALPHA^2 ) _M.sane_host = IP_host + dns_entry _M.sane_authority = ( Cg(_M.userinfo, "userinfo") * P"@" )^-1 * Cg(_M.sane_host, "host") * ( P":" * Cg(_M.port, "port") )^-1 local sane_hier_part = (P"//")^-1 * _M.sane_authority * Cg(path_absolute + path_empty, "path") _M.sane_uri = Ct ( ( Cg(_M.scheme, "scheme") * P":" )^-1 * sane_hier_part * ( P"?" 
* Cg(_M.query, "query"))^-1 * ( P"#" * Cg(_M.fragment, "fragment"))^-1 ) return _M lpeg_patterns-0.4/lpeg_patterns/util.lua000066400000000000000000000012401301505066200205460ustar00rootroot00000000000000local function read_hex(hex_num) return tonumber(hex_num, 16) end local safe_tonumber do -- locale independent tonumber function local tolocale local function updatelocale() local decpoint = string.format("%f", 0.5):match "[^05]+" if decpoint == "." then tolocale = function(str) return str end else tolocale = function(str) str = str:gsub("%.", decpoint, 1) return str end end end updatelocale() safe_tonumber = function(str) local num = tonumber(tolocale(str)) if num then return num else updatelocale() return tonumber(tolocale(str)) end end end return { read_hex = read_hex; safe_tonumber = safe_tonumber; } lpeg_patterns-0.4/spec/000077500000000000000000000000001301505066200151545ustar00rootroot00000000000000lpeg_patterns-0.4/spec/IPv6_spec.lua000066400000000000000000000024631301505066200174620ustar00rootroot00000000000000local lpeg = require "lpeg" describe("IPv6 Addresses", function() local IPv6address = require "lpeg_patterns.IPv6".IPv6address local IPv6address_only = IPv6address * lpeg.P(-1) it("Addresses are parsed correctly", function() local function same(str, ...) local addr = IPv6address_only:match(str) assert(addr, "Could not parse " .. 
str) assert.same({...}, {addr:unpack()}) end same("::", 0,0,0,0,0,0,0,0) same("::0.0.0.0", 0,0,0,0,0,0,0,0) same("::0:0.0.0.0", 0,0,0,0,0,0,0,0) same("0::0.0.0.0", 0,0,0,0,0,0,0,0) same("::1", 0,0,0,0,0,0,0,1) same("ff02::1", 0xff02,0,0,0,0,0,0,1) same("2001:0db8:85a3:0042:1000:8a2e:0370:7334", 0x2001, 0x0db8, 0x85a3, 0x0042, 0x1000, 0x8a2e, 0x0370, 0x7334) same("::FFFF:204.152.189.116", 0, 0, 0, 0, 0, 0xFFFF, 204*256+152, 189*256+116) end) it("Non-addresses fail parsing", function() assert.falsy(IPv6address_only:match"") assert.falsy(IPv6address_only:match"not an ip") assert.falsy(IPv6address_only:match"::x") assert.falsy(IPv6address_only:match"x::") assert.falsy(IPv6address_only:match":::") assert.falsy(IPv6address_only:match":1::") -- Two :: assert.falsy(IPv6address_only:match"1234::5678::") -- Invalid IPv4 assert.falsy(IPv6address_only:match"::FFFF:0.0.0") assert.falsy(IPv6address_only:match"::FFFF:0.999.0.0") end) end) lpeg_patterns-0.4/spec/email_spec.lua000066400000000000000000000224131301505066200177620ustar00rootroot00000000000000local lpeg = require "lpeg" local EOF = lpeg.P(-1) describe("email Addresses", function() local email = lpeg.Ct(require "lpeg_patterns.email".email) * EOF it("Pass valid addresses", function() assert.same({"localpart", "example.com"}, email:match "localpart@example.com") end) it("Deny invalid addresses", function() assert.falsy(email:match "not an address") end) it("Handle unusual localpart", function() assert.same({"foo.bar", "example.com"}, email:match "foo.bar@example.com") assert.same({"foo+", "example.com"}, email:match "foo+@example.com") assert.same({"foo+bar", "example.com"}, email:match "foo+bar@example.com") assert.same({"!#$%&'*+-/=?^_`{}|~", "example.com"}, email:match "!#$%&'*+-/=?^_`{}|~@example.com") assert.same({[[quoted]], "example.com"}, email:match [["quoted"@example.com]]) assert.same({[[quoted string]], "example.com"}, email:match [["quoted string"@example.com]]) assert.same({[[quoted@symbol]], "example.com"}, 
email:match [["quoted@symbol"@example.com]]) assert.same({[=[very.(),:;<>[]".VERY."very@\ "very".unusual]=], "example.com"}, email:match [=["very.(),:;<>[]\".VERY.\"very@\\ \"very\".unusual"@example.com]=]) end) it("folds whitespace", function() assert.same({"localpart ", "example.com"}, email:match [["localpart "@example.com]]) assert.same({"localpart ", "example.com"}, email:match [["localpart "@example.com]]) assert.same({" localpart ", "example.com"}, email:match [[" localpart "@example.com]]) assert.same({" localpart and again ", "example.com"}, email:match [[" localpart and again "@example.com]]) assert.same({"localpart", "example.com "}, email:match [=[localpart@[example.com ]]=]) assert.same({"localpart", "example.com "}, email:match [=[localpart@[example.com ]]=]) assert.same({"localpart", " example.com"}, email:match [=[localpart@[ example.com]]=]) assert.same({"localpart", " example.com "}, email:match [=[localpart@[ example.com ]]=]) assert.same({"localpart", " example with whitespace "}, email:match [=[localpart@[ example with whitespace ]]=]) end) it("Ignore invalid localpart", function() assert.falsy(email:match "@example.com") assert.falsy(email:match ".@example.com") assert.falsy(email:match "foobar.@example.com") assert.falsy(email:match "@foo@example.com") assert.falsy(email:match "foo@bar@example.com") assert.falsy(email:match [[just"not"right@example.com]]) -- quoted strings must be dot separated, or the only element making up the local-pat assert.falsy(email:match "\127@example.com") end) it("Handle unusual hosts", function() assert.same({"localpart", "host_name"}, email:match "localpart@host_name") assert.same({"localpart", "127.0.0.1"}, email:match "localpart@[127.0.0.1]") assert.same({"localpart", "IPv6:2001::d1"}, email:match "localpart@[IPv6:2001::d1]") assert.same({"localpart", "::1"}, email:match "localpart@[::1]") end) it("Handle comments", function() assert.same({"localpart", "example.com"}, email:match 
"(comment)localpart@example.com") assert.same({"localpart", "example.com"}, email:match "localpart(comment)@example.com") assert.same({"quoted", "example.com"}, email:match "(comment)\"quoted\"@example.com") assert.same({"quoted", "example.com"}, email:match "\"quoted\"(comment)@example.com") assert.same({"localpart", "example.com"}, email:match "localpart@(comment)example.com") assert.same({"localpart", "example.com"}, email:match "localpart@example.com(comment)") end) it("Handle escaped items in quotes", function() assert.same({"escape d", "example.com"}, email:match [["escape\ d"(comment)@example.com]]) assert.same({"escape\"d", "example.com"}, email:match [["escape\"d"(comment)@example.com]]) -- tests obs-qp assert.same({"escape\0d", "example.com"}, email:match "\"escape\\\0d\"@example.com") end) it("processes obs-dtext", function() assert.same({"localpart", "escape d"}, email:match "localpart@[escape\\ d]") end) it("processes obs-local-part", function() -- obs-local-part allows whitespace between atoms assert.same({"local.part", "example.com"}, email:match [[local .part@example.com]]) -- obs-local-part allows individually quoted atoms assert.same({"local.part", "example.com"}, email:match [["local".part@example.com]]) end) it("processes obs-domain", function() -- obs-domain allows whitespace between atoms assert.same({"localpart", "example.com"}, email:match [[localpart@example .com]]) end) it("Examples from RFC 3696 Section 3", function() -- Note: Look at errata 246, the followup 3563 and the followup to the followup 4002 -- not only did the RFC author get some of these wrong, so did the RFC errata verifiers assert.same({"Abc@def", "example.com"}, email:match [["Abc\@def"@example.com]]) assert.same({"Abc@def", "example.com"}, email:match [["Abc@def"@example.com]]) assert.same({"Fred Bloggs", "example.com"}, email:match [["Fred\ Bloggs"@example.com]]) assert.same({"Fred Bloggs", "example.com"}, email:match [["Fred Bloggs"@example.com]]) 
assert.same({[[Joe.\Blow]], "example.com"}, email:match [["Joe.\\Blow"@example.com]]) assert.same({[[Joe.Blow]], "example.com"}, email:match [["Joe.\Blow"@example.com]]) assert.same({"Abc@def", "example.com"}, email:match [["Abc@def"@example.com]]) assert.same({"Fred Bloggs", "example.com"}, email:match [["Fred Bloggs"@example.com]]) assert.same({"user+mailbox", "example.com"}, email:match [[user+mailbox@example.com]]) assert.same({"customer/department", "example.com"}, email:match [[customer/department@example.com]]) assert.same({"$A12345", "example.com"}, email:match [[$A12345@example.com]]) assert.same({"!def!xyz%abc", "example.com"}, email:match [[!def!xyz%abc@example.com]]) assert.same({"_somename", "example.com"}, email:match [[_somename@example.com]]) end) end) describe("email nocfws variants", function() local email_nocfws = lpeg.Ct(require "lpeg_patterns.email".email_nocfws) * EOF it("Pass valid addresses", function() assert.same({"localpart", "example.com"}, email_nocfws:match "localpart@example.com") end) it("Deny invalid addresses", function() assert.falsy(email_nocfws:match "not an address") end) it("Handle unusual localpart", function() assert.same({"foo.bar", "example.com"}, email_nocfws:match "foo.bar@example.com") assert.same({"foo+", "example.com"}, email_nocfws:match "foo+@example.com") assert.same({"foo+bar", "example.com"}, email_nocfws:match "foo+bar@example.com") assert.same({"!#$%&'*+-/=?^_`{}|~", "example.com"}, email_nocfws:match "!#$%&'*+-/=?^_`{}|~@example.com") assert.same({[[quoted]], "example.com"}, email_nocfws:match [["quoted"@example.com]]) assert.same({[[quoted string]], "example.com"}, email_nocfws:match [["quoted string"@example.com]]) assert.same({[[quoted@symbol]], "example.com"}, email_nocfws:match [["quoted@symbol"@example.com]]) assert.same({[=[very.(),:;<>[]".VERY."very@\ "very".unusual]=], "example.com"}, email_nocfws:match [=["very.(),:;<>[]\".VERY.\"very@\\ \"very\".unusual"@example.com]=]) end) it("Ignore invalid 
localpart", function() assert.falsy(email_nocfws:match "@example.com") assert.falsy(email_nocfws:match ".@example.com") assert.falsy(email_nocfws:match "foobar.@example.com") assert.falsy(email_nocfws:match "@foo@example.com") assert.falsy(email_nocfws:match "foo@bar@example.com") assert.falsy(email_nocfws:match [[just"not"right@example.com]]) -- quoted strings must be dot separated, or the only element making up the local-pat assert.falsy(email_nocfws:match "\127@example.com") end) it("Handle unusual hosts", function() assert.same({"localpart", "host_name"}, email_nocfws:match "localpart@host_name") assert.same({"localpart", "127.0.0.1"}, email_nocfws:match "localpart@[127.0.0.1]") assert.same({"localpart", "IPv6:2001::d1"}, email_nocfws:match "localpart@[IPv6:2001::d1]") assert.same({"localpart", "::1"}, email_nocfws:match "localpart@[::1]") end) it("Doesn't allow comments", function() assert.falsy(email_nocfws:match "(comment)localpart@example.com") assert.falsy(email_nocfws:match "localpart(comment)@example.com") assert.falsy(email_nocfws:match "(comment)\"quoted\"@example.com") assert.falsy(email_nocfws:match "\"quoted\"(comment)@example.com") assert.falsy(email_nocfws:match "localpart@example.com(comment)") assert.falsy(email_nocfws:match "localpart@example.com(comment)") end) end) describe("mailbox", function() local mailbox = lpeg.Ct(require "lpeg_patterns.email".mailbox) * EOF it("matches an addr-spec", function() assert.same({"foo", "example.com"}, mailbox:match "foo@example.com") end) it("matches a name-addr", function() assert.same({"foo", "example.com"}, mailbox:match "") assert.same({"foo", "example.com", display = "Foo"}, mailbox:match "Foo") assert.same({"foo", "example.com", display = "Foo "}, mailbox:match "Foo ") assert.same({"foo", "example.com", display = [["Foo"]]}, mailbox:match [["Foo"]]) assert.same({"foo", "example.com", display = "Old.Style.With.Dots"}, mailbox:match "Old.Style.With.Dots") assert.same({"foo", "example.com", display = 
"Multiple Words"}, mailbox:match "Multiple Words") end) it("matches a old school name-addr", function() assert.same({"foo", "example.com", route = {"wow", "such", "domains"}}, mailbox:match "<@wow,@such,,@domains:foo@example.com>") end) end) lpeg_patterns-0.4/spec/http_spec.lua000066400000000000000000000347321301505066200176610ustar00rootroot00000000000000describe("http patterns", function() local http = require "lpeg_patterns.http" local lpeg = require "lpeg" local EOF = lpeg.P(-1) it("Parses a SLUG header", function() local SLUG = http.SLUG * EOF assert.same("foo", SLUG:match("foo")) assert.same("foo bar", SLUG:match("foo bar")) assert.same("foo bar", SLUG:match("foo bar")) assert.same("foo bar", SLUG:match("foo %20 bar")) end) it("Parses an Origin header", function() local Origin = lpeg.Ct(http.Origin) * EOF assert.same({}, Origin:match("null")) assert.same({"http://example.com"}, Origin:match("http://example.com")) assert.same({"http://example.com", "https://foo.org"}, Origin:match("http://example.com https://foo.org")) end) it("Parses an X-Frame-Options header", function() local X_Frame_Options = lpeg.Ct(http.X_Frame_Options) * EOF assert.same({"deny"}, X_Frame_Options:match("deny")) assert.same({"deny"}, X_Frame_Options:match("DENY")) assert.same({"deny"}, X_Frame_Options:match("dEnY")) assert.same({"http://example.com"}, X_Frame_Options:match("Allow-From http://example.com")) end) it("Splits a request line", function() local request_line = lpeg.Ct(http.request_line) * EOF assert.same({"GET", "/", 1.0}, request_line:match("GET / HTTP/1.0\r\n")) assert.same({"GET", "http://foo.com/", 1.0}, request_line:match("GET http://foo.com/ HTTP/1.0\r\n")) assert.same({"OPTIONS", "*", 1.1}, request_line:match("OPTIONS * HTTP/1.1\r\n")) end) it("Splits an Upgrade header", function() local Upgrade = lpeg.Ct(http.Upgrade) * EOF assert.same({"Foo"}, Upgrade:match("Foo")) assert.same({"WebSocket"}, Upgrade:match("WebSocket")) assert.same({"HTTP/2.0", "SHTTP/1.3", "IRC/6.9", 
"RTA/x11"}, Upgrade:match("HTTP/2.0, SHTTP/1.3, IRC/6.9, RTA/x11")) end) it("Splits a Via header", function() local Via = lpeg.Ct(http.Via) * EOF assert.same({{protocol="HTTP/1.0", by="fred"}}, Via:match("1.0 fred")) assert.same({{protocol="HTTP/1.0", by="fred"}}, Via:match("HTTP/1.0 fred")) assert.same({{protocol="Other/myversion", by="fred"}}, Via:match("Other/myversion fred")) assert.same({{protocol="HTTP/1.1", by="p.example.net"}}, Via:match("1.1 p.example.net")) assert.same({ {protocol="HTTP/1.0", by="fred"}, {protocol="HTTP/1.1", by="p.example.net"} }, Via:match("1.0 fred, 1.1 p.example.net")) assert.same({ {protocol="HTTP/1.0", by="my.host:80"}, {protocol="HTTP/1.1", by="my.other.host"} }, Via:match("1.0 my.host:80, 1.1 my.other.host")) assert.same({ {protocol="HTTP/1.0", by="fred"}, {protocol="HTTP/1.1", by="p.example.net"} }, Via:match(",,,1.0 fred , ,,, 1.1 p.example.net,,,")) end) it("Handles folding whitespace in field_value", function() local field_value = http.field_value * EOF assert.same("Foo", field_value:match("Foo")) -- doesn't remove repeated whitespace assert.same("Foo Bar", field_value:match("Foo Bar")) -- unfolds whitespace broken over multiple lines assert.same("Foo Bar", field_value:match("Foo\r\n Bar")) assert.same("Foo Bar", field_value:match("Foo \r\n Bar")) end) it("Splits a Connection header", function() local Connection = lpeg.Ct(http.Connection) * EOF assert.same({}, Connection:match(" ")) assert.same({}, Connection:match(",")) assert.same({}, Connection:match(", ,")) assert.same({"foo"}, Connection:match("foo")) assert.same({"foo"}, Connection:match(" foo")) assert.same({"foo"}, Connection:match(" foo,,,")) assert.same({"foo"}, Connection:match(",, , foo ")) assert.same({"foo", "bar"}, Connection:match("foo,bar")) assert.same({"foo", "bar"}, Connection:match("foo, bar")) assert.same({"foo", "bar"}, Connection:match("foo , bar")) assert.same({"foo", "bar"}, Connection:match("foo\t, bar")) assert.same({"foo", "bar"}, 
Connection:match("foo,,, ,bar")) end) it("Parses a Transfer-Encoding header", function() local Transfer_Encoding = lpeg.Ct(http.Transfer_Encoding) * EOF assert.falsy(Transfer_Encoding:match("")) -- doesn't allow empty assert.same({{"foo"}}, Transfer_Encoding:match("foo")) assert.same({{"foo"}, {"bar"}}, Transfer_Encoding:match("foo, bar")) assert.same({{"foo", someext = "bar"}}, Transfer_Encoding:match("foo;someext=bar")) assert.same({{"foo", someext = "bar", another = "qux"}}, Transfer_Encoding:match("foo;someext=bar;another=\"qux\"")) -- q not allowed assert.falsy(Transfer_Encoding:match("foo;q=0.5")) assert.same({{"foo", queen = "foo"}}, Transfer_Encoding:match("foo;queen=foo")) -- check transfer parameters starting with q (but not q) are allowed end) it("Parses a TE header", function() local TE = lpeg.Ct(http.TE) * EOF assert.same({}, TE:match("")) -- allows empty assert.same({{"foo"}}, TE:match("foo")) assert.same({{"foo"}, {"bar"}}, TE:match("foo, bar")) assert.same({{"foo", q=0.5}}, TE:match("foo;q=0.5")) assert.same({{"foo", someext = "foo", q=0.5}}, TE:match("foo;someext=foo;q=0.5")) end) it("Splits a Trailer header", function() local Trailer = lpeg.Ct(http.Trailer) * EOF assert.falsy(Trailer:match(" ")) assert.falsy(Trailer:match(",")) assert.falsy(Trailer:match(", ,")) assert.same({"foo"}, Trailer:match("foo")) assert.same({"foo"}, Trailer:match(" foo")) assert.same({"foo"}, Trailer:match(" foo,,,")) assert.same({"foo"}, Trailer:match(",, , foo ")) assert.same({"foo", "bar"}, Trailer:match("foo,bar")) assert.same({"foo", "bar"}, Trailer:match("foo, bar")) assert.same({"foo", "bar"}, Trailer:match("foo , bar")) assert.same({"foo", "bar"}, Trailer:match("foo\t, bar")) assert.same({"foo", "bar"}, Trailer:match("foo,,, ,bar")) end) it("Parses a Content-Type header", function() local Content_Type = http.Content_Type * EOF assert.same({ type = "foo", subtype = "bar", parameters = {}}, Content_Type:match("foo/bar")) assert.same({ type = "foo", subtype = "bar", 
parameters = {param="value"}}, Content_Type:match("foo/bar;param=value")) -- Examples from RFC7231 3.1.1.1. assert.same({ type = "text", subtype = "html", parameters = {charset="utf-8"}}, Content_Type:match([[text/html;charset=utf-8]])) -- assert.same({ type = "text", subtype = "html", parameters = {charset="utf-8"}}, Content_Type:match([[text/html;charset=UTF-8]])) assert.same({ type = "text", subtype = "html", parameters = {charset="utf-8"}}, Content_Type:match([[Text/HTML;Charset="utf-8"]])) assert.same({ type = "text", subtype = "html", parameters = {charset="utf-8"}}, Content_Type:match([[text/html; charset="utf-8"]])) end) it("Parses an Accept header", function() local Accept = lpeg.Ct(http.Accept) * EOF assert.same({{type = "foo", subtype = "bar", parameters = {}, q = nil, extensions = {}}}, Accept:match("foo/bar")) assert.same({ {type = "audio", subtype = nil, parameters = {}, q = 0.2, extensions = {}}; {type = "audio", subtype = "basic", parameters = {}, q = nil, extensions = {}}; }, Accept:match("audio/*; q=0.2, audio/basic")) assert.same({ {type = "text", subtype = "plain", parameters = {}, q = 0.5, extensions = {}}; {type = "text", subtype = "html", parameters = {}, q = nil, extensions = {}}; {type = "text", subtype = "x-dvi", parameters = {}, q = 0.8, extensions = {}}; {type = "text", subtype = "x-c", parameters = {}, q = nil, extensions = {}}; }, Accept:match("text/plain; q=0.5, text/html, text/x-dvi; q=0.8, text/x-c")) assert.same({ {type = "text", subtype = nil, parameters = {}, extensions = {}}; {type = "text", subtype = "plain", parameters = {}, extensions = {}}; {type = "text", subtype = "plain", parameters = {format = "flowed"}, extensions = {}}; {type = nil, subtype = nil, parameters = {}, extensions = {}}; }, Accept:match("text/*, text/plain, text/plain;format=flowed, */*")) assert.same({ {type = "text", subtype = nil, parameters = {}, q = 0.3, extensions = {}}; {type = "text", subtype = "html", parameters = {}, q = 0.7, extensions = {}}; 
{type = "text", subtype = "html", parameters = {level = "1"}, q = nil, extensions = {}}; {type = "text", subtype = "html", parameters = {level = "2"}, q = 0.4, extensions = {}}; {type = nil, subtype = nil, parameters = {}, q = 0.5, extensions = {}}; }, Accept:match("text/*;q=0.3, text/html;q=0.7, text/html;level=1,text/html;level=2;q=0.4, */*;q=0.5")) end) it("Matches the 3 date formats", function() local Date = http.Date * EOF local example_time = { year = 1994; month = 11; day = 6; hour = 8; min = 49; sec = 37; wday = 1; } assert.same(example_time, Date:match"Sun, 06 Nov 1994 08:49:37 GMT") assert.same(example_time, Date:match"Sunday, 06-Nov-94 08:49:37 GMT") assert.same(example_time, Date:match"Sun Nov 6 08:49:37 1994") end) it("Parses a Sec-WebSocket-Extensions header", function() local Sec_WebSocket_Extensions = lpeg.Ct(http.Sec_WebSocket_Extensions) * EOF assert.same({{"foo", parameters = {}}}, Sec_WebSocket_Extensions:match"foo") assert.same({{"foo", parameters = {}}, {"bar", parameters = {}}}, Sec_WebSocket_Extensions:match"foo, bar") assert.same({{"foo", parameters = {hello = true; world = "extension"}}, {"bar", parameters = {}}}, Sec_WebSocket_Extensions:match"foo;hello;world=extension, bar") assert.same({{"foo", parameters = {hello = true; world = "extension"}}, {"bar", parameters = {}}}, Sec_WebSocket_Extensions:match"foo;hello;world=\"extension\", bar") -- quoted strings must be valid tokens assert.falsy(Sec_WebSocket_Extensions:match"foo;hello;world=\"exte\\\"nsion\", bar") end) it("Parses a Sec_WebSocket-Version-Client header", function() local Sec_WebSocket_Version_Client = http.Sec_WebSocket_Version_Client * EOF assert.same(1, Sec_WebSocket_Version_Client:match"1") assert.same(100, Sec_WebSocket_Version_Client:match"100") assert.same(255, Sec_WebSocket_Version_Client:match"255") assert.falsy(Sec_WebSocket_Version_Client:match"0") assert.falsy(Sec_WebSocket_Version_Client:match"256") assert.falsy(Sec_WebSocket_Version_Client:match"1.2") 
assert.falsy(Sec_WebSocket_Version_Client:match"090") end) it("Parses a Link header", function() local Link = lpeg.Ct(http.Link) * EOF assert.same({{{host="example.com"}}}, Link:match"") assert.same({{{scheme = "http"; host = "example.com"; path = "/TheBook/chapter2";}; rel = "previous"; title="previous chapter"}}, Link:match[[; rel="previous"; title="previous chapter"]]) assert.same({{{path = "/"}, rel = "http://example.net/foo"}}, Link:match[[; rel="http://example.net/foo"]]) assert.same({ {{path = "/TheBook/chapter2"}, rel = "previous", title = "letztes Kapitel"}; {{path = "/TheBook/chapter4"}, rel = "next", title = "nächstes Kapitel"}; }, Link:match[[; rel="previous"; title*=UTF-8'de'letztes%20Kapitel, ; rel="next"; title*=UTF-8'de'n%c3%a4chstes%20Kapitel]]) assert.same({{{scheme = "http"; host = "example.org"; path = "/"}, rel = "start http://example.net/relation/other"}}, Link:match[[; rel="start http://example.net/relation/other"]]) end) it("Parses a Set-Cookie header", function() local Set_Cookie = lpeg.Ct(http.Set_Cookie) * EOF assert.same({"SID", "31d4d96e407aad42", {}}, Set_Cookie:match"SID=31d4d96e407aad42") assert.same({"SID", "", {}}, Set_Cookie:match"SID=") assert.same({"SID", "31d4d96e407aad42", {path="/"; domain="example.com"}}, Set_Cookie:match"SID=31d4d96e407aad42; Path=/; Domain=example.com") assert.same({"SID", "31d4d96e407aad42", { path = "/"; domain = "example.com"; secure = true; expires = "Sun Nov 6 08:49:37 1994"; }}, Set_Cookie:match"SID=31d4d96e407aad42; Path=/; Domain=example.com; Secure; Expires=Sun Nov 6 08:49:37 1994") -- Space before '=' assert.same({"SID", "31d4d96e407aad42", {path = "/";}}, Set_Cookie:match"SID=31d4d96e407aad42; Path =/") -- Quoted cookie value assert.same({"SID", "31d4d96e407aad42", {path = "/";}}, Set_Cookie:match[[SID="31d4d96e407aad42"; Path=/]]) -- Crazy whitespace assert.same({"SID", "31d4d96e407aad42", {path = "/";}}, Set_Cookie:match"SID = 31d4d96e407aad42 ; Path = /") assert.same({"SID", 
"31d4d96e407aad42", {["foo bar"] = true;}}, Set_Cookie:match"SID = 31d4d96e407aad42 ; foo bar") end) it("Parses a Cookie header", function() local Cookie = http.Cookie * EOF assert.same({SID = "31d4d96e407aad42"}, Cookie:match"SID=31d4d96e407aad42") assert.same({SID = "31d4d96e407aad42"}, Cookie:match"SID = 31d4d96e407aad42") assert.same({SID = "31d4d96e407aad42", lang = "en-US"}, Cookie:match"SID=31d4d96e407aad42; lang=en-US") end) it("Parses a Content-Disposition header", function() local Content_Disposition = lpeg.Ct(http.Content_Disposition) * EOF assert.same({"foo", {}}, Content_Disposition:match"foo") assert.same({"foo", {filename="example"}}, Content_Disposition:match"foo; filename=example") assert.same({"foo", {filename="example"}}, Content_Disposition:match"foo; filename*=UTF-8''example") end) it("Parses a Strict-Transport-Security header", function() local sts_patt = lpeg.Cf(lpeg.Ct(true) * http.Strict_Transport_Security, rawset) * EOF assert.same({["max-age"] = "0"}, sts_patt:match("max-age=0")) assert.same({["max-age"] = "0"}, sts_patt:match("max-age = 0")) assert.same({["max-age"] = "0"}, sts_patt:match("Max-Age=0")) assert.same({["max-age"] = "0"; includesubdomains = true}, sts_patt:match("max-age=0;includeSubdomains")) assert.same({["max-age"] = "0"; includesubdomains = true}, sts_patt:match("max-age=0 ; includeSubdomains")) end) it("Parses an WWW_Authenticate header", function() local WWW_Authenticate = lpeg.Ct(http.WWW_Authenticate) * EOF assert.same({{"Newauth"}}, WWW_Authenticate:match"Newauth") assert.same({{"Newauth", {realm = "apps"}}}, WWW_Authenticate:match[[Newauth realm="apps"]]) assert.same({{"Newauth", {realm = "apps"}}}, WWW_Authenticate:match[[Newauth ReaLm="apps"]]) assert.same({{"Newauth"}, {"Basic"}}, WWW_Authenticate:match"Newauth, Basic") assert.same({{"Newauth", {realm = "apps", type="1", title="Login to \"apps\""}}, {"Basic", {realm="simple"}}}, WWW_Authenticate:match[[Newauth realm="apps", type=1, title="Login to \"apps\"", 
Basic realm="simple"]]) end) it("Parses a HPKP header", function() -- Example from RFC 7469 2.1.5 local pkp_patt = lpeg.Cf(lpeg.Ct(true) * http.Public_Key_Pins, function(t, k, v) table.insert(t, {k,v}) return t end) * EOF assert.same({ { "max-age", "3000" }; { "pin-sha256", "d6qzRu9zOECb90Uez27xWltNsj0e1Md7GkYYkVoZWmM=" }; { "pin-sha256", "E9CZ9INDbd+2eRQozYqqbQ2yXLVKB9+xcprMF+44U1g=" }; }, pkp_patt:match([[max-age=3000; pin-sha256="d6qzRu9zOECb90Uez27xWltNsj0e1Md7GkYYkVoZWmM="; pin-sha256="E9CZ9INDbd+2eRQozYqqbQ2yXLVKB9+xcprMF+44U1g="]])) end) end) lpeg_patterns-0.4/spec/language_spec.lua000066400000000000000000000131401301505066200204530ustar00rootroot00000000000000local lpeg = require "lpeg" local EOF = lpeg.P(-1) describe("language tags", function() local language = require "lpeg_patterns.language" local langtag = lpeg.Ct(language.langtag) * EOF local Language_Tag = language.Language_Tag * EOF describe("examples from RFC 5646 Appendix A", function() it("Parses Simple language subtag", function() assert.same({language = "de"}, langtag:match "de") -- German assert.same({language = "fr"}, langtag:match "fr") -- French assert.same({language = "ja"}, langtag:match "ja") -- Japanese assert.truthy(Language_Tag:match "i-enochian") -- example of a grandfathered tag end) it("Parses Language subtag plus Script subtag", function() assert.same({language = "zh"; script = "Hant"}, langtag:match "zh-Hant") -- Chinese written using the Traditional Chinese script assert.same({language = "zh"; script = "Hans"}, langtag:match "zh-Hans") -- Chinese written using the Simplified Chinese script assert.same({language = "sr"; script = "Cyrl"}, langtag:match "sr-Cyrl") -- Serbian written using the Cyrillic script assert.same({language = "sr"; script = "Latn"}, langtag:match "sr-Latn") -- Serbian written using the Latin script end) it("Parses Extended language subtags and their primary language subtag counterparts", function() assert.same({language = "zh"; extlang = "cmn", script = 
"Hans"; region = "CN"}, langtag:match "zh-cmn-Hans-CN") -- Chinese, Mandarin, Simplified script, as used in China assert.same({language = "cmn"; script = "Hans"; region = "CN"}, langtag:match "cmn-Hans-CN") -- Mandarin Chinese, Simplified script, as used in China assert.same({language = "zh"; extlang = "yue"; region = "HK"}, langtag:match "zh-yue-HK") -- Chinese, Cantonese, as used in Hong Kong SAR assert.same({language = "yue"; region = "HK"}, langtag:match "yue-HK") -- Cantonese Chinese, as used in Hong Kong SAR end) it("Parses Language-Script-Region", function() assert.same({language = "zh"; script = "Hans"; region = "CN"}, langtag:match "zh-Hans-CN") -- Chinese written using the Simplified script as used in mainland China assert.same({language = "sr"; script = "Latn"; region = "RS"}, langtag:match "sr-Latn-RS") -- Serbian written using the Latin script as used in Serbia end) it("Parses Language-Variant", function() assert.same({language = "sl"; variant = {"rozaj"}}, langtag:match "sl-rozaj") -- Resian dialect of Slovenian assert.same({language = "sl"; variant = {"rozaj", "biske"}}, langtag:match "sl-rozaj-biske") -- San Giorgio dialect of Resian dialect of Slovenian assert.same({language = "sl"; variant = {"nedis"}}, langtag:match "sl-nedis") -- Nadiza dialect of Slovenian end) it("Parses Language-Region-Variant", function() assert.same({language = "de"; region = "CH"; variant = {"1901"}}, langtag:match "de-CH-1901") -- German as used in Switzerland using the 1901 variant [orthography] assert.same({language = "sl"; region = "IT"; variant = {"nedis"}}, langtag:match "sl-IT-nedis") -- Slovenian as used in Italy, Nadiza dialect end) it("Parses Language-Script-Region-Variant", function() assert.same({language = "hy"; script = "Latn"; region = "IT"; variant = {"arevela"}}, langtag:match "hy-Latn-IT-arevela") -- Eastern Armenian written in Latin script, as used in Italy end) it("Parses Language-Region", function() assert.same({language = "de"; region = "DE"}, 
langtag:match "de-DE") -- German for Germany assert.same({language = "en"; region = "US"}, langtag:match "en-US") -- English as used in the United States assert.same({language = "es"; region = "419"}, langtag:match "es-419") -- Spanish appropriate for the Latin America and Caribbean region using the UN region code end) it("Parses private use subtags", function() assert.same({language = "de"; region = "CH"; privateuse = {"phonebk"}}, langtag:match "de-CH-x-phonebk") assert.same({language = "az"; script = "Arab"; privateuse = {"AZE", "derbend"}}, langtag:match "az-Arab-x-AZE-derbend") end) it("Parses private use registry values", function() assert.truthy(Language_Tag:match "x-whatever") -- private use using the singleton 'x' assert.same({language = "qaa"; script = "Qaaa"; region = "QM"; privateuse = {"southern"}}, langtag:match "qaa-Qaaa-QM-x-southern") -- all private tags assert.same({language = "de"; script = "Qaaa"}, langtag:match "de-Qaaa") -- German, with a private script assert.same({language = "sr"; script = "Latn"; region = "QM"}, langtag:match "sr-Latn-QM") -- Serbian, Latin script, private region assert.same({language = "sr"; script = "Qaaa"; region = "RS"}, langtag:match "sr-Qaaa-RS") -- Serbian, private script, for Serbia end) it("Parses tags that use extensions", function() assert.same({language = "en"; region = "US"; extension = { u = {"islamcal"}}}, langtag:match "en-US-u-islamcal") assert.same({language = "zh"; region = "CN"; extension = { a = {"myext"}}; privateuse = {"private"}}, langtag:match "zh-CN-a-myext-x-private") assert.same({language = "en"; extension = { a = {"myext"}, b = {"another"}}}, langtag:match "en-a-myext-b-another") end) it("Rejects Invalid Tags", function() assert.falsy(langtag:match "de-419-DE") -- two region tags assert.falsy(langtag:match "a-DE") -- use of a single-character subtag in primary position; note that there are a few grandfathered tags that start with "i-" that are valid assert.falsy(langtag:match 
"ar-a-aaa-b-bbb-a-ccc") -- two extensions with same single-letter prefix end) end) it("captures whole text when using Language_Tag", function() assert.same("en", Language_Tag:match "en") assert.same("hy-Latn-IT-arevela", Language_Tag:match "hy-Latn-IT-arevela") end) end) lpeg_patterns-0.4/spec/phone_spec.lua000066400000000000000000000023071301505066200200040ustar00rootroot00000000000000local lpeg = require "lpeg" describe("Phone numbers", function() local phone = require "lpeg_patterns.phone" local any_only = phone.phone * lpeg.P(-1) it("NANP (North America Numbering Plan)", function() assert.truthy(any_only:match"+12345678900") assert.truthy(any_only:match"+1 (234) 567-8900") assert.truthy(phone.USA:match"1 (234) 567-8900") assert.truthy(phone.USA:match"(234) 567-8900") assert.falsy(phone.USA:match"2 (234) 567-8900") -- N11 not allowed assert.falsy(any_only:match"+12345118900") -- N9X not allowed assert.falsy(any_only:match"+12345978900") -- 37X not allowed assert.falsy(any_only:match"+12343778900") -- 96X not allowed assert.falsy(any_only:match"+12349678900") end) it("Australian numbers", function() assert.truthy(phone.Australia:match"0390000000") assert.truthy(phone.Australia:match"3 90000000") assert.truthy(phone.Australia:match"3 9000 0000") assert.truthy(phone.Australia:match"400 000 000") assert.truthy(any_only:match"+61390000000") assert.truthy(any_only:match"+61 3 90000000") assert.truthy(any_only:match"+61 3 9000 0000") assert.truthy(any_only:match"+61 400 000 000") assert.falsy(any_only:match"+610390000000") end) end) lpeg_patterns-0.4/spec/uri_spec.lua000066400000000000000000000250121301505066200174700ustar00rootroot00000000000000local lpeg=require "lpeg" local uri_lib=require "lpeg_patterns.uri" describe("URI", function() local absolute_uri = uri_lib.absolute_uri * lpeg.P(-1) local uri = uri_lib.uri * lpeg.P(-1) local ref = uri_lib.uri_reference * lpeg.P(-1) local path = uri_lib.path * lpeg.P(-1) local segment = uri_lib.segment * lpeg.P(-1) it("Should 
break down full URIs correctly", function() assert.same({scheme="scheme", userinfo="userinfo", host="host", port=1234, path="/path", query="query", fragment="fragment"}, uri:match "scheme://userinfo@host:1234/path?query#fragment") assert.same({scheme="scheme", userinfo="userinfo", host="host", port=1234, path="/path", query="query"}, uri:match "scheme://userinfo@host:1234/path?query") assert.same({scheme="scheme", userinfo="userinfo", host="host", port=1234, path="/path"}, uri:match "scheme://userinfo@host:1234/path") assert.same({scheme="scheme", host="host", port=1234, path="/path"}, uri:match "scheme://host:1234/path") assert.same({scheme="scheme", host="host", path="/path"}, uri:match "scheme://host/path") assert.same({scheme="scheme", path="/path"}, uri:match "scheme:///path") assert.same({scheme="scheme"}, uri:match "scheme://") end) it("Normalises to lower case scheme", function() assert.same({scheme="scheme"}, uri:match "Scheme://") assert.same({scheme="scheme"}, uri:match "SCHEME://") end) it("shouldn't allow fragments when using absolute_uri", function() assert.falsy(absolute_uri:match "scheme://userinfo@host:1234/path?query#fragment") assert.same({scheme="scheme", userinfo="userinfo", host="host", port=1234, path="/path", query="query"}, absolute_uri:match "scheme://userinfo@host:1234/path?query") end) it("Should break down relative URIs correctly", function() assert.same({scheme="scheme", userinfo="userinfo", host="host", port=1234, path="/path", query="query", fragment="fragment"}, ref:match "scheme://userinfo@host:1234/path?query#fragment") assert.same({userinfo="userinfo", host="host", port=1234, path="/path", query="query", fragment="fragment"}, ref:match "//userinfo@host:1234/path?query#fragment") assert.same({host="host", port=1234, path="/path", query="query", fragment="fragment"}, ref:match "//host:1234/path?query#fragment") assert.same({host="host", path="/path", query="query", fragment="fragment"}, ref:match "//host/path?query#fragment") 
assert.same({path="/path", query="query", fragment="fragment"}, ref:match "///path?query#fragment") assert.same({path="/path", query="query", fragment="fragment"}, ref:match "/path?query#fragment") assert.same({path="/path", fragment="fragment"}, ref:match "/path#fragment") assert.same({path="/path"}, ref:match "/path") assert.same({}, ref:match "") assert.same({query="query"}, ref:match "?query") assert.same({fragment="fragment"}, ref:match "#fragment") end) it("Should match file urls", function() assert.same({scheme="file", path="/var/log/messages"}, uri:match "file:///var/log/messages") assert.same({scheme="file", path="/C:/Windows/"}, uri:match "file:///C:/Windows/") end) it("Should decode unreserved percent characters in path segment", function() assert.same("underscore_character", segment:match "underscore%5Fcharacter") assert.same("null%00byte", segment:match "null%00byte") end) it("Should decode unreserved percent characters path", function() assert.same("/underscore_character", path:match "/underscore%5Fcharacter") assert.same("/null%00byte", path:match "/null%00byte") end ) it("Should fail on incorrect percent characters", function() assert.falsy(path:match "/bad%x0percent") assert.falsy(path:match "/%s") end) it("Should not introduce ambiguiuty by decoding percent encoded entities", function() assert.same({query="query%26with&ersand"}, ref:match "?query%26with&ersand") end) it("Should decode unreserved percent characters in query and fragment", function() assert.same({query="query%20with_escapes"}, ref:match "?query%20with%5Fescapes") assert.same({fragment="fragment%20with_escapes"}, ref:match "#fragment%20with%5Fescapes") end) it("Should match localhost", function() assert.same({host="localhost"}, ref:match "//localhost") assert.same({host="localhost"}, ref:match "//LOCALHOST") assert.same({host="localhost"}, ref:match "//l%4FcAlH%6fSt") assert.same({host="localhost", port=8000}, ref:match "//localhost:8000") assert.same({scheme="http", 
host="localhost", port=8000}, uri:match "http://localhost:8000") end) it("Should work with IPv6", function() assert.same({host="0:0:0:0:0:0:0:1"}, ref:match "//[::1]") assert.same({host="0:0:0:0:0:0:0:1", port=80}, ref:match "//[::1]:80") end) it("IPvFuture", function() assert.same({host="v4.2", port=80}, ref:match "//[v4.2]:80") assert.same({host="v4.2", port=80}, ref:match "//[V4.2]:80") end) it("Should work with IPv6 zone local addresses", function() assert.same({host="0:0:0:0:0:0:0:1%eth0"}, ref:match "//[::1%25eth0]") end) it("Relative URI does not match authority when scheme is missing", function() assert.same({path="example.com/"}, ref:match "example.com/") -- should end up in path assert.same({scheme="scheme", host="example.com", path="/"}, ref:match "scheme://example.com/") end) it("Should work with mailto URIs", function() assert.same({scheme="mailto", path="user@example.com"}, uri:match "mailto:user@example.com") assert.same({scheme="mailto", path="someone@example.com,someoneelse@example.com"}, uri:match "mailto:someone@example.com,someoneelse@example.com") assert.same({scheme="mailto", path="user@example.com", query="subject=This%20is%20the%20subject&cc=someone_else@example.com&body=This%20is%20the%20body"}, uri:match "mailto:user@example.com?subject=This%20is%20the%20subject&cc=someone_else@example.com&body=This%20is%20the%20body") -- Examples from RFC-6068 -- Section 6.1 assert.same({scheme="mailto", path="chris@example.com"}, uri:match "mailto:chris@example.com") assert.same({scheme="mailto", path="infobot@example.com", query="subject=current-issue"}, uri:match "mailto:infobot@example.com?subject=current-issue") assert.same({scheme="mailto", path="infobot@example.com", query="body=send%20current-issue"}, uri:match "mailto:infobot@example.com?body=send%20current-issue") assert.same({scheme="mailto", path="infobot@example.com", query="body=send%20current-issue%0D%0Asend%20index"}, uri:match 
"mailto:infobot@example.com?body=send%20current-issue%0D%0Asend%20index") assert.same({scheme="mailto", path="list@example.org", query="In-Reply-To=%3C3469A91.D10AF4C@example.com%3E"}, uri:match "mailto:list@example.org?In-Reply-To=%3C3469A91.D10AF4C@example.com%3E") assert.same({scheme="mailto", path="majordomo@example.com", query="body=subscribe%20bamboo-l"}, uri:match "mailto:majordomo@example.com?body=subscribe%20bamboo-l") assert.same({scheme="mailto", path="joe@example.com", query="cc=bob@example.com&body=hello"}, uri:match "mailto:joe@example.com?cc=bob@example.com&body=hello") assert.same({scheme="mailto", path="gorby%25kremvax@example.com"}, uri:match "mailto:gorby%25kremvax@example.com") assert.same({scheme="mailto", path="unlikely%3Faddress@example.com", query="blat=foop"}, uri:match "mailto:unlikely%3Faddress@example.com?blat=foop") assert.same({scheme="mailto", path="Mike%26family@example.org"}, uri:match "mailto:Mike%26family@example.org") -- Section 6.2 assert.same({scheme="mailto", path=[[%22not%40me%22@example.org]]}, uri:match "mailto:%22not%40me%22@example.org") assert.same({scheme="mailto", path=[[%22oh%5C%5Cno%22@example.org]]}, uri:match "mailto:%22oh%5C%5Cno%22@example.org") assert.same({scheme="mailto", path=[[%22%5C%5C%5C%22it's%5C%20ugly%5C%5C%5C%22%22@example.org]]}, uri:match "mailto:%22%5C%5C%5C%22it's%5C%20ugly%5C%5C%5C%22%22@example.org") end) it("Should work with xmpp URIs", function() -- Examples from RFC-5122 assert.same({scheme="xmpp", path="node@example.com"}, uri:match "xmpp:node@example.com") assert.same({scheme="xmpp", userinfo="guest", host="example.com"}, uri:match "xmpp://guest@example.com") assert.same({scheme="xmpp", userinfo="guest", host="example.com", path="/support@example.com", query="message"}, uri:match "xmpp://guest@example.com/support@example.com?message") assert.same({scheme="xmpp", path="support@example.com", query="message"}, uri:match "xmpp:support@example.com?message") assert.same({scheme="xmpp", 
path="example-node@example.com"}, uri:match "xmpp:example-node@example.com") assert.same({scheme="xmpp", path="example-node@example.com/some-resource"}, uri:match "xmpp:example-node@example.com/some-resource") assert.same({scheme="xmpp", path="example.com"}, uri:match "xmpp:example.com") assert.same({scheme="xmpp", path="example-node@example.com", query="message"}, uri:match "xmpp:example-node@example.com?message") assert.same({scheme="xmpp", path="example-node@example.com", query="message;subject=Hello%20World"}, uri:match "xmpp:example-node@example.com?message;subject=Hello%20World") assert.same({scheme="xmpp", path=[[nasty!%23$%25()*+,-.;=%3F%5B%5C%5D%5E_%60%7B%7C%7D~node@example.com]]}, uri:match "xmpp:nasty!%23$%25()*+,-.;=%3F%5B%5C%5D%5E_%60%7B%7C%7D~node@example.com") assert.same({scheme="xmpp", path=[[node@example.com/repulsive%20!%23%22$%25&'()*+,-.%2F:;%3C=%3E%3F%40%5B%5C%5D%5E_%60%7B%7C%7D~resource]]}, uri:match [[xmpp:node@example.com/repulsive%20!%23%22$%25&'()*+,-.%2F:;%3C=%3E%3F%40%5B%5C%5D%5E_%60%7B%7C%7D~resource]]) assert.same({scheme="xmpp", path="ji%C5%99i@%C4%8Dechy.example/v%20Praze"}, uri:match "xmpp:ji%C5%99i@%C4%8Dechy.example/v%20Praze") end) end) describe("Sane URI", function() local sane_uri = uri_lib.sane_uri it("Not match the empty string", function() assert.falsy ( sane_uri:match "" ) end) it("Not match misc words", function() assert.falsy ( sane_uri:match "localhost" ) assert.falsy ( sane_uri:match "//localhost" ) assert.falsy ( sane_uri:match "the quick fox jumped over the lazy dog." 
) end) it("Not match numbers", function() assert.falsy( sane_uri:match "123" ) assert.falsy( sane_uri:match "17.3" ) assert.falsy( sane_uri:match "17.3234" ) assert.falsy( sane_uri:match "17.3234" ) end) it("Should match a host when no // present", function() assert.same({host="example.com"}, sane_uri:match "example.com") end) it("Match a scheme without a //", function() assert.same({scheme="scheme", host="example.com"}, sane_uri:match "scheme:example.com") end) it("Will match up to but not including a close parenthsis with empty path", function() assert.same({scheme="scheme", host="example.com"}, sane_uri:match "scheme:example.com)") end) end)