==> participle-0.7.1/.circleci/config.yml <==
version: 2
jobs:
  build:
    environment:
      GO111MODULE: "on"
    docker:
      - image: circleci/golang:1.14
    working_directory: /go/src/github.com/alecthomas/participle
    steps:
      - checkout
      - run:
          name: Prepare
          command: |
            go get -v github.com/jstemmer/go-junit-report
            curl -sfL https://install.goreleaser.com/github.com/golangci/golangci-lint.sh | bash -s v1.26.0
            mkdir ~/report
          when: always
      - run:
          name: Test
          command: |
            (go test -v ./... && (cd ./_examples && go test ./...)) 2>&1 | tee report.txt && go-junit-report < report.txt > ~/report/junit.xml
      - run:
          name: Lint
          command: |
            go build ./...
            ./bin/golangci-lint run
      - store_test_results:
          path: ~/report

==> participle-0.7.1/.golangci.yml <==
run:
  tests: true
  skip-dirs:
    - _examples

output:
  print-issued-lines: false

linters:
  enable-all: true
  disable:
    - maligned
    - megacheck
    - lll
    - gocyclo
    - gochecknoglobals
    - wsl
    - whitespace
    - godox
    - funlen
    - gocognit
    - gomnd
    - goerr113
    - godot
    - nestif
    - testpackage
    - nolintlint

linters-settings:
  govet:
    check-shadowing: true
  gocyclo:
    min-complexity: 10
  dupl:
    threshold: 100
  goconst:
    min-len: 8
    min-occurrences: 3

issues:
  max-per-linter: 0
  max-same: 0
  exclude-use-default: false
  exclude:
    # Captured by errcheck.
    - '^(G104|G204):'
    # Very commonly not checked.
    - 'Error return value of .(.*\.Help|.*\.MarkFlagRequired|(os\.)?std(out|err)\..*|.*Close|.*Flush|os\.Remove(All)?|.*printf?|os\.(Un)?Setenv). is not checked'
    - 'exported method `(.*\.MarshalJSON|.*\.UnmarshalJSON|.*\.EntityURN|.*\.GoString|.*\.Pos)` should have comment or be unexported'
    - 'composite literal uses unkeyed fields'
    - 'declaration of "err" shadows declaration'
    - 'bad syntax for struct tag key'
    - 'bad syntax for struct tag pair'
    - '^ST1012'

==> participle-0.7.1/COPYING <==
Copyright (C) 2017 Alec Thomas

Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the "Software"), to deal in
the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
of the Software, and to permit persons to whom the Software is furnished to do
so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
==> participle-0.7.1/README.md <==
# A dead simple parser package for Go

[![Godoc](https://godoc.org/github.com/alecthomas/participle?status.svg)](http://godoc.org/github.com/alecthomas/participle)
[![CircleCI](https://img.shields.io/circleci/project/github/alecthomas/participle.svg)](https://circleci.com/gh/alecthomas/participle)
[![Go Report Card](https://goreportcard.com/badge/github.com/alecthomas/participle)](https://goreportcard.com/report/github.com/alecthomas/participle)
[![Slack chat](https://img.shields.io/static/v1?logo=slack&style=flat&label=slack&color=green&message=gophers)](https://gophers.slack.com/messages/CN9DS8YF3)

- [Old version](#old-version)
- [Introduction](#introduction)
- [Limitations](#limitations)
- [Tutorial](#tutorial)
- [Overview](#overview)
- [Annotation syntax](#annotation-syntax)
- [Capturing](#capturing)
- [Streaming](#streaming)
- [Lexing](#lexing)
- [Options](#options)
- [Examples](#examples)
- [Performance](#performance)
- [Concurrency](#concurrency)
- [Error reporting](#error-reporting)
- [EBNF](#ebnf)

## Old version

This is an outdated version of Participle. See [here](https://pkg.go.dev/github.com/alecthomas/participle/?tab=versions) for a full list of available versions.

## Introduction

The goal of this package is to provide a simple, idiomatic and elegant way of defining parsers in Go.

Participle's method of defining grammars should be familiar to any Go programmer who has used the `encoding/json` package: struct field tags define what and how input is mapped to those same fields. This is not unusual for Go encoders, but is unusual for a parser.

## Limitations

Participle grammars are LL(k). Among other things, this means that they do not support left recursion. The default value of k is 1 but this can be controlled with `participle.UseLookahead(k)`.

Left recursion must be eliminated by restructuring your grammar.

## Tutorial

A [tutorial](TUTORIAL.md) is available, walking through the creation of an .ini parser.

## Overview

A grammar is an annotated Go structure used to both define the parser grammar, and be the AST output by the parser. As an example, following is the final INI parser from the tutorial.

```go
type INI struct {
  Properties []*Property `{ @@ }`
  Sections   []*Section  `{ @@ }`
}

type Section struct {
  Identifier string      `"[" @Ident "]"`
  Properties []*Property `{ @@ }`
}

type Property struct {
  Key   string `@Ident "="`
  Value *Value `@@`
}

type Value struct {
  String *string  ` @String`
  Number *float64 `| @Float`
}
```

> **Note:** Participle also supports named struct tags (eg. ``Hello string `parser:"@Ident"` ``).

A parser is constructed from a grammar and a lexer:

```go
parser, err := participle.Build(&INI{})
```

Once constructed, the parser is applied to input to produce an AST:

```go
ast := &INI{}
err := parser.ParseString("size = 10", ast)
// ast == &INI{
//   Properties: []*Property{
//     {Key: "size", Value: &Value{Number: &10}},
//   },
// }
```

## Annotation syntax

- `@<expr>` Capture expression into the field.
- `@@` Recursively capture using the field's own type.
- `<identifier>` Match named lexer token.
- `( ... )` Group.
- `"..."` Match the literal (note that the lexer must emit tokens matching this literal exactly).
- `"...":<identifier>` Match the literal, specifying the exact lexer token type to match.
- `<expr> <expr> ...` Match expressions.
- `<expr> | <expr>` Match one of the alternatives.
- `!<expr>` Match any token that is not the start of the expression (eg: `@!";"` matches anything but the `;` character into the field).
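For example, here is a minimal sketch of negation combined with capture (an illustrative grammar, not one from this repository; it also uses the `*` modifier described below):

```go
// Capture every token up to, but not including, the terminating
// semicolon, then consume the semicolon itself.
type Statement struct {
  Tokens []string `( @!";" )* ";"`
}
```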
The following modifiers can be used after any expression:

- `*` Expression can match zero or more times.
- `+` Expression must match one or more times.
- `?` Expression can match zero or once.
- `!` Require a non-empty match (this is useful with a sequence of optional matches eg. `("a"? "b"? "c"?)!`).

Supported but deprecated:

- `{ ... }` Match 0 or more times (**DEPRECATED** - prefer `( ... )*`).
- `[ ... ]` Optional (**DEPRECATED** - prefer `( ... )?`).

Notes:

- Each struct is a single production, with each field applied in sequence.
- `@<expr>` is the mechanism for capturing matches into the field.
- if a struct field is not keyed with "parser", the entire struct tag will be used as the grammar fragment. This allows the grammar syntax to remain clear and simple to maintain.

## Capturing

Prefixing any expression in the grammar with `@` will capture matching values for that expression into the corresponding field.

For example:

```go
// The grammar definition.
type Grammar struct {
  Hello string `@Ident`
}

// The source text to parse.
source := "world"

// After parsing, the resulting AST.
result == &Grammar{
  Hello: "world",
}
```

For slice and string fields, each instance of `@` will accumulate into the field (including repeated patterns). Accumulation into other types is not supported.

A successful capture match into a boolean field will set the field to true.

For integer and floating point types, a successful capture will be parsed with `strconv.ParseInt()` and `strconv.ParseFloat()` respectively.

Custom control of how values are captured into fields can be achieved by a field type implementing the `Capture` interface (`Capture(values []string) error`).

Additionally, any field implementing the `encoding.TextUnmarshaler` interface will be capturable too. One caveat is that `UnmarshalText()` will be called once for each captured token, so eg. `@(Ident Ident Ident)` will result in three calls to `UnmarshalText()`.

## Streaming

Participle supports streaming parsing. Simply pass a channel of your grammar into `Parse*()`. The grammar will be repeatedly parsed and sent to the channel. Note that the `Parse*()` call will not return until parsing completes, so it should generally be started in a goroutine.

```go
type token struct {
  Str string ` @Ident`
  Num int    `| @Int`
}

parser, err := participle.Build(&token{})

tokens := make(chan *token, 128)
err = parser.ParseString(`hello 10 11 12 world`, tokens)
for token := range tokens {
  fmt.Printf("%#v\n", token)
}
```

## Lexing

Participle operates on tokens and thus relies on a lexer to convert character streams to tokens.

Four lexers are provided, varying in speed and flexibility. Configure your parser with a lexer via `participle.Lexer()`. The best combination of speed, flexibility and usability is `lexer/regex.New()`.

Ordered by speed they are:

1. `lexer.DefaultDefinition` is based on the [text/scanner](https://golang.org/pkg/text/scanner/) package and only allows tokens provided by that package. This is the default lexer.
2. `lexer.Regexp()` (legacy) maps regular expression named subgroups to lexer symbols.
3. `lexer/regex.New()` is a more readable regex lexer, with each rule in the form `<name> = <regex>`.
4. `lexer/ebnf.New()` is a lexer based on the Go EBNF package. It has a large potential for optimisation through code generation, but that is not implemented yet.

To use your own Lexer you will need to implement two interfaces: [Definition](https://godoc.org/github.com/alecthomas/participle/lexer#Definition) and [Lexer](https://godoc.org/github.com/alecthomas/participle/lexer#Lexer).
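As a sketch of option 2 above, modelled on the INI example shipped in `_examples/ini` (the `Grammar` struct and the token set here are assumptions chosen for illustration):

```go
// Named subgroups become token types; unnamed groups match but emit no tokens.
var simpleLexer = lexer.Must(lexer.Regexp(
  `(\s+)` +
    `|(?P<Ident>[a-zA-Z_][a-zA-Z0-9_]*)` +
    `|(?P<Int>\d+)`,
))

var parser = participle.MustBuild(&Grammar{},
  participle.Lexer(simpleLexer),
)
```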
## Options

The Parser's behaviour can be configured via [Options](https://godoc.org/github.com/alecthomas/participle#Option).

## Examples

There are several [examples](https://github.com/alecthomas/participle/tree/master/_examples) included:

Example | Description
--------|---------------
[BASIC](https://github.com/alecthomas/participle/tree/master/_examples/basic) | A lexer, parser and interpreter for a [rudimentary dialect](https://caml.inria.fr/pub/docs/oreilly-book/html/book-ora058.html) of BASIC.
[EBNF](https://github.com/alecthomas/participle/tree/master/_examples/ebnf) | Parser for the form of EBNF used by Go.
[Expr](https://github.com/alecthomas/participle/tree/master/_examples/expr) | A basic mathematical expression parser and evaluator.
[GraphQL](https://github.com/alecthomas/participle/tree/master/_examples/graphql) | Lexer+parser for GraphQL schemas.
[HCL](https://github.com/alecthomas/participle/tree/master/_examples/hcl) | A parser for the [HashiCorp Configuration Language](https://github.com/hashicorp/hcl).
[INI](https://github.com/alecthomas/participle/tree/master/_examples/ini) | An INI file parser.
[Protobuf](https://github.com/alecthomas/participle/tree/master/_examples/protobuf) | A full [Protobuf](https://developers.google.com/protocol-buffers/) version 2 and 3 parser.
[SQL](https://github.com/alecthomas/participle/tree/master/_examples/sql) | A *very* rudimentary SQL SELECT parser.
[Thrift](https://github.com/alecthomas/participle/tree/master/_examples/thrift) | A full [Thrift](https://thrift.apache.org/docs/idl) parser.
[TOML](https://github.com/alecthomas/participle/blob/master/_examples/toml/main.go) | A [TOML](https://github.com/toml-lang/toml) parser.

Included below is a full GraphQL lexer and parser:

```go
package main

import (
	"os"

	"github.com/alecthomas/kong"
	"github.com/alecthomas/repr"

	"github.com/alecthomas/participle"
	"github.com/alecthomas/participle/lexer"
	"github.com/alecthomas/participle/lexer/ebnf"
)

type File struct {
	Entries []*Entry `@@*`
}

type Entry struct {
	Type   *Type   `  @@`
	Schema *Schema `| @@`
	Enum   *Enum   `| @@`
	Scalar string  `| "scalar" @Ident`
}

type Enum struct {
	Name  string   `"enum" @Ident`
	Cases []string `"{" @Ident* "}"`
}

type Schema struct {
	Fields []*Field `"schema" "{" @@* "}"`
}

type Type struct {
	Name       string   `"type" @Ident`
	Implements string   `("implements" @Ident)?`
	Fields     []*Field `"{" @@* "}"`
}

type Field struct {
	Name       string      `@Ident`
	Arguments  []*Argument `("(" (@@ ("," @@)*)? ")")?`
	Type       *TypeRef    `":" @@`
	Annotation string      `("@" @Ident)?`
}

type Argument struct {
	Name    string   `@Ident`
	Type    *TypeRef `":" @@`
	Default *Value   `("=" @@)?`
}

type TypeRef struct {
	Array       *TypeRef `(   "[" @@ "]"`
	Type        string   `  | @Ident )`
	NonNullable bool     `@"!"?`
}

type Value struct {
	Symbol string `@Ident`
}

var (
	graphQLLexer = lexer.Must(ebnf.New(`
    Comment = ("#" | "//") { "\u0000"…"\uffff"-"\n" } .
    Ident = (alpha | "_") { "_" | alpha | digit } .
    Number = ("." | digit) {"." | digit} .
    Whitespace = " " | "\t" | "\n" | "\r" .
    Punct = "!"…"/" | ":"…"@" | "["…`+"\"`\""+` | "{"…"~" .

    alpha = "a"…"z" | "A"…"Z" .
    digit = "0"…"9" .
`))

	parser = participle.MustBuild(&File{},
		participle.Lexer(graphQLLexer),
		participle.Elide("Comment", "Whitespace"),
	)

	cli struct {
		Files []string `arg:"" type:"existingfile" required:"" help:"GraphQL schema files to parse."`
	}
)

func main() {
	ctx := kong.Parse(&cli)
	for _, file := range cli.Files {
		ast := &File{}
		r, err := os.Open(file)
		ctx.FatalIfErrorf(err)
		err = parser.Parse(r, ast)
		r.Close()
		repr.Println(ast)
		ctx.FatalIfErrorf(err)
	}
}
```

## Performance

One of the included examples is a complete Thrift parser (shell-style comments are not supported). This gives a convenient baseline for comparing to the PEG based [pigeon](https://github.com/PuerkitoBio/pigeon), which is the parser used by [go-thrift](https://github.com/samuel/go-thrift). Additionally, the pigeon parser is utilising a generated parser, while the participle parser is built at run time.

You can run the benchmarks yourself, but here's the output on my machine:

    BenchmarkParticipleThrift-4        10000     221818 ns/op     48880 B/op    1240 allocs/op
    BenchmarkGoThriftParser-4           2000     804709 ns/op    170301 B/op    3086 allocs/op

On a real life codebase of 47K lines of Thrift, Participle takes 200ms and go-thrift takes 630ms, which aligns quite closely with the benchmarks.

## Concurrency

A compiled `Parser` instance can be used concurrently. A `LexerDefinition` can be used concurrently. A `Lexer` instance cannot be used concurrently.

## Error reporting

There are a few areas where Participle can provide useful feedback to users of your parser.

1. Errors returned by [Parser.Parse()](https://godoc.org/github.com/alecthomas/participle#Parser.Parse) will be of type [Error](https://godoc.org/github.com/alecthomas/participle#Error). This will contain positional information where available. If the source `io.Reader` includes a `Name() string` method (as `os.File` does), the filename will be included.
2. Participle will make a best effort to return as much of the AST up to the error location as possible.
3. Any node in the AST containing a field `Pos lexer.Position` or `Tok lexer.Token` will be automatically populated from the nearest matching token.
4. Any node in the AST containing a field `EndPos lexer.Position` or `EndTok lexer.Token` will be automatically populated with the token at the end of the node.

These related pieces of information can be combined to provide fairly comprehensive error reporting.

## EBNF

Participle supports outputting an EBNF grammar from a Participle parser. Once the parser is constructed, simply call `String()`.

eg. The [GraphQL example](https://github.com/alecthomas/participle/blob/cbe0cc62a3ad95955311002abd642f11543cb8ed/_examples/graphql/main.go#L14-L61) produces the following EBNF:

```ebnf
File = Entry* .
Entry = Type | Schema | Enum | "scalar" ident .
Type = "type" ident ("implements" ident)? "{" Field* "}" .
Field = ident ("(" (Argument ("," Argument)*)? ")")? ":" TypeRef ("@" ident)? .
Argument = ident ":" TypeRef ("=" Value)? .
TypeRef = "[" TypeRef "]" | ident "!"? .
Value = ident .
Schema = "schema" "{" Field* "}" .
Enum = "enum" ident "{" ident* "}" .
```

==> participle-0.7.1/TUTORIAL.md <==
# Participle parser tutorial

1. [Introduction](#introduction)
1. [The complete grammar](#the-complete-grammar)
1. [Root of the .ini AST \(structure, fields\)](#root-of-the-ini-ast-structure-fields)
1. [.ini properties \(named tokens, capturing, literals\)](#ini-properties-named-tokens-capturing-literals)
1. [.ini property values \(alternates, recursive structs, sequences\)](#ini-property-values-alternates-recursive-structs-sequences)
1. [Complete, but limited, .ini grammar \(top-level properties only\)](#complete-but-limited-ini-grammar-top-level-properties-only)
1. [Extending our grammar to support sections](#extending-our-grammar-to-support-sections)
1. [\(Optional\) Source positional information](#optional-source-positional-information)
1. [Parsing using our grammar](#parsing-using-our-grammar)

## Introduction

Writing a parser in Participle typically involves starting from the "root" of the AST, annotating fields with the grammar, then recursively expanding until it is complete. The AST is expressed via Go data types and the grammar is expressed through struct field tags, as a form of EBNF.

The parser we're going to create for this tutorial parses .ini files like this:

```ini
age = 21
name = "Bob Smith"

[address]
city = "Beverly Hills"
postal_code = 90210
```

## The complete grammar

I think it's useful to see the complete grammar first, to see what we're working towards. Read on below for details.

```go
type INI struct {
  Properties []*Property `@@*`
  Sections   []*Section  `@@*`
}

type Section struct {
  Identifier string      `"[" @Ident "]"`
  Properties []*Property `@@*`
}

type Property struct {
  Key   string `@Ident "="`
  Value *Value `@@`
}

type Value struct {
  String *string  ` @String`
  Number *float64 `| @Float`
}
```

## Root of the .ini AST (structure, fields)

The first step is to create a root struct for our grammar. In the case of our .ini parser, this struct will contain a sequence of properties:

```go
type INI struct {
  Properties []*Property
}

type Property struct {
}
```

## .ini properties (named tokens, capturing, literals)

Each property in an .ini file has an identifier key:

```go
type Property struct {
  Key string
}
```

The default lexer tokenises Go source code, and includes an `Ident` token type that matches identifiers. To match this token we simply use the token type name:

```go
type Property struct {
  Key string `Ident`
}
```

This will *match* identifiers, but not *capture* them into the `Key` field. To capture input tokens into AST fields, prefix any grammar node with `@`:

```go
type Property struct {
  Key string `@Ident`
}
```

In .ini files, each key is separated from its value with a literal `=`. To match a literal, enclose the literal in double quotes:

```go
type Property struct {
  Key string `@Ident "="`
}
```

> Note: literals in the grammar must match tokens from the lexer *exactly*. In
> this example if the lexer does not output `=` as a distinct token the
> grammar will not match.

## .ini property values (alternates, recursive structs, sequences)

For the purposes of our example we are only going to support quoted string and numeric property values. As each value can be *either* a string or a float we'll need something akin to a sum type. Go's type system cannot express this directly, so we'll use the common approach of making each element a pointer. The selected "case" will *not* be nil.

```go
type Value struct {
  String *string
  Number *float64
}
```

> Note: Participle will hydrate pointers as necessary.

To express matching a set of alternatives we use the `|` operator:

```go
type Value struct {
  String *string  ` @String`
  Number *float64 `| @Float`
}
```

> Note: the grammar can cross fields.

Next, we'll match values and capture them into the `Property`.
To recursively capture structs use `@@` (capture self):

```go
type Property struct {
  Key   string `@Ident "="`
  Value *Value `@@`
}
```

Now that we can parse a `Property` we need to go back to the root of the grammar. We want to parse 0 or more properties. To do this, we use `*`. Participle will accumulate each match into the slice until matching fails, then move to the next node in the grammar.

```go
type INI struct {
  Properties []*Property `@@*`
}
```

> Note: tokens can also be accumulated into strings, appending each match.

## Complete, but limited, .ini grammar (top-level properties only)

We now have a functional, but limited, .ini parser!

```go
type INI struct {
  Properties []*Property `@@*`
}

type Property struct {
  Key   string `@Ident "="`
  Value *Value `@@`
}

type Value struct {
  String *string  ` @String`
  Number *float64 `| @Float`
}
```

## Extending our grammar to support sections

Adding support for sections is simply a matter of utilising the constructs we've just learnt. A section consists of a header identifier, and a sequence of properties:

```go
type Section struct {
  Identifier string      `"[" @Ident "]"`
  Properties []*Property `@@*`
}
```

Simple! Now we just add a sequence of `Section`s to our root node:

```go
type INI struct {
  Properties []*Property `@@*`
  Sections   []*Section  `@@*`
}
```

And we're done!

## (Optional) Source positional information

If a grammar node includes a field with the name `Pos` and type `lexer.Position`, it will be automatically populated by positional information. eg.

```go
type Value struct {
  Pos    lexer.Position
  String *string  ` @String`
  Number *float64 `| @Float`
}
```

This is useful for error reporting.

## Parsing using our grammar

To parse with this grammar we first construct the parser (we'll use the default lexer for now):

```go
parser, err := participle.Build(&INI{})
```

Then create a root node and parse into it with `parser.Parse{,String,Bytes}()`:

```go
ini := &INI{}
err = parser.ParseString(`
age = 21
name = "Bob Smith"

[address]
city = "Beverly Hills"
postal_code = 90210
`, ini)
```

You can find the full example [here](_examples/ini/main.go), alongside other examples including an SQL `SELECT` parser and a full [Thrift](https://thrift.apache.org/) parser.

==> participle-0.7.1/_examples/basic/ast.go <==
// nolint: golint
package main

import (
	"io"
	"strings"

	"github.com/alecthomas/participle/lexer"
)

// Parse a BASIC program.
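//
// A usage sketch, mirroring main.go in this example (error handling elided):
//
//	r, err := os.Open("example.bas")
//	// ... handle err, defer r.Close() ...
//	program, err := Parse(r)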
func Parse(r io.Reader) (*Program, error) {
	program := &Program{}
	err := basicParser.Parse(r, program)
	if err != nil {
		return nil, err
	}
	program.init()
	return program, nil
}

type Program struct {
	Pos lexer.Position

	Commands []*Command `{ @@ }`

	Table map[int]*Command
}

type Command struct {
	Pos lexer.Position

	Index int

	Line   int     `@Number`
	Remark *Remark `(   @@`
	Input  *Input  `  | @@`
	Let    *Let    `  | @@`
	Goto   *Goto   `  | @@`
	If     *If     `  | @@`
	Print  *Print  `  | @@`
	Call   *Call   `  | @@ ) EOL`
}

type Remark struct {
	Pos lexer.Position

	Comment string `@Comment`
}

type Call struct {
	Pos lexer.Position

	Name string        `@Ident`
	Args []*Expression `"(" [ @@ { "," @@ } ] ")"`
}

type Print struct {
	Pos lexer.Position

	Expression *Expression `"PRINT" @@`
}

type Input struct {
	Pos lexer.Position

	Variable string `"INPUT" @Ident`
}

type Let struct {
	Pos lexer.Position

	Variable string      `"LET" @Ident`
	Value    *Expression `"=" @@`
}

type Goto struct {
	Pos lexer.Position

	Line int `"GOTO" @Number`
}

type If struct {
	Pos lexer.Position

	Condition *Expression `"IF" @@`
	Line      int         `"THEN" @Number`
}

type Operator string

func (o *Operator) Capture(s []string) error {
	*o = Operator(strings.Join(s, ""))
	return nil
}

type Value struct {
	Pos lexer.Position

	Number        *float64    `  @Number`
	Variable      *string     `| @Ident`
	String        *string     `| @String`
	Call          *Call       `| @@`
	Subexpression *Expression `| "(" @@ ")"`
}

type Factor struct {
	Pos lexer.Position

	Base     *Value `@@`
	Exponent *Value `[ "^" @@ ]`
}

type OpFactor struct {
	Pos lexer.Position

	Operator Operator `@("*" | "/")`
	Factor   *Factor  `@@`
}

type Term struct {
	Pos lexer.Position

	Left  *Factor     `@@`
	Right []*OpFactor `{ @@ }`
}

type OpTerm struct {
	Pos lexer.Position

	Operator Operator `@("+" | "-")`
	Term     *Term    `@@`
}

type Cmp struct {
	Pos lexer.Position

	Left  *Term     `@@`
	Right []*OpTerm `{ @@ }`
}

type OpCmp struct {
	Pos lexer.Position

	Operator Operator `@("=" | "<" "=" | ">" "=" | "<" | ">" | "!" "=")`
	Cmp      *Cmp     `@@`
}

type Expression struct {
	Pos lexer.Position

	Left  *Cmp     `@@`
	Right []*OpCmp `{ @@ }`
}

==> participle-0.7.1/_examples/basic/eval.go <==
// nolint: golint, dupl
package main

import (
	"fmt"
	"io"
	"math"

	"github.com/alecthomas/participle/lexer"
	"github.com/alecthomas/repr"
)

type Evaluatable interface {
	Evaluate(ctx *Context) (interface{}, error)
}

type Function func(args ...interface{}) (interface{}, error)

// Context for evaluation.
type Context struct {
	// User-provided functions.
	Functions map[string]Function
	// Vars defined during evaluation.
	Vars map[string]interface{}
	// Reader from which INPUT is read.
	Input io.Reader
	// Writer where PRINTing will write.
Output io.Writer } func (p *Program) init() { p.Table = map[int]*Command{} for index, cmd := range p.Commands { cmd.Index = index p.Table[cmd.Line] = cmd } } func (v *Value) Evaluate(ctx *Context) (interface{}, error) { switch { case v.Number != nil: return *v.Number, nil case v.String != nil: return *v.String, nil case v.Variable != nil: value, ok := ctx.Vars[*v.Variable] if !ok { return nil, fmt.Errorf("unknown variable %q", *v.Variable) } return value, nil case v.Subexpression != nil: return v.Subexpression.Evaluate(ctx) case v.Call != nil: return v.Call.Evaluate(ctx) } panic("unsupported value type" + repr.String(v)) } func (f *Factor) Evaluate(ctx *Context) (interface{}, error) { base, err := f.Base.Evaluate(ctx) if err != nil { return nil, err } if f.Exponent == nil { return base, nil } baseNum, exponentNum, err := evaluateFloats(ctx, base, f.Exponent) if err != nil { return nil, lexer.Errorf(f.Pos, "invalid factor: %s", err) } return math.Pow(baseNum, exponentNum), nil } func (o *OpFactor) Evaluate(ctx *Context, lhs interface{}) (interface{}, error) { lhsNumber, rhsNumber, err := evaluateFloats(ctx, lhs, o.Factor) if err != nil { return nil, lexer.Errorf(o.Pos, "invalid arguments for %s: %s", o.Operator, err) } switch o.Operator { case "*": return lhsNumber * rhsNumber, nil case "/": return lhsNumber / rhsNumber, nil } panic("unreachable") } func (t *Term) Evaluate(ctx *Context) (interface{}, error) { lhs, err := t.Left.Evaluate(ctx) if err != nil { return nil, err } for _, right := range t.Right { rhs, err := right.Evaluate(ctx, lhs) if err != nil { return nil, err } lhs = rhs } return lhs, nil } func (o *OpTerm) Evaluate(ctx *Context, lhs interface{}) (interface{}, error) { lhsNumber, rhsNumber, err := evaluateFloats(ctx, lhs, o.Term) if err != nil { return nil, lexer.Errorf(o.Pos, "invalid arguments for %s: %s", o.Operator, err) } switch o.Operator { case "+": return lhsNumber + rhsNumber, nil case "-": return lhsNumber - rhsNumber, nil } panic("unreachable") } func (c *Cmp) Evaluate(ctx *Context) (interface{}, error) { lhs, err := c.Left.Evaluate(ctx) if err != nil { return nil, err } for _, right := range c.Right { rhs, err := right.Evaluate(ctx, lhs) if err != nil { return nil, err } lhs = rhs } return lhs, nil } func (o *OpCmp) Evaluate(ctx *Context, lhs interface{}) (interface{}, error) { rhs, err := o.Cmp.Evaluate(ctx) if err != nil { return nil, err } switch lhs := lhs.(type) { case float64: rhs, ok := rhs.(float64) if !ok { return nil, lexer.Errorf(o.Pos, "rhs of %s must be a number", o.Operator) } switch o.Operator { case "=": return lhs == rhs, nil case "!=": return lhs != rhs, nil case "<": return lhs < rhs, nil case ">": return lhs > rhs, nil case "<=": return lhs <= rhs, nil case ">=": return lhs >= rhs, nil } case string: rhs, ok := rhs.(string) if !ok { return nil, lexer.Errorf(o.Pos, "rhs of %s must be a string", o.Operator) } switch o.Operator { case "=": return lhs == rhs, nil case "!=": return lhs != rhs, nil case "<": return lhs < rhs, nil case ">": return lhs > rhs, nil case "<=": return lhs <= rhs, nil case ">=": return lhs >= rhs, nil } default: return nil, lexer.Errorf(o.Pos, "lhs of %s must be a number or string", o.Operator) } panic("unreachable") } func (e *Expression) Evaluate(ctx *Context) (interface{}, error) { lhs, err := e.Left.Evaluate(ctx) if err != nil { return nil, err } for _, right := range e.Right { rhs, err := right.Evaluate(ctx, lhs) if err != nil { return nil, err } lhs = rhs } return lhs, nil } func (c *Call) Evaluate(ctx *Context) 
(interface{}, error) { function, ok := ctx.Functions[c.Name] if !ok { return nil, lexer.Errorf(c.Pos, "unknown function %q", c.Name) } args := []interface{}{} for _, arg := range c.Args { value, err := arg.Evaluate(ctx) if err != nil { return nil, err } args = append(args, value) } value, err := function(args...) if err != nil { return nil, lexer.Errorf(c.Pos, "call to %s() failed", c.Name) } return value, nil } func (p *Program) Evaluate(r io.Reader, w io.Writer, functions map[string]Function) error { if len(p.Commands) == 0 { return nil } ctx := &Context{ Vars: map[string]interface{}{}, Functions: functions, Input: r, Output: w, } for index := 0; index < len(p.Commands); { cmd := p.Commands[index] switch { case cmd.Goto != nil: cmd := cmd.Goto next, ok := p.Table[cmd.Line] if !ok { return lexer.Errorf(cmd.Pos, "invalid line number %d", cmd.Line) } index = next.Index continue case cmd.Remark != nil: case cmd.Let != nil: cmd := cmd.Let value, err := cmd.Value.Evaluate(ctx) if err != nil { return err } ctx.Vars[cmd.Variable] = value case cmd.Print != nil: cmd := cmd.Print value, err := cmd.Expression.Evaluate(ctx) if err != nil { return err } fmt.Fprintln(ctx.Output, value) case cmd.Input != nil: cmd := cmd.Input var value float64 _, err := fmt.Fscanln(ctx.Input, &value) if err != nil { return lexer.Errorf(cmd.Pos, "invalid input: %s", err) } ctx.Vars[cmd.Variable] = value case cmd.If != nil: cmd := cmd.If condition, err := cmd.Condition.Evaluate(ctx) if err != nil { return err } if test, ok := condition.(bool); ok && test { next, ok := p.Table[cmd.Line] if !ok { return lexer.Errorf(cmd.Pos, "invalid line number %d", cmd.Line) } index = next.Index continue } case cmd.Call != nil: _, err := cmd.Call.Evaluate(ctx) if err != nil { return err } default: panic("unsupported command " + repr.String(cmd)) } index++ } return nil } func evaluateFloats(ctx *Context, lhs interface{}, rhsExpr Evaluatable) (float64, float64, error) { rhs, err := rhsExpr.Evaluate(ctx) if err != nil { return 0, 0, err } lhsNumber, ok := lhs.(float64) if !ok { return 0, 0, fmt.Errorf("lhs must be a number") } rhsNumber, ok := rhs.(float64) if !ok { return 0, 0, fmt.Errorf("rhs must be a number") } return lhsNumber, rhsNumber, nil } participle-0.7.1/_examples/basic/example.bas000066400000000000000000000003351376001225600210360ustar00rootroot00000000000000 5 REM inputting the argument 10 PRINT "Factorial of:" 20 INPUT A 30 LET B = 1 35 REM beginning of the loop 40 IF A <= 1 THEN 80 50 LET B = B * A 60 LET A = A - 1 70 GOTO 40 75 REM prints the result 80 PRINT B participle-0.7.1/_examples/basic/hidden.bas000066400000000000000000000003151376001225600206340ustar00rootroot0000000000000010 PRINT "Give the hidden number: " 20 INPUT N 30 PRINT "Give a number: " 40 INPUT R 50 IF R = N THEN 110 60 IF R < N THEN 90 70 PRINT "C-" 80 GOTO 30 90 PRINT "C+" 100 GOTO 30 110 PRINT "CONGRATULATIONS" participle-0.7.1/_examples/basic/main.go000066400000000000000000000026361376001225600201750ustar00rootroot00000000000000// nolint: golint, dupl package main import ( "os" "github.com/alecthomas/kong" "github.com/alecthomas/participle" "github.com/alecthomas/participle/lexer" "github.com/alecthomas/participle/lexer/ebnf" ) var ( basicLexer = lexer.Must(ebnf.New(` Comment = ("REM" | "rem" ) { "\u0000"…"\uffff"-"\n"-"\r" } . Ident = (alpha | "_") { "_" | alpha | digit } . String = "\"" { "\u0000"…"\uffff"-"\""-"\\" | "\\" any } "\"" . Number = [ "-" | "+" ] ("." | digit) { "." | digit } . Punct = "!"…"/" | ":"…"@" | "["…` + "\"`\"" + ` | "{"…"~" . 
EOL = ( "\n" | "\r" ) { "\n" | "\r" }. Whitespace = ( " " | "\t" ) { " " | "\t" } . alpha = "a"…"z" | "A"…"Z" . digit = "0"…"9" . any = "\u0000"…"\uffff" . `)) basicParser = participle.MustBuild(&Program{}, participle.Lexer(basicLexer), participle.CaseInsensitive("Ident"), participle.Unquote("String"), participle.UseLookahead(2), participle.Elide("Whitespace"), ) cli struct { File string `arg:"" type:"existingfile" help:"File to parse."` } ) func main() { ctx := kong.Parse(&cli) r, err := os.Open(cli.File) ctx.FatalIfErrorf(err) defer r.Close() program, err := Parse(r) ctx.FatalIfErrorf(err) funcs := map[string]Function{ "ADD": func(args ...interface{}) (interface{}, error) { return args[0].(float64) + args[1].(float64), nil }, } err = program.Evaluate(os.Stdin, os.Stdout, funcs) ctx.FatalIfErrorf(err) } participle-0.7.1/_examples/ebnf/000077500000000000000000000000001376001225600165445ustar00rootroot00000000000000participle-0.7.1/_examples/ebnf/main.go000066400000000000000000000065671376001225600200350ustar00rootroot00000000000000package main import ( "bytes" "encoding/json" "fmt" "os" "strings" "gopkg.in/alecthomas/kingpin.v2" "github.com/alecthomas/participle" ) var ( jsonFlag = kingpin.Flag("json", "Display AST as JSON.").Bool() ) type Group struct { Expression *Expression `"(" @@ ")"` } func (g *Group) String() string { return fmt.Sprintf("( %s )", g.Expression) } type Option struct { Expression *Expression `"[" @@ "]"` } func (o *Option) String() string { return fmt.Sprintf("[ %s ]", o.Expression) } type Repetition struct { Expression *Expression `"{" @@ "}"` } func (r *Repetition) String() string { return fmt.Sprintf("{ %s }", r.Expression) } type Literal struct { Start string `@String` // Lexer token "String" End string `[ "…" @String ]` } func (l *Literal) String() string { if l.End != "" { return fmt.Sprintf("%q … %q", l.Start, l.End) } return fmt.Sprintf("%q", l.Start) } type Term struct { Name string `@Ident |` Literal *Literal `@@ |` Group *Group `@@ |` Option *Option `@@ |` Repetition *Repetition `@@` } func (t *Term) String() string { switch { case t.Name != "": return t.Name case t.Literal != nil: return t.Literal.String() case t.Group != nil: return t.Group.String() case t.Option != nil: return t.Option.String() case t.Repetition != nil: return t.Repetition.String() default: panic("wut") } } type Sequence struct { Terms []*Term `@@ { @@ }` } func (s *Sequence) String() string { terms := []string{} for _, term := range s.Terms { terms = append(terms, term.String()) } return strings.Join(terms, " ") } type Expression struct { Alternatives []*Sequence `@@ { "|" @@ }` } func (e *Expression) String() string { sequences := []string{} for _, sequence := range e.Alternatives { sequences = append(sequences, sequence.String()) } return strings.Join(sequences, " | ") } type Expressions []*Expression func (e Expressions) String() string { expressions := []string{} for _, expression := range e { expressions = append(expressions, expression.String()) } return strings.Join(expressions, " ") } type Production struct { Name string `@Ident "="` Expressions Expressions `@@ { @@ } "."` } func (p *Production) String() string { expressions := []string{} for _, expression := range p.Expressions { expressions = append(expressions, expression.String()) } return fmt.Sprintf("%s = %s .", p.Name, strings.Join(expressions, " ")) } type EBNF struct { Productions []*Production `{ @@ }` } func (e *EBNF) String() string { w := bytes.NewBuffer(nil) for _, production := range e.Productions { fmt.Fprintf(w, 
"%s\n", production) } return w.String() } func main() { kingpin.CommandLine.Help = `An EBNF parser compatible with Go"s exp/ebnf. The grammar is in the form: Production = name "=" [ Expression ] "." . Expression = Alternative { "|" Alternative } . Alternative = Term { Term } . Term = name | token [ "…" token ] | Group | Option | Repetition . Group = "(" Expression ")" . Option = "[" Expression "]" . Repetition = "{" Expression "}" . ` kingpin.Parse() parser, err := participle.Build(&EBNF{}) kingpin.FatalIfError(err, "") ebnf := &EBNF{} err = parser.Parse(os.Stdin, ebnf) kingpin.FatalIfError(err, "") if *jsonFlag { bytes, _ := json.MarshalIndent(ebnf, "", " ") fmt.Printf("%s\n", bytes) } else { fmt.Print(ebnf) } } participle-0.7.1/_examples/expr/000077500000000000000000000000001376001225600166105ustar00rootroot00000000000000participle-0.7.1/_examples/expr/main.go000066400000000000000000000072631376001225600200730ustar00rootroot00000000000000// nolint: govet package main import ( "encoding/json" "fmt" "math" "os" "strings" "github.com/alecthomas/kong" "github.com/alecthomas/participle" ) var cli struct { AST bool `help:"Print AST for expression."` Set map[string]float64 `short:"s" help:"Set variables."` Expression []string `arg required help:"Expression to evaluate."` } type Operator int const ( OpMul Operator = iota OpDiv OpAdd OpSub ) var operatorMap = map[string]Operator{"+": OpAdd, "-": OpSub, "*": OpMul, "/": OpDiv} func (o *Operator) Capture(s []string) error { *o = operatorMap[s[0]] return nil } // E --> T {( "+" | "-" ) T} // T --> F {( "*" | "/" ) F} // F --> P ["^" F] // P --> v | "(" E ")" | "-" T type Value struct { Number *float64 ` @(Float|Int)` Variable *string `| @Ident` Subexpression *Expression `| "(" @@ ")"` } type Factor struct { Base *Value `@@` Exponent *Value `[ "^" @@ ]` } type OpFactor struct { Operator Operator `@("*" | "/")` Factor *Factor `@@` } type Term struct { Left *Factor `@@` Right []*OpFactor `{ @@ }` } type OpTerm struct { Operator Operator `@("+" | "-")` Term *Term `@@` } type Expression struct { Left *Term `@@` Right []*OpTerm `{ @@ }` } // Display func (o Operator) String() string { switch o { case OpMul: return "*" case OpDiv: return "/" case OpSub: return "-" case OpAdd: return "+" } panic("unsupported operator") } func (v *Value) String() string { if v.Number != nil { return fmt.Sprintf("%g", *v.Number) } if v.Variable != nil { return *v.Variable } return "(" + v.Subexpression.String() + ")" } func (f *Factor) String() string { out := f.Base.String() if f.Exponent != nil { out += " ^ " + f.Exponent.String() } return out } func (o *OpFactor) String() string { return fmt.Sprintf("%s %s", o.Operator, o.Factor) } func (t *Term) String() string { out := []string{t.Left.String()} for _, r := range t.Right { out = append(out, r.String()) } return strings.Join(out, " ") } func (o *OpTerm) String() string { return fmt.Sprintf("%s %s", o.Operator, o.Term) } func (e *Expression) String() string { out := []string{e.Left.String()} for _, r := range e.Right { out = append(out, r.String()) } return strings.Join(out, " ") } // Evaluation func (o Operator) Eval(l, r float64) float64 { switch o { case OpMul: return l * r case OpDiv: return l / r case OpAdd: return l + r case OpSub: return l - r } panic("unsupported operator") } func (v *Value) Eval(ctx Context) float64 { switch { case v.Number != nil: return *v.Number case v.Variable != nil: value, ok := ctx[*v.Variable] if !ok { panic("no such variable " + *v.Variable) } return value default: return 
v.Subexpression.Eval(ctx) } } func (f *Factor) Eval(ctx Context) float64 { b := f.Base.Eval(ctx) if f.Exponent != nil { return math.Pow(b, f.Exponent.Eval(ctx)) } return b } func (t *Term) Eval(ctx Context) float64 { n := t.Left.Eval(ctx) for _, r := range t.Right { n = r.Operator.Eval(n, r.Factor.Eval(ctx)) } return n } func (e *Expression) Eval(ctx Context) float64 { l := e.Left.Eval(ctx) for _, r := range e.Right { l = r.Operator.Eval(l, r.Term.Eval(ctx)) } return l } type Context map[string]float64 func main() { ctx := kong.Parse(&cli, kong.Description("A basic expression parser and evaluator."), kong.UsageOnError(), ) parser, err := participle.Build(&Expression{}) ctx.FatalIfErrorf(err) expr := &Expression{} err = parser.ParseString(strings.Join(cli.Expression, " "), expr) ctx.FatalIfErrorf(err) if cli.AST { json.NewEncoder(os.Stdout).Encode(expr) } else { fmt.Println(expr, "=", expr.Eval(cli.Set)) } } participle-0.7.1/_examples/expr2/000077500000000000000000000000001376001225600166725ustar00rootroot00000000000000participle-0.7.1/_examples/expr2/main.go000066400000000000000000000037131376001225600201510ustar00rootroot00000000000000package main import ( "strings" "github.com/alecthomas/kong" "github.com/alecthomas/repr" "github.com/alecthomas/participle" ) // Based on http://www.craftinginterpreters.com/parsing-expressions.html // expression → equality ; // equality → comparison ( ( "!=" | "==" ) comparison )* ; // comparison → addition ( ( ">" | ">=" | "<" | "<=" ) addition )* ; // addition → multiplication ( ( "-" | "+" ) multiplication )* ; // multiplication → unary ( ( "/" | "*" ) unary )* ; // unary → ( "!" | "-" ) unary // | primary ; // primary → NUMBER | STRING | "false" | "true" | "nil" // | "(" expression ")" ; type Expression struct { Equality *Equality `@@` } type Equality struct { Comparison *Comparison `@@` Op string `[ @( "!" "=" | "=" "=" )` Next *Equality ` @@ ]` } type Comparison struct { Addition *Addition `@@` Op string `[ @( ">" | ">" "=" | "<" | "<" "=" )` Next *Comparison ` @@ ]` } type Addition struct { Multiplication *Multiplication `@@` Op string `[ @( "-" | "+" )` Next *Addition ` @@ ]` } type Multiplication struct { Unary *Unary `@@` Op string `[ @( "/" | "*" )` Next *Multiplication ` @@ ]` } type Unary struct { Op string ` ( @( "!" 
| "-" )` Unary *Unary ` @@ )` Primary *Primary `| @@` } type Primary struct { Number *float64 ` @Float | @Int` String *string `| @String` Bool *bool `| ( @"true" | "false" )` Nil bool `| @"nil"` SubExpression *Expression `| "(" @@ ")" ` } func main() { var cli struct { Expr []string `arg required help:"Expression to parse."` } ctx := kong.Parse(&cli) p := participle.MustBuild(&Expression{}, participle.UseLookahead(2)) expr := &Expression{} err := p.ParseString(strings.Join(cli.Expr, " "), expr) ctx.FatalIfErrorf(err) repr.Println(expr) } participle-0.7.1/_examples/go.mod000066400000000000000000000010561376001225600167420ustar00rootroot00000000000000module github.com/alecthomas/participle/_examples go 1.14 require ( github.com/alecthomas/go-thrift v0.0.0-20170109061633-7914173639b2 github.com/alecthomas/kong v0.2.8 github.com/alecthomas/participle v0.4.1 github.com/alecthomas/repr v0.0.0-20200325044227-4184120f674c github.com/alecthomas/template v0.0.0-20190718012654-fb15b899a751 // indirect github.com/alecthomas/units v0.0.0-20190924025748-f65c72e2690d // indirect github.com/stretchr/testify v1.5.1 gopkg.in/alecthomas/kingpin.v2 v2.2.6 ) replace github.com/alecthomas/participle => ../ participle-0.7.1/_examples/go.sum000066400000000000000000000057641376001225600170010ustar00rootroot00000000000000github.com/alecthomas/go-thrift v0.0.0-20170109061633-7914173639b2 h1:gKv6LPDhF/G3cNribA+kZtNPiPpKabZGLhcJuEtp3ig= github.com/alecthomas/go-thrift v0.0.0-20170109061633-7914173639b2/go.mod h1:CxCgO+NdpMdi9SsTlGbc0W+/UNxO3I0AabOEJZ3w61w= github.com/alecthomas/kong v0.2.1/go.mod h1:+inYUSluD+p4L8KdviBSgzcqEjUQOfC5fQDRFuc36lI= github.com/alecthomas/kong v0.2.8 h1:VSWWkD1TZij2967FcfVwgRwlp3khCA0liZIkUI9hTdU= github.com/alecthomas/kong v0.2.8/go.mod h1:kQOmtJgV+Lb4aj+I2LEn40cbtawdWJ9Y8QLq+lElKxE= github.com/alecthomas/participle v0.4.1 h1:P2PJWzwrSpuCWXKnzqvw0b0phSfH1kJo4p2HvLynVsI= github.com/alecthomas/participle v0.4.1/go.mod h1:T8u4bQOSMwrkTWOSyt8/jSFPEnRtd0FKFMjVfYBlqPs= github.com/alecthomas/repr v0.0.0-20181024024818-d37bc2a10ba1/go.mod h1:xTS7Pm1pD1mvyM075QCDSRqH6qRLXylzS24ZTpRiSzQ= github.com/alecthomas/repr v0.0.0-20200325044227-4184120f674c h1:MVVbswUlqicyj8P/JljoocA7AyCo62gzD0O7jfvrhtE= github.com/alecthomas/repr v0.0.0-20200325044227-4184120f674c/go.mod h1:xTS7Pm1pD1mvyM075QCDSRqH6qRLXylzS24ZTpRiSzQ= github.com/alecthomas/template v0.0.0-20190718012654-fb15b899a751 h1:JYp7IbQjafoB+tBA3gMyHYHrpOtNuDiK/uB5uXxq5wM= github.com/alecthomas/template v0.0.0-20190718012654-fb15b899a751/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc= github.com/alecthomas/units v0.0.0-20190924025748-f65c72e2690d h1:UQZhZ2O0vMHr2cI+DC1Mbh0TJxzA3RcLoMsFw+aXw7E= github.com/alecthomas/units v0.0.0-20190924025748-f65c72e2690d/go.mod h1:rBZYJk541a8SKzHPHnH3zbiI+7dagKZ0cgpgrD7Fyho= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/mitchellh/mapstructure v1.1.2/go.mod h1:FVVH3fgwuzCH5S8UJGiWEs2h04kUh9fWfEaFds41c1Y= github.com/pkg/errors v0.8.1 h1:iURUrRGxPUNPdy5/HRSm+Yj6okJ6UtLINN0Q9M4+h3I= github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/stretchr/objx v0.1.0/go.mod 
h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs= github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4= github.com/stretchr/testify v1.5.1 h1:nOGnQDM7FYENwehXlg/kFVnos3rEvtKTjRvOWSzb6H4= github.com/stretchr/testify v1.5.1/go.mod h1:5W2xD1RspED5o8YsWQXVCued0rvSQ+mT+I5cxcmMvtA= gopkg.in/alecthomas/kingpin.v2 v2.2.6 h1:jMFz6MfLP0/4fUyZle81rXUoxOBFi19VUFKVDOQfozc= gopkg.in/alecthomas/kingpin.v2 v2.2.6/go.mod h1:FMv+mEhP44yOT+4EoQTLFTRgOQ1FBLkstjWtayDeSgw= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/yaml.v2 v2.2.2 h1:ZCJp+EgiOT7lHqUV2J862kp8Qj64Jo6az82+3Td9dZw= gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= participle-0.7.1/_examples/graphql/000077500000000000000000000000001376001225600172705ustar00rootroot00000000000000participle-0.7.1/_examples/graphql/example.graphql000066400000000000000000000017541376001225600223120ustar00rootroot00000000000000# A comment. type Tweet { id: ID! # The tweet text. No more than 140 characters! body: String # When the tweet was published date: Date # Who published the tweet Author: User # Views, retweets, likes, etc Stats: Stat } type User { id: ID! username: String first_name: String last_name: String full_name: String name: String @deprecated avatar_url: Url } type Stat { views: Int likes: Int retweets: Int responses: Int } type Notification { id: ID date: Date type: String } type Meta { count: Int } scalar Url scalar Date type Query { Tweet(id: ID!): Tweet Tweets(limit: Int, skip: Int, sort_field: String, sort_order: String): [Tweet] TweetsMeta: Meta User(id: ID!): User Notifications(limit: Int): [Notification] NotificationsMeta: Meta } type Mutation { createTweet ( body: String ): Tweet deleteTweet(id: ID!): Tweet markTweetRead(id: ID!): Boolean } participle-0.7.1/_examples/graphql/main.go000066400000000000000000000041231376001225600205430ustar00rootroot00000000000000package main import ( "fmt" "os" "github.com/alecthomas/kong" "github.com/alecthomas/repr" "github.com/alecthomas/participle" "github.com/alecthomas/participle/lexer" "github.com/alecthomas/participle/lexer/ebnf" ) type File struct { Entries []*Entry `@@*` } type Entry struct { Type *Type ` @@` Schema *Schema `| @@` Enum *Enum `| @@` Scalar string `| "scalar" @Ident` } type Enum struct { Name string `"enum" @Ident` Cases []string `"{" { @Ident } "}"` } type Schema struct { Fields []*Field `"schema" "{" { @@ } "}"` } type Type struct { Name string `"type" @Ident` Implements string `[ "implements" @Ident ]` Fields []*Field `"{" { @@ } "}"` } type Field struct { Name string `@Ident` Arguments []*Argument `[ "(" [ @@ { "," @@ } ] ")" ]` Type *TypeRef `":" @@` Annotation string `[ "@" @Ident ]` } type Argument struct { Name string `@Ident` Type *TypeRef `":" @@` Default *Value `[ "=" @@ ]` } type TypeRef struct { Array *TypeRef `( "[" @@ "]"` Type string ` | @Ident )` NonNullable bool `[ @"!" ]` } type Value struct { Symbol string `@Ident` } var ( graphQLLexer = lexer.Must(ebnf.New(` Comment = ("#" | "//") { "\u0000"…"\uffff"-"\n" } . Ident = (alpha | "_") { "_" | alpha | digit } . Number = ("." | digit) {"." | digit} . Whitespace = " " | "\t" | "\n" | "\r" . Punct = "!"…"/" | ":"…"@" | "["…` + "\"`\"" + ` | "{"…"~" . alpha = "a"…"z" | "A"…"Z" . digit = "0"…"9" . 
`))

	parser = participle.MustBuild(&File{},
		participle.Lexer(graphQLLexer),
		participle.Elide("Comment", "Whitespace"),
		participle.UseLookahead(2),
	)
)

var cli struct {
	EBNF  bool     `help:"Dump EBNF."`
	Files []string `arg:"" optional:"" type:"existingfile" help:"GraphQL schema files to parse."`
}

func main() {
	ctx := kong.Parse(&cli)
	if cli.EBNF {
		fmt.Println(parser.String())
		ctx.Exit(0)
	}
	for _, file := range cli.Files {
		ast := &File{}
		r, err := os.Open(file)
		ctx.FatalIfErrorf(err)
		err = parser.Parse(r, ast)
		r.Close()
		repr.Println(ast)
		ctx.FatalIfErrorf(err)
	}
}

==> participle-0.7.1/_examples/graphql/main_test.go <==
package main

import (
	"io/ioutil"
	"testing"

	"github.com/stretchr/testify/require"
)

func BenchmarkParser(b *testing.B) {
	source, err := ioutil.ReadFile("example.graphql")
	require.NoError(b, err)
	b.ReportAllocs()
	b.ReportMetric(float64(len(source)*b.N), "B/s")
	for i := 0; i < b.N; i++ {
		ast := &File{}
		_ = parser.ParseBytes(source, ast)
	}
}

==> participle-0.7.1/_examples/hcl/example.hcl <==
region = "us-west-2"
access_key = "something"
secret_key = "something_else"
bucket = "backups"

directory config {
    source_dir = "/etc/eventstore"
    dest_prefix = "escluster/config"
    exclude = ["*.hcl"]
    pre_backup_script = "before_backup.sh"
    post_backup_script = "after_backup.sh"
    pre_restore_script = "before_restore.sh"
    post_restore_script = "after_restore.sh"
    chmod = 0755
}

directory data {
    source_dir = "/var/lib/eventstore"
    dest_prefix = "escluster/a/data"
    exclude = [
        "*.merging"
    ]
    pre_restore_script = "before_restore.sh"
    post_restore_script = "after_restore.sh"
}

==> participle-0.7.1/_examples/hcl/main.go <==
// Package main implements a parser for HashiCorp's HCL configuration syntax.
package main

import (
	"fmt"
	"os"
	"strings"

	"gopkg.in/alecthomas/kingpin.v2"

	"github.com/alecthomas/participle"
	"github.com/alecthomas/repr"
)

type Bool bool

func (b *Bool) Capture(v []string) error {
	*b = v[0] == "true"
	return nil
}

type Value struct {
	Boolean    *Bool    `  @("true"|"false")`
	Identifier *string  `| @Ident { @"." @Ident }`
	String     *string  `| @(String|Char|RawString)`
	Number     *float64 `| @(Float|Int)`
	Array      []*Value `| "[" { @@ [ "," ] } "]"`
}

func (l *Value) GoString() string {
	switch {
	case l.Boolean != nil:
		return fmt.Sprintf("%v", *l.Boolean)
	case l.Identifier != nil:
		return fmt.Sprintf("`%s`", *l.Identifier)
	case l.String != nil:
		return fmt.Sprintf("%q", *l.String)
	case l.Number != nil:
		return fmt.Sprintf("%v", *l.Number)
	case l.Array != nil:
		out := []string{}
		for _, v := range l.Array {
			out = append(out, v.GoString())
		}
		return fmt.Sprintf("[]*Value{ %s }", strings.Join(out, ", "))
	}
	panic("??")
}

type Entry struct {
	Key   string `@Ident`
	Value *Value `( "=" @@`
	Block *Block `| @@ )`
}

type Block struct {
	Parameters []*Value `{ @@ }`
	Entries    []*Entry `"{" { @@ } "}"`
}

type Config struct {
	Entries []*Entry `{ @@ }`
}

func main() {
	kingpin.Parse()
	parser, err := participle.Build(&Config{})
	kingpin.FatalIfError(err, "")
	expr := &Config{}
	err = parser.Parse(os.Stdin, expr)
	kingpin.FatalIfError(err, "")
	repr.Println(expr)
}

==> participle-0.7.1/_examples/ini/example.ini <==
a = "a"
b = 123

# A comment
[numbers]
a = 10.3
b = 20

; Another comment
[strings]
a = "\"quoted\""
b = "b"

==> participle-0.7.1/_examples/ini/main.go <==
package main

import (
	"os"

	"github.com/alecthomas/participle"
	"github.com/alecthomas/participle/lexer"
	"github.com/alecthomas/repr"
)

// A custom lexer for INI files. This illustrates a relatively complex Regexp lexer, as well
// as use of the Unquote filter, which unquotes string tokens.
var iniLexer = lexer.Must(lexer.Regexp(
	`(?m)` +
		`(\s+)` +
		`|(^[#;].*$)` +
		`|(?P<Ident>[a-zA-Z][a-zA-Z_\d]*)` +
		`|(?P<String>"(?:\\.|[^"])*")` +
		`|(?P<Float>\d+(?:\.\d+)?)` +
		`|(?P<Punct>[][=])`,
))

type INI struct {
	Properties []*Property `@@*`
	Sections   []*Section  `@@*`
}

type Section struct {
	Identifier string      `"[" @Ident "]"`
	Properties []*Property `@@*`
}

type Property struct {
	Key   string `@Ident "="`
	Value *Value `@@`
}

type Value struct {
	String *string  ` @String`
	Number *float64 `| @Float`
}

func main() {
	parser, err := participle.Build(&INI{},
		participle.Lexer(iniLexer),
		participle.Unquote("String"),
	)
	if err != nil {
		panic(err)
	}
	ini := &INI{}
	err = parser.Parse(os.Stdin, ini)
	if err != nil {
		panic(err)
	}
	repr.Println(ini, repr.Indent("  "), repr.OmitEmpty(true))
}

==> participle-0.7.1/_examples/json/github-webhook.json <==
{
  "action": "created",
  "check_run": {
    "id": 128620228,
    "node_id": "MDg6Q2hlY2tSdW4xMjg2MjAyMjg=",
    "head_sha": "ec26c3e57ca3a959ca5aad62de7213c562f8c821",
    "external_id": "",
    "url": "https://api.github.com/repos/Codertocat/Hello-World/check-runs/128620228",
    "html_url": "https://github.com/Codertocat/Hello-World/runs/128620228",
    "details_url": "https://octocoders.io",
    "status": "queued",
    "conclusion": null,
    "started_at": "2019-05-15T15:21:12Z",
    "completed_at": null,
    "output": {
      "title": null,
      "summary": null,
      "text": null,
      "annotations_count": 0,
      "annotations_url": "https://api.github.com/repos/Codertocat/Hello-World/check-runs/128620228/annotations"
    },
    "name": "Octocoders-linter",
    "check_suite": {
      "id": 118578147,
      "node_id": "MDEwOkNoZWNrU3VpdGUxMTg1NzgxNDc=",
"head_branch": "changes", "head_sha": "ec26c3e57ca3a959ca5aad62de7213c562f8c821", "status": "queued", "conclusion": null, "url": "https://api.github.com/repos/Codertocat/Hello-World/check-suites/118578147", "before": "6113728f27ae82c7b1a177c8d03f9e96e0adf246", "after": "ec26c3e57ca3a959ca5aad62de7213c562f8c821", "pull_requests": [ { "url": "https://api.github.com/repos/Codertocat/Hello-World/pulls/2", "id": 279147437, "number": 2, "head": { "ref": "changes", "sha": "ec26c3e57ca3a959ca5aad62de7213c562f8c821", "repo": { "id": 186853002, "url": "https://api.github.com/repos/Codertocat/Hello-World", "name": "Hello-World" } }, "base": { "ref": "master", "sha": "f95f852bd8fca8fcc58a9a2d6c842781e32a215e", "repo": { "id": 186853002, "url": "https://api.github.com/repos/Codertocat/Hello-World", "name": "Hello-World" } } } ], "app": { "id": 29310, "node_id": "MDM6QXBwMjkzMTA=", "owner": { "login": "Octocoders", "id": 38302899, "node_id": "MDEyOk9yZ2FuaXphdGlvbjM4MzAyODk5", "avatar_url": "https://avatars1.githubusercontent.com/u/38302899?v=4", "gravatar_id": "", "url": "https://api.github.com/users/Octocoders", "html_url": "https://github.com/Octocoders", "followers_url": "https://api.github.com/users/Octocoders/followers", "following_url": "https://api.github.com/users/Octocoders/following{/other_user}", "gists_url": "https://api.github.com/users/Octocoders/gists{/gist_id}", "starred_url": "https://api.github.com/users/Octocoders/starred{/owner}{/repo}", "subscriptions_url": "https://api.github.com/users/Octocoders/subscriptions", "organizations_url": "https://api.github.com/users/Octocoders/orgs", "repos_url": "https://api.github.com/users/Octocoders/repos", "events_url": "https://api.github.com/users/Octocoders/events{/privacy}", "received_events_url": "https://api.github.com/users/Octocoders/received_events", "type": "Organization", "site_admin": false }, "name": "octocoders-linter", "description": "", "external_url": "https://octocoders.io", "html_url": "https://github.com/apps/octocoders-linter", "created_at": "2019-04-19T19:36:24Z", "updated_at": "2019-04-19T19:36:56Z", "permissions": { "administration": "write", "checks": "write", "contents": "write", "deployments": "write", "issues": "write", "members": "write", "metadata": "read", "organization_administration": "write", "organization_hooks": "write", "organization_plan": "read", "organization_projects": "write", "organization_user_blocking": "write", "pages": "write", "pull_requests": "write", "repository_hooks": "write", "repository_projects": "write", "statuses": "write", "team_discussions": "write", "vulnerability_alerts": "read" }, "events": [] }, "created_at": "2019-05-15T15:20:31Z", "updated_at": "2019-05-15T15:20:31Z" }, "app": { "id": 29310, "node_id": "MDM6QXBwMjkzMTA=", "owner": { "login": "Octocoders", "id": 38302899, "node_id": "MDEyOk9yZ2FuaXphdGlvbjM4MzAyODk5", "avatar_url": "https://avatars1.githubusercontent.com/u/38302899?v=4", "gravatar_id": "", "url": "https://api.github.com/users/Octocoders", "html_url": "https://github.com/Octocoders", "followers_url": "https://api.github.com/users/Octocoders/followers", "following_url": "https://api.github.com/users/Octocoders/following{/other_user}", "gists_url": "https://api.github.com/users/Octocoders/gists{/gist_id}", "starred_url": "https://api.github.com/users/Octocoders/starred{/owner}{/repo}", "subscriptions_url": "https://api.github.com/users/Octocoders/subscriptions", "organizations_url": "https://api.github.com/users/Octocoders/orgs", "repos_url": 
"https://api.github.com/users/Octocoders/repos", "events_url": "https://api.github.com/users/Octocoders/events{/privacy}", "received_events_url": "https://api.github.com/users/Octocoders/received_events", "type": "Organization", "site_admin": false }, "name": "octocoders-linter", "description": "", "external_url": "https://octocoders.io", "html_url": "https://github.com/apps/octocoders-linter", "created_at": "2019-04-19T19:36:24Z", "updated_at": "2019-04-19T19:36:56Z", "permissions": { "administration": "write", "checks": "write", "contents": "write", "deployments": "write", "issues": "write", "members": "write", "metadata": "read", "organization_administration": "write", "organization_hooks": "write", "organization_plan": "read", "organization_projects": "write", "organization_user_blocking": "write", "pages": "write", "pull_requests": "write", "repository_hooks": "write", "repository_projects": "write", "statuses": "write", "team_discussions": "write", "vulnerability_alerts": "read" }, "events": [] }, "pull_requests": [ { "url": "https://api.github.com/repos/Codertocat/Hello-World/pulls/2", "id": 279147437, "number": 2, "head": { "ref": "changes", "sha": "ec26c3e57ca3a959ca5aad62de7213c562f8c821", "repo": { "id": 186853002, "url": "https://api.github.com/repos/Codertocat/Hello-World", "name": "Hello-World" } }, "base": { "ref": "master", "sha": "f95f852bd8fca8fcc58a9a2d6c842781e32a215e", "repo": { "id": 186853002, "url": "https://api.github.com/repos/Codertocat/Hello-World", "name": "Hello-World" } } } ] }, "repository": { "id": 186853002, "node_id": "MDEwOlJlcG9zaXRvcnkxODY4NTMwMDI=", "name": "Hello-World", "full_name": "Codertocat/Hello-World", "private": false, "owner": { "login": "Codertocat", "id": 21031067, "node_id": "MDQ6VXNlcjIxMDMxMDY3", "avatar_url": "https://avatars1.githubusercontent.com/u/21031067?v=4", "gravatar_id": "", "url": "https://api.github.com/users/Codertocat", "html_url": "https://github.com/Codertocat", "followers_url": "https://api.github.com/users/Codertocat/followers", "following_url": "https://api.github.com/users/Codertocat/following{/other_user}", "gists_url": "https://api.github.com/users/Codertocat/gists{/gist_id}", "starred_url": "https://api.github.com/users/Codertocat/starred{/owner}{/repo}", "subscriptions_url": "https://api.github.com/users/Codertocat/subscriptions", "organizations_url": "https://api.github.com/users/Codertocat/orgs", "repos_url": "https://api.github.com/users/Codertocat/repos", "events_url": "https://api.github.com/users/Codertocat/events{/privacy}", "received_events_url": "https://api.github.com/users/Codertocat/received_events", "type": "User", "site_admin": false }, "html_url": "https://github.com/Codertocat/Hello-World", "description": null, "fork": false, "url": "https://api.github.com/repos/Codertocat/Hello-World", "forks_url": "https://api.github.com/repos/Codertocat/Hello-World/forks", "keys_url": "https://api.github.com/repos/Codertocat/Hello-World/keys{/key_id}", "collaborators_url": "https://api.github.com/repos/Codertocat/Hello-World/collaborators{/collaborator}", "teams_url": "https://api.github.com/repos/Codertocat/Hello-World/teams", "hooks_url": "https://api.github.com/repos/Codertocat/Hello-World/hooks", "issue_events_url": "https://api.github.com/repos/Codertocat/Hello-World/issues/events{/number}", "events_url": "https://api.github.com/repos/Codertocat/Hello-World/events", "assignees_url": "https://api.github.com/repos/Codertocat/Hello-World/assignees{/user}", "branches_url": 
"https://api.github.com/repos/Codertocat/Hello-World/branches{/branch}", "tags_url": "https://api.github.com/repos/Codertocat/Hello-World/tags", "blobs_url": "https://api.github.com/repos/Codertocat/Hello-World/git/blobs{/sha}", "git_tags_url": "https://api.github.com/repos/Codertocat/Hello-World/git/tags{/sha}", "git_refs_url": "https://api.github.com/repos/Codertocat/Hello-World/git/refs{/sha}", "trees_url": "https://api.github.com/repos/Codertocat/Hello-World/git/trees{/sha}", "statuses_url": "https://api.github.com/repos/Codertocat/Hello-World/statuses/{sha}", "languages_url": "https://api.github.com/repos/Codertocat/Hello-World/languages", "stargazers_url": "https://api.github.com/repos/Codertocat/Hello-World/stargazers", "contributors_url": "https://api.github.com/repos/Codertocat/Hello-World/contributors", "subscribers_url": "https://api.github.com/repos/Codertocat/Hello-World/subscribers", "subscription_url": "https://api.github.com/repos/Codertocat/Hello-World/subscription", "commits_url": "https://api.github.com/repos/Codertocat/Hello-World/commits{/sha}", "git_commits_url": "https://api.github.com/repos/Codertocat/Hello-World/git/commits{/sha}", "comments_url": "https://api.github.com/repos/Codertocat/Hello-World/comments{/number}", "issue_comment_url": "https://api.github.com/repos/Codertocat/Hello-World/issues/comments{/number}", "contents_url": "https://api.github.com/repos/Codertocat/Hello-World/contents/{+path}", "compare_url": "https://api.github.com/repos/Codertocat/Hello-World/compare/{base}...{head}", "merges_url": "https://api.github.com/repos/Codertocat/Hello-World/merges", "archive_url": "https://api.github.com/repos/Codertocat/Hello-World/{archive_format}{/ref}", "downloads_url": "https://api.github.com/repos/Codertocat/Hello-World/downloads", "issues_url": "https://api.github.com/repos/Codertocat/Hello-World/issues{/number}", "pulls_url": "https://api.github.com/repos/Codertocat/Hello-World/pulls{/number}", "milestones_url": "https://api.github.com/repos/Codertocat/Hello-World/milestones{/number}", "notifications_url": "https://api.github.com/repos/Codertocat/Hello-World/notifications{?since,all,participating}", "labels_url": "https://api.github.com/repos/Codertocat/Hello-World/labels{/name}", "releases_url": "https://api.github.com/repos/Codertocat/Hello-World/releases{/id}", "deployments_url": "https://api.github.com/repos/Codertocat/Hello-World/deployments", "created_at": "2019-05-15T15:19:25Z", "updated_at": "2019-05-15T15:21:03Z", "pushed_at": "2019-05-15T15:20:57Z", "git_url": "git://github.com/Codertocat/Hello-World.git", "ssh_url": "git@github.com:Codertocat/Hello-World.git", "clone_url": "https://github.com/Codertocat/Hello-World.git", "svn_url": "https://github.com/Codertocat/Hello-World", "homepage": null, "size": 0, "stargazers_count": 0, "watchers_count": 0, "language": "Ruby", "has_issues": true, "has_projects": true, "has_downloads": true, "has_wiki": true, "has_pages": true, "forks_count": 1, "mirror_url": null, "archived": false, "disabled": false, "open_issues_count": 2, "license": null, "forks": 1, "open_issues": 2, "watchers": 0, "default_branch": "master" }, "sender": { "login": "Codertocat", "id": 21031067, "node_id": "MDQ6VXNlcjIxMDMxMDY3", "avatar_url": "https://avatars1.githubusercontent.com/u/21031067?v=4", "gravatar_id": "", "url": "https://api.github.com/users/Codertocat", "html_url": "https://github.com/Codertocat", "followers_url": "https://api.github.com/users/Codertocat/followers", "following_url": 
"https://api.github.com/users/Codertocat/following{/other_user}", "gists_url": "https://api.github.com/users/Codertocat/gists{/gist_id}", "starred_url": "https://api.github.com/users/Codertocat/starred{/owner}{/repo}", "subscriptions_url": "https://api.github.com/users/Codertocat/subscriptions", "organizations_url": "https://api.github.com/users/Codertocat/orgs", "repos_url": "https://api.github.com/users/Codertocat/repos", "events_url": "https://api.github.com/users/Codertocat/events{/privacy}", "received_events_url": "https://api.github.com/users/Codertocat/received_events", "type": "User", "site_admin": false } } participle-0.7.1/_examples/json/main.go000066400000000000000000000044441376001225600200640ustar00rootroot00000000000000package main import ( "encoding/json" "fmt" "os" "github.com/alecthomas/participle" ) type pathExpr struct { Parts []part `@@ { "." @@ }` } type part struct { Obj string `@Ident` Acc []acc `("[" @@ "]")*` } type acc struct { Name *string `@(String|Char|RawString)` Index *int `| @Int` } var parser = participle.MustBuild(&pathExpr{}) func main() { if len(os.Args) < 3 { fmt.Fprintf(os.Stderr, "Usage: %s \n", os.Args[0]) os.Exit(2) } q := os.Args[1] files := os.Args[2:] var expr pathExpr if err := parser.ParseString(q, &expr); err != nil { fmt.Fprintln(os.Stderr, err) os.Exit(1) } for _, file := range files { f, err := os.Open(file) if err != nil { fmt.Fprintln(os.Stderr, err) os.Exit(1) } var input map[string]interface{} if err := json.NewDecoder(f).Decode(&input); err != nil { f.Close() fmt.Fprintln(os.Stderr, err) os.Exit(1) } f.Close() result, err := match(input, expr) if err != nil { fmt.Fprintln(os.Stderr, err) os.Exit(1) } switch r := result.(type) { case map[string]interface{}: enc := json.NewEncoder(os.Stdout) enc.SetIndent("", " ") _ = enc.Encode(r) default: fmt.Printf("%v\n", r) } } } func match(input map[string]interface{}, expr pathExpr) (interface{}, error) { var v interface{} = input for _, e := range expr.Parts { switch m := v.(type) { case map[string]interface{}: val, ok := m[e.Obj] if !ok { return nil, fmt.Errorf("not found: %q", e.Obj) } v = val for _, a := range e.Acc { if a.Name != nil { switch m := v.(type) { case map[string]interface{}: val, ok = m[*a.Name].(map[string]interface{}) if !ok { return nil, fmt.Errorf("not found: %q does not contain %q", e.Obj, *a.Name) } v = val default: return nil, fmt.Errorf("cannot access named index in %T", v) } } if a.Index != nil { switch s := v.(type) { case []interface{}: if len(s) <= *a.Index { return nil, fmt.Errorf("not found: %q does contains %d items", e.Obj, len(s)) } v = s[*a.Index] default: return nil, fmt.Errorf("cannot access numeric index in %T", v) } } } default: return nil, fmt.Errorf("cannot read %q, parent is not a map", e.Obj) } } return v, nil } participle-0.7.1/_examples/precedenceclimbing/000077500000000000000000000000001376001225600214345ustar00rootroot00000000000000participle-0.7.1/_examples/precedenceclimbing/main.go000066400000000000000000000046301376001225600227120ustar00rootroot00000000000000// Package main shows an example of how to add precedence climbing to a Participle parser. // // Precedence climbing is an approach to parsing expressions that efficiently // produces compact parse trees. // // In contrast, naive recursive descent expression parsers produce parse trees proportional in // complexity to the number of operators supported. This impacts both readability and // performance. 
// // It is based on https://eli.thegreenplace.net/2012/08/02/parsing-expressions-by-precedence-climbing package main import ( "fmt" "strconv" "text/scanner" "github.com/alecthomas/repr" "github.com/alecthomas/participle" "github.com/alecthomas/participle/lexer" ) type opInfo struct { RightAssociative bool Priority int } var info = map[string]opInfo{ "+": {Priority: 1}, "-": {Priority: 1}, "*": {Priority: 2}, "/": {Priority: 2}, "^": {RightAssociative: true, Priority: 3}, } type Expr struct { Terminal *int Left *Expr Op string Right *Expr } func (e *Expr) String() string { if e.Left != nil { return fmt.Sprintf("(%s %s %s)", e.Left, e.Op, e.Right) } return fmt.Sprintf("%d", *e.Terminal) } func (e *Expr) Parse(lex *lexer.PeekingLexer) error { *e = *parseExpr(lex, 0) return nil } // (1 + 2) * 3 func parseExpr(lex *lexer.PeekingLexer, minPrec int) *Expr { lhs := next(lex) for { op := peek(lex) if op == nil || info[op.Op].Priority < minPrec { break } nextMinPrec := info[op.Op].Priority if !info[op.Op].RightAssociative { nextMinPrec++ } next(lex) rhs := parseExpr(lex, nextMinPrec) lhs = parseOp(op, lhs, rhs) } return lhs } func parseOp(op *Expr, lhs *Expr, rhs *Expr) *Expr { op.Left = lhs op.Right = rhs return op } func next(lex *lexer.PeekingLexer) *Expr { e := peek(lex) if e == nil { return e } _, _ = lex.Next() switch e.Op { case "(": return next(lex) } return e } func peek(lex *lexer.PeekingLexer) *Expr { t, err := lex.Peek(0) if err != nil { panic(err) } if t.EOF() { return nil } switch t.Type { case scanner.Int: n, err := strconv.ParseInt(t.Value, 10, 64) if err != nil { panic(err) } ni := int(n) return &Expr{Terminal: &ni} case ')': _, _ = lex.Next() return nil default: return &Expr{Op: t.Value} } } var parser = participle.MustBuild(&Expr{}) func main() { e := &Expr{} err := parser.ParseString(`(1 + 3) * 2 ^ 2 + 1`, e) if err != nil { panic(err) } fmt.Println(e) repr.Println(e) } participle-0.7.1/_examples/protobuf/000077500000000000000000000000001376001225600174725ustar00rootroot00000000000000participle-0.7.1/_examples/protobuf/example.proto000066400000000000000000000006121376001225600222110ustar00rootroot00000000000000syntax = "proto3"; package test.test; message SearchRequest { string query = 1; int32 page_number = 2; int32 result_per_page = 3; map<string, int32> scores = 4; message Foo {} enum Bar { FOO = 0; } } message SearchResponse { string results = 1; } enum Type { INT = 0; DOUBLE = 1; } service SearchService { rpc Search(SearchRequest) returns (SearchResponse); } participle-0.7.1/_examples/protobuf/main.go000066400000000000000000000127621376001225600207550ustar00rootroot00000000000000// nolint: govet, golint package main import ( "fmt" "os" "github.com/alecthomas/kong" "github.com/alecthomas/repr" "github.com/alecthomas/participle" "github.com/alecthomas/participle/lexer" ) type Proto struct { Pos lexer.Position Entries []*Entry `{ @@ { ";" } }` } type Entry struct { Pos lexer.Position Syntax string ` "syntax" "=" @String` Package string `| "package" @(Ident { "." Ident })` Import string `| "import" @String` Message *Message `| @@` Service *Service `| @@` Enum *Enum `| @@` Option *Option `| "option" @@` Extend *Extend `| @@` } type Option struct { Pos lexer.Position Name string `( "(" @Ident @{ "." Ident } ")" | @Ident @{ "." @Ident } )` Attr *string `[ "." @Ident { "." @Ident } ]` Value *Value `"=" @@` } type Value struct { Pos lexer.Position String *string ` @String` Number *float64 `| @Float` Int *int64 `| @Int` Bool *bool `| (@"true" | "false")` Reference *string `| @Ident @{ "."
Ident }` Map *Map `| @@` Array *Array `| @@` } type Array struct { Pos lexer.Position Elements []*Value `"[" [ @@ { [ "," ] @@ } ] "]"` } type Map struct { Pos lexer.Position Entries []*MapEntry `"{" [ @@ { [ "," ] @@ } ] "}"` } type MapEntry struct { Pos lexer.Position Key *Value `@@` Value *Value `[ ":" ] @@` } type Extensions struct { Pos lexer.Position Extensions []Range `"extensions" @@ { "," @@ }` } type Reserved struct { Pos lexer.Position Reserved []Range `"reserved" @@ { "," @@ }` } type Range struct { Ident string ` @String` Start int `| ( @Int` End *int ` [ "to" ( @Int` Max bool ` | @"max" ) ] )` } type Extend struct { Pos lexer.Position Reference string `"extend" @Ident { "." @Ident }` Fields []*Field `"{" { @@ [ ";" ] } "}"` } type Service struct { Pos lexer.Position Name string `"service" @Ident` Entry []*ServiceEntry `"{" { @@ [ ";" ] } "}"` } type ServiceEntry struct { Pos lexer.Position Option *Option ` "option" @@` Method *Method `| @@` } type Method struct { Pos lexer.Position Name string `"rpc" @Ident` StreamingRequest bool `"(" [ @"stream" ]` Request *Type ` @@ ")"` StreamingResponse bool `"returns" "(" [ @"stream" ]` Response *Type ` @@ ")"` Options []*Option `[ "{" { "option" @@ ";" } "}" ]` } type Enum struct { Pos lexer.Position Name string `"enum" @Ident` Values []*EnumEntry `"{" { @@ { ";" } } "}"` } type EnumEntry struct { Pos lexer.Position Value *EnumValue ` @@` Option *Option `| "option" @@` } type EnumValue struct { Pos lexer.Position Key string `@Ident` Value int `"=" @( [ "-" ] Int )` Options []*Option `[ "[" @@ { "," @@ } "]" ]` } type Message struct { Pos lexer.Position Name string `"message" @Ident` Entries []*MessageEntry `"{" { @@ } "}"` } type MessageEntry struct { Pos lexer.Position Enum *Enum `( @@` Option *Option ` | "option" @@` Message *Message ` | @@` Oneof *Oneof ` | @@` Extend *Extend ` | @@` Reserved *Reserved ` | @@` Extensions *Extensions ` | @@` Field *Field ` | @@ ) { ";" }` } type Oneof struct { Pos lexer.Position Name string `"oneof" @Ident` Entries []*OneofEntry `"{" { @@ { ";" } } "}"` } type OneofEntry struct { Pos lexer.Position Field *Field ` @@` Option *Option `| "option" @@` } type Field struct { Pos lexer.Position Optional bool `[ @"optional"` Required bool ` | @"required"` Repeated bool ` | @"repeated" ]` Type *Type `@@` Name string `@Ident` Tag int `"=" @Int` Options []*Option `[ "[" @@ { "," @@ } "]" ]` } type Scalar int const ( None Scalar = iota Double Float Int32 Int64 Uint32 Uint64 Sint32 Sint64 Fixed32 Fixed64 SFixed32 SFixed64 Bool String Bytes ) var scalarToString = map[Scalar]string{ None: "None", Double: "Double", Float: "Float", Int32: "Int32", Int64: "Int64", Uint32: "Uint32", Uint64: "Uint64", Sint32: "Sint32", Sint64: "Sint64", Fixed32: "Fixed32", Fixed64: "Fixed64", SFixed32: "SFixed32", SFixed64: "SFixed64", Bool: "Bool", String: "String", Bytes: "Bytes", } func (s Scalar) GoString() string { return scalarToString[s] } var stringToScalar = map[string]Scalar{ "double": Double, "float": Float, "int32": Int32, "int64": Int64, "uint32": Uint32, "uint64": Uint64, "sint32": Sint32, "sint64": Sint64, "fixed32": Fixed32, "fixed64": Fixed64, "sfixed32": SFixed32, "sfixed64": SFixed64, "bool": Bool, "string": String, "bytes": Bytes, } func (s *Scalar) Parse(lex *lexer.PeekingLexer) error { token, err := lex.Peek(0) if err != nil { return err } v, ok := stringToScalar[token.Value] if !ok { return participle.NextMatch } _, err = lex.Next() if err != nil { return err } *s = v return nil } type Type struct { Pos 
lexer.Position Scalar Scalar ` @@` Map *MapType `| @@` Reference string `| @(Ident { "." Ident })` } type MapType struct { Pos lexer.Position Key *Type `"map" "<" @@` Value *Type `"," @@ ">"` } var ( parser = participle.MustBuild(&Proto{}, participle.UseLookahead(2)) cli struct { Files []string `required existingfile arg help:"Protobuf files."` } ) func main() { ctx := kong.Parse(&cli) for _, file := range cli.Files { fmt.Println(file) proto := &Proto{} r, err := os.Open(file) ctx.FatalIfErrorf(err, "") err = parser.Parse(r, proto) ctx.FatalIfErrorf(err, "") repr.Println(proto, repr.Hide(&lexer.Position{})) } } participle-0.7.1/_examples/sql/000077500000000000000000000000001376001225600164315ustar00rootroot00000000000000participle-0.7.1/_examples/sql/main.go000066400000000000000000000105041376001225600177040ustar00rootroot00000000000000// nolint: govet package main import ( "github.com/alecthomas/kong" "github.com/alecthomas/participle" "github.com/alecthomas/participle/lexer" "github.com/alecthomas/repr" ) type Boolean bool func (b *Boolean) Capture(values []string) error { *b = values[0] == "TRUE" return nil } // Select based on http://www.h2database.com/html/grammar.html type Select struct { Top *Term `"SELECT" [ "TOP" @@ ]` Distinct bool `[ @"DISTINCT"` All bool ` | @"ALL" ]` Expression *SelectExpression `@@` From *From `"FROM" @@` Limit *Expression `[ "LIMIT" @@ ]` Offset *Expression `[ "OFFSET" @@ ]` GroupBy *Expression `[ "GROUP" "BY" @@ ]` } type From struct { TableExpressions []*TableExpression `@@ { "," @@ }` Where *Expression `[ "WHERE" @@ ]` } type TableExpression struct { Table string `( @Ident { "." @Ident }` Select *Select ` | "(" @@ ")"` Values []*Expression ` | "VALUES" "(" @@ { "," @@ } ")")` As string `[ "AS" @Ident ]` } type SelectExpression struct { All bool ` @"*"` Expressions []*AliasedExpression `| @@ { "," @@ }` } type AliasedExpression struct { Expression *Expression `@@` As string `[ "AS" @Ident ]` } type Expression struct { Or []*OrCondition `@@ { "OR" @@ }` } type OrCondition struct { And []*Condition `@@ { "AND" @@ }` } type Condition struct { Operand *ConditionOperand ` @@` Not *Condition `| "NOT" @@` Exists *Select `| "EXISTS" "(" @@ ")"` } type ConditionOperand struct { Operand *Operand `@@` ConditionRHS *ConditionRHS `[ @@ ]` } type ConditionRHS struct { Compare *Compare ` @@` Is *Is `| "IS" @@` Between *Between `| "BETWEEN" @@` In *In `| "IN" "(" @@ ")"` Like *Like `| "LIKE" @@` } type Compare struct { Operator string `@( "<>" | "<=" | ">=" | "=" | "<" | ">" | "!=" )` Operand *Operand `( @@` Select *CompareSelect ` | @@ )` } type CompareSelect struct { All bool `( @"ALL"` Any bool ` | @"ANY"` Some bool ` | @"SOME" )` Select *Select `"(" @@ ")"` } type Like struct { Not bool `[ @"NOT" ]` Operand *Operand `@@` } type Is struct { Not bool `[ @"NOT" ]` Null bool `( @"NULL"` DistinctFrom *Operand ` | "DISTINCT" "FROM" @@ )` } type Between struct { Start *Operand `@@` End *Operand `"AND" @@` } type In struct { Select *Select ` @@` Expressions []*Expression `| @@ { "," @@ }` } type Operand struct { Summand []*Summand `@@ { "|" "|" @@ }` } type Summand struct { LHS *Factor `@@` Op string `[ @("+" | "-")` RHS *Factor ` @@ ]` } type Factor struct { LHS *Term `@@` Op string `[ @("*" | "/" | "%")` RHS *Term ` @@ ]` } type Term struct { Select *Select ` @@` Value *Value `| @@` SymbolRef *SymbolRef `| @@` SubExpression *Expression `| "(" @@ ")"` } type SymbolRef struct { Symbol string `@Ident @{ "." 
Ident }` Parameters []*Expression `[ "(" @@ { "," @@ } ")" ]` } type Value struct { Wildcard bool `( @"*"` Number *float64 ` | @Number` String *string ` | @String` Boolean *Boolean ` | @("TRUE" | "FALSE")` Null bool ` | @"NULL"` Array *Array ` | @@ )` } type Array struct { Expressions []*Expression `"(" @@ { "," @@ } ")"` } var ( cli struct { SQL string `arg:"" required:"" help:"SQL to parse."` } sqlLexer = lexer.Must(lexer.Regexp(`(\s+)` + `|(?P<Keyword>(?i)SELECT|FROM|TOP|DISTINCT|ALL|WHERE|GROUP|BY|HAVING|UNION|MINUS|EXCEPT|INTERSECT|ORDER|LIMIT|OFFSET|TRUE|FALSE|NULL|IS|NOT|ANY|SOME|BETWEEN|AND|OR|LIKE|AS|IN)` + `|(?P<Ident>[a-zA-Z_][a-zA-Z0-9_]*)` + `|(?P<Number>[-+]?\d*\.?\d+([eE][-+]?\d+)?)` + `|(?P<String>'[^']*'|"[^"]*")` + `|(?P<Operators><>|!=|<=|>=|[-+*/%,.()=<>])`, )) sqlParser = participle.MustBuild( &Select{}, participle.Lexer(sqlLexer), participle.Unquote("String"), participle.CaseInsensitive("Keyword"), // participle.Elide("Comment"), // Need to solve left recursion detection first, if possible. // participle.UseLookahead(), ) ) func main() { ctx := kong.Parse(&cli) sql := &Select{} err := sqlParser.ParseString(cli.SQL, sql) repr.Println(sql, repr.Indent(" "), repr.OmitEmpty(true)) ctx.FatalIfErrorf(err) } participle-0.7.1/_examples/stateful/000077500000000000000000000000001376001225600174615ustar00rootroot00000000000000participle-0.7.1/_examples/stateful/main.go000066400000000000000000000023541376001225600207400ustar00rootroot00000000000000package main import ( "log" "github.com/alecthomas/repr" "github.com/alecthomas/participle" "github.com/alecthomas/participle/lexer" "github.com/alecthomas/participle/lexer/stateful" ) type Terminal struct { String *String ` @@` Ident string `| @Ident` } type Expr struct { Left *Terminal `@@` Op string `( @Oper` Right *Terminal ` @@)?` } type Fragment struct { Escaped string `( @Escaped` Expr *Expr ` | "${" @@ "}"` Text string ` | @Char)` } type String struct { Fragments []*Fragment `"\"" @@* "\""` } var ( def = lexer.Must(stateful.New(stateful.Rules{ "Root": { {`String`, `"`, stateful.Push("String")}, }, "String": { {"Escaped", `\\.`, nil}, {"StringEnd", `"`, stateful.Pop()}, {"Expr", `\${`, stateful.Push("Expr")}, {"Char", `[^$"\\]+`, nil}, }, "Expr": { stateful.Include("Root"), {`Whitespace`, `\s+`, nil}, {`Oper`, `[-+/*%]`, nil}, {"Ident", `\w+`, nil}, {"ExprEnd", `}`, stateful.Pop()}, }, })) parser = participle.MustBuild(&String{}, participle.Lexer(def), participle.Elide("Whitespace")) ) func main() { actual := &String{} err := parser.ParseString(`"hello ${first + "${last}"}"`, actual) if err != nil { log.Fatal(err) } repr.Println(actual) } participle-0.7.1/_examples/thrift/000077500000000000000000000000001376001225600171325ustar00rootroot00000000000000participle-0.7.1/_examples/thrift/main.go000066400000000000000000000123001376001225600204010ustar00rootroot00000000000000// Package main implements a parser for Thrift files (https://thrift.apache.org/) // // It parses namespaces, exceptions, services, structs, consts, typedefs and enums, but is easily // extensible to more. // // It also supports annotations and method throws. package main import ( "fmt" "os" "strings" "gopkg.in/alecthomas/kingpin.v2" "github.com/alecthomas/participle" "github.com/alecthomas/participle/lexer" "github.com/alecthomas/repr" ) var ( files = kingpin.Arg("thrift", "Thrift files.").Required().Strings() ) type Namespace struct { Pos lexer.Position Language string `"namespace" @Ident` Namespace string `@Ident { @"." @Ident }` } type Type struct { Pos lexer.Position Name string `@Ident { @"."
@Ident }` TypeOne *Type `[ "<" @@ [ ","` TypeTwo *Type ` @@ ] ">" ]` } type Annotation struct { Pos lexer.Position Key string `@Ident { @"." @Ident }` Value *Literal `[ "=" @@ ]` } type Field struct { Pos lexer.Position ID string `@Int ":"` Requirement string `@[ "optional" | "required" ]` Type *Type `@@` Name string `@Ident` Default *Literal `[ "=" @@ ]` Annotations []*Annotation `[ "(" @@ { "," @@ } ")" ] [ ";" ]` } type Exception struct { Pos lexer.Position Name string `"exception" @Ident "{"` Fields []*Field `@@ { @@ } "}"` Annotations []*Annotation `[ "(" @@ { "," @@ } ")" ]` } type Struct struct { Pos lexer.Position Union bool `( "struct" | @"union" )` Name string `@Ident "{"` Fields []*Field `{ @@ } "}"` Annotations []*Annotation `[ "(" @@ { "," @@ } ")" ]` } type Argument struct { Pos lexer.Position ID string `@Int ":"` Type *Type `@@` Name string `@Ident` } type Throw struct { Pos lexer.Position ID string `@Int ":"` Type *Type `@@` Name string `@Ident` } type Method struct { Pos lexer.Position ReturnType *Type `@@` Name string `@Ident` Arguments []*Argument `"(" [ @@ { "," @@ } ] ")"` Throws []*Throw `[ "throws" "(" @@ { "," @@ } ")" ]` Annotations []*Annotation `[ "(" @@ { "," @@ } ")" ]` } type Service struct { Pos lexer.Position Name string `"service" @Ident` Extends string `[ "extends" @Ident { @"." @Ident } ]` Methods []*Method `"{" { @@ [ ";" ] } "}"` Annotations []*Annotation `[ "(" @@ { "," @@ } ")" ]` } // Literal is a "union" type, where only one matching value will be present. type Literal struct { Pos lexer.Position Str *string ` @String` Float *float64 `| @Float` Int *int64 `| @Int` Bool *string `| @( "true" | "false" )` Reference *string `| @Ident { @"." @Ident }` Minus *Literal `| "-" @@` List []*Literal `| "[" { @@ [ "," ] } "]"` Map []*MapItem `| "{" { @@ [ "," ] } "}"` } func (l *Literal) GoString() string { switch { case l.Str != nil: return fmt.Sprintf("%q", *l.Str) case l.Float != nil: return fmt.Sprintf("%v", *l.Float) case l.Int != nil: return fmt.Sprintf("%v", *l.Int) case l.Bool != nil: return fmt.Sprintf("%v", *l.Bool) case l.Reference != nil: return fmt.Sprintf("%s", *l.Reference) case l.Minus != nil: return fmt.Sprintf("-%v", l.Minus) case l.List != nil: parts := []string{} for _, e := range l.List { parts = append(parts, e.GoString()) } return fmt.Sprintf("[%s]", strings.Join(parts, ", ")) case l.Map != nil: parts := []string{} for _, e := range l.Map { parts = append(parts, e.GoString()) } return fmt.Sprintf("{%s}", strings.Join(parts, ", ")) } panic("unsupported?") } type MapItem struct { Pos lexer.Position Key *Literal `@@ ":"` Value *Literal `@@` } func (m *MapItem) GoString() string { return fmt.Sprintf("%v: %v", m.Key, m.Value) } type Case struct { Pos lexer.Position Name string `@Ident` Annotations []*Annotation `[ "(" @@ { "," @@ } ")" ]` Value *Literal `[ "=" @@ ] [ "," | ";" ]` } type Enum struct { Pos lexer.Position Name string `"enum" @Ident "{"` Cases []*Case `{ @@ } "}"` Annotations []*Annotation `[ "(" @@ { "," @@ } ")" ]` } type Typedef struct { Pos lexer.Position Type *Type `"typedef" @@` Name string `@Ident` } type Const struct { Pos lexer.Position Type *Type `"const" @@` Name string `@Ident` Value *Literal `"=" @@ [ ";" ]` } type Entry struct { Pos lexer.Position Includes []string ` "include" @String` Namespaces []*Namespace `| @@` Structs []*Struct `| @@` Exceptions []*Exception `| @@` Services []*Service `| @@` Enums []*Enum `| @@` Typedefs []*Typedef `| @@` Consts []*Const `| @@` } // Thrift files consist of a set of top-level 
directives and definitions. // // The grammar is defined by the annotated structs below. type Thrift struct { Pos lexer.Position Entries []*Entry `{ @@ }` } func main() { kingpin.Parse() parser, err := participle.Build(&Thrift{}) kingpin.FatalIfError(err, "") for _, file := range *files { thrift := &Thrift{} r, err := os.Open(file) kingpin.FatalIfError(err, "") err = parser.Parse(r, thrift) kingpin.FatalIfError(err, "") repr.Println(thrift) } } participle-0.7.1/_examples/thrift/main_test.go000066400000000000000000000031611376001225600214450ustar00rootroot00000000000000package main import ( "strings" "testing" "github.com/stretchr/testify/require" "github.com/alecthomas/go-thrift/parser" "github.com/alecthomas/participle" ) var ( source = strings.TrimSpace(` namespace cpp thrift.example namespace java thrift.example enum TweetType { TWEET RETWEET = 2 DM = 3 REPLY } struct Location { 1: required double latitude 2: required double longitude } struct Tweet { 1: required i32 userId 2: required string userName 3: required string text 4: optional Location loc 5: optional TweetType tweetType = TweetType.TWEET 16: optional string language = "english" } typedef list<Tweet> TweetList struct TweetSearchResult { 1: TweetList tweets } exception TwitterUnavailable { 1: string message } const i32 MAX_RESULTS = 100 service Twitter { void ping() bool postTweet(1:Tweet tweet) throws (1:TwitterUnavailable unavailable) TweetSearchResult searchTweets(1:string query) void zip() } `) ) func BenchmarkParticipleThrift(b *testing.B) { b.ReportAllocs() parser, err := participle.Build(&Thrift{}) require.NoError(b, err) thrift := &Thrift{} err = parser.ParseString(source, thrift) require.NoError(b, err) b.ResetTimer() for i := 0; i < b.N; i++ { thrift := &Thrift{} _ = parser.ParseString(source, thrift) } } func BenchmarkGoThriftParser(b *testing.B) { b.ReportAllocs() _, err := parser.ParseReader("user.thrift", strings.NewReader(source)) require.NoError(b, err) b.ResetTimer() for i := 0; i < b.N; i++ { _, _ = parser.ParseReader("user.thrift", strings.NewReader(source)) } } participle-0.7.1/_examples/toml/000077500000000000000000000000001376001225600166055ustar00rootroot00000000000000participle-0.7.1/_examples/toml/example.toml000066400000000000000000000010611376001225600211330ustar00rootroot00000000000000# This is a TOML document.
title = "TOML Example" [owner] name = "Tom Preston-Werner" dob = 1979-05-27T07:32:00-08:00 # First class dates [database] server = "192.168.1.1" ports = [ 8001, 8001, 8002 ] connection_max = 5000 enabled = true enabled = false [servers] # Indentation (tabs and/or spaces) is allowed but not required [servers.alpha] ip = "10.0.0.1" dc = "eqdc10" [servers.beta] ip = "10.0.0.2" dc = "eqdc10" [clients] data = [ ["gamma", "delta"], [1, 2] ] # Line breaks are OK when inside arrays hosts = [ "alpha", "omega" ] participle-0.7.1/_examples/toml/main.go000066400000000000000000000036771376001225600200750ustar00rootroot00000000000000package main import ( "os" "github.com/alecthomas/kong" "github.com/alecthomas/participle" "github.com/alecthomas/participle/lexer" "github.com/alecthomas/participle/lexer/ebnf" "github.com/alecthomas/repr" ) type TOML struct { Pos lexer.Position Entries []*Entry `{ @@ }` } type Entry struct { Field *Field ` @@` Section *Section `| @@` } type Field struct { Key string `@Ident "="` Value *Value `@@` } type Value struct { String *string ` @String` DateTime *string `| @DateTime` Date *string `| @Date` Time *string `| @Time` Bool *bool `| (@"true" | "false")` Integer *int64 `| @Int` Float *float64 `| @Float` List []*Value `| "[" [ @@ { "," @@ } ] "]"` } type Section struct { Name string `"[" @(Ident { "." Ident }) "]"` Fields []*Field `{ @@ }` } var ( tomlLexer = lexer.Must(ebnf.New(` Comment = "#" { "\u0000"…"\uffff"-"\n" } . DateTime = date "T" time [ "-" digit digit ":" digit digit ]. Date = date . Time = time . Ident = (alpha | "_") { "_" | alpha | digit } . String = "\"" { "\u0000"…"\uffff"-"\""-"\\" | "\\" any } "\"" . Int = [ "-" | "+" ] digit { digit } . Float = ("." | digit) {"." | digit} . Punct = "!"…"/" | ":"…"@" | "["…` + "\"`\"" + ` | "{"…"~" . Whitespace = " " | "\t" | "\n" | "\r" . alpha = "a"…"z" | "A"…"Z" . digit = "0"…"9" . any = "\u0000"…"\uffff" . date = digit digit digit digit "-" digit digit "-" digit digit . time = digit digit ":" digit digit ":" digit digit [ "." { digit } ] . `)) tomlParser = participle.MustBuild(&TOML{}, participle.Lexer(tomlLexer), participle.Unquote("String"), participle.Elide("Whitespace", "Comment"), ) cli struct { File string `help:"TOML file to parse." 
arg:""` } ) func main() { ctx := kong.Parse(&cli) toml := &TOML{} r, err := os.Open(cli.File) ctx.FatalIfErrorf(err) defer r.Close() err = tomlParser.Parse(r, toml) ctx.FatalIfErrorf(err) repr.Println(toml) } participle-0.7.1/_examples/µc/000077500000000000000000000000001376001225600166435ustar00rootroot00000000000000participle-0.7.1/_examples/µc/main.go000066400000000000000000000134341376001225600201230ustar00rootroot00000000000000package main import ( "github.com/alecthomas/repr" "github.com/alecthomas/participle" "github.com/alecthomas/participle/lexer" "github.com/alecthomas/participle/lexer/stateful" ) // https://www.it.uu.se/katalog/aleji304/CompilersProject/uc.html // // program ::= topdec_list // topdec_list ::= /empty/ | topdec topdec_list // topdec ::= vardec ";" // | funtype ident "(" formals ")" funbody // vardec ::= scalardec | arraydec // scalardec ::= typename ident // arraydec ::= typename ident "[" intconst "]" // typename ::= "int" | "char" // funtype ::= typename | "void" // funbody ::= "{" locals stmts "}" | ";" // formals ::= "void" | formal_list // formal_list ::= formaldec | formaldec "," formal_list // formaldec ::= scalardec | typename ident "[" "]" // locals ::= /empty/ | vardec ";" locals // stmts ::= /empty/ | stmt stmts // stmt ::= expr ";" // | "return" expr ";" | "return" ";" // | "while" condition stmt // | "if" condition stmt else_part // | "{" stmts "}" // | ";" // else_part ::= /empty/ | "else" stmt // condition ::= "(" expr ")" // expr ::= intconst // | ident | ident "[" expr "]" // | unop expr // | expr binop expr // | ident "(" actuals ")" // | "(" expr ")" // unop ::= "-" | "!" // binop ::= "+" | "-" | "*" | "/" // | "<" | ">" | "<=" | ">=" | "!=" | "==" // | "&&" // | "=" // actuals ::= /empty/ | expr_list // expr_list ::= expr | expr "," expr_list type Program struct { Pos lexer.Position TopDec []*TopDec `@@*` } type TopDec struct { Pos lexer.Position FunDec *FunDec ` @@` VarDec *VarDec `| @@ ";"` } type VarDec struct { Pos lexer.Position ArrayDec *ArrayDec ` @@` ScalarDec *ScalarDec `| @@` } type ScalarDec struct { Pos lexer.Position Type string `@Type` Name string `@Ident` } type ArrayDec struct { Pos lexer.Position Type string `@Type` Name string `@Ident` Size int `"[" @Int "]"` } type ReturnStmt struct { Pos lexer.Position Result *Expr `"return" @@?` } type WhileStmt struct { Pos lexer.Position Condition *Expr `"while" "(" @@ ")"` Body *Stmt `@@` } type IfStmt struct { Pos lexer.Position Condition *Expr `"if" "(" @@ ")"` Body *Stmt `@@` Else *Stmt `("else" @@)?` } type Stmts struct { Pos lexer.Position Stmts []*Stmt `@@*` } type Stmt struct { Pos lexer.Position IfStmt *IfStmt ` @@` ReturnStmt *ReturnStmt `| @@` WhileStmt *WhileStmt `| @@` Block *Stmts `| "{" @@ "}"` Expr *Expr `| @@` Empty bool `| ";"` } type FunBody struct { Pos lexer.Position Locals []*VarDec `(@@ ";")*` Stmts *Stmts `@@` } type FunDec struct { Pos lexer.Position ReturnType string `@(Type | "void")` Name string `@Ident` Parameters []*Parameter `"(" ((@@ ("," @@)*) | "void") ")"` FunBody *FunBody `(";" | "{" @@ "}")` } type Parameter struct { Pos lexer.Position Array *ArrayParameter ` @@` Scalar *ScalarDec `| @@` } type ArrayParameter struct { Pos lexer.Position Type string `@Type` Ident string `@Ident "[" "]"` } type Expr struct { Pos lexer.Position Assignment *Assignment `@@` } type Assignment struct { Pos lexer.Position Equality *Equality `@@` Op string `( @"="` Next *Equality ` @@ )?` } type Equality struct { Pos lexer.Position Comparison *Comparison `@@` Op string `[ @( 
"!" "=" | "=" "=" )` Next *Equality ` @@ ]` } type Comparison struct { Pos lexer.Position Addition *Addition `@@` Op string `[ @( ">" "=" | ">" | "<" "=" | "<" )` Next *Comparison ` @@ ]` } type Addition struct { Pos lexer.Position Multiplication *Multiplication `@@` Op string `[ @( "-" | "+" )` Next *Addition ` @@ ]` } type Multiplication struct { Pos lexer.Position Unary *Unary `@@` Op string `[ @( "/" | "*" )` Next *Multiplication ` @@ ]` } type Unary struct { Pos lexer.Position Op string ` ( @( "!" | "-" )` Unary *Unary ` @@ )` Primary *Primary `| @@` } type Primary struct { Pos lexer.Position Number *int ` @Int` ArrayIndex *ArrayIndex `| @@` CallFunc *CallFunc `| @@` Ident string `| @Ident` SubExpression *Expr `| "(" @@ ")" ` } type ArrayIndex struct { Pos lexer.Position Ident string `@Ident` Index []*Expr `("[" @@ "]")+` } type CallFunc struct { Pos lexer.Position Ident string `@Ident` Index []*Expr `"(" (@@ ("," @@)*)? ")"` } var ( lex = lexer.Must(stateful.New(stateful.Rules{ "Root": { {"comment", `//.*|/\*.*?\*/`, nil}, {"whitespace", `\s+`, nil}, {"Type", `\b(int|char)\b`, nil}, {"Ident", `\b([a-zA-Z_][a-zA-Z0-9_]*)\b`, nil}, {"Punct", `[-,()*/+%{};&!=:<>]|\[|\]`, nil}, {"Int", `\d+`, nil}, }, })) intType = lex.Symbols()["Int"] parser = participle.MustBuild(&Program{}, participle.Lexer(lex), participle.UseLookahead(2)) ) const sample = ` /* This is an example uC program. */ void putint(int i); int fac(int n) { if (n < 2) return n; return n * fac(n - 1); } int sum(int n, int a[]) { int i; int s; i = 0; s = 0; while (i <= n) { s = s + a[i]; i = i + 1; } return s; } int main(void) { int a[2]; a[0] = fac(5); a[1] = 27; putint(sum(2, a)); // prints 147 return 0; } ` func main() { ast := &Program{} defer func() { repr.Println(ast) }() err := parser.ParseString(sample, ast) if err != nil { panic(err) } } participle-0.7.1/_examples/µc/main_test.go000066400000000000000000000004301376001225600211520ustar00rootroot00000000000000package main import ( "strings" "testing" ) func BenchmarkParser(b *testing.B) { src := strings.Repeat(sample, 10) b.ReportAllocs() b.ReportMetric(float64(len(src)*b.N), "B/s") for i := 0; i < b.N; i++ { program := &Program{} _ = parser.ParseString(src, program) } } participle-0.7.1/api.go000066400000000000000000000010661376001225600147600ustar00rootroot00000000000000package participle import ( "github.com/alecthomas/participle/lexer" ) // Capture can be implemented by fields in order to transform captured tokens into field values. type Capture interface { Capture(values []string) error } // The Parseable interface can be implemented by any element in the grammar to provide custom parsing. type Parseable interface { // Parse into the receiver. // // Should return NextMatch if no tokens matched and parsing should continue. // Nil should be returned if parsing was successful. Parse(lex *lexer.PeekingLexer) error } participle-0.7.1/context.go000066400000000000000000000054741376001225600157020ustar00rootroot00000000000000package participle import ( "reflect" "github.com/alecthomas/participle/lexer" ) type contextFieldSet struct { pos lexer.Position strct reflect.Value field structLexerField fieldValue []reflect.Value } // Context for a single parse. 
type parseContext struct { *lexer.PeekingLexer deepestError error deepestErrorDepth int lookahead int caseInsensitive map[rune]bool apply []*contextFieldSet allowTrailing bool } func newParseContext(lex *lexer.PeekingLexer, lookahead int, caseInsensitive map[rune]bool) *parseContext { return &parseContext{ PeekingLexer: lex, caseInsensitive: caseInsensitive, lookahead: lookahead, } } func (p *parseContext) DeepestError(err error) error { if p.PeekingLexer.Cursor() >= p.deepestErrorDepth { return err } if p.deepestError != nil { return p.deepestError } return err } // Defer adds a function to be applied once a branch has been picked. func (p *parseContext) Defer(pos lexer.Position, strct reflect.Value, field structLexerField, fieldValue []reflect.Value) { p.apply = append(p.apply, &contextFieldSet{pos, strct, field, fieldValue}) } // Apply deferred functions. func (p *parseContext) Apply() error { for _, apply := range p.apply { if err := setField(apply.pos, apply.strct, apply.field, apply.fieldValue); err != nil { return err } } p.apply = nil return nil } // Accept accepts the branch as the correct branch. func (p *parseContext) Accept(branch *parseContext) { p.apply = append(p.apply, branch.apply...) p.PeekingLexer = branch.PeekingLexer if branch.deepestErrorDepth >= p.deepestErrorDepth { p.deepestErrorDepth = branch.deepestErrorDepth p.deepestError = branch.deepestError } } // Branch starts a new lookahead branch. func (p *parseContext) Branch() *parseContext { branch := &parseContext{} *branch = *p branch.apply = nil branch.PeekingLexer = p.PeekingLexer.Clone() return branch } func (p *parseContext) MaybeUpdateError(err error) { if p.PeekingLexer.Cursor() >= p.deepestErrorDepth { p.deepestError = err p.deepestErrorDepth = p.PeekingLexer.Cursor() } } // Stop returns true if parsing should terminate after the given "branch" failed to match. // // Additionally, "err" should be the branch error, if any. This will be tracked to // aid in error reporting under the assumption that the deepest occurring error is more // useful than errors further up. func (p *parseContext) Stop(err error, branch *parseContext) bool { if branch.PeekingLexer.Cursor() >= p.deepestErrorDepth { p.deepestError = err p.deepestErrorDepth = maxInt(branch.PeekingLexer.Cursor(), branch.deepestErrorDepth) } if branch.PeekingLexer.Cursor() > p.PeekingLexer.Cursor()+p.lookahead { p.Accept(branch) return true } return false } func maxInt(a, b int) int { if a > b { return a } return b } participle-0.7.1/doc.go000066400000000000000000000045661376001225600147610ustar00rootroot00000000000000// Package participle constructs parsers from definitions in struct tags and parses directly into // those structs. The approach is philosophically similar to how other marshallers work in Go, // "unmarshalling" an instance of a grammar into a struct. // // The supported annotation syntax is: // // - `@<expr>` Capture expression into the field. // - `@@` Recursively capture using the fields own type. // - `<identifier>` Match named lexer token. // - `( ... )` Group. // - `"..."` Match the literal (note that the lexer must emit tokens matching this literal exactly). // - `"...":<identifier>` Match the literal, specifying the exact lexer token type to match. // - `<expr> <expr> ...` Match expressions. // - `<expr> | <expr>` Match one of the alternatives. // // The following modifiers can be used after any expression: // // - `*` Expression can match zero or more times. // - `+` Expression must match one or more times. // - `?` Expression can match zero or once.
// - `!` Require a non-empty match (this is useful with a sequence of optional matches eg. `("a"? "b"? "c"?)!`). // // Supported but deprecated: // // - `{ ... }` Match 0 or more times (**DEPRECATED** - prefer `( ... )*`). // - `[ ... ]` Optional (**DEPRECATED** - prefer `( ... )?`). // // Here's an example of an EBNF grammar. // // type Group struct { // Expression *Expression `"(" @@ ")"` // } // // type Option struct { // Expression *Expression `"[" @@ "]"` // } // // type Repetition struct { // Expression *Expression `"{" @@ "}"` // } // // type Literal struct { // Start string `@String` // lexer.Lexer token "String" // End string `("…" @String)?` // } // // type Term struct { // Name string ` @Ident` // Literal *Literal `| @@` // Group *Group `| @@` // Option *Option `| @@` // Repetition *Repetition `| @@` // } // // type Sequence struct { // Terms []*Term `@@+` // } // // type Expression struct { // Alternatives []*Sequence `@@ ("|" @@)*` // } // // type Expressions []*Expression // // type Production struct { // Name string `@Ident "="` // Expressions Expressions `@@+ "."` // } // // type EBNF struct { // Productions []*Production `@@*` // } package participle participle-0.7.1/ebnf.go000066400000000000000000000044051376001225600151210ustar00rootroot00000000000000package participle import ( "fmt" "strings" ) // String returns the EBNF for the grammar. // // Productions are always upper case. Lexer tokens are always lower case. func (p *Parser) String() string { seen := map[node]bool{} outp := []*ebnfp{} ebnf(p.root, seen, nil, &outp) out := []string{} for _, p := range outp { out = append(out, fmt.Sprintf("%s = %s .", p.name, p.out)) } return strings.Join(out, "\n") } type ebnfp struct { name string out string } func ebnf(n node, seen map[node]bool, p *ebnfp, outp *[]*ebnfp) { switch n := n.(type) { case *disjunction: for i, next := range n.nodes { if i > 0 { p.out += " | " } ebnf(next, seen, p, outp) } return case *strct: name := strings.ToUpper(n.typ.Name()[:1]) + n.typ.Name()[1:] if p != nil { p.out += name } if seen[n] { return } seen[n] = true p = &ebnfp{name: name} *outp = append(*outp, p) ebnf(n.expr, seen, p, outp) return case *sequence: ebnf(n.node, seen, p, outp) if n.next != nil { p.out += " " ebnf(n.next, seen, p, outp) } return case *parseable: p.out += n.t.Name() case *capture: ebnf(n.node, seen, p, outp) case *reference: p.out += strings.ToLower(n.identifier) case *optional: ebnf(n.node, seen, p, outp) p.out += "?" case *repetition: ebnf(n.node, seen, p, outp) p.out += "*" case *negation: p.out += "!" ebnf(n.node, seen, p, outp) return case *literal: p.out += fmt.Sprintf("%q", n.s) case *group: composite := (n.mode != groupMatchOnce) && compositeNode(map[node]bool{}, n, false) if composite { p.out += "(" } if child, ok := n.expr.(*group); ok && child.mode == groupMatchOnce { ebnf(child.expr, seen, p, outp) } else if child, ok := n.expr.(*capture); ok { if grandchild, ok := child.node.(*group); ok && grandchild.mode == groupMatchOnce { ebnf(grandchild.expr, seen, p, outp) } else { ebnf(n.expr, seen, p, outp) } } else { ebnf(n.expr, seen, p, outp) } if composite { p.out += ")" } switch n.mode { case groupMatchNonEmpty: p.out += "!" case groupMatchZeroOrOne: p.out += "?" 
case groupMatchZeroOrMore: p.out += "*" case groupMatchOneOrMore: p.out += "+" } return default: panic(fmt.Sprintf("unsupported node type %T", n)) } } participle-0.7.1/ebnf_test.go000066400000000000000000000011231376001225600161520ustar00rootroot00000000000000package participle import ( "strings" "testing" "github.com/stretchr/testify/require" ) func TestEBNF(t *testing.T) { parser := mustTestParser(t, &EBNF{}) expected := ` EBNF = Production* . Production = ident "=" Expression Expression* "." . Expression = Sequence ("|" Sequence)* . Sequence = Term Term* . Term = ident | Literal | Range | Group | EBNFOption | Repetition . Literal = string . Range = string "…" string . Group = "(" Expression ")" . EBNFOption = "[" Expression "]" . Repetition = "{" Expression "}" . ` require.Equal(t, strings.TrimSpace(expected), parser.String()) } participle-0.7.1/error.go000066400000000000000000000045621376001225600153420ustar00rootroot00000000000000package participle import ( "fmt" "github.com/alecthomas/participle/lexer" ) // Error represents an error while parsing. // // The error will contain positional information if available. type Error interface { error // Unadorned message. Message() string // Closest token to error location. Token() lexer.Token } // UnexpectedTokenError is returned by Parse when an unexpected token is encountered. // // This is useful for composing parsers in order to detect when a sub-parser has terminated. type UnexpectedTokenError struct { Unexpected lexer.Token Expected string } func (u UnexpectedTokenError) Error() string { return lexer.FormatError(u.Unexpected.Pos, u.Message()) } func (u UnexpectedTokenError) Message() string { // nolint: golint var expected string if u.Expected != "" { expected = fmt.Sprintf(" (expected %s)", u.Expected) } return fmt.Sprintf("unexpected token %q%s", u.Unexpected, expected) } func (u UnexpectedTokenError) Token() lexer.Token { return u.Unexpected } // nolint: golint type parseError struct { Msg string Tok lexer.Token } func (p *parseError) Error() string { return lexer.FormatError(p.Tok.Pos, p.Msg) } func (p *parseError) Message() string { return p.Msg } func (p *parseError) Token() lexer.Token { return p.Tok } // AnnotateError wraps an existing error with a position. // // If the existing error is a lexer.Error or participle.Error it will be returned unmodified. func AnnotateError(pos lexer.Position, err error) error { if perr, ok := err.(Error); ok { return perr } return &parseError{Msg: err.Error(), Tok: lexer.Token{Pos: pos}} } // Errorf creates a new Error at the given position. func Errorf(pos lexer.Position, format string, args ...interface{}) error { return &parseError{Msg: fmt.Sprintf(format, args...), Tok: lexer.Token{Pos: pos}} } // ErrorWithTokenf creates a new Error with the given token as context. func ErrorWithTokenf(tok lexer.Token, format string, args ...interface{}) error { return &parseError{Msg: fmt.Sprintf(format, args...), Tok: tok} } // Wrapf attempts to wrap an existing participle.Error in a new message.
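// // A usage sketch (the surrounding err and pos are assumed to exist in the caller): if err != nil { return Wrapf(pos, err, "invalid value") } produces errors of the form "1:4: invalid value: ...", reusing the wrapped error's own position when it is a participle.Error.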
func Wrapf(pos lexer.Position, err error, format string, args ...interface{}) error { if perr, ok := err.(Error); ok { return Errorf(perr.Token().Pos, "%s: %s", fmt.Sprintf(format, args...), perr.Message()) } return Errorf(pos, "%s: %s", fmt.Sprintf(format, args...), err.Error()) } participle-0.7.1/error_test.go000066400000000000000000000016201376001225600163730ustar00rootroot00000000000000package participle import ( "testing" "github.com/stretchr/testify/assert" ) func TestErrorReporting(t *testing.T) { type cls struct { Visibility string `@"public"?` Class string `"class" @Ident` } type union struct { Visibility string `@"public"?` Union string `"union" @Ident` } type decl struct { Class *cls `( @@` Union *union ` | @@ )` } type grammar struct { Decls []*decl `( @@ ";" )*` } p := mustTestParser(t, &grammar{}, UseLookahead(5)) var err error ast := &grammar{} err = p.ParseString(`public class A;`, ast) assert.NoError(t, err) err = p.ParseString(`public union A;`, ast) assert.NoError(t, err) err = p.ParseString(`public struct Bar;`, ast) assert.EqualError(t, err, `1:8: unexpected token "struct" (expected "union")`) err = p.ParseString(`public class 1;`, ast) assert.EqualError(t, err, `1:14: unexpected token "1" (expected <ident>)`) } participle-0.7.1/go.mod000066400000000000000000000003201376001225600147600ustar00rootroot00000000000000module github.com/alecthomas/participle require ( github.com/alecthomas/repr v0.0.0-20181024024818-d37bc2a10ba1 github.com/davecgh/go-spew v1.1.1 // indirect github.com/stretchr/testify v1.4.0 ) go 1.13 participle-0.7.1/go.sum000066400000000000000000000024041376001225600150100ustar00rootroot00000000000000github.com/alecthomas/repr v0.0.0-20181024024818-d37bc2a10ba1 h1:GDQdwm/gAcJcLAKQQZGOJ4knlw+7rfEQQcmwTbt4p5E= github.com/alecthomas/repr v0.0.0-20181024024818-d37bc2a10ba1/go.mod h1:xTS7Pm1pD1mvyM075QCDSRqH6qRLXylzS24ZTpRiSzQ= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= github.com/stretchr/testify v1.4.0 h1:2E4SXV/wtOkTonXsotYi4li6zVWxYlZuYNCXe9XRJyk= github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/yaml.v2 v2.2.2 h1:ZCJp+EgiOT7lHqUV2J862kp8Qj64Jo6az82+3Td9dZw= gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= participle-0.7.1/grammar.go000066400000000000000000000206321376001225600156350ustar00rootroot00000000000000package participle import ( "fmt" "reflect" "text/scanner" "github.com/alecthomas/participle/lexer" ) type generatorContext struct { lexer.Definition typeNodes map[reflect.Type]node symbolsToIDs map[rune]string } func newGeneratorContext(lex lexer.Definition) *generatorContext { return &generatorContext{ Definition: lex, typeNodes: map[reflect.Type]node{}, symbolsToIDs: lexer.SymbolsByRune(lex), } } // Takes a type and builds a tree of nodes out of it.
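// // Pointer and slice types are indirected to their underlying struct type, types implementing the Parseable interface short-circuit to a parseable node, and results are memoised in typeNodes so that recursive grammars terminate.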
func (g *generatorContext) parseType(t reflect.Type) (_ node, returnedError error) { t = indirectType(t) if n, ok := g.typeNodes[t]; ok { return n, nil } if t.Implements(parseableType) { return &parseable{t.Elem()}, nil } if reflect.PtrTo(t).Implements(parseableType) { return &parseable{t}, nil } switch t.Kind() { case reflect.Slice, reflect.Ptr: t = indirectType(t.Elem()) if t.Kind() != reflect.Struct { return nil, fmt.Errorf("expected a struct but got %T", t) } fallthrough case reflect.Struct: slexer, err := lexStruct(t) if err != nil { return nil, err } out := &strct{typ: t} g.typeNodes[t] = out // Ensure we avoid infinite recursion. if slexer.NumField() == 0 { return nil, fmt.Errorf("can not parse into empty struct %s", t) } defer decorate(&returnedError, func() string { return slexer.Field().Name }) e, err := g.parseDisjunction(slexer) if err != nil { return nil, err } if e == nil { return nil, fmt.Errorf("no grammar found in %s", t) } if token, _ := slexer.Peek(); !token.EOF() { return nil, fmt.Errorf("unexpected input %q", token.Value) } out.expr = e return out, nil } return nil, fmt.Errorf("%s should be a struct or should implement the Parseable interface", t) } func (g *generatorContext) parseDisjunction(slexer *structLexer) (node, error) { out := &disjunction{} for { n, err := g.parseSequence(slexer) if err != nil { return nil, err } if n == nil { return nil, fmt.Errorf("alternative expression %d cannot be empty", len(out.nodes)+1) } out.nodes = append(out.nodes, n) if token, _ := slexer.Peek(); token.Type != '|' { break } _, err = slexer.Next() // | if err != nil { return nil, err } } if len(out.nodes) == 1 { return out.nodes[0], nil } return out, nil } func (g *generatorContext) parseSequence(slexer *structLexer) (node, error) { head := &sequence{} cursor := head loop: for { if token, err := slexer.Peek(); err != nil { return nil, err } else if token.Type == lexer.EOF { break loop } term, err := g.parseTerm(slexer) if err != nil { return nil, err } if term == nil { break loop } if cursor.node == nil { cursor.head = true cursor.node = term } else { cursor.next = &sequence{node: term} cursor = cursor.next } } if head.node == nil { return nil, nil } if head.next == nil { return head.node, nil } return head, nil } func (g *generatorContext) parseTermNoModifiers(slexer *structLexer) (node, error) { t, err := slexer.Peek() if err != nil { return nil, err } var out node switch t.Type { case '@': out, err = g.parseCapture(slexer) case scanner.String, scanner.RawString, scanner.Char: out, err = g.parseLiteral(slexer) case '!': return g.parseNegation(slexer) case '[': return g.parseOptional(slexer) case '{': return g.parseRepetition(slexer) case '(': out, err = g.parseGroup(slexer) case scanner.Ident: out, err = g.parseReference(slexer) case lexer.EOF: _, _ = slexer.Next() return nil, nil default: return nil, nil } return out, err } func (g *generatorContext) parseTerm(slexer *structLexer) (node, error) { out, err := g.parseTermNoModifiers(slexer) if err != nil { return nil, err } return g.parseModifier(slexer, out) } // Parse modifiers: ?, *, + and/or ! 
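// // For example, `@Ident*` is parsed as a capture wrapped in a zero-or-more group, while `("a"? "b"?)!` marks the group as requiring a non-empty match.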
func (g *generatorContext) parseModifier(slexer *structLexer, expr node) (node, error) { out := &group{expr: expr} t, err := slexer.Peek() if err != nil { return nil, err } switch t.Type { case '!': out.mode = groupMatchNonEmpty case '+': out.mode = groupMatchOneOrMore case '*': out.mode = groupMatchZeroOrMore case '?': out.mode = groupMatchZeroOrOne default: return expr, nil } _, _ = slexer.Next() return out, nil } // @<expr> captures <expr> into the current field. func (g *generatorContext) parseCapture(slexer *structLexer) (node, error) { _, _ = slexer.Next() token, err := slexer.Peek() if err != nil { return nil, err } field := slexer.Field() if token.Type == '@' { _, _ = slexer.Next() n, err := g.parseType(field.Type) if err != nil { return nil, err } return &capture{field, n}, nil } if indirectType(field.Type).Kind() == reflect.Struct && !field.Type.Implements(captureType) && !field.Type.Implements(textUnmarshalerType) { return nil, fmt.Errorf("structs can only be parsed with @@ or by implementing the Capture or encoding.TextUnmarshaler interfaces") } n, err := g.parseTermNoModifiers(slexer) if err != nil { return nil, err } return &capture{field, n}, nil } // A reference in the form <identifier> refers to a named token from the lexer. func (g *generatorContext) parseReference(slexer *structLexer) (node, error) { // nolint: interfacer token, err := slexer.Next() if err != nil { return nil, err } if token.Type != scanner.Ident { return nil, fmt.Errorf("expected identifier but got %q", token) } typ, ok := g.Symbols()[token.Value] if !ok { return nil, fmt.Errorf("unknown token type %q", token) } return &reference{typ: typ, identifier: token.Value}, nil } // [ <expr> ] optionally matches <expr>. func (g *generatorContext) parseOptional(slexer *structLexer) (node, error) { _, _ = slexer.Next() // [ disj, err := g.parseDisjunction(slexer) if err != nil { return nil, err } n := &group{expr: disj, mode: groupMatchZeroOrOne} next, err := slexer.Next() if err != nil { return nil, err } if next.Type != ']' { return nil, fmt.Errorf("expected ] but got %q", next) } return n, nil } // { <expr> } matches 0 or more repetitions of <expr> func (g *generatorContext) parseRepetition(slexer *structLexer) (node, error) { _, _ = slexer.Next() // { disj, err := g.parseDisjunction(slexer) if err != nil { return nil, err } n := &group{expr: disj, mode: groupMatchZeroOrMore} next, err := slexer.Next() if err != nil { return nil, err } if next.Type != '}' { return nil, fmt.Errorf("expected } but got %q", next) } return n, nil } // ( <expr> ) groups a sub-expression func (g *generatorContext) parseGroup(slexer *structLexer) (node, error) { _, _ = slexer.Next() // ( disj, err := g.parseDisjunction(slexer) if err != nil { return nil, err } next, err := slexer.Next() // ) if err != nil { return nil, err } if next.Type != ')' { return nil, fmt.Errorf("expected ) but got %q", next) } return &group{expr: disj}, nil } // A token negation // // Accepts both the form !"some-literal" and !SomeNamedToken func (g *generatorContext) parseNegation(slexer *structLexer) (node, error) { _, _ = slexer.Next() // advance the parser since we have '!' right now. next, err := g.parseTermNoModifiers(slexer) if err != nil { return nil, err } return &negation{next}, nil } // A literal string. // // Note that for this to match, the tokeniser must be able to produce this string. For example, // if the tokeniser only produces individual characters, the literal "hello" can never match (and vice versa).
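// // A literal can also be pinned to a lexer token type with the `"...":<identifier>` form, e.g. `"if":Keyword` (assuming the lexer defines a Keyword token) matches only when the literal "if" is lexed as a Keyword.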
func (g *generatorContext) parseLiteral(lex *structLexer) (node, error) { // nolint: interfacer token, err := lex.Next() if err != nil { return nil, err } if token.Type != scanner.String && token.Type != scanner.RawString && token.Type != scanner.Char { return nil, fmt.Errorf("expected quoted string but got %q", token) } s := token.Value t := rune(-1) token, err = lex.Peek() if err != nil { return nil, err } if token.Value == ":" && (token.Type == scanner.Char || token.Type == ':') { _, _ = lex.Next() token, err = lex.Next() if err != nil { return nil, err } if token.Type != scanner.Ident { return nil, fmt.Errorf("expected identifier for literal type constraint but got %q", token) } var ok bool t, ok = g.Symbols()[token.Value] if !ok { return nil, fmt.Errorf("unknown token type %q in literal type constraint", token) } } return &literal{s: s, t: t, tt: g.symbolsToIDs[t]}, nil } func indirectType(t reflect.Type) reflect.Type { if t.Kind() == reflect.Ptr || t.Kind() == reflect.Slice { return indirectType(t.Elem()) } return t } participle-0.7.1/lexer/000077500000000000000000000000001376001225600147745ustar00rootroot00000000000000participle-0.7.1/lexer/doc.go000066400000000000000000000020511376001225600160660ustar00rootroot00000000000000// Package lexer defines interfaces and implementations used by Participle to perform lexing. // // The primary interfaces are Definition and Lexer. There are three implementations of these // interfaces: // // TextScannerLexer is based on text/scanner. This is the fastest, but least flexible, in that // tokens are restricted to those supported by that package. It can scan about 5M tokens/second on a // late 2013 15" MacBook Pro. // // The second lexer is constructed via the Regexp() function, mapping regexp capture groups // to tokens. The complete input source is read into memory, so it is unsuitable for large inputs. // // The final lexer provided accepts a lexical grammar in EBNF. Each capitalised production is a // lexical token supported by the resulting Lexer. This is very flexible, but a bit slower, scanning // around 730K tokens/second on the same machine, though it is currently completely unoptimised. // This could/should be converted to a table-based lexer. // // Lexer implementations must use Panic/Panicf to report errors. package lexer participle-0.7.1/lexer/ebnf/000077500000000000000000000000001376001225600157065ustar00rootroot00000000000000participle-0.7.1/lexer/ebnf/ebnf.go000066400000000000000000000237451376001225600171620ustar00rootroot00000000000000// Package ebnf is an EBNF lexer for Participle. // // The EBNF grammar syntax is as defined by "golang.org/x/exp/ebnf" with one extension: // ranges also support exclusions, eg. "a"…"z"-"f" and "a"…"z"-"f"…"g". package ebnf import ( "bufio" "bytes" "fmt" "io" "strings" "unicode/utf8" "github.com/alecthomas/participle/lexer" "github.com/alecthomas/participle/lexer/ebnf/internal" ) // New creates a Lexer from an EBNF grammar. // // The EBNF grammar syntax is as defined by "golang.org/x/exp/ebnf" with one extension: // ranges also support exclusions, eg. "a"…"z"-"f" and "a"…"z"-"f"…"g". // Exclusions can be chained. // // Upper-case productions are exported as terminals. Lower-case productions are non-terminals. // All productions are lexical. // // Here's an example grammar for parsing whitespace and identifiers: // // Identifier = alpha { alpha | number } . // Whitespace = "\n" | "\r" | "\t" | " " . // alpha = "a"…"z" | "A"…"Z" | "_" . // number = "0"…"9" . 
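//
// A minimal usage sketch (hedged; error handling elided):
//
//	def, err := ebnf.New(grammar)        // grammar as in the example above
//	lex, err := def.Lex(strings.NewReader("hello world"))
//	tokens, err := lexer.ConsumeAll(lex) // Identifier, Whitespace, Identifier, EOF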
func New(grammar string, options ...Option) (lexer.Definition, error) { // Parse grammar. r := strings.NewReader(grammar) ast, err := internal.Parse("", r) if err != nil { return nil, err } // Validate grammar. for _, production := range ast.Index { if err = validate(ast, production); err != nil { return nil, err } } // Assign constants for roots. rn := lexer.EOF - 1 symbols := map[string]rune{ "EOF": lexer.EOF, } // Optimize and export public productions. productions := internal.Grammar{Index: map[string]*internal.Production{}} for _, namedProduction := range ast.Productions { symbol := namedProduction.Name production := namedProduction.Production ch := symbol[0:1] if strings.ToUpper(ch) == ch { symbols[symbol] = rn productions.Index[symbol] = production productions.Productions = append(productions.Productions, namedProduction) rn-- } } def := &ebnfLexerDefinition{ grammar: ast, symbols: symbols, productions: productions, elide: map[string]bool{}, } for _, production := range ast.Index { production.Expr = def.optimize(production.Expr) } for _, option := range options { option(def) } return def, nil } type ebnfLexer struct { r *tokenReader def *ebnfLexerDefinition buf *bytes.Buffer } func (e *ebnfLexer) Next() (lexer.Token, error) { nextToken: for { e.r.Begin() rn, err := e.peek() if err != nil { return lexer.Token{}, err } else if rn == lexer.EOF { return lexer.EOFToken(e.r.Pos()), nil } pos := e.r.Pos() for _, namedProduction := range e.def.productions.Productions { e.r.Rewind() e.buf.Reset() name := namedProduction.Name production := namedProduction.Production if ok, err := e.match(name, production.Expr, e.buf); err != nil { return lexer.Token{}, err } else if ok { if len(e.buf.String()) == 0 { return lexer.Token{}, fmt.Errorf("rule %q matched, but did not consume any input", name) } if e.def.elide[name] { continue nextToken } return lexer.Token{ Type: e.def.symbols[name], Pos: pos, Value: e.buf.String(), }, nil } } token := lexer.Token{Pos: pos, Value: string(rn)} return token, lexer.ErrorWithTokenf(token, "no match found for %c", rn) } } func (e *ebnfLexer) match(name string, expr internal.Expression, out *bytes.Buffer) (bool, error) { // nolint: gocyclo, unparam switch n := expr.(type) { case internal.Alternative: for _, an := range n { if ok, err := e.match(name, an, out); err != nil { return false, err } else if ok { return true, nil } } return false, nil case *internal.Group: return e.match(name, n.Body, out) case *internal.Name: return e.match(name, e.def.grammar.Index[n.String].Expr, out) case *internal.Option: _, err := e.match(name, n.Body, out) if err != nil { return false, err } return true, nil case *internal.Range: return false, fmt.Errorf("internal.Range should not occur here") case *internal.Repetition: for { ok, err := e.match(name, n.Body, out) if err != nil { return false, err } if !ok { return true, nil } } case internal.Sequence: for i, sn := range n { if ok, err := e.match(name, sn, out); err != nil { return false, err } else if ok { continue } if i > 0 { return false, nil } return false, nil } return true, nil case *internal.Token: return true, lexer.Errorf(e.r.Pos(), "internal.Token should not occur") case *ebnfToken: // If first rune doesn't match, we didn't match. 
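// Note: on a partial match we simply return false; no input is "un-read"
// here because the caller (Next) rewinds the token reader before trying
// the next production.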
if rn, err := e.peek(); err != nil { return false, err } else if n.runes[0] != rn { return false, nil } for _, rn := range n.runes { if r, err := e.read(); err != nil { return false, err } else if r != rn { return false, nil } out.WriteRune(rn) } return true, nil case *characterSet: rn, err := e.peek() if err != nil { return false, err } if n.Has(rn) { _, err = e.read() out.WriteRune(rn) return true, err } return false, nil case *rangeSet: rn, err := e.peek() if err != nil { return false, err } if n.Has(rn) { _, err = e.read() out.WriteRune(rn) return true, err } return false, nil case *asciiSet: rn, err := e.peek() if err != nil { return false, err } if n.Has(rn) { _, err = e.read() out.WriteRune(rn) return true, err } return false, nil case nil: if rn, err := e.peek(); err != nil { return false, err } else if rn == lexer.EOF { return false, nil } return false, fmt.Errorf("expected lexer.EOF") } return false, fmt.Errorf("unsupported lexer expression type %T", expr) } func (e *ebnfLexer) peek() (rune, error) { return e.fixRuneRead(e.r.Peek()) } func (e *ebnfLexer) read() (rune, error) { return e.fixRuneRead(e.r.Read()) } func (e *ebnfLexer) fixRuneRead(rn rune, err error) (rune, error) { if err == io.EOF { return lexer.EOF, nil } if err != nil { return 0, fmt.Errorf("failed to read rune: %s", err) } return rn, nil } type ebnfLexerDefinition struct { grammar internal.Grammar symbols map[string]rune elide map[string]bool productions internal.Grammar } func (e *ebnfLexerDefinition) Lex(r io.Reader) (lexer.Lexer, error) { return &ebnfLexer{ r: newTokenReader(bufio.NewReader(r), lexer.Position{ Filename: lexer.NameOfReader(r), Line: 1, Column: 1, }), def: e, buf: bytes.NewBuffer(make([]byte, 0, 128)), }, nil } func (e *ebnfLexerDefinition) Symbols() map[string]rune { return e.symbols } // Apply some optimizations to the EBNF. func (e *ebnfLexerDefinition) optimize(expr internal.Expression) internal.Expression { switch n := expr.(type) { case internal.Alternative: // Convert alternate characters into a character set (eg. "a" | "b" | "c" | "true" becomes // set("abc") | "true"). out := make(internal.Alternative, 0, len(n)) set := "" for _, expr := range n { if t, ok := expr.(*internal.Token); ok && utf8.RuneCountInString(t.String) == 1 { set += t.String continue } // Hit a node that is not a single-character Token. Flush set? if set != "" { out = append(out, &characterSet{pos: n.Pos(), Set: set}) set = "" } out = append(out, e.optimize(expr)) } if set != "" { out = append(out, &characterSet{pos: n.Pos(), Set: set}) } return out case internal.Sequence: for i, expr := range n { n[i] = e.optimize(expr) } case *internal.Group: n.Body = e.optimize(n.Body) case *internal.Option: n.Body = e.optimize(n.Body) case *internal.Repetition: n.Body = e.optimize(n.Body) case *internal.Range: // Convert range into a set. begin, end := beginEnd(n) set := &rangeSet{ pos: n.Pos(), include: [2]rune{begin, end}, } for next := n.Exclude; next != nil; { switch n := next.(type) { case *internal.Range: begin, end := beginEnd(n) set.exclude = append(set.exclude, [2]rune{begin, end}) next = n.Exclude case *internal.Token: rn, _ := utf8.DecodeRuneInString(n.String) set.exclude = append(set.exclude, [2]rune{rn, rn}) next = nil default: panic(fmt.Sprintf("should not have encountered %T", n)) } } // Use an asciiSet if the characters are in ASCII range. 
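// eg. "a"…"z"-"f" becomes a 256-bit set with 'a'..'z' set and 'f' cleared.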
return makeSet(n.Pos(), set) case *internal.Token: return &ebnfToken{pos: n.Pos(), runes: []rune(n.String)} } return expr } func beginEnd(n *internal.Range) (rune, rune) { begin, _ := utf8.DecodeRuneInString(n.Begin.String) end := begin if n.End != nil { end, _ = utf8.DecodeRuneInString(n.End.String) } if begin > end { begin, end = end, begin } return begin, end } // Validate the grammar against the lexer rules. func validate(grammar internal.Grammar, expr internal.Expression) error { // nolint: gocyclo switch n := expr.(type) { case *internal.Production: return validate(grammar, n.Expr) case internal.Alternative: for _, e := range n { if err := validate(grammar, e); err != nil { return err } } return nil case *internal.Group: return validate(grammar, n.Body) case *internal.Name: if grammar.Index[n.String] == nil { token := lexer.Token{Pos: lexer.Position(n.Pos()), Value: n.String} return lexer.ErrorWithTokenf(token, "unknown production %q", n.String) } return nil case *internal.Option: return validate(grammar, n.Body) case *internal.Range: if utf8.RuneCountInString(n.Begin.String) != 1 { token := lexer.Token{Pos: lexer.Position(n.Pos()), Value: n.Begin.String} return lexer.ErrorWithTokenf(token, "start of range must be a single rune") } if utf8.RuneCountInString(n.End.String) != 1 { token := lexer.Token{Pos: lexer.Position(n.Pos()), Value: n.End.String} return lexer.ErrorWithTokenf(token, "end of range must be a single rune") } return nil case *internal.Repetition: return validate(grammar, n.Body) case internal.Sequence: for _, e := range n { if err := validate(grammar, e); err != nil { return err } } return nil case *internal.Token: return nil case nil: return nil } return lexer.Errorf(lexer.Position(expr.Pos()), "unknown EBNF expression %T", expr) } participle-0.7.1/lexer/ebnf/ebnf_test.go000066400000000000000000000131561376001225600202140ustar00rootroot00000000000000package ebnf import ( "strings" "testing" "github.com/stretchr/testify/require" "github.com/alecthomas/participle/lexer" ) func TestIssue54(t *testing.T) { d, err := New(` EqEqEq = "===" . EqEq = "==" . Integer = "0" | "1"…"9" { digit } . Whitespace = " " | "\t" | "\n" | "\r" . Punct = "!"…"/" | ":"…"@" | "["…` + "\"`\"" + ` | "{"…"~" . digit = "0"…"9" . `) require.NoError(t, err) l, err := d.Lex(strings.NewReader(`10 ==! 
10`)) require.NoError(t, err) actual, err := lexer.ConsumeAll(l) require.NoError(t, err) expected := []lexer.Token{ {Type: -4, Value: "10", Pos: lexer.Position{Offset: 0, Line: 1, Column: 1}}, {Type: -5, Value: " ", Pos: lexer.Position{Offset: 2, Line: 1, Column: 3}}, {Type: -3, Value: "==", Pos: lexer.Position{Offset: 3, Line: 1, Column: 4}}, {Type: -6, Value: "!", Pos: lexer.Position{Offset: 5, Line: 1, Column: 6}}, {Type: -5, Value: " ", Pos: lexer.Position{Offset: 6, Line: 1, Column: 7}}, {Type: -4, Value: "10", Pos: lexer.Position{Offset: 7, Line: 1, Column: 8}}, {Type: -1, Pos: lexer.Position{Offset: 9, Line: 1, Column: 10}}, } require.Equal(t, expected, actual) } func TestBuilder(t *testing.T) { type entry struct { options []Option source string tokens []string roots []string fail bool } tests := []struct { name string grammar string cases []entry failBuild bool }{ { name: "BadEBNF", grammar: "Production = helper .", failBuild: true, }, { name: "EmptyProductionErrorsWithInput", grammar: `Extra = .`, cases: []entry{{ source: "a", fail: true, }}, }, { name: "ExtraInputErrors", grammar: `Extra = "b" .`, cases: []entry{{ source: "ba", fail: true, }}, }, { name: "TokenMatch", grammar: `Token = "token" .`, cases: []entry{{ source: `token`, tokens: []string{"token"}, roots: []string{"Token"}, }}, }, { name: "TokenNoMatch", grammar: `Token = "token" .`, cases: []entry{{ source: `toke`, fail: true, }}, }, { name: "RangeMatch", grammar: `Range = "a" … "z" .`, cases: []entry{{ source: "x", tokens: []string{"x"}, }}, }, { name: "RangeNoMatch", grammar: `Range = "a" … "z" .`, cases: []entry{{ source: "A", fail: true, }}, }, { name: "Alternative", grammar: `Alternatives = "a" | "b" | "c" .`, cases: []entry{{ source: "a", tokens: []string{"a"}, }}, }, { name: "2ndAlternative", grammar: `Alternatives = "a" | "b" | "c" .`, cases: []entry{{ source: "b", tokens: []string{"b"}, }}, }, { name: "3rdAlternative", grammar: `Alternatives = "a" | "b" | "c" .`, cases: []entry{{ source: "c", tokens: []string{"c"}, }}, }, { name: "AlternativeDoesNotMatch", grammar: `Alternatives = "a" | "b" | "c" .`, cases: []entry{{ source: "d", fail: true, }}, }, { name: "Group", grammar: `Group = ("token") .`, cases: []entry{{ source: "token", tokens: []string{"token"}, }}, }, { name: "OptionWithInnerMatch", grammar: `Option = [ "t" ] .`, cases: []entry{{ source: "t", tokens: []string{"t"}, }}, }, { name: "OptionWithNoInnerMatch", grammar: `Option = [ "t" ] .`, cases: []entry{{ source: "", }}, }, { name: "RangeWithExclusion", grammar: `Option = "a"…"z"-"f"…"g"-"z"-"y" .`, cases: []entry{{ source: "y", fail: true, }}, }, { name: "Ident", grammar: ` Identifier = alpha { alpha | number } . Whitespace = "\n" | "\r" | "\t" | " " . alpha = "a"…"z" | "A"…"Z" | "_" . number = "0"…"9" . `, cases: []entry{{ source: `some id withCase andNumb3rs a`, tokens: []string{"some", " ", "id", " ", "withCase", " ", "andNumb3rs", " ", "a"}, }}, }, { name: "Rewind", grammar: ` Comment = "//" . Operator = "/" . Whitespace = " " . `, cases: []entry{{ source: "//", tokens: []string{"//"}, }, { source: "/ /", tokens: []string{"/", " ", "/"}, }}, }, } for _, test := range tests { // nolint: scopelint t.Run(test.name, func(t *testing.T) { for _, entry := range test.cases { defi, err := New(test.grammar, entry.options...) 
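// A grammar expected to fail should error at construction time, before
// any input is lexed.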
if test.failBuild { require.Error(t, err) return } require.NoError(t, err) def := defi.(*ebnfLexerDefinition) if entry.roots != nil { roots := []string{} for sym := range def.symbols { if sym != "EOF" { roots = append(roots, sym) } } require.Equal(t, entry.roots, roots) } lexer, err := def.Lex(strings.NewReader(entry.source)) require.NoError(t, err) tokens, err := readAllTokens(lexer) if entry.fail { require.Error(t, err) } else { require.NoError(t, err) } require.Equal(t, entry.tokens, tokens) } }) } } func readAllTokens(lex lexer.Lexer) (out []string, err error) { for { token, err := lex.Next() if err != nil { return nil, err } if token.EOF() { return out, nil } out = append(out, token.Value) } } func BenchmarkEBNFLexer(b *testing.B) { b.ReportAllocs() def, err := New(` Identifier = alpha { alpha | digit } . Whitespace = "\n" | "\r" | "\t" | " " . Number = digit { digit } . alpha = "a"…"z" | "A"…"Z" | "_" . digit = "0"…"9" . `) require.NoError(b, err) r := strings.NewReader(strings.Repeat("hello world 123 hello world 123", 100)) b.ResetTimer() for i := 0; i < b.N; i++ { lex, _ := def.Lex(r) for { token, _ := lex.Next() if token.Type == lexer.EOF { break } } _, _ = r.Seek(0, 0) } } participle-0.7.1/lexer/ebnf/expressions.go000066400000000000000000000037201376001225600206210ustar00rootroot00000000000000package ebnf import ( "strings" "text/scanner" "github.com/alecthomas/participle/lexer/ebnf/internal" ) // TODO: Add a "repeatedrangeSet" to represent the common case of { set } ?? func makeSet(pos scanner.Position, set *rangeSet) internal.Expression { if set.include[0] < 0 || set.include[1] > 255 { return set } ascii := &asciiSet{pos: pos} for rn := set.include[0]; rn <= set.include[1]; rn++ { ascii.Insert(rn) } for _, exclude := range set.exclude { for rn := exclude[0]; rn <= exclude[1]; rn++ { ascii.Unset(rn) } } return ascii } type characterSet struct { pos scanner.Position Set string } func (c *characterSet) Pos() scanner.Position { return c.pos } func (c *characterSet) Has(rn rune) bool { return strings.ContainsRune(c.Set, rn) } // A set of arbitrary runes represented by a string. // // Uses strings.ContainsRune() to check if a rune is in the set. type rangeSet struct { pos scanner.Position include [2]rune exclude [][2]rune } func (c *rangeSet) Pos() scanner.Position { return c.pos } func (c *rangeSet) Has(rn rune) bool { if rn < c.include[0] || rn > c.include[1] { return false } for _, exclude := range c.exclude { if rn >= exclude[0] && rn <= exclude[1] { return false } } return true } // A faster representation of a character set using a 256-bit-wide bitset. type asciiSet struct { pos scanner.Position ascii [4]uint64 } func (a *asciiSet) Unset(rn rune) bool { if rn < 0 || rn > 255 { return false } a.ascii[rn>>6] &= ^(1 << uint64(rn&0x3f)) return true } func (a *asciiSet) Insert(rn rune) bool { if rn < 0 || rn > 255 { return false } a.ascii[rn>>6] |= (1 << uint64(rn&0x3f)) return true } func (a *asciiSet) Has(rn rune) bool { return rn >= 0 && rn <= 255 && a.ascii[rn>>6]&(1<<uint64(rn&0x3f)) > 0 } func (a *asciiSet) Pos() scanner.Position { return a.pos } type ebnfToken struct { pos scanner.Position runes []rune } func (e *ebnfToken) Pos() scanner.Position { return e.pos } participle-0.7.1/lexer/ebnf/internal/000077500000000000000000000000001376001225600175225ustar00rootroot00000000000000participle-0.7.1/lexer/ebnf/internal/ebnf.go000066400000000000000000000165351376001225600207730ustar00rootroot00000000000000// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. // Package internal is a library for EBNF grammars. The input is text ([]byte) // satisfying the following grammar (represented itself in EBNF): // // Production = name "=" [ Expression ] "." . // Expression = Alternative { "|" Alternative } . // Alternative = Term { Term } . // Term = name | token [ "…" token [ "-" token ]] | Group | Option | Repetition . // Group = "(" Expression ")" . // Option = "[" Expression "]" . // Repetition = "{" Expression "}" . // // A name is a Go identifier, a token is a Go string, and comments // and white space follow the same rules as for the Go language. // Production names starting with an uppercase Unicode letter denote // non-terminal productions (i.e., productions which allow white-space // and comments between tokens); all other production names denote // lexical productions. // package internal import ( "fmt" "text/scanner" "unicode" "unicode/utf8" ) // ---------------------------------------------------------------------------- // Error handling type errorList []error func (list errorList) Err() error { if len(list) == 0 { return nil } return list } func (list errorList) Error() string { switch len(list) { case 0: return "no errors" case 1: return list[0].Error() } return fmt.Sprintf("%s (and %d more errors)", list[0], len(list)-1) } func newError(pos scanner.Position, msg string) error { return fmt.Errorf("%s: %s", pos, msg) } // ---------------------------------------------------------------------------- // Internal representation type ( // An Expression node represents a production expression. Expression interface { // Pos is the position of the first character of the syntactic construct Pos() scanner.Position } // An Alternative node represents a non-empty list of alternative expressions. Alternative []Expression // x | y | z // A Sequence node represents a non-empty list of sequential expressions. Sequence []Expression // x y z // A Name node represents a production name. Name struct { StringPos scanner.Position String string } // A Token node represents a literal. Token struct { StringPos scanner.Position String string } // A Range node represents a range of characters. Range struct { Begin, End *Token // begin ... end Exclude Expression // - token or range } // A Group node represents a grouped expression. Group struct { Lparen scanner.Position Body Expression // (body) } // An Option node represents an optional expression. Option struct { Lbrack scanner.Position Body Expression // [body] } // A Repetition node represents a repeated expression. Repetition struct { Lbrace scanner.Position Body Expression // {body} } // A Production node represents an EBNF production. Production struct { Name *Name Expr Expression } // A Bad node stands for pieces of source code that lead to a parse error. Bad struct { TokPos scanner.Position Error string // parser error message } // A Grammar is a set of EBNF productions. The map // is indexed by production name. // Grammar struct { Index map[string]*Production Productions []*NamedProduction } // A NamedProduction is an ordered Production. 
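//
// Productions are kept in declaration order (unlike the Index map), so the
// lexer can attempt them in the order they were written in the grammar.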
NamedProduction struct { Name string Production *Production } ) func (x Alternative) Pos() scanner.Position { return x[0].Pos() } // the parser always generates non-empty Alternative func (x Sequence) Pos() scanner.Position { return x[0].Pos() } // the parser always generates non-empty Sequences func (x *Name) Pos() scanner.Position { return x.StringPos } func (x *Token) Pos() scanner.Position { return x.StringPos } func (x *Range) Pos() scanner.Position { return x.Begin.Pos() } func (x *Group) Pos() scanner.Position { return x.Lparen } func (x *Option) Pos() scanner.Position { return x.Lbrack } func (x *Repetition) Pos() scanner.Position { return x.Lbrace } func (x *Production) Pos() scanner.Position { return x.Name.Pos() } func (x *Bad) Pos() scanner.Position { return x.TokPos } // ---------------------------------------------------------------------------- // Grammar verification func isLexical(name string) bool { ch, _ := utf8.DecodeRuneInString(name) return !unicode.IsUpper(ch) } type verifier struct { errors errorList worklist []*Production reached Grammar // set of productions reached from (and including) the root production grammar Grammar } func (v *verifier) error(pos scanner.Position, msg string) { v.errors = append(v.errors, newError(pos, msg)) } func (v *verifier) push(prod *Production) { name := prod.Name.String if _, found := v.reached.Index[name]; !found { v.worklist = append(v.worklist, prod) v.reached.Index[name] = prod } } func (v *verifier) verifyChar(x *Token) rune { s := x.String if utf8.RuneCountInString(s) != 1 { v.error(x.Pos(), "single char expected, found "+s) return 0 } ch, _ := utf8.DecodeRuneInString(s) return ch } func (v *verifier) verifyExpr(expr Expression, lexical bool) { switch x := expr.(type) { case nil: // empty expression case Alternative: for _, e := range x { v.verifyExpr(e, lexical) } case Sequence: for _, e := range x { v.verifyExpr(e, lexical) } case *Name: // a production with this name must exist; // add it to the worklist if not yet processed if prod, found := v.grammar.Index[x.String]; found { v.push(prod) } else { v.error(x.Pos(), "missing production "+x.String) } // within a lexical production references // to non-lexical productions are invalid if lexical && !isLexical(x.String) { v.error(x.Pos(), "reference to non-lexical production "+x.String) } case *Token: // nothing to do for now case *Range: i := v.verifyChar(x.Begin) j := v.verifyChar(x.End) if i >= j { v.error(x.Pos(), "decreasing character range") } case *Group: v.verifyExpr(x.Body, lexical) case *Option: v.verifyExpr(x.Body, lexical) case *Repetition: v.verifyExpr(x.Body, lexical) case *Bad: v.error(x.Pos(), x.Error) default: panic(fmt.Sprintf("internal error: unexpected type %T", expr)) } } func (v *verifier) verify(grammar Grammar, start string) { // find root production root, found := grammar.Index[start] if !found { var noPos scanner.Position v.error(noPos, "no start production "+start) return } // initialize verifier v.worklist = v.worklist[0:0] v.reached = Grammar{Index: map[string]*Production{}} v.grammar = grammar // work through the worklist v.push(root) for { n := len(v.worklist) - 1 if n < 0 { break } prod := v.worklist[n] v.worklist = v.worklist[0:n] v.verifyExpr(prod.Expr, isLexical(prod.Name.String)) } // check if all productions were reached if len(v.reached.Index) < len(v.grammar.Index) { for name, prod := range v.grammar.Index { if _, found := v.reached.Index[name]; !found { v.error(prod.Pos(), name+" is unreachable") } } } } // Verify checks that: // - all 
productions used are defined // - all productions defined are used when beginning at start // - lexical productions refer only to other lexical productions // // Position information is interpreted relative to the file set fset. // func Verify(grammar Grammar, start string) error { var v verifier v.verify(grammar, start) return v.errors.Err() } participle-0.7.1/lexer/ebnf/internal/ebnf_test.go000066400000000000000000000025341376001225600220260ustar00rootroot00000000000000// Copyright 2009 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. package internal import ( "bytes" "testing" ) var goodGrammars = []string{ `Program = .`, `Program = foo . foo = "foo" .`, `Program = "a" | "b" "c" .`, `Program = "a" … "z" .`, `Program = "a" … "z" - "f" .`, `Program = Song . Song = { Note } . Note = Do | (Re | Mi | Fa | So | La) | Ti . Do = "c" . Re = "d" . Mi = "e" . Fa = "f" . So = "g" . La = "a" . Ti = ti . ti = "b" .`, } var badGrammars = []string{ `Program = | .`, `Program = | b .`, `Program = a … b .`, `Program = "a" … .`, `Program = … "b" .`, `Program = () .`, `Program = [] .`, `Program = {} .`, } func checkGood(t *testing.T, src string) { grammar, err := Parse("", bytes.NewBuffer([]byte(src))) if err != nil { t.Errorf("Parse(%s) failed: %v", src, err) return } if err = Verify(grammar, "Program"); err != nil { t.Errorf("Verify(%s) failed: %v", src, err) } } func checkBad(t *testing.T, src string) { _, err := Parse("", bytes.NewBuffer([]byte(src))) if err == nil { t.Errorf("Parse(%s) should have failed", src) } } func TestGrammars(t *testing.T) { for _, src := range goodGrammars { checkGood(t, src) } for _, src := range badGrammars { checkBad(t, src) } } participle-0.7.1/lexer/ebnf/internal/parser.go000066400000000000000000000104241376001225600213460ustar00rootroot00000000000000// Copyright 2009 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. package internal import ( "io" "strconv" "text/scanner" ) type parser struct { errors errorList scanner scanner.Scanner pos scanner.Position // token position tok rune // one token look-ahead lit string // token literal } func (p *parser) next() { p.tok = p.scanner.Scan() p.pos = p.scanner.Position p.lit = p.scanner.TokenText() } func (p *parser) error(pos scanner.Position, msg string) { p.errors = append(p.errors, newError(pos, msg)) } func (p *parser) errorExpected(pos scanner.Position, msg string) { msg = `expected "` + msg + `"` if pos.Offset == p.pos.Offset { // the error happened at the current position; // make the error message more specific msg += ", found " + scanner.TokenString(p.tok) if p.tok < 0 { msg += " " + p.lit } } p.error(pos, msg) } func (p *parser) expect(tok rune) { pos := p.pos if p.tok != tok { p.errorExpected(pos, scanner.TokenString(tok)) } p.next() // make progress in any case } func (p *parser) parseIdentifier() *Name { pos := p.pos name := p.lit p.expect(scanner.Ident) return &Name{pos, name} } func (p *parser) parseToken() *Token { pos := p.pos value := "" if p.tok == scanner.String { value, _ = strconv.Unquote(p.lit) // Unquote may fail with an error, but only if the scanner found // an illegal string in the first place. In this case the error // has already been reported. 
p.next() } else { p.expect(scanner.String) } return &Token{pos, value} } const ellipsis = '…' // U+2026, the horizontal ellipsis character func (p *parser) parseRange() (x Expression) { tok := p.parseToken() x = tok if p.tok == ellipsis { p.next() end := p.parseToken() var exclude Expression if p.tok == '-' { p.next() exclude = p.parseRange() } x = &Range{tok, end, exclude} } else if p.tok == '-' { p.next() x = &Range{tok, nil, p.parseRange()} } return x } // ParseTerm returns nil if no term was found. func (p *parser) parseTerm() (x Expression) { pos := p.pos switch p.tok { case scanner.Ident: x = p.parseIdentifier() case scanner.String: x = p.parseRange() case '(': p.next() x = &Group{pos, p.parseExpression()} p.expect(')') case '[': p.next() x = &Option{pos, p.parseExpression()} p.expect(']') case '{': p.next() x = &Repetition{pos, p.parseExpression()} p.expect('}') } return x } func (p *parser) parseSequence() Expression { var list Sequence for x := p.parseTerm(); x != nil; x = p.parseTerm() { list = append(list, x) } // no need for a sequence if list.Len() < 2 switch len(list) { case 0: p.errorExpected(p.pos, "term") return &Bad{p.pos, "term expected"} case 1: return list[0] } return list } func (p *parser) parseExpression() Expression { var list Alternative for { list = append(list, p.parseSequence()) if p.tok != '|' { break } p.next() } // len(list) > 0 // no need for an Alternative node if list.Len() < 2 if len(list) == 1 { return list[0] } return list } func (p *parser) parseProduction() *Production { name := p.parseIdentifier() p.expect('=') var expr Expression if p.tok != '.' { expr = p.parseExpression() } p.expect('.') return &Production{name, expr} } func (p *parser) parse(filename string, src io.Reader) Grammar { p.scanner.Init(src) p.scanner.Filename = filename p.next() // initializes pos, tok, lit grammar := Grammar{Index: map[string]*Production{}} for p.tok != scanner.EOF { prod := p.parseProduction() name := prod.Name.String if _, found := grammar.Index[name]; !found { grammar.Index[name] = prod grammar.Productions = append(grammar.Productions, &NamedProduction{ Name: name, Production: prod, }) } else { p.error(prod.Pos(), name+" declared already") } } return grammar } // Parse parses a set of EBNF productions from source src. // It returns a set of productions. Errors are reported // for incorrect syntax and if a production is declared // more than once; the filename is used only for error // positions. // func Parse(filename string, src io.Reader) (Grammar, error) { var p parser grammar := p.parse(filename, src) return grammar, p.errors.Err() } participle-0.7.1/lexer/ebnf/options.go000066400000000000000000000001371376001225600177310ustar00rootroot00000000000000package ebnf // Option for configuring the EBNF lexer. type Option func(*ebnfLexerDefinition) participle-0.7.1/lexer/ebnf/reader.go000066400000000000000000000024701376001225600175020ustar00rootroot00000000000000package ebnf import ( "io" "unicode/utf8" "github.com/alecthomas/participle/lexer" ) // A rewindable rune reader. // // Allows for multiple attempts to be made to read a sequence of runes. type tokenReader struct { r io.RuneReader cursor int runes []rune oldPos lexer.Position pos lexer.Position } func newTokenReader(r io.RuneReader, pos lexer.Position) *tokenReader { return &tokenReader{r: r, pos: pos} } func (r *tokenReader) Pos() lexer.Position { return r.pos } // Begin a new token attempt. 
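//
// A hedged sketch of the Begin/Rewind protocol used by the lexer:
//
//	r.Begin()         // mark the start of a candidate token
//	rn, _ := r.Read() // consume runes while attempting a match
//	r.Rewind()        // on failure, replay the same runes for the next rule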
func (r *tokenReader) Begin() { r.runes = r.runes[r.cursor:] r.cursor = 0 r.oldPos = r.pos } // Rewind to beginning of token attempt. func (r *tokenReader) Rewind() { r.cursor = 0 r.pos = r.oldPos } func (r *tokenReader) Read() (rune, error) { // Need to buffer? rn, err := r.Peek() if err != nil { return 0, err } r.pos.Offset += utf8.RuneLen(rn) if rn == '\n' { r.pos.Line++ r.pos.Column = 1 } else { r.pos.Column++ } r.cursor++ return rn, nil } func (r *tokenReader) Peek() (rune, error) { if r.cursor >= len(r.runes) { return r.buffer() } return r.runes[r.cursor], nil } // Buffer a rune without moving the cursor. func (r *tokenReader) buffer() (rune, error) { rn, _, err := r.r.ReadRune() if err != nil { return 0, err } r.runes = append(r.runes, rn) return rn, nil } participle-0.7.1/lexer/ebnf/reader_test.go000066400000000000000000000014671376001225600205460ustar00rootroot00000000000000package ebnf import ( "strings" "testing" "github.com/alecthomas/participle/lexer" "github.com/stretchr/testify/require" ) func TestTokenReader(t *testing.T) { r := strings.NewReader("hello world") tr := newTokenReader(r, lexer.Position{Column: 1, Line: 1}) tr.Begin() for _, ch := range "hello" { rn, err := tr.Peek() require.NoError(t, err) require.Equal(t, ch, rn) rn, err = tr.Read() require.NoError(t, err) require.Equal(t, ch, rn) } tr.Rewind() for _, ch := range "hello" { rn, err := tr.Peek() require.NoError(t, err) require.Equal(t, ch, rn) rn, err = tr.Read() require.NoError(t, err) require.Equal(t, ch, rn) } rn, err := tr.Peek() require.NoError(t, err) require.Equal(t, ' ', rn) tr.Begin() rn, err = tr.Read() require.NoError(t, err) require.Equal(t, ' ', rn) } participle-0.7.1/lexer/errors.go000066400000000000000000000022651376001225600166440ustar00rootroot00000000000000package lexer import "fmt" // Error represents an error while parsing. type Error struct { Msg string Tok Token } // Errorf creates a new Error at the given position. func Errorf(pos Position, format string, args ...interface{}) *Error { return &Error{Msg: fmt.Sprintf(format, args...), Tok: Token{Pos: pos}} } // ErrorWithTokenf creates a new Error with the given token as context. func ErrorWithTokenf(tok Token, format string, args ...interface{}) *Error { return &Error{Msg: fmt.Sprintf(format, args...), Tok: tok} } func (e *Error) Message() string { return e.Msg } // nolint: golint func (e *Error) Token() Token { return e.Tok } // nolint: golint // Error complies with the error interface and reports the position of an error. func (e *Error) Error() string { return FormatError(e.Tok.Pos, e.Msg) } // FormatError formats an error in the form "[<filename>:][<line>:<column>:] <message>" func FormatError(pos Position, message string) string { msg := "" if pos.Filename != "" { msg += pos.Filename + ":" } if pos.Line != 0 || pos.Column != 0 { msg += fmt.Sprintf("%d:%d:", pos.Line, pos.Column) } if msg != "" { msg += " " + message } else { msg = message } return msg } participle-0.7.1/lexer/lexer.go000066400000000000000000000065331376001225600164510ustar00rootroot00000000000000package lexer import ( "fmt" "io" ) const ( // EOF represents an end of file. EOF rune = -(iota + 1) ) // EOFToken creates a new EOF token at the given position. func EOFToken(pos Position) Token { return Token{Type: EOF, Pos: pos} } // Definition provides the parser with metadata for a lexer. type Definition interface { // Lex an io.Reader. Lex(io.Reader) (Lexer, error) // Symbols returns a map of symbolic names to the corresponding pseudo-runes for those symbols.
// This is the same approach as used by text/scanner. For example, "EOF" might have the rune // value of -1, "Ident" might be -2, and so on. Symbols() map[string]rune } // A Lexer returns tokens from a source. type Lexer interface { // Next consumes and returns the next token. Next() (Token, error) } // SymbolsByRune returns a map of lexer symbol names keyed by rune. func SymbolsByRune(def Definition) map[rune]string { out := map[rune]string{} for s, r := range def.Symbols() { out[r] = s } return out } // NameOfReader attempts to retrieve the filename of a reader. func NameOfReader(r interface{}) string { if nr, ok := r.(interface{ Name() string }); ok { return nr.Name() } return "" } // Must takes the result of a Definition constructor call and returns the definition, but panics if // it errors // // eg. // // lex = lexer.Must(lexer.Build(`Symbol = "symbol" .`)) func Must(def Definition, err error) Definition { if err != nil { panic(err) } return def } // ConsumeAll reads all tokens from a Lexer. func ConsumeAll(lexer Lexer) ([]Token, error) { tokens := []Token{} for { token, err := lexer.Next() if err != nil { return nil, err } tokens = append(tokens, token) if token.Type == EOF { return tokens, nil } } } // Position of a token. type Position struct { Filename string Offset int Line int Column int } func (p Position) GoString() string { return fmt.Sprintf("Position{Filename: %q, Offset: %d, Line: %d, Column: %d}", p.Filename, p.Offset, p.Line, p.Column) } func (p Position) String() string { filename := p.Filename if filename == "" { return fmt.Sprintf("%d:%d", p.Line, p.Column) } return fmt.Sprintf("%s:%d:%d", filename, p.Line, p.Column) } // A Token returned by a Lexer. type Token struct { // Type of token. This is the value keyed by symbol as returned by Definition.Symbols(). Type rune Value string Pos Position } // RuneToken represents a rune as a Token. func RuneToken(r rune) Token { return Token{Type: r, Value: string(r)} } // EOF returns true if this Token is an EOF token. func (t Token) EOF() bool { return t.Type == EOF } func (t Token) String() string { if t.EOF() { return "<EOF>" } return t.Value } func (t Token) GoString() string { if t.Pos == (Position{}) { return fmt.Sprintf("Token{%d, %q}", t.Type, t.Value) } return fmt.Sprintf("Token@%s{%d, %q}", t.Pos.String(), t.Type, t.Value) } // MakeSymbolTable builds a lookup table for checking token ID existence. // // For each symbolic name in "types", the returned map will contain the corresponding token ID as a key. func MakeSymbolTable(def Definition, types ...string) (map[rune]bool, error) { symbols := def.Symbols() table := map[rune]bool{} for _, symbol := range types { rn, ok := symbols[symbol] if !ok { return nil, fmt.Errorf("lexer does not support symbol %q", symbol) } table[rn] = true } return table, nil } participle-0.7.1/lexer/peek.go000066400000000000000000000023771376001225600162540ustar00rootroot00000000000000package lexer // PeekingLexer supports arbitrary lookahead as well as cloning. type PeekingLexer struct { cursor int eof Token tokens []Token } // Upgrade a Lexer to a PeekingLexer with arbitrary lookahead. func Upgrade(lex Lexer) (*PeekingLexer, error) { r := &PeekingLexer{} for { t, err := lex.Next() if err != nil { return r, err } if t.EOF() { r.eof = t break } r.tokens = append(r.tokens, t) } return r, nil } // Cursor position in tokens. func (p *PeekingLexer) Cursor() int { return p.cursor } // Length returns the number of tokens consumed by the lexer.
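//
// Because Upgrade eagerly buffers the entire underlying Lexer, this is the
// total number of tokens (excluding EOF), independent of the cursor position.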
func (p *PeekingLexer) Length() int { return len(p.tokens) } // Next consumes and returns the next token. func (p *PeekingLexer) Next() (Token, error) { if p.cursor >= len(p.tokens) { return p.eof, nil } p.cursor++ return p.tokens[p.cursor-1], nil } // Peek ahead at the n+1 token. ie. Peek(0) will peek at the next token. func (p *PeekingLexer) Peek(n int) (Token, error) { i := p.cursor + n if i >= len(p.tokens) { return p.eof, nil } return p.tokens[i], nil } // Clone creates a clone of this PeekingLexer at its current token. // // The parent and clone are completely independent. func (p *PeekingLexer) Clone() *PeekingLexer { clone := *p return &clone } participle-0.7.1/lexer/peek_test.go000066400000000000000000000017321376001225600173110ustar00rootroot00000000000000package lexer import ( "testing" "github.com/stretchr/testify/require" ) type staticLexer struct { tokens []Token } func (s *staticLexer) Next() (Token, error) { if len(s.tokens) == 0 { return EOFToken(Position{}), nil } t := s.tokens[0] s.tokens = s.tokens[1:] return t, nil } func TestUpgrade(t *testing.T) { t0 := Token{Type: 1, Value: "moo"} t1 := Token{Type: 2, Value: "blah"} l, err := Upgrade(&staticLexer{tokens: []Token{t0, t1}}) require.NoError(t, err) require.Equal(t, t0, mustPeek(t, l, 0)) require.Equal(t, t0, mustPeek(t, l, 0)) require.Equal(t, t1, mustPeek(t, l, 1)) require.Equal(t, t1, mustPeek(t, l, 1)) require.True(t, mustPeek(t, l, 2).EOF()) require.True(t, mustPeek(t, l, 3).EOF()) } func mustPeek(t *testing.T, lexer *PeekingLexer, n int) Token { token, err := lexer.Peek(n) require.NoError(t, err) return token } func mustNext(t *testing.T, lexer Lexer) Token { token, err := lexer.Next() require.NoError(t, err) return token } participle-0.7.1/lexer/regex/000077500000000000000000000000001376001225600161065ustar00rootroot00000000000000participle-0.7.1/lexer/regex/regex.go000066400000000000000000000026031376001225600175500ustar00rootroot00000000000000// Package regex provides a regex based lexer using a readable list of named patterns. // // eg. // // Ident = [[:ascii:]][\w\d]* // Whitespace = \s+ package regex import ( "fmt" "strings" "github.com/alecthomas/participle/lexer" "github.com/alecthomas/participle/lexer/stateful" ) // New creates a regex lexer from a readable list of named patterns. // // This accepts a grammar where each line is a named regular expression in the form: // // # <comment> // <name> = <regexp> // // eg. // // Ident = [[:ascii:]][\w\d]* // Whitespace = \s+ // // Order is relevant. Comments may only occur at the beginning of a line. The regular // expression will have surrounding whitespace trimmed before being parsed. Lower-case // rules are ignored.
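//
// A minimal usage sketch (hedged; error handling elided):
//
//	def, err := regex.New(`
//	    Ident = [[:alpha:]]\w*
//	    whitespace = \s+
//	`)
//	lex, err := def.Lex(strings.NewReader("hello world"))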
func New(grammar string) (lexer.Definition, error) { rules := []stateful.Rule{} lines := strings.Split(grammar, "\n") for _, rule := range lines { rule = strings.TrimSpace(rule) if rule == "" || strings.HasPrefix(rule, "#") { continue } parts := strings.SplitN(rule, "=", 2) if len(parts) == 1 { return nil, fmt.Errorf("rule should be in the form <name> = <regexp>, not %q", rule) } name := strings.TrimSpace(parts[0]) pattern := "^(?:" + strings.TrimSpace(parts[1]) + ")" rules = append(rules, stateful.Rule{ Name: name, Pattern: pattern, }) } return stateful.New(stateful.Rules{"Root": rules}) } participle-0.7.1/lexer/regex/regex_test.go000066400000000000000000000015211376001225600206050ustar00rootroot00000000000000package regex import ( "strings" "testing" "github.com/alecthomas/repr" "github.com/stretchr/testify/require" "github.com/alecthomas/participle/lexer" ) func TestLexer(t *testing.T) { d, err := New(` Ident = [[:alpha:]]\w* Equal = = whitespace = \s+ `) require.NoError(t, err) l, err := d.Lex(strings.NewReader("hello = world")) require.NoError(t, err) actual, err := lexer.ConsumeAll(l) require.NoError(t, err) repr.Println(actual, repr.IgnoreGoStringer()) expected := []lexer.Token{ {Type: -2, Value: "hello", Pos: lexer.Position{Line: 1, Column: 1}}, {Type: -3, Value: "=", Pos: lexer.Position{Offset: 6, Line: 1, Column: 7}}, {Type: -2, Value: "world", Pos: lexer.Position{Offset: 8, Line: 1, Column: 9}}, {Type: -1, Pos: lexer.Position{Offset: 13, Line: 1, Column: 14}}, } require.Equal(t, expected, actual) } participle-0.7.1/lexer/regexp.go000066400000000000000000000043601376001225600166200ustar00rootroot00000000000000package lexer import ( "bytes" "io" "io/ioutil" "regexp" "unicode/utf8" ) var eolBytes = []byte("\n") type regexpDefinition struct { re *regexp.Regexp symbols map[string]rune } // Regexp creates a lexer definition from a regular expression. // // Each named sub-expression in the regular expression matches a token. Anonymous sub-expressions // will be matched and discarded. // // eg. // // def, err := Regexp(`(?P<Ident>[a-z]+)|(\s+)|(?P<Number>\d+)`) func Regexp(pattern string) (Definition, error) { re, err := regexp.Compile(pattern) if err != nil { return nil, err } symbols := map[string]rune{ "EOF": EOF, } for i, sym := range re.SubexpNames()[1:] { if sym != "" { symbols[sym] = EOF - 1 - rune(i) } } return &regexpDefinition{re: re, symbols: symbols}, nil } func (d *regexpDefinition) Lex(r io.Reader) (Lexer, error) { b, err := ioutil.ReadAll(r) if err != nil { return nil, err } return &regexpLexer{ pos: Position{ Filename: NameOfReader(r), Line: 1, Column: 1, }, b: b, re: d.re, names: d.re.SubexpNames(), }, nil } func (d *regexpDefinition) Symbols() map[string]rune { return d.symbols } type regexpLexer struct { pos Position b []byte re *regexp.Regexp names []string } func (r *regexpLexer) Next() (Token, error) { nextToken: for len(r.b) != 0 { matches := r.re.FindSubmatchIndex(r.b) if matches == nil || matches[0] != 0 { rn, _ := utf8.DecodeRune(r.b) return Token{}, Errorf(r.pos, "invalid token %q", rn) } match := r.b[:matches[1]] token := Token{ Pos: r.pos, Value: string(match), } // Update lexer state. r.pos.Offset += matches[1] lines := bytes.Count(match, eolBytes) r.pos.Line += lines // Update column. if lines == 0 { r.pos.Column += utf8.RuneCount(match) } else { r.pos.Column = utf8.RuneCount(match[bytes.LastIndex(match, eolBytes):]) } // Move slice along. r.b = r.b[matches[1]:] // Finally, assign token type. If it is not a named group, we continue to the next token.
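// matches[i] and matches[i+1] are the start/end byte offsets of capture
// group i/2; -1 means that group did not participate in the match.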
for i := 2; i < len(matches); i += 2 { if matches[i] != -1 { if r.names[i/2] == "" { continue nextToken } token.Type = EOF - rune(i/2) break } } return token, nil } return EOFToken(r.pos), nil } participle-0.7.1/lexer/regexp_test.go000066400000000000000000000027331376001225600176610ustar00rootroot00000000000000package lexer import ( "strings" "testing" "github.com/stretchr/testify/require" ) func TestRegexp(t *testing.T) { def, err := Regexp(`(?P<Ident>[⌘a-z]+)|(\s+)|(?P<Number>\d+)`) require.NoError(t, err) require.Equal(t, map[string]rune{ "EOF": -1, "Ident": -2, "Number": -4, }, def.Symbols()) lexer, err := def.Lex(strings.NewReader("hello\n123 456\n⌘orld")) require.NoError(t, err) tokens, err := ConsumeAll(lexer) require.NoError(t, err) require.Equal(t, []Token{ {Type: -2, Value: "hello", Pos: Position{Filename: "", Offset: 0, Line: 1, Column: 1}}, {Type: -4, Value: "123", Pos: Position{Filename: "", Offset: 6, Line: 2, Column: 1}}, {Type: -4, Value: "456", Pos: Position{Filename: "", Offset: 10, Line: 2, Column: 5}}, {Type: -2, Value: "⌘orld", Pos: Position{Filename: "", Offset: 14, Line: 3, Column: 1}}, {Type: EOF, Value: "", Pos: Position{Filename: "", Offset: 21, Line: 3, Column: 6}}, }, tokens) lexer, err = def.Lex(strings.NewReader("hello ?")) require.NoError(t, err) _, err = ConsumeAll(lexer) require.Error(t, err) } func BenchmarkRegexpLexer(b *testing.B) { b.ReportAllocs() def, err := Regexp(`(?P<Ident>[a-z]+)|(?P<Whitespace>\s+)|(?P<Number>\d+)`) require.NoError(b, err) r := strings.NewReader(strings.Repeat("hello world 123 hello world 123", 100)) b.ResetTimer() for i := 0; i < b.N; i++ { lex, _ := def.Lex(r) for { token, _ := lex.Next() if token.Type == EOF { break } } _, _ = r.Seek(0, 0) } } participle-0.7.1/lexer/stateful/000077500000000000000000000000001376001225600166235ustar00rootroot00000000000000participle-0.7.1/lexer/stateful/stateful.go000066400000000000000000000234031376001225600210030ustar00rootroot00000000000000// Package stateful defines a nested stateful lexer. // // This lexer is based heavily on the approach used by Chroma (and Pygments). // // The lexer is a state machine defined by a map of rules keyed by state. Each rule // is a named regex and optional operation to apply when the rule matches. // // As a convenience, any Rule starting with a lowercase letter will be elided from output. // // Lexing starts in the "Root" group. Each rule is matched in order, with the first // successful match producing a lexeme. If the matching rule has an associated Action // it will be executed. The name of each non-root rule is prefixed with the name // of its group to yield the token identifier used during matching. // // A state change can be introduced with the Action `Push(state)`. `Pop()` will // return to the previous state. // // To reuse rules from another state, use `Include(state)`. // // As a special case, regexes containing backrefs in the form \N (where N is a digit) // will match the corresponding capture group from the immediate parent group. This // can be used to parse, among other things, heredocs. // // See the example and tests in this package for details. package stateful import ( "bytes" "errors" "fmt" "io" "io/ioutil" "regexp" "sort" "strconv" "strings" "sync" "unicode" "unicode/utf8" "github.com/alecthomas/participle" "github.com/alecthomas/participle/lexer" ) var ( eolBytes = []byte("\n") backrefReplace = regexp.MustCompile(`(\\+)(\d)`) ) // A Rule matching input and possibly changing state. type Rule struct { Name string Pattern string Action Action } // Rules grouped by name.
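//
// A hedged example of the shape, modelled on the interpolation tests in
// this package:
//
//	stateful.Rules{
//	    "Root": {
//	        {`String`, `"`, stateful.Push("String")},
//	    },
//	    "String": {
//	        {"Escaped", `\\.`, nil},
//	        {"StringEnd", `"`, stateful.Pop()},
//	        {"Char", `[^$"\\]+`, nil},
//	    },
//	}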
type Rules map[string][]Rule // compiledRule is a Rule with its pattern compiled. type compiledRule struct { Rule ignore bool RE *regexp.Regexp } // compiledRules grouped by name. type compiledRules map[string][]compiledRule // A Action is applied when a rule matches. type Action interface { // Actions are responsible for validating the match. ie. if they consumed any input. applyAction(lexer *Lexer, groups []string) error } // RulesAction is an optional interface that Actions can implement. // // It is applied during rule construction to mutate the rule map. type RulesAction interface { applyRules(state string, rule int, rules compiledRules) error } // ActionFunc is a function that is also a Action. type ActionFunc func(*Lexer, []string) error func (m ActionFunc) applyAction(lexer *Lexer, groups []string) error { return m(lexer, groups) } // nolint: golint // Pop to the previous state. func Pop() Action { return ActionFunc(func(lexer *Lexer, groups []string) error { if groups[0] == "" { return errors.New("did not consume any input") } lexer.stack = lexer.stack[:len(lexer.stack)-1] return nil }) } var returnToParent = Rule{"popIfEmpty", "", nil} // Return to the parent state. // // Useful as the last rule in a sub-state. func Return() Rule { return returnToParent } // Push to the given state. // // The target state will then be the set of rules used for matching // until another Push or Pop is encountered. func Push(state string) Action { return ActionFunc(func(lexer *Lexer, groups []string) error { if groups[0] == "" { return errors.New("did not consume any input") } lexer.stack = append(lexer.stack, lexerState{name: state, groups: groups}) return nil }) } type include struct{ state string } func (i include) applyAction(lexer *Lexer, groups []string) error { panic("should not be called") } func (i include) applyRules(state string, rule int, rules compiledRules) error { includedRules, ok := rules[i.state] if !ok { return fmt.Errorf("invalid include state %q", i.state) } clone := make([]compiledRule, len(includedRules)) copy(clone, includedRules) rules[state] = append(rules[state][:rule], append(clone, rules[state][rule+1:]...)...) return nil } // Include rules from another state in this one. func Include(state string) Rule { return Rule{Action: include{state}} } // Definition is the lexer.Definition. type Definition struct { rules compiledRules symbols map[string]rune // Map of key->*regexp.Regexp backrefCache sync.Map } // NewSimple creates a new stateful lexer with a single "Root" state. func NewSimple(rules []Rule) (*Definition, error) { return New(Rules{"Root": rules}) } // New constructs a new stateful lexer from rules. 
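//
// A hedged usage sketch (error handling elided):
//
//	def, err := stateful.New(rules)
//	lex, err := def.Lex(strings.NewReader(`"hello"`))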
func New(rules Rules) (*Definition, error) { compiled := compiledRules{} for key, set := range rules { for i, rule := range set { pattern := "^(?:" + rule.Pattern + ")" var ( re *regexp.Regexp err error ) var match = backrefReplace.FindStringSubmatch(rule.Pattern) if match == nil || len(match[1])%2 == 0 { re, err = regexp.Compile(pattern) if err != nil { return nil, fmt.Errorf("%s.%d: %s", key, i, err) } } compiled[key] = append(compiled[key], compiledRule{ Rule: rule, ignore: len(rule.Name) > 0 && unicode.IsLower(rune(rule.Name[0])), RE: re, }) } } restart: for state, rules := range compiled { for i, rule := range rules { if action, ok := rule.Action.(RulesAction); ok { if err := action.applyRules(state, i, compiled); err != nil { return nil, fmt.Errorf("%s.%d: %s", state, i, err) } goto restart } } } keys := make([]string, 0, len(compiled)) for key := range compiled { keys = append(keys, key) } symbols := map[string]rune{ "EOF": lexer.EOF, } sort.Strings(keys) duplicates := map[string]compiledRule{} rn := lexer.EOF - 1 for _, key := range keys { for i, rule := range compiled[key] { if dup, ok := duplicates[rule.Name]; ok && rule.Pattern != dup.Pattern { panic(fmt.Sprintf("duplicate key %q with different patterns %q != %q", rule.Name, rule.Pattern, dup.Pattern)) } duplicates[rule.Name] = rule compiled[key][i] = rule symbols[rule.Name] = rn rn-- } } return &Definition{ rules: compiled, symbols: symbols, }, nil } func (d *Definition) Lex(r io.Reader) (lexer.Lexer, error) { // nolint: golint data, err := ioutil.ReadAll(r) if err != nil { return nil, err } return &Lexer{ def: d, data: data, stack: []lexerState{{name: "Root"}}, pos: lexer.Position{ Filename: lexer.NameOfReader(r), Line: 1, Column: 1, }, }, nil } func (d *Definition) Symbols() map[string]rune { // nolint: golint return d.symbols } type lexerState struct { name string groups []string } // Lexer implementation. type Lexer struct { stack []lexerState def *Definition data []byte pos lexer.Position } func (l *Lexer) Next() (lexer.Token, error) { // nolint: golint parent := l.stack[len(l.stack)-1] rules := l.def.rules[parent.name] next: for len(l.data) > 0 { var ( rule *compiledRule match []int ) for _, candidate := range rules { // Special case "Return()". if candidate.Rule == returnToParent { l.stack = l.stack[:len(l.stack)-1] parent = l.stack[len(l.stack)-1] rules = l.def.rules[parent.name] continue next } re, err := l.getPattern(candidate) if err != nil { return lexer.Token{}, participle.Wrapf(l.pos, err, "rule %q", candidate.Name) } match = re.FindSubmatchIndex(l.data) if match != nil { rule = &candidate // nolint: scopelint break } } if match == nil || rule == nil { sample := "" if len(l.data) < 16 { sample = string(l.data) } else { sample = string(l.data[:16]) + "..." } return lexer.Token{}, participle.Errorf(l.pos, "no lexer rules in state %q matched input text %q", parent.name, sample) } if rule.Action != nil { groups := make([]string, 0, len(match)/2) for i := 0; i < len(match); i += 2 { groups = append(groups, string(l.data[match[i]:match[i+1]])) } if err := rule.Action.applyAction(l, groups); err != nil { return lexer.Token{}, participle.Errorf(l.pos, "rule %q: %s", rule.Name, err) } } else if match[0] == match[1] { return lexer.Token{}, participle.Errorf(l.pos, "rule %q did not match any input", rule.Name) } span := l.data[match[0]:match[1]] l.data = l.data[match[1]:] // l.groups = groups // Update position. pos := l.pos l.pos.Offset += match[1] lines := bytes.Count(span, eolBytes) l.pos.Line += lines // Update column. 
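// Single-line spans advance the column by their rune count; multi-line
// spans reset it to the rune count following the final newline.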
if lines == 0 { l.pos.Column += utf8.RuneCount(span) } else { l.pos.Column = utf8.RuneCount(span[bytes.LastIndex(span, eolBytes):]) } if rule.ignore { parent = l.stack[len(l.stack)-1] rules = l.def.rules[parent.name] continue } return lexer.Token{ Type: l.def.symbols[rule.Name], Value: string(span), Pos: pos, }, nil } return lexer.EOFToken(l.pos), nil } func (l *Lexer) getPattern(candidate compiledRule) (*regexp.Regexp, error) { if candidate.RE != nil { return candidate.RE, nil } // We don't have a compiled RE. This means there are back-references // that need to be substituted first. parent := l.stack[len(l.stack)-1] key := candidate.Pattern + "\000" + strings.Join(parent.groups, "\000") cached, ok := l.def.backrefCache.Load(key) if ok { return cached.(*regexp.Regexp), nil } var ( re *regexp.Regexp err error ) pattern := backrefReplace.ReplaceAllStringFunc(candidate.Pattern, func(s string) string { var rematch = backrefReplace.FindStringSubmatch(s) n, nerr := strconv.ParseInt(rematch[2], 10, 64) if nerr != nil { err = nerr return s } if len(parent.groups) == 0 || int(n) >= len(parent.groups) { err = fmt.Errorf("invalid group %d from parent with %d groups", n, len(parent.groups)) return s } // concatenate the leading \\\\ which are already escaped to the quoted match. return rematch[1][:len(rematch[1])-1] + regexp.QuoteMeta(parent.groups[n]) }) if err == nil { re, err = regexp.Compile("^(?:" + pattern + ")") } if err != nil { return nil, fmt.Errorf("invalid backref expansion: %q: %s", pattern, err) } l.def.backrefCache.Store(key, re) return re, nil } participle-0.7.1/lexer/stateful/stateful_test.go000066400000000000000000000174761376001225600220570ustar00rootroot00000000000000package stateful import ( "log" "strings" "testing" "github.com/alecthomas/repr" "github.com/stretchr/testify/require" "github.com/alecthomas/participle" "github.com/alecthomas/participle/lexer" ) var interpolatedRules = Rules{ "Root": { {`String`, `"`, Push("String")}, }, "String": { {"Escaped", `\\.`, nil}, {"StringEnd", `"`, Pop()}, {"Expr", `\${`, Push("Expr")}, {"Char", `[^$"\\]+`, nil}, }, "Expr": { Include("Root"), {`whitespace`, `\s+`, nil}, {`Oper`, `[-+/*%]`, nil}, {"Ident", `\w+`, nil}, {"ExprEnd", `}`, Pop()}, }, } func TestStatefulLexer(t *testing.T) { tests := []struct { name string rules Rules input string tokens []string err string }{ {name: "BackrefNoGroups", input: `hello`, err: `1:1: rule "Backref": invalid backref expansion: "\\1": invalid group 1 from parent with 0 groups`, rules: Rules{"Root": {{"Backref", `\1`, nil}}}, }, {name: "BackrefInvalidGroups", input: `< 1 { token.Type = scanner.String } case scanner.RawString: token.Value = token.Value[1 : len(token.Value)-1] } return token, nil } participle-0.7.1/lexer/text_scanner_go110_test.go000066400000000000000000000006521376001225600217710ustar00rootroot00000000000000// +build !go1.11 package lexer import ( "testing" "text/scanner" "github.com/stretchr/testify/require" ) func TestLexBacktickString(t *testing.T) { lexer := LexString("`hello\\nworld`") token := lexer.Next() // See https://github.com/golang/go/issues/23675. Go 1.11 fixes token type into RawString. 
require.Equal(t, Token{Type: scanner.String, Value: "hello\\nworld", Pos: Position{Line: 1, Column: 1}}, token) } participle-0.7.1/lexer/text_scanner_go111_test.go000066400000000000000000000005551376001225600217740ustar00rootroot00000000000000// +build go1.11 package lexer import ( "testing" "text/scanner" "github.com/stretchr/testify/require" ) func TestLexBacktickString(t *testing.T) { lexer := LexString("`hello\\nworld`") token, err := lexer.Next() require.NoError(t, err) require.Equal(t, Token{Type: scanner.RawString, Value: "hello\\nworld", Pos: Position{Line: 1, Column: 1}}, token) } participle-0.7.1/lexer/text_scanner_test.go000066400000000000000000000040651376001225600210640ustar00rootroot00000000000000package lexer import ( "strings" "testing" "text/scanner" "github.com/stretchr/testify/require" ) func TestLexer(t *testing.T) { lexer, err := Upgrade(LexString("hello world")) require.NoError(t, err) helloPos := Position{Offset: 0, Line: 1, Column: 1} worldPos := Position{Offset: 6, Line: 1, Column: 7} eofPos := Position{Offset: 11, Line: 1, Column: 12} require.Equal(t, Token{Type: scanner.Ident, Value: "hello", Pos: helloPos}, mustPeek(t, lexer, 0)) require.Equal(t, Token{Type: scanner.Ident, Value: "hello", Pos: helloPos}, mustPeek(t, lexer, 0)) require.Equal(t, Token{Type: scanner.Ident, Value: "hello", Pos: helloPos}, mustNext(t, lexer)) require.Equal(t, Token{Type: scanner.Ident, Value: "world", Pos: worldPos}, mustPeek(t, lexer, 0)) require.Equal(t, Token{Type: scanner.Ident, Value: "world", Pos: worldPos}, mustNext(t, lexer)) require.Equal(t, Token{Type: scanner.EOF, Value: "", Pos: eofPos}, mustPeek(t, lexer, 0)) require.Equal(t, Token{Type: scanner.EOF, Value: "", Pos: eofPos}, mustNext(t, lexer)) } func TestLexString(t *testing.T) { lexer := LexString(`"hello\nworld"`) token, err := lexer.Next() require.NoError(t, err) require.Equal(t, Token{Type: scanner.String, Value: "hello\nworld", Pos: Position{Line: 1, Column: 1}}, token) } func TestLexSingleString(t *testing.T) { lexer := LexString(`'hello\nworld'`) token, err := lexer.Next() require.NoError(t, err) require.Equal(t, Token{Type: scanner.String, Value: "hello\nworld", Pos: Position{Line: 1, Column: 1}}, token) lexer = LexString(`'\U00008a9e'`) token, err = lexer.Next() require.NoError(t, err) require.Equal(t, Token{Type: scanner.Char, Value: "\U00008a9e", Pos: Position{Line: 1, Column: 1}}, token) } func BenchmarkTextScannerLexer(b *testing.B) { input := strings.Repeat("hello world 123 hello world 123", 100) r := strings.NewReader(input) b.ReportMetric(float64(len(input)), "B") b.ReportAllocs() for i := 0; i < b.N; i++ { lex, _ := TextScannerLexer.Lex(r) for { token, _ := lex.Next() if token.Type == EOF { break } } _, _ = r.Seek(0, 0) } } participle-0.7.1/lookahead_test.go000066400000000000000000000226141376001225600171770ustar00rootroot00000000000000package participle import ( "testing" "github.com/stretchr/testify/require" ) func TestIssue3Example1(t *testing.T) { type LAT1Decl struct { SourceFilename string ` "source_filename" "=" @String` DataLayout string `| "target" "datalayout" "=" @String` TargetTriple string `| "target" "triple" "=" @String` } type LAT1Module struct { Decls []*LAT1Decl `{ @@ }` } g := &LAT1Module{} p := mustTestParser(t, g, UseLookahead(5)) err := p.ParseString(` source_filename = "foo.c" target datalayout = "bar" target triple = "baz" `, g) require.NoError(t, err) require.Equal(t, g, &LAT1Module{ Decls: []*LAT1Decl{ {SourceFilename: "foo.c"}, {DataLayout: "bar"}, {TargetTriple: "baz"}, }, }) 
} type LAT2Config struct { Entries []*LAT2Entry `@@ { @@ }` } type LAT2Entry struct { Attribute *LAT2Attribute `@@` Group *LAT2Group `| @@` } type LAT2Attribute struct { Key string `@Ident "="` Value string `@String` } type LAT2Group struct { Name string `@Ident "{"` Entries []*LAT2Entry `@@ { @@ } "}"` } func TestIssue3Example2(t *testing.T) { g := &LAT2Config{} p := mustTestParser(t, g, UseLookahead(2)) err := p.ParseString(` key = "value" block { inner_key = "inner_value" } `, g) require.NoError(t, err) require.Equal(t, g, &LAT2Config{ Entries: []*LAT2Entry{ {Attribute: &LAT2Attribute{Key: "key", Value: "value"}}, { Group: &LAT2Group{ Name: "block", Entries: []*LAT2Entry{ {Attribute: &LAT2Attribute{Key: "inner_key", Value: "inner_value"}}, }, }, }, }, }, ) } type LAT3Grammar struct { Expenses []*LAT3Expense `{ @@ }` } type LAT3Expense struct { Name string `@Ident "paid"` Amount *LAT3Value `@@ { Ident } "."` } type LAT3Value struct { Float float64 ` "$" @Float` Integer int `| "$" @Int` } func TestIssue11(t *testing.T) { g := &LAT3Grammar{} p := mustTestParser(t, g, UseLookahead(5)) err := p.ParseString(` A paid $30.80 for snacks. B paid $70 for housecleaning. C paid $63.50 for utilities. `, g) require.NoError(t, err) require.Equal(t, g, &LAT3Grammar{ Expenses: []*LAT3Expense{ {Name: "A", Amount: &LAT3Value{Float: 30.8}}, {Name: "B", Amount: &LAT3Value{Integer: 70}}, {Name: "C", Amount: &LAT3Value{Float: 63.5}}, }, }, ) } func TestLookaheadOptional(t *testing.T) { type grammar struct { Key string `[ @Ident "=" ]` Value string `@Ident` } p := mustTestParser(t, &grammar{}, UseLookahead(5)) actual := &grammar{} err := p.ParseString(`value`, actual) require.NoError(t, err) require.Equal(t, &grammar{Value: "value"}, actual) err = p.ParseString(`key = value`, actual) require.NoError(t, err) require.Equal(t, &grammar{Key: "key", Value: "value"}, actual) } func TestLookaheadOptionalNoTail(t *testing.T) { type grammar struct { Key string `@Ident` Value string `[ "=" @Int ]` } p := mustTestParser(t, &grammar{}, UseLookahead(5)) actual := &grammar{} err := p.ParseString(`key`, actual) require.NoError(t, err) } func TestLookaheadDisjunction(t *testing.T) { type grammar struct { B string ` "hello" @Ident "world"` C string `| "hello" "world" @Ident` A string `| "hello" @Ident` } p := mustTestParser(t, &grammar{}, UseLookahead(5)) g := &grammar{} err := p.ParseString(`hello moo`, g) require.NoError(t, err) require.Equal(t, &grammar{A: "moo"}, g) err = p.ParseString(`hello moo world`, g) require.NoError(t, err) require.Equal(t, &grammar{B: "moo"}, g) } func TestLookaheadNestedDisjunctions(t *testing.T) { g := &struct { A string ` "hello" ( "foo" @Ident | "bar" "waz" @Ident)` B string `| "hello" @"world"` }{} p := mustTestParser(t, g, UseLookahead(5)) err := p.ParseString(`hello foo FOO`, g) require.NoError(t, err) require.Equal(t, g.A, "FOO") err = p.ParseString(`hello world`, g) require.NoError(t, err) require.Equal(t, g.B, "world") } func TestLookaheadTerm(t *testing.T) { g := &struct { A string ` @Ident` B struct { A string `@String` } `| @@` C struct { A string `@String` B string `"…" @String` } `| @@` D struct { A string `"[" @Ident "]"` } `| @@` E struct { A string `"(" @Ident ")"` } `| @@` }{} mustTestParser(t, g, UseLookahead(5)) } // Term holds the different possible terms type issue28Term struct { KV *issue28KV ` @@ ` Text *string `| @String ` } // KV represents a json kv type issue28KV struct { Key *issue28Key `@@` Value *issue28Value `@@` } // Key holds the possible key types for a kv type 
issue28Key struct { Ident *string `@Ident ":"` Str *string `| @String ":"` } // Value holds the possible values for a kv type issue28Value struct { Bool *bool `(@"true" | "false")` Str *string `| @String` Ident *string `| @Ident` Int *int64 `| @Int` Float *float64 `| @Float` } func TestIssue28(t *testing.T) { p := mustTestParser(t, &issue28Term{}, UseLookahead(5)) actual := &issue28Term{} err := p.ParseString(`"key": "value"`, actual) require.NoError(t, err) key := "key" value := "value" expected := &issue28Term{ KV: &issue28KV{ Key: &issue28Key{ Str: &key, }, Value: &issue28Value{ Str: &value, }, }, } require.Equal(t, expected, actual) err = p.ParseString(`"some text string"`, actual) require.NoError(t, err) text := "some text string" expected = &issue28Term{ Text: &text, } require.Equal(t, expected, actual) } // This test used to fail because the lookahead table only tracks (root, depth, token) for each root. In this case there // are two roots that have the same second token (0, 1, "=") and (2, 1, "="). As (depth, token) is the uniqueness // constraint, this never disambiguates. // // To solve this, each ambiguous group will need to track the history of tokens. // // eg. // // 0. groups = [ // {history: [">"] roots: [0, 1]}, // {history: ["<"], roots: [2, 3]}, // ] // 1. groups = [ // {history: [">", "="], roots: [0]}, // {history: [">"], roots: [1]}, // {history: ["<", "="], roots: [2]}, // {history: ["<"], roots: [3]}, // ] func TestLookaheadWithConvergingTokens(t *testing.T) { type grammar struct { Left string `@Ident` Op string `[ @( ">" "=" | ">" | "<" "=" | "<" )` Next *grammar ` @@ ]` } p := mustTestParser(t, &grammar{}, UseLookahead(5)) actual := &grammar{} err := p.ParseString("a >= b", actual) require.NoError(t, err) } // type leftRecursionType struct { // Type string ` @("int" | "float" | "string")` // Function *leftRecursionFuncType `| @@` // } // type leftRecursionFuncType struct { // Return *leftRecursionType `@@` // Function string `@Ident` // Args []*leftRecursionType `"(" @@ { "," @@ } ")"` // } // func TestLeftRecursion(t *testing.T) { // p := mustTestParser(t, &leftRecursionType{}, UseLookahead(5)) // actual := &leftRecursionType{} // err := p.ParseString(`int f()`, actual) // require.NoError(t, err) // require.Equal(t, &leftRecursionType{ // Function: &leftRecursionFuncType{ // Return: &leftRecursionType{Type: "int"}, // Function: "f", // }, // }, actual) // } func TestIssue27(t *testing.T) { type grammar struct { Number int ` @(["-"] Int)` String string `| @String` } p := mustTestParser(t, &grammar{}) actual := &grammar{} err := p.ParseString(`- 100`, actual) require.NoError(t, err) require.Equal(t, &grammar{Number: -100}, actual) err = p.ParseString(`100`, actual) require.NoError(t, err) require.Equal(t, &grammar{Number: 100}, actual) } func TestLookaheadDisambiguateByType(t *testing.T) { type grammar struct { Int int ` @(["-"] Int)` Float float64 `| @(["-"] Float)` } p := mustTestParser(t, &grammar{}, UseLookahead(5)) actual := &grammar{} err := p.ParseString(`- 100`, actual) require.NoError(t, err) require.Equal(t, &grammar{Int: -100}, actual) err = p.ParseString(`- 100.5`, actual) require.NoError(t, err) require.Equal(t, &grammar{Float: -100.5}, actual) } func TestShowNearestError(t *testing.T) { type grammar struct { A string ` @"a" @"b" @"c"` B string `| @"a" @"z"` } p := mustTestParser(t, &grammar{}, UseLookahead(10)) actual := &grammar{} err := p.ParseString(`a b d`, actual) require.EqualError(t, err, `1:5: unexpected token "d" (expected "c")`) } func 
TestRewindDisjunction(t *testing.T) { type grammar struct { Function string ` @Ident "(" ")"` Ident string `| @Ident` } p := mustTestParser(t, &grammar{}, UseLookahead(2)) ast := &grammar{} err := p.ParseString(`name`, ast) require.NoError(t, err) require.Equal(t, &grammar{Ident: "name"}, ast) } func TestRewindOptional(t *testing.T) { type grammar struct { Var string ` [ "int" "int" ] @Ident` } p := mustTestParser(t, &grammar{}, UseLookahead(3)) ast := &grammar{} err := p.ParseString(`one`, ast) require.NoError(t, err) require.Equal(t, &grammar{Var: "one"}, ast) err = p.ParseString(`int int one`, ast) require.NoError(t, err) require.Equal(t, &grammar{Var: "one"}, ast) } func TestRewindRepetition(t *testing.T) { type grammar struct { Ints []string `{ @"int" }` Ident string `@Ident` } p := mustTestParser(t, &grammar{}, UseLookahead(3)) ast := &grammar{} err := p.ParseString(`int int one`, ast) require.NoError(t, err) require.Equal(t, &grammar{Ints: []string{"int", "int"}, Ident: "one"}, ast) err = p.ParseString(`int int one`, ast) require.NoError(t, err) require.Equal(t, &grammar{Ints: []string{"int", "int"}, Ident: "one"}, ast) } participle-0.7.1/map.go000066400000000000000000000054401376001225600147640ustar00rootroot00000000000000package participle import ( "errors" "io" "strconv" "strings" "github.com/alecthomas/participle/lexer" ) type mapperByToken struct { symbols []string mapper Mapper } // DropToken can be returned by a Mapper to remove a token from the stream. var DropToken = errors.New("drop token") // nolint: golint // Mapper function for mutating tokens before being applied to the AST. // // If the Mapper func returns an error of DropToken, the token will be removed from the stream. type Mapper func(token lexer.Token) (lexer.Token, error) // Map is an Option that configures the Parser to apply a mapping function to each Token from the lexer. // // This can be useful to eg. upper-case all tokens of a certain type, or dequote strings. // // "symbols" specifies the token symbols that the Mapper will be applied to. If empty, all tokens will be mapped. func Map(mapper Mapper, symbols ...string) Option { return func(p *Parser) error { p.mappers = append(p.mappers, mapperByToken{ mapper: mapper, symbols: symbols, }) return nil } } // Unquote applies strconv.Unquote() to tokens of the given types. // // Tokens of type "String" will be unquoted if no other types are provided. func Unquote(types ...string) Option { if len(types) == 0 { types = []string{"String"} } return Map(func(t lexer.Token) (lexer.Token, error) { value, err := unquote(t.Value) if err != nil { return t, lexer.ErrorWithTokenf(t, "invalid quoted string %q: %s", t.Value, err.Error()) } t.Value = value return t, nil }, types...) } func unquote(s string) (string, error) { quote := s[0] s = s[1 : len(s)-1] out := "" for s != "" { value, _, tail, err := strconv.UnquoteChar(s, quote) if err != nil { return "", err } s = tail out += string(value) } return out, nil } // Upper is an Option that upper-cases all tokens of the given type. Useful for case normalisation. func Upper(types ...string) Option { return Map(func(token lexer.Token) (lexer.Token, error) { token.Value = strings.ToUpper(token.Value) return token, nil }, types...) } // Elide drops tokens of the specified types. func Elide(types ...string) Option { return Map(func(token lexer.Token) (lexer.Token, error) { return lexer.Token{}, DropToken }, types...) } // Apply a Mapping to all tokens coming out of a Lexer. 
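//
// Illustrative sketch (added commentary, not part of the original source):
// the options above compose at Build time. Assuming a lexer definition `def`
// that emits "String" and "Comment" tokens (hypothetical names):
//
//	parser, err := participle.Build(&grammar{},
//		participle.Lexer(def),
//		participle.Unquote("String"),
//		participle.Elide("Comment"),
//	)
//
// Build folds every registered mapper into the mappingLexerDef below, so
// tokens are rewritten (or dropped) before the parser ever sees them.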
type mappingLexerDef struct { lexer.Definition mapper Mapper } func (m *mappingLexerDef) Lex(r io.Reader) (lexer.Lexer, error) { lexer, err := m.Definition.Lex(r) if err != nil { return nil, err } return &mappingLexer{lexer, m.mapper}, nil } type mappingLexer struct { lexer.Lexer mapper Mapper } func (m *mappingLexer) Next() (lexer.Token, error) { for { t, err := m.Lexer.Next() if err != nil { return t, err } t, err = m.mapper(t) if err == DropToken { continue } return t, err } } participle-0.7.1/map_test.go000066400000000000000000000034001376001225600160150ustar00rootroot00000000000000package participle import ( "strings" "testing" "github.com/stretchr/testify/require" "github.com/alecthomas/participle/lexer" ) func TestUpper(t *testing.T) { var grammar struct { Text string `@Ident` } def := lexer.Must(lexer.Regexp(`(?P\s+)|(?P\w+)`)) parser := mustTestParser(t, &grammar, Lexer(def), Upper("Ident")) actual, err := parser.Lex(strings.NewReader("hello world")) require.NoError(t, err) expected := []lexer.Token{ {Type: -3, Value: "HELLO", Pos: lexer.Position{Filename: "", Offset: 0, Line: 1, Column: 1}}, {Type: -2, Value: " ", Pos: lexer.Position{Filename: "", Offset: 5, Line: 1, Column: 6}}, {Type: -3, Value: "WORLD", Pos: lexer.Position{Filename: "", Offset: 6, Line: 1, Column: 7}}, {Type: lexer.EOF, Value: "", Pos: lexer.Position{Filename: "", Offset: 11, Line: 1, Column: 12}}, } require.Equal(t, expected, actual) } func TestUnquote(t *testing.T) { var grammar struct { Text string `@Ident` } lex := lexer.Must(lexer.Regexp("(\\s+)|(?P\\w+)|(?P\"(?:[^\"]|\\.)*\")|(?P`[^`]*`)")) parser := mustTestParser(t, &grammar, Lexer(lex), Unquote("String", "RawString")) actual, err := parser.Lex(strings.NewReader("hello world \"quoted\\tstring\" `backtick quotes`")) require.NoError(t, err) expected := []lexer.Token{ {Type: -3, Value: "hello", Pos: lexer.Position{Line: 1, Column: 1}}, {Type: -3, Value: "world", Pos: lexer.Position{Offset: 6, Line: 1, Column: 7}}, {Type: -4, Value: "quoted\tstring", Pos: lexer.Position{Offset: 12, Line: 1, Column: 13}}, {Type: -5, Value: "backtick quotes", Pos: lexer.Position{Offset: 29, Line: 1, Column: 30}}, {Type: lexer.EOF, Value: "", Pos: lexer.Position{Offset: 46, Line: 1, Column: 47}}, } require.Equal(t, expected, actual) } participle-0.7.1/nodes.go000066400000000000000000000403501376001225600153160ustar00rootroot00000000000000package participle import ( "encoding" "errors" "fmt" "reflect" "strconv" "strings" "github.com/alecthomas/participle/lexer" ) var ( // MaxIterations limits the number of elements capturable by {}. MaxIterations = 1000000 positionType = reflect.TypeOf(lexer.Position{}) tokenType = reflect.TypeOf(lexer.Token{}) captureType = reflect.TypeOf((*Capture)(nil)).Elem() textUnmarshalerType = reflect.TypeOf((*encoding.TextUnmarshaler)(nil)).Elem() parseableType = reflect.TypeOf((*Parseable)(nil)).Elem() // NextMatch should be returned by Parseable.Parse() method implementations to indicate // that the node did not match and that other matches should be attempted, if appropriate. NextMatch = errors.New("no match") // nolint: golint ) // A node in the grammar. type node interface { // Parse from scanner into value. // // Returned slice will be nil if the node does not match. Parse(ctx *parseContext, parent reflect.Value) ([]reflect.Value, error) // Return a decent string representation of the Node. 
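	// (Added commentary.) This string is surfaced in error messages, for
	// example as the "expected ..." portion of an UnexpectedTokenError.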
String() string } func decorate(err *error, name func() string) { if *err == nil { return } switch realError := (*err).(type) { case *lexer.Error: *err = &parseError{Msg: name() + ": " + realError.Msg, Tok: realError.Token()} case *parseError: *err = &parseError{Msg: name() + ": " + realError.Msg, Tok: realError.Token()} default: *err = &parseError{Msg: fmt.Sprintf("%s: %s", name(), realError)} } } // A node that proxies to an implementation that implements the Parseable interface. type parseable struct { t reflect.Type } func (p *parseable) String() string { return stringer(p) } func (p *parseable) Parse(ctx *parseContext, parent reflect.Value) (out []reflect.Value, err error) { rv := reflect.New(p.t) v := rv.Interface().(Parseable) err = v.Parse(ctx.PeekingLexer) if err != nil { if err == NextMatch { return nil, nil } return nil, err } return []reflect.Value{rv.Elem()}, nil } // @@ type strct struct { typ reflect.Type expr node } func (s *strct) String() string { return stringer(s) } func (s *strct) maybeInjectStartToken(token lexer.Token, v reflect.Value) { if f := v.FieldByName("Pos"); f.IsValid() && f.Type() == positionType { f.Set(reflect.ValueOf(token.Pos)) } else if f := v.FieldByName("Tok"); f.IsValid() && f.Type() == tokenType { f.Set(reflect.ValueOf(token)) } } func (s *strct) maybeInjectEndToken(token lexer.Token, v reflect.Value) { if f := v.FieldByName("EndPos"); f.IsValid() && f.Type() == positionType { f.Set(reflect.ValueOf(token.Pos)) } else if f := v.FieldByName("EndTok"); f.IsValid() && f.Type() == tokenType { f.Set(reflect.ValueOf(token)) } } func (s *strct) Parse(ctx *parseContext, parent reflect.Value) (out []reflect.Value, err error) { sv := reflect.New(s.typ).Elem() t, err := ctx.Peek(0) if err != nil { return nil, err } s.maybeInjectStartToken(t, sv) if out, err = s.expr.Parse(ctx, sv); err != nil { _ = ctx.Apply() // Best effort to give partial AST. ctx.MaybeUpdateError(err) return []reflect.Value{sv}, err } else if out == nil { return nil, nil } t, _ = ctx.Peek(0) s.maybeInjectEndToken(t, sv) return []reflect.Value{sv}, ctx.Apply() } type groupMatchMode int const ( groupMatchOnce groupMatchMode = iota groupMatchZeroOrOne = iota groupMatchZeroOrMore = iota groupMatchOneOrMore = iota groupMatchNonEmpty = iota ) // ( ) - match once // ( )* - match zero or more times // ( )+ - match one or more times // ( )? - match zero or once // ( )! - must be a non-empty match // // The additional modifier "!" forces the content of the group to be non-empty if it does match. type group struct { expr node mode groupMatchMode } func (g *group) String() string { return stringer(g) } func (g *group) Parse(ctx *parseContext, parent reflect.Value) (out []reflect.Value, err error) { // Configure min/max matches. min := 1 max := 1 switch g.mode { case groupMatchNonEmpty: out, err = g.expr.Parse(ctx, parent) if err != nil { return out, err } if len(out) == 0 { t, _ := ctx.Peek(0) return out, lexer.ErrorWithTokenf(t, "sub-expression %s cannot be empty", g) } return out, nil case groupMatchOnce: return g.expr.Parse(ctx, parent) case groupMatchZeroOrOne: min = 0 case groupMatchZeroOrMore: min = 0 max = MaxIterations case groupMatchOneOrMore: min = 1 max = MaxIterations } matches := 0 for ; matches < max; matches++ { branch := ctx.Branch() v, err := g.expr.Parse(branch, parent) out = append(out, v...) if err != nil { ctx.MaybeUpdateError(err) // Optional part failed to match. 
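			// (Added commentary.) Stop reports whether this branch consumed
			// more tokens than the configured lookahead allows; if so the
			// error is fatal rather than a cue to backtrack to the next
			// alternative.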
if ctx.Stop(err, branch) { return out, err } break } else { ctx.Accept(branch) } if v == nil { break } } // fmt.Printf("%d < %d < %d: out == nil? %v\n", min, matches, max, out == nil) t, _ := ctx.Peek(0) if matches >= MaxIterations { panic(lexer.ErrorWithTokenf(t, "too many iterations of %s (> %d)", g, MaxIterations)) } if matches < min { return out, lexer.ErrorWithTokenf(t, "sub-expression %s must match at least once", g) } // The idea here is that something like "a"? is a successful match and that parsing should proceed. if min == 0 && out == nil { out = []reflect.Value{} } return out, nil } // {"|" } type disjunction struct { nodes []node } func (d *disjunction) String() string { return stringer(d) } func (d *disjunction) Parse(ctx *parseContext, parent reflect.Value) (out []reflect.Value, err error) { var ( deepestError = 0 firstError error firstValues []reflect.Value ) for _, a := range d.nodes { branch := ctx.Branch() if value, err := a.Parse(branch, parent); err != nil { // If this branch progressed too far and still didn't match, error out. if ctx.Stop(err, branch) { return value, err } // Show the closest error returned. The idea here is that the further the parser progresses // without error, the more difficult it is to trace the error back to its root. if branch.Cursor() >= deepestError { firstError = err firstValues = value deepestError = branch.Cursor() } } else if value != nil { bt, _ := branch.Peek(0) ct, _ := ctx.Peek(0) if bt == ct { panic(Errorf(bt.Pos, "branch %s was accepted but did not progress the lexer at %s (%q)", a, bt.Pos, bt.Value)) } ctx.Accept(branch) return value, nil } } if firstError != nil { ctx.MaybeUpdateError(firstError) return firstValues, firstError } return nil, nil } // ... type sequence struct { head bool node node next *sequence } func (s *sequence) String() string { return stringer(s) } func (s *sequence) Parse(ctx *parseContext, parent reflect.Value) (out []reflect.Value, err error) { for n := s; n != nil; n = n.next { child, err := n.node.Parse(ctx, parent) out = append(out, child...) if err != nil { return out, err } if child == nil { // Early exit if first value doesn't match, otherwise all values must match. if n == s { return nil, nil } token, err := ctx.Peek(0) if err != nil { return nil, err } return out, UnexpectedTokenError{Unexpected: token, Expected: n.String()} } } return out, nil } // @ type capture struct { field structLexerField node node } func (c *capture) String() string { return stringer(c) } func (c *capture) Parse(ctx *parseContext, parent reflect.Value) (out []reflect.Value, err error) { token, err := ctx.Peek(0) if err != nil { return nil, err } pos := token.Pos v, err := c.node.Parse(ctx, parent) if err != nil { if v != nil { ctx.Defer(pos, parent, c.field, v) } return []reflect.Value{parent}, err } if v == nil { return nil, nil } ctx.Defer(pos, parent, c.field, v) return []reflect.Value{parent}, nil } // - named lexer token reference type reference struct { typ rune identifier string // Used for informational purposes. 
} func (r *reference) String() string { return stringer(r) } func (r *reference) Parse(ctx *parseContext, parent reflect.Value) (out []reflect.Value, err error) { token, err := ctx.Peek(0) if err != nil { return nil, err } if token.Type != r.typ { return nil, nil } _, _ = ctx.Next() return []reflect.Value{reflect.ValueOf(token.Value)}, nil } // [ ] type optional struct { node node } func (o *optional) String() string { return stringer(o) } func (o *optional) Parse(ctx *parseContext, parent reflect.Value) (out []reflect.Value, err error) { branch := ctx.Branch() out, err = o.node.Parse(branch, parent) if err != nil { // Optional part failed to match. if ctx.Stop(err, branch) { return out, err } } else { ctx.Accept(branch) } if out == nil { out = []reflect.Value{} } return out, nil } // { } type repetition struct { node node } func (r *repetition) String() string { return stringer(r) } // Parse a repetition. Once a repetition is encountered it will always match, so grammars // should ensure that branches are differentiated prior to the repetition. func (r *repetition) Parse(ctx *parseContext, parent reflect.Value) (out []reflect.Value, err error) { i := 0 for ; i < MaxIterations; i++ { branch := ctx.Branch() v, err := r.node.Parse(branch, parent) out = append(out, v...) if err != nil { // Optional part failed to match. if ctx.Stop(err, branch) { return out, err } break } else { ctx.Accept(branch) } if v == nil { break } } if i >= MaxIterations { t, _ := ctx.Peek(0) panic(lexer.ErrorWithTokenf(t, "too many iterations of %s (> %d)", r, MaxIterations)) } if out == nil { out = []reflect.Value{} } return out, nil } // Match a token literal exactly "..."[:]. type literal struct { s string t rune tt string // Used for display purposes - symbolic name of t. } func (l *literal) String() string { return stringer(l) } func (l *literal) Parse(ctx *parseContext, parent reflect.Value) (out []reflect.Value, err error) { token, err := ctx.Peek(0) if err != nil { return nil, err } equal := false // nolint: ineffassign if ctx.caseInsensitive[token.Type] { equal = strings.EqualFold(token.Value, l.s) } else { equal = token.Value == l.s } if equal && (l.t == -1 || l.t == token.Type) { next, err := ctx.Next() if err != nil { return nil, err } return []reflect.Value{reflect.ValueOf(next.Value)}, nil } return nil, nil } type negation struct { node node } func (n *negation) String() string { return "!" + stringer(n.node) } func (n *negation) Parse(ctx *parseContext, parent reflect.Value) (out []reflect.Value, err error) { // Create a branch to avoid advancing the parser, but call neither Stop nor Accept on it // since we will discard a match. branch := ctx.Branch() notEOF, err := ctx.Peek(0) if err != nil { return nil, err } if notEOF.EOF() { // EOF cannot match a negation, which expects something return nil, nil } out, err = n.node.Parse(branch, parent) if out != nil && err == nil { // out being non-nil means that what we don't want is actually here, so we report nomatch return nil, lexer.ErrorWithTokenf(notEOF, "unexpected '%s'", notEOF.Value) } // Just give the next token next, err := ctx.Next() if err != nil { return nil, err } return []reflect.Value{reflect.ValueOf(next.Value)}, nil } // Attempt to transform values to given type. // // This will dereference pointers, and attempt to parse strings into integer values, floats, etc. 
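//
// For example (added commentary): a captured "42" can conform to an int field
// via strconv.ParseInt, "1.5" to a float64 via strconv.ParseFloat, and a bool
// field is simply set to true when its branch matched.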
func conform(t reflect.Type, values []reflect.Value) (out []reflect.Value, err error) { for _, v := range values { for t != v.Type() && t.Kind() == reflect.Ptr && v.Kind() != reflect.Ptr { // This can occur during partial failure. if !v.CanAddr() { return } v = v.Addr() } // Already of the right kind, don't bother converting. if v.Kind() == t.Kind() { if v.Type() != t { v = v.Convert(t) } out = append(out, v) continue } kind := t.Kind() switch kind { case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64: n, err := strconv.ParseInt(v.String(), 0, sizeOfKind(kind)) if err != nil { return nil, fmt.Errorf("invalid integer %q: %s", v.String(), err) } v = reflect.New(t).Elem() v.SetInt(n) case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64: n, err := strconv.ParseUint(v.String(), 0, sizeOfKind(kind)) if err != nil { return nil, fmt.Errorf("invalid integer %q: %s", v.String(), err) } v = reflect.New(t).Elem() v.SetUint(n) case reflect.Bool: v = reflect.ValueOf(true) case reflect.Float32, reflect.Float64: n, err := strconv.ParseFloat(v.String(), sizeOfKind(kind)) if err != nil { return nil, fmt.Errorf("invalid integer %q: %s", v.String(), err) } v = reflect.New(t).Elem() v.SetFloat(n) } out = append(out, v) } return out, nil } func sizeOfKind(kind reflect.Kind) int { switch kind { case reflect.Int8, reflect.Uint8: return 8 case reflect.Int16, reflect.Uint16: return 16 case reflect.Int32, reflect.Uint32, reflect.Float32: return 32 case reflect.Int64, reflect.Uint64, reflect.Float64: return 64 case reflect.Int, reflect.Uint: return strconv.IntSize } panic("unsupported kind " + kind.String()) } // Set field. // // If field is a pointer the pointer will be set to the value. If field is a string, value will be // appended. If field is a slice, value will be appended to slice. // // For all other types, an attempt will be made to convert the string to the corresponding // type (int, float32, etc.). func setField(pos lexer.Position, strct reflect.Value, field structLexerField, fieldValue []reflect.Value) (err error) { // nolint: gocognit defer decorate(&err, func() string { return strct.Type().Name() + "." + field.Name }) f := strct.FieldByIndex(field.Index) // Any kind of pointer, hydrate it first. if f.Kind() == reflect.Ptr { if f.IsNil() { fv := reflect.New(f.Type().Elem()).Elem() f.Set(fv.Addr()) f = fv } else { f = f.Elem() } } if f.Kind() == reflect.Slice { fieldValue, err = conform(f.Type().Elem(), fieldValue) if err != nil { return err } f.Set(reflect.Append(f, fieldValue...)) return nil } if f.Kind() == reflect.Struct { if pf := f.FieldByName("Pos"); pf.IsValid() && pf.Type() == positionType { pf.Set(reflect.ValueOf(pos)) } } if f.CanAddr() { if d, ok := f.Addr().Interface().(Capture); ok { ifv := []string{} for _, v := range fieldValue { ifv = append(ifv, v.Interface().(string)) } return d.Capture(ifv) } else if d, ok := f.Addr().Interface().(encoding.TextUnmarshaler); ok { for _, v := range fieldValue { if err := d.UnmarshalText([]byte(v.Interface().(string))); err != nil { return err } } return nil } } // Strings concatenate all captured tokens. if f.Kind() == reflect.String { fieldValue, err = conform(f.Type(), fieldValue) if err != nil { return err } for _, v := range fieldValue { f.Set(reflect.ValueOf(f.String() + v.String()).Convert(f.Type())) } return nil } // Coalesce multiple tokens into one. This allows eg. ["-", "10"] to be captured as separate tokens but // parsed as a single string "-10". 
if len(fieldValue) > 1 { out := []string{} for _, v := range fieldValue { out = append(out, v.String()) } fieldValue = []reflect.Value{reflect.ValueOf(strings.Join(out, ""))} } fieldValue, err = conform(f.Type(), fieldValue) if err != nil { return err } fv := fieldValue[0] switch f.Kind() { // Numeric types will increment if the token can not be coerced. case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64: if fv.Type() != f.Type() { f.SetInt(f.Int() + 1) } else { f.Set(fv) } case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64: if fv.Type() != f.Type() { f.SetUint(f.Uint() + 1) } else { f.Set(fv) } case reflect.Float32, reflect.Float64: if fv.Type() != f.Type() { f.SetFloat(f.Float() + 1) } else { f.Set(fv) } case reflect.Bool, reflect.Struct: if f.Kind() == reflect.Bool && fv.Kind() == reflect.Bool { f.SetBool(fv.Bool()) break } if fv.Type() != f.Type() { return fmt.Errorf("value %q is not correct type %s", fv, f.Type()) } f.Set(fv) default: return fmt.Errorf("unsupported field type %s for field %s", f.Type(), field.Name) } return nil } participle-0.7.1/options.go000066400000000000000000000024301376001225600156760ustar00rootroot00000000000000package participle import ( "github.com/alecthomas/participle/lexer" ) // An Option to modify the behaviour of the Parser. type Option func(p *Parser) error // Lexer is an Option that sets the lexer to use with the given grammar. func Lexer(def lexer.Definition) Option { return func(p *Parser) error { p.lex = def return nil } } // UseLookahead allows branch lookahead up to "n" tokens. // // If parsing cannot be disambiguated before "n" tokens of lookahead, parsing will fail. // // Note that increasing lookahead has a minor performance impact, but also // reduces the accuracy of error reporting. func UseLookahead(n int) Option { return func(p *Parser) error { p.useLookahead = n return nil } } // CaseInsensitive allows the specified token types to be matched case-insensitively. func CaseInsensitive(tokens ...string) Option { return func(p *Parser) error { for _, token := range tokens { p.caseInsensitive[token] = true } return nil } } // ParseOption modifies how an individual parse is applied. type ParseOption func(p *parseContext) // AllowTrailing tokens without erroring. // // That is, do not error if a full parse completes but additional tokens remain. func AllowTrailing(ok bool) ParseOption { return func(p *parseContext) { p.allowTrailing = ok } } participle-0.7.1/parser.go000066400000000000000000000147041376001225600155060ustar00rootroot00000000000000package participle import ( "bytes" "fmt" "io" "reflect" "strings" "github.com/alecthomas/participle/lexer" ) // A Parser for a particular grammar and lexer. type Parser struct { root node lex lexer.Definition typ reflect.Type useLookahead int caseInsensitive map[string]bool mappers []mapperByToken } // MustBuild calls Build(grammar, options...) and panics if an error occurs. func MustBuild(grammar interface{}, options ...Option) *Parser { parser, err := Build(grammar, options...) if err != nil { panic(err) } return parser } // Build constructs a parser for the given grammar. // // If "Lexer()" is not provided as an option, a default lexer based on text/scanner will be used. This scans typical Go- // like tokens. // // See documentation for details func Build(grammar interface{}, options ...Option) (parser *Parser, err error) { // Configure Parser struct with defaults + options. 
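	// (Added commentary.) The defaults below are the text/scanner-based lexer
	// and LL(1) lookahead; override them with the Lexer(...) and
	// UseLookahead(...) options.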
p := &Parser{ lex: lexer.TextScannerLexer, caseInsensitive: map[string]bool{}, useLookahead: 1, } for _, option := range options { if option == nil { return nil, fmt.Errorf("nil Option passed, signature has changed; " + "if you intended to provide a custom Lexer, try participle.Build(grammar, participle.Lexer(lexer))") } if err = option(p); err != nil { return nil, err } } if len(p.mappers) > 0 { mappers := map[rune][]Mapper{} symbols := p.lex.Symbols() for _, mapper := range p.mappers { if len(mapper.symbols) == 0 { mappers[lexer.EOF] = append(mappers[lexer.EOF], mapper.mapper) } else { for _, symbol := range mapper.symbols { if rn, ok := symbols[symbol]; !ok { return nil, fmt.Errorf("mapper %#v uses unknown token %q", mapper, symbol) } else { // nolint: golint mappers[rn] = append(mappers[rn], mapper.mapper) } } } } p.lex = &mappingLexerDef{p.lex, func(t lexer.Token) (lexer.Token, error) { combined := make([]Mapper, 0, len(mappers[t.Type])+len(mappers[lexer.EOF])) combined = append(combined, mappers[lexer.EOF]...) combined = append(combined, mappers[t.Type]...) var err error for _, m := range combined { t, err = m(t) if err != nil { return t, err } } return t, nil }} } context := newGeneratorContext(p.lex) v := reflect.ValueOf(grammar) if v.Kind() == reflect.Interface { v = v.Elem() } p.typ = v.Type() p.root, err = context.parseType(p.typ) if err != nil { return nil, err } return p, nil } // Lexer returns the parser's builtin lexer. func (p *Parser) Lexer() lexer.Definition { return p.lex } // Lex uses the parser's lexer to tokenise input. func (p *Parser) Lex(r io.Reader) ([]lexer.Token, error) { lex, err := p.lex.Lex(r) if err != nil { return nil, err } tokens, err := lexer.ConsumeAll(lex) return tokens, err } // ParseFromLexer into grammar v which must be of the same type as the grammar passed to // participle.Build(). // // This may return a participle.Error. func (p *Parser) ParseFromLexer(lex *lexer.PeekingLexer, v interface{}, options ...ParseOption) error { rv := reflect.ValueOf(v) if rv.Kind() == reflect.Interface { rv = rv.Elem() } var stream reflect.Value if rv.Kind() == reflect.Chan { stream = rv rt := rv.Type().Elem() rv = reflect.New(rt).Elem() } rt := rv.Type() if rt != p.typ { return fmt.Errorf("must parse into value of type %s not %T", p.typ, v) } if rt.Kind() != reflect.Ptr || rt.Elem().Kind() != reflect.Struct { return fmt.Errorf("target must be a pointer to a struct, not %s", rt) } caseInsensitive := map[rune]bool{} for sym, rn := range p.lex.Symbols() { if p.caseInsensitive[sym] { caseInsensitive[rn] = true } } ctx := newParseContext(lex, p.useLookahead, caseInsensitive) defer func() { *lex = *ctx.PeekingLexer }() for _, option := range options { option(ctx) } // If the grammar implements Parseable, use it. if parseable, ok := v.(Parseable); ok { return p.rootParseable(ctx, parseable) } if stream.IsValid() { return p.parseStreaming(ctx, stream) } return p.parseOne(ctx, rv) } // Parse from r into grammar v which must be of the same type as the grammar passed to // participle.Build(). // // This may return a participle.Error. func (p *Parser) Parse(r io.Reader, v interface{}, options ...ParseOption) (err error) { lex, err := p.lex.Lex(r) if err != nil { return err } peeker, err := lexer.Upgrade(lex) if err != nil { return err } return p.ParseFromLexer(peeker, v, options...) 
} func (p *Parser) parseStreaming(ctx *parseContext, rv reflect.Value) error { t := rv.Type().Elem().Elem() for { if token, _ := ctx.Peek(0); token.EOF() { rv.Close() return nil } v := reflect.New(t) if err := p.parseInto(ctx, v); err != nil { return err } rv.Send(v) } } func (p *Parser) parseOne(ctx *parseContext, rv reflect.Value) error { err := p.parseInto(ctx, rv) if err != nil { return err } token, err := ctx.Peek(0) if err != nil { return err } else if !token.EOF() && !ctx.allowTrailing { return ctx.DeepestError(UnexpectedTokenError{Unexpected: token}) } return nil } func (p *Parser) parseInto(ctx *parseContext, rv reflect.Value) error { if rv.IsNil() { return fmt.Errorf("target must be a non-nil pointer to a struct, but is a nil %s", rv.Type()) } pv, err := p.root.Parse(ctx, rv.Elem()) if len(pv) > 0 && pv[0].Type() == rv.Elem().Type() { rv.Elem().Set(reflect.Indirect(pv[0])) } if err != nil { return err } if pv == nil { token, _ := ctx.Peek(0) return ctx.DeepestError(UnexpectedTokenError{Unexpected: token}) } return nil } func (p *Parser) rootParseable(ctx *parseContext, parseable Parseable) error { peek, err := ctx.Peek(0) if err != nil { return err } err = parseable.Parse(ctx.PeekingLexer) if err == NextMatch { token, _ := ctx.Peek(0) return ctx.DeepestError(UnexpectedTokenError{Unexpected: token}) } peek, err = ctx.Peek(0) if err != nil { return err } if !peek.EOF() && !ctx.allowTrailing { return ctx.DeepestError(UnexpectedTokenError{Unexpected: peek}) } return nil } // ParseString is a convenience around Parse(). // // This may return a participle.Error. func (p *Parser) ParseString(s string, v interface{}, options ...ParseOption) error { return p.Parse(strings.NewReader(s), v, options...) } // ParseBytes is a convenience around Parse(). // // This may return a participle.Error. func (p *Parser) ParseBytes(b []byte, v interface{}, options ...ParseOption) error { return p.Parse(bytes.NewReader(b), v, options...) 
} participle-0.7.1/parser_test.go000066400000000000000000000671051376001225600165500ustar00rootroot00000000000000package participle import ( "fmt" "math" "reflect" "strconv" "strings" "testing" "github.com/stretchr/testify/require" "github.com/alecthomas/participle/lexer" ) func TestProductionCapture(t *testing.T) { type testCapture struct { A string `@Test` } _, err := Build(&testCapture{}) require.Error(t, err) } func TestTermCapture(t *testing.T) { type grammar struct { A string `@{"."}` } parser := mustTestParser(t, &grammar{}) actual := &grammar{} expected := &grammar{"..."} err := parser.ParseString("...", actual) require.NoError(t, err) require.Equal(t, expected, actual) } func TestParseScalar(t *testing.T) { type grammar struct { A string `@"one"` } parser := mustTestParser(t, &grammar{}) actual := &grammar{} err := parser.ParseString("one", actual) require.NoError(t, err) require.Equal(t, &grammar{"one"}, actual) } func TestParseGroup(t *testing.T) { type grammar struct { A string `@("one" | "two")` } parser := mustTestParser(t, &grammar{}) actual := &grammar{} err := parser.ParseString("one", actual) require.NoError(t, err) require.Equal(t, &grammar{"one"}, actual) actual = &grammar{} err = parser.ParseString("two", actual) require.NoError(t, err) require.Equal(t, &grammar{"two"}, actual) } func TestParseAlternative(t *testing.T) { type grammar struct { A string `@"one" |` B string `@"two"` } parser := mustTestParser(t, &grammar{}) actual := &grammar{} err := parser.ParseString("one", actual) require.NoError(t, err) require.Equal(t, &grammar{A: "one"}, actual) actual = &grammar{} err = parser.ParseString("two", actual) require.NoError(t, err) require.Equal(t, &grammar{B: "two"}, actual) } func TestParseSequence(t *testing.T) { type grammar struct { A string `@"one"` B string `@"two"` C string `@"three"` } parser := mustTestParser(t, &grammar{}) actual := &grammar{} expected := &grammar{"one", "two", "three"} err := parser.ParseString("one two three", actual) require.NoError(t, err) require.Equal(t, expected, actual) actual = &grammar{} expected = &grammar{} err = parser.ParseString("moo", actual) require.Error(t, err) require.Equal(t, expected, actual) } func TestNested(t *testing.T) { type nestedInner struct { B string `@"one"` C string `@"two"` } type testNested struct { A *nestedInner `@@` } parser := mustTestParser(t, &testNested{}) actual := &testNested{} expected := &testNested{A: &nestedInner{B: "one", C: "two"}} err := parser.ParseString("one two", actual) require.NoError(t, err) require.Equal(t, expected, actual) } func TestAccumulateNested(t *testing.T) { type nestedInner struct { B string `@"one"` C string `@"two"` } type testAccumulateNested struct { A []*nestedInner `@@ { @@ }` } parser := mustTestParser(t, &testAccumulateNested{}) actual := &testAccumulateNested{} expected := &testAccumulateNested{A: []*nestedInner{{B: "one", C: "two"}, {B: "one", C: "two"}}} err := parser.ParseString("one two one two", actual) require.NoError(t, err) require.Equal(t, expected, actual) } func TestRepititionNoMatch(t *testing.T) { type grammar struct { A []string `{ @"." }` } parser := mustTestParser(t, &grammar{}) expected := &grammar{} actual := &grammar{} err := parser.ParseString(``, actual) require.NoError(t, err) require.Equal(t, expected, actual) } func TestRepitition(t *testing.T) { type grammar struct { A []string `{ @"." 
}` } parser := mustTestParser(t, &grammar{}) expected := &grammar{A: []string{".", ".", "."}} actual := &grammar{} err := parser.ParseString(`...`, actual) require.NoError(t, err) require.Equal(t, expected, actual) } func TestRepititionAcrossFields(t *testing.T) { type testRepitition struct { A []string `{ @"." }` B *string `(@"b" |` C *string ` @"c")` } parser := mustTestParser(t, &testRepitition{}) b := "b" c := "c" actual := &testRepitition{} expected := &testRepitition{ A: []string{".", ".", "."}, B: &b, } err := parser.ParseString("...b", actual) require.NoError(t, err) require.Equal(t, expected, actual) actual = &testRepitition{} expected = &testRepitition{ A: []string{".", ".", "."}, C: &c, } err = parser.ParseString("...c", actual) require.NoError(t, err) require.Equal(t, expected, actual) actual = &testRepitition{} expected = &testRepitition{ B: &b, } err = parser.ParseString("b", actual) require.NoError(t, err) require.Equal(t, expected, actual) } func TestAccumulateString(t *testing.T) { type testAccumulateString struct { A string `@"." { @"." }` } parser := mustTestParser(t, &testAccumulateString{}) actual := &testAccumulateString{} expected := &testAccumulateString{ A: "...", } err := parser.ParseString("...", actual) require.NoError(t, err) require.Equal(t, expected, actual) } type Group struct { Expression *Expression `"(" @@ ")"` } type EBNFOption struct { Expression *Expression `"[" @@ "]"` } type Repetition struct { Expression *Expression `"{" @@ "}"` } type Literal struct { Start string `@String` } type Range struct { Start string `@String` End string `"…" @String` } type Term struct { Name string `@Ident |` Literal *Literal `@@ |` Range *Range `@@ |` Group *Group `@@ |` Option *EBNFOption `@@ |` Repetition *Repetition `@@` } type Sequence struct { Terms []*Term `@@ { @@ }` } type Expression struct { Alternatives []*Sequence `@@ { "|" @@ }` } type Production struct { Name string `@Ident "="` Expression []*Expression `@@ { @@ } "."` } type EBNF struct { Productions []*Production `{ @@ }` } func TestEBNFParser(t *testing.T) { parser := mustTestParser(t, &EBNF{}) expected := &EBNF{ Productions: []*Production{ { Name: "Production", Expression: []*Expression{ { Alternatives: []*Sequence{ { Terms: []*Term{ {Name: "name"}, {Literal: &Literal{Start: "="}}, { Option: &EBNFOption{ Expression: &Expression{ Alternatives: []*Sequence{ { Terms: []*Term{ {Name: "Expression"}, }, }, }, }, }, }, {Literal: &Literal{Start: "."}}, }, }, }, }, }, }, { Name: "Expression", Expression: []*Expression{ { Alternatives: []*Sequence{ { Terms: []*Term{ {Name: "Alternative"}, { Repetition: &Repetition{ Expression: &Expression{ Alternatives: []*Sequence{ { Terms: []*Term{ {Literal: &Literal{Start: "|"}}, {Name: "Alternative"}, }, }, }, }, }, }, }, }, }, }, }, }, { Name: "Alternative", Expression: []*Expression{ { Alternatives: []*Sequence{ { Terms: []*Term{ {Name: "Term"}, { Repetition: &Repetition{ Expression: &Expression{ Alternatives: []*Sequence{ { Terms: []*Term{ {Name: "Term"}, }, }, }, }, }, }, }, }, }, }, }, }, { Name: "Term", Expression: []*Expression{ { Alternatives: []*Sequence{ {Terms: []*Term{{Name: "name"}}}, { Terms: []*Term{ {Name: "token"}, { Option: &EBNFOption{ Expression: &Expression{ Alternatives: []*Sequence{ { Terms: []*Term{ {Literal: &Literal{Start: "…"}}, {Name: "token"}, }, }, }, }, }, }, }, }, {Terms: []*Term{{Literal: &Literal{Start: "@@"}}}}, {Terms: []*Term{{Name: "Group"}}}, {Terms: []*Term{{Name: "EBNFOption"}}}, {Terms: []*Term{{Name: "Repetition"}}}, }, }, }, }, { 
Name: "Group", Expression: []*Expression{ { Alternatives: []*Sequence{ { Terms: []*Term{ {Literal: &Literal{Start: "("}}, {Name: "Expression"}, {Literal: &Literal{Start: ")"}}, }, }, }, }, }, }, { Name: "EBNFOption", Expression: []*Expression{ { Alternatives: []*Sequence{ { Terms: []*Term{ {Literal: &Literal{Start: "["}}, {Name: "Expression"}, {Literal: &Literal{Start: "]"}}, }, }, }, }, }, }, { Name: "Repetition", Expression: []*Expression{ { Alternatives: []*Sequence{ { Terms: []*Term{ {Literal: &Literal{Start: "{"}}, {Name: "Expression"}, {Literal: &Literal{Start: "}"}}, }, }, }, }, }, }, }, } actual := &EBNF{} err := parser.ParseString(strings.TrimSpace(` Production = name "=" [ Expression ] "." . Expression = Alternative { "|" Alternative } . Alternative = Term { Term } . Term = name | token [ "…" token ] | "@@" | Group | EBNFOption | Repetition . Group = "(" Expression ")" . EBNFOption = "[" Expression "]" . Repetition = "{" Expression "}" . `), actual) require.NoError(t, err) require.Equal(t, expected, actual) } func TestParseExpression(t *testing.T) { type testNestA struct { A string `":" @{ "a" }` } type testNestB struct { B string `";" @{ "b" }` } type testExpression struct { A *testNestA `@@ |` B *testNestB `@@` } parser := mustTestParser(t, &testExpression{}) expected := &testExpression{ B: &testNestB{ B: "b", }, } actual := &testExpression{} err := parser.ParseString(";b", actual) require.NoError(t, err) require.Equal(t, expected, actual) } func TestParseOptional(t *testing.T) { type testOptional struct { A string `[ @"a" @"b" ]` B string `@"c"` } parser := mustTestParser(t, &testOptional{}) expected := &testOptional{B: "c"} actual := &testOptional{} err := parser.ParseString(`c`, actual) require.NoError(t, err) require.Equal(t, expected, actual) } func TestHello(t *testing.T) { type testHello struct { Hello string `@"hello"` To string `@String` } parser := mustTestParser(t, &testHello{}) expected := &testHello{"hello", "Bobby Brown"} actual := &testHello{} err := parser.ParseString(`hello "Bobby Brown"`, actual) require.NoError(t, err) require.Equal(t, expected, actual) } func mustTestParser(t *testing.T, grammar interface{}, options ...Option) *Parser { t.Helper() parser, err := Build(grammar, options...) require.NoError(t, err) return parser } func BenchmarkEBNFParser(b *testing.B) { parser, err := Build(&EBNF{}) require.NoError(b, err) b.ResetTimer() source := strings.TrimSpace(` Production = name "=" [ Expression ] "." . Expression = Alternative { "|" Alternative } . Alternative = Term { Term } . Term = name | token [ "…" token ] | "@@" | Group | EBNFOption | Repetition . Group = "(" Expression ")" . EBNFOption = "[" Expression "]" . Repetition = "{" Expression "}" . `) for i := 0; i < b.N; i++ { actual := &EBNF{} _ = parser.ParseString(source, actual) } } func TestRepeatAcrossFields(t *testing.T) { type grammar struct { A string `{ @("." ">") |` B string ` @("," "<") }` } parser := mustTestParser(t, &grammar{}) actual := &grammar{} expected := &grammar{A: ".>.>.>.>", B: ",<,<,<"} err := parser.ParseString(".>,<.>.>,<.>,<", actual) require.NoError(t, err) require.Equal(t, expected, actual) } func TestPosInjection(t *testing.T) { type subgrammar struct { Pos lexer.Position B string `@{ "," }` EndPos lexer.Position } type grammar struct { Pos lexer.Position A string `@{ "." 
}` B *subgrammar `@@` C string `@"."` EndPos lexer.Position } parser := mustTestParser(t, &grammar{}) actual := &grammar{} expected := &grammar{ Pos: lexer.Position{ Offset: 3, Line: 1, Column: 4, }, A: "...", B: &subgrammar{ B: ",,,", Pos: lexer.Position{ Offset: 6, Line: 1, Column: 7, }, EndPos: lexer.Position{ Offset: 9, Line: 1, Column: 10, }, }, C: ".", EndPos: lexer.Position{ Offset: 10, Line: 1, Column: 11, }, } err := parser.ParseString(" ...,,,.", actual) require.NoError(t, err) require.Equal(t, expected, actual) } type parseableCount int func (c *parseableCount) Capture(values []string) error { *c += parseableCount(len(values)) return nil } func TestCaptureInterface(t *testing.T) { type grammar struct { Count parseableCount `{ @"a" }` } parser := mustTestParser(t, &grammar{}) actual := &grammar{} expected := &grammar{Count: 3} err := parser.ParseString("a a a", actual) require.NoError(t, err) require.Equal(t, expected, actual) } type unmarshallableCount int func (u *unmarshallableCount) UnmarshalText(text []byte) error { *u += unmarshallableCount(len(text)) return nil } func TestTextUnmarshalerInterface(t *testing.T) { type grammar struct { Count unmarshallableCount `{ @"a" }` } parser := mustTestParser(t, &grammar{}) actual := &grammar{} expected := &grammar{Count: 3} err := parser.ParseString("a a a", actual) require.NoError(t, err) require.Equal(t, expected, actual) } func TestLiteralTypeConstraint(t *testing.T) { type grammar struct { Literal string `@"123456":String` } parser := mustTestParser(t, &grammar{}) actual := &grammar{} expected := &grammar{Literal: "123456"} err := parser.ParseString(`"123456"`, actual) require.NoError(t, err) require.Equal(t, expected, actual) err = parser.ParseString(`123456`, actual) require.Error(t, err) } type nestedCapture struct { Tokens []string } func (n *nestedCapture) Capture(tokens []string) error { n.Tokens = tokens return nil } func TestStructCaptureInterface(t *testing.T) { type grammar struct { Capture *nestedCapture `@String` } parser, err := Build(&grammar{}) require.NoError(t, err) actual := &grammar{} expected := &grammar{Capture: &nestedCapture{Tokens: []string{"hello"}}} err = parser.ParseString(`"hello"`, actual) require.NoError(t, err) require.Equal(t, expected, actual) } type parseableStruct struct { Tokens []string } func (p *parseableStruct) Parse(lex *lexer.PeekingLexer) error { tokens, err := lexer.ConsumeAll(lex) if err != nil { return err } for _, t := range tokens { p.Tokens = append(p.Tokens, t.Value) } return nil } func TestParseable(t *testing.T) { type grammar struct { Inner *parseableStruct `@@` } parser, err := Build(&grammar{}) require.NoError(t, err) actual := &grammar{} expected := &grammar{Inner: &parseableStruct{Tokens: []string{"hello", "123", "world", ""}}} err = parser.ParseString(`hello 123 "world"`, actual) require.NoError(t, err) require.Equal(t, expected, actual) } func TestStringConcat(t *testing.T) { type grammar struct { Field string `@"." { @"." }` } parser, err := Build(&grammar{}) require.NoError(t, err) actual := &grammar{} expected := &grammar{"...."} err = parser.ParseString(`. . . 
.`, actual) require.NoError(t, err) require.Equal(t, expected, actual) } func TestParseIntSlice(t *testing.T) { type grammar struct { Field []int `@Int { @Int }` } parser := mustTestParser(t, &grammar{}) actual := &grammar{} expected := &grammar{[]int{1, 2, 3, 4}} err := parser.ParseString(`1 2 3 4`, actual) require.NoError(t, err) require.Equal(t, expected, actual) } func TestEmptyStructErrorsNotPanicsIssue21(t *testing.T) { type grammar struct { Foo struct{} `@@` } _, err := Build(&grammar{}) require.Error(t, err) } func TestMultipleTokensIntoScalar(t *testing.T) { var grammar struct { Field int `@("-" Int)` } p, err := Build(&grammar) require.NoError(t, err) err = p.ParseString(`- 10`, &grammar) require.NoError(t, err) require.Equal(t, -10, grammar.Field) } type posMixin struct { Pos lexer.Position } func TestMixinPosIsPopulated(t *testing.T) { var grammar struct { posMixin Int int `@Int` } p := mustTestParser(t, &grammar) err := p.ParseString("10", &grammar) require.NoError(t, err) require.Equal(t, 10, grammar.Int) require.Equal(t, 1, grammar.Pos.Column) require.Equal(t, 1, grammar.Pos.Line) } type testParserMixin struct { A string `@Ident` B string `@Ident` } func TestMixinFieldsAreParsed(t *testing.T) { var grammar struct { testParserMixin C string `@Ident` } p := mustTestParser(t, &grammar) err := p.ParseString("one two three", &grammar) require.NoError(t, err) require.Equal(t, "one", grammar.A) require.Equal(t, "two", grammar.B) require.Equal(t, "three", grammar.C) } func TestNestedOptional(t *testing.T) { type grammar struct { Args []string `"(" [ @Ident { "," @Ident } ] ")"` } p := mustTestParser(t, &grammar{}) actual := &grammar{} err := p.ParseString(`()`, actual) require.NoError(t, err) err = p.ParseString(`(a)`, actual) require.NoError(t, err) err = p.ParseString(`(a, b, c)`, actual) require.NoError(t, err) err = p.ParseString(`(1)`, actual) require.Error(t, err) } type captureableWithPosition struct { Pos lexer.Position Value string } func (c *captureableWithPosition) Capture(values []string) error { c.Value = strings.Join(values, " ") return nil } func TestIssue35(t *testing.T) { type grammar struct { Value *captureableWithPosition `@Ident` } p := mustTestParser(t, &grammar{}) actual := &grammar{} err := p.ParseString(`hello`, actual) require.NoError(t, err) expected := &grammar{Value: &captureableWithPosition{ Pos: lexer.Position{Column: 1, Offset: 0, Line: 1}, Value: "hello", }} require.Equal(t, expected, actual) } func TestInvalidNumbers(t *testing.T) { type grammar struct { Int8 int8 ` "int8" @Int` Int16 int16 `| "int16" @Int` Int32 int32 `| "int32" @Int` Int64 int64 `| "int64" @Int` Uint8 uint8 `| "uint8" @Int` Uint16 uint16 `| "uint16" @Int` Uint32 uint32 `| "uint32" @Int` Uint64 uint64 `| "uint64" @Int` Float32 float32 `| "float32" @Float` Float64 float64 `| "float64" @Float` } p := mustTestParser(t, &grammar{}) tests := []struct { name string input string expected *grammar err bool }{ {name: "ValidInt8", input: "int8 127", expected: &grammar{Int8: 127}}, {name: "InvalidInt8", input: "int8 129", err: true}, {name: "ValidInt16", input: "int16 32767", expected: &grammar{Int16: 32767}}, {name: "InvalidInt16", input: "int16 32768", err: true}, {name: "ValidInt32", input: fmt.Sprintf("int32 %d", math.MaxInt32), expected: &grammar{Int32: math.MaxInt32}}, {name: "InvalidInt32", input: fmt.Sprintf("int32 %d", math.MaxInt32+1), err: true}, {name: "ValidInt64", input: fmt.Sprintf("int64 %d", math.MaxInt64), expected: &grammar{Int64: math.MaxInt64}}, {name: "InvalidInt64", 
input: "int64 9223372036854775808", err: true}, {name: "ValidFloat64", input: "float64 1234.5", expected: &grammar{Float64: 1234.5}}, {name: "InvalidFloat64", input: "float64 asdf", err: true}, } for _, test := range tests { // nolint: scopelint t.Run(test.name, func(t *testing.T) { actual := &grammar{} err := p.ParseString(test.input, actual) if test.err { require.Error(t, err, fmt.Sprintf("%#v", actual)) } else { require.NoError(t, err) require.Equal(t, test.expected, actual) } }) } } // We'd like this to work, but it can wait. func TestPartialAST(t *testing.T) { type grammar struct { Succeed string `@Ident` Fail string `@"foo"` } p := mustTestParser(t, &grammar{}) actual := &grammar{} err := p.ParseString(`foo bar`, actual) require.Error(t, err) expected := &grammar{Succeed: "foo"} require.Equal(t, expected, actual) } func TestCaseInsensitive(t *testing.T) { type grammar struct { Select string `"select":Keyword @Ident` } lex := lexer.Must(lexer.Regexp( `(?i)(?PSELECT)` + `|(?P\w+)` + `|(\s+)`, )) p := mustTestParser(t, &grammar{}, Lexer(lex), CaseInsensitive("Keyword")) actual := &grammar{} err := p.ParseString(`SELECT foo`, actual) expected := &grammar{"foo"} require.NoError(t, err) require.Equal(t, expected, actual) actual = &grammar{} err = p.ParseString(`select foo`, actual) require.NoError(t, err) require.Equal(t, expected, actual) } func TestTokenAfterRepeatErrors(t *testing.T) { type grammar struct { Text string `{ @Ident } "foo"` } p := mustTestParser(t, &grammar{}) ast := &grammar{} err := p.ParseString(``, ast) require.Error(t, err) } func TestEOFAfterRepeat(t *testing.T) { type grammar struct { Text string `{ @Ident }` } p := mustTestParser(t, &grammar{}) ast := &grammar{} err := p.ParseString(``, ast) require.NoError(t, err) } func TestTrailing(t *testing.T) { type grammar struct { Text string `@Ident` } p := mustTestParser(t, &grammar{}) err := p.ParseString(`foo bar`, &grammar{}) require.Error(t, err) } func TestModifiers(t *testing.T) { nonEmptyGrammar := &struct { A string `@( ("x"? "y"? "z"?)! "b" )` }{} tests := []struct { name string grammar interface{} input string expected string fail bool }{ {name: "NonMatchingOptionalNonEmpty", input: "b", fail: true, grammar: nonEmptyGrammar}, {name: "NonEmptyMatch", input: "x b", expected: "xb", grammar: nonEmptyGrammar}, {name: "NonEmptyMatchAll", input: "x y z b", expected: "xyzb", grammar: nonEmptyGrammar}, {name: "NonEmptyMatchSome", input: "x z b", expected: "xzb", grammar: nonEmptyGrammar}, {name: "MatchingOptional", input: "a b", expected: "ab", grammar: &struct { A string `@( "a"? "b" )` }{}}, {name: "NonMatchingOptionalIsSkipped", input: "b", expected: "b", grammar: &struct { A string `@( "a"? 
"b" )` }{}}, {name: "MatchingOneOrMore", input: "a a a a a", expected: "aaaaa", grammar: &struct { A string `@( "a"+ )` }{}}, {name: "NonMatchingOneOrMore", input: "", fail: true, grammar: &struct { A string `@( "a"+ )` }{}}, {name: "MatchingZeroOrMore", input: "aaaaaaa", fail: true, grammar: &struct { A string `@( "a"* )` }{}}, {name: "NonMatchingZeroOrMore", input: "", grammar: &struct { A string `@( "a"* )` }{}}, } for _, test := range tests { // nolint: scopelint t.Run(test.name, func(t *testing.T) { p := mustTestParser(t, test.grammar) err := p.ParseString(test.input, test.grammar) if test.fail { require.Error(t, err) } else { require.NoError(t, err) actual := reflect.ValueOf(test.grammar).Elem().FieldByName("A").String() require.Equal(t, test.expected, actual) } }) } } func TestStreamingParser(t *testing.T) { type token struct { Str string ` @Ident` Num int `| @Int` } parser := mustTestParser(t, &token{}) tokens := make(chan *token, 128) err := parser.ParseString(`hello 10 11 12 world`, tokens) actual := []*token{} for token := range tokens { actual = append(actual, token) } expected := []*token{ {Str: "hello", Num: 0}, {Str: "", Num: 10}, {Str: "", Num: 11}, {Str: "", Num: 12}, {Str: "world", Num: 0}, } require.Equal(t, expected, actual) require.NoError(t, err) } func TestIssue60(t *testing.T) { type grammar struct { A string `@("one" | | "two")` } _, err := Build(&grammar{}) require.Error(t, err) } type Issue62Bar struct { A int } func (x *Issue62Bar) Parse(lex *lexer.PeekingLexer) error { token, err := lex.Next() if err != nil { return err } x.A, err = strconv.Atoi(token.Value) return err } type Issue62Foo struct { Bars []Issue62Bar `parser:"@@+"` } func TestIssue62(t *testing.T) { _, err := Build(&Issue62Foo{}) require.NoError(t, err) } // nolint: structcheck func TestIssue71(t *testing.T) { type Sub struct { name string `@Ident` } type grammar struct { pattern *Sub `@@` } _, err := Build(&grammar{}) require.Error(t, err) } func TestAllowTrailing(t *testing.T) { type G struct { Name string `@Ident` } p, err := Build(&G{}) require.NoError(t, err) g := &G{} err = p.ParseString(`hello world`, g) require.Error(t, err) err = p.ParseString(`hello world`, g, AllowTrailing(true)) require.NoError(t, err) require.Equal(t, &G{"hello"}, g) } func TestDisjunctionErrorReporting(t *testing.T) { type statement struct { Add bool ` @"add"` Remove bool `| @"remove"` } type grammar struct { Statements []*statement `"{" ( @@ )* "}"` } p := mustTestParser(t, &grammar{}) ast := &grammar{} err := p.ParseString(`{ add foo }`, ast) // TODO: This should produce a more useful error. This is returned by sequence.Parse(). 
func TestCustomInt(t *testing.T) {
	type MyInt int
	type G struct {
		Value MyInt `@Int`
	}

	p, err := Build(&G{})
	require.NoError(t, err)

	g := &G{}
	err = p.ParseString(`42`, g)
	require.NoError(t, err)
	require.Equal(t, &G{42}, g)
}

func TestBoolIfSet(t *testing.T) {
	type G struct {
		Value bool `@"true"?`
	}

	p, err := Build(&G{})
	require.NoError(t, err)

	g := &G{}
	err = p.ParseString(`true`, g)
	require.NoError(t, err)
	require.Equal(t, &G{true}, g)
	err = p.ParseString(``, g)
	require.NoError(t, err)
	require.Equal(t, &G{false}, g)
}

func TestCustomBoolIfSet(t *testing.T) {
	type MyBool bool
	type G struct {
		Value MyBool `@"true"?`
	}

	p, err := Build(&G{})
	require.NoError(t, err)

	g := &G{}
	err = p.ParseString(`true`, g)
	require.NoError(t, err)
	require.Equal(t, &G{true}, g)
	err = p.ParseString(``, g)
	require.NoError(t, err)
	require.Equal(t, &G{false}, g)
}

func TestPointerToList(t *testing.T) {
	type grammar struct {
		List *[]string `@Ident*`
	}
	p := mustTestParser(t, &grammar{})
	ast := &grammar{}
	err := p.ParseString(`foo bar`, ast)
	require.NoError(t, err)
	l := []string{"foo", "bar"}
	require.Equal(t, &grammar{List: &l}, ast)
}

// I'm not sure if this is a problem that should be solved like this.
// func TestMatchHydratesNullFields(t *testing.T) {
// 	type grammar struct {
// 		List []string `"{" @Ident* "}"`
// 	}
// 	p := mustTestParser(t, &grammar{})
// 	ast := &grammar{}
// 	err := p.ParseString(`{}`, ast)
// 	require.NoError(t, err)
// 	require.NotNil(t, ast.List)
// }

func TestNegation(t *testing.T) {
	type grammar struct {
		EverythingUntilSemicolon *[]string `@!';'* @';'`
	}
	p := mustTestParser(t, &grammar{})
	ast := &grammar{}
	err := p.ParseString(`hello world ;`, ast)
	require.NoError(t, err)
	require.Equal(t, &[]string{"hello", "world", ";"}, ast.EverythingUntilSemicolon)

	err = p.ParseString(`hello world`, ast)
	require.Error(t, err)
}

func TestNegationWithPattern(t *testing.T) {
	type grammar struct {
		EverythingMoreComplex *[]string `@!(';' String)* @';' @String`
	}
	p := mustTestParser(t, &grammar{})
	// j, err := json.MarshalIndent(p.root, "", " ")
	// log.Print(j)
	// log.Print(stringer(p.root))
	ast := &grammar{}
	err := p.ParseString(`hello world ; 'some-str'`, ast)
	require.NoError(t, err)
	require.Equal(t, &[]string{"hello", "world", ";", `some-str`}, ast.EverythingMoreComplex)

	err = p.ParseString(`hello ; world ; 'hey'`, ast)
	require.NoError(t, err)
	require.Equal(t, &[]string{"hello", ";", "world", ";", `hey`}, ast.EverythingMoreComplex)

	err = p.ParseString(`hello ; world ;`, ast)
	require.Error(t, err)
}

func TestNegationWithDisjunction(t *testing.T) {
	type grammar struct {
		EverythingMoreComplex *[]string `@!(';' | ',')* @(';' | ',')`
	}

	// Note: we need more lookahead since (';' String) needs some before failing to match
	p := mustTestParser(t, &grammar{})
	ast := &grammar{}
	err := p.ParseString(`hello world ;`, ast)
	require.NoError(t, err)
	require.Equal(t, &[]string{"hello", "world", ";"}, ast.EverythingMoreComplex)

	err = p.ParseString(`hello world , `, ast)
	require.NoError(t, err)
	require.Equal(t, &[]string{"hello", "world", ","}, ast.EverythingMoreComplex)
}
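// A minimal usage sketch building on TestNegation above (hypothetical
// grammar, not part of this test suite): capturing everything up to a
// terminator without capturing the terminator itself.
//
//	type Directive struct {
//		Args []string `parser:"@!';'* ';'"`
//	}
//
// Parsing `foo bar ;` into Directive should yield Args == []string{"foo", "bar"}.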
participle-0.7.1/stringer.go

package participle

import (
	"bytes"
	"fmt"
	"strings"

	"github.com/alecthomas/participle/lexer"
)

type stringerVisitor struct {
	bytes.Buffer
	seen map[node]bool
}

func stringern(n node, depth int) string {
	v := &stringerVisitor{seen: map[node]bool{}}
	v.visit(n, depth)
	return v.String()
}

func stringer(n node) string {
	return stringern(n, 1)
}

func (s *stringerVisitor) visit(n node, depth int) { // nolint: gocognit
	if s.seen[n] || depth <= 0 {
		fmt.Fprintf(s, "...")
		return
	}
	s.seen[n] = true

	switch n := n.(type) {
	case *disjunction:
		for i, c := range n.nodes {
			if i > 0 {
				fmt.Fprint(s, " | ")
			}
			s.visit(c, depth)
		}

	case *strct:
		s.visit(n.expr, depth)

	case *sequence:
		c := n
		for i := 0; c != nil && depth-i > 0; c, i = c.next, i+1 {
			if c != n {
				fmt.Fprint(s, " ")
			}
			s.visit(c.node, depth-i)
		}

	case *parseable:
		fmt.Fprintf(s, "<%s>", strings.ToLower(n.t.Name()))

	case *capture:
		if _, ok := n.node.(*parseable); ok {
			fmt.Fprintf(s, "<%s>", strings.ToLower(n.field.Name))
		} else if n.node == nil {
			fmt.Fprintf(s, "<%s>", strings.ToLower(n.field.Name))
		} else {
			s.visit(n.node, depth)
		}

	case *reference:
		fmt.Fprintf(s, "<%s>", strings.ToLower(n.identifier))

	case *optional:
		composite := compositeNode(map[node]bool{}, n, true)
		if composite {
			fmt.Fprint(s, "(")
		}
		s.visit(n.node, depth)
		if composite {
			fmt.Fprint(s, ")")
		}
		fmt.Fprint(s, "?")

	case *repetition:
		composite := compositeNode(map[node]bool{}, n, true)
		if composite {
			fmt.Fprint(s, "(")
		}
		s.visit(n.node, depth)
		if composite {
			fmt.Fprint(s, ")")
		}
		fmt.Fprint(s, "*")

	case *negation:
		fmt.Fprintf(s, "!")
		composite := compositeNode(map[node]bool{}, n, true)
		if composite {
			fmt.Fprint(s, "(")
		}
		s.visit(n.node, depth)
		if composite {
			fmt.Fprint(s, ")")
		}

	case *literal:
		fmt.Fprintf(s, "%q", n.s)
		if n.t != lexer.EOF && n.s == "" {
			fmt.Fprintf(s, ":%s", n.tt)
		}

	case *group:
		composite := (n.mode != groupMatchOnce) && compositeNode(map[node]bool{}, n, true)

		if composite {
			fmt.Fprint(s, "(")
		}
		if child, ok := n.expr.(*group); ok && child.mode == groupMatchOnce {
			s.visit(child.expr, depth)
		} else if child, ok := n.expr.(*capture); ok {
			if grandchild, ok := child.node.(*group); ok && grandchild.mode == groupMatchOnce {
				s.visit(grandchild.expr, depth)
			} else {
				s.visit(n.expr, depth)
			}
		} else {
			s.visit(n.expr, depth)
		}
		if composite {
			fmt.Fprint(s, ")")
		}
		switch n.mode {
		case groupMatchNonEmpty:
			fmt.Fprintf(s, "!")
		case groupMatchZeroOrOne:
			fmt.Fprintf(s, "?")
		case groupMatchZeroOrMore:
			fmt.Fprintf(s, "*")
		case groupMatchOneOrMore:
			fmt.Fprintf(s, "+")
		}

	default:
		panic("unsupported")
	}
}

func compositeNode(seen map[node]bool, n node, strctAsComposite bool) bool {
	if n == nil || seen[n] {
		return false
	}
	seen[n] = true

	switch n := n.(type) {
	case *sequence:
		return n.next != nil

	case *disjunction:
		for _, c := range n.nodes {
			if compositeNode(seen, c, strctAsComposite) {
				return true
			}
		}
		return false

	case *reference, *literal, *parseable:
		return false

	case *negation:
		return compositeNode(seen, n.node, strctAsComposite)

	case *strct:
		return strctAsComposite && compositeNode(seen, n.expr, strctAsComposite)

	case *capture:
		return compositeNode(seen, n.node, strctAsComposite)

	case *optional:
		return compositeNode(seen, n.node, strctAsComposite)

	case *repetition:
		return compositeNode(seen, n.node, strctAsComposite)

	case *group:
		return compositeNode(seen, n.expr, strctAsComposite)

	default:
		panic("unsupported")
	}
}
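// Rough usage, as seen in the commented-out debugging lines in the tests:
// stringer(p.root) renders a parser's grammar tree in an EBNF-like form,
// while stringern bounds recursion depth; already-visited or too-deep nodes
// render as "...".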
participle-0.7.1/struct.go

package participle

import (
	"fmt"
	"reflect"

	"github.com/alecthomas/participle/lexer"
)

// A structLexer lexes over the tags of struct fields while tracking the current field.
type structLexer struct {
	s       reflect.Type
	field   int
	indexes [][]int
	lexer   *lexer.PeekingLexer
}

func lexStruct(s reflect.Type) (*structLexer, error) {
	indexes, err := collectFieldIndexes(s)
	if err != nil {
		return nil, err
	}
	slex := &structLexer{
		s:       s,
		indexes: indexes,
	}
	if len(slex.indexes) > 0 {
		tag := fieldLexerTag(slex.Field().StructField)
		slex.lexer, err = lexer.Upgrade(lexer.LexString(tag))
		if err != nil {
			return nil, err
		}
	}
	return slex, nil
}

// NumField returns the number of fields in the struct associated with this structLexer.
func (s *structLexer) NumField() int {
	return len(s.indexes)
}

type structLexerField struct {
	reflect.StructField
	Index []int
}

// Field returns the field associated with the current token.
func (s *structLexer) Field() structLexerField {
	return s.GetField(s.field)
}

func (s *structLexer) GetField(field int) structLexerField {
	if field >= len(s.indexes) {
		field = len(s.indexes) - 1
	}
	return structLexerField{
		StructField: s.s.FieldByIndex(s.indexes[field]),
		Index:       s.indexes[field],
	}
}

func (s *structLexer) Peek() (lexer.Token, error) {
	field := s.field
	lex := s.lexer
	for {
		token, err := lex.Peek(0)
		if err != nil {
			return token, err
		}
		if !token.EOF() {
			token.Pos.Line = field + 1
			return token, nil
		}
		field++
		if field >= s.NumField() {
			return lexer.EOFToken(token.Pos), nil
		}
		tag := fieldLexerTag(s.GetField(field).StructField)
		lex, err = lexer.Upgrade(lexer.LexString(tag))
		if err != nil {
			return token, err
		}
	}
}

func (s *structLexer) Next() (lexer.Token, error) {
	token, err := s.lexer.Next()
	if err != nil {
		return token, err
	}
	if !token.EOF() {
		token.Pos.Line = s.field + 1
		return token, nil
	}
	if s.field+1 >= s.NumField() {
		return lexer.EOFToken(token.Pos), nil
	}
	s.field++
	tag := fieldLexerTag(s.Field().StructField)
	s.lexer, err = lexer.Upgrade(lexer.LexString(tag))
	if err != nil {
		return token, err
	}
	return s.Next()
}

func fieldLexerTag(field reflect.StructField) string {
	if tag, ok := field.Tag.Lookup("parser"); ok {
		return tag
	}
	return string(field.Tag)
}

// Recursively collect flattened indices for top-level fields and embedded fields.
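// Note: both Peek and Next above report the 1-based index of the struct
// field as the token's Line, so grammar errors can point at the offending
// field. TestStructLexerTokens in struct_test.go depends on this behaviour.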
func collectFieldIndexes(s reflect.Type) (out [][]int, err error) {
	if s.Kind() != reflect.Struct {
		return nil, fmt.Errorf("expected a struct but got %q", s)
	}
	defer decorate(&err, s.String)
	for i := 0; i < s.NumField(); i++ {
		f := s.Field(i)
		switch {
		case f.Anonymous: // nolint: gocritic
			children, err := collectFieldIndexes(f.Type)
			if err != nil {
				return nil, err
			}
			for _, idx := range children {
				out = append(out, append(f.Index, idx...))
			}
		case f.PkgPath != "":
			continue
		case fieldLexerTag(f) != "":
			out = append(out, f.Index)
		}
	}
	return
}

participle-0.7.1/struct_test.go

package participle

import (
	"reflect"
	"testing"
	"text/scanner"

	"github.com/stretchr/testify/require"

	"github.com/alecthomas/participle/lexer"
)

func TestStructLexerTokens(t *testing.T) {
	type testScanner struct {
		A string `12`
		B string `34`
	}

	scan, err := lexStruct(reflect.TypeOf(testScanner{}))
	require.NoError(t, err)
	t12 := lexer.Token{Type: scanner.Int, Value: "12", Pos: lexer.Position{Line: 1, Column: 1}}
	t34 := lexer.Token{Type: scanner.Int, Value: "34", Pos: lexer.Position{Line: 2, Column: 1}}
	require.Equal(t, t12, mustPeek(scan))
	require.Equal(t, 0, scan.field)
	require.Equal(t, t12, mustNext(scan))

	require.Equal(t, t34, mustPeek(scan))
	require.Equal(t, 0, scan.field)
	require.Equal(t, t34, mustNext(scan))
	require.Equal(t, 1, scan.field)

	require.True(t, mustNext(scan).EOF())
}

func TestStructLexer(t *testing.T) {
	g := struct {
		A string `"a"|`
		B string `"b"`
	}{}

	gt := reflect.TypeOf(g)
	r, err := lexStruct(gt)
	require.NoError(t, err)
	f := []structLexerField{}
	s := ""
	for {
		_, err := r.Peek()
		require.NoError(t, err)
		rn, err := r.Next()
		require.NoError(t, err)
		if rn.EOF() {
			break
		}
		f = append(f, r.Field())
		s += rn.String()
	}
	require.Equal(t, `a|b`, s)
	f0 := r.GetField(0)
	f1 := r.GetField(1)
	require.Equal(t, []structLexerField{f0, f0, f1}, f)
}

type testEmbeddedIndexes struct {
	A string `@String`
	B string `@String`
}

func TestCollectFieldIndexes(t *testing.T) {
	var grammar struct {
		testEmbeddedIndexes
		C string `@String`
	}
	typ := reflect.TypeOf(grammar)
	indexes, err := collectFieldIndexes(typ)
	require.NoError(t, err)
	require.Equal(t, [][]int{{0, 0}, {0, 1}, {1}}, indexes)
}

func mustPeek(scan *structLexer) lexer.Token {
	token, err := scan.Peek()
	if err != nil {
		panic(err)
	}
	return token
}

func mustNext(scan *structLexer) lexer.Token { // nolint: interfacer
	token, err := scan.Next()
	if err != nil {
		panic(err)
	}
	return token
}