generator-1.7.0/.gitignore

/node_modules/
.tern-*
/dist
/test/*.js
/.rpt2_cache

generator-1.7.0/.mocharc.cjs

module.exports = {
  extension: ["ts"],
  spec: ["test/test-*.ts"],
  loader: "ts-node/esm/transpile-only"
}

generator-1.7.0/.npmignore

/node_modules

generator-1.7.0/CHANGELOG.md

## 1.7.0 (2024-03-12)

### Bug fixes

Include type declarations for the Rollup plugin.

Named or `@export`-ed specialized tokens are now available in the terms file.

### New features

The generator now emits a warning when rules generate a lot of different variants (usually due to a combinatory explosion of `?` and `|` operators).

## 1.6.0 (2024-01-08)

### Bug fixes

Fix an issue where the generator could output invalid JavaScript when a specialization used a string that started with a number.

Adjust TypeScript output to compile with recent tsc versions.

Add a test for zero-length node mounts.

### New features

Support an `exportName` option to the Rollup plugin.

## 1.5.1 (2023-09-15)

### Bug fixes

Fix a quadratic complexity in state merging.

## 1.5.0 (2023-08-20)

### Bug fixes

Fix a build issue that made the ES version of the Rollup plugin fail to load.

### New features

The new `typeScript` option to `buildParserFile` (and `--typeScript` option to lezer-generator) makes the tool emit TypeScript code.

## 1.4.2 (2023-08-17)

### Bug fixes

Fix a regression in the build process that caused the Rollup plugin to not be part of the npm package.

## 1.4.1 (2023-08-17)

### Bug fixes

Make this package usable in TypeScript setups with node16/nodenext resolution.

## 1.4.0 (2023-08-11)

### New features

`BuildOptions.contextTracker` now takes a function, so that the code that produces it has access to the term IDs.

## 1.3.0 (2023-06-15)

### New features

The test utilities can now be imported as `"@lezer/generator/test"`.

## 1.2.4 (2023-06-12)

### Bug fixes

Fix a bug where precedences specified for local tokens were not properly applied.

## 1.2.3 (2023-04-28)

### Bug fixes

Make sure the Rollup plugin imports the rest of the library using a full file path.

Make `--help` show the correct executable name.

## 1.2.2 (2023-01-18)

### Bug fixes

Make sure `require` isn't used as an identifier in generator output.

## 1.2.1 (2023-01-13)

### Bug fixes

Fix an invalid optimization that sometimes led to incomplete token precedence tables, leading to incorrect tokenization.

## 1.2.0 (2023-01-09)

### Bug fixes

Fix a bug where the error about using `()` to denote empty options in a choice expression was signalled even when that is what the input did.

### New features

Grammars can now declare `@local tokens` blocks defining all the tokens that may appear in a set of states, and allowing `@else` tokens that match everything else.

## 1.1.3 (2022-11-07)

### Bug fixes

Fix broken Rollup plugin due to a bad import.

## 1.1.2 (2022-11-07)

### Bug fixes

Fix a bug where the `_` notation in tokens would match only half of a surrogate pair.
Fix a bug that caused invalid tokenizer data to be generated for character ranges ending in `\uffff`.

## 1.1.1 (2022-08-03)

### Bug fixes

Emit enough information about external specializers to make `ParserConfig.specializers` fixable.

## 1.1.0 (2022-06-27)

### New features

Things that used to be written like `std.digit` are now written `@digit`. The old notation will remain available until a breaking release.

The new `@eof` marker can be used in tokens to match the end of the input.

## 1.0.0 (2022-06-06)

### New features

First stable version.

## 0.16.0 (2022-04-20)

### Bug fixes

Fix an issue in the tokenizer for grammars that could cause it to run very slowly on some inputs.

### New features

A grammar can now include an `@external propSource a from "b"` declaration to import a programmatically defined node prop source.

## 0.15.4 (2022-01-28)

### Bug fixes

Fix a bug where explicitly specified token precedences were sometimes not properly enforced.

## 0.15.3 (2022-01-21)

### Bug fixes

Fix a bug that caused some kinds of skip rules (those ending in something optional) to not work correctly.

## 0.15.2 (2021-09-24)

### Bug fixes

Fix an infinite recursion caused by some kinds of (obscure) token state machines.

## 0.15.1 (2021-09-03)

### Bug fixes

Fix a bug that could lead to spurious 'inconsistent skip sets' errors.

Fix a bug that caused an unescaped '-' at the start or end of a character set to silently be converted to a nonsensical character.

Fix a confusing behavior where literal tokens declared in the `@tokens` block didn't get names when they started with a lower-case character.

### New features

Top rules may now be defined inside `@skip` scopes.

The parser no longer treats an empty position in a choice operator as the empty expression, but requires an explicit `()` marker (to avoid a common mistake).

## 0.15.0 (2021-08-11)

### Breaking changes

The module's name changed from `lezer-generator` to `@lezer/generator`.

Nested parsers can no longer be specified in the grammar (they must now be set up programmatically).

### Bug fixes

Fix an issue where newlines in string tokens could silently corrupt the token.

Handle alternative output file extensions more gracefully.

### New features

`@export` props may now have a value to set a specific export name.

## 0.13.4 (2021-05-14)

### Bug fixes

Don't add inline rules to the terms file (since they may not be uniquely identified by name).

Generate more minimal state machines for the tokenizer.

## 0.13.3 (2021-02-17)

### New features

Support `@context` syntax to register a context tracker for a grammar.

## 0.13.2 (2021-01-20)

### Bug fixes

Fix an issue where imported identifiers could clash with the export name in generated code.

## 0.13.1 (2020-12-04)

### Bug fixes

Fix versions of lezer packages depended on.

## 0.13.0 (2020-12-04)

### Breaking changes

Adjust to the new way nested parsers work in Lezer.

### Bug fixes

Top rule node types will now show up in the terms file.

It is no longer allowed for a top rule to share a name with another rule.

## 0.12.0 (2020-10-23)

### Breaking changes

The serialized parser format changed.

Pseudo-props like `name`, `dialect`, `inline` and `dynamicPrec` now require an `@` in front of them when specified in a rule's prop list.

`@export` is now specified as a pseudo-prop instead of in front of the rule.

Top rule names are now required.

### New features

Rules can now specify an `@isGroup` pseudo-prop to automatically attach a group name to all the (single) named nodes they produce.
## 0.11.2 (2020-09-29)

### Bug fixes

Fix a crash that could happen when reporting a conflict error.

### New features

A `@conflict` block inside `@tokens` can now be used to explicitly indicate a conflict between two tokens.

Allow rules to be explicitly inlineable with an `[inline]` pseudo-prop.

## 0.11.1 (2020-09-26)

### Bug fixes

Fix lezer dependency versions.

## 0.11.0 (2020-09-26)

### Breaking changes

Simplify the representation of repeat expressions in the grammar in a way that avoids some spurious conflicts.

The output format has been modified to allow states to share part of their action table for better compression.

### Bug fixes

Fix a bug where the state collapsing could introduce GLR parsing in grammars that otherwise didn't require it.

## 0.10.5 (2020-09-15)

### Bug fixes

Fix a bug where `moduleStyle` defaulted to `"cjs"` when using the node API (rather than to `"es"` as documented).

### New features

You can now import `"lezer-generator/rollup"` to get a rollup plugin that will transform grammars during the build.

## 0.10.4 (2020-09-14)

### Bug fixes

Fix a bug that broke `@external prop` declarations in grammars.

## 0.10.3 (2020-09-11)

### Bug fixes

Make sure unrelated precedence declarations for non-cyclic overlapping tokens don't end up also defining a relation between those tokens.

## 0.10.2 (2020-09-02)

### Bug fixes

Actually reuse parser states when skip rules refer to rules also used in other contexts.

Fix a bug where the automaton generated for skip rules wasn't being compressed.

Properly raise an error when different specializations for the same token are given different names.

Fix a bug that prevented `NodeProp.skipped` from being properly attached to node types.

Fix a corner-case infinite loop in the state-collapsing algorithm (and speed it up).

Compile `+` and `*` operators in a way that is less likely to lead to conflicts.

Emit all shift/reduce and reduce/reduce conflicts in a single run, rather than stopping on the first one.

Emit all overlapping token errors, rather than only the first one.

### New features

Inline rules can now be anonymous (with syntax `[props..] { body }`).

Dynamic precedences can now be associated with productions, which can help pick the preferred GLR parse when a grammar is ambiguous.

Token `@precedence` declarations can now refer to a parameterized rule by name (without arguments) to indicate that all instances of that rule have a given precedence.

## 0.10.1 (2020-08-07)

### Bug fixes

Fix an issue where the output file would in some cases have a superfluous comma.

## 0.10.0 (2020-08-07)

### Bug fixes

Fix a bug in the reuse of compiled rules (which sometimes got confused by inline rules).

The error message for overlapping tokens is a bit more concrete now, including an example of a string that matches both tokens.

### New features

Add support for grammar dialects.

Add support for external specializers.

Commas in precedence tag, dialect, or external token lists are now optional.

### Breaking changes

Changes the serialized parser format.

## 0.9.1 (2020-07-08)

### New features

The test runner helper now allows tests to pass additional configuration options.

## 0.9.0 (2020-06-08)

### Breaking changes

The `@detectDelim` directive now assigns `NodeProp.openedBy`/`closedBy` props to the bracket nodes, instead of `NodeProp.delim` to the parent node.

## 0.8.5 (2020-05-01)

### Bug fixes

Publish less useless cruft to npm, reducing package size.
## 0.8.4 (2020-04-14)

### Bug fixes

Fix an issue where token groups were inappropriately merged when conflicting tokens didn't appear in overlapping state sets.

Fix an issue where external tokenizers needed for skipped tokens were not properly enabled for some states.

Fix a bug where the tool would sometimes overeagerly merge states, resulting in incorrect output.

## 0.8.3 (2020-04-09)

### Bug fixes

Make dist/test loadable from CommonJS modules again.

Fix a bug that prevented `NodeProp.top` from being assigned to top rules in most cases.

## 0.8.2 (2020-04-01)

### Bug fixes

Fix an issue that broke the bin command.

## 0.8.1 (2020-04-01)

### Bug fixes

Make the package load as an ES module on node.

## 0.8.0 (2020-02-03)

### Breaking changes

Changes the serialized parser format.

### New features

Add support for multiple `@top` rules.

## 0.7.1 (2020-01-23)

### New features

Support `Foo(...)` syntax in test specs to indicate "ignore the children of this node".

## 0.7.0 (2020-01-20)

### New features

You can now write a node name directly after `@top` to give your top node type a name.

### Breaking changes

Changes the way repeated nodes are represented in the generated parser.

## 0.5.2 (2020-01-15)

### Bug fixes

Fix a crash when the top rule had no node name.

Adjust the way states' forced reductions are computed to avoid cycles (where force-reducing multiple times gets you back in your original state).

## 0.5.1 (2019-10-22)

### Bug fixes

Fix an issue where serialized parsers didn't have the top node prop set.

## 0.5.0 (2019-10-22)

### New features

The generator now automatically assigns the `top` node prop to the grammar's top node.

## 0.4.0 (2019-09-10)

### Bug fixes

Fix a bug that made matching single-token skipped expressions unnecessarily expensive.

### Breaking changes

Do not emit recovery actions in the parse table anymore.

## 0.3.0 (2019-08-22)

### Bug fixes

Fix several issues in the way forced reductions were picked, to avoid infinite reduction loops.

### New features

Add support for props and custom node names in the grammar notation.

Allow importing of props via `@external prop`.

Rule capitalization is now relevant (again); only capitalized rules appear in the tree by default.

### Breaking changes

Remove support for node tags, tag expressions, the `@tags` block, and everything else related to tags.

Replace tagged expression syntax with inline rule syntax.

Literal declarations must now go into the `@tokens` block.

The `@detectDelim` declaration must now appear at the top level.

The dash in `@external-...` syntax was dropped; `@external` is now a separate token.

External grammars that default to null must now have the word `empty` instead of `from "..."` (to resolve an ambiguity that syntax introduced).

## 0.2.0 (2019-08-02)

### Bug fixes

Fix a bug where the grammar parser unintentionally required semicolons between rules in skip blocks.

Actually throw an error when detecting a skip inconsistency.

Track skip context more accurately through parse states.

Fix specializing of external tokens.

### New features

Add support for tags.

Add `@tags` blocks, allow tags for literals.

Add `@punctuation` to succinctly declare punctuation tags.

Add `@infer-delim` to enable automatic delimiter detection.

Add `@all` as a way to append tags to all tagged rules in the grammar.

Allow a choice of literals to be passed to `@specialize`/`@extend`.

Add `dist/test.js` with test helper functions.

### Breaking changes

Require `@` in front of grammar keywords.

Remove support for `=`-style tag declarations.
Replace `tag.foo` syntax with colon suffix syntax.

## 0.1.1 (2019-07-09)

### Bug fixes

Actually include the .d.ts file in the published package.

## 0.1.0 (2019-07-09)

### New features

First documented release.

generator-1.7.0/LICENSE

MIT License

Copyright (C) 2018 by Marijn Haverbeke and others

Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

generator-1.7.0/README.md

# @lezer/generator

[ [**WEBSITE**](http://lezer.codemirror.net) | [**ISSUES**](https://github.com/lezer-parser/lezer/issues) | [**FORUM**](https://discuss.codemirror.net/c/lezer) | [**CHANGELOG**](https://github.com/lezer-parser/generator/blob/master/CHANGELOG.md) ]

This is an [LR(1)](https://en.wikipedia.org/wiki/LR_parser) (more precisely pseudo-[LALR](https://en.wikipedia.org/wiki/LALR_parser), with opt-in [GLR](https://en.wikipedia.org/wiki/GLR_parser)) parser generator which outputs grammars that can be used by the [Lezer](https://github.com/lezer-parser/lezer/) parser.

This package exports both a command-line parser generator tool called [`lezer-generator`](https://lezer.codemirror.net/docs/guide/#building-a-grammar) and a [programming interface](https://lezer.codemirror.net/docs/ref/#generator).

The grammar format that the tool accepts is documented in the [system guide](https://lezer.codemirror.net/docs/guide/#writing-a-grammar). See `test/cases/` for some simple example grammars, or [lezer-javascript](https://github.com/lezer-parser/javascript) for a real grammar.

You can import `"@lezer/generator/rollup"` to get a [Rollup](https://rollupjs.org/guide/en/) plugin that will transform files ending in `.grammar` or `.grammar.terms` (a pseudo-source referring to the terms produced by the `.grammar` file) as part of the rollup build process.

```javascript
import {lezer} from "@lezer/generator/rollup"

export default {
  input: "./in.js",
  output: {file: "out.js", format: "cjs"},
  plugins: [lezer()]
}
```

The plugin can be passed an `exportName` option (`lezer({exportName})`) to configure the name of the parser export.

The code is licensed under an MIT license.
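As an illustration of the `exportName` option mentioned above (a minimal sketch; the file names are hypothetical, and without the option the export is named `parser`):

```javascript
import {lezer} from "@lezer/generator/rollup"

export default {
  input: "./in.js",
  output: {file: "out.js", format: "cjs"},
  // The generated module will export `myParser` instead of `parser`.
  plugins: [lezer({exportName: "myParser"})]
}
```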
generator-1.7.0/build.js

import {build, watch} from "@marijn/buildtool"
import {fileURLToPath} from "url"
import {dirname, join} from "path"
import {readFileSync, writeFileSync, mkdirSync} from "fs"
import {rollup} from "rollup"

let tsOptions = {
  lib: ["dom", "es2016"],
  types: ["mocha", "node"],
  target: "es6"
}

let base = dirname(fileURLToPath(import.meta.url)), src = join(base, "src"), dist = join(base, "dist")
let main = join(src, "index.ts"), mainConf = {tsOptions}
let test = join(src, "test.ts"), testConf = {tsOptions, bundleName: "test"}
let rollupFile = join(src, "rollup-plugin-lezer.js")

try { mkdirSync(dist) } catch {}
// Copy the ES version of the Rollup plugin into dist as-is, and bundle a
// CommonJS version of it alongside.
writeFileSync(join(dist, "rollup-plugin-lezer.js"), readFileSync(rollupFile, "utf8"))
rollup({
  input: rollupFile,
  external: () => true
}).then(bundle => bundle.generate({
  format: "cjs",
  file: join(dist, "rollup-plugin-lezer.cjs"),
  paths: id => id.endsWith("/index.js") ? "./index.cjs" : id
})).then(result => {
  writeFileSync(join(dist, "rollup-plugin-lezer.cjs"), result.output[0].code)
})

if (process.argv.includes("--watch")) {
  watch([main], [], mainConf)
  watch([test], [], testConf)
} else {
  build(main, mainConf)
  build(test, testConf)
}

generator-1.7.0/dist/rollup-plugin-lezer.d.cts

import {Plugin} from "rollup"
export function lezer(config?: {exportName?: string}): Plugin

generator-1.7.0/dist/rollup-plugin-lezer.d.ts

import {Plugin} from "rollup"
export function lezer(config?: {exportName?: string}): Plugin

generator-1.7.0/package.json

{
  "name": "@lezer/generator",
  "version": "1.7.0",
  "description": "Parser generator for the incremental lezer parser",
  "main": "dist/index.cjs",
  "type": "module",
  "exports": {
    ".": {
      "import": "./dist/index.js",
      "require": "./dist/index.cjs"
    },
    "./test": {
      "import": "./dist/test.js",
      "require": "./dist/test.cjs"
    },
    "./dist/test": {
      "import": "./dist/test.js",
      "require": "./dist/test.cjs"
    },
    "./rollup": {
      "import": "./dist/rollup-plugin-lezer.js",
      "require": "./dist/rollup-plugin-lezer.cjs"
    }
  },
  "module": "dist/index.js",
  "types": "dist/index.d.ts",
  "author": "Marijn Haverbeke",
  "license": "MIT",
  "devDependencies": {
    "@marijn/buildtool": "^0.1.6",
    "@types/mocha": "^5.2.6",
    "@types/node": "^20.5.0",
    "ist": "^1.1.1",
    "mocha": "^10.2.0"
  },
  "dependencies": {
    "@lezer/common": "^1.1.0",
    "@lezer/lr": "^1.3.0"
  },
  "files": ["dist"],
  "repository": {
    "type": "git",
    "url": "https://github.com/lezer-parser/generator.git"
  },
  "scripts": {
    "watch": "node build.js --watch",
    "prepare": "node build.js",
    "test": "mocha"
  },
  "bin": {
    "lezer-generator": "./src/lezer-generator.cjs"
  }
}

generator-1.7.0/src/README.md

The parser generator is usually run through its [command-line interface](../guide/index.html#building-a-grammar), but can also be invoked as a JavaScript function.
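A minimal sketch of programmatic use (the grammar below is a toy example, not part of this package):

```javascript
import {buildParserFile} from "@lezer/generator"

let grammar = `
@top Program { expression+ }

expression { Name | "(" expression+ ")" }

@tokens {
  Name { @asciiLetter+ }
  space { @whitespace+ }
}

@skip { space }
`

// buildParserFile returns the generated parser module and the
// accompanying term declarations as source strings.
let {parser, terms} = buildParserFile(grammar, {
  fileName: "example.grammar",
  moduleStyle: "es"
})
```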
@BuildOptions @buildParserFile @buildParser @GenError generator-1.7.0/src/automaton.ts000066400000000000000000000620501457412043500166630ustar00rootroot00000000000000import {Term, TermSet, Rule, cmpSet, Conflicts, union} from "./grammar" import {hash, hashString} from "./hash" import {GenError} from "./error" import {timing} from "./log" export class Pos { hash: number = 0 constructor(readonly rule: Rule, readonly pos: number, // NOTE `ahead` and `ambigAhead` aren't mutated anymore after `finish()` has been called readonly ahead: Term[], public ambigAhead: readonly string[], readonly skipAhead: Term, readonly via: Pos | null) {} finish() { let h = hash(hash(this.rule.id, this.pos), this.skipAhead.hash) for (let a of this.ahead) h = hash(h, a.hash) for (let group of this.ambigAhead) h = hashString(h, group) this.hash = h return this } get next() { return this.pos < this.rule.parts.length ? this.rule.parts[this.pos] : null } advance() { return new Pos(this.rule, this.pos + 1, this.ahead, this.ambigAhead, this.skipAhead, this.via).finish() } get skip() { return this.pos == this.rule.parts.length ? this.skipAhead : this.rule.skip } cmp(pos: Pos) { return this.rule.cmp(pos.rule) || this.pos - pos.pos || this.skipAhead.hash - pos.skipAhead.hash || cmpSet(this.ahead, pos.ahead, (a, b) => a.cmp(b)) || cmpSet(this.ambigAhead, pos.ambigAhead, cmpStr) } eqSimple(pos: Pos) { return pos.rule == this.rule && pos.pos == this.pos } toString() { let parts = this.rule.parts.map(t => t.name) parts.splice(this.pos, 0, "·") return `${this.rule.name} -> ${parts.join(" ")}` } eq(other: Pos) { return this == other || this.hash == other.hash && this.rule == other.rule && this.pos == other.pos && this.skipAhead == other.skipAhead && sameSet(this.ahead, other.ahead) && sameSet(this.ambigAhead, other.ambigAhead) } trail(maxLen: number = 60) { let result = [] for (let pos: Pos | null = this; pos; pos = pos.via) { for (let i = pos.pos - 1; i >= 0; i--) result.push(pos.rule.parts[i]) } let value = result.reverse().join(" ") if (value.length > maxLen) value = value.slice(value.length - maxLen).replace(/.*? /, "… ") return value } conflicts(pos = this.pos) { let result = this.rule.conflicts[pos] if (pos == this.rule.parts.length && this.ambigAhead.length) result = result.join(new Conflicts(0, this.ambigAhead)) return result } static addOrigins(group: readonly Pos[], context: readonly Pos[]) { let result = group.slice() for (let i = 0; i < result.length; i++) { let next = result[i] if (next.pos == 0) for (let pos of context) { if (pos.next == next.rule.name && !result.includes(pos)) result.push(pos) } } return result } } function conflictsAt(group: readonly Pos[]) { let result = Conflicts.none for (let pos of group) result = result.join(pos.conflicts()) return result } // Applies automatic action precedence based on repeat productions. // These are left-associative, so reducing the `R -> R R` rule has // higher precedence. function compareRepeatPrec(a: readonly Pos[], b: readonly Pos[]) { for (let pos of a) if (pos.rule.name.repeated) { for (let posB of b) if (posB.rule.name == pos.rule.name) { if (pos.rule.isRepeatWrap && pos.pos == 2) return 1 if (posB.rule.isRepeatWrap && posB.pos == 2) return -1 } } return 0 } function cmpStr(a: string, b: string) { return a < b ? -1 : a > b ? 
1 : 0 } function termsAhead(rule: Rule, pos: number, after: readonly Term[], first: {[name: string]: (Term | null)[]}): Term[] { let found: Term[] = [] for (let i = pos + 1; i < rule.parts.length; i++) { let next = rule.parts[i], cont = false if (next.terminal) { addTo(next, found) } else for (let term of first[next.name]) { if (term == null) cont = true else addTo(term, found) } if (!cont) return found } for (let a of after) addTo(a, found) return found } function eqSet(a: readonly T[], b: readonly T[]): boolean { if (a.length != b.length) return false for (let i = 0; i < a.length; i++) if (!a[i].eq(b[i])) return false return true } function sameSet(a: readonly T[], b: readonly T[]) { if (a.length != b.length) return false for (let i = 0; i < a.length; i++) if (a[i] != b[i]) return false return true } export class Shift { constructor(readonly term: Term, readonly target: State) {} eq(other: Shift | Reduce): boolean { return other instanceof Shift && this.term == other.term && other.target.id == this.target.id } cmp(other: Shift | Reduce): number { return other instanceof Reduce ? -1 : this.term.id - other.term.id || this.target.id - other.target.id } matches(other: Shift | Reduce, mapping: readonly number[]) { return other instanceof Shift && mapping[other.target.id] == mapping[this.target.id] } toString() { return "s" + this.target.id } map(mapping: readonly number[], states: readonly State[]) { let mapped = states[mapping[this.target.id]] return mapped == this.target ? this : new Shift(this.term, mapped) } } export class Reduce { constructor(readonly term: Term, readonly rule: Rule) {} eq(other: Shift | Reduce): boolean { return other instanceof Reduce && this.term == other.term && other.rule.sameReduce(this.rule) } cmp(other: Shift | Reduce): number { return other instanceof Shift ? 1 : this.term.id - other.term.id || this.rule.name.id - other.rule.name.id || this.rule.parts.length - other.rule.parts.length } matches(other: Shift | Reduce, mapping: readonly number[]) { return other instanceof Reduce && other.rule.sameReduce(this.rule) } toString() { return `${this.rule.name.name}(${this.rule.parts.length})` } map() { return this } } function hashPositions(set: readonly Pos[]) { let h = 5381 for (let pos of set) h = hash(h, pos.hash) return h } class ConflictContext { conflicts: Conflict[] = [] constructor(readonly first: {[name: string]: (Term | null)[]}) {} } export class State { actions: (Shift | Reduce)[] = [] actionPositions: (readonly Pos[])[] = [] goto: Shift[] = [] tokenGroup: number = -1 defaultReduce: Rule | null = null constructor(public id: number, public set: readonly Pos[], public flags = 0, readonly skip: Term, readonly hash = hashPositions(set), readonly startRule: Term | null = null) {} toString() { let actions = this.actions.map(t => t.term + "=" + t).join(",") + (this.goto.length ? " | " + this.goto.map(g => g.term + "=" + g).join(",") : "") return this.id + ": " + this.set.filter(p => p.pos > 0).join() + (this.defaultReduce ? `\n always ${this.defaultReduce.name}(${this.defaultReduce.parts.length})` : actions.length ? 
"\n " + actions : "") } addActionInner(value: Shift | Reduce, positions: readonly Pos[]): Shift | Reduce | null { check: for (let i = 0; i < this.actions.length; i++) { let action = this.actions[i] if (action.term == value.term) { if (action.eq(value)) return null let fullPos = Pos.addOrigins(positions, this.set), actionFullPos = Pos.addOrigins(this.actionPositions[i], this.set) let conflicts = conflictsAt(fullPos), actionConflicts = conflictsAt(actionFullPos) let diff = compareRepeatPrec(fullPos, actionFullPos) || conflicts.precedence - actionConflicts.precedence if (diff > 0) { // Drop the existing action this.actions.splice(i, 1) this.actionPositions.splice(i, 1) i-- continue check } else if (diff < 0) { // Drop this one return null } else if (conflicts.ambigGroups.some(g => actionConflicts.ambigGroups.includes(g))) { // Explicitly allowed ambiguity continue check } else { // Not resolved return action } } } this.actions.push(value) this.actionPositions.push(positions) return null } addAction(value: Shift | Reduce, positions: readonly Pos[], context: ConflictContext) { let conflict = this.addActionInner(value, positions) if (conflict) { let conflictPos = this.actionPositions[this.actions.indexOf(conflict)][0] let rules = [positions[0].rule.name, conflictPos.rule.name] if (context.conflicts.some(c => c.rules.some(r => rules.includes(r)))) return let error if (conflict instanceof Shift) error = `shift/reduce conflict between\n ${conflictPos}\nand\n ${positions[0].rule}` else error = `reduce/reduce conflict between\n ${conflictPos.rule}\nand\n ${positions[0].rule}` error += `\nWith input:\n ${positions[0].trail(70)} · ${value.term} …` if (conflict instanceof Shift) error += findConflictShiftSource(positions[0], conflict.term, context.first) error += findConflictOrigin(conflictPos, positions[0]) context.conflicts.push(new Conflict(error, rules)) } } getGoto(term: Term) { return this.goto.find(a => a.term == term) } hasSet(set: readonly Pos[]) { return eqSet(this.set, set) } _actionsByTerm: null | {[id: number]: (Shift | Reduce)[]} = null actionsByTerm() { let result = this._actionsByTerm if (!result) { this._actionsByTerm = result = Object.create(null) as {[id: number]: (Shift | Reduce)[]} for (let action of this.actions) (result[action.term.id] || (result[action.term.id] = [])).push(action) } return result } finish() { if (this.actions.length) { let first = this.actions[0] if (first instanceof Reduce) { let {rule} = first if (this.actions.every(a => a instanceof Reduce && a.rule.sameReduce(rule))) this.defaultReduce = rule } } this.actions.sort((a, b) => a.cmp(b)) this.goto.sort((a, b) => a.cmp(b)) } eq(other: State) { let dThis = this.defaultReduce, dOther = other.defaultReduce if (dThis || dOther) return dThis && dOther ? dThis.sameReduce(dOther) : false return this.skip == other.skip && this.tokenGroup == other.tokenGroup && eqSet(this.actions, other.actions) && eqSet(this.goto, other.goto) } } function closure(set: readonly Pos[], first: {[name: string]: (Term | null)[]}) { let added: Pos[] = [], redo: Pos[] = [] function addFor(name: Term, ahead: readonly Term[], ambigAhead: readonly string[], skipAhead: Term, via: Pos) { for (let rule of name.rules) { let add = added.find(a => a.rule == rule) if (!add) { let existing = set.find(p => p.pos == 0 && p.rule == rule) add = existing ? 
new Pos(rule, 0, existing.ahead.slice(), existing.ambigAhead, existing.skipAhead, existing.via) : new Pos(rule, 0, [], none, skipAhead, via) added.push(add) } if (add.skipAhead != skipAhead) throw new GenError("Inconsistent skip sets after " + via.trail()) add.ambigAhead = union(add.ambigAhead, ambigAhead) for (let term of ahead) if (!add.ahead.includes(term)) { add.ahead.push(term) if (add.rule.parts.length && !add.rule.parts[0].terminal) addTo(add, redo) } } } for (let pos of set) { let next = pos.next if (next && !next.terminal) addFor(next, termsAhead(pos.rule, pos.pos, pos.ahead, first), pos.conflicts(pos.pos + 1).ambigGroups, pos.pos == pos.rule.parts.length - 1 ? pos.skipAhead : pos.rule.skip, pos) } while (redo.length) { let add = redo.pop()! addFor(add.rule.parts[0], termsAhead(add.rule, 0, add.ahead, first), union(add.rule.conflicts[1].ambigGroups, add.rule.parts.length == 1 ? add.ambigAhead : none), add.rule.parts.length == 1 ? add.skipAhead : add.rule.skip, add) } let result = set.slice() for (let add of added) { add.ahead.sort((a, b) => a.hash - b.hash) add.finish() let origIndex = set.findIndex(p => p.pos == 0 && p.rule == add.rule) if (origIndex > -1) result[origIndex] = add else result.push(add) } return result.sort((a, b) => a.cmp(b)) } function addTo(value: T, array: T[]) { if (!array.includes(value)) array.push(value) } export function computeFirstSets(terms: TermSet) { let table: {[term: string]: (Term | null)[]} = Object.create(null) for (let t of terms.terms) if (!t.terminal) table[t.name] = [] for (;;) { let change = false for (let nt of terms.terms) if (!nt.terminal) for (let rule of nt.rules) { let set = table[nt.name] let found = false, startLen = set.length for (let part of rule.parts) { found = true if (part.terminal) { addTo(part, set) } else { for (let t of table[part.name]) { if (t == null) found = false else addTo(t, set) } } if (found) break } if (!found) addTo(null, set) if (set.length > startLen) change = true } if (!change) return table } } class Core { constructor(readonly set: readonly Pos[], readonly state: State) {} } class Conflict { constructor(readonly error: string, readonly rules: readonly Term[]) {} } function findConflictOrigin(a: Pos, b: Pos) { if (a.eqSimple(b)) return "" function via(root: Pos, start: Pos) { let hist = [] for (let p = start.via!; !p.eqSimple(root); p = p.via!) hist.push(p) if (!hist.length) return "" hist.unshift(start) return hist.reverse().map((p, i) => "\n" + " ".repeat(i + 1) + (p == start ? "" : "via ") + p).join("") } for (let p: Pos | null = a; p; p = p.via) for (let p2: Pos | null = b; p2; p2 = p2.via) { if (p.eqSimple(p2)) return "\nShared origin: " + p + via(p, a) + via(p, b) } return "" } // Search for the reason that a given 'after' token exists at the // given pos, by scanning up the trail of positions. Because the `via` // link is only one source of a pos, of potentially many, this // requires a re-simulation of the whole path up to the pos. 
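// As an illustration (with a hypothetical grammar): if reducing `expr`
// conflicts with shifting a `"+"` token, the walk below re-derives the
// positions along the recorded trail and looks for one like
//
//   Sum -> expr · "+" expr
//
// that justifies `"+"` appearing after `expr`, so the error message can end
// with a line like "The reduction of expr is allowed before + because of
// this rule: …".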
function findConflictShiftSource(conflictPos: Pos, termAfter: Term, first: {[name: string]: (Term | null)[]}) { let pos = conflictPos, path: Term[] = [] for (;;) { for (let i = pos.pos - 1; i >= 0; i--) path.push(pos.rule.parts[i]) if (!pos.via) break pos = pos.via } path.reverse() let seen = new Set() function explore(pos: Pos, i: number, hasMatch: Pos | null): string { if (i == path.length && hasMatch && !pos.next) return `\nThe reduction of ${conflictPos.rule.name} is allowed before ${termAfter} because of this rule:\n ${hasMatch}` for (let next; next = pos.next;) { if (i < path.length && next == path[i]) { let inner = explore(pos.advance(), i + 1, hasMatch) if (inner) return inner } let after = pos.rule.parts[pos.pos + 1], match = pos.pos + 1 == pos.rule.parts.length ? hasMatch : null if (after && (after.terminal ? after == termAfter : first[after.name].includes(termAfter))) match = pos.advance() for (let rule of next.rules) { let hash = (rule.id << 5) + i + (match ? 555 : 0) if (!seen.has(hash)) { seen.add(hash) let inner = explore(new Pos(rule, 0, [], [], next, pos), i, match) if (inner) return inner } } if (!next.terminal && first[next.name].includes(null)) pos = pos.advance() else break } return "" } return explore(pos, 0, null) } // Builds a full LR(1) automaton export function buildFullAutomaton(terms: TermSet, startTerms: Term[], first: {[name: string]: (Term | null)[]}) { let states: State[] = [], statesBySetHash: {[hash: number]: State[]} = {} let cores: {[hash: number]: Core[]} = {} let t0 = Date.now() function getState(core: readonly Pos[], top?: Term) { if (core.length == 0) return null let coreHash = hashPositions(core), byHash = cores[coreHash] let skip: Term | undefined for (let pos of core) { if (!skip) skip = pos.skip else if (skip != pos.skip) throw new GenError("Inconsistent skip sets after " + pos.trail()) } if (byHash) for (let known of byHash) if (eqSet(core, known.set)) { if (known.state.skip != skip) throw new GenError("Inconsistent skip sets after " + known.set[0].trail()) return known.state } let set = closure(core, first) let hash = hashPositions(set), forHash = statesBySetHash[hash] || (statesBySetHash[hash] = []) let found if (!top) for (let state of forHash) if (state.hasSet(set)) found = state if (!found) { found = new State(states.length, set, 0, skip!, hash, top) forHash.push(found) states.push(found) if (timing && states.length % 500 == 0) console.log(`${states.length} states after ${((Date.now() - t0) / 1000).toFixed(2)}s`) } ;(cores[coreHash] || (cores[coreHash] = [])).push(new Core(core, found)) return found } for (const startTerm of startTerms) { const startSkip = startTerm.rules.length ? startTerm.rules[0].skip : terms.names["%noskip"]! 
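// Seed the automaton with a start state for each @top rule, using
// end-of-input (eof) as the lookahead for the rule's initial positions.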
getState(startTerm.rules.map(rule => new Pos(rule, 0, [terms.eof], none, startSkip, null).finish()), startTerm) } let conflicts = new ConflictContext(first) for (let filled = 0; filled < states.length; filled++) { let state = states[filled] let byTerm: Term[] = [], byTermPos: Pos[][] = [], atEnd: Pos[] = [] for (let pos of state.set) { if (pos.pos == pos.rule.parts.length) { if (!pos.rule.name.top) atEnd.push(pos) } else { let next = pos.rule.parts[pos.pos] let index = byTerm.indexOf(next) if (index < 0) { byTerm.push(next) byTermPos.push([pos]) } else { byTermPos[index].push(pos) } } } for (let i = 0; i < byTerm.length; i++) { let term = byTerm[i], positions = byTermPos[i].map(p => p.advance()) if (term.terminal) { let set = applyCut(positions) let next = getState(set) if (next) state.addAction(new Shift(term, next), byTermPos[i], conflicts) } else { let goto = getState(positions) if (goto) state.goto.push(new Shift(term, goto)) } } let replaced = false for (let pos of atEnd) for (let ahead of pos.ahead) { let count = state.actions.length state.addAction(new Reduce(ahead, pos.rule), [pos], conflicts) if (state.actions.length == count) replaced = true } // If some actions were replaced by others, double-check whether // goto entries are now superfluous (for example, in an operator // precedence-related state that has a shift for `*` but only a // reduce for `+`, we don't need a goto entry for rules that start // with `+`) if (replaced) for (let i = 0; i < state.goto.length; i++) { let start = first[state.goto[i].term.name] if (!start.some(term => state.actions.some(a => a.term == term && (a instanceof Shift)))) state.goto.splice(i--, 1) } } if (conflicts.conflicts.length) throw new GenError(conflicts.conflicts.map(c => c.error).join("\n\n")) // Resolve alwaysReduce and sort actions for (let state of states) state.finish() if (timing) console.log(`${states.length} states total.`) return states } function applyCut(set: readonly Pos[]): readonly Pos[] { let found: null | Pos[] = null, cut = 1 for (let pos of set) { let value = pos.rule.conflicts[pos.pos - 1].cut if (value < cut) continue if (!found || value > cut) { cut = value found = [] } found.push(pos) } return found || set } // Verify that there are no conflicting actions or goto entries in the // two given states (using the state ID remapping provided in mapping) function canMerge(a: State, b: State, mapping: readonly number[]) { // If a goto for the same term differs, that makes the states // incompatible for (let goto of a.goto) for (let other of b.goto) { if (goto.term == other.term && mapping[goto.target.id] != mapping[other.target.id]) return false } // If there is an action where a conflicting action exists in the // other state, the merge is only allowed when both states have the // exact same set of actions for this term. 
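  // For example, if this state resolves some term with only a shift, while
  // the other state keeps both a shift and a reduce for it (a GLR split),
  // merging would leak the extra action into contexts that never had it, so
  // the merge is only allowed when both states' action sets for that term
  // match exactly.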
let byTerm = b.actionsByTerm() for (let action of a.actions) { let setB = byTerm[action.term.id] if (setB && setB.some(other => !other.matches(action, mapping))) { if (setB.length == 1) return false let setA = a.actionsByTerm()[action.term.id] if (setA.length != setB.length || setA.some(a1 => !setB.some(a2 => a1.matches(a2, mapping)))) return false } } return true } function mergeStates(states: readonly State[], mapping: readonly number[]) { let newStates = [] for (let state of states) { let newID = mapping[state.id] if (!newStates[newID]) { newStates[newID] = new State(newID, state.set, 0, state.skip, state.hash, state.startRule) newStates[newID].tokenGroup = state.tokenGroup newStates[newID].defaultReduce = state.defaultReduce } } for (let state of states) { let newID = mapping[state.id], target = newStates[newID] target.flags |= state.flags for (let i = 0; i < state.actions.length; i++) { let action = state.actions[i].map(mapping, newStates) if (!target.actions.some(a => a.eq(action))) { target.actions.push(action) target.actionPositions.push(state.actionPositions[i]) } } for (let goto of state.goto) { let mapped = goto.map(mapping, newStates) if (!target.goto.some(g => g.eq(mapped))) target.goto.push(mapped) } } return newStates } class Group { members: number[] constructor(readonly origin: number, member: number) { this.members = [member] } } function samePosSet(a: readonly Pos[], b: readonly Pos[]) { if (a.length != b.length) return false for (let i = 0; i < a.length; i++) if (!a[i].eqSimple(b[i])) return false return true } // Collapse an LR(1) automaton to an LALR-like automaton function collapseAutomaton(states: readonly State[]): readonly State[] { let mapping: number[] = [], groups: Group[] = [] assignGroups: for (let i = 0; i < states.length; i++) { let state = states[i] if (!state.startRule) for (let j = 0; j < groups.length; j++) { let group = groups[j], other = states[group.members[0]] if (state.tokenGroup == other.tokenGroup && state.skip == other.skip && !other.startRule && samePosSet(state.set, other.set)) { group.members.push(i) mapping.push(j) continue assignGroups } } mapping.push(groups.length) groups.push(new Group(groups.length, i)) } function spill(groupIndex: number, index: number) { let group = groups[groupIndex], state = states[group.members[index]] let pop = group.members.pop()! if (index != group.members.length) group.members[index] = pop for (let i = groupIndex + 1; i < groups.length; i++) { mapping[state.id] = i if (groups[i].origin == group.origin && groups[i].members.every(id => canMerge(state, states[id], mapping))) { groups[i].members.push(state.id) return } } mapping[state.id] = groups.length groups.push(new Group(group.origin, state.id)) } for (let pass = 1;; pass++) { let conflicts = false, t0 = Date.now() for (let g = 0, startLen = groups.length; g < startLen; g++) { let group = groups[g] for (let i = 0; i < group.members.length - 1; i++) { for (let j = i + 1; j < group.members.length; j++) { let idA = group.members[i], idB = group.members[j] if (!canMerge(states[idA], states[idB], mapping)) { conflicts = true spill(g, j--) } } } } if (timing) console.log(`Collapse pass ${pass}${conflicts ? 
`` : `, done`} (${((Date.now() - t0) / 1000).toFixed(2)}s)`) if (!conflicts) return mergeStates(states, mapping) } } function mergeIdentical(states: readonly State[]): readonly State[] { for (let pass = 1;; pass++) { let mapping: number[] = [], didMerge = false, t0 = Date.now() let newStates: State[] = [] // Find states that either have the same alwaysReduce or the same // actions, and merge them. for (let i = 0; i < states.length; i++) { let state = states[i] let match = newStates.findIndex(s => state.eq(s)) if (match < 0) { mapping[i] = newStates.length newStates.push(state) } else { mapping[i] = match didMerge = true let other = newStates[match], add: Pos[] | null = null for (let pos of state.set) if (!other.set.some(p => p.eqSimple(pos))) (add || (add = [])).push(pos) if (add) other.set = add.concat(other.set).sort((a, b) => a.cmp(b)) } } if (timing) console.log(`Merge identical pass ${pass}${didMerge ? "" : ", done"} (${((Date.now() - t0) / 1000).toFixed(2)}s)`) if (!didMerge) return states // Make sure actions point at merged state objects for (let state of newStates) if (!state.defaultReduce) { state.actions = state.actions.map(a => a.map(mapping, newStates)) state.goto = state.goto.map(a => a.map(mapping, newStates)) } // Renumber ids for (let i = 0; i < newStates.length; i++) newStates[i].id = i states = newStates } } const none: readonly any[] = [] export function finishAutomaton(full: readonly State[]) { return mergeIdentical(collapseAutomaton(full)) } generator-1.7.0/src/build.ts000066400000000000000000002436641457412043500157670ustar00rootroot00000000000000import {GrammarDeclaration, RuleDeclaration, TokenDeclaration, LocalTokenDeclaration, ExternalTokenDeclaration, ExternalSpecializeDeclaration, Expression, Identifier, LiteralExpression, NameExpression, SequenceExpression, ChoiceExpression, RepeatExpression, SetExpression, AnyExpression, ConflictMarker, InlineRuleExpression, SpecializeExpression, Prop, PropPart, CharClass, CharClasses, exprsEq, exprEq} from "./node" import {Term, TermSet, Rule, Conflicts, Props, hasProps} from "./grammar" import {State, MAX_CHAR, Conflict} from "./token" import {Input} from "./parse" import {computeFirstSets, buildFullAutomaton, finishAutomaton, State as LRState, Shift, Reduce, Pos} from "./automaton" import {encodeArray} from "./encode" import {GenError} from "./error" import {verbose, time} from "./log" import {NodeProp, NodePropSource} from "@lezer/common" import {LRParser, ExternalTokenizer, LocalTokenGroup, Stack, ContextTracker} from "@lezer/lr" import {Action, Specialize, StateFlag, Seq, ParseState, File} from "@lezer/lr/dist/constants" const none: readonly any[] = [] class Parts { constructor(readonly terms: readonly Term[], readonly conflicts: null | readonly Conflicts[]) {} concat(other: Parts) { if (this == Parts.none) return other if (other == Parts.none) return this let conflicts: null | Conflicts[] = null if (this.conflicts || other.conflicts) { conflicts = this.conflicts ? this.conflicts.slice() : this.ensureConflicts() as Conflicts[] let otherConflicts = other.ensureConflicts() conflicts[conflicts.length - 1] = conflicts[conflicts.length - 1].join(otherConflicts[0]) for (let i = 1; i < otherConflicts.length; i++) conflicts.push(otherConflicts[i]) } return new Parts(this.terms.concat(other.terms), conflicts) } withConflicts(pos: number, conflicts: Conflicts) { if (conflicts == Conflicts.none) return this let array = this.conflicts ? 
this.conflicts.slice() : this.ensureConflicts() as Conflicts[] array[pos] = array[pos].join(conflicts) return new Parts(this.terms, array) } ensureConflicts() { if (this.conflicts) return this.conflicts let empty = [] for (let i = 0; i <= this.terms.length; i++) empty.push(Conflicts.none) return empty } static none = new Parts(none, null) } function p(...terms: Term[]) { return new Parts(terms, null) } class BuiltRule { constructor(readonly id: string, readonly args: readonly Expression[], readonly term: Term) {} matches(expr: NameExpression) { return this.id == expr.id.name && exprsEq(expr.args, this.args) } matchesRepeat(expr: RepeatExpression) { return this.id == "+" && exprEq(expr.expr, this.args[0]) } } export type BuildOptions = { /// The name of the grammar file fileName?: string, /// A function that should be called with warnings. The default is /// to call `console.warn`. warn?: (message: string) => void, /// Whether to include term names in the output file. Defaults to /// false. includeNames?: boolean, /// Determines the module system used by the output file. Can be /// either `"cjs"` (CommonJS) or `"es"` (ES2015 module), defaults to /// `"es"`. moduleStyle?: string, /// Set this to true to output TypeScript code instead of plain /// JavaScript. typeScript?: boolean, /// The name of the export that holds the parser in the output file. /// Defaults to `"parser"`. exportName?: string, /// When calling `buildParser`, this can be used to provide /// placeholders for external tokenizers. externalTokenizer?: (name: string, terms: {[name: string]: number}) => ExternalTokenizer /// Used by `buildParser` to resolve external prop sources. externalPropSource?: (name: string) => NodePropSource /// Provide placeholders for external specializers when using /// `buildParser`. externalSpecializer?: (name: string, terms: {[name: string]: number}) => (value: string, stack: Stack) => number /// If given, will be used to initialize external props in the parser /// returned by `buildParser`. externalProp?: (name: string) => NodeProp /// If given, will be used as context tracker in a parser built with /// `buildParser`. 
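  ///
  /// A minimal sketch (the `openBrace` term is hypothetical; `ContextTracker`
  /// comes from @lezer/lr):
  ///
  ///     buildParser(grammarText, {
  ///       contextTracker: terms => new ContextTracker({
  ///         start: 0,
  ///         shift: (context, term) => term == terms.openBrace ? context + 1 : context
  ///       })
  ///     })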
contextTracker?: ContextTracker | ((terms: {[name: string]: number}) => ContextTracker) } type SkipInfo = {skip: readonly Term[], rule: Term | null, startTokens: readonly Term[], id: number} class Builder { ast!: GrammarDeclaration input!: Input terms = new TermSet tokens: MainTokenSet localTokens: readonly LocalTokenSet[] externalTokens: ExternalTokenSet[] externalSpecializers: ExternalSpecializer[] specialized: {[name: string]: {value: string, name: string | null, term: Term, type: string, dialect: number | null}[]} = Object.create(null) tokenOrigins: {[name: string]: {spec?: Term, external?: ExternalTokenSet | ExternalSpecializer, group?: LocalTokenSet}} = Object.create(null) rules: Rule[] = [] built: BuiltRule[] = [] ruleNames: {[name: string]: Identifier | null} = Object.create(null) namespaces: {[name: string]: Namespace} = Object.create(null) namedTerms: {[name: string]: Term} = Object.create(null) termTable: {[name: string]: number} = Object.create(null) knownProps: {[name: string]: {prop: NodeProp, source: {name: string, from: string | null}}} = Object.create(null) dialects: readonly string[] dynamicRulePrecedences: {rule: Term, prec: number}[] = [] definedGroups: {name: Term, group: string, rule: RuleDeclaration}[] = [] astRules: {skip: Term, rule: RuleDeclaration}[] = [] currentSkip: Term[] = [] skipRules!: Term[] constructor(text: string, readonly options: BuildOptions) { time("Parse", () => { this.input = new Input(text, options.fileName) this.ast = this.input.parse() }) let NP: {[key: string]: any} = NodeProp for (let prop in NP) { if (NP[prop] instanceof NodeProp && !NP[prop].perNode) this.knownProps[prop] = {prop: NP[prop], source: {name: prop, from: null}} } for (let prop of this.ast.externalProps) { this.knownProps[prop.id.name] = { prop: this.options.externalProp ? this.options.externalProp(prop.id.name) : new NodeProp(), source: {name: prop.externalID.name, from: prop.source} } } this.dialects = this.ast.dialects.map(d => d.name) this.tokens = new MainTokenSet(this, this.ast.tokens) this.localTokens = this.ast.localTokens.map(g => new LocalTokenSet(this, g)) this.externalTokens = this.ast.externalTokens.map(ext => new ExternalTokenSet(this, ext)) this.externalSpecializers = this.ast.externalSpecializers.map(decl => new ExternalSpecializer(this, decl)) time("Build rules", () => { let noSkip = this.newName("%noskip", true) this.defineRule(noSkip, []) let mainSkip = this.ast.mainSkip ? this.newName("%mainskip", true) : noSkip let scopedSkip: Term[] = [], topRules: {rule: RuleDeclaration, skip: Term}[] = [] for (let rule of this.ast.rules) this.astRules.push({skip: mainSkip, rule}) for (let rule of this.ast.topRules) topRules.push({skip: mainSkip, rule}) for (let scoped of this.ast.scopedSkip) { let skip = noSkip, found = this.ast.scopedSkip.findIndex((sc, i) => i < scopedSkip.length && exprEq(sc.expr, scoped.expr)) if (found > -1) skip = scopedSkip[found] else if (this.ast.mainSkip && exprEq(scoped.expr, this.ast.mainSkip)) skip = mainSkip else if (!isEmpty(scoped.expr)) skip = this.newName("%skip", true) scopedSkip.push(skip) for (let rule of scoped.rules) this.astRules.push({skip, rule}) for (let rule of scoped.topRules) topRules.push({skip, rule}) } for (let {rule} of this.astRules) { this.unique(rule.id) } this.currentSkip.push(noSkip) this.skipRules = mainSkip == noSkip ? 
[mainSkip] : [noSkip, mainSkip] if (mainSkip != noSkip) this.defineRule(mainSkip, this.normalizeExpr(this.ast.mainSkip!)) for (let i = 0; i < this.ast.scopedSkip.length; i++) { let skip = scopedSkip[i] if (!this.skipRules.includes(skip)) { this.skipRules.push(skip) if (skip != noSkip) this.defineRule(skip, this.normalizeExpr(this.ast.scopedSkip[i].expr)) } } this.currentSkip.pop() for (let {rule, skip} of topRules.sort((a, b) => a.rule.start - b.rule.start)) { this.unique(rule.id) this.used(rule.id.name) this.currentSkip.push(skip) let {name, props} = this.nodeInfo(rule.props, "a", rule.id.name, none, none, rule.expr) let term = this.terms.makeTop(name, props) this.namedTerms[name!] = term this.defineRule(term, this.normalizeExpr(rule.expr)) this.currentSkip.pop() } for (let ext of this.externalSpecializers) ext.finish() for (let {skip, rule} of this.astRules) { if (this.ruleNames[rule.id.name] && isExported(rule) && !rule.params.length) { this.buildRule(rule, [], skip, false) if (rule.expr instanceof SequenceExpression && rule.expr.exprs.length == 0) this.used(rule.id.name) } } }) for (let name in this.ruleNames) { let value = this.ruleNames[name] if (value) this.warn(`Unused rule '${value.name}'`, value.start) } this.tokens.takePrecedences() this.tokens.takeConflicts() for (let lt of this.localTokens) lt.takePrecedences() for (let {name, group, rule} of this.definedGroups) this.defineGroup(name, group, rule) this.checkGroups() } unique(id: Identifier) { if (id.name in this.ruleNames) this.raise(`Duplicate definition of rule '${id.name}'`, id.start) this.ruleNames[id.name] = id } used(name: string) { this.ruleNames[name] = null } newName(base: string, nodeName: string | null | true = null, props: Props = {}): Term { for (let i = nodeName ? 0 : 1;; i++) { let name = i ? `${base}-${i}` : base if (!this.terms.names[name]) return this.terms.makeNonTerminal(name, nodeName === true ? null : nodeName, props) } } prepareParser() { let rules = time("Simplify rules", () => simplifyRules(this.rules, [ ...this.skipRules, ...this.terms.tops])) let {nodeTypes, names: termNames, minRepeatTerm, maxTerm} = this.terms.finish(rules) for (let prop in this.namedTerms) this.termTable[prop] = this.namedTerms[prop].id if (/\bgrammar\b/.test(verbose)) console.log(rules.join("\n")) let startTerms = this.terms.tops.slice() let first = computeFirstSets(this.terms) let skipInfo: readonly SkipInfo[] = this.skipRules.map((name, id) => { let skip = [], startTokens: Term[] = [], rules: Rule[] = [] for (let rule of name.rules) { if (!rule.parts.length) continue let start = rule.parts[0] for (let t of start.terminal ? [start] : first[start.name] || []) if (t && !startTokens.includes(t)) startTokens.push(t) if (start.terminal && rule.parts.length == 1 && !rules.some(r => r != rule && r.parts[0] == start)) skip.push(start) else rules.push(rule) } name.rules = rules if (rules.length) startTerms.push(name) return {skip, rule: rules.length ? 
name : null, startTokens, id} }) let fullTable = time("Build full automaton", () => buildFullAutomaton(this.terms, startTerms, first)) let localTokens = this.localTokens .map((grp, i) => grp.buildLocalGroup(fullTable, skipInfo, i)) let {tokenGroups, tokenPrec, tokenData} = time("Build token groups", () => this.tokens.buildTokenGroups(fullTable, skipInfo, localTokens.length)) let table = time("Finish automaton", () => finishAutomaton(fullTable)) let skipState = findSkipStates(table, this.terms.tops) if (/\blr\b/.test(verbose)) console.log(table.join("\n")) let specialized: (ExternalSpecializer | {token: Term, table: {[value: string]: number}})[] = [] for (let ext of this.externalSpecializers) specialized.push(ext) for (let name in this.specialized) specialized.push({token: this.terms.names[name], table: buildSpecializeTable(this.specialized[name])}) let tokStart = (tokenizer: TokenGroup | ExternalTokenSet) => { if (tokenizer instanceof ExternalTokenSet) return tokenizer.ast.start return this.tokens.ast ? this.tokens.ast.start : -1 } let tokenizers = ((tokenGroups as (TokenGroup | ExternalTokenSet)[]) .concat(this.externalTokens) .sort((a, b) => tokStart(a) - tokStart(b)) as TokenizerSpec[]) .concat(localTokens) let data = new DataBuilder let skipData = skipInfo.map(info => { let actions: number[] = [] for (let term of info.skip) actions.push(term.id, 0, Action.StayFlag >> 16) if (info.rule) { let state = table.find(s => s.startRule == info.rule)! for (let action of state.actions as Shift[]) actions.push(action.term.id, state.id, Action.GotoFlag >> 16) } actions.push(Seq.End, Seq.Done) return data.storeArray(actions) }) let states = time("Finish states", () => { let states = new Uint32Array(table.length * ParseState.Size) let forceReductions = this.computeForceReductions(table, skipInfo) let finishCx = new FinishStateContext(tokenizers, data, states, skipData, skipInfo, table, this) for (let s of table) finishCx.finish(s, skipState(s.id), forceReductions[s.id]) return states }) let dialects: {[name: string]: number} = Object.create(null) for (let i = 0; i < this.dialects.length; i++) dialects[this.dialects[i]] = data.storeArray((this.tokens.byDialect[i] || none).map(t => t.id).concat(Seq.End)) let dynamicPrecedences = null if (this.dynamicRulePrecedences.length) { dynamicPrecedences = Object.create(null) for (let {rule, prec} of this.dynamicRulePrecedences) dynamicPrecedences[rule.id] = prec } let topRules: {[rule: string]: [number, number]} = Object.create(null) for (let term of this.terms.tops) topRules[term.nodeName!] 
= [table.find(state => state.startRule == term)!.id, term.id] let precTable = data.storeArray(tokenPrec.concat(Seq.End)) let {nodeProps, skippedTypes} = this.gatherNodeProps(nodeTypes) return { states, stateData: data.finish(), goto: computeGotoTable(table), nodeNames: nodeTypes.filter(t => t.id < minRepeatTerm).map(t => t.nodeName).join(" "), nodeProps, skippedTypes, maxTerm, repeatNodeCount: nodeTypes.length - minRepeatTerm, tokenizers, tokenData, topRules, dialects, dynamicPrecedences, specialized, tokenPrec: precTable, termNames } } getParser() { let { states, stateData, goto, nodeNames, nodeProps: rawNodeProps, skippedTypes, maxTerm, repeatNodeCount, tokenizers, tokenData, topRules, dialects, dynamicPrecedences, specialized: rawSpecialized, tokenPrec, termNames } = this.prepareParser() let specialized = rawSpecialized.map(v => { if (v instanceof ExternalSpecializer) { let ext = this.options.externalSpecializer!(v.ast.id.name, this.termTable) return { term: v.term!.id, get: (value: string, stack: Stack) => (ext(value, stack) << 1) | (v.ast.type == "extend" ? Specialize.Extend : Specialize.Specialize), external: ext, extend: v.ast.type == "extend" } } else { return {term: v.token.id, get: (value: string) => v.table[value] || -1} } }) return LRParser.deserialize({ version: File.Version, states, stateData, goto, nodeNames, maxTerm, repeatNodeCount, nodeProps: rawNodeProps.map(({prop, terms}) => [this.knownProps[prop].prop, ...terms]), propSources: !this.options.externalPropSource ? undefined : this.ast.externalPropSources.map(s => this.options.externalPropSource!(s.id.name)), skippedNodes: skippedTypes, tokenData, tokenizers: tokenizers.map(tok => tok.create()), context: !this.ast.context ? undefined : typeof this.options.contextTracker == "function" ? this.options.contextTracker(this.termTable) : this.options.contextTracker, topRules, dialects, dynamicPrecedences, specialized, tokenPrec, termNames }) as LRParser } getParserFile() { let { states, stateData, goto, nodeNames, nodeProps: rawNodeProps, skippedTypes, maxTerm, repeatNodeCount, tokenizers: rawTokenizers, tokenData, topRules, dialects: rawDialects, dynamicPrecedences, specialized: rawSpecialized, tokenPrec, termNames } = this.prepareParser() let mod = this.options.moduleStyle || "es" let gen = "// This file was generated by lezer-generator. You probably shouldn't edit it.\n", head = gen let imports: {[source: string]: string[]} = {}, imported: {[spec: string]: string} = Object.create(null) let defined: {[name: string]: boolean} = Object.create(null) for (let word of KEYWORDS) defined[word] = true let exportName = this.options.exportName || "parser" defined[exportName] = true let getName = (prefix: string) => { for (let i = 0;; i++) { let id = prefix + (i ? "_" + i : "") if (!defined[id]) return id } } let importName = (name: string, source: string, prefix: string = name) => { let spec = name + " from " + source if (imported[spec]) return imported[spec] let src = JSON.stringify(source), varName = name if (name in defined) { varName = getName(prefix) name += `${mod == "cjs" ? ":" : " as"} ${varName}` } defined[varName] = true ;(imports[src] || (imports[src] = [])).push(name) return imported[spec] = varName } let lrParser = importName("LRParser", "@lezer/lr") let tokenizers = rawTokenizers.map(tok => tok.createSource(importName)) let context = this.ast.context ? 
importName(this.ast.context.id.name, this.ast.context.source) : null let nodeProps = rawNodeProps.map(({prop, terms}) => { let {source} = this.knownProps[prop] let propID = source.from ? importName(source.name, source.from) : JSON.stringify(source.name) return `[${propID}, ${terms.map(serializePropValue).join(",")}]` }) function specializationTableString(table: {[name: string]: number}) { return "{__proto__:null," + Object.keys(table).map(key => `${/^(\d+|[a-zA-Z_]\w*)$/.test(key) ? key : JSON.stringify(key)}:${table[key]}`) .join(", ") + "}" } let specHead = "" let specialized = rawSpecialized.map(v => { if (v instanceof ExternalSpecializer) { let name = importName(v.ast.id.name, v.ast.source) let ts = this.options.typeScript ? ": any" : "" return `{term: ${v.term!.id}, get: (value${ts}, stack${ts}) => (${name}(value, stack) << 1)${ v.ast.type == "extend" ? ` | ${Specialize.Extend}` : ''}, external: ${name}${ v.ast.type == "extend" ? ', extend: true' : ''}}` } else { let tableName = getName("spec_" + v.token.name.replace(/\W/g, "")) defined[tableName] = true specHead += `const ${tableName} = ${specializationTableString(v.table)}\n` let ts = this.options.typeScript ? `: keyof typeof ${tableName}` : "" return `{term: ${v.token.id}, get: (value${ts}) => ${tableName}[value] || -1}` } }) let propSources = this.ast.externalPropSources.map(s => importName(s.id.name, s.source)) for (let source in imports) { if (mod == "cjs") head += `const {${imports[source].join(", ")}} = require(${source})\n` else head += `import {${imports[source].join(", ")}} from ${source}\n` } head += specHead function serializePropValue(value: any) { return typeof value != "string" || /^(true|false|\d+(\.\d+)?|\.\d+)$/.test(value) ? value : JSON.stringify(value) } let dialects = Object.keys(rawDialects).map(d => `${d}: ${rawDialects[d]}`) let parserStr = `${lrParser}.deserialize({ version: ${File.Version}, states: ${encodeArray(states, 0xffffffff)}, stateData: ${encodeArray(stateData)}, goto: ${encodeArray(goto)}, nodeNames: ${JSON.stringify(nodeNames)}, maxTerm: ${maxTerm}${context ? `, context: ${context}` : ""}${nodeProps.length ? `, nodeProps: [ ${nodeProps.join(",\n ")} ]` : ""}${propSources.length ? `, propSources: [${propSources.join()}]` : ""}${skippedTypes.length ? `, skippedNodes: ${JSON.stringify(skippedTypes)}` : ""}, repeatNodeCount: ${repeatNodeCount}, tokenData: ${encodeArray(tokenData)}, tokenizers: [${tokenizers.join(", ")}], topRules: ${JSON.stringify(topRules)}${dialects.length ? `, dialects: {${dialects.join(", ")}}` : ""}${dynamicPrecedences ? `, dynamicPrecedences: ${JSON.stringify(dynamicPrecedences)}` : ""}${specialized.length ? `, specialized: [${specialized.join(",")}]` : ""}, tokenPrec: ${tokenPrec}${this.options.includeNames ? `, termNames: ${JSON.stringify(termNames)}` : ''} })` let terms: string[] = [] for (let name in this.termTable) { let id = name if (KEYWORDS.includes(id)) for (let i = 1;; i++) { id = "_".repeat(i) + name if (!(id in this.termTable)) break } else if (!/^[\w$]+$/.test(name)) { continue } terms.push(`${id}${mod == "cjs" ? ":" : " ="} ${this.termTable[name]}`) } for (let id = 0; id < this.dialects.length; id++) terms.push(`Dialect_${this.dialects[id]}${mod == "cjs" ? ":" : " ="} ${id}`) return { parser: head + (mod == "cjs" ? `exports.${exportName} = ${parserStr}\n` : `export const ${exportName} = ${parserStr}\n`), terms: mod == "cjs" ? 
`${gen}module.exports = {\n ${terms.join(",\n ")}\n}` : `${gen}export const\n ${terms.join(",\n ")}\n` } } gatherNonSkippedNodes() { let seen: {[term: number]: boolean} = Object.create(null) let work: Term[] = [] let add = (term: Term) => { if (!seen[term.id]) { seen[term.id] = true work.push(term) } } this.terms.tops.forEach(add) for (let i = 0; i < work.length; i++) { for (let rule of work[i].rules) for (let part of rule.parts) add(part) } return seen } gatherNodeProps(nodeTypes: readonly Term[]) { let notSkipped = this.gatherNonSkippedNodes(), skippedTypes = [] let nodeProps: {prop: string, values: {[val: string]: number[]}}[] = [] for (let type of nodeTypes) { if (!notSkipped[type.id] && !type.error) skippedTypes.push(type.id) for (let prop in type.props) { let known = this.knownProps[prop] if (!known) throw new GenError("No known prop type for " + prop) if (known.source.from == null && (known.source.name == "repeated" || known.source.name == "error")) continue let rec = nodeProps.find(r => r.prop == prop) if (!rec) nodeProps.push(rec = {prop, values: {}}) ;(rec.values[type.props[prop]] || (rec.values[type.props[prop]] = [])).push(type.id) } } return { nodeProps: nodeProps.map(({prop, values}) => { let terms: (string | number)[] = [] for (let val in values) { let ids = values[val] if (ids.length == 1) { terms.push(ids[0], val) } else { terms.push(-ids.length) for (let id of ids) terms.push(id) terms.push(val) } } return {prop, terms} }), skippedTypes } } makeTerminal(name: string, tag: string | null, props: Props) { return this.terms.makeTerminal(this.terms.uniqueName(name), tag, props) } computeForceReductions(states: readonly LRState[], skipInfo: readonly SkipInfo[]) { // This finds a forced reduction for every state, trying to guard // against cyclic forced reductions, where a given parse stack can // endlessly continue running forced reductions without making any // progress. // // This occurs with length-1 reductions. We never generate // length-0 reductions, and length-2+ reductions always shrink the // stack, so they are guaranteed to make progress. // // If there are states S1 and S2 whose forced reductions reduce // terms T1 and T2 respectively, both with a length of 1, _and_ // there is a state S3, which has goto entries T1 -> S2, T2 -> S1, // you can get cyclic reductions. Of course, the cycle may also // contain more than two steps. let reductions: number[] = [] let candidates: Pos[][] = [] // A map from terms to states that they are mapped to in goto // entries. let gotoEdges: {[term: number]: {parents: number[], target: number}[]} = Object.create(null) for (let state of states) { reductions.push(0) for (let edge of state.goto) { let array = gotoEdges[edge.term.id] || (gotoEdges[edge.term.id] = []) let found = array.find(o => o.target == edge.target.id) if (found) found.parents.push(state.id) else array.push({parents: [state.id], target: edge.target.id}) } candidates[state.id] = state.set.filter(pos => pos.pos > 0 && !pos.rule.name.top) .sort((a, b) => b.pos - a.pos || a.rule.parts.length - b.rule.parts.length) } // Mapping from state ids to terms that that state has a length-1 // forced reduction for. let length1Reductions: {[state: number]: number} = Object.create(null) function createsCycle(term: number, startState: number, parents: number[] | null = null): boolean { let edges = gotoEdges[term] if (!edges) return false return edges.some(val => { let parentIntersection = parents ? 
parents.filter(id => val.parents.includes(id)) : val.parents if (parentIntersection.length == 0) return false if (val.target == startState) return true let found = length1Reductions[val.target] return found != null && createsCycle(found, startState, parentIntersection) }) } for (let state of states) { if (state.defaultReduce && state.defaultReduce.parts.length > 0) { reductions[state.id] = reduceAction(state.defaultReduce, skipInfo) if (state.defaultReduce.parts.length == 1) length1Reductions[state.id] = state.defaultReduce.name.id } } // To avoid painting states that only have one potential forced // reduction into a corner, reduction assignment is done by // candidate size, starting with the states with fewer candidates. for (let setSize = 1;; setSize++) { let done = true for (let state of states) { if (state.defaultReduce) continue let set = candidates[state.id] if (set.length != setSize) { if (set.length > setSize) done = false continue } for (let pos of set) { if (pos.pos != 1 || !createsCycle(pos.rule.name.id, state.id)) { reductions[state.id] = reduceAction(pos.rule, skipInfo, pos.pos) if (pos.pos == 1) length1Reductions[state.id] = pos.rule.name.id break } } } if (done) break } return reductions } substituteArgs(expr: Expression, args: readonly Expression[], params: readonly Identifier[]) { if (args.length == 0) return expr return expr.walk(expr => { let found if (expr instanceof NameExpression && (found = params.findIndex(p => p.name == expr.id.name)) > -1) { let arg = args[found] if (expr.args.length) { if (arg instanceof NameExpression && !arg.args.length) return new NameExpression(expr.start, arg.id, expr.args) this.raise(`Passing arguments to a parameter that already has arguments`, expr.start) } return arg } else if (expr instanceof InlineRuleExpression) { let r = expr.rule, props = this.substituteArgsInProps(r.props, args, params) return props == r.props ? expr : new InlineRuleExpression(expr.start, new RuleDeclaration(r.start, r.id, props, r.params, r.expr)) } else if (expr instanceof SpecializeExpression) { let props = this.substituteArgsInProps(expr.props, args, params) return props == expr.props ? expr : new SpecializeExpression(expr.start, expr.type, props, expr.token, expr.content) } return expr }) } substituteArgsInProps(props: readonly Prop[], args: readonly Expression[], params: readonly Identifier[]) { let substituteInValue = (value: readonly PropPart[]) => { let result = value as PropPart[] for (let i = 0; i < value.length; i++) { let part = value[i] if (!part.name) continue let found = params.findIndex(p => p.name == part.name) if (found < 0) continue if (result == value) result = value.slice() let expr = args[found] if (expr instanceof NameExpression && !expr.args.length) result[i] = new PropPart(part.start, expr.id.name, null) else if (expr instanceof LiteralExpression) result[i] = new PropPart(part.start, expr.value, null) else this.raise(`Trying to interpolate expression '${expr}' into a prop`, part.start) } return result } let result = props as Prop[] for (let i = 0; i < props.length; i++) { let prop = props[i], value = substituteInValue(prop.value) if (value != prop.value) { if (result == props) result = props.slice() result[i] = new Prop(prop.start, prop.at, prop.name, value) } } return result } conflictsFor(markers: readonly ConflictMarker[]) { let here = Conflicts.none, atEnd = Conflicts.none for (let marker of markers) { if (marker.type == "ambig") { here = here.join(new Conflicts(0, [marker.id.name])) } else { let precs = this.ast.precedences! 
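// Illustration (hypothetical grammar, not taken from the source): given
//
//   @precedence { times @left, plus @left }
//
// a !times marker gets value 2 (items.length - index = 2 - 0) and !plus
// gets value 1, so precedences listed earlier bind tighter. Below, that
// value is shifted left by two bits so the low bits can encode
// associativity in the end-of-sequence conflicts: +1 for @left, -1 for
// @right, 0 for no associativity.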
let index = precs ? precs.items.findIndex(item => item.id.name == marker.id.name) : -1 if (index < 0) this.raise(`Reference to unknown precedence: '${marker.id.name}'`, marker.id.start) let prec = precs.items[index], value = precs.items.length - index if (prec.type == "cut") { here = here.join(new Conflicts(0, none, value)) } else { here = here.join(new Conflicts(value << 2)) atEnd = atEnd.join(new Conflicts((value << 2) + (prec.type == "left" ? 1 : prec.type == "right" ? -1 : 0))) } } } return {here, atEnd} } raise(message: string, pos = 1): never { return this.input.raise(message, pos) } warn(message: string, pos = -1) { let msg = this.input.message(message, pos) if (this.options.warn) this.options.warn(msg) else console.warn(msg) } defineRule(name: Term, choices: Parts[]) { let skip = this.currentSkip[this.currentSkip.length - 1] for (let choice of choices) this.rules.push(new Rule(name, choice.terms, choice.ensureConflicts(), skip)) } resolve(expr: NameExpression): Parts[] { for (let built of this.built) if (built.matches(expr)) return [p(built.term)] let found = this.tokens.getToken(expr) if (found) return [p(found)] for (let grp of this.localTokens) { let found = grp.getToken(expr) if (found) return [p(found)] } for (let ext of this.externalTokens) { let found = ext.getToken(expr) if (found) return [p(found)] } for (let ext of this.externalSpecializers) { let found = ext.getToken(expr) if (found) return [p(found)] } let known = this.astRules.find(r => r.rule.id.name == expr.id.name) if (!known) return this.raise(`Reference to undefined rule '${expr.id.name}'`, expr.start) if (known.rule.params.length != expr.args.length) this.raise(`Wrong number of arguments for '${expr.id.name}'`, expr.start) this.used(known.rule.id.name) return [p(this.buildRule(known.rule, expr.args, known.skip))] } // For tree-balancing reasons, repeat expressions X+ have to be // normalized to something like // // R -> X | R R // // Returns the `R` term. normalizeRepeat(expr: RepeatExpression) { let known = this.built.find(b => b.matchesRepeat(expr)) if (known) return p(known.term) let name = expr.expr.prec < expr.prec ? `(${expr.expr})+` : `${expr.expr}+` let term = this.terms.makeRepeat(this.terms.uniqueName(name)) this.built.push(new BuiltRule("+", [expr.expr], term)) this.defineRule(term, this.normalizeExpr(expr.expr).concat(p(term, term))) return p(term) } normalizeSequence(expr: SequenceExpression) { let result: Parts[][] = expr.exprs.map(e => this.normalizeExpr(e)) let builder = this function complete(start: Parts, from: number, endConflicts: Conflicts): Parts[] { let {here, atEnd} = builder.conflictsFor(expr.markers[from]) if (from == result.length) return [start.withConflicts(start.terms.length, here.join(endConflicts))] let choices = [] for (let choice of result[from]) { for (let full of complete(start.concat(choice).withConflicts(start.terms.length, here), from + 1, endConflicts.join(atEnd))) choices.push(full) } return choices } return complete(Parts.none, 0, Conflicts.none) } normalizeExpr(expr: Expression): Parts[] { if (expr instanceof RepeatExpression && expr.kind == "?") { return [Parts.none, ...this.normalizeExpr(expr.expr)] } else if (expr instanceof RepeatExpression) { let repeated = this.normalizeRepeat(expr) return expr.kind == "+" ?
[repeated] : [Parts.none, repeated] } else if (expr instanceof ChoiceExpression) { return expr.exprs.reduce((o, e) => o.concat(this.normalizeExpr(e)), [] as Parts[]) } else if (expr instanceof SequenceExpression) { return this.normalizeSequence(expr) } else if (expr instanceof LiteralExpression) { return [p(this.tokens.getLiteral(expr)!)] } else if (expr instanceof NameExpression) { return this.resolve(expr) } else if (expr instanceof SpecializeExpression) { return [p(this.resolveSpecialization(expr))] } else if (expr instanceof InlineRuleExpression) { return [p(this.buildRule(expr.rule, none, this.currentSkip[this.currentSkip.length - 1], true))] } else { return this.raise(`This type of expression ('${expr}') may not occur in non-token rules`, expr.start) } } buildRule(rule: RuleDeclaration, args: readonly Expression[], skip: Term, inline = false): Term { let expr = this.substituteArgs(rule.expr, args, rule.params) let {name: nodeName, props, dynamicPrec, inline: explicitInline, group, exported} = this.nodeInfo(rule.props || none, inline ? "pg" : "pgi", rule.id.name, args, rule.params, rule.expr) if (exported && rule.params.length) this.warn(`Can't export parameterized rules`, rule.start) if (exported && inline) this.warn(`Can't export inline rule`, rule.start) let name = this.newName(rule.id.name + (args.length ? "<" + args.join(",") + ">" : ""), nodeName || true, props) if (explicitInline) name.inline = true if (dynamicPrec) this.registerDynamicPrec(name, dynamicPrec) if ((name.nodeType || exported) && rule.params.length == 0) { if (!nodeName) name.preserve = true if (!inline) this.namedTerms[exported || rule.id.name] = name } if (!inline) this.built.push(new BuiltRule(rule.id.name, args, name)) this.currentSkip.push(skip) let parts = this.normalizeExpr(expr) if (parts.length > 100 * (expr instanceof ChoiceExpression ? expr.exprs.length : 1)) this.warn(`Rule ${rule.id.name} is generating a lot (${parts.length}) of choices.\n Consider splitting it up or reducing the amount of ? or | operator uses.`, rule.start) if (/\brulesize\b/.test(verbose) && parts.length > 10) console.log(`Rule ${rule.id.name}: ${parts.length} variants`) this.defineRule(name, parts) this.currentSkip.pop() if (group) this.definedGroups.push({name, group, rule}) return name } nodeInfo(props: readonly Prop[], // p for dynamic precedence, d for dialect, i for inline, g for group, a for disabling the ignore test for default name allow: string, defaultName: string | null = null, args: readonly Expression[] = none, params: readonly Identifier[] = none, expr?: Expression, defaultProps?: Props): { name: string | null, props: Props, dialect: number | null, dynamicPrec: number, inline: boolean, group: string | null, exported: string | null } { let result: Props = {} let name = defaultName && (allow.indexOf("a") > -1 || !ignored(defaultName)) && !/ /.test(defaultName) ? defaultName : null let dialect = null, dynamicPrec = 0, inline = false, group: string | null = null, exported = null for (let prop of props) { if (!prop.at) { if (!this.knownProps[prop.name]) { let builtin = ["name", "dialect", "dynamicPrecedence", "export", "isGroup"].includes(prop.name) ? 
` (did you mean '@${prop.name}'?)` : "" this.raise(`Unknown prop name '${prop.name}'${builtin}`, prop.start) } result[prop.name] = this.finishProp(prop, args, params) } else if (prop.name == "name") { name = this.finishProp(prop, args, params) if (/ /.test(name)) this.raise(`Node names cannot have spaces ('${name}')`, prop.start) } else if (prop.name == "dialect") { if (allow.indexOf("d") < 0) this.raise("Can't specify a dialect on non-token rules", props[0].start) if (prop.value.length != 1 && !prop.value[0].value) this.raise("The '@dialect' rule prop must hold a plain string value") let dialectID = this.dialects.indexOf(prop.value[0].value!) if (dialectID < 0) this.raise(`Unknown dialect '${prop.value[0].value}'`, prop.value[0].start) dialect = dialectID } else if (prop.name == "dynamicPrecedence") { if (allow.indexOf("p") < 0) this.raise("Dynamic precedence can only be specified on nonterminals") if (prop.value.length != 1 || !/^-?(?:10|\d)$/.test(prop.value[0].value!)) this.raise("The '@dynamicPrecedence' rule prop must hold an integer between -10 and 10") dynamicPrec = +prop.value[0].value! } else if (prop.name == "inline") { if (prop.value.length) this.raise("'@inline' doesn't take a value", prop.value[0].start) if (allow.indexOf("i") < 0) this.raise("Inline can only be specified on nonterminals") inline = true } else if (prop.name == "isGroup") { if (allow.indexOf("g") < 0) this.raise("'@isGroup' can only be specified on nonterminals") group = prop.value.length ? this.finishProp(prop, args, params) : defaultName } else if (prop.name == "export") { if (prop.value.length) exported = this.finishProp(prop, args, params) else exported = defaultName } else { this.raise(`Unknown built-in prop name '@${prop.name}'`, prop.start) } } if (expr && this.ast.autoDelim && (name || hasProps(result))) { let delim = this.findDelimiters(expr) if (delim) { addToProp(delim[0], "closedBy", delim[1].nodeName!) addToProp(delim[1], "openedBy", delim[0].nodeName!) } } if (defaultProps && hasProps(defaultProps)) { for (let prop in defaultProps) if (!(prop in result)) result[prop] = defaultProps[prop] } if (hasProps(result) && !name) this.raise(`Node has properties but no name`, props.length ? 
props[0].start : expr!.start) if (inline && (hasProps(result) || dialect || dynamicPrec)) this.raise(`Inline nodes can't have props, dynamic precedence, or a dialect`, props[0].start) if (inline && name) name = null return {name, props: result, dialect, dynamicPrec, inline, group, exported} } finishProp(prop: Prop, args: readonly Expression[], params: readonly Identifier[]): string { return prop.value.map(part => { if (part.value) return part.value let pos = params.findIndex(param => param.name == part.name) if (pos < 0) this.raise(`Property refers to '${part.name}', but no parameter by that name is in scope`, part.start) let expr = args[pos] if (expr instanceof NameExpression && !expr.args.length) return expr.id.name if (expr instanceof LiteralExpression) return expr.value return this.raise(`Expression '${expr}' can not be used as part of a property value`, part.start) }).join("") } resolveSpecialization(expr: SpecializeExpression) { let type = expr.type let {name, props, dialect, exported} = this.nodeInfo(expr.props, "d") let terminal = this.normalizeExpr(expr.token) if (terminal.length != 1 || terminal[0].terms.length != 1 || !terminal[0].terms[0].terminal) this.raise(`The first argument to '${type}' must resolve to a token`, expr.token.start) let values if (expr.content instanceof LiteralExpression) values = [expr.content.value] else if ((expr.content instanceof ChoiceExpression) && expr.content.exprs.every(e => e instanceof LiteralExpression)) values = expr.content.exprs.map(expr => (expr as LiteralExpression).value) else return this.raise(`The second argument to '${expr.type}' must be a literal or choice of literals`, expr.content.start) let term = terminal[0].terms[0], token = null let table = this.specialized[term.name] || (this.specialized[term.name] = []) for (let value of values) { let known = table.find(sp => sp.value == value) if (known == null) { if (!token) { token = this.makeTerminal(term.name + "/" + JSON.stringify(value), name, props) if (dialect != null) (this.tokens.byDialect[dialect] || (this.tokens.byDialect[dialect] = [])).push(token) } table.push({value, term: token, type, dialect, name}) this.tokenOrigins[token.name] = {spec: term} if (name || exported) { if (!name) token.preserve = true this.namedTerms[exported || name!] = token } } else { if (known.type != type) this.raise(`Conflicting specialization types for ${JSON.stringify(value)} of ${term.name} (${type} vs ${known.type})`, expr.start) if (known.dialect != dialect) this.raise(`Conflicting dialects for specialization ${JSON.stringify(value)} of ${term.name}`, expr.start) if (known.name != name) this.raise(`Conflicting names for specialization ${JSON.stringify(value)} of ${term.name}`, expr.start) if (token && known.term != token) this.raise(`Conflicting specialization tokens for ${JSON.stringify(value)} of ${term.name}`, expr.start) token = known.term } } return token! 
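// Illustration (hypothetical grammar): a rule such as
//
//   kw { @specialize<identifier, "if" | "else"> }
//
// arrives here with the token resolving to identifier and values
// ["if", "else"]. One specialized term is created and the table gets an
// entry per value; buildSpecializeTable later packs each entry as
// (term.id << 1) | specialize-or-extend-bit for the runtime lookup.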
} findDelimiters(expr: Expression) { if (!(expr instanceof SequenceExpression) || expr.exprs.length < 2) return null let findToken = (expr: Expression): {term: Term, str: string} | null => { if (expr instanceof LiteralExpression) return {term: this.tokens.getLiteral(expr), str: expr.value} if (expr instanceof NameExpression && expr.args.length == 0) { let rule = this.ast.rules.find(r => r.id.name == expr.id.name) if (rule) return findToken(rule.expr) let token = this.tokens.rules.find(r => r.id.name == expr.id.name) if (token && token.expr instanceof LiteralExpression) return {term: this.tokens.getToken(expr)!, str: token.expr.value} } return null } let lastToken = findToken(expr.exprs[expr.exprs.length - 1]) if (!lastToken || !lastToken.term.nodeName) return null const brackets = ["()", "[]", "{}", "<>"] let bracket = brackets.find(b => lastToken!.str.indexOf(b[1]) > -1 && lastToken!.str.indexOf(b[0]) < 0) if (!bracket) return null let firstToken = findToken(expr.exprs[0]) if (!firstToken || !firstToken.term.nodeName || firstToken.str.indexOf(bracket[0]) < 0 || firstToken.str.indexOf(bracket[1]) > -1) return null return [firstToken.term, lastToken.term] } registerDynamicPrec(term: Term, prec: number) { this.dynamicRulePrecedences.push({rule: term, prec}) term.preserve = true } defineGroup(rule: Term, group: string, ast: RuleDeclaration) { let recur: Term[] = [] let getNamed = (rule: Term): Term[] => { if (rule.nodeName) return [rule] if (recur.includes(rule)) this.raise(`Rule '${ast.id.name}' cannot define a group because it contains a non-named recursive rule ('${rule.name}')`, ast.start) let result: Term[] = [] recur.push(rule) for (let r of this.rules) if (r.name == rule) { let names = r.parts.map(getNamed).filter(x => x.length) if (names.length > 1) this.raise(`Rule '${ast.id.name}' cannot define a group because some choices produce multiple named nodes`, ast.start) if (names.length == 1) for (let n of names[0]) result.push(n) } recur.pop() return result } for (let name of getNamed(rule)) name.props["group"] = (name.props["group"]?.split(" ") || []).concat(group).sort().join(" ") } checkGroups() { let groups: {[name: string]: Term[]} = Object.create(null), nodeNames: {[name: string]: boolean} = Object.create(null) for (let term of this.terms.terms) if (term.nodeName) { nodeNames[term.nodeName] = true if (term.props["group"]) for (let group of term.props["group"].split(" ")) { ;(groups[group] || (groups[group] = [])).push(term) } } let names = Object.keys(groups) for (let i = 0; i < names.length; i++) { let name = names[i], terms = groups[name] if (nodeNames[name]) this.warn(`Group name '${name}' conflicts with a node of the same name`) for (let j = i + 1; j < names.length; j++) { let other = groups[names[j]] if (terms.some(t => other.includes(t)) && (terms.length > other.length ? 
other.some(t => !terms.includes(t)) : terms.some(t => !other.includes(t)))) this.warn(`Groups '${name}' and '${names[j]}' overlap without one being a superset of the other`) } } } } const MinSharedActions = 5 type SharedActions = {actions: readonly (Shift | Reduce)[], addr: number} interface TokenizerSpec { groupID?: number, create: () => any, createSource: (importName: (name: string, source: string, prefix?: string) => string) => string } class FinishStateContext { sharedActions: SharedActions[] = [] constructor( readonly tokenizers: TokenizerSpec[], readonly data: DataBuilder, readonly stateArray: Uint32Array, readonly skipData: readonly number[], readonly skipInfo: readonly SkipInfo[], readonly states: readonly LRState[], readonly builder: Builder ) {} findSharedActions(state: LRState): SharedActions | null { if (state.actions.length < MinSharedActions) return null let found = null for (let shared of this.sharedActions) { if ((!found || shared.actions.length > found.actions.length) && shared.actions.every(a => state.actions.some(b => b.eq(a)))) found = shared } if (found) return found let max: (Shift | Reduce)[] | null = null, scratch = [] for (let i = state.id + 1; i < this.states.length; i++) { let other = this.states[i], fill = 0 if (other.defaultReduce || other.actions.length < MinSharedActions) continue for (let a of state.actions) for (let b of other.actions) if (a.eq(b)) scratch[fill++] = a if (fill >= MinSharedActions && (!max || max.length < fill)) { max = scratch scratch = [] } } if (!max) return null let result = {actions: max, addr: this.storeActions(max, -1, null)} this.sharedActions.push(result) return result } storeActions(actions: readonly (Shift | Reduce)[], skipReduce: number, shared: SharedActions | null) { if (skipReduce < 0 && shared && shared.actions.length == actions.length) return shared.addr let data = [] for (let action of actions) { if (shared && shared.actions.some(a => a.eq(action))) continue if (action instanceof Shift) { data.push(action.term.id, action.target.id, 0) } else { let code = reduceAction(action.rule, this.skipInfo) if (code != skipReduce) data.push(action.term.id, code & Action.ValueMask, code >> 16) } } data.push(Seq.End) if (skipReduce > -1) data.push(Seq.Other, skipReduce & Action.ValueMask, skipReduce >> 16) else if (shared) data.push(Seq.Next, shared.addr & 0xffff, shared.addr >> 16) else data.push(Seq.Done) return this.data.storeArray(data) } finish(state: LRState, isSkip: boolean, forcedReduce: number) { let b = this.builder let skipID = b.skipRules.indexOf(state.skip) let skipTable = this.skipData[skipID], skipTerms = this.skipInfo[skipID].startTokens let defaultReduce = state.defaultReduce ? reduceAction(state.defaultReduce, this.skipInfo) : 0 let flags = isSkip ? StateFlag.Skipped : 0 let skipReduce = -1, shared = null if (defaultReduce == 0) { if (isSkip) for (const action of state.actions) if (action instanceof Reduce && action.term.eof) skipReduce = reduceAction(action.rule, this.skipInfo) if (skipReduce < 0) shared = this.findSharedActions(state) } if (state.set.some(p => p.rule.name.top && p.pos == p.rule.parts.length)) flags |= StateFlag.Accepting let external: TokenizerSpec[] = [] for (let i = 0; i < state.actions.length + skipTerms.length; i++) { let term = i < state.actions.length ? 
state.actions[i].term : skipTerms[i - state.actions.length] for (;;) { let orig = b.tokenOrigins[term.name] if (orig && orig.spec) { term = orig.spec; continue } if (orig && (orig.external instanceof ExternalTokenSet)) addToSet(external, orig.external) break } } let tokenizerMask = 0 for (let i = 0; i < this.tokenizers.length; i++) { let tok = this.tokenizers[i] if (external.includes(tok) || tok.groupID == state.tokenGroup) tokenizerMask |= (1 << i) } let base = state.id * ParseState.Size this.stateArray[base + ParseState.Flags] = flags this.stateArray[base + ParseState.Actions] = this.storeActions(defaultReduce ? none : state.actions, skipReduce, shared) this.stateArray[base + ParseState.Skip] = skipTable this.stateArray[base + ParseState.TokenizerMask] = tokenizerMask this.stateArray[base + ParseState.DefaultReduce] = defaultReduce this.stateArray[base + ParseState.ForcedReduce] = forcedReduce } } function addToProp(term: Term, prop: string, value: string) { let cur = term.props[prop] if (!cur || cur.split(" ").indexOf(value) < 0) term.props[prop] = cur ? cur + " " + value : value } function buildSpecializeTable(spec: {value: string, term: Term, type: string}[]): {[value: string]: number} { let table: {[value: string]: number} = Object.create(null) for (let {value, term, type} of spec) { let code = type == "specialize" ? Specialize.Specialize : Specialize.Extend table[value] = (term.id << 1) | code } return table } function reduceAction(rule: Rule, skipInfo: readonly SkipInfo[], depth = rule.parts.length) { return rule.name.id | Action.ReduceFlag | (rule.isRepeatWrap && depth == rule.parts.length ? Action.RepeatFlag : 0) | (skipInfo.some(i => i.rule == rule.name) ? Action.StayFlag : 0) | (depth << Action.ReduceDepthShift) } function findArray(data: number[], value: number[]) { search: for (let i = 0;;) { let next = data.indexOf(value[0], i) if (next == -1 || next + value.length > data.length) break for (let j = 1; j < value.length; j++) { if (value[j] != data[next + j]) { i = next + 1 continue search } } return next } return -1 } function findSkipStates(table: readonly LRState[], startRules: readonly Term[]) { let nonSkip: {[id: number]: boolean} = Object.create(null) let work: LRState[] = [] let add = (state: LRState) => { if (!nonSkip[state.id]) { nonSkip[state.id] = true work.push(state) } } for (let state of table) if (state.startRule && startRules.includes(state.startRule)) add(state) for (let i = 0; i < work.length; i++) { for (let a of work[i].actions) if (a instanceof Shift) add(a.target) for (let a of work[i].goto) add(a.target) } return (id: number) => !nonSkip[id] } class DataBuilder { data: number[] = [] storeArray(data: number[]) { let found = findArray(this.data, data) if (found > -1) return found let pos = this.data.length for (let num of data) this.data.push(num) return pos } finish() { return Uint16Array.from(this.data) } } // The goto table maps a start state + a term to a new state, and is // used to determine the new state when reducing. Because this allows // a more efficient representation and access, unlike the action // tables, the goto table is organized by term, with groups of start // states that map to a given end state enumerated for each term. // Since many terms only have a single valid goto target, this makes // it cheaper to look those up.
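//
// A worked example of the layout described below (hypothetical numbers):
// say term 0 is reached in state 4 from start states 1 and 2, and term 1
// in state 5 from start state 3. With maxTerm = 1 the data offset is
// maxTerm + 2 = 3, and the resulting table is
//
//   [2, 3, 7, (2 << 1) + 1, 4, 1, 2, (1 << 1) + 1, 5, 3]
//
// i.e. the term count, the two per-term offsets (3 and 7), then one
// record per term holding (number of start states << 1) + last-record
// bit, the target state, and the start states themselves.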
// // (Unfortunately, though the standard LR parsing mechanism never // looks up invalid goto states, the incremental parsing mechanism // needs accurate goto information for a state/term pair, so we do // need to store state ids even for terms that have only one target.) // // - First comes the number of terms in the table // // - Then, for each term, the offset of the term's data // // - At these offsets, there's a record for each target state // // - Such a record starts with the number of start states that go to // this target state, shifted one to the left, with the first bit // only set if this is the last record for this term. // // - Then follows the target state id // // - And then the start state ids function computeGotoTable(states: readonly LRState[]) { let goto: {[term: number]: {[to: number]: number[]}} = {} let maxTerm = 0 for (let state of states) { for (let entry of state.goto) { maxTerm = Math.max(entry.term.id, maxTerm) let set = goto[entry.term.id] || (goto[entry.term.id] = {}) ;(set[entry.target.id] || (set[entry.target.id] = [])).push(state.id) } } let data = new DataBuilder let index: number[] = [] let offset = maxTerm + 2 // Offset of the data, taking index size into account for (let term = 0; term <= maxTerm; term++) { let entries = goto[term] if (!entries) { index.push(1) continue } let termTable: number[] = [] let keys = Object.keys(entries) for (let target of keys) { let list = entries[target as any] termTable.push((target == keys[keys.length - 1] ? 1 : 0) + (list.length << 1)) termTable.push(+target) for (let source of list) termTable.push(source) } index.push(data.storeArray(termTable) + offset) } if (index.some(n => n > 0xffff)) throw new GenError("Goto table too large") return Uint16Array.from([maxTerm + 1, ...index, ...data.data]) } class TokenGroup implements TokenizerSpec { constructor(readonly tokens: Term[], readonly groupID: number) {} create() { return this.groupID } createSource() { return String(this.groupID) } } function addToSet<T>(set: T[], value: T) { if (!set.includes(value)) set.push(value) } function buildTokenMasks(groups: TokenGroup[]) { let masks: {[id: number]: number} = Object.create(null) for (let group of groups) { let groupMask = 1 << group.groupID for (let term of group.tokens) { masks[term.id] = (masks[term.id] || 0) | groupMask } } return masks } interface Namespace { resolve(expr: NameExpression, builder: Builder): Parts[] } class TokenArg { constructor(readonly name: string, readonly expr: Expression, readonly scope: readonly TokenArg[]) {} } class BuildingRule { constructor(readonly name: string, readonly start: State, readonly to: State, readonly args: readonly Expression[]) {} } class TokenSet { startState: State = new State built: BuiltRule[] = [] building: BuildingRule[] = [] // Used for recursion check rules: readonly RuleDeclaration[] byDialect: {[dialect: number]: Term[]} = Object.create(null) precedenceRelations: readonly {term: Term, after: readonly Term[]}[] = [] constructor(readonly b: Builder, readonly ast: TokenDeclaration | LocalTokenDeclaration | null) { this.rules = ast ? ast.rules : none for (let rule of this.rules) b.unique(rule.id) } getToken(expr: NameExpression) { for (let built of this.built) if (built.matches(expr)) return built.term let name = expr.id.name let rule = this.rules.find(r => r.id.name == name) if (!rule) return null let {name: nodeName, props, dialect, exported} = this.b.nodeInfo(rule.props, "d", name, expr.args, rule.params.length != expr.args.length ?
none : rule.params) let term = this.b.makeTerminal(expr.toString(), nodeName, props) if (dialect != null) (this.byDialect[dialect] || (this.byDialect[dialect] = [])).push(term) if ((term.nodeType || exported) && rule.params.length == 0) { if (!term.nodeType) term.preserve = true this.b.namedTerms[exported || name] = term } this.buildRule(rule, expr, this.startState, new State([term])) this.built.push(new BuiltRule(name, expr.args, term)) return term } buildRule(rule: RuleDeclaration, expr: NameExpression, from: State, to: State, args: readonly TokenArg[] = none) { let name = expr.id.name if (rule.params.length != expr.args.length) this.b.raise(`Incorrect number of arguments for token '${name}'`, expr.start) let building = this.building.find(b => b.name == name && exprsEq(expr.args, b.args)) if (building) { if (building.to == to) { from.nullEdge(building.start) return } let lastIndex = this.building.length - 1 while (this.building[lastIndex].name != name) lastIndex-- this.b.raise(`Invalid (non-tail) recursion in token rules: ${ this.building.slice(lastIndex).map(b => b.name).join(" -> ")}`, expr.start) } this.b.used(rule.id.name) let start = new State from.nullEdge(start) this.building.push(new BuildingRule(name, start, to, expr.args)) this.build(this.b.substituteArgs(rule.expr, expr.args, rule.params), start, to, expr.args.map((e, i) => new TokenArg(rule!.params[i].name, e, args))) this.building.pop() } build(expr: Expression, from: State, to: State, args: readonly TokenArg[]): void { if (expr instanceof NameExpression) { let name = expr.id.name, arg = args.find(a => a.name == name) if (arg) return this.build(arg.expr, from, to, arg.scope) let rule for (let i = 0, lt = this.b.localTokens; i <= lt.length; i++) { let set = i == lt.length ? this.b.tokens : lt[i] rule = set.rules.find(r => r.id.name == name) if (rule) break } if (!rule) return this.b.raise(`Reference to token rule '${expr.id.name}', which isn't found`, expr.start) this.buildRule(rule, expr, from, to, args) } else if (expr instanceof CharClass) { for (let [a, b] of CharClasses[expr.type]) from.edge(a, b, to) } else if (expr instanceof ChoiceExpression) { for (let choice of expr.exprs) this.build(choice, from, to, args) } else if (isEmpty(expr)) { from.nullEdge(to) } else if (expr instanceof SequenceExpression) { let conflict = expr.markers.find(c => c.length > 0) if (conflict) this.b.raise("Conflict marker in token expression", conflict[0].start) for (let i = 0; i < expr.exprs.length; i++) { let next = i == expr.exprs.length - 1 ? to : new State this.build(expr.exprs[i], from, next, args) from = next } } else if (expr instanceof RepeatExpression) { if (expr.kind == "*") { let loop = new State from.nullEdge(loop) this.build(expr.expr, loop, loop, args) loop.nullEdge(to) } else if (expr.kind == "+") { let loop = new State this.build(expr.expr, from, loop, args) this.build(expr.expr, loop, loop, args) loop.nullEdge(to) } else { // expr.kind == "?" from.nullEdge(to) this.build(expr.expr, from, to, args) } } else if (expr instanceof SetExpression) { for (let [a, b] of expr.inverted ? invertRanges(expr.ranges) : expr.ranges) rangeEdges(from, to, a, b) } else if (expr instanceof LiteralExpression) { for (let i = 0; i < expr.value.length; i++) { let ch = expr.value.charCodeAt(i) let next = i == expr.value.length - 1 ?
to : new State from.edge(ch, ch + 1, next) from = next } } else if (expr instanceof AnyExpression) { let mid = new State from.edge(0, 0xDC00, to) from.edge(0xDC00, MAX_CHAR + 1, to) from.edge(0xD800, 0xDC00, mid) mid.edge(0xDC00, 0xE000, to) } else { return this.b.raise(`Unrecognized expression type in token`, (expr as any).start) } } takePrecedences() { let rel: {term: Term, after: Term[]}[] = this.precedenceRelations = [] if (this.ast) for (let group of this.ast.precedences) { let prev: Term[] = [] for (let item of group.items) { let level = [] if (item instanceof NameExpression) { for (let built of this.built) if (item.args.length ? built.matches(item) : built.id == item.id.name) level.push(built.term) } else { let id = JSON.stringify(item.value), found = this.built.find(b => b.id == id) if (found) level.push(found.term) } if (!level.length) this.b.warn(`Precedence specified for unknown token ${item}`, item.start) for (let term of level) addRel(rel, term, prev) prev = prev.concat(level) } } } precededBy(a: Term, b: Term) { let found = this.precedenceRelations.find(r => r.term == a) return found && found.after.includes(b) } buildPrecTable(softConflicts: readonly Conflict[]) { let precTable: number[] = [], rel = this.precedenceRelations.slice() // Add entries for soft-conflicting tokens that are in the // precedence table, to make sure they'll appear in the right // order and don't mess up the longer-wins default rule. for (let {a, b, soft} of softConflicts) if (soft) { if (!rel.some(r => r.term == a) || !rel.some(r => r.term == b)) continue if (soft < 0) [a, b] = [b, a] // Now a is longer than b (and should thus take precedence) addRel(rel, b, [a]) addRel(rel, a, []) } add: while (rel.length) { for (let i = 0; i < rel.length; i++) { let record = rel[i] if (record.after.every(t => precTable.includes(t.id))) { precTable.push(record.term.id) if (rel.length == 1) break add rel[i] = rel.pop()! continue add } } this.b.raise(`Cyclic token precedence relation between ${rel.map(r => r.term).join(", ")}`) } return precTable } } class MainTokenSet extends TokenSet { explicitConflicts: {a: Term, b: Term}[] = [] ast!: TokenDeclaration | null getLiteral(expr: LiteralExpression) { let id = JSON.stringify(expr.value) for (let built of this.built) if (built.id == id) return built.term let name = null, props = {}, dialect = null, exported = null let decl = this.ast ? 
this.ast.literals.find(l => l.literal == expr.value) : null if (decl) ({name, props, dialect, exported} = this.b.nodeInfo(decl.props, "da", expr.value)) let term = this.b.makeTerminal(id, name, props) if (dialect != null) (this.byDialect[dialect] || (this.byDialect[dialect] = [])).push(term) if (exported) this.b.namedTerms[exported] = term this.build(expr, this.startState, new State([term]), none) this.built.push(new BuiltRule(id, none, term)) return term } takeConflicts() { let resolve = (expr: NameExpression | LiteralExpression) => { if (expr instanceof NameExpression) { for (let built of this.built) if (built.matches(expr)) return built.term } else { let id = JSON.stringify(expr.value), found = this.built.find(b => b.id == id) if (found) return found.term } this.b.warn(`Precedence specified for unknown token ${expr}`, expr.start) return null } for (let c of this.ast?.conflicts || []) { let a = resolve(c.a), b = resolve(c.b) if (a && b) { if (a.id < b.id) [a, b] = [b, a] this.explicitConflicts.push({a, b}) } } } // Token groups are a mechanism for allowing conflicting (matching // overlapping input, without an explicit precedence being given) // tokens to exist in a grammar _if_ they don't occur in the same // place (aren't used in the same states). // // States that use tokens that conflict will raise an error when any // of the conflicting pairs of tokens both occur in that state. // Otherwise, they are assigned a token group, which includes all // the potentially-conflicting tokens they use. If there's already a // group that doesn't have any conflicts with those tokens, that is // reused, otherwise a new group is created. // // So each state has zero or one token groups, and each conflicting // token may belong to one or more groups. Tokens get assigned a // 16-bit bitmask with the groups they belong to set to 1 (all-1s // for non-conflicting tokens). When tokenizing, that mask is // compared to the current state's group (again using all-1s for // group-less states) to determine whether a token is applicable for // this state. // // Extended/specialized tokens are treated as their parent token for // this purpose. buildTokenGroups(states: readonly LRState[], skipInfo: readonly SkipInfo[], startID: number) { let tokens = this.startState.compile() if (tokens.accepting.length) this.b.raise(`Grammar contains zero-length tokens (in '${tokens.accepting[0].name}')`, this.rules.find(r => r.id.name == tokens.accepting[0].name)!.start) if (/\btokens\b/.test(verbose)) console.log(tokens.toString()) // If there is a precedence specified for the pair, the conflict is resolved let allConflicts = tokens.findConflicts(checkTogether(states, this.b, skipInfo)) .filter(({a, b}) => !this.precededBy(a, b) && !this.precededBy(b, a)) for (let {a, b} of this.explicitConflicts) { if (!allConflicts.some(c => c.a == a && c.b == b)) allConflicts.push(new Conflict(a, b, 0, "", "")) } let softConflicts = allConflicts.filter(c => c.soft), conflicts = allConflicts.filter(c => !c.soft) let errors: {conflict: Conflict, error: string}[] = [] let groups: TokenGroup[] = [] for (let state of states) { if (state.defaultReduce || state.tokenGroup > -1) continue // Find potentially-conflicting terms (in terms) and the things // they conflict with (in conflicts), and raise an error if // there's a token conflict directly in this state. 
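// For instance (an illustrative pair, not from this grammar): a "/"
// operator token and a regular-expression token can match overlapping
// input. As long as no single state allows both, the states using one
// get a different token group than the states using the other, and the
// group masks keep each token from being tried where it doesn't belong.
// Only a state that uses both triggers the overlap error below.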
let terms: Term[] = [], incompatible: Term[] = [] let skip = skipInfo[this.b.skipRules.indexOf(state.skip)].startTokens for (let term of skip) if (state.actions.some(a => a.term == term)) this.b.raise(`Use of token ${term.name} conflicts with skip rule`) let stateTerms: Term[] = [] for (let i = 0; i < state.actions.length + (skip ? skip.length : 0); i++) { let term = i < state.actions.length ? state.actions[i].term : skip[i - state.actions.length] let orig = this.b.tokenOrigins[term.name] if (orig && orig.spec) term = orig.spec else if (orig && orig.external) continue addToSet(stateTerms, term) } if (stateTerms.length == 0) continue for (let term of stateTerms) { for (let conflict of conflicts) { let conflicting = conflict.a == term ? conflict.b : conflict.b == term ? conflict.a : null if (!conflicting) continue if (stateTerms.includes(conflicting) && !errors.some(e => e.conflict == conflict)) { let example = conflict.exampleA ? ` (example: ${JSON.stringify(conflict.exampleA)}${ conflict.exampleB ? ` vs ${JSON.stringify(conflict.exampleB)}` : ""})` : "" errors.push({ error: `Overlapping tokens ${term.name} and ${conflicting.name} used in same context${example}\n` + `After: ${state.set[0].trail()}`, conflict }) } addToSet(terms, term) addToSet(incompatible, conflicting) } } let tokenGroup = null for (let group of groups) { if (incompatible.some(term => group.tokens.includes(term))) continue for (let term of terms) addToSet(group.tokens, term) tokenGroup = group break } if (!tokenGroup) { tokenGroup = new TokenGroup(terms, groups.length + startID) groups.push(tokenGroup) } state.tokenGroup = tokenGroup.groupID } if (errors.length) this.b.raise(errors.map(e => e.error).join("\n\n")) if (groups.length + startID > 16) this.b.raise(`Too many different token groups (${groups.length}) to represent them as a 16-bit bitfield`) let precTable = this.buildPrecTable(softConflicts) return { tokenGroups: groups, tokenPrec: precTable, tokenData: tokens.toArray(buildTokenMasks(groups), precTable) } } } class LocalTokenSet extends TokenSet { fallback: Term | null = null ast!: LocalTokenDeclaration constructor(b: Builder, ast: LocalTokenDeclaration) { super(b, ast) if (ast.fallback) b.unique(ast.fallback.id) } getToken(expr: NameExpression) { let term = null if (this.ast.fallback && this.ast.fallback.id.name == expr.id.name) { if (expr.args.length) this.b.raise(`Incorrect number of arguments for ${expr.id.name}`, expr.start) if (!this.fallback) { let {name: nodeName, props, exported} = this.b.nodeInfo(this.ast.fallback.props, "", expr.id.name, none, none) let term = this.fallback = this.b.makeTerminal(expr.id.name, nodeName, props) if (term.nodeType || exported) { if (!term.nodeType) term.preserve = true this.b.namedTerms[exported || expr.id.name] = term } this.b.used(expr.id.name) } term = this.fallback } else { term = super.getToken(expr) } if (term && !this.b.tokenOrigins[term.name]) this.b.tokenOrigins[term.name] = {group: this} return term } buildLocalGroup(states: readonly LRState[], skipInfo: readonly SkipInfo[], id: number): TokenizerSpec { let tokens = this.startState.compile() if (tokens.accepting.length) this.b.raise(`Grammar contains zero-length tokens (in '${tokens.accepting[0].name}')`, this.rules.find(r => r.id.name == tokens.accepting[0].name)!.start) for (let {a, b, exampleA} of tokens.findConflicts(() => true)) { if (!this.precededBy(a, b) && !this.precededBy(b, a)) this.b.raise(`Overlapping tokens ${a.name} and ${b.name} in local token group${ exampleA ? 
` (example: ${JSON.stringify(exampleA)})` : ''}`) } for (let state of states) { if (state.defaultReduce) continue // See if this state uses any of the tokens in this group, and // if so, make sure it *only* uses tokens from this group. let usesThis: Term | null = null let usesOther: Term | undefined = skipInfo[this.b.skipRules.indexOf(state.skip)].startTokens[0] for (let {term} of state.actions) { let orig = this.b.tokenOrigins[term.name] if (orig?.group == this) usesThis = term else usesOther = term } if (usesThis) { if (usesOther) this.b.raise(`Tokens from a local token group used together with other tokens (${ usesThis.name} with ${usesOther.name})`) state.tokenGroup = id } } let precTable = this.buildPrecTable(none) let tokenData = tokens.toArray({[id]: Seq.End}, precTable) let precOffset = tokenData.length let fullData = new Uint16Array(tokenData.length + precTable.length + 1) fullData.set(tokenData, 0) fullData.set(precTable, precOffset) fullData[fullData.length - 1] = Seq.End return { groupID: id, create: () => new LocalTokenGroup(fullData, precOffset, this.fallback ? this.fallback.id : undefined), createSource: importName => `new ${importName("LocalTokenGroup", "@lezer/lr")}(${encodeArray(fullData)}, ${precOffset}${ this.fallback ? `, ${this.fallback.id}` : ''})` } } } function checkTogether(states: readonly LRState[], b: Builder, skipInfo: readonly SkipInfo[]) { let cache: {[id: number]: boolean} = Object.create(null) function hasTerm(state: LRState, term: Term) { return state.actions.some(a => a.term == term) || skipInfo[b.skipRules.indexOf(state.skip)].startTokens.includes(term) } return (a: Term, b: Term) => { if (a.id < b.id) [a, b] = [b, a] let key = a.id | (b.id << 16), cached = cache[key] if (cached != null) return cached return cache[key] = states.some(state => hasTerm(state, a) && hasTerm(state, b)) } } function invertRanges(ranges: [number, number][]) { let pos = 0, result: [number, number][] = [] for (let [a, b] of ranges) { if (a > pos) result.push([pos, a]) pos = b } if (pos <= MAX_CODE) result.push([pos, MAX_CODE + 1]) return result } const ASTRAL = 0x10000, GAP_START = 0xd800, GAP_END = 0xe000, MAX_CODE = 0x10ffff const LOW_SURR_B = 0xdc00, HIGH_SURR_B = 0xdfff // Create intermediate states for astral characters in a range, if // necessary, since the tokenizer acts on UTF16 characters function rangeEdges(from: State, to: State, low: number, hi: number) { if (low < ASTRAL) { if (low < GAP_START) from.edge(low, Math.min(hi, GAP_START), to) if (hi > GAP_END) from.edge(Math.max(low, GAP_END), Math.min(hi, MAX_CHAR + 1), to) low = ASTRAL } if (hi <= ASTRAL) return let lowStr = String.fromCodePoint(low), hiStr = String.fromCodePoint(hi - 1) let lowA = lowStr.charCodeAt(0), lowB = lowStr.charCodeAt(1) let hiA = hiStr.charCodeAt(0), hiB = hiStr.charCodeAt(1) if (lowA == hiA) { // Share the first char code let hop = new State from.edge(lowA, lowA + 1, hop) hop.edge(lowB, hiB + 1, to) } else { let midStart = lowA, midEnd = hiA if (lowB > LOW_SURR_B) { midStart++ let hop = new State from.edge(lowA, lowA + 1, hop) hop.edge(lowB, HIGH_SURR_B + 1, to) } if (hiB < HIGH_SURR_B) { midEnd-- let hop = new State from.edge(hiA, hiA + 1, hop) hop.edge(LOW_SURR_B, hiB + 1, to) } if (midStart <= midEnd) { let hop = new State from.edge(midStart, midEnd + 1, hop) hop.edge(LOW_SURR_B, HIGH_SURR_B + 1, to) } } } function isEmpty(expr: Expression) { return expr instanceof SequenceExpression && expr.exprs.length == 0 } function gatherExtTokens(b: Builder, tokens: readonly {id: Identifier, props: 
readonly Prop[]}[]) { let result: {[name: string]: Term} = Object.create(null) for (let token of tokens) { b.unique(token.id) let {name, props, dialect} = b.nodeInfo(token.props, "d", token.id.name) let term = b.makeTerminal(token.id.name, name, props) if (dialect != null) (b.tokens.byDialect[dialect] || (b.tokens.byDialect[dialect] = [])).push(term) b.namedTerms[token.id.name] = result[token.id.name] = term } return result } function findExtToken(b: Builder, tokens: {[name: string]: Term}, expr: NameExpression) { let found = tokens[expr.id.name] if (!found) return null if (expr.args.length) b.raise("External tokens cannot take arguments", expr.args[0].start) b.used(expr.id.name) return found } function addRel(rel: {term: Term, after: readonly Term[]}[], term: Term, after: readonly Term[]) { let found = rel.findIndex(r => r.term == term) if (found < 0) rel.push({term, after}) else rel[found] = {term, after: rel[found].after.concat(after)} } class ExternalTokenSet implements TokenizerSpec { tokens: {[name: string]: Term} constructor(readonly b: Builder, readonly ast: ExternalTokenDeclaration) { this.tokens = gatherExtTokens(b, ast.tokens) for (let name in this.tokens) this.b.tokenOrigins[this.tokens[name].name] = {external: this} } getToken(expr: NameExpression) { return findExtToken(this.b, this.tokens, expr) } create() { return this.b.options.externalTokenizer!(this.ast.id.name, this.b.termTable) } createSource(importName: (name: string, source: string, prefix?: string) => string) { let {source, id: {name}} = this.ast return importName(name, source) } } class ExternalSpecializer { term: Term | null = null tokens: {[name: string]: Term} constructor(readonly b: Builder, readonly ast: ExternalSpecializeDeclaration) { this.tokens = gatherExtTokens(b, ast.tokens) } finish() { let terms = this.b.normalizeExpr(this.ast.token) if (terms.length != 1 || terms[0].terms.length != 1 || !terms[0].terms[0].terminal) this.b.raise(`The token expression to '@external ${this.ast.type}' must resolve to a token`, this.ast.token.start) this.term = terms[0].terms[0] for (let name in this.tokens) this.b.tokenOrigins[this.tokens[name].name] = {spec: this.term, external: this} } getToken(expr: NameExpression) { return findExtToken(this.b, this.tokens, expr) } } function inlineRules(rules: readonly Rule[], preserve: readonly Term[]): readonly Rule[] { for (let pass = 0;; pass++) { let inlinable: {[name: string]: readonly Rule[]} = Object.create(null), found if (pass == 0) for (let rule of rules) { if (rule.name.inline && !inlinable[rule.name.name]) { let group = rules.filter(r => r.name == rule.name) if (group.some(r => r.parts.includes(rule.name))) continue found = inlinable[rule.name.name] = group } } for (let i = 0; i < rules.length; i++) { let rule = rules[i] if (!rule.name.interesting && !rule.parts.includes(rule.name) && rule.parts.length < 3 && !preserve.includes(rule.name) && (rule.parts.length == 1 || rules.every(other => other.skip == rule.skip || !other.parts.includes(rule.name))) && !rule.parts.some(p => !!inlinable[p.name]) && !rules.some((r, j) => j != i && r.name == rule.name)) found = inlinable[rule.name.name] = [rule] } if (!found) return rules let newRules = [] for (let rule of rules) { if (inlinable[rule.name.name]) continue if (!rule.parts.some(p => !!inlinable[p.name])) { newRules.push(rule) continue } function expand(at: number, conflicts: readonly Conflicts[], parts: readonly Term[]) { if (at == rule.parts.length) { newRules.push(new Rule(rule.name, parts, conflicts, rule.skip)) return } let 
next = rule.parts[at], replace = inlinable[next.name] if (!replace) { expand(at + 1, conflicts.concat(rule.conflicts[at + 1]), parts.concat(next)) return } for (let r of replace) expand(at + 1, conflicts.slice(0, conflicts.length - 1) .concat(conflicts[at].join(r.conflicts[0])) .concat(r.conflicts.slice(1, r.conflicts.length - 1)) .concat(rule.conflicts[at + 1].join(r.conflicts[r.conflicts.length - 1])), parts.concat(r.parts)) } expand(0, [rule.conflicts[0]], []) } rules = newRules } } function mergeRules(rules: readonly Rule[]): readonly Rule[] { let merged: {[name: string]: Term} = Object.create(null), found for (let i = 0; i < rules.length;) { let groupStart = i let name = rules[i++].name while (i < rules.length && rules[i].name == name) i++ let size = i - groupStart if (name.interesting) continue for (let j = i; j < rules.length;) { let otherStart = j, otherName = rules[j++].name while (j < rules.length && rules[j].name == otherName) j++ if (j - otherStart != size || otherName.interesting) continue let match = true for (let k = 0; k < size && match; k++) { let a = rules[groupStart + k], b = rules[otherStart + k] if (a.cmpNoName(b) != 0) match = false } if (match) found = merged[name.name] = otherName } } if (!found) return rules let newRules = [] for (let rule of rules) if (!merged[rule.name.name]) { newRules.push(rule.parts.every(p => !merged[p.name]) ? rule : new Rule(rule.name, rule.parts.map(p => merged[p.name] || p), rule.conflicts, rule.skip)) } return newRules } function simplifyRules(rules: readonly Rule[], preserve: readonly Term[]): readonly Rule[] { return mergeRules(inlineRules(rules, preserve)) } /// Build an in-memory parser instance for a given grammar. This is /// mostly useful for testing. If your grammar uses external /// tokenizers, you'll have to provide the `externalTokenizer` option /// for the returned parser to be able to parse anything. export function buildParser(text: string, options: BuildOptions = {}): LRParser { let builder = new Builder(text, options), parser = builder.getParser() ;(parser as any).termTable = builder.termTable return parser } const KEYWORDS = ["await", "break", "case", "catch", "continue", "debugger", "default", "do", "else", "finally", "for", "function", "if", "return", "switch", "throw", "try", "var", "while", "with", "null", "true", "false", "instanceof", "typeof", "void", "delete", "new", "in", "this", "const", "class", "extends", "export", "import", "super", "enum", "implements", "interface", "let", "package", "private", "protected", "public", "static", "yield", "require"] /// Build the code that represents the parser tables for a given /// grammar description. The `parser` property in the return value /// holds the main file that exports the `Parser` instance. The /// `terms` property holds a declaration file that defines constants /// for all of the named terms in the grammar, holding their ids as values. /// This is useful when external code, such as a tokenizer, needs to /// be able to use these ids. It is recommended to run a tree-shaking /// bundler when importing this file, since you usually only need a /// handful of the many terms in your code.
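///
/// A minimal usage sketch (grammar text and file names are illustrative):
///
///     import {buildParserFile} from "@lezer/generator"
///     let {parser, terms} = buildParserFile(`@top Program { "x"+ }`)
///     // write `parser` to parser.js and `terms` to parser.terms.js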
export function buildParserFile(text: string, options: BuildOptions = {}): {parser: string, terms: string} { return new Builder(text, options).getParserFile() } function ignored(name: string) { let first = name[0] return first == "_" || first.toUpperCase() != first } function isExported(rule: RuleDeclaration) { return rule.props.some(p => p.at && p.name == "export") } generator-1.7.0/src/encode.ts000066400000000000000000000026261457412043500161140ustar00rootroot00000000000000// Encode numbers as groups of printable ascii characters // // - 0xffff, which is often used as a placeholder, is encoded as "~" // // - The characters from " " (32) to "}" (125), excluding '"' and // "\\", indicate values from 0 to 91 // // - The first bit in a 'digit' is used to indicate whether this is // the end of a number. // // - That leaves 46 other values, which are actually significant. // // - The digits in a number are ordered from high to low significance. import {Encode} from "@lezer/lr/dist/constants" function digitToChar(digit: number) { let ch = digit + Encode.Start if (ch >= Encode.Gap1) ch++ if (ch >= Encode.Gap2) ch++ return String.fromCharCode(ch) } export function encode(value: number, max = 0xffff) { if (value > max) throw new Error("Trying to encode a number that's too big: " + value) if (value == Encode.BigVal) return String.fromCharCode(Encode.BigValCode) let result = "" for (let first = Encode.Base;; first = 0 as Encode) { let low = value % Encode.Base, rest = value - low result = digitToChar(low + first) + result if (rest == 0) break value = rest / Encode.Base } return result } export function encodeArray(values: {length: number, readonly [i: number]: number}, max = 0xffff) { let result = '"' + encode(values.length, 0xffffffff) for (let i = 0; i < values.length; i++) result += encode(values[i], max) result += '"' return result } generator-1.7.0/src/error.ts000066400000000000000000000001561457412043500160040ustar00rootroot00000000000000/// The type of error raised when the parser generator finds an issue. 
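///
/// The bundled CLI, for instance, catches this type to decide whether
/// to print a clean message or a stack trace. A minimal sketch along
/// the same lines (`grammarText` is a stand-in for your grammar
/// source):
///
///     import {buildParserFile, GenError} from "@lezer/generator"
///
///     try {
///       buildParserFile(grammarText)
///     } catch (e) {
///       // Grammar problem: report the message. Anything else is a bug.
///       if (e instanceof GenError) console.error(e.message)
///       else throw e
///     }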
export class GenError extends Error {} generator-1.7.0/src/grammar.ts000066400000000000000000000143761457412043500163120ustar00rootroot00000000000000import {Term as T} from "@lezer/lr/dist/constants" import {GenError} from "./error" const enum TermFlag { // This term is a terminal Terminal = 1, // This is the top production Top = 2, // This represents end-of-file Eof = 4, // This should be preserved, even if it doesn't occur in any rule Preserve = 8, // Rules used for * and + constructs Repeated = 16, // Rules explicitly marked as [inline] Inline = 32 } export type Props = {[name: string]: string} export function hasProps(props: Props) { for (let _p in props) return true return false } let termHash = 0 export class Term { hash = ++termHash // Used for sorting and hashing during parser generation id = -1 // Assigned in a later stage, used in actual output // Filled in only after the rules are simplified, used in automaton.ts rules: Rule[] = [] constructor(readonly name: string, private flags: number, readonly nodeName: string | null, readonly props: Props = {}) {} toString() { return this.name } get nodeType() { return this.top || this.nodeName != null || hasProps(this.props) || this.repeated } get terminal() { return (this.flags & TermFlag.Terminal) > 0 } get eof() { return (this.flags & TermFlag.Eof) > 0 } get error() { return "error" in this.props } get top() { return (this.flags & TermFlag.Top) > 0 } get interesting() { return this.flags > 0 || this.nodeName != null } get repeated() { return (this.flags & TermFlag.Repeated) > 0 } set preserve(value: boolean) { this.flags = value ? this.flags | TermFlag.Preserve : this.flags & ~TermFlag.Preserve } get preserve() { return (this.flags & TermFlag.Preserve) > 0 } set inline(value: boolean) { this.flags = value ? this.flags | TermFlag.Inline : this.flags & ~TermFlag.Inline } get inline() { return (this.flags & TermFlag.Inline) > 0 } cmp(other: Term) { return this.hash - other.hash } } export class TermSet { terms: Term[] = [] // Map from term names to Term instances names: {[name: string]: Term} = Object.create(null) eof: Term error: Term tops: Term[] = [] constructor() { this.eof = this.term("␄", null, TermFlag.Terminal | TermFlag.Eof) this.error = this.term("⚠", "⚠", TermFlag.Preserve) } term(name: string, nodeName: string | null, flags: number = 0, props: Props = {}) { let term = new Term(name, flags, nodeName, props) this.terms.push(term) this.names[name] = term return term } makeTop(nodeName: string | null, props: Props) : Term { const term = this.term("@top", nodeName, TermFlag.Top, props) this.tops.push(term) return term } makeTerminal(name: string, nodeName: string | null, props = {}) { return this.term(name, nodeName, TermFlag.Terminal, props) } makeNonTerminal(name: string, nodeName: string | null, props = {}) { return this.term(name, nodeName, 0, props) } makeRepeat(name: string) { return this.term(name, null, TermFlag.Repeated) } uniqueName(name: string) { for (let i = 0;; i++) { let cur = i ? 
`${name}-${i}` : name if (!this.names[cur]) return cur } } finish(rules: readonly Rule[]) { for (let rule of rules) rule.name.rules.push(rule) this.terms = this.terms.filter(t => t.terminal || t.preserve || rules.some(r => r.name == t || r.parts.includes(t))) let names: {[id: number]: string} = {} let nodeTypes = [this.error] this.error.id = T.Err let nextID = T.Err + 1 // Assign ids to terms that represent node types for (let term of this.terms) if (term.id < 0 && term.nodeType && !term.repeated) { term.id = nextID++ nodeTypes.push(term) } // Put all repeated terms after the regular node types let minRepeatTerm = nextID for (let term of this.terms) if (term.repeated) { term.id = nextID++ nodeTypes.push(term) } // Then comes the EOF term this.eof.id = nextID++ // And then the remaining (non-node, non-repeat) terms. for (let term of this.terms) { if (term.id < 0) term.id = nextID++ if (term.name) names[term.id] = term.name } if (nextID >= 0xfffe) throw new GenError("Too many terms") return {nodeTypes, names, minRepeatTerm, maxTerm: nextID - 1} } } export function cmpSet<T>(a: readonly T[], b: readonly T[], cmp: (a: T, b: T) => number) { if (a.length != b.length) return a.length - b.length for (let i = 0; i < a.length; i++) { let diff = cmp(a[i], b[i]) if (diff) return diff } return 0 } const none: readonly any[] = [] export class Conflicts { constructor(readonly precedence: number, readonly ambigGroups: readonly string[] = none, readonly cut = 0) {} join(other: Conflicts) { if (this == Conflicts.none || this == other) return other if (other == Conflicts.none) return this return new Conflicts(Math.max(this.precedence, other.precedence), union(this.ambigGroups, other.ambigGroups), Math.max(this.cut, other.cut)) } cmp(other: Conflicts) { return this.precedence - other.precedence || cmpSet(this.ambigGroups, other.ambigGroups, (a, b) => a < b ? -1 : a > b ? 
1 : 0) || this.cut - other.cut } static none = new Conflicts(0) } export function union<T>(a: readonly T[], b: readonly T[]): readonly T[] { if (a.length == 0 || a == b) return b if (b.length == 0) return a let result = a.slice() for (let value of b) if (!a.includes(value)) result.push(value) return result.sort() } let ruleID = 0 export class Rule { id = ruleID++ constructor(readonly name: Term, readonly parts: readonly Term[], readonly conflicts: readonly Conflicts[], readonly skip: Term) {} cmp(rule: Rule) { return this.id - rule.id } cmpNoName(rule: Rule) { return this.parts.length - rule.parts.length || this.skip.hash - rule.skip.hash || this.parts.reduce((r, s, i) => r || s.cmp(rule.parts[i]), 0) || cmpSet(this.conflicts, rule.conflicts, (a, b) => a.cmp(b)) } toString() { return this.name + " -> " + this.parts.join(" ") } get isRepeatWrap() { return this.name.repeated && this.parts.length == 2 && this.parts[0] == this.name } sameReduce(other: Rule) { return this.name == other.name && this.parts.length == other.parts.length && this.isRepeatWrap == other.isRepeatWrap } } generator-1.7.0/src/hash.ts000066400000000000000000000003221457412043500155710ustar00rootroot00000000000000export function hash(a: number, b: number): number { return (a << 5) + a + b } export function hashString(h: number, s: string) { for (let i = 0; i < s.length; i++) h = hash(h, s.charCodeAt(i)) return h } generator-1.7.0/src/index.ts000066400000000000000000000001441457412043500157570ustar00rootroot00000000000000export {BuildOptions, buildParser, buildParserFile} from "./build" export {GenError} from "./error" generator-1.7.0/src/lezer-generator.cjs000077500000000000000000000035701457412043500201170ustar00rootroot00000000000000#!/usr/bin/env node const {buildParserFile, GenError} = require("..") let file = undefined, out = undefined, moduleStyle = "es", includeNames = false, exportName = undefined, noTerms = false, typeScript = false const {writeFileSync, readFileSync} = require("fs") const usage = "Usage: lezer-generator [--cjs] [--names] [--noTerms] [--typeScript] [--output outfile] [--export name] file" for (let i = 2; i < process.argv.length;) { let arg = process.argv[i++] if (!/^-/.test(arg)) { if (file) error("Multiple input files given") file = arg } else if (arg == "--help") { console.log(usage) process.exit(0) } else if (arg == "--cjs") { moduleStyle = "cjs" } else if (arg == "-o" || arg == "--output") { if (out) error("Multiple output files given") out = process.argv[i++] } else if (arg == "--names") { includeNames = true } else if (arg == "--export") { exportName = process.argv[i++] } else if (arg == "--noTerms") { noTerms = true } else if (arg == "--typeScript") { typeScript = true } else { error("Unrecognized option " + arg) } } if (!file) error("No input file given") function error(msg) { console.error(msg) console.log(usage) process.exit(1) } let parser, terms try { ;({parser, terms} = buildParserFile(readFileSync(file, "utf8"), { fileName: file, moduleStyle, includeNames, exportName, typeScript })) } catch (e) { console.error(e instanceof GenError ? e.message : e.stack) process.exit(1) } if (out) { let ext = /^(.*)\.(c?js|mjs|ts|esm?)$/.exec(out) let outExt = typeScript ? "ts" : "js" let [parserFile, termFile] = ext ? [out, ext[1] + ".terms." + ext[2]] : [out + "." + outExt, out + ".terms." + outExt] writeFileSync(parserFile, parser) if (!noTerms) writeFileSync(termFile, terms) console.log(`Wrote ${parserFile}${noTerms ? 
"" : ` and ${termFile}`}`) } else { console.log(parser) } generator-1.7.0/src/log.ts000066400000000000000000000005471457412043500154400ustar00rootroot00000000000000export const verbose = (typeof process != "undefined" && process.env.LOG) || "" export const timing = /\btime\b/.test(verbose) export const time: (label: string, f: () => T) => T = timing ? (label, f) => { let t0 = Date.now() let result = f() console.log(`${label} (${((Date.now() - t0) / 1000).toFixed(2)}s)`) return result } : (_label, f) => f() generator-1.7.0/src/node.ts000066400000000000000000000302201457412043500155730ustar00rootroot00000000000000export class Node { constructor(readonly start: number) {} } export class GrammarDeclaration extends Node { constructor(start: number, readonly rules: readonly RuleDeclaration[], readonly topRules: readonly RuleDeclaration[], readonly tokens: TokenDeclaration | null, readonly localTokens: readonly LocalTokenDeclaration[], readonly context: ContextDeclaration | null, readonly externalTokens: readonly ExternalTokenDeclaration[], readonly externalSpecializers: readonly ExternalSpecializeDeclaration[], readonly externalPropSources: readonly ExternalPropSourceDeclaration[], readonly precedences: PrecDeclaration | null, readonly mainSkip: Expression | null, readonly scopedSkip: readonly { expr: Expression, topRules: readonly RuleDeclaration[], rules: readonly RuleDeclaration[] }[], readonly dialects: readonly Identifier[], readonly externalProps: readonly ExternalPropDeclaration[], readonly autoDelim: boolean) { super(start) } toString() { return Object.values(this.rules).join("\n") } } export class RuleDeclaration extends Node { constructor(start: number, readonly id: Identifier, readonly props: readonly Prop[], readonly params: readonly Identifier[], readonly expr: Expression) { super(start) } toString() { return this.id.name + (this.params.length ? 
`<${this.params.join()}>` : "") + " -> " + this.expr } } export class PrecDeclaration extends Node { constructor(start: number, readonly items: readonly {id: Identifier, type: "left" | "right" | "cut" | null}[]) { super(start) } } export class TokenPrecDeclaration extends Node { constructor(start: number, readonly items: readonly (NameExpression | LiteralExpression)[]) { super(start) } } export class TokenConflictDeclaration extends Node { constructor(start: number, readonly a: NameExpression | LiteralExpression, readonly b: NameExpression | LiteralExpression) { super(start) } } export class TokenDeclaration extends Node { constructor(start: number, readonly precedences: readonly TokenPrecDeclaration[], readonly conflicts: readonly TokenConflictDeclaration[], readonly rules: readonly RuleDeclaration[], readonly literals: readonly LiteralDeclaration[]) { super(start) } } export class LocalTokenDeclaration extends Node { constructor(start: number, readonly precedences: readonly TokenPrecDeclaration[], readonly rules: readonly RuleDeclaration[], readonly fallback: {readonly id: Identifier, readonly props: readonly Prop[]} | null) { super(start) } } export class LiteralDeclaration extends Node { constructor(start: number, readonly literal: string, readonly props: readonly Prop[]) { super(start) } } export class ContextDeclaration extends Node { constructor(start: number, readonly id: Identifier, readonly source: string) { super(start) } } export class ExternalTokenDeclaration extends Node { constructor(start: number, readonly id: Identifier, readonly source: string, readonly tokens: readonly {id: Identifier, props: readonly Prop[]}[]) { super(start) } } export class ExternalSpecializeDeclaration extends Node { constructor(start: number, readonly type: "extend" | "specialize", readonly token: Expression, readonly id: Identifier, readonly source: string, readonly tokens: readonly {id: Identifier, props: readonly Prop[]}[]) { super(start) } } export class ExternalPropSourceDeclaration extends Node { constructor(start: number, readonly id: Identifier, readonly source: string) { super(start) } } export class ExternalPropDeclaration extends Node { constructor(start: number, readonly id: Identifier, readonly externalID: Identifier, readonly source: string) { super(start) } } export class Identifier extends Node { constructor(start: number, readonly name: string) { super(start) } toString() { return this.name } } export class Expression extends Node { walk(f: (expr: Expression) => Expression): Expression { return f(this) } eq(_other: Expression): boolean { return false } prec!: number } Expression.prototype.prec = 10 export class NameExpression extends Expression { constructor(start: number, readonly id: Identifier, readonly args: readonly Expression[]) { super(start) } toString() { return this.id.name + (this.args.length ? `<${this.args.join()}>` : "") } eq(other: NameExpression) { return this.id.name == other.id.name && exprsEq(this.args, other.args) } walk(f: (expr: Expression) => Expression): Expression { let args = walkExprs(this.args, f) return f(args == this.args ? 
this : new NameExpression(this.start, this.id, args)) } } export class SpecializeExpression extends Expression { constructor(start: number, readonly type: string, readonly props: readonly Prop[], readonly token: Expression, readonly content: Expression) { super(start) } toString() { return `@${this.type}[${this.props.join(",")}]<${this.token}, ${this.content}>` } eq(other: SpecializeExpression) { return this.type == other.type && Prop.eqProps(this.props, other.props) && exprEq(this.token, other.token) && exprEq(this.content, other.content) } walk(f: (expr: Expression) => Expression): Expression { let token = this.token.walk(f), content = this.content.walk(f) return f(token == this.token && content == this.content ? this : new SpecializeExpression(this.start, this.type, this.props, token, content)) } } export class InlineRuleExpression extends Expression { constructor(start: number, readonly rule: RuleDeclaration) { super(start) } toString() { let rule = this.rule return `${rule.id}${rule.props.length ? `[${rule.props.join(",")}]` : ""} { ${rule.expr} }` } eq(other: InlineRuleExpression) { let rule = this.rule, oRule = other.rule return exprEq(rule.expr, oRule.expr) && rule.id.name == oRule.id.name && Prop.eqProps(rule.props, oRule.props) } walk(f: (expr: Expression) => Expression): Expression { let rule = this.rule, expr = rule.expr.walk(f) return f(expr == rule.expr ? this : new InlineRuleExpression(this.start, new RuleDeclaration(rule.start, rule.id, rule.props, [], expr))) } } export class ChoiceExpression extends Expression { constructor(start: number, readonly exprs: readonly Expression[]) { super(start) } toString() { return this.exprs.map(e => maybeParens(e, this)).join(" | ") } eq(other: ChoiceExpression) { return exprsEq(this.exprs, other.exprs) } walk(f: (expr: Expression) => Expression): Expression { let exprs = walkExprs(this.exprs, f) return f(exprs == this.exprs ? this : new ChoiceExpression(this.start, exprs)) } } ChoiceExpression.prototype.prec = 1 export class SequenceExpression extends Expression { constructor( start: number, readonly exprs: readonly Expression[], readonly markers: readonly (readonly ConflictMarker[])[], readonly empty = false ) { super(start) } toString() { return this.empty ? "()" : this.exprs.map(e => maybeParens(e, this)).join(" ") } eq(other: SequenceExpression) { return exprsEq(this.exprs, other.exprs) && this.markers.every((m, i) => { let om = other.markers[i] return m.length == om.length && m.every((x, i) => x.eq(om[i])) }) } walk(f: (expr: Expression) => Expression): Expression { let exprs = walkExprs(this.exprs, f) return f(exprs == this.exprs ? this : new SequenceExpression(this.start, exprs, this.markers, this.empty && !exprs.length)) } } SequenceExpression.prototype.prec = 2 export class ConflictMarker extends Node { constructor(start: number, readonly id: Identifier, readonly type: "ambig" | "prec") { super(start) } toString() { return (this.type == "ambig" ? "~" : "!") + this.id.name } eq(other: ConflictMarker) { return this.id.name == other.id.name && this.type == other.type } } export class RepeatExpression extends Expression { constructor(start: number, readonly expr: Expression, readonly kind: "?" | "*" | "+") { super(start) } toString() { return maybeParens(this.expr, this) + this.kind } eq(other: RepeatExpression) { return exprEq(this.expr, other.expr) && this.kind == other.kind } walk(f: (expr: Expression) => Expression): Expression { let expr: Expression = this.expr.walk(f) return f(expr == this.expr ? 
this : new RepeatExpression(this.start, expr, this.kind)) } } RepeatExpression.prototype.prec = 3 export class LiteralExpression extends Expression { // value.length is always > 0 constructor(start: number, readonly value: string) { super(start) } toString() { return JSON.stringify(this.value) } eq(other: LiteralExpression) { return this.value == other.value } } export class SetExpression extends Expression { constructor(start: number, readonly ranges: [number, number][], readonly inverted: boolean) { super(start) } toString() { return `[${this.inverted ? "^" : ""}${this.ranges.map(([a, b]) => { return String.fromCodePoint(a) + (b == a + 1 ? "" : "-" + String.fromCodePoint(b)) })}]` } eq(other: SetExpression) { return this.inverted == other.inverted && this.ranges.length == other.ranges.length && this.ranges.every(([a, b], i) => { let [x, y] = other.ranges[i]; return a == x && b == y }) } } export class AnyExpression extends Expression { constructor(start: number) { super(start) } toString() { return "_" } eq() { return true } } function walkExprs(exprs: readonly Expression[], f: (expr: Expression) => Expression): readonly Expression[] { let result: Expression[] | null = null for (let i = 0; i < exprs.length; i++) { let expr = exprs[i].walk(f) if (expr != exprs[i] && !result) result = exprs.slice(0, i) if (result) result.push(expr) } return result || exprs } export const CharClasses: {[name: string]: [number, number][]} = { asciiLetter: [[65, 91], [97, 123]], asciiLowercase: [[97, 123]], asciiUppercase: [[65, 91]], digit: [[48, 58]], whitespace: [[9, 14], [32, 33], [133, 134], [160, 161], [5760, 5761], [8192, 8203], [8232, 8234], [8239, 8240], [8287, 8288], [12288, 12289]], eof: [[0xffff, 0xffff]] } export class CharClass extends Expression { constructor(start: number, readonly type: string) { super(start) } toString() { return "@" + this.type } eq(expr: CharClass) { return this.type == expr.type } } export function exprEq(a: Expression, b: Expression): boolean { return a.constructor == b.constructor && a.eq(b as any) } export function exprsEq(a: readonly Expression[], b: readonly Expression[]) { return a.length == b.length && a.every((e, i) => exprEq(e, b[i])) } export class Prop extends Node { constructor(start: number, readonly at: boolean, readonly name: string, readonly value: readonly PropPart[]) { super(start) } eq(other: Prop) { return this.name == other.name && this.value.length == other.value.length && this.value.every((v, i) => v.value == other.value[i].value && v.name == other.value[i].name) } toString() { let result = (this.at ? "@" : "") + this.name if (this.value.length) { result += "=" for (let {name, value} of this.value) result += name ? `{${name}}` : /[^\w-]/.test(value!) ? JSON.stringify(value) : value } return result } static eqProps(a: readonly Prop[], b: readonly Prop[]) { return a.length == b.length && a.every((p, i) => p.eq(b[i])) } } export class PropPart extends Node { constructor(start: number, readonly value: string | null, readonly name: string | null) { super(start) } } function maybeParens(node: Expression, parent: Expression) { return node.prec < parent.prec ? 
"(" + node.toString() + ")" : node.toString() } generator-1.7.0/src/parse.ts000066400000000000000000000462421457412043500157730ustar00rootroot00000000000000import {GrammarDeclaration, RuleDeclaration, PrecDeclaration, TokenPrecDeclaration, TokenConflictDeclaration, TokenDeclaration, LocalTokenDeclaration, LiteralDeclaration, ContextDeclaration, ExternalTokenDeclaration, ExternalPropSourceDeclaration, ExternalSpecializeDeclaration, ExternalPropDeclaration, Identifier, Expression, NameExpression, ChoiceExpression, SequenceExpression, LiteralExpression, RepeatExpression, SetExpression, InlineRuleExpression, Prop, PropPart, SpecializeExpression, AnyExpression, ConflictMarker, CharClasses, CharClass} from "./node" import {GenError} from "./error" // Note that this is the parser for grammar files, not the generated parser let word = /[\w_-]+/gy // Some engines (specifically SpiderMonkey) have still not implemented \p try { word = /[\p{Alphabetic}\d_-]+/ugy } catch (_) {} const none: readonly any[] = [] export class Input { type = "sof" value: any = null start = 0 end = 0 constructor(readonly string: string, readonly fileName: string | null = null) { this.next() } lineInfo(pos: number) { for (let line = 1, cur = 0;;) { let next = this.string.indexOf("\n", cur) if (next > -1 && next < pos) { ++line cur = next + 1 } else { return {line, ch: pos - cur} } } } message(msg: string, pos: number = -1): string { let posInfo = this.fileName || "" if (pos > -1) { let info = this.lineInfo(pos) posInfo += (posInfo ? " " : "") + info.line + ":" + info.ch } return posInfo ? msg + ` (${posInfo})` : msg } raise(msg: string, pos: number = -1): never { throw new GenError(this.message(msg, pos)) } match(pos: number, re: RegExp) { let match = re.exec(this.string.slice(pos)) return match ? 
pos + match[0].length : -1 } next() { let start = this.match(this.end, /^(\s|\/\/.*|\/\*[^]*?\*\/)*/) if (start == this.string.length) return this.set("eof", null, start, start) let next = this.string[start] if (next == '"') { let end = this.match(start + 1, /^(\\.|[^"\\])*"/) if (end == -1) this.raise("Unterminated string literal", start) return this.set("string", readString(this.string.slice(start + 1, end - 1)), start, end) } else if (next == "'") { let end = this.match(start + 1, /^(\\.|[^'\\])*'/) if (end == -1) this.raise("Unterminated string literal", start) return this.set("string", readString(this.string.slice(start + 1, end - 1)), start, end) } else if (next == "@") { word.lastIndex = start + 1 let m = word.exec(this.string) if (!m) return this.raise("@ without a name", start) return this.set("at", m[0], start, start + 1 + m[0].length) } else if ((next == "$" || next == "!") && this.string[start + 1] == "[") { let end = this.match(start + 2, /^(?:\\.|[^\]\\])*\]/) if (end == -1) this.raise("Unterminated character set", start) return this.set("set", this.string.slice(start + 2, end - 1), start, end) } else if (/[\[\]()!~+*?{}<>\.,|:$=]/.test(next)) { return this.set(next, null, start, start + 1) } else { word.lastIndex = start let m = word.exec(this.string) if (!m) return this.raise("Unexpected character " + JSON.stringify(next), start) return this.set("id", m[0], start, start + m[0].length) } } set(type: string, value: any, start: number, end: number) { this.type = type this.value = value this.start = start this.end = end } eat(type: string, value: any = null) { if (this.type == type && (value == null || this.value === value)) { this.next() return true } else { return false } } unexpected(): never { return this.raise(`Unexpected token '${this.string.slice(this.start, this.end)}'`, this.start) } expect(type: string, value: any = null) { let val = this.value if (this.type != type || !(value == null || val === value)) this.unexpected() this.next() return val } parse() { return parseGrammar(this) } } function parseGrammar(input: Input) { let start = input.start let rules: RuleDeclaration[] = [] let prec: PrecDeclaration | null = null let tokens: TokenDeclaration | null = null let localTokens: LocalTokenDeclaration[] = [] let mainSkip: Expression | null = null let scopedSkip: {expr: Expression, topRules: readonly RuleDeclaration[], rules: readonly RuleDeclaration[]}[] = [] let dialects: Identifier[] = [] let context: ContextDeclaration | null = null let external: ExternalTokenDeclaration[] = [] let specialized: ExternalSpecializeDeclaration[] = [] let props: ExternalPropDeclaration[] = [] let propSources: ExternalPropSourceDeclaration[] = [] let tops: RuleDeclaration[] = [] let sawTop = false let autoDelim = false while (input.type != "eof") { let start = input.start if (input.eat("at", "top")) { if (input.type as any != "id") input.raise(`Top rules must have a name`, input.start) tops.push(parseRule(input, parseIdent(input))) sawTop = true } else if (input.type == "at" && input.value == "tokens") { if (tokens) input.raise(`Multiple @tokens declarations`, input.start) else tokens = parseTokens(input) } else if (input.eat("at", "local")) { input.expect("id", "tokens") localTokens.push(parseLocalTokens(input, start)) } else if (input.eat("at", "context")) { if (context) input.raise(`Multiple @context declarations`, start) let id = parseIdent(input) input.expect("id", "from") let source = input.expect("string") context = new ContextDeclaration(start, id, source) } else if 
(input.eat("at", "external")) { if (input.eat("id", "tokens")) external.push(parseExternalTokens(input, start)) else if (input.eat("id", "prop")) props.push(parseExternalProp(input, start)) else if (input.eat("id", "extend")) specialized.push(parseExternalSpecialize(input, "extend", start)) else if (input.eat("id", "specialize")) specialized.push(parseExternalSpecialize(input, "specialize", start)) else if (input.eat("id", "propSource")) propSources.push(parseExternalPropSource(input, start)) else input.unexpected() } else if (input.eat("at", "dialects")) { input.expect("{") for (let first = true; !input.eat("}"); first = false) { if (!first) input.eat(",") dialects.push(parseIdent(input)) } } else if (input.type == "at" && input.value == "precedence") { if (prec) input.raise(`Multiple precedence declarations`, input.start) prec = parsePrecedence(input) } else if (input.eat("at", "detectDelim")) { autoDelim = true } else if (input.eat("at", "skip")) { let skip = parseBracedExpr(input) if (input.type == "{") { input.next() let rules = [], topRules = [] while (!input.eat("}")) { if (input.eat("at", "top")) { topRules.push(parseRule(input, parseIdent(input))) sawTop = true } else { rules.push(parseRule(input)) } } scopedSkip.push({expr: skip, topRules, rules}) } else { if (mainSkip) input.raise(`Multiple top-level skip declarations`, input.start) mainSkip = skip } } else { rules.push(parseRule(input)) } } if (!sawTop) return input.raise(`Missing @top declaration`) return new GrammarDeclaration(start, rules, tops, tokens, localTokens, context, external, specialized, propSources, prec, mainSkip, scopedSkip, dialects, props, autoDelim) } function parseRule(input: Input, named?: Identifier) { let start = named ? named.start : input.start let id = named || parseIdent(input) let props = parseProps(input) let params: Identifier[] = [] if (input.eat("<")) while (!input.eat(">")) { if (params.length) input.expect(",") params.push(parseIdent(input)) } let expr = parseBracedExpr(input) return new RuleDeclaration(start, id, props, params, expr) } function parseProps(input: Input) { if (input.type != "[") return none let props = [] input.expect("[") while (!input.eat("]")) { if (props.length) input.expect(",") props.push(parseProp(input)) } return props } function parseProp(input: Input) { let start = input.start, value = [], name = input.value, at = input.type == "at" if (!input.eat("at") && !input.eat("id")) input.unexpected() if (input.eat("=")) for (;;) { if (input.type == "string" || input.type == "id") { value.push(new PropPart(input.start, input.value, null)) input.next() } else if (input.eat(".")) { value.push(new PropPart(input.start, ".", null)) } else if (input.eat("{")) { value.push(new PropPart(input.start, null, input.expect("id"))) input.expect("}") } else { break } } return new Prop(start, at, name, value) } function parseBracedExpr(input: Input): Expression { input.expect("{") let expr = parseExprChoice(input) input.expect("}") return expr } const SET_MARKER = "\ufdda" // (Invalid unicode character) function parseExprInner(input: Input): Expression { let start = input.start if (input.eat("(")) { if (input.eat(")")) return new SequenceExpression(start, none, [none, none]) let expr = parseExprChoice(input) input.expect(")") return expr } else if (input.type == "string") { let value = input.value input.next() if (value.length == 0) return new SequenceExpression(start, none, [none, none]) return new LiteralExpression(start, value) } else if (input.eat("id", "_")) { return new 
AnyExpression(start) } else if (input.type == "set") { let content = input.value, invert = input.string[input.start] == "!" let unescaped = readString(content.replace(/\\.|-|"/g, (m: string) => { return m == "-" ? SET_MARKER : m == '"' ? '\\"' : m })) let ranges: [number, number][] = [] for (let pos = 0; pos < unescaped.length;) { let code = unescaped.codePointAt(pos)! pos += code > 0xffff ? 2 : 1 if (pos < unescaped.length - 1 && unescaped[pos] == SET_MARKER) { let end = unescaped.codePointAt(pos + 1)! pos += end > 0xffff ? 3 : 2 if (end < code) input.raise("Invalid character range", input.start) addRange(input, ranges, code, end + 1) } else { if (code == SET_MARKER.charCodeAt(0)) code = 45 addRange(input, ranges, code, code + 1) } } input.next() return new SetExpression(start, ranges.sort((a, b) => a[0] - b[0]), invert) } else if (input.type == "at" && (input.value == "specialize" || input.value == "extend")) { let {start, value} = input input.next() let props = parseProps(input) input.expect("<") let token = parseExprChoice(input), content if (input.eat(",")) { content = parseExprChoice(input) } else if (token instanceof LiteralExpression) { content = token } else { input.raise(`@${value} requires two arguments when its first argument isn't a literal string`) } input.expect(">") return new SpecializeExpression(start, value, props, token, content) } else if (input.type == "at" && CharClasses.hasOwnProperty(input.value)) { let cls = new CharClass(input.start, input.value) input.next() return cls } else if (input.type == "[") { let rule = parseRule(input, new Identifier(start, "_anon")) if (rule.params.length) input.raise(`Inline rules can't have parameters`, rule.start) return new InlineRuleExpression(start, rule) } else { let id = parseIdent(input) if (input.type == "[" || input.type == "{") { let rule = parseRule(input, id) if (rule.params.length) input.raise(`Inline rules can't have parameters`, rule.start) return new InlineRuleExpression(start, rule) } else { if (input.eat(".") && id.name == "std" && CharClasses.hasOwnProperty(input.value)) { let cls = new CharClass(start, input.value) input.next() return cls } return new NameExpression(start, id, parseArgs(input)) } } } function parseArgs(input: Input) { let args = [] if (input.eat("<")) while (!input.eat(">")) { if (args.length) input.expect(",") args.push(parseExprChoice(input)) } return args } function addRange(input: Input, ranges: [number, number][], from: number, to: number) { if (!ranges.every(([a, b]) => b <= from || a >= to)) input.raise("Overlapping character range", input.start) ranges.push([from, to]) } function parseExprSuffix(input: Input): Expression { let start = input.start let expr = parseExprInner(input) for (;;) { let kind = input.type if (input.eat("*") || input.eat("?") || input.eat("+")) expr = new RepeatExpression(start, expr, kind as "*" | "+" | "?") else return expr } } function endOfSequence(input: Input) { return input.type == "}" || input.type == ")" || input.type == "|" || input.type == "/" || input.type == "/\\" || input.type == "{" || input.type == "," || input.type == ">" } function parseExprSequence(input: Input) { let start = input.start, exprs: Expression[] = [], markers = [none] do { // Add markers at this position for (;;) { let localStart = input.start, markerType!: "ambig" | "prec" if (input.eat("~")) markerType = "ambig" else if (input.eat("!")) markerType = "prec" else break markers[markers.length - 1] = markers[markers.length - 1].concat(new ConflictMarker(localStart, parseIdent(input), 
markerType)) } if (endOfSequence(input)) break exprs.push(parseExprSuffix(input)) markers.push(none) } while (!endOfSequence(input)) if (exprs.length == 1 && markers.every(ms => ms.length == 0)) return exprs[0] return new SequenceExpression(start, exprs, markers, !exprs.length) } function parseExprChoice(input: Input) { let start = input.start, left = parseExprSequence(input) if (!input.eat("|")) return left let exprs: Expression[] = [left] do { exprs.push(parseExprSequence(input)) } while (input.eat("|")) let empty = exprs.find(s => s instanceof SequenceExpression && s.empty) if (empty) input.raise("Empty expression in choice operator. If this is intentional, use () to make it explicit.", empty.start) return new ChoiceExpression(start, exprs) } function parseIdent(input: Input) { if (input.type != "id") input.unexpected() let start = input.start, name = input.value input.next() return new Identifier(start, name) } function parsePrecedence(input: Input) { let start = input.start input.next() input.expect("{") let items: {id: Identifier, type: "left" | "right" | "cut" | null}[] = [] while (!input.eat("}")) { if (items.length) input.eat(",") items.push({ id: parseIdent(input), type: input.eat("at", "left") ? "left" : input.eat("at", "right") ? "right" : input.eat("at", "cut") ? "cut" : null }) } return new PrecDeclaration(start, items) } function parseTokens(input: Input) { let start = input.start input.next() input.expect("{") let tokenRules: RuleDeclaration[] = [] let literals: LiteralDeclaration[] = [] let precedences: TokenPrecDeclaration[] = [] let conflicts: TokenConflictDeclaration[] = [] while (!input.eat("}")) { if (input.type == "at" && input.value == "precedence") { precedences.push(parseTokenPrecedence(input)) } else if (input.type == "at" && input.value == "conflict") { conflicts.push(parseTokenConflict(input)) } else if (input.type == "string") { literals.push(new LiteralDeclaration(input.start, input.expect("string"), parseProps(input))) } else { tokenRules.push(parseRule(input)) } } return new TokenDeclaration(start, precedences, conflicts, tokenRules, literals) } function parseLocalTokens(input: Input, start: number) { input.expect("{") let tokenRules: RuleDeclaration[] = [] let precedences: TokenPrecDeclaration[] = [] let fallback: {id: Identifier, props: readonly Prop[]} | null = null while (!input.eat("}")) { if (input.type == "at" && input.value == "precedence") { precedences.push(parseTokenPrecedence(input)) } else if (input.eat("at", "else") && !fallback) { fallback = {id: parseIdent(input), props: parseProps(input)} } else { tokenRules.push(parseRule(input)) } } return new LocalTokenDeclaration(start, precedences, tokenRules, fallback) } function parseTokenPrecedence(input: Input) { let start = input.start input.next() input.expect("{") let tokens: (LiteralExpression | NameExpression)[] = [] while (!input.eat("}")) { if (tokens.length) input.eat(",") let expr = parseExprInner(input) if (expr instanceof LiteralExpression || expr instanceof NameExpression) tokens.push(expr) else input.raise(`Invalid expression in token precedences`, expr.start) } return new TokenPrecDeclaration(start, tokens) } function parseTokenConflict(input: Input) { let start = input.start input.next() input.expect("{") let a = parseExprInner(input) if (!(a instanceof LiteralExpression || a instanceof NameExpression)) input.raise(`Invalid expression in token conflict`, a.start) input.eat(",") let b = parseExprInner(input) if (!(b instanceof LiteralExpression || b instanceof NameExpression)) 
input.raise(`Invalid expression in token conflict`, b.start) input.expect("}") return new TokenConflictDeclaration(start, a, b) } function parseExternalTokenSet(input: Input) { let tokens: {id: Identifier, props: readonly Prop[]}[] = [] input.expect("{") while (!input.eat("}")) { if (tokens.length) input.eat(",") let id = parseIdent(input) let props = parseProps(input) tokens.push({id, props}) } return tokens } function parseExternalTokens(input: Input, start: number) { let id = parseIdent(input) input.expect("id", "from") let from = input.expect("string") return new ExternalTokenDeclaration(start, id, from, parseExternalTokenSet(input)) } function parseExternalSpecialize(input: Input, type: "extend" | "specialize", start: number) { let token = parseBracedExpr(input) let id = parseIdent(input) input.expect("id", "from") let from = input.expect("string") return new ExternalSpecializeDeclaration(start, type, token, id, from, parseExternalTokenSet(input)) } function parseExternalPropSource(input: Input, start: number) { let id = parseIdent(input) input.expect("id", "from") return new ExternalPropSourceDeclaration(start, id, input.expect("string")) } function parseExternalProp(input: Input, start: number) { let externalID = parseIdent(input) let id = input.eat("id", "as") ? parseIdent(input) : externalID input.expect("id", "from") let from = input.expect("string") return new ExternalPropDeclaration(start, id, externalID, from) } function readString(string: string) { let point = /\\(?:u\{([\da-f]+)\}|u([\da-f]{4})|x([\da-f]{2})|([ntbrf0])|(.))|[^]/yig let out = "", m while (m = point.exec(string)) { let [all, u1, u2, u3, single, unknown] = m if (u1 || u2 || u3) out += String.fromCodePoint(parseInt(u1 || u2 || u3, 16)) else if (single) out += single == "n" ? "\n" : single == "t" ? "\t" : single == "0" ? "\0" : single == "r" ? "\r" : single == "f" ? "\f" : "\b" else if (unknown) out += unknown else out += all } return out } generator-1.7.0/src/rollup-plugin-lezer.js000066400000000000000000000020501457412043500205640ustar00rootroot00000000000000import {resolve, dirname} from "path" import {promises as fs} from "fs" import {buildParserFile} from "./index.js" export function lezer(config = {}) { let built = Object.create(null) return { name: "rollup-plugin-lezer", resolveId(source, importer) { let m = /^(.*\.grammar)(\.terms)?$/.exec(source) if (!m) return null let id = resolve(importer ? dirname(importer) : process.cwd(), m[1]) return m[2] ? `\0${id}.terms` : id }, load(id) { let m = /^\0?(.*\.grammar)(\.terms)?$/.exec(id) if (!m) return null if (!m[2]) this.addWatchFile(id) let base = m[1] let build = built[base] || (built[base] = fs.readFile(base, "utf8").then(code => buildParserFile(code, { fileName: base, moduleStyle: "es", exportName: config.exportName, warn: message => this.warn(message) }))) return build.then(result => m[2] ? 
result.terms : result.parser) }, watchChange(id) { if (built[id]) built[id] = null } } } export const rollup = lezer generator-1.7.0/src/test.ts000066400000000000000000000131461457412043500156350ustar00rootroot00000000000000import {Tree, NodeType, NodeProp, Parser} from "@lezer/common" const none: readonly any[] = [] class TestSpec { constructor(readonly name: string, readonly props: {prop: NodeProp<any>, value: any}[], readonly children: readonly TestSpec[] = none, readonly wildcard = false) {} static parse(spec: string): readonly TestSpec[] { let pos = 0, tok = "sof", value = "" function err(): never { throw new SyntaxError("Invalid test spec: " + spec) } function next() { while (pos < spec.length && /\s/.test(spec.charAt(pos))) pos++ if (pos == spec.length) return tok = "eof" let next = spec.charAt(pos++) if (next == "(" && spec.slice(pos, pos + 4) == "...)") { pos += 4 return tok = "..." } if (/[\[\](),=]/.test(next)) return tok = next if (/[^()\[\],="\s]/.test(next)) { let name = /[^()\[\],="\s]*/.exec(spec.slice(pos - 1)) value = name![0] pos += name![0].length - 1 return tok = "name" } if (next == '"') { let content = /^"((?:[^\\"]|\\.)*)"/.exec(spec.slice(pos - 1)) || err() value = JSON.parse(content[0]) pos += content[0].length - 1 return tok = "name" } return err() } next() function parseSeq() { let seq = [] while (tok != "eof" && tok != ")") { seq.push(parse()) if (tok == ",") next() } return seq } function parse() { let name = value, children = none, props = [], wildcard = false if (tok != "name") err() next() if (tok as any == "[") { next() while (tok as any != "]") { if (tok as any != "name") err() let prop = (NodeProp as any)[value] as NodeProp<any>, val = "" if (!(prop instanceof NodeProp)) err() next() if (tok as any == "=") { next() if (tok as any != "name") err() val = value next() } props.push({prop, value: prop.deserialize(val)}) } next() } if (tok as any == "(") { next() children = parseSeq() // @ts-ignore TypeScript doesn't understand that `next` may have mutated `tok` (#9998) if (tok != ")") err() next() } else if (tok as any == "...") { wildcard = true next() } return new TestSpec(name, props, children, wildcard) } let result = parseSeq() if (tok != "eof") err() return result } matches(type: NodeType) { if (type.name != this.name) return false for (let {prop, value} of this.props) if ((value || type.prop(prop)) && JSON.stringify(type.prop(prop)) != JSON.stringify(value)) return false return true } } function defaultIgnore(type: NodeType) { return /\W/.test(type.name) } export function testTree(tree: Tree, expect: string, mayIgnore = defaultIgnore) { let specs = TestSpec.parse(expect) let stack = [specs], pos = [0] tree.iterate({ enter(n) { if (!n.name) return let last = stack.length - 1, index = pos[last], seq = stack[last] let next = index < seq.length ? seq[index] : null if (next && next.matches(n.type)) { if (next.wildcard) { pos[last]++ return false } pos.push(0) stack.push(next.children) return undefined } else if (mayIgnore(n.type)) { return false } else { let parent = last > 0 ? stack[last - 1][pos[last - 1]].name : "tree" let after = next ? next.name + (parent == "tree" ? "" : " in " + parent) : `end of ${parent}` throw new Error(`Expected ${after}, got ${n.name} at ${n.to} \n${tree}`) } }, leave(n) { if (!n.name) return let last = stack.length - 1, index = pos[last], seq = stack[last] if (index < seq.length) throw new Error(`Unexpected end of ${n.name}. 
Expected ${seq.slice(index).map(s => s.name).join(", ")} at ${n.from}\n${tree}`) pos.pop() stack.pop() pos[last - 1]++ } }) if (pos[0] != specs.length) throw new Error(`Unexpected end of tree. Expected ${stack[0].slice(pos[0]).map(s => s.name).join(", ")} at ${tree.length}\n${tree}`) } function toLineContext(file: string, index: number) { const endEol = file.indexOf('\n', index + 80); const endIndex = endEol === -1 ? file.length : endEol; return file.substring(index, endIndex).split(/\n/).map(str => ' | ' + str).join('\n'); } export function fileTests(file: string, fileName: string, mayIgnore = defaultIgnore) { let caseExpr = /\s*#[ \t]*(.*)(?:\r\n|\r|\n)([^]*?)==+>([^]*?)(?:$|(?:\r\n|\r|\n)+(?=#))/gy let tests: { name: string, text: string, expected: string, configStr: string, config: object, strict: boolean, run(parser: Parser): void }[] = [] let lastIndex = 0; for (;;) { let m = caseExpr.exec(file) if (!m) throw new Error(`Unexpected file format in ${fileName} around\n\n${toLineContext(file, lastIndex)}`) let text = m[2].trim(), expected = m[3].trim() let [, name, configStr] = /(.*?)(\{.*?\})?$/.exec(m[1])! let config = configStr ? JSON.parse(configStr) : null let strict = !/⚠|\.\.\./.test(expected) tests.push({ name, text, expected, configStr, config, strict, run(parser: Parser) { if ((parser as any).configure && (strict || config)) parser = (parser as any).configure({strict, ...config}) testTree(parser.parse(text), expected, mayIgnore) } }) lastIndex = m.index + m[0].length if (lastIndex == file.length) break } return tests } generator-1.7.0/src/token.ts000066400000000000000000000252071457412043500157770ustar00rootroot00000000000000import {Term, union} from "./grammar" import {GenError} from "./error" import {Seq} from "@lezer/lr/dist/constants" export const MAX_CHAR = 0xffff export class Edge { constructor(readonly from: number, readonly to: number, readonly target: State) {} toString() { return `-> ${this.target.id}[label=${JSON.stringify( this.from < 0 ? "ε" : charFor(this.from) + (this.to > this.from + 1 ? "-" + charFor(this.to - 1) : ""))}]` } } function charFor(n: number) { return n > MAX_CHAR ? "∞" : n == 10 ? "\\n" : n == 13 ? "\\r" : n < 32 || n >= 0xd800 && n < 0xdfff ? 
"\\u{" + n.toString(16) + "}" : String.fromCharCode(n) } type Partition = {[id: number]: State[]} function minimize(states: State[], start: State) { let partition: Partition = Object.create(null) let byAccepting: {[terms: string]: State[]} = Object.create(null) for (let state of states) { let id = ids(state.accepting) let group = byAccepting[id] || (byAccepting[id] = []) group.push(state) partition[state.id] = group } for (;;) { let split = false, newPartition: Partition = Object.create(null) for (let state of states) { if (newPartition[state.id]) continue let group = partition[state.id] if (group.length == 1) { newPartition[group[0].id] = group continue } let parts = [] groups: for (let state of group) { for (let p of parts) { if (isEquivalent(state, p[0], partition)) { p.push(state) continue groups } } parts.push([state]) } if (parts.length > 1) split = true for (let p of parts) for (let s of p) newPartition[s.id] = p } if (!split) return applyMinimization(states, start, partition) partition = newPartition } } function isEquivalent(a: State, b: State, partition: Partition) { if (a.edges.length != b.edges.length) return false for (let i = 0; i < a.edges.length; i++) { let eA = a.edges[i], eB = b.edges[i] if (eA.from != eB.from || eA.to != eB.to || partition[eA.target.id] != partition[eB.target.id]) return false } return true } function applyMinimization(states: readonly State[], start: State, partition: Partition) { for (let state of states) { for (let i = 0; i < state.edges.length; i++) { let edge = state.edges[i], target = partition[edge.target.id][0] if (target != edge.target) state.edges[i] = new Edge(edge.from, edge.to, target) } } return partition[start.id][0] } let stateID = 1 export class State { edges: Edge[] = [] constructor(readonly accepting: Term[] = [], readonly id = stateID++) {} edge(from: number, to: number, target: State) { this.edges.push(new Edge(from, to, target)) } nullEdge(target: State) { this.edge(-1, -1, target) } compile() { let labeled: {[id: string]: State} = Object.create(null), localID = 0 let startState = explore(this.closure().sort((a, b) => a.id - b.id)) return minimize(Object.values(labeled), startState) function explore(states: State[]) { let newState = labeled[ids(states)] = new State(states.reduce((a: readonly Term[], s: State) => union(a, s.accepting), []) as Term[], localID++) let out: Edge[] = [] for (let state of states) for (let edge of state.edges) { if (edge.from >= 0) out.push(edge) } let transitions = mergeEdges(out) for (let merged of transitions) { let targets = merged.targets.sort((a, b) => a.id - b.id) newState.edge(merged.from, merged.to, labeled[ids(targets)] || explore(targets)) } return newState } } closure() { let result: State[] = [], seen: {[id: number]: boolean} = Object.create(null) function explore(state: State): void { if (seen[state.id]) return seen[state.id] = true // States with only epsilon edges and no accepting term that // isn't also in the next states are left out to help reduce the // number of unique state combinations if (state.edges.some(e => e.from >= 0) || (state.accepting.length > 0 && !state.edges.some(e => sameSet(state.accepting, e.target.accepting)))) result.push(state) for (let edge of state.edges) if (edge.from < 0) explore(edge.target) } explore(this) return result } findConflicts(occurTogether: (a: Term, b: Term) => boolean): Conflict[] { let conflicts: Conflict[] = [], cycleTerms = this.cycleTerms() function add(a: Term, b: Term, soft: number, aEdges: Edge[], bEdges?: Edge[]) { if (a.id < b.id) { [a, b] 
= [b, a]; soft = -soft } let found = conflicts.find(c => c.a == a && c.b == b) if (!found) conflicts.push(new Conflict(a, b, soft, exampleFromEdges(aEdges), bEdges && exampleFromEdges(bEdges))) else if (found.soft != soft) found.soft = 0 } this.reachable((state, edges) => { if (state.accepting.length == 0) return for (let i = 0; i < state.accepting.length; i++) for (let j = i + 1; j < state.accepting.length; j++) add(state.accepting[i], state.accepting[j], 0, edges) state.reachable((s, es) => { if (s != state) for (let term of s.accepting) { let hasCycle = cycleTerms.includes(term) for (let orig of state.accepting) if (term != orig) add(term, orig, hasCycle || cycleTerms.includes(orig) || !occurTogether(term, orig) ? 0 : 1, edges, edges.concat(es)) } }) }) return conflicts } cycleTerms(): Term[] { let work: State[] = [] this.reachable(state => { for (let {target} of state.edges) work.push(state, target) }) let table: Map<State, State[]> = new Map let haveCycle: State[] = [] for (let i = 0; i < work.length;) { let from = work[i++], to = work[i++] let entry = table.get(from) if (!entry) table.set(from, entry = []) if (entry.includes(to)) continue if (from == to) { if (!haveCycle.includes(from)) haveCycle.push(from) } else { for (let next of entry) work.push(from, next) entry.push(to) } } let result: Term[] = [] for (let state of haveCycle) { for (let term of state.accepting) { if (!result.includes(term)) result.push(term) } } return result } reachable(f: (s: State, edges: Edge[]) => void) { let seen: State[] = [], edges: Edge[] = [] ;(function explore(s: State) { f(s, edges) seen.push(s) for (let edge of s.edges) if (!seen.includes(edge.target)) { edges.push(edge) explore(edge.target) edges.pop() } })(this) } toString() { let out = "digraph {\n" this.reachable(state => { if (state.accepting.length) out += ` ${state.id} [label=${JSON.stringify(state.accepting.join())}];\n` for (let edge of state.edges) out += ` ${state.id} ${edge};\n` }) return out + "}" } // Tokenizer data is represented as a single flat array. This // contains regions for each tokenizer state. Region offsets are // used to identify states. // // Each state is laid out as: // - Token group mask // - Offset of the end of the accepting data // - Number of outgoing edges in the state // - Pairs of term ids and token masks that indicate the accepting // states, sorted by precedence // - Triples for the edges: each with a low and high bound and the // offset of the next state. 
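  //
  // As a hypothetical worked example (not data from a real grammar):
  // a machine with a single state that accepts term 5 in every group
  // and loops on "a"-"z" (97-122, with the upper bound stored
  // exclusive as 123) would serialize, after offsets are resolved, to:
  //
  //   [0xffff, 5, 1,   // group mask; accepting data ends at index 5; 1 edge
  //    5, 0xffff,      // accepts term 5, visible to all token groups
  //    97, 123, 0]     // edge for "a"-"z" looping back to offset 0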
toArray(groupMasks: {[id: number]: number}, precedence: readonly number[]) { let offsets: number[] = [] // Used to 'link' the states after building the arrays let data: number[] = [] this.reachable(state => { let start = data.length let acceptEnd = start + 3 + state.accepting.length * 2 offsets[state.id] = start data.push(state.stateMask(groupMasks), acceptEnd, state.edges.length) state.accepting.sort((a, b) => precedence.indexOf(a.id) - precedence.indexOf(b.id)) for (let term of state.accepting) data.push(term.id, groupMasks[term.id] || 0xffff) for (let edge of state.edges) data.push(edge.from, edge.to, -edge.target.id - 1) }) // Replace negative numbers with resolved state offsets for (let i = 0; i < data.length; i++) if (data[i] < 0) data[i] = offsets[-data[i] - 1] if (data.length > 2**16) throw new GenError("Tokenizer tables too big to represent with 16-bit offsets.") return Uint16Array.from(data) } stateMask(groupMasks: {[id: number]: number}) { let mask = 0 this.reachable(state => { for (let term of state.accepting) mask |= (groupMasks[term.id] || 0xffff) }) return mask } } export class Conflict { constructor(readonly a: Term, readonly b: Term, // Conflicts between two non-cyclic tokens are marked as // 'soft', with a negative number if a is shorter than // b, and a positive if b is shorter than a. public soft: number, readonly exampleA: string, readonly exampleB?: string) {} } function exampleFromEdges(edges: readonly Edge[]) { let str = "" for (let i = 0; i < edges.length; i++) str += String.fromCharCode(edges[i].from) return str } function ids(elts: {id: number}[]) { let result = "" for (let elt of elts) { if (result.length) result += "-" result += elt.id } return result } function sameSet<T>(a: readonly T[], b: readonly T[]) { if (a.length != b.length) return false for (let i = 0; i < a.length; i++) if (a[i] != b[i]) return false return true } class MergedEdge { constructor(readonly from: number, readonly to: number, readonly targets: State[]) {} } // Merge multiple edges (tagged by character ranges) into a set of // mutually exclusive ranges pointing at all target states for that // range function mergeEdges(edges: Edge[]): MergedEdge[] { let separate: number[] = [], result: MergedEdge[] = [] for (let edge of edges) { if (!separate.includes(edge.from)) separate.push(edge.from) if (!separate.includes(edge.to)) separate.push(edge.to) } separate.sort((a, b) => a - b) for (let i = 1; i < separate.length; i++) { let from = separate[i - 1], to = separate[i] let found: State[] = [] for (let edge of edges) if (edge.to > from && edge.from < to) { for (let target of edge.target.closure()) if (!found.includes(target)) found.push(target) } if (found.length) result.push(new MergedEdge(from, to, found)) } let eof = edges.filter(e => e.from == Seq.End && e.to == Seq.End) if (eof.length) { let found: State[] = [] for (let edge of eof) for (let target of edge.target.closure()) if (!found.includes(target)) found.push(target) if (found.length) result.push(new MergedEdge(Seq.End, Seq.End, found)) } return result } generator-1.7.0/test/000077500000000000000000000000001457412043500144715ustar00rootroot00000000000000generator-1.7.0/test/cases/000077500000000000000000000000001457412043500155675ustar00rootroot00000000000000generator-1.7.0/test/cases/AlternativeTop.txt000066400000000000000000000002611457412043500212700ustar00rootroot00000000000000@top A { "a" } @top B { "b" } @tokens { "a" "b" } # Recognize "a" a ==> A(a) # Recognize "a" { "top": "A" } a ==> A(a) # Recognize "b" { "top": "B" } b ==> B(b) 
generator-1.7.0/test/cases/AnyChar.txt000066400000000000000000000002711457412043500176550ustar00rootroot00000000000000@top T { A+ } @tokens { A { "\\" _ "/" } } # Matches any char \_/\ /\x/ ==> T(A, A, A) # Matches astral chars \🤢/ ==> T(A) # Doesn't match multiple chars \xy/ ==> T(⚠) generator-1.7.0/test/cases/Astral.txt000066400000000000000000000004751457412043500175640ustar00rootroot00000000000000@top T { (Id | Thing)+ } @skip { whitespace } @tokens { whitespace { " "+ } identifierChar { @asciiLetter | $[\u{a1}-\u{10ffff}] } Id { identifierChar+ } Thing { "-" ("💩" | "🦆") } } # Tokens with astral characters foo föö 象𫝄鼻 -💩 -🦆 -& -🍰 ==> T(Id,Id,Id,Thing,Thing,⚠,⚠,Id) generator-1.7.0/test/cases/AstralRuleName.txt000066400000000000000000000002331457412043500212050ustar00rootroot00000000000000@top T { (象𫝄鼻 | Föó)+ } 象𫝄鼻 { "x" } @tokens { Föó { "y" } } # Rules with astral characters xyx ==> T(象𫝄鼻, Föó, 象𫝄鼻) generator-1.7.0/test/cases/AutoDelimTag.txt000066400000000000000000000017731457412043500206570ustar00rootroot00000000000000@top T { expr+ } expr { ParenExpr { "(" Number ")" } | DoubleExpr { "[[" Number "]]" } | BracketExpr | WeirdExpr | DualExpr } BracketExpr { BracketLeft Number BracketRight } WeirdExpr { "((" Number "()" } DualExpr { ("{" | "{{") Number ("}" | "}}") } @tokens { Number { @digit+ } BracketLeft { "[|" } BracketRight { "|]" } "[["[@name=DoubleLeft] "]]"[@name=DoubleRight] "(" ")" "{{" "}}" "{" "}" "((" "()" } @detectDelim # Assigns delimiter node props (11) ==> T(ParenExpr("("[closedBy=")"], Number, ")"[openedBy="("])) # Sees through rules [|50|] ==> T(BracketExpr(BracketLeft[closedBy="BracketRight"], Number, BracketRight[openedBy="BracketLeft"])) # Doesn't assign delim when tokens self-close ((5() ==> T(WeirdExpr("((", Number, "()")) # Doesn't assign delim when tokens are part of a choice {{8}} ==> T(DualExpr("{{", Number, "}}")) # Recognizes named literals [[5]] ==> T(DoubleExpr(DoubleLeft[closedBy="DoubleRight"], Number, DoubleRight[openedBy="DoubleLeft"])) generator-1.7.0/test/cases/BadGroup.txt000066400000000000000000000001401457412043500200260ustar00rootroot00000000000000@top T { expr+ } expr[@isGroup] { "foo" | A { "bar" } B { "baz" } } //! cannot define a group generator-1.7.0/test/cases/BadPseudoProp.txt000066400000000000000000000000771457412043500210430ustar00rootroot00000000000000@top T { a } a[name=Aha] { "x" } //! (did you mean '@name'?) 
generator-1.7.0/test/cases/BinaryExpr.txt000066400000000000000000000016321457412043500204150ustar00rootroot00000000000000@precedence { mult @left, plus @left } @top T { expr } expr { atom | BinaryExpr } BinaryExpr { expr !mult MultOp expr | expr !plus AddOp expr } atom { Symbol | "(" expr ")" } @tokens { MultOp { "*" | "/" } AddOp { "+" | "-" } Symbol { "x" | "y" } } # Parenthesized (x+y)/x ==> T(BinaryExpr(BinaryExpr(Symbol,AddOp,Symbol),MultOp,Symbol)) # Associativity x+x+x+x ==> T(BinaryExpr(BinaryExpr(BinaryExpr(Symbol,AddOp,Symbol),AddOp,Symbol),AddOp,Symbol)) # Precedence x+x*x-x ==> T(BinaryExpr(BinaryExpr(Symbol,AddOp,BinaryExpr(Symbol,MultOp,Symbol)),AddOp,Symbol)) # Mixed precedence x*x+y/y ==> T(BinaryExpr(BinaryExpr(Symbol,MultOp,Symbol),AddOp,BinaryExpr(Symbol,MultOp,Symbol))) # Duplicate operator x++y ==> T(BinaryExpr(BinaryExpr(Symbol,AddOp,⚠),AddOp,Symbol)) # Dropped character x%+y ==> T(BinaryExpr(Symbol,⚠,AddOp,Symbol)) # Missing operator xy ==> T(Symbol,⚠(Symbol)) generator-1.7.0/test/cases/Conflict.txt000066400000000000000000000006031457412043500200700ustar00rootroot00000000000000@top T { expr+ } expr { ArrowExpr | ParenExpr | Identifier } ParenExpr { "(" expr ")" } ArrowExpr { "(" ParamName ")" "=>" expr } Identifier { word ~arrow } ParamName { word ~arrow } @skip { whitespace } @tokens { whitespace { @whitespace+ } word { @asciiLetter+ } } # Arrow (a) => b ==> T(ArrowExpr(ParamName, Identifier)) # Paren expr (a) ==> T(ParenExpr(Identifier)) generator-1.7.0/test/cases/ConflictingTopName.txt000066400000000000000000000000621457412043500220510ustar00rootroot00000000000000@top A { A } A { "a" } //! Duplicate definition generator-1.7.0/test/cases/Cut.txt000066400000000000000000000003371457412043500170660ustar00rootroot00000000000000@precedence { statement @cut } @top T { statement* } statement { Block { !statement "{" statement* "}" } | expression ";" } expression { "x" | "(" expression ")" | "{" "}" } # Statement is block {} ==> T(Block) generator-1.7.0/test/cases/DeepBinary.txt000066400000000000000000000162311457412043500203550ustar00rootroot00000000000000@top T { expr* } @precedence { bin @left } expr { X { "x" } | Bin { expr !bin "+" expr } | Blk { "{" expr* "}" } } # Can parse a very deep set of expressions 
x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x
+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+
x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x ==> T(...) generator-1.7.0/test/cases/DeepConflict.txt000066400000000000000000000001411457412043500206630ustar00rootroot00000000000000@top T { A* } A { B ~ambig "." | C ~ambig "." } B { C } C { "x" } # Resolve x. ==> T(A(C)) generator-1.7.0/test/cases/DefineGroup.txt000066400000000000000000000004351457412043500205410ustar00rootroot00000000000000@top T { expr* } expr[@isGroup=Expression] { atom | ParenExpr { "(" expr ")" } } atom { Id | Number } @tokens { Id { "a"+ } Number { "1"+ } "(" ")" } # Adds the group prop a(1) ==> T(Id[group=Expression],ParenExpr[group=Expression]("(", Number[group=Expression], ")")) generator-1.7.0/test/cases/Dialect.txt000066400000000000000000000006461457412043500177030ustar00rootroot00000000000000@dialects { a, b } @top T { (A | B | C)+ } A { "a" } B { "b" } C { "c" } @tokens { "a"[@dialect=a,@name=] "b"[@dialect=b,@name=] } # No dialect cc ==> T(C, C) # No dialect, error ca ==> T(C, ⚠) # Dialect A {"dialect": "a"} ca ==> T(C, A) # Dialect A, error {"dialect": "a"} cab ==> T(C, A, ⚠) # Dialect B {"dialect": "b"} bc ==> T(B, C) # Both dialects {"dialect": "a b"} abc ==> T(A, B, C) generator-1.7.0/test/cases/DynamicPrecedence.txt000066400000000000000000000005401457412043500216710ustar00rootroot00000000000000@top T { (A | B)+ } A[@dynamicPrecedence=2] { "foo" "bar" ~ambig } B[@dynamicPrecedence=1] { "foo" ("bar" | "baz" ~ambig | Inner) ~ambig } Inner[@dynamicPrecedence=-1] { ("baz" | "quux") ~ambig } # Prefers As foobarfoobar ==> T(A, A) # Can parse Bs foobazfoobar ==> T(B, A) # Will use penalized rules when necessary fooquux ==> T(B(Inner)) generator-1.7.0/test/cases/EmptyChoice.txt000066400000000000000000000000641457412043500205410ustar00rootroot00000000000000@top Top { | "a" } //! empty expression in choice generator-1.7.0/test/cases/EmptyExpressions.txt000066400000000000000000000001311457412043500216640ustar00rootroot00000000000000@top T { X+ } y {} X { "x" "" () y } # Ignores empty expressions xxx ==> T(X, X, X) generator-1.7.0/test/cases/EmptyToken.txt000066400000000000000000000000701457412043500204240ustar00rootroot00000000000000@top T { X } @tokens { X { "x"* } } //! zero-length generator-1.7.0/test/cases/Eof.txt000066400000000000000000000002011457412043500170320ustar00rootroot00000000000000@top A { (X | Y)+ } @tokens { X { "x" } Y { "x" @eof } @precedence { Y, X } } # Matches EOF markers xxx ==> A(X, X, Y) generator-1.7.0/test/cases/ExplicitConflict.txt000066400000000000000000000001521457412043500215710ustar00rootroot00000000000000@top T { xA | xB } @tokens { xA { "x" } xB { "xx" } @conflict { xA, xB } } //! 
Overlapping tokens 
generator-1.7.0/test/cases/ExplicitInline.txt000066400000000000000000000002001457412043500212400ustar00rootroot00000000000000@top T { Foo } prefix[@inline] { "::" (A "::")* | (A "::")+ } Foo { prefix A } A { "a" } # Compiles a::a ==> T(Foo(A, A)) 
generator-1.7.0/test/cases/ExternalProp.txt000066400000000000000000000001471457412043500207550ustar00rootroot00000000000000@top T { A+ } @external prop tag from "./script" A[tag=55] { "A" } # Attaches the prop A ==> T(A) 
generator-1.7.0/test/cases/ExternalSpecializer.txt000066400000000000000000000004341457412043500223060ustar00rootroot00000000000000@top T { (One | Two | Id)+ } @skip { space } One { one Id } Two { two Id } @external specialize {Id} spec1 from "./something" { one, two } @tokens { Id { @asciiLetter+ } space { @whitespace+ } } # Produces specialized tokens one a two b three ==> T(One(Id), Two(Id), Id) 
generator-1.7.0/test/cases/ExternalTokens.txt000066400000000000000000000003721457412043500213000ustar00rootroot00000000000000@top T { expr* } expr { Braced { braceOpen expr braceClose } | X { "x" (Dot Y { "y" })? } } @external tokens ext1 from "./external_tokens.js" { braceOpen, braceClose, Dot } # Uses external tokens x{x}x.y ==> T(X, Braced(X), X(Dot, Y)) 
generator-1.7.0/test/cases/ForcedSkipReduce.txt000066400000000000000000000006011457412043500215060ustar00rootroot00000000000000@top T { "a"* } @skip { LineComment | space | BlockComment } @skip {} { BlockComment { "/*" blockCommentContent* blockCommentEnd } } @local tokens { blockCommentEnd { "*/" } @else blockCommentContent } @tokens { LineComment { "//" ![\n]* } space { @whitespace+ } } # Properly terminates unfinished skipped terms // Line /* Block ==> T(LineComment, BlockComment(⚠)) 
generator-1.7.0/test/cases/InconsistentSkip.txt000066400000000000000000000001661457412043500216420ustar00rootroot00000000000000@top T { A+ } @skip { space } @skip {} { A { "a" "b"? } } @tokens { space { " "+ } } //! Inconsistent skip sets 
generator-1.7.0/test/cases/InlineRule.txt000066400000000000000000000001321457412043500203720ustar00rootroot00000000000000@top T { (A{"a"} | B{"b"})+ } # Assigns tags to tagged expressions baba ==> T(B,A,B,A) 
generator-1.7.0/test/cases/InvalidTokenArg.txt000066400000000000000000000002271457412043500213520ustar00rootroot00000000000000@top T { foo<bar> } bar { bar "x" | "" } foo<x> { token<x> } @tokens { token<x> { x "!" } } //! Reference to token rule 'bar', which isn't found 
generator-1.7.0/test/cases/LocalTokens.txt000066400000000000000000000007151457412043500205510ustar00rootroot00000000000000@top T { expr* } expr { X { "x" } | String { '"' (stringContent | Interpolation)* stringEnd } } Interpolation { InterpolationStart expr InterpolationEnd } @local tokens { stringEnd { '"' } InterpolationStart { "{{" } @else stringContent } @tokens { InterpolationEnd { "}}" } } # Can parse else tokens "foo{{x}}bar{{x}}" ==> T(String(Interpolation(InterpolationStart,X,InterpolationEnd),Interpolation(InterpolationStart,X,InterpolationEnd))) 
generator-1.7.0/test/cases/LocalTokensBadMix.txt000066400000000000000000000006011457412043500216300ustar00rootroot00000000000000@top T { expr* } expr { X { "x" } | String { '"' (stringContent | Interpolation)* stringEnd? } } Interpolation { InterpolationStart expr InterpolationEnd } @local tokens { stringEnd { '"' } InterpolationStart { "{{" } @else stringContent } @tokens { InterpolationEnd { "}}" } } //! 
Tokens from a local token group used together with other tokens (stringEnd with "\"") 
generator-1.7.0/test/cases/LocalTokensBadSkip.txt000066400000000000000000000006161457412043500220070ustar00rootroot00000000000000@top T { expr* } @skip { " " } expr { X { "x" } | String { '"' (stringContent | Interpolation)* stringEnd } } Interpolation { InterpolationStart expr InterpolationEnd } @local tokens { stringEnd { '"' } InterpolationStart { "{{" } @else stringContent } @tokens { InterpolationEnd { "}}" } } //! Tokens from a local token group used together with other tokens (stringEnd with " ") 
generator-1.7.0/test/cases/LongerToken.txt000066400000000000000000000001501457412043500205630ustar00rootroot00000000000000@top T { (A { "foo" } | B { "foobar" } | C { "baz" })+ } # Properly splits tokens foobaz ==> T(A, C) 
generator-1.7.0/test/cases/NameRules.txt000066400000000000000000000002501457412043500202200ustar00rootroot00000000000000@top T { (A | b | @specialize<i, "ii">[@name=I])+ } A { "a" } @tokens { b[@name=B] { "b" } i { "i"+ } } # Uses names assigned to rules abaii ==> T(A, B, A, I) 
generator-1.7.0/test/cases/NodeDeclaration.txt000066400000000000000000000003411457412043500213610ustar00rootroot00000000000000@top Doc { ("a" | "(" B ")")+ } B { "b" } @tokens { "a"[@name=A] "("[@name=ParenOpen] ")"[@name=ParenClose] } # Adds a document node a ==> Doc(A) # Applies punctuation info (b) ==> Doc(ParenOpen, B, ParenClose) 
generator-1.7.0/test/cases/NotLALR.txt000066400000000000000000000002231457412043500175400ustar00rootroot00000000000000// This grammar is LR(1) but not LALR(1) @top T { "a" E "a" | "b" E "b" | "a" F "b" | "b" F "a" } E { "e" } F { "e" } # Can parse aeb ==> T(F) 
generator-1.7.0/test/cases/OverlappingGroup.txt000066400000000000000000000003101457412043500216250ustar00rootroot00000000000000@top T { statement* } expr[@isGroup=Expression] { Id | ParenExpr { "(" expr ")" } } statement[@isGroup=Statement] { Id | ExprStatement { ":" expr } } @tokens { Id { "a"+ } } //! overlap 
generator-1.7.0/test/cases/OverlappingToken.txt000066400000000000000000000001301457412043500216110ustar00rootroot00000000000000@top T { (x | y)+ } @tokens { x { "x"+ } y { "x" "y"* } } //! 
Overlapping tokens 
generator-1.7.0/test/cases/PrecedenceOrder.txt000066400000000000000000000004631457412043500213640ustar00rootroot00000000000000@top T { (Tag | "<" | "<<" | "<<<")+ } @skip { space } @tokens { space { " "+ } Tag { "<" "<"* @asciiLetter+ } @precedence { Tag, "<<" } @precedence { Tag, "<" } @precedence { Tag, "<<<" } "<" "<<" "<<<" } # Doesn't create an accidental ordering <a <<< << < ==> T(Tag, "<<<", "<<", "<") 
generator-1.7.0/test/cases/Recover.txt000066400000000000000000000006031457412043500177340ustar00rootroot00000000000000@top T { (Class | Block)* } Class { "class" "{" "classitem"* "}" } Block { "{" (Block | "statement")* "}" } @skip { whitespace } @tokens { whitespace { @whitespace+ } } # Can resynchronize to an outer context { { { class { classitem classitem } ==> T(Block(Block(Block(⚠),⚠),⚠),Class) # Properly places end-of-file errors { { { ==> T(Block(Block(Block(⚠),⚠),⚠)) 
generator-1.7.0/test/cases/RecoverInsertInParent.txt000066400000000000000000000003021457412043500225600ustar00rootroot00000000000000@top Program { Variable } Variable { Dollar Name } @tokens { Dollar { "$" } Name { $[a-z]+ } } # Puts the error node for an inserted token in the right place abc ==> Program(Variable(⚠,Name)) 
generator-1.7.0/test/cases/RecoverRecursiveEof.txt000066400000000000000000000002411457412043500222540ustar00rootroot00000000000000// Issue #9 @precedence { call } @top Script { A+ } A { B | "{" "x" "}" } B { A !call "{" "}" | "x" | "(" B ")" } # Incomplete (x ==> Script(A(B(B,⚠))) 
generator-1.7.0/test/cases/RecursiveToken.txt000066400000000000000000000001271457412043500213000ustar00rootroot00000000000000@top T { Foo } @tokens { Foo { bar+ } bar { Foo } } //! recursion in token rules 
generator-1.7.0/test/cases/ScopedSkip.txt000066400000000000000000000003731457412043500203770ustar00rootroot00000000000000@top T { A+ } @skip { spaces } @skip { dashes } { A { "x" | "(" "b" ")" } } @tokens { spaces { " "+ } dashes { "-"+ } } # Applies the correct skip rules x (---b---) (b) ==> T(A, A, A) # Marks invalid whitespace ( b ) ==> T(A(⚠, ⚠)) 
generator-1.7.0/test/cases/SeparateConflictingTokens.txt000066400000000000000000000001731457412043500234410ustar00rootroot00000000000000@top T { (A | B)+ } A { "<" "x" A? ">" } B { "<<" ">>" } # Token from unrelated state can't shadow <x<x>> ==> T(A(A)) 
generator-1.7.0/test/cases/SkipExpr.txt000066400000000000000000000003751457412043500201020ustar00rootroot00000000000000@top T { Foo { "foo" }+ } @skip { " " | "<" skipContent* ">" } skipContent { A { "a" } | B { "b" } } # Can skip the simple part of the skip expression foo foo ==> T(Foo,Foo) # Outputs tags from skipped content foo <aba> foo ==> T(Foo,A,B,A,Foo) 
generator-1.7.0/test/cases/SkipPosition.txt000066400000000000000000000006171457412043500207670ustar00rootroot00000000000000@top T { Statement+ } Statement { Variable ";" | Variable Variable ";" } Variable { identifier } @skip { space | Comment } @tokens { space { @whitespace+ } Comment { "'" ![']* "'" } identifier { "x" } } # Puts skipped content in the right tree position x'c';'c' x 'c' x 'c'; 'c' ==> T(Statement(Variable, Comment), Comment, Statement(Variable, Comment, Variable, Comment), Comment) 
generator-1.7.0/test/cases/Specialize.txt000066400000000000000000000012661457412043500204250ustar00rootroot00000000000000@top T { (statement ";")+ } statement { Decl { kw<"let"> Id "=" Number } | Print { kw<"print"> Number } | Exit { kw<"exit"> } | Async { kwExt<"async"> Number } | Id } kw<word> { @specialize<Id, word> } kwExt<word> { @extend<Id, word> } @skip { whitespace } @tokens { whitespace { @whitespace+ } Id { @asciiLetter+ } Number { @digit+ } } # Can use a contextual keyword as regular identifier let async = 10; exit; ==> T(Decl(Id, Number), Exit) # Cannot use a non-contextual keyword as identifier print 10; let print 2; ==> T(Print(Number),Decl(⚠),⚠,Print(Number)) # Use of extended token can be determined by next token async 1; async; ==> T(Async(Number), Id) 
generator-1.7.0/test/cases/SpecializeWrong.txt000066400000000000000000000002011457412043500214300ustar00rootroot00000000000000@top T { @specialize<name, "x"> | @extend<name, "x"> } @tokens { name { @asciiLetter+ } } //! Conflicting specialization 
generator-1.7.0/test/cases/TailRecursiveToken.txt000066400000000000000000000010631457412043500221120ustar00rootroot00000000000000@top T { (Single | Open "." Close)+ } @skip { spaces } @tokens { spaces { " "+ } Single { "`" toEnd } toEnd { "`" | ![`$] toEnd | "$" afterDollarToEnd } afterDollarToEnd { "`" | ![`{$] toEnd | "$" afterDollarToEnd } Open { "`" toBrace } toBrace { ![`$] toBrace | "$" afterDollarToBrace } afterDollarToBrace { "{" | ![`\{$] toBrace | "$" afterDollarToBrace } Close { "}" toEnd } } # Single part `foo` ==> T(Single) # Interpolation `foo${.}bar` ==> T(Open, Close) # Dollar signs `$$` `$` `$${.}` ==> T(Single, Single, Open, Close) 
generator-1.7.0/test/cases/TokenArgs.txt000066400000000000000000000004721457412043500202300ustar00rootroot00000000000000@top T { (kw<"let"> | kw<"if"> | Foo)+ } @skip { whitespace } @tokens { whitespace { @whitespace+ } kw<word>[@name={word}] { word } Foo { bar<"foo"> } bar<x> { baz<x> } baz<z> { "!" z } } # Allows parameterized tokens let if let ==> T(let, if, let) # Tokens can call other tokens !foo ==> T(Foo) 
generator-1.7.0/test/cases/TokenExpr.txt000066400000000000000000000007441457412043500202540ustar00rootroot00000000000000@top T { (Word | Number | Operator)+ } @skip { whitespace } @tokens { whitespace { @whitespace+ } Word { @asciiLetter (letter | @digit)* } Number { (@digit+ ("." @digit*)? | "." @digit+) (("e" | "E") ("+" | "-")? @digit+)? } Operator { "+" "+"? | "-" } letter { $[a-zA-Z] } } # Word tokens Hello Catch22 Foo azAZ09 ==> T(Word,Word,Word,Word) # Numbers and operators 50 + 200e-5 ++ .2 - 111.111e+111 ==> T(Number,Operator,Number,Operator,Number,Operator,Number) 
generator-1.7.0/test/cases/TokenGroups.txt000066400000000000000000000005041457412043500206070ustar00rootroot00000000000000@precedence { div @left } @top T { expr } expr { RegExp | Symbol | BinOp } BinOp { expr !div "/" expr } @skip { whitespace } @tokens { whitespace { @whitespace+ } Symbol { @asciiLetter+ } RegExp { "/" ![/]+ "/" } } # Disambiguates division from regexp x / y / /foo/ ==> T(BinOp(BinOp(Symbol,Symbol),RegExp)) 
generator-1.7.0/test/cases/TokenPrecedence.txt000066400000000000000000000002311457412043500213620ustar00rootroot00000000000000@top T { (A | B | BB)+ } @tokens { @precedence { BB, B } A { "A" } B { "B" "."? } BB { "BB" } } # Token precedence ABBAB ==> T(A, BB, A, B) 
generator-1.7.0/test/cases/TopName.txt000066400000000000000000000001201457412043500176640ustar00rootroot00000000000000@top lowerCase { "x" } # It keeps a lower-case top rule name x ==> lowerCase 
generator-1.7.0/test/cases/TrailingComma.txt000066400000000000000000000002371457412043500210600ustar00rootroot00000000000000@top T { ("," Expr)* ","? } @tokens { Expr { "E" } } # With trailing comma ,E,E, ==> T(Expr,Expr) # Without trailing comma ,E,E,E ==> T(Expr,Expr,Expr) 
generator-1.7.0/test/cases/Tuple.txt000066400000000000000000000002761457412043500174260ustar00rootroot00000000000000@top T { "(" (Expr ",")* Expr? ")" } @tokens { Expr { "E" } } # Can parse with trailing comma (E,E,) ==> T(Expr,Expr) # Can parse without trailing comma (E,E,E) ==> T(Expr,Expr,Expr) 
generator-1.7.0/test/cases/UndefinedTokenRule.txt000066400000000000000000000001301457412043500220560ustar00rootroot00000000000000@top T { X } @tokens { X { y } } //! Reference to token rule 'y', which isn't found 
generator-1.7.0/test/cases/Whitespace.txt000066400000000000000000000003521457412043500204240ustar00rootroot00000000000000@top T { X+ } @skip { whitespace | Comment } @tokens { whitespace { (" " | "\n")+ } Comment { "#" ![\n]* } X { "x" } } # Skips whitespace x x ==> T(X, X) # Skips comments x x # I'm a comment! x ==> T(X, X, Comment, X) 
generator-1.7.0/test/cases/WrongTokenArgs.txt000066400000000000000000000001251457412043500212400ustar00rootroot00000000000000@top T { X<"hi"> } @tokens { X<a, b> { a b } } //! 
Incorrect number of arguments generator-1.7.0/test/cases/ZeroLengthReduction.txt000066400000000000000000000001741457412043500222700ustar00rootroot00000000000000@top T { Foo* } Foo { Attrs ";" } Attrs { "@"* } # Puts zero-length reductions in the right parent ; ==> T(Foo(Attrs)) generator-1.7.0/test/test-cases.ts000066400000000000000000000045441457412043500171230ustar00rootroot00000000000000import {buildParser} from "../dist/index.js" import {NodeProp} from "@lezer/common" import {LRParser, ExternalTokenizer, InputStream} from "@lezer/lr" // @ts-ignore import {fileTests} from "../dist/test.js" import ist from "ist" import * as fs from "fs" import * as path from "path" import {fileURLToPath} from "url" let caseDir = path.join(path.dirname(fileURLToPath(import.meta.url)), "cases") function externalTokenizer(name: string, terms: {[name: string]: number}) { if (name == "ext1") return new ExternalTokenizer((input: InputStream) => { if (input.next == "{".charCodeAt(0)) { input.advance(); input.acceptToken(terms.braceOpen) } else if (input.next == "}".charCodeAt(0)) { input.advance(); input.acceptToken(terms.braceClose) } else if (input.next == ".".charCodeAt(0)) { input.advance(); input.acceptToken(terms.Dot) } }) throw new Error("Undefined external tokenizer " + name) } function externalSpecializer(name: string, terms: {[name: string]: number}) { if (name == "spec1") return (value: string) => value == "one" ? terms.one : value == "two" ? terms.two : -1 throw new Error("Undefined external specialize " + name) } function externalProp() { return new NodeProp({deserialize: x => x}) } describe("Cases", () => { for (let file of fs.readdirSync(caseDir)) { let match = /^(.*)\.txt$/.exec(file) if (!match) continue let name = match[1], fileName = path.join(caseDir, file) let content = fs.readFileSync(fileName, "utf8") let grammar = /^([^]*?)($|\n# )/.exec(content)! content = content.slice(grammar[1].length) let parser: LRParser | null = null let force = () => { if (!parser) parser = buildParser(grammar[1], { fileName, externalTokenizer, externalSpecializer, externalProp, warn(msg) { throw new Error(msg) } }) return parser } let expectedErr = /\/\/! 
(.*)/.exec(grammar[1]) let noCases = !/\S/.test(content) if (noCases && !expectedErr) throw new Error("Test with neither expected errors nor input cases (" + file + ")") if (expectedErr) it(`${name} fails`, () => { ist.throws(force, (e: Error) => e.message.toLowerCase().indexOf(expectedErr![1].trim().toLowerCase()) >= 0) }) if (!noCases) for (let {name: n, run} of fileTests(content, file)) it(name + "/" + n, () => run(force())) } }) generator-1.7.0/test/test-parse.ts000066400000000000000000000551331457412043500171370ustar00rootroot00000000000000import {buildParser, BuildOptions} from "../dist/index.js" import {Tree, TreeFragment, NodeIterator, NodeProp, parseMixed, SyntaxNode} from "@lezer/common" import {LRParser, ParserConfig} from "@lezer/lr" // @ts-ignore import {testTree} from "../dist/test.js" import ist from "ist" function p(text: string, options?: BuildOptions, config?: ParserConfig): () => LRParser { let value: LRParser | null = null return () => { if (!value) { value = buildParser(text, Object.assign({}, {warn(e: string) { throw new Error(e) }}, options)) if (config) value = value.configure(config) } return value } } function shared(a: Tree, b: Tree) { let inA = new Set(), shared = 0 ;(function register(t: any) { if (t instanceof Tree) { let mounted = t.prop(NodeProp.mounted) if (mounted && !mounted.overlay) t = mounted.tree t.children.forEach(register) } inA.add(t) })(a) ;(function scan(t: any) { if (inA.has(t)) { shared += t.length } else if (t instanceof Tree) { let mounted = t.prop(NodeProp.mounted) ;(mounted && !mounted.overlay ? mounted.tree : t).children.forEach(scan) } })(b) return Math.round(100 * shared / b.length) } function fragments(tree: Tree, ...changes: ([number, number] | [number, number, number, number])[]) { return TreeFragment.applyChanges(TreeFragment.addTree(tree), changes.map(([fromA, toA, fromB = fromA, toB = toA]) => ({fromA, toA, fromB, toB})), 2) } describe("parsing", () => { let p1 = p(` @precedence { call } @top T { statement* } statement { Cond | Loop | Block | expression ";" } Cond { kw<"if"> expression statement } Block { "{" statement* "}" } Loop { kw<"while"> expression statement } expression { Call | Num | Var | "!" 
expression } Call { expression !call "(" expression* ")" } kw<value> { @specialize<Var, value> } @tokens { Num { @digit+ } Var { @asciiLetter+ } whitespace { @whitespace+ } } @skip { whitespace }`) function qq(ast: Tree) { return function(query: string, offset = 1): {start: number, end: number} { let result = null, cursor = ast.cursor() do { if (cursor.name == query && --offset == 0) result = {start: cursor.from, end: cursor.to} } while (cursor.next()) if (result) return result throw new Error("Couldn't find " + query) } } it("can parse incrementally", () => { let doc = "if true { print(1); hello; } while false { if 1 do(something 1 2 3); }".repeat(10) let ast = p1().configure({bufferLength: 2}).parse(doc) let content = "Cond(Var,Block(Call(Var,Num),Var)),Loop(Var,Block(Cond(Num,Call(Var,Var,Num,Num,Num))))" let expected = "T(" + (content + ",").repeat(9) + content + ")" testTree(ast, expected) ist(ast.length, 700) let pos = doc.indexOf("false"), doc2 = doc.slice(0, pos) + "x" + doc.slice(pos + 5) let ast2 = p1().configure({bufferLength: 2}).parse(doc2, fragments(ast, [pos, pos + 5, pos, pos + 1])) testTree(ast2, expected) ist(shared(ast, ast2), 40, ">") ist(ast2.length, 696) }) it("assigns the correct node positions", () => { let doc = "if 1 { while 2 { foo(bar(baz bug)); } }" let ast = p1().configure({bufferLength: 10, strict: true}).parse(doc) let q = qq(ast) ist(ast.length, 39) let cond = q("Cond"), one = q("Num") ist(cond.start, 0); ist(cond.end, 39) ist(one.start, 3); ist(one.end, 4) let loop = q("Loop"), two = q("Num", 2) ist(loop.start, 7); ist(loop.end, 37) ist(two.start, 13); ist(two.end, 14) let call = q("Call"), inner = q("Call", 2) ist(call.start, 17); ist(call.end, 34) ist(inner.start, 21); ist(inner.end, 33) let bar = q("Var", 2), bug = q("Var", 4) ist(bar.start, 21); ist(bar.end, 24) ist(bug.start, 29); ist(bug.end, 32) }) let resolveDoc = "while 111 { one; two(three 20); }" function testResolve(bufferLength: number) { let ast = p1().configure({strict: true, bufferLength}).parse(resolveDoc) let cx111 = ast.cursorAt(7) ist(cx111.name, "Num") ist(cx111.from, 6) ist(cx111.to, 9) cx111.parent() ist(cx111.name, "Loop") ist(cx111.from, 0) ist(cx111.to, 33) let cxThree = ast.cursorAt(22) ist(cxThree.name, "Var") ist(cxThree.from, 21) ist(cxThree.to, 26) cxThree.parent() ist(cxThree.name, "Call") ist(cxThree.from, 17) ist(cxThree.to, 30) let branch = cxThree.moveTo(18) ist(branch.name, "Var") ist(branch.from, 17) ist(branch.to, 20) // Always resolve to the uppermost context for a position ist(ast.cursorAt(6).name, "Loop") ist(ast.cursorAt(9).name, "Loop") let c = ast.cursorAt(20) ist(c.firstChild()) ist(c.name, "Var") ist(c.nextSibling()) ist(c.name, "Var") ist(c.nextSibling()) ist(c.name, "Num") ist(!c.nextSibling()) } it("can resolve positions in buffers", () => testResolve(1024)) it("can resolve positions in trees", () => testResolve(2)) let iterDoc = "while 1 { a; b; c(d e); } while 2 { f; }" let iterSeq = ["T", 0, "Loop", 0, "Num", 6, "/Num", 7, "Block", 8, "Var", 10, "/Var", 11, "Var", 13, "/Var", 14, "Call", 16, "Var", 16, "/Var", 17, "Var", 18, "/Var", 19, "Var", 20, "/Var", 21, "/Call", 22, "/Block", 25, "/Loop", 25, "Loop", 26, "Num", 32, "/Num", 33, "Block", 34, "Var", 36, "/Var", 37, "/Block", 40, "/Loop", 40, "/T", 40] // Node boundaries seen when iterating range 13-19 ("b; c(d") let partialSeq = ["T", 0, "Loop", 0, "Block", 8, "Var", 13, "/Var", 14, "Call", 16, "Var", 16, "/Var", 17, "Var", 18, "/Var", 19, "/Call", 22, "/Block", 25, "/Loop", 25, "/T", 40] function testIter(bufferLength: 
number, partial: boolean) { let parser = p1(), output: any[] = [] let ast = parser.configure({strict: true, bufferLength}).parse(iterDoc) ast.iterate({ from: partial ? 13 : 0, to: partial ? 19 : ast.length, enter(n) { output.push(n.name, n.from) }, leave(n) { output.push("/" + n.name, n.to) } }) ist(output.join(), (partial ? partialSeq : iterSeq).join()) } it("supports forward iteration in buffers", () => testIter(1024, false)) it("supports forward iteration in trees", () => testIter(2, false)) it("supports partial forward iteration in buffers", () => testIter(1024, true)) it("supports partial forward iteration in trees", () => testIter(2, true)) it("can skip individual nodes during iteration", () => { let ast = p1().parse("foo(baz(baz), bug(quux)") let ids = 0 ast.iterate({ enter(n) { if (n.name == "Var") ids++ return n.from == 4 && n.name == "Call" ? false : undefined } }) ist(ids, 3) }) it("doesn't incorrectly reuse nodes", () => { let parser = buildParser(` @precedence { times @left, plus @left } @top T { expr+ } expr { Bin | Var } Bin { expr !plus "+" expr | expr !times "*" expr } @skip { space } @tokens { space { " "+ } Var { "x" } "*"[@name=Times] "+"[@name=Plus] } `) let p = parser.configure({strict: true, bufferLength: 2}) let ast = p.parse("x + x + x") testTree(ast, "T(Bin(Bin(Var,Plus,Var),Plus,Var))") let ast2 = p.parse("x * x + x + x", fragments(ast, [0, 0, 0, 4])) testTree(ast2, "T(Bin(Bin(Bin(Var,Times,Var),Plus,Var),Plus,Var))") }) it("can cache skipped content", () => { let comments = buildParser(` @top T { "x"+ } @skip { space | Comment } @skip {} { Comment { commentStart (Comment | commentContent)* commentEnd } } @tokens { space { " "+ } commentStart { "(" } commentEnd { ")" } commentContent { ![()]+ } }`) let doc = "x (one (two) (three " + "(y)".repeat(500) + ")) x" let ast = comments.configure({bufferLength: 10, strict: true}).parse(doc) let ast2 = comments.configure({bufferLength: 10}).parse(doc.slice(1), fragments(ast, [0, 1, 0, 0])) ist(shared(ast, ast2), 80, ">") }) it("doesn't get slow on long invalid input", () => { let t0 = Date.now() let ast = p1().parse("#".repeat(2000)) // Testing for timing is always dodgy, but I'm trying to ensure // there's no exponential complexity here. This runs (cold) in // ~60ms on my machine. In case of exponentiality it should become // _extremely_ slow. ist(Date.now() - t0 < 500) ist(ast.toString(), "T(⚠)") }) it("supports input ranges", () => { let tree = p1().parse(`if 1{{x}}0{{y}}0 foo {{z}};`, [], [{from: 0, to: 4}, {from: 9, to: 10}, {from: 15, to: 21}, {from: 26, to: 27}]) ist(tree.toString(), "T(Cond(Num,Var))") }) it("doesn't reuse nodes whose tokens looked ahead beyond the unchanged fragments", () => { let comments = buildParser(` @top Top { (Group | Char)* } @tokens { Group { "(" ![)]* ")" } Char { _ } }`).configure({bufferLength: 10}) let doc = "xxx(" + "x".repeat(996) let tree1 = comments.parse(doc) let tree2 = comments.parse( doc + ")", TreeFragment.applyChanges(TreeFragment.addTree(tree1), [{fromA: 1000, toA: 1000, fromB: 1000, toB: 1001}]) ) ist(tree2.toString(), "Top(Char,Char,Char,Group)") }) }) describe("sequences", () => { let p1 = p(` @top T { (X | Y)+ } @skip { C } C { "c" } X { "x" } Y { "y" ";"* }`) function depth(tree: any): number { return tree instanceof Tree ? tree.children.reduce((d, c) => Math.max(d, depth(c) + 1), 1) : 1 } function breadth(tree: any): number { return tree instanceof Tree ? 
tree.children.reduce((b, c) => Math.max(b, breadth(c)), tree.children.length) : 0 } it("balances parsed sequences", () => { let ast = p1().configure({strict: true, bufferLength: 10}).parse("x".repeat(1000)) let d = depth(ast), b = breadth(ast) ist(d, 6, "<=") ist(d, 4, ">=") ist(b, 5, ">=") ist(b, 10, "<=") }) it("creates a tree for long content-less repeats", () => { let p = buildParser(` @top T { (A | B { "[" b+ "]" })+ } @tokens { A { "a" } b { "b" } }`).configure({bufferLength: 10}) let tree = p.parse("a[" + "b".repeat(500) + "]") ist(tree.toString(), "T(A,B)") ist(depth(tree), 5, ">=") }) it("balancing doesn't get confused by skipped nodes", () => { let ast = p1().configure({strict: true, bufferLength: 10}).parse("xc".repeat(1000)) let d = depth(ast), b = breadth(ast) ist(d, 6, "<=") ist(d, 4, ">=") ist(b, 5, ">=") ist(b, 10, "<=") }) it("caches parts of sequences", () => { let doc = "x".repeat(1000), p = p1().configure({bufferLength: 10}) let ast = p.parse(doc) let full = p.parse(doc, TreeFragment.addTree(ast)) ist(shared(ast, full), 99, ">") let front = p.parse(doc, fragments(ast, [900, 1000])) ist(shared(ast, front), 50, ">") let back = p.parse(doc, fragments(ast, [0, 100])) ist(shared(ast, back), 50, ">") let middle = p.parse(doc, fragments(ast, [0, 100], [900, 1000])) ist(shared(ast, middle), 50, ">") let sides = p.parse(doc, fragments(ast, [450, 550])) ist(shared(ast, sides), 50, ">") }) it("assigns the right positions to sequences", () => { let doc = "x".repeat(100) + "y;;;;;;;;;" + "x".repeat(90) let ast = p1().configure({bufferLength: 10}).parse(doc) let i = 0 ast.iterate({enter(n) { if (i == 0) { ist(n.name, "T") } else if (i == 101) { ist(n.name, "Y") ist(n.from, 100) ist(n.to, 110) } else { ist(n.name, "X") ist(n.to, n.from + 1) ist(n.from, i <= 100 ? i - 1 : i + 8) } i++ }}) }) }) describe("multiple tops", () => { it("parses named tops", () => { let parser = buildParser(` @top X { FOO C } @top Y { B C } FOO { B } B { "b" } C { "c" } `) testTree(parser.parse("bc"), "X(FOO(B), C)") testTree(parser.configure({top: "X"}).parse("bc"), "X(FOO(B), C)") testTree(parser.configure({top: "Y"}).parse("bc"), "Y(B, C)") }) it("parses first top as default", () => { let parser = buildParser(` @top X { FOO C } @top Y { B C } FOO { B } B { "b" } C { "c" } `) testTree(parser.parse("bc"), "X(FOO(B), C)") testTree(parser.configure({top: "Y"}).parse("bc"), "Y(B, C)") }) }) describe("mixed languages", () => { const blob = p(`@top Blob { ch* } @tokens { ch { _ } }`, undefined, {bufferLength: 10}) it("can mix grammars", () => { let inner = buildParser(` @top I { expr+ } expr { B { Open{"("} expr+ Close{")"} } | Dot{"."} }`) let outer = buildParser(` @top O { expr+ } expr { "[[" NestContent "]]" | Bang{"!"} } @tokens { NestContent[@export] { ![\\]]+ } "[["[@name=Start] "]]"[@name=End] } `).configure({ wrap: parseMixed(node => { if (node.name == "NestContent") return {parser: inner} return null }) }) testTree(outer.parse("![[((.).)]][[.]]"), 'O(Bang,Start,I(B(Open,B(Open,Dot,Close),Dot,Close)),End,Start,I(Dot),End)') testTree(outer.parse("[[/\]]"), 'O(Start,I(⚠),End)') let tree = outer.parse("[[(.)]]") let innerNode = tree.topNode.childAfter(2)! ist(innerNode.name, "I") ist(innerNode.from, 2) ist(innerNode.to, 5) ist(innerNode.firstChild!.from, 2) ist(innerNode.firstChild!.to, 5) }) it("supports conditional nesting", () => { let inner = buildParser(`@top Script { any } @tokens { any { ![]+ } }`) let outer = buildParser(` @top T { Tag } Tag { Open Content? 
Close } Open { "<" name ">" } Close { "</" name ">" } @tokens { name { @asciiLetter+ } Content { ![<]+ } } `).configure({ wrap: parseMixed((node, input) => { if (node.name == "Content") { let open = node.node.parent!.firstChild! if (input.read(open.from, open.to) == "<script>") return {parser: inner} } return null }) }) testTree(outer.parse("<script>hello</script>"), "T(Tag(Open,Script,Close))") }) it("can parse incrementally across nesting", () => { let outer = buildParser(` @top Program { (Nest | Name)* } @skip { space } @skip {} { Nest { "{" Nested "}" } Nested { nestedChar* } } @tokens { space { @whitespace+ } nestedChar { ![}] } Name { $[a-z]+ } } `).configure({ bufferLength: 10, wrap: parseMixed(node => node.name == "Nested" ? {parser: blob()} : null) }) let base = "hello {bbbb} " let doc = base.repeat(500) + "{" + "b".repeat(1000) + "} " + base.repeat(500), off = base.length * 500 + 500 let ast1 = outer.parse(doc) let ast2 = outer.parse( doc.slice(0, off) + "bbb" + doc.slice(off), TreeFragment.applyChanges(TreeFragment.addTree(ast1), [{fromA: off, toA: off, fromB: off, toB: off + 3}]) ) ist(ast1.toString(), ast2.toString()) ist(shared(ast1, ast2), 90, ">") }) let templateParser = p(` @top Doc { (Dir | Content | Block)* } Dir { "{{" Word "}}" } Block { "{%" BlockContent { (Dir | Content)* } "%}" } @tokens { Content { ![{%]+ } Word { $[a-z]+ } }`) it("can create overlays", () => { let mix = templateParser().configure({ wrap: parseMixed(node => { return node.name == "Doc" ? { parser: blob(), overlay: node => node.name == "Content" } : null }) }) let tree = mix.parse("foo{{bar}}baz{{bug}}") ist(tree.toString(), "Doc(Content,Dir(Word),Content,Dir(Word))") let c1 = tree.resolveInner(1) ist(c1.name, "Blob") ist(c1.from, 0) ist(c1.to, 13) ist(c1.parent!.name, "Doc") ist(tree.resolveInner(10, 1).name, "Blob") let mix2 = templateParser().configure({ wrap: parseMixed(node => { return node.name == "Doc" ? { parser: blob(), overlay: [{from: 5, to: 7}] } : null }) }) let tree2 = mix2.parse("{{a}}bc{{d}}") let c2 = tree2.resolveInner(6) ist(c2.name, "Blob") ist(c2.from, 5) ist(c2.to, 7) }) it("adds a mount even for empty nodes", () => { let inner = p("@top E { tok? } @tokens { tok { \" \"+ } }")() let mix = templateParser().configure({ wrap: parseMixed(node => { return node.name == "BlockContent" ? {parser: inner} : null }) }) let ast = mix.parse("a{%%}b{% %}") testTree(ast, "Doc(Content,Block(E),Content,Block(E))") }) it("can resolve a stack", () => { let parens = buildParser(` @top T { (Text | Group)* } Group { "(" (Text | Group)* ")" } @tokens { Text { ![()]+ } }`) let mix = templateParser().configure({ wrap: parseMixed(node => node.type.isTop ? {parser: parens, overlay: n => n.name == "Content"} : null) }) let trail = (stack: NodeIterator | null) => { let result = [] for (; stack; stack = stack.next) result.push(stack.node.name) return result.join(" ") } for (let i = 0; i < 2; i++) { let parser = i ? mix.configure({bufferLength: 2}) : mix let ast = parser.parse("(hey{%okay(one)two%}three)!") ist(trail(ast.resolveStack(12)), "Text Group Content BlockContent Block Group T Doc") ist(trail(ast.resolveStack(2)), "Content Text Group T Doc") ist(trail(ast.resolveStack(5)), "Text Block Group Doc T") } }) it("reuses ranges from previous parses", () => { let outer = buildParser(` @top Doc { expr* } expr { Paren { "(" expr* ")" } | Array { "[" expr* "]" } | Number | String } @skip { space } @tokens { Number { $[0-9]+ } String { "'" ![']* "'" } space { $[ \n]+ } } `).configure({ bufferLength: 2, wrap: parseMixed(node => { return node.name == "Array" ? 
{ parser: blob(), overlay: node => { if (node.name == "String") { queried.push(node.from) return true } return false } } : null }) }) let queried: number[] = [] let doc = " (100) (() [50] 123456789012345678901234 ((['one' 123456789012345678901234 (('two'))]) ['three'])) " let tree = outer.parse(doc) ist(tree.toString(), "Doc(Paren(Number),Paren(Paren,Array(Number),Number,Paren(Paren(Array(String,Number,Paren(Paren(String)))),Array(String))))") let inOne = tree.resolveInner(45) ist(inOne.name, "Blob") ist(inOne.from, 44) ist(inOne.to, 82) ist(inOne.nextSibling, null) ist(inOne.prevSibling, null) ist(inOne.parent!.name, "Array") ist(tree.resolveInner(89).name, "Blob") ist(queried.join(), "44,77,88") queried.length = 0 let tree2 = outer.parse(doc.slice(0, 45) + "x" + doc.slice(46), fragments(tree, [45, 46])) ist(queried.join(), "44") ist(shared(tree, tree2), 20, ">") }) it("properly handles fragment offsets", () => { let inner = buildParser(`@top Text { (Word | " ")* } @tokens { Word { ![ ]+ } }`).configure({bufferLength: 2}) let outer = buildParser(` @top Doc { expr* } expr { Wrap { "(" expr* ")" } | Templ { "[" expr* "]" } | Number | String } @skip { space } @tokens { Number { $[0-9]+ } String { "'" ![']* "'" } space { $[ \n]+ } } `).configure({ bufferLength: 2, wrap: parseMixed(node => { return node.name == "Templ" ? { parser: inner, overlay: node => node.name == "String" ? {from: node.from + 1, to: node.to - 1} : false } : null }) }) let doc = " 0123456789012345678901234 (['123456789 123456789 12345 stuff' 123456789 (('123456789 123456789 12345 other' 4))] 200)" let tree = outer.parse(doc) // Verify that mounts inside reused nodes don't get re-parsed let tree1 = outer.parse("88" + doc, fragments(tree, [0, 0, 0, 2])) ist(tree.resolveInner(50).tree, tree1.resolveInner(52).tree) // Verify that content inside the nested parse gets accurately reused let tree2 = outer.parse("88" + doc.slice(0, 30) + doc.slice(31), fragments(tree, [0, 0, 0, 2], [30, 31, 32, 32])) ist(shared(tree, tree2), 20, ">") ist(shared(tree.resolveInner(49).tree!, tree2.resolveInner(50).tree!), 20, ">") let other = tree2.resolveInner(103, 1) ist(other.from, 103) ist(other.to, 108) }) it("supports nested overlays", () => { let outer = buildParser(` @top Doc { expr* } expr { Paren { "(" expr* ")" } | Array { "[" expr* "]" } | Number | String } @skip { space } @tokens { Number { $[0-9]+ } String { "'" ![']* "'" } space { $[ \n]+ } } `).configure({ bufferLength: 2, }); function testMixed(parser: LRParser) { let tree = parser.parse("['x' 100 (['xxx' 20 ('xx')] 'xxx')]") let blob1 = tree.resolveInner(2, 1) ist(blob1.name, "Blob") ist(blob1.from, 2) ist(blob1.to, 32) let blob2 = tree.resolveInner(12, 1) ist(blob2.name, "Blob") ist(blob2.from, 12) ist(blob2.to, 24) } testMixed(outer.configure({ wrap: parseMixed(node => { return node.name == "Array" ? { parser: blob(), overlay: node => node.name == "String" ? 
{from: node.from + 1, to: node.to - 1} : false } : null }) })) testMixed(outer.configure({ wrap: parseMixed(node => { if (node.name != "Array") return null let ranges: {from: number, to: number}[] = [] let scan = (node: SyntaxNode) => { if (node.name == "String") ranges.push({from: node.from + 1, to: node.to - 1}) else for (let ch = node.firstChild; ch; ch = ch.nextSibling) if (ch.name != "Array") scan(ch) } scan(node.node) return {parser: blob(), overlay: ranges} }) })) }) it("re-parses cut-off inner parses even if the outer tree was finished", () => { let inner = buildParser(`@top Phrase { "<" ch* ">" } @tokens { ch { ![>] } }`).configure({bufferLength: 2}) let parser = buildParser(` @top Doc { Section* } Section { "{" SectionContent? "}" } @tokens { SectionContent { ![}]+ } } `).configure({ bufferLength: 2, wrap: parseMixed(node => node.name == "SectionContent" ? {parser: inner} : null) }) let input = `{<${"x".repeat(100)}>}{}`, tree1 let parse = parser.startParse(input) while (parse.parsedPos < 50) parse.advance() parse.stopAt(parse.parsedPos) while (!(tree1 = parse.advance())) {} ist(tree1.toString(), "Doc(Section(Phrase(⚠)),Section(Phrase(⚠)))") let tree2 = parser.parse(input, TreeFragment.addTree(tree1)) ist(tree2.toString(), "Doc(Section(Phrase),Section(Phrase))") }) }) generator-1.7.0/test/test-test.ts000066400000000000000000000012371457412043500170000ustar00rootroot00000000000000// @ts-ignore import {fileTests} from "../dist/test.js" describe("test", () => { it("handle parser error", () => { const content = ` # Working Spec b ==> B # Broken Spec bbbb bbbb bbbb bbbb bbbb bbbb bbbb bbbb bbbb bbbb bbbb bbbb bbbb bbbb bbbb bbbb aaaa bbbb ` const expectedError = `Unexpected file format in test-error.txt around | # Broken Spec |${ ' ' } | bbbb bbbb bbbb bbbb | bbbb bbbb bbbb bbbb | bbbb bbbb bbbb bbbb | bbbb bbbb bbbb bbbb aaaa`; const file = "test-error.txt" try { fileTests(content, file) } catch (err: any) { if (err.message !== expectedError) { throw err; } } }) }) generator-1.7.0/tsconfig.json000066400000000000000000000004211457412043500162160ustar00rootroot00000000000000{ "compilerOptions": { "lib": ["es2017"], "noImplicitReturns": true, "noUnusedLocals": true, "strict": true, "target": "es2018", "module": "esnext", "newLine": "lf", "moduleResolution": "node" }, "include": ["src/*.ts", "test/*.ts"] }
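As a closing note, the fileTests helper exercised by test-cases.ts and test-test.ts above can also be driven against an in-memory spec that uses the same `# name` / input / `==> tree` layout as the files under test/cases. A minimal sketch, with a made-up grammar and spec:

import {buildParser} from "@lezer/generator"
// @ts-ignore
import {fileTests} from "@lezer/generator/test"

// Tiny illustrative grammar: T is a sequence of X tokens.
let parser = buildParser(`@top T { X+ } @tokens { X { "x" } }`)
let spec = `
# Parses a run of xs

xxx

==> T(X, X, X)
`
for (let {name, run} of fileTests(spec, "inline.txt")) {
  run(parser) // should throw if the parse tree doesn't match the spec
  console.log("passed:", name)
}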