lr-1.4.0/.gitignore:

/node_modules/
/dist/*
.tern-*
.rpt2_cache

lr-1.4.0/.npmignore:

/node_modules

lr-1.4.0/CHANGELOG.md:

## 1.4.0 (2024-01-22)

### New features

The new `InputStream.acceptTokenTo` method allows a tokenizer to create a token that ends at some absolute position.

## 1.3.14 (2023-11-02)

### Bug fixes

Further reduce maximum parse depth—the old limit could still cause the tree building function to overflow the stack on V8 when all rules on the stack produce actual visible tree nodes.

## 1.3.13 (2023-10-03)

### Bug fixes

The parser will now emit an error node when it reaches the end of the top rule but not the end of the input, and restarts parsing the top rule.

Fix an issue that could, when error-recovery happened after a skipped token, cause the parser to emit nodes whose start position was after their end position.

## 1.3.12 (2023-09-22)

### Bug fixes

Reduce tree depth limit to avoid V8 overflowing its stack.

## 1.3.11 (2023-09-19)

### Bug fixes

When parsing input ranges with gaps, don't treat the token after a gap as covering the entire gap.

## 1.3.10 (2023-08-17)

### Bug fixes

Make this package usable in TypeScript setups using node16/nodenext resolution.

## 1.3.9 (2023-07-03)

### Bug fixes

Fix another issue in the way local token groups and input gaps interact.

## 1.3.8 (2023-07-03)

### Bug fixes

Fix an issue that caused incorrect trees to be emitted when a forced reduction happened inside a skipped term.

## 1.3.7 (2023-06-22)

### Bug fixes

Fix a bug where the parser could skip characters while tokenizing in a local token group.

## 1.3.6 (2023-06-02)

### Bug fixes

Fix an issue where context-tracking parsers, in specific circumstances, could create incorrectly nested trees.

## 1.3.5 (2023-05-25)

### Bug fixes

Fix an issue where some grammars could make the parser unable to force reductions on invalid input, leading to trees that were missing some wrapping nodes around the broken input.

## 1.3.4 (2023-04-24)

### Bug fixes

Fix tokenizing of `@else` tokens that span separate input ranges.

## 1.3.3 (2023-02-02)

### Bug fixes

Fix a crash when parsing a non-atomic skippable at the top of a nested parse.

Fix a bug that caused the tree-depth limit on left-associative rules to be applied to rules generated for repeat operators.

## 1.3.2 (2023-01-31)

### Bug fixes

Place a limit on the amount of (non-recovering) parse stacks active at the same time to avoid exponential slowdown on inputs that nest ambiguous constructs.

Reduce the maximum left-associative nesting depth, since the old value could still cause Chrome (but not Firefox) to overflow its call stack.

## 1.3.1 (2023-01-13)

### Bug fixes

Fix a bug that could corrupt the parse stack when parsing a non-single-token skippable expression from the initial parse state.

## 1.3.0 (2023-01-09)

### Bug fixes

Avoid the problem of huge numbers of repeated left-associative operator applications creating a stack too deep to recurse on by detecting such parses and force-reducing out of them.

### New features

Add support for grammars that use `@local tokens` declarations.
## 1.2.5 (2022-11-16)

### Bug fixes

Fix a bug that made `Stack.canShift` incorrectly return true in some circumstances.

## 1.2.4 (2022-11-05)

### Bug fixes

Fix a tokenizer bug that broke tokenizing of the character \uffff.

## 1.2.3 (2022-08-16)

### Bug fixes

Fix a regression in the way tokenizers read input when parsing multiple separate ranges.

## 1.2.2 (2022-08-12)

### Bug fixes

Fix an issue that could cause the tokenizer to crash when tokens ended directly on a gap in the input ranges.

## 1.2.1 (2022-08-03)

### Bug fixes

Include `LRParser.deserialize` in the TypeScript declarations.

Fix a bug that broke `ParserConfig.specializers` (requires @lezer/generator 1.1.1).

## 1.2.0 (2022-07-04)

### New features

External specializers can now be replaced when reconfiguring an `LRParser`.

## 1.1.0 (2022-06-27)

### New features

Support the output produced by the new `@eof` marker in lezer-generator.

## 1.0.0 (2022-06-06)

### New features

First stable version.

## 0.16.3 (2022-05-11)

### Bug fixes

Don't crash when a non-Node environment defines a `process` global that doesn't have an `env` property.

Fix a bug that could cause partially-parsed nodes to be reused when they ended in skipped nodes, when parsing incrementally.

## 0.16.2 (2022-04-21)

### Bug fixes

Fix an issue that could make the parser merge adjacent error nodes even when they had different parent nodes.

## 0.16.1 (2022-04-21)

### Bug fixes

Fix an issue that caused error nodes created for inserted tokens to sometimes end up in the wrong parent node.

## 0.16.0 (2022-04-20)

### Bug fixes

Avoid the need for generated parser files to directly import @lezer/common.

## 0.15.8 (2022-02-04)

### Bug fixes

Fix a bug that caused reductions that didn't consume anything to sometimes end up outside their parent node in the tree.

## 0.15.7 (2022-01-21)

### Bug fixes

Fix a bug that could cause some kinds of composite skipped expressions to not be parsed correctly.

## 0.15.6 (2022-01-11)

### Bug fixes

Make sure tree depth does not grow unbounded, so that recursive tree traversal is safe from overflowing the stack.

Be less aggressive about pruning long-running GLR parse splits.

## 0.15.5 (2021-12-01)

### Bug fixes

Fix a bug that caused node lookahead to be one less than it should be.

Fix an issue that could cause the parser to, when recovering, parse beyond the position given to `stopAt`.

## 0.15.4 (2021-09-27)

### Bug fixes

Fix a mistake in the way forced reductions are checked.

## 0.15.3 (2021-09-24)

### Bug fixes

Fix crashes or infinite recursion caused by applying forced reductions in situations where they were not valid.

## 0.15.2 (2021-08-31)

### Bug fixes

Fix a bug where the parse position could get corrupted during a parse, leading to crashes or nonsensical output.

## 0.15.1 (2021-08-16)

### Bug fixes

Fix an inconsistency in the .d.ts emitted by the build.

### New features

`LRParser.configure` now takes a `contextTracker` option to replace the context tracker used by the parser.

## 0.15.0 (2021-08-11)

### Breaking changes

The module's name changed from `lezer` to `@lezer/lr`.

The `Parser` class was renamed to `LRParser` (`Parser` is now the abstract class that all parsers extend).

Nested parsing is no longer handled inside the LR parser (instead, it is done by using `parseMixed` from the `@lezer/common` package as a wrapper).

External tokenizers are passed different arguments and get a different input stream abstraction (to make accidental uncontrolled lookahead and lookbehind, which were easy ways to break incremental parsing, harder).
Drops support for `Stack.startOf` (which is no longer useful without lookbehind).

Context trackers also get passed different arguments, following the changed input stream format.

This package no longer re-exports bindings from `@lezer/common`.

### Bug fixes

Slightly prefer deleting tokens to inserting them, to avoid far-fetched parses.

Fix a problem where node reuse didn't take the amount of look-ahead done by the tokenizer into account, and could reuse nodes whose content would tokenize differently due to changes after them.

Track tokenizer lookahead to fix a problem where some incremental parses would produce incorrect results.

### New features

LR parsers now support a `wrapper` configuration option that allows you to inject additional logic by wrapping the `PartialParse` object it returns.

## 0.13.5 (2021-05-14)

### Bug fixes

Fix a bug with overeager reuse of nodes on change boundaries.

## 0.13.4 (2021-03-03)

### New features

`Parser` instances now have a `topNode` property that holds the type of the parser's top node.

## 0.13.3 (2021-02-17)

### New features

Context trackers can now disable strictness in node reuse.

## 0.13.2 (2021-02-17)

### New features

Add support for context trackers.

## 0.13.1 (2020-12-04)

### Bug fixes

Fix versions of lezer packages depended on.

## 0.13.0 (2020-12-04)

### Breaking changes

`Parser.group` is now called `Parser.nodeSet`.

Nested parsers now work differently. They don't have to be Lezer parsers, but have to produce objects conforming to the `PartialParse` interface. The interface with which non-trivial nested parsers are specified also changed—see the `NestedParser` type.

Parser objects no longer have a `topType` property (scan their node set for types with `isTop` set instead).

`Parser` objects no longer have `withProps`, `withNested`, and `withTokenizer` methods (use `configure` instead).

Both `Parser.parse` and `Parser.startParse` now take an optional start position as second parameter and an optional parse context as third. `startParse` returns an instance of the `PartialParse` interface instead of the old `ParseContext` class (whose name is now used for something else). Parse _options_ are no longer passed to these methods, but configured in advance through `Parser.configure`.

During incremental parsing, instead of passing a tree as the `cache` option, reusable subtrees (see `TreeFragment` from lezer-tree) are now retrieved from the `fragments` property of the parse context object, if provided.

`Parser.parse` and `Parser.startParse` no longer take an options parameter. Instead, `bufferLength` and `strict` can be configured with `Parser.configure` now, and the start position and context are passed as optional arguments.

The `InputStream` type has been replaced by `Input` from the lezer-tree package (which has the same interface but a more appropriate name).

### New features

The `Parser` class now has a `configure` method that is used to create a parser instance with a more specific configuration.

## 0.12.1 (2020-11-19)

### Bug fixes

Fix an infinite loop in incremental parsing when repeatedly reusing a zero-length cached node.

## 0.12.0 (2020-10-23)

### Breaking changes

Follow the change from `Subtree` to `TreeCursor` in lezer-tree.

The serialized parser format changed.

`Stack.startOf` now returns null, rather than -1, when it can't find the given element.

### New features

`Stack.startOf` now takes an optional second argument that allows you to select a match beyond the innermost one.
## 0.11.2 (2020-09-26)

### Bug fixes

Fix lezer dependency versions.

## 0.11.1 (2020-09-26)

### Bug fixes

Fix an infinite loop that was sometimes hit during error recovery.

## 0.11.0 (2020-09-26)

### Breaking changes

Follow a breaking change in the way repeat nodes are represented.

Support the new action table format that allows sharing between states.

## 0.10.4 (2020-09-15)

### New features

Parser objects now have a `withTokenizer` method that can be used to replace external tokenizers.

## 0.10.3 (2020-09-10)

### Bug fixes

Fix a bug that caused the value returned by `ParseContext.badness` to be much higher than intended.

## 0.10.2 (2020-09-02)

### Bug fixes

`Stack.ruleStart` will now ignore repeat rules and node-less rules when determining the inner rule.

Work around a failure mode where error-recovery got stuck in an end-of-grammar state, and thus could not continue meaningfully parsing anything, by restarting such states back to their initial state.

### New features

External tokenizers can now provide an `extend` flag to allow their tokens to be used alongside tokens produced by other tokenizers.

Add support for dynamic precedences.

## 0.10.1 (2020-08-20)

### Bug fixes

Fixes an issue where repeated error recovery could lead to a tree so deep that recursive functions on it would overflow the stack.

## 0.10.0 (2020-08-07)

### New features

Add support for grammar dialects.

Add support for external specializers.

Stacks now have a `parser` accessor that gets you the active parser instance.

### Breaking changes

No longer list internal properties in the type definitions.

Follow changes in the serialized parser format.

The way different tokenizers are combined is now slightly different. The first one to return a token wins, even if that token has no actions in the current state. The old behavior, where further tokenizers are tried until actions are found, can be enabled for a given tokenizer by setting its `fallback` flag.

## 0.9.1 (2020-06-29)

### Bug fixes

Fix accidental use of non-ES5 library methods.

## 0.9.0 (2020-06-08)

### Breaking changes

Upgrade to 0.9 parser serialization.

## 0.8.5 (2020-05-01)

### Bug fixes

Publish less useless cruft to npm, reducing package size.

## 0.8.4 (2020-04-14)

### Bug fixes

Fix a bug in `Stack.startOf` that made it fail to find rules that were actually on the stack in many situations.

## 0.8.3 (2020-04-01)

### Bug fixes

Make the package load as an ES module on node.

## 0.8.2 (2020-02-28)

### New features

The package now provides an ES6 module.

## 0.8.1 (2020-02-26)

### New features

You can now find the top node type parsed by the parser through its `topType` property.

## 0.8.0 (2020-02-03)

### Breaking changes

The serialized parser format changed.

### New features

Add support for multiple `@top` rules through the `top` parse option.

## 0.7.1 (2020-01-23)

### Bug fixes

Tweak recovery cost for forced reductions to prefer those to other recovery strategies.

More aggressively reuse cached nodes.

## 0.7.0 (2020-01-20)

### Breaking changes

This now consumes the adjusted parser output of lezer-generator 0.7.0.

## 0.6.0 (2020-01-15)

### Bug fixes

Rewrite the way the parser advances and recovers from errors, to more rigorously address a number of infinite loops and poor recovery behaviors.

### New features

Parse context objects now have a `badness` property that you can use to estimate just how poorly the input matches the grammar.
## 0.5.2 (2020-01-09)

### Bug fixes

Fix an issue where the parser would sometimes continue, and even pick as its result, a parse with error recovery even though there are error-free parses available.

Fix a mistake in our binary heap implementation that would cause stacks to be ordered incorrectly.

Fix an issue where the `Stack.startOf` method would ignore the top frame of the stack.

## 0.5.1 (2020-01-01)

### Bug fixes

Fix an issue where the parser would loop infinitely when leaving a nested parse in some circumstances.

Fix an infinite loop on incomplete input at end of file that occurred for some types of mutually recursive rules.

## 0.5.0 (2019-10-22)

### New features

Parser instances now have a `hasNested` property that tells you whether they nest grammars.

## 0.4.1 (2019-10-14)

### Bug fixes

Fix an infinite loop where error recovery keeps finding zero-length tokens and imagining it's making progress.

## 0.4.0 (2019-09-10)

### Bug fixes

Don't rely on additional data stored in the parse table during recovery (shrinking the parse tables).

Fix a crash that could occur when starting a nested parse when there were multiple active stacks.

Fix an issue where error nodes would sometimes not be merged.

Don't reuse cached tokens for states that have a different token group.

### Breaking changes

The on-disk parse table format changed again.

## 0.3.0 (2019-08-22)

### Bug fixes

Don't treat reused nodes as if they are error terms when merging errors.

Add badness penalty for forced reductions at end of input.

Fix several infinite loops around forced reductions.

Don't move error nodes out of reduces.

### New features

Add a `Parser.withProps` method for extending a parser with new node props.

### Breaking changes

Emits lezer-tree 0.3.0 style trees with `NodeType` type objects.

`Parser.deserialize`'s interface changed (now taking an object rather than a long list of parameters).

## 0.2.0 (2019-08-02)

### Bug fixes

Don't include lezer-tree inline in `dist/index.js`.

### New features

The output tree now uses tags, rather than term names, to identify its nodes.

Export `Tag` data structure from lezer-tree.

Support per-grammar global tag suffixes in `Parser.deserialize`.

### Breaking changes

Grammars no longer have ids.

Removes export of `allocateGrammarID` and `TagMap`.

## 0.1.1 (2019-07-09)

### Bug fixes

Actually include the .d.ts file in the published package.

## 0.1.0 (2019-07-09)

### New features

First documented release.

lr-1.4.0/LICENSE:

MIT License

Copyright (C) 2018 by Marijn Haverbeke and others

Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
lr-1.4.0/README.md:

# @lezer/lr

[ [**WEBSITE**](http://lezer.codemirror.net) | [**ISSUES**](https://github.com/lezer-parser/lezer/issues) | [**FORUM**](https://discuss.codemirror.net/c/lezer) | [**CHANGELOG**](https://github.com/lezer-parser/lr/blob/master/CHANGELOG.md) ]

Lezer ("reader" in Dutch, pronounced pretty much as laser) is an incremental GLR parser intended for use in an editor or similar system, which needs to keep a representation of the program current during changes and in the face of syntax errors.

It prioritizes speed and compactness (both of parser table files and of syntax tree) over having a highly usable parse tree—tree nodes are just blobs with a start, end, tag, and set of child nodes, with no further labeling of child nodes or extra metadata.

This package contains the run-time LR parser library. It consumes parsers generated by [@lezer/generator](https://github.com/lezer-parser/generator).

The parser programming interface is documented on [the website](https://lezer.codemirror.net/docs/ref/#lr).

The code is licensed under an MIT license.

This project was hugely inspired by [tree-sitter](http://tree-sitter.github.io/tree-sitter/).
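To make the README's description concrete, here is a minimal, hedged sketch of using the runtime library with a generated parser. It assumes a grammar package such as `@lezer/javascript` is installed; any generated grammar that exports a `parser` works the same way.

```ts
import {parser} from "@lezer/javascript"

// Parse a string and print the (compact) tree representation.
let tree = parser.parse("let x = 10")
console.log(tree.toString())

// Walk the tree with a cursor; nodes only expose name/start/end.
let cursor = tree.cursor()
do {
  console.log(`${cursor.name} [${cursor.from}..${cursor.to}]`)
} while (cursor.next())
```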
lr-1.4.0/build.js:

import {build, watch} from "@marijn/buildtool"
import {fileURLToPath} from "url"
import {dirname, join} from "path"

let tsOptions = {
  lib: ["es5", "es6"],
  types: ["node"],
  target: "es6"
}

let main = join(dirname(fileURLToPath(import.meta.url)), "src", "index.ts")

if (process.argv.includes("--watch")) {
  watch([main], [], {tsOptions})
} else {
  build(main, {tsOptions})
}

lr-1.4.0/package.json:

{
  "name": "@lezer/lr",
  "version": "1.4.0",
  "description": "Incremental parser",
  "main": "dist/index.cjs",
  "type": "module",
  "exports": {
    "import": "./dist/index.js",
    "require": "./dist/index.cjs"
  },
  "module": "dist/index.js",
  "types": "dist/index.d.ts",
  "author": "Marijn Haverbeke",
  "license": "MIT",
  "repository": {
    "type": "git",
    "url": "https://github.com/lezer-parser/lr.git"
  },
  "devDependencies": {
    "@marijn/buildtool": "^0.1.5",
    "@types/node": "^20.6.2"
  },
  "dependencies": {
    "@lezer/common": "^1.0.0"
  },
  "files": ["dist"],
  "scripts": {
    "test": "echo 'Tests are in @lezer/generator'",
    "watch": "node build.js --watch",
    "prepare": "node build.js; tsc src/constants.ts -d --outDir dist"
  }
}

lr-1.4.0/src/README.md:

This package provides an implementation of a [GLR](https://en.wikipedia.org/wiki/GLR_parser) parser that works with the parse tables generated by the [parser generator](#generator).

### Parsing

@LRParser

@ParserConfig

@Stack

### Tokenizers

@InputStream

@ExternalTokenizer

@ContextTracker

@LocalTokenGroup

lr-1.4.0/src/constants.ts:

// This file defines some constants that are needed both in this
// package and in lezer-generator, so that the generator code can
// access them without them being part of lezer's public interface.

// Parse actions are represented as numbers, in order to cheaply and
// simply pass them around. The numbers are treated as bitfields
// holding different pieces of information.
//
// When storing actions in 16-bit number arrays, they are split in the
// middle, with the first element holding the first 16 bits, and the
// second the rest.
//
// The value 0 (which is not a valid action because no shift goes to
// state 0, the start state), is often used to denote the absence of a
// valid action.
export const enum Action {
  // Distinguishes between shift (off) and reduce (on) actions.
  ReduceFlag = 1 << 16,
  // The first 16 bits hold the target state's id for shift actions,
  // and the reduced term id for reduce actions.
  ValueMask = 2**16 - 1,
  // In reduce actions, all bits beyond 18 hold the reduction's depth
  // (the amount of stack frames it reduces).
  ReduceDepthShift = 19,
  // This is set for reduce actions that reduce two instances of a
  // repeat term to the term (but _not_ for the reductions that match
  // the repeated content).
  RepeatFlag = 1 << 17,
  // Goto actions are a special kind of shift that don't actually
  // shift the current token, just add a stack frame. This is used for
  // non-simple skipped expressions, to enter the skip rule when the
  // appropriate token is seen (because the arbitrary state from which
  // such a rule may start doesn't have the correct goto entries).
  GotoFlag = 1 << 17,
  // Both shifts and reduces can have a stay flag set. For shift, it
  // means that the current token must be shifted but the state should
  // stay the same (used for single-token skip expression). For
  // reduce, it means that, instead of consulting the goto table to
  // determine which state to go to, the state already on the stack
  // must be returned to (used at the end of non-simple skip
  // expressions).
  StayFlag = 1 << 18
}

// Each parser state has a `flags` field.
export const enum StateFlag {
  // Set if this state is part of a skip expression (which means nodes
  // produced by it should be moved out of any node reduced directly
  // after them).
  Skipped = 1,
  // Indicates whether this is an accepting state.
  Accepting = 2
}

// The lowest bit of the values stored in `parser.specializations`
// indicate whether this specialization replaced the original token
// (`Specialize`) or adds a second interpretation while also leaving
// the first (`Extend`).
export const enum Specialize {
  Specialize = 0,
  Extend = 1
}

// Terms are 16-bit numbers
export const enum Term {
  // The value of the error term is hard coded, the others are
  // allocated per grammar.
  Err = 0
}

export const enum Seq {
  // Used as end marker for most of the sequences stored in uint16
  // arrays
  End = 0xffff,
  Done = 0,
  Next = 1,
  Other = 2,
}

// Memory layout of parse states
export const enum ParseState {
  // Offsets into the record of the individual fields
  Flags = 0,
  Actions = 1,
  Skip = 2,
  TokenizerMask = 3,
  DefaultReduce = 4,
  ForcedReduce = 5,
  // Total size of a state record
  Size = 6
}

export const enum Encode {
  BigValCode = 126,
  BigVal = 0xffff,
  Start = 32,
  Gap1 = 34, // '"'
  Gap2 = 92, // '\\'
  Base = 46 // (126 - 32 - 2) / 2
}

export const enum File {
  Version = 14
}
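The `Action` bitfield layout above is easy to misread, so here is a small illustrative sketch (not part of the package) that unpacks an action number using the same masks and shifts the enum documents:

```ts
// Mirrors the Action enum: these constants are copied from above.
const ReduceFlag = 1 << 16, ValueMask = 2 ** 16 - 1
const RepeatFlag = 1 << 17, StayFlag = 1 << 18, ReduceDepthShift = 19

function describeAction(action: number): string {
  if (action & ReduceFlag) {
    let term = action & ValueMask          // reduced term id
    let depth = action >> ReduceDepthShift // stack frames popped
    return `reduce term ${term} (depth ${depth}` +
      `${action & RepeatFlag ? ", repeat" : ""}${action & StayFlag ? ", stay" : ""})`
  }
  return `shift to state ${action & ValueMask}`
}

// A depth-2 reduction of (hypothetical) term 7:
console.log(describeAction((2 << ReduceDepthShift) | ReduceFlag | 7))
```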
lr-1.4.0/src/decode.ts:

// See lezer-generator/src/encode.ts for comments about the encoding
// used here

import {Encode} from "./constants"

export function decodeArray<T extends {[i: number]: number} = Uint16Array>(
  input: string | T,
  Type: {new (n: number): T} = Uint16Array as any
): T {
  if (typeof input != "string") return input
  let array: T | null = null
  for (let pos = 0, out = 0; pos < input.length;) {
    let value = 0
    for (;;) {
      let next = input.charCodeAt(pos++), stop = false
      if (next == Encode.BigValCode) { value = Encode.BigVal; break }
      if (next >= Encode.Gap2) next--
      if (next >= Encode.Gap1) next--
      let digit = next - Encode.Start
      if (digit >= Encode.Base) { digit -= Encode.Base; stop = true }
      value += digit
      if (stop) break
      value *= Encode.Base
    }
    if (array) array[out++] = value
    else array = new Type(value)
  }
  return array!
}

lr-1.4.0/src/index.ts:

export {LRParser, ParserConfig, ContextTracker} from "./parse"
export {InputStream, ExternalTokenizer, LocalTokenGroup} from "./token"
export {Stack} from "./stack"
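The exports above are the package's public surface. As a hedged sketch of the `ExternalTokenizer` side of that API (the term import and its name are hypothetical; in real use term ids come from the generated grammar's terms file):

```ts
import {ExternalTokenizer} from "@lezer/lr"
// Hypothetical term id, normally exported by the generated parser.
import {bangRun} from "./parser.terms"

// Accept a run of one or more "!" characters as a single token.
export const bangs = new ExternalTokenizer((input, stack) => {
  if (input.next != 33 /* '!' */) return
  while (input.next == 33) input.advance()
  input.acceptToken(bangRun)
})
```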
lr-1.4.0/src/parse.ts:

import {DefaultBufferLength, Tree, TreeFragment, NodeSet, NodeType, NodeProp, NodePropSource,
        Input, PartialParse, Parser, ParseWrapper, IterMode} from "@lezer/common"
import {Stack, StackBufferCursor} from "./stack"
import {Action, Specialize, Term, Seq, StateFlag, ParseState, File} from "./constants"
import {Tokenizer, TokenGroup, ExternalTokenizer, CachedToken, InputStream} from "./token"
import {decodeArray} from "./decode"

// Environment variable used to control console output
const verbose = typeof process != "undefined" && process.env && /\bparse\b/.test(process.env.LOG!)

let stackIDs: WeakMap<Stack, string> | null = null

const enum Safety { Margin = 25 }

function cutAt(tree: Tree, pos: number, side: 1 | -1) {
  let cursor = tree.cursor(IterMode.IncludeAnonymous)
  cursor.moveTo(pos)
  for (;;) {
    if (!(side < 0 ? cursor.childBefore(pos) : cursor.childAfter(pos))) for (;;) {
      if ((side < 0 ? cursor.to < pos : cursor.from > pos) && !cursor.type.isError)
        return side < 0 ? Math.max(0, Math.min(cursor.to - 1, pos - Safety.Margin))
          : Math.min(tree.length, Math.max(cursor.from + 1, pos + Safety.Margin))
      if (side < 0 ? cursor.prevSibling() : cursor.nextSibling()) break
      if (!cursor.parent()) return side < 0 ? 0 : tree.length
    }
  }
}

class FragmentCursor {
  i = 0
  fragment: TreeFragment | null = null
  safeFrom = -1
  safeTo = -1
  trees: Tree[] = []
  start: number[] = []
  index: number[] = []
  nextStart!: number

  constructor(readonly fragments: readonly TreeFragment[], readonly nodeSet: NodeSet) {
    this.nextFragment()
  }

  nextFragment() {
    let fr = this.fragment = this.i == this.fragments.length ? null : this.fragments[this.i++]
    if (fr) {
      this.safeFrom = fr.openStart ? cutAt(fr.tree, fr.from + fr.offset, 1) - fr.offset : fr.from
      this.safeTo = fr.openEnd ? cutAt(fr.tree, fr.to + fr.offset, -1) - fr.offset : fr.to
      while (this.trees.length) { this.trees.pop(); this.start.pop(); this.index.pop() }
      this.trees.push(fr.tree)
      this.start.push(-fr.offset)
      this.index.push(0)
      this.nextStart = this.safeFrom
    } else {
      this.nextStart = 1e9
    }
  }

  // `pos` must be >= any previously given `pos` for this cursor
  nodeAt(pos: number): Tree | null {
    if (pos < this.nextStart) return null
    while (this.fragment && this.safeTo <= pos) this.nextFragment()
    if (!this.fragment) return null
    for (;;) {
      let last = this.trees.length - 1
      if (last < 0) { // End of tree
        this.nextFragment()
        return null
      }
      let top = this.trees[last], index = this.index[last]
      if (index == top.children.length) {
        this.trees.pop()
        this.start.pop()
        this.index.pop()
        continue
      }
      let next = top.children[index]
      let start = this.start[last] + top.positions[index]
      if (start > pos) {
        this.nextStart = start
        return null
      }
      if (next instanceof Tree) {
        if (start == pos) {
          if (start < this.safeFrom) return null
          let end = start + next.length
          if (end <= this.safeTo) {
            let lookAhead = next.prop(NodeProp.lookAhead)
            if (!lookAhead || end + lookAhead < this.fragment.to) return next
          }
        }
        this.index[last]++
        if (start + next.length >= Math.max(this.safeFrom, pos)) { // Enter this node
          this.trees.push(next)
          this.start.push(start)
          this.index.push(0)
        }
      } else {
        this.index[last]++
        this.nextStart = start + next.length
      }
    }
  }
}
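// Illustrative sketch (not part of this module): FragmentCursor is fed
// the `TreeFragment`s that callers pass to `parser.parse`. A typical
// incremental re-parse, using the @lezer/common fragment API, looks
// roughly like this:
//
//   let fragments = TreeFragment.addTree(previousTree)
//   fragments = TreeFragment.applyChanges(fragments, [
//     {fromA: change.from, toA: change.to,
//      fromB: change.from, toB: change.insertedTo}
//   ])
//   let newTree = parser.parse(newInput, fragments)
//
// Nodes that fall safely inside a fragment (see `safeFrom`/`safeTo`
// above) can then be reused wholesale instead of being re-parsed.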
class TokenCache {
  tokens: CachedToken[] = []
  mainToken: CachedToken | null = null
  actions: number[] = []

  constructor(parser: LRParser, readonly stream: InputStream) {
    this.tokens = parser.tokenizers.map(_ => new CachedToken)
  }

  getActions(stack: Stack) {
    let actionIndex = 0
    let main: CachedToken | null = null
    let {parser} = stack.p, {tokenizers} = parser

    let mask = parser.stateSlot(stack.state, ParseState.TokenizerMask)
    let context = stack.curContext ? stack.curContext.hash : 0
    let lookAhead = 0
    for (let i = 0; i < tokenizers.length; i++) {
      if (((1 << i) & mask) == 0) continue
      let tokenizer = tokenizers[i], token = this.tokens[i]
      if (main && !tokenizer.fallback) continue
      if (tokenizer.contextual || token.start != stack.pos || token.mask != mask || token.context != context) {
        this.updateCachedToken(token, tokenizer, stack)
        token.mask = mask
        token.context = context
      }

      if (token.lookAhead > token.end + Safety.Margin)
        lookAhead = Math.max(token.lookAhead, lookAhead)

      if (token.value != Term.Err) {
        let startIndex = actionIndex
        if (token.extended > -1) actionIndex = this.addActions(stack, token.extended, token.end, actionIndex)
        actionIndex = this.addActions(stack, token.value, token.end, actionIndex)
        if (!tokenizer.extend) {
          main = token
          if (actionIndex > startIndex) break
        }
      }
    }

    while (this.actions.length > actionIndex) this.actions.pop()
    if (lookAhead) stack.setLookAhead(lookAhead)
    if (!main && stack.pos == this.stream.end) {
      main = new CachedToken
      main.value = stack.p.parser.eofTerm
      main.start = main.end = stack.pos
      actionIndex = this.addActions(stack, main.value, main.end, actionIndex)
    }
    this.mainToken = main
    return this.actions
  }

  getMainToken(stack: Stack) {
    if (this.mainToken) return this.mainToken
    let main = new CachedToken, {pos, p} = stack
    main.start = pos
    main.end = Math.min(pos + 1, p.stream.end)
    main.value = pos == p.stream.end ? p.parser.eofTerm : Term.Err
    return main
  }

  updateCachedToken(token: CachedToken, tokenizer: Tokenizer, stack: Stack) {
    let start = this.stream.clipPos(stack.pos)
    tokenizer.token(this.stream.reset(start, token), stack)
    if (token.value > -1) {
      let {parser} = stack.p

      for (let i = 0; i < parser.specialized.length; i++) if (parser.specialized[i] == token.value) {
        let result = parser.specializers[i](this.stream.read(token.start, token.end), stack)
        if (result >= 0 && stack.p.parser.dialect.allows(result >> 1)) {
          if ((result & 1) == Specialize.Specialize) token.value = result >> 1
          else token.extended = result >> 1
          break
        }
      }
    } else {
      token.value = Term.Err
      token.end = this.stream.clipPos(start + 1)
    }
  }

  putAction(action: number, token: number, end: number, index: number) {
    // Don't add duplicate actions
    for (let i = 0; i < index; i += 3) if (this.actions[i] == action) return index
    this.actions[index++] = action
    this.actions[index++] = token
    this.actions[index++] = end
    return index
  }

  addActions(stack: Stack, token: number, end: number, index: number) {
    let {state} = stack, {parser} = stack.p, {data} = parser
    for (let set = 0; set < 2; set++) {
      for (let i = parser.stateSlot(state, set ? ParseState.Skip : ParseState.Actions);; i += 3) {
        if (data[i] == Seq.End) {
          if (data[i + 1] == Seq.Next) {
            i = pair(data, i + 2)
          } else {
            if (index == 0 && data[i + 1] == Seq.Other)
              index = this.putAction(pair(data, i + 2), token, end, index)
            break
          }
        }
        if (data[i] == token) index = this.putAction(pair(data, i + 1), token, end, index)
      }
    }
    return index
  }
}
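// Illustrative sketch (not part of this module): `TokenCache.actions`
// is a flat array of (action, token, end) triples, so consumers step
// through it in increments of three:
//
//   for (let i = 0; i < actions.length;) {
//     let action = actions[i++], term = actions[i++], end = actions[i++]
//     // `action` is an Action bitfield, `term` the token's term id,
//     // and `end` the input position just after the token.
//   }
//
// Storing triples in a single number array avoids allocating an
// object per candidate action on this very hot code path.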
const enum Rec {
  Distance = 5,
  MaxRemainingPerStep = 3,
  // When two stacks have been running independently long enough to
  // add this many elements to their buffers, prune one.
  MinBufferLengthPrune = 500,
  ForceReduceLimit = 10,
  // Once a stack reaches this depth (in .stack.length) force-reduce
  // it back to CutTo to avoid creating trees that overflow the stack
  // on recursive traversal.
  CutDepth = 2800 * 3,
  CutTo = 2000 * 3,
  MaxLeftAssociativeReductionCount = 300,
  // The maximum number of non-recovering stacks to explore (to avoid
  // getting bogged down with exponentially multiplying stacks in
  // ambiguous content)
  MaxStackCount = 12
}

export class Parse implements PartialParse {
  // Active parse stacks.
  stacks: Stack[]
  recovering = 0
  fragments: FragmentCursor | null
  nextStackID = 0x2654 // ♔, ♕, ♖, ♗, ♘, ♙, ♠, ♡, ♢, ♣, ♤, ♥, ♦, ♧
  minStackPos = 0
  reused: Tree[] = []
  stream: InputStream
  tokens: TokenCache
  topTerm: number
  stoppedAt: null | number = null
  lastBigReductionStart = -1
  lastBigReductionSize = 0
  bigReductionCount = 0

  constructor(
    readonly parser: LRParser,
    readonly input: Input,
    fragments: readonly TreeFragment[],
    readonly ranges: readonly {from: number, to: number}[]
  ) {
    this.stream = new InputStream(input, ranges)
    this.tokens = new TokenCache(parser, this.stream)
    this.topTerm = parser.top[1]
    let {from} = ranges[0]
    this.stacks = [Stack.start(this, parser.top[0], from)]
    this.fragments = fragments.length && this.stream.end - from > parser.bufferLength * 4
      ? new FragmentCursor(fragments, parser.nodeSet) : null
  }

  get parsedPos() {
    return this.minStackPos
  }

  // Move the parser forward. This will process all parse stacks at
  // `this.pos` and try to advance them to a further position. If no
  // stack for such a position is found, it'll start error-recovery.
  //
  // When the parse is finished, this will return a syntax tree. When
  // not, it returns `null`.
  advance() {
    let stacks = this.stacks, pos = this.minStackPos

    // This will hold stacks beyond `pos`.
    let newStacks: Stack[] = this.stacks = []
    let stopped: Stack[] | undefined, stoppedTokens: number[] | undefined

    // If a large amount of reductions happened with the same start
    // position, force the stack out of that production in order to
    // avoid creating a tree too deep to recurse through.
    // (This is an ugly kludge, because unfortunately there is no
    // straightforward, cheap way to check for this happening, due to
    // the history of reductions only being available in an
    // expensive-to-access format in the stack buffers.)
    if (this.bigReductionCount > Rec.MaxLeftAssociativeReductionCount && stacks.length == 1) {
      let [s] = stacks
      while (s.forceReduce() && s.stack.length && s.stack[s.stack.length - 2] >= this.lastBigReductionStart) {}
      this.bigReductionCount = this.lastBigReductionSize = 0
    }

    // Keep advancing any stacks at `pos` until they either move
    // forward or can't be advanced. Gather stacks that can't be
    // advanced further in `stopped`.
    for (let i = 0; i < stacks.length; i++) {
      let stack = stacks[i]
      for (;;) {
        this.tokens.mainToken = null
        if (stack.pos > pos) {
          newStacks.push(stack)
        } else if (this.advanceStack(stack, newStacks, stacks)) {
          continue
        } else {
          if (!stopped) { stopped = []; stoppedTokens = [] }
          stopped.push(stack)
          let tok = this.tokens.getMainToken(stack)
          stoppedTokens!.push(tok.value, tok.end)
        }
        break
      }
    }

    if (!newStacks.length) {
      let finished = stopped && findFinished(stopped)
      if (finished) {
        if (verbose) console.log("Finish with " + this.stackID(finished))
        return this.stackToTree(finished)
      }

      if (this.parser.strict) {
        if (verbose && stopped)
          console.log("Stuck with token " + (this.tokens.mainToken ? this.parser.getName(this.tokens.mainToken.value) : "none"))
        throw new SyntaxError("No parse at " + pos)
      }
      if (!this.recovering) this.recovering = Rec.Distance
    }

    if (this.recovering && stopped) {
      let finished = this.stoppedAt != null && stopped[0].pos > this.stoppedAt ? stopped[0]
        : this.runRecovery(stopped, stoppedTokens!, newStacks)
      if (finished) {
        if (verbose) console.log("Force-finish " + this.stackID(finished))
        return this.stackToTree(finished.forceAll())
      }
    }

    if (this.recovering) {
      let maxRemaining = this.recovering == 1 ? 1 : this.recovering * Rec.MaxRemainingPerStep
      if (newStacks.length > maxRemaining) {
        newStacks.sort((a, b) => b.score - a.score)
        while (newStacks.length > maxRemaining) newStacks.pop()
      }
      if (newStacks.some(s => s.reducePos > pos)) this.recovering--
    } else if (newStacks.length > 1) {
      // Prune stacks that are in the same state, or that have been
      // running without splitting for a while, to avoid getting stuck
      // with multiple successful stacks running endlessly on.
      outer: for (let i = 0; i < newStacks.length - 1; i++) {
        let stack = newStacks[i]
        for (let j = i + 1; j < newStacks.length; j++) {
          let other = newStacks[j]
          if (stack.sameState(other) ||
              stack.buffer.length > Rec.MinBufferLengthPrune && other.buffer.length > Rec.MinBufferLengthPrune) {
            if (((stack.score - other.score) || (stack.buffer.length - other.buffer.length)) > 0) {
              newStacks.splice(j--, 1)
            } else {
              newStacks.splice(i--, 1)
              continue outer
            }
          }
        }
      }
      if (newStacks.length > Rec.MaxStackCount)
        newStacks.splice(Rec.MaxStackCount, newStacks.length - Rec.MaxStackCount)
    }

    this.minStackPos = newStacks[0].pos
    for (let i = 1; i < newStacks.length; i++)
      if (newStacks[i].pos < this.minStackPos) this.minStackPos = newStacks[i].pos
    return null
  }
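  // Illustrative sketch (not part of this module): `advance` is the
  // heart of the `PartialParse` protocol. A caller that wants to be
  // able to interrupt long parses drives it in a loop along these
  // lines (`deadlineExceeded` is a hypothetical helper):
  //
  //   let parse = parser.startParse(input), tree = null
  //   while (!(tree = parse.advance())) {
  //     if (deadlineExceeded()) break // resume later from `parse`
  //   }
  //
  // `Parser.parse` in @lezer/common is essentially this loop without
  // the interruption check.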
  stopAt(pos: number) {
    if (this.stoppedAt != null && this.stoppedAt < pos) throw new RangeError("Can't move stoppedAt forward")
    this.stoppedAt = pos
  }

  // Returns an updated version of the given stack, or null if the
  // stack can't advance normally. When `split` and `stacks` are
  // given, stacks split off by ambiguous operations will be pushed to
  // `split`, or added to `stacks` if they move `pos` forward.
  private advanceStack(stack: Stack, stacks: null | Stack[], split: null | Stack[]) {
    let start = stack.pos, {parser} = this
    let base = verbose ? this.stackID(stack) + " -> " : ""

    if (this.stoppedAt != null && start > this.stoppedAt)
      return stack.forceReduce() ? stack : null

    if (this.fragments) {
      let strictCx = stack.curContext && stack.curContext.tracker.strict, cxHash = strictCx ? stack.curContext!.hash : 0
      for (let cached = this.fragments.nodeAt(start); cached;) {
        let match = this.parser.nodeSet.types[cached.type.id] == cached.type ? parser.getGoto(stack.state, cached.type.id) : -1
        if (match > -1 && cached.length && (!strictCx || (cached.prop(NodeProp.contextHash) || 0) == cxHash)) {
          stack.useNode(cached, match)
          if (verbose) console.log(base + this.stackID(stack) + ` (via reuse of ${parser.getName(cached.type.id)})`)
          return true
        }
        if (!(cached instanceof Tree) || cached.children.length == 0 || cached.positions[0] > 0) break
        let inner = cached.children[0]
        if (inner instanceof Tree && cached.positions[0] == 0) cached = inner
        else break
      }
    }

    let defaultReduce = parser.stateSlot(stack.state, ParseState.DefaultReduce)
    if (defaultReduce > 0) {
      stack.reduce(defaultReduce)
      if (verbose)
        console.log(base + this.stackID(stack) + ` (via always-reduce ${parser.getName(defaultReduce & Action.ValueMask)})`)
      return true
    }

    if (stack.stack.length >= Rec.CutDepth) {
      while (stack.stack.length > Rec.CutTo && stack.forceReduce()) {}
    }

    let actions = this.tokens.getActions(stack)
    for (let i = 0; i < actions.length;) {
      let action = actions[i++], term = actions[i++], end = actions[i++]
      let last = i == actions.length || !split
      let localStack = last ? stack : stack.split()
      let main = this.tokens.mainToken
      localStack.apply(action, term, main ? main.start : localStack.pos, end)
      if (verbose)
        console.log(base + this.stackID(localStack) + ` (via ${(action & Action.ReduceFlag) == 0 ? "shift"
          : `reduce of ${parser.getName(action & Action.ValueMask)}`} for ${
          parser.getName(term)} @ ${start}${localStack == stack ? "" : ", split"})`)
      if (last) return true
      else if (localStack.pos > start) stacks!.push(localStack)
      else split!.push(localStack)
    }

    return false
  }
  // Advance a given stack forward as far as it will go. Returns the
  // (possibly updated) stack if it got stuck, or null if it moved
  // forward and was given to `pushStackDedup`.
  private advanceFully(stack: Stack, newStacks: Stack[]) {
    let pos = stack.pos
    for (;;) {
      if (!this.advanceStack(stack, null, null)) return false
      if (stack.pos > pos) {
        pushStackDedup(stack, newStacks)
        return true
      }
    }
  }

  private runRecovery(stacks: Stack[], tokens: number[], newStacks: Stack[]) {
    let finished: Stack | null = null, restarted = false
    for (let i = 0; i < stacks.length; i++) {
      let stack = stacks[i], token = tokens[i << 1], tokenEnd = tokens[(i << 1) + 1]
      let base = verbose ? this.stackID(stack) + " -> " : ""

      if (stack.deadEnd) {
        if (restarted) continue
        restarted = true
        stack.restart()
        if (verbose) console.log(base + this.stackID(stack) + " (restarted)")
        let done = this.advanceFully(stack, newStacks)
        if (done) continue
      }

      let force = stack.split(), forceBase = base
      for (let j = 0; force.forceReduce() && j < Rec.ForceReduceLimit; j++) {
        if (verbose) console.log(forceBase + this.stackID(force) + " (via force-reduce)")
        let done = this.advanceFully(force, newStacks)
        if (done) break
        if (verbose) forceBase = this.stackID(force) + " -> "
      }

      for (let insert of stack.recoverByInsert(token)) {
        if (verbose) console.log(base + this.stackID(insert) + " (via recover-insert)")
        this.advanceFully(insert, newStacks)
      }

      if (this.stream.end > stack.pos) {
        if (tokenEnd == stack.pos) {
          tokenEnd++
          token = Term.Err
        }
        stack.recoverByDelete(token, tokenEnd)
        if (verbose) console.log(base + this.stackID(stack) + ` (via recover-delete ${this.parser.getName(token)})`)
        pushStackDedup(stack, newStacks)
      } else if (!finished || finished.score < stack.score) {
        finished = stack
      }
    }

    return finished
  }

  // Convert the stack's buffer to a syntax tree.
  stackToTree(stack: Stack): Tree {
    stack.close()
    return Tree.build({buffer: StackBufferCursor.create(stack),
                       nodeSet: this.parser.nodeSet,
                       topID: this.topTerm,
                       maxBufferLength: this.parser.bufferLength,
                       reused: this.reused,
                       start: this.ranges[0].from,
                       length: stack.pos - this.ranges[0].from,
                       minRepeatType: this.parser.minRepeatTerm})
  }

  private stackID(stack: Stack) {
    let id = (stackIDs || (stackIDs = new WeakMap)).get(stack)
    if (!id) stackIDs.set(stack, id = String.fromCodePoint(this.nextStackID++))
    return id + stack
  }
}

function pushStackDedup(stack: Stack, newStacks: Stack[]) {
  for (let i = 0; i < newStacks.length; i++) {
    let other = newStacks[i]
    if (other.pos == stack.pos && other.sameState(stack)) {
      if (newStacks[i].score < stack.score) newStacks[i] = stack
      return
    }
  }
  newStacks.push(stack)
}

export class Dialect {
  constructor(readonly source: string | undefined,
              readonly flags: readonly boolean[],
              readonly disabled: null | Uint8Array) {}

  allows(term: number) { return !this.disabled || this.disabled[term] == 0 }
}

const id: <T>(x: T) => T = x => x
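// Illustrative sketch (not part of this module): a minimal context
// tracker that counts open parentheses, so an external tokenizer
// could consult `stack.context`. The term ids are hypothetical and
// would come from the generated grammar.
//
//   const parenDepth = new ContextTracker<number>({
//     start: 0,
//     shift: (context, term) =>
//       term == openParenTerm ? context + 1 :
//       term == closeParenTerm ? context - 1 : context,
//     hash: context => context
//   })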
/// Context trackers are used to track stateful context (such as
/// indentation in the Python grammar, or parent elements in the XML
/// grammar) needed by external tokenizers. You declare them in a
/// grammar file as `@context exportName from "module"`.
///
/// Context values should be immutable, and can be updated (replaced)
/// on shift or reduce actions.
///
/// The export used in a `@context` declaration should be of this
/// type.
export class ContextTracker<T> {
  /// @internal
  start: T
  /// @internal
  shift: (context: T, term: number, stack: Stack, input: InputStream) => T
  /// @internal
  reduce: (context: T, term: number, stack: Stack, input: InputStream) => T
  /// @internal
  reuse: (context: T, node: Tree, stack: Stack, input: InputStream) => T
  /// @internal
  hash: (context: T) => number
  /// @internal
  strict: boolean

  /// Define a context tracker.
  constructor(spec: {
    /// The initial value of the context at the start of the parse.
    start: T,
    /// Update the context when the parser executes a
    /// [shift](https://en.wikipedia.org/wiki/LR_parser#Shift_and_reduce_actions)
    /// action.
    shift?(context: T, term: number, stack: Stack, input: InputStream): T
    /// Update the context when the parser executes a reduce action.
    reduce?(context: T, term: number, stack: Stack, input: InputStream): T
    /// Update the context when the parser reuses a node from a tree
    /// fragment.
    reuse?(context: T, node: Tree, stack: Stack, input: InputStream): T
    /// Reduce a context value to a number (for cheap storage and
    /// comparison). Only needed for strict contexts.
    hash?(context: T): number
    /// By default, nodes can only be reused during incremental
    /// parsing if they were created in the same context as the one in
    /// which they are reused. Set this to false to disable that
    /// check (and the overhead of storing the hashes).
    strict?: boolean
  }) {
    this.start = spec.start
    this.shift = spec.shift || id
    this.reduce = spec.reduce || id
    this.reuse = spec.reuse || id
    this.hash = spec.hash || (() => 0)
    this.strict = spec.strict !== false
  }
}

type SpecializerSpec = {
  term: number,
  get?: (value: string, stack: Stack) => number,
  external?: any,
  extend?: boolean
}

type ParserSpec = {
  version: number,
  states: string | Uint32Array,
  stateData: string | Uint16Array,
  goto: string | Uint16Array,
  nodeNames: string,
  maxTerm: number,
  repeatNodeCount: number,
  nodeProps?: [NodeProp<any> | string, ...(string | number)[]][],
  propSources?: NodePropSource[],
  skippedNodes?: number[],
  tokenData: string,
  tokenizers: (Tokenizer | number)[],
  topRules: {[name: string]: [number, number]},
  context: ContextTracker<any> | null,
  dialects?: {[name: string]: number},
  dynamicPrecedences?: {[term: number]: number},
  specialized?: SpecializerSpec[],
  tokenPrec: number,
  termNames?: {[id: number]: string}
}

/// Configuration options when
/// [reconfiguring](#lr.LRParser.configure) a parser.
export interface ParserConfig {
  /// Node prop values to add to the parser's node set.
  props?: readonly NodePropSource[]
  /// The name of the `@top` declaration to parse from. If not
  /// specified, the first top rule declaration in the grammar is
  /// used.
  top?: string
  /// A space-separated string of dialects to enable.
  dialect?: string
  /// Replace the given external tokenizers with new ones.
  tokenizers?: {from: ExternalTokenizer, to: ExternalTokenizer}[]
  /// Replace external specializers with new ones.
  specializers?: {from: (value: string, stack: Stack) => number, to: (value: string, stack: Stack) => number}[],
  /// Replace the context tracker with a new one.
  contextTracker?: ContextTracker<any>,
  /// When true, the parser will raise an exception, rather than run
  /// its error-recovery strategies, when the input doesn't match the
  /// grammar.
  strict?: boolean
  /// Add a wrapper, which can extend parses created by this parser
  /// with additional logic (usually used to add
  /// [mixed-language](#common.parseMixed) parsing).
  wrap?: ParseWrapper
  /// The maximum length of the TreeBuffers generated in the output
  /// tree. Defaults to 1024.
  bufferLength?: number
}
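// Illustrative sketch (not part of this module): `ParserConfig` is
// what you pass to `LRParser.configure` below. For example (the
// dialect name and prop source are hypothetical):
//
//   let strictParser = parser.configure({
//     strict: true,
//     dialect: "ts",
//     props: [someHighlightingPropSource]
//   })
//
// The original parser is left untouched; `configure` returns a copy.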
/// Holds the parse tables for a given grammar, as generated by
/// `lezer-generator`, and provides [methods](#common.Parser) to parse
/// content with.
export class LRParser extends Parser {
  /// The parse states for this grammar @internal
  readonly states: Readonly<Uint32Array>
  /// A blob of data that the parse states, as well as some
  /// of `LRParser`'s fields, point into @internal
  readonly data: Readonly<Uint16Array>
  /// The goto table. See `computeGotoTable` in
  /// lezer-generator for details on the format @internal
  readonly goto: Readonly<Uint16Array>
  /// The highest term id @internal
  readonly maxTerm: number
  /// The first repeat-related term id @internal
  readonly minRepeatTerm: number
  /// The tokenizer objects used by the grammar @internal
  readonly tokenizers: readonly Tokenizer[]
  /// Maps top rule names to [state ID, top term ID] pairs. @internal
  readonly topRules: {[name: string]: [number, number]}
  /// @internal
  readonly context: ContextTracker<any> | null
  /// A mapping from dialect names to the tokens that are exclusive
  /// to them. @internal
  readonly dialects: {[name: string]: number}
  /// Null if there are no dynamic precedences, a map from term ids
  /// to precedence otherwise. @internal
  readonly dynamicPrecedences: {[term: number]: number} | null
  /// The token types that have specializers (in this.specializers) @internal
  readonly specialized: Uint16Array
  /// The specializer functions for the token types in specialized @internal
  readonly specializers: ((value: string, stack: Stack) => number)[]
  /// FIXME @internal
  readonly specializerSpecs: SpecializerSpec[]
  /// Points into this.data at an array that holds the
  /// precedence order (higher precedence first) for ambiguous
  /// tokens @internal
  readonly tokenPrecTable: number
  /// An optional object mapping term ids to name strings @internal
  readonly termNames: null | {[id: number]: string}
  /// @internal
  readonly maxNode: number
  /// @internal
  readonly dialect: Dialect
  /// @internal
  readonly wrappers: readonly ParseWrapper[] = []
  /// @internal
  readonly top: [number, number]
  /// @internal
  readonly bufferLength: number
  /// @internal
  readonly strict: boolean
  /// The nodes used in the trees emitted by this parser.
  readonly nodeSet: NodeSet

  /// @internal
  constructor(spec: ParserSpec) {
    super()
    if (spec.version != File.Version)
      throw new RangeError(`Parser version (${spec.version}) doesn't match runtime version (${File.Version})`)
    let nodeNames = spec.nodeNames.split(" ")
    this.minRepeatTerm = nodeNames.length
    for (let i = 0; i < spec.repeatNodeCount; i++) nodeNames.push("")
    let topTerms = Object.keys(spec.topRules).map(r => spec.topRules[r][1])
    let nodeProps: [NodeProp<any>, any][][] = []
    for (let i = 0; i < nodeNames.length; i++) nodeProps.push([])
    function setProp(nodeID: number, prop: NodeProp<any>, value: any) {
      nodeProps[nodeID].push([prop, prop.deserialize(String(value))])
    }
    if (spec.nodeProps) for (let propSpec of spec.nodeProps) {
      let prop = propSpec[0]
      if (typeof prop == "string") prop = (NodeProp as unknown as {[name: string]: NodeProp<any>})[prop]
      for (let i = 1; i < propSpec.length;) {
        let next = propSpec[i++] as number
        if (next >= 0) {
          setProp(next as number, prop, propSpec[i++] as string)
        } else {
          let value = propSpec[i + -next] as string
          for (let j = -next; j > 0; j--) setProp(propSpec[i++] as number, prop, value)
          i++
        }
      }
    }
    this.nodeSet = new NodeSet(nodeNames.map((name, i) => NodeType.define({
      name: i >= this.minRepeatTerm ? undefined : name,
      id: i,
      props: nodeProps[i],
      top: topTerms.indexOf(i) > -1,
      error: i == 0,
      skipped: spec.skippedNodes && spec.skippedNodes.indexOf(i) > -1
    })))
    if (spec.propSources) this.nodeSet = this.nodeSet.extend(...spec.propSources)
    this.strict = false
    this.bufferLength = DefaultBufferLength
    let tokenArray = decodeArray(spec.tokenData)
    this.context = spec.context
    this.specializerSpecs = spec.specialized || []
    this.specialized = new Uint16Array(this.specializerSpecs.length)
    for (let i = 0; i < this.specializerSpecs.length; i++) this.specialized[i] = this.specializerSpecs[i].term
    this.specializers = this.specializerSpecs.map(getSpecializer)
    this.states = decodeArray(spec.states, Uint32Array)
    this.data = decodeArray(spec.stateData)
    this.goto = decodeArray(spec.goto)
    this.maxTerm = spec.maxTerm
    this.tokenizers = spec.tokenizers.map(value => typeof value == "number" ? new TokenGroup(tokenArray, value) : value)
    this.topRules = spec.topRules
    this.dialects = spec.dialects || {}
    this.dynamicPrecedences = spec.dynamicPrecedences || null
    this.tokenPrecTable = spec.tokenPrec
    this.termNames = spec.termNames || null
    this.maxNode = this.nodeSet.types.length - 1
    this.dialect = this.parseDialect()
    this.top = this.topRules[Object.keys(this.topRules)[0]]
  }
  createParse(input: Input, fragments: readonly TreeFragment[], ranges: readonly {from: number, to: number}[]): PartialParse {
    let parse: PartialParse = new Parse(this, input, fragments, ranges)
    for (let w of this.wrappers) parse = w(parse, input, fragments, ranges)
    return parse
  }

  /// Get a goto table entry @internal
  getGoto(state: number, term: number, loose = false) {
    let table = this.goto
    if (term >= table[0]) return -1
    for (let pos = table[term + 1];;) {
      let groupTag = table[pos++], last = groupTag & 1
      let target = table[pos++]
      if (last && loose) return target
      for (let end = pos + (groupTag >> 1); pos < end; pos++)
        if (table[pos] == state) return target
      if (last) return -1
    }
  }

  /// Check if this state has an action for a given terminal @internal
  hasAction(state: number, terminal: number) {
    let data = this.data
    for (let set = 0; set < 2; set++) {
      for (let i = this.stateSlot(state, set ? ParseState.Skip : ParseState.Actions), next;; i += 3) {
        if ((next = data[i]) == Seq.End) {
          if (data[i + 1] == Seq.Next) next = data[i = pair(data, i + 2)]
          else if (data[i + 1] == Seq.Other) return pair(data, i + 2)
          else break
        }
        if (next == terminal || next == Term.Err) return pair(data, i + 1)
      }
    }
    return 0
  }

  /// @internal
  stateSlot(state: number, slot: number) {
    return this.states[(state * ParseState.Size) + slot]
  }

  /// @internal
  stateFlag(state: number, flag: number) {
    return (this.stateSlot(state, ParseState.Flags) & flag) > 0
  }

  /// @internal
  validAction(state: number, action: number) {
    return !!this.allActions(state, a => a == action ? true : null)
  }

  /// @internal
  allActions<T>(state: number, action: (action: number) => void | T): void | T {
    let deflt = this.stateSlot(state, ParseState.DefaultReduce)
    let result: void | T = deflt ? action(deflt) : undefined
    for (let i = this.stateSlot(state, ParseState.Actions); result == null; i += 3) {
      if (this.data[i] == Seq.End) {
        if (this.data[i + 1] == Seq.Next) i = pair(this.data, i + 2)
        else break
      }
      result = action(pair(this.data, i + 1))
    }
    return result
  }
  /// Get the states that can follow this one through shift actions or
  /// goto jumps. @internal
  nextStates(state: number): readonly number[] {
    let result: number[] = []
    for (let i = this.stateSlot(state, ParseState.Actions);; i += 3) {
      if (this.data[i] == Seq.End) {
        if (this.data[i + 1] == Seq.Next) i = pair(this.data, i + 2)
        else break
      }
      if ((this.data[i + 2] & (Action.ReduceFlag >> 16)) == 0) {
        let value = this.data[i + 1]
        if (!result.some((v, i) => (i & 1) && v == value)) result.push(this.data[i], value)
      }
    }
    return result
  }

  /// Configure the parser. Returns a new parser instance that has the
  /// given settings modified. Settings not provided in `config` are
  /// kept from the original parser.
  configure(config: ParserConfig) {
    // Hideous reflection-based kludge to make it easy to create a
    // slightly modified copy of a parser.
    let copy = Object.assign(Object.create(LRParser.prototype), this)
    if (config.props) copy.nodeSet = this.nodeSet.extend(...config.props)
    if (config.top) {
      let info = this.topRules[config.top!]
      if (!info) throw new RangeError(`Invalid top rule name ${config.top}`)
      copy.top = info
    }
    if (config.tokenizers) copy.tokenizers = this.tokenizers.map(t => {
      let found = config.tokenizers!.find(r => r.from == t)
      return found ? found.to : t
    })
    if (config.specializers) {
      copy.specializers = this.specializers.slice()
      copy.specializerSpecs = this.specializerSpecs.map((s, i) => {
        let found = config.specializers!.find(r => r.from == s.external)
        if (!found) return s
        let spec = {...s, external: found.to}
        copy.specializers[i] = getSpecializer(spec)
        return spec
      })
    }
    if (config.contextTracker) copy.context = config.contextTracker
    if (config.dialect) copy.dialect = this.parseDialect(config.dialect)
    if (config.strict != null) copy.strict = config.strict
    if (config.wrap) copy.wrappers = copy.wrappers.concat(config.wrap)
    if (config.bufferLength != null) copy.bufferLength = config.bufferLength
    return copy as LRParser
  }

  /// Tells you whether any [parse wrappers](#lr.ParserConfig.wrap)
  /// are registered for this parser.
  hasWrappers() {
    return this.wrappers.length > 0
  }

  /// Returns the name associated with a given term. This will only
  /// work for all terms when the parser was generated with the
  /// `--names` option. By default, only the names of tagged terms are
  /// stored.
  getName(term: number): string {
    return this.termNames ? this.termNames[term] : String(term <= this.maxNode && this.nodeSet.types[term].name || term)
  }

  /// The eof term id is always allocated directly after the node
  /// types. @internal
  get eofTerm() { return this.maxNode + 1 }

  /// The type of top node produced by the parser.
  get topNode() { return this.nodeSet.types[this.top[1]] }

  /// @internal
  dynamicPrecedence(term: number) {
    let prec = this.dynamicPrecedences
    return prec == null ? 0 : prec[term] || 0
  }

  /// @internal
  parseDialect(dialect?: string) {
    let values = Object.keys(this.dialects), flags = values.map(() => false)
    if (dialect) for (let part of dialect.split(" ")) {
      let id = values.indexOf(part)
      if (id >= 0) flags[id] = true
    }
    let disabled: Uint8Array | null = null
    for (let i = 0; i < values.length; i++) if (!flags[i]) {
      for (let j = this.dialects[values[i]], id; (id = this.data[j++]) != Seq.End;)
        (disabled || (disabled = new Uint8Array(this.maxTerm + 1)))[id] = 1
    }
    return new Dialect(dialect, flags, disabled)
  }
  /// Used by the output of the parser generator. Not available to
  /// user code. @hide
  static deserialize(spec: any): LRParser {
    return new LRParser(spec as ParserSpec)
  }
}

function pair(data: Readonly<Uint16Array>, off: number) { return data[off] | (data[off + 1] << 16) }

function findFinished(stacks: Stack[]) {
  let best: Stack | null = null
  for (let stack of stacks) {
    let stopped = stack.p.stoppedAt
    if ((stack.pos == stack.p.stream.end || stopped != null && stack.pos > stopped) &&
        stack.p.parser.stateFlag(stack.state, StateFlag.Accepting) &&
        (!best || best.score < stack.score))
      best = stack
  }
  return best
}

function getSpecializer(spec: SpecializerSpec) {
  if (spec.external) {
    let mask = spec.extend ? Specialize.Extend : Specialize.Specialize
    return (value: string, stack: Stack) => (spec.external!(value, stack) << 1) | mask
  }
  return spec.get!
}
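Before the `Stack` implementation below, a hedged sketch of how external code typically consumes that class: an external tokenizer can inspect the stack to disambiguate tokens via the public `Stack.canShift` method. The term ids and the division-versus-regexp scenario are hypothetical illustrations, not this package's code.

```ts
import {ExternalTokenizer} from "@lezer/lr"
// Hypothetical term ids from a generated grammar's terms file.
import {divide, regexpStart} from "./parser.terms"

// Treat "/" as division only where the grammar could actually shift a
// divide token; otherwise let it start a regular expression.
export const slash = new ExternalTokenizer((input, stack) => {
  if (input.next == 47 /* '/' */)
    input.acceptToken(stack.canShift(divide) ? divide : regexpStart, 1)
})
```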
  // Push a state onto the stack, tracking its start position as well
  // as the buffer base at that point.
  /// @internal
  pushState(state: number, start: number) {
    this.stack.push(this.state, start, this.bufferBase + this.buffer.length)
    this.state = state
  }

  // Apply a reduce action
  /// @internal
  reduce(action: number) {
    let depth = action >> Action.ReduceDepthShift, type = action & Action.ValueMask
    let {parser} = this.p
    let dPrec = parser.dynamicPrecedence(type)
    if (dPrec) this.score += dPrec
    if (depth == 0) {
      this.pushState(parser.getGoto(this.state, type, true), this.reducePos)
      // Zero-depth reductions are a special case—they add stuff to
      // the stack without popping anything off.
      if (type < parser.minRepeatTerm) this.storeNode(type, this.reducePos, this.reducePos, 4, true)
      this.reduceContext(type, this.reducePos)
      return
    }
    // Find the base index into `this.stack`, content after which will
    // be dropped. Note that with `StayFlag` reductions we need to
    // consume two extra frames (the dummy parent node for the skipped
    // expression and the state that we'll be staying in, which should
    // be moved to `this.state`).
    let base = this.stack.length - ((depth - 1) * 3) - (action & Action.StayFlag ? 6 : 0)
    let start = base ? this.stack[base - 2] : this.p.ranges[0].from, size = this.reducePos - start
    // This is a kludge to try and detect overly deep left-associative
    // trees, which will not increase the parse stack depth and thus
    // won't be caught by the regular stack-depth limit check.
    if (size >= Recover.MinBigReduction && !this.p.parser.nodeSet.types[type]?.isAnonymous) {
      if (start == this.p.lastBigReductionStart) {
        this.p.bigReductionCount++
        this.p.lastBigReductionSize = size
      } else if (this.p.lastBigReductionSize < size) {
        this.p.bigReductionCount = 1
        this.p.lastBigReductionStart = start
        this.p.lastBigReductionSize = size
      }
    }
    let bufferBase = base ? this.stack[base - 1] : 0, count = this.bufferBase + this.buffer.length - bufferBase
    // Store normal terms or `R -> R R` repeat reductions
    if (type < parser.minRepeatTerm || (action & Action.RepeatFlag)) {
      let pos = parser.stateFlag(this.state, StateFlag.Skipped) ? this.pos : this.reducePos
      this.storeNode(type, start, pos, count + 4, true)
    }
    if (action & Action.StayFlag) {
      this.state = this.stack[base]
    } else {
      let baseStateID = this.stack[base - 3]
      this.state = parser.getGoto(baseStateID, type, true)
    }
    while (this.stack.length > base) this.stack.pop()
    this.reduceContext(type, start)
  }
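  // Worked example (illustrative): a reduce action packs a term id and
  // a rule depth into one number. Assuming the term id sits in the low
  // bits covered by `Action.ValueMask` and the depth above
  // `Action.ReduceDepthShift` (the exact layout lives in
  // src/constants.ts), an action popping three frames for term 12
  // would decode roughly as:
  //
  //   const depth = action >> Action.ReduceDepthShift  // -> 3
  //   const term = action & Action.ValueMask           // -> 12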
  // Shift a value into the buffer
  /// @internal
  storeNode(term: number, start: number, end: number, size = 4, isReduce = false) {
    if (term == Term.Err &&
        (!this.stack.length || this.stack[this.stack.length - 1] < this.buffer.length + this.bufferBase)) {
      // Try to omit/merge adjacent error nodes
      let cur: Stack | null = this, top = this.buffer.length
      if (top == 0 && cur.parent) {
        top = cur.bufferBase - cur.parent.bufferBase
        cur = cur.parent
      }
      if (top > 0 && cur.buffer[top - 4] == Term.Err && cur.buffer[top - 1] > -1) {
        if (start == end) return
        if (cur.buffer[top - 2] >= start) { cur.buffer[top - 2] = end; return }
      }
    }

    if (!isReduce || this.pos == end) { // Simple case, just append
      this.buffer.push(term, start, end, size)
    } else { // There may be skipped nodes that have to be moved forward
      let index = this.buffer.length
      if (index > 0 && this.buffer[index - 4] != Term.Err) while (index > 0 && this.buffer[index - 2] > end) {
        // Move this record forward
        this.buffer[index] = this.buffer[index - 4]
        this.buffer[index + 1] = this.buffer[index - 3]
        this.buffer[index + 2] = this.buffer[index - 2]
        this.buffer[index + 3] = this.buffer[index - 1]
        index -= 4
        if (size > 4) size -= 4
      }
      this.buffer[index] = term
      this.buffer[index + 1] = start
      this.buffer[index + 2] = end
      this.buffer[index + 3] = size
    }
  }

  // Apply a shift action
  /// @internal
  shift(action: number, type: number, start: number, end: number) {
    if (action & Action.GotoFlag) {
      this.pushState(action & Action.ValueMask, this.pos)
    } else if ((action & Action.StayFlag) == 0) { // Regular shift
      let nextState = action, {parser} = this.p
      if (end > this.pos || type <= parser.maxNode) {
        this.pos = end
        if (!parser.stateFlag(nextState, StateFlag.Skipped)) this.reducePos = end
      }
      this.pushState(nextState, start)
      this.shiftContext(type, start)
      if (type <= parser.maxNode) this.buffer.push(type, start, end, 4)
    } else { // Shift-and-stay, which means this is a skipped token
      this.pos = end
      this.shiftContext(type, start)
      if (type <= this.p.parser.maxNode) this.buffer.push(type, start, end, 4)
    }
  }

  // Apply an action
  /// @internal
  apply(action: number, next: number, nextStart: number, nextEnd: number) {
    if (action & Action.ReduceFlag) this.reduce(action)
    else this.shift(action, next, nextStart, nextEnd)
  }

  // Add a prebuilt (reused) node into the buffer.
  /// @internal
  useNode(value: Tree, next: number) {
    let index = this.p.reused.length - 1
    if (index < 0 || this.p.reused[index] != value) {
      this.p.reused.push(value)
      index++
    }
    let start = this.pos
    this.reducePos = this.pos = start + value.length
    this.pushState(next, start)
    this.buffer.push(index, start, this.reducePos, -1 /* size == -1 means this is a reused value */)
    if (this.curContext)
      this.updateContext(this.curContext.tracker.reuse(this.curContext.context, value, this, this.p.stream.reset(this.pos - value.length)))
  }
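  // Worked example (illustrative): the buffer stores flat
  // (type, start, end, size) quads, children before parents, where
  // `size` counts the buffer entries a node covers. Parsing a
  // hypothetical input "ab" as Pair(A, B) might append:
  //
  //   [typeA, 0, 1, 4,      // leaf A: covers only its own 4 entries
  //    typeB, 1, 2, 4,      // leaf B
  //    typePair, 0, 2, 12]  // parent: itself plus both children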
  // Split the stack. Due to the buffer sharing and the fact
  // that `this.stack` tends to stay quite shallow, this isn't very
  // expensive.
  /// @internal
  split() {
    let parent: Stack | null = this
    let off = parent.buffer.length
    // Because the top of the buffer (after this.pos) may be mutated
    // to reorder reductions and skipped tokens, and shared buffers
    // should be immutable, this copies any outstanding skipped tokens
    // to the new buffer, and puts the base pointer before them.
    while (off > 0 && parent.buffer[off - 2] > parent.reducePos) off -= 4
    let buffer = parent.buffer.slice(off), base = parent.bufferBase + off
    // Make sure parent points to an actual parent with content, if there is such a parent.
    while (parent && base == parent.bufferBase) parent = parent.parent
    return new Stack(this.p, this.stack.slice(), this.state, this.reducePos, this.pos, this.score,
                     buffer, base, this.curContext, this.lookAhead, parent)
  }

  // Try to recover from an error by 'deleting' (ignoring) one token.
  /// @internal
  recoverByDelete(next: number, nextEnd: number) {
    let isNode = next <= this.p.parser.maxNode
    if (isNode) this.storeNode(next, this.pos, nextEnd, 4)
    this.storeNode(Term.Err, this.pos, nextEnd, isNode ? 8 : 4)
    this.pos = this.reducePos = nextEnd
    this.score -= Recover.Delete
  }

  /// Check if the given term would be able to be shifted (optionally
  /// after some reductions) on this stack. This can be useful for
  /// external tokenizers that want to make sure they only provide a
  /// given token when it applies.
  canShift(term: number) {
    for (let sim = new SimulatedStack(this);;) {
      let action = this.p.parser.stateSlot(sim.state, ParseState.DefaultReduce) || this.p.parser.hasAction(sim.state, term)
      if (action == 0) return false
      if ((action & Action.ReduceFlag) == 0) return true
      sim.reduce(action)
    }
  }
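  // Illustrative sketch (not part of the library): an external
  // tokenizer (see ExternalTokenizer in src/token.ts) can consult
  // `canShift` to avoid producing a token the grammar cannot accept
  // here. `insertedSemi` stands in for a term id imported from a
  // generated terms file.
  //
  //   const semicolon = new ExternalTokenizer((input, stack) => {
  //     if (input.next == 10 /* '\n' */ && stack.canShift(insertedSemi))
  //       input.acceptToken(insertedSemi)
  //   })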
  // Apply up to Recover.MaxNext recovery actions that conceptually
  // insert some missing token or rule.
  /// @internal
  recoverByInsert(next: number): Stack[] {
    if (this.stack.length >= Recover.MaxInsertStackDepth) return []
    let nextStates = this.p.parser.nextStates(this.state)
    if (nextStates.length > Recover.MaxNext << 1 || this.stack.length >= Recover.DampenInsertStackDepth) {
      let best = []
      for (let i = 0, s; i < nextStates.length; i += 2) {
        if ((s = nextStates[i + 1]) != this.state && this.p.parser.hasAction(s, next))
          best.push(nextStates[i], s)
      }
      if (this.stack.length < Recover.DampenInsertStackDepth)
        for (let i = 0; best.length < Recover.MaxNext << 1 && i < nextStates.length; i += 2) {
          let s = nextStates[i + 1]
          if (!best.some((v, i) => (i & 1) && v == s)) best.push(nextStates[i], s)
        }
      nextStates = best
    }
    let result: Stack[] = []
    for (let i = 0; i < nextStates.length && result.length < Recover.MaxNext; i += 2) {
      let s = nextStates[i + 1]
      if (s == this.state) continue
      let stack = this.split()
      stack.pushState(s, this.pos)
      stack.storeNode(Term.Err, stack.pos, stack.pos, 4, true)
      stack.shiftContext(nextStates[i], this.pos)
      stack.reducePos = this.pos
      stack.score -= Recover.Insert
      result.push(stack)
    }
    return result
  }

  // Force a reduce, if possible. Return false if that can't
  // be done.
  /// @internal
  forceReduce() {
    let {parser} = this.p
    let reduce = parser.stateSlot(this.state, ParseState.ForcedReduce)
    if ((reduce & Action.ReduceFlag) == 0) return false
    if (!parser.validAction(this.state, reduce)) {
      let depth = reduce >> Action.ReduceDepthShift, term = reduce & Action.ValueMask
      let target = this.stack.length - depth * 3
      if (target < 0 || parser.getGoto(this.stack[target], term, false) < 0) {
        let backup = this.findForcedReduction()
        if (backup == null) return false
        reduce = backup
      }
      this.storeNode(Term.Err, this.pos, this.pos, 4, true)
      this.score -= Recover.Reduce
    }
    this.reducePos = this.pos
    this.reduce(reduce)
    return true
  }

  /// Try to scan through the automaton to find some kind of reduction
  /// that can be applied. Used when the regular ForcedReduce field
  /// isn't a valid action. @internal
  findForcedReduction() {
    let {parser} = this.p, seen: number[] = []
    let explore = (state: number, depth: number): number | void => {
      if (seen.includes(state)) return
      seen.push(state)
      return parser.allActions(state, (action): number | void => {
        if (action & (Action.StayFlag | Action.GotoFlag)) {
        } else if (action & Action.ReduceFlag) {
          let rDepth = (action >> Action.ReduceDepthShift) - depth
          if (rDepth > 1) {
            let term = action & Action.ValueMask, target = this.stack.length - rDepth * 3
            if (target >= 0 && parser.getGoto(this.stack[target], term, false) >= 0)
              return (rDepth << Action.ReduceDepthShift) | Action.ReduceFlag | term
          }
        } else {
          let found = explore(action, depth + 1)
          if (found != null) return found
        }
      })
    }
    return explore(this.state, 0)
  }

  /// @internal
  forceAll() {
    while (!this.p.parser.stateFlag(this.state, StateFlag.Accepting)) {
      if (!this.forceReduce()) {
        this.storeNode(Term.Err, this.pos, this.pos, 4, true)
        break
      }
    }
    return this
  }

  /// Check whether this state has no further actions (assumed to be a
  /// direct descendant of the top state, since any other states must
  /// be able to continue somehow). @internal
  get deadEnd() {
    if (this.stack.length != 3) return false
    let {parser} = this.p
    return parser.data[parser.stateSlot(this.state, ParseState.Actions)] == Seq.End &&
      !parser.stateSlot(this.state, ParseState.DefaultReduce)
  }

  /// Restart the stack (put it back in its start state). Only safe
  /// when this.stack.length == 3 (state is directly below the top
  /// state). @internal
  restart() {
    this.storeNode(Term.Err, this.pos, this.pos, 4, true)
    this.state = this.stack[0]
    this.stack.length = 0
  }

  /// @internal
  sameState(other: Stack) {
    if (this.state != other.state || this.stack.length != other.stack.length) return false
    for (let i = 0; i < this.stack.length; i += 3)
      if (this.stack[i] != other.stack[i]) return false
    return true
  }

  /// Get the parser used by this stack.
  get parser() { return this.p.parser }
  /// Test whether a given dialect (by numeric ID, as exported from
  /// the terms file) is enabled.
  dialectEnabled(dialectID: number) { return this.p.parser.dialect.flags[dialectID] }

  private shiftContext(term: number, start: number) {
    if (this.curContext)
      this.updateContext(this.curContext.tracker.shift(this.curContext.context, term, this, this.p.stream.reset(start)))
  }

  private reduceContext(term: number, start: number) {
    if (this.curContext)
      this.updateContext(this.curContext.tracker.reduce(this.curContext.context, term, this, this.p.stream.reset(start)))
  }

  /// @internal
  private emitContext() {
    let last = this.buffer.length - 1
    if (last < 0 || this.buffer[last] != -3)
      this.buffer.push(this.curContext!.hash, this.pos, this.pos, -3)
  }

  /// @internal
  emitLookAhead() {
    let last = this.buffer.length - 1
    if (last < 0 || this.buffer[last] != -4)
      this.buffer.push(this.lookAhead, this.pos, this.pos, -4)
  }

  private updateContext(context: any) {
    if (context != this.curContext!.context) {
      let newCx = new StackContext(this.curContext!.tracker, context)
      if (newCx.hash != this.curContext!.hash) this.emitContext()
      this.curContext = newCx
    }
  }

  /// @internal
  setLookAhead(lookAhead: number) {
    if (lookAhead > this.lookAhead) {
      this.emitLookAhead()
      this.lookAhead = lookAhead
    }
  }

  /// @internal
  close() {
    if (this.curContext && this.curContext!.tracker.strict) this.emitContext()
    if (this.lookAhead > 0) this.emitLookAhead()
  }
}

class StackContext {
  readonly hash: number
  constructor(readonly tracker: ContextTracker<any>, readonly context: any) {
    this.hash = tracker.strict ? tracker.hash(context) : 0
  }
}

export const enum Recover {
  Insert = 200,
  Delete = 190,
  Reduce = 100,
  MaxNext = 4,
  MaxInsertStackDepth = 300,
  DampenInsertStackDepth = 120,
  MinBigReduction = 2000
}

// Used to cheaply run some reductions to scan ahead without mutating
// an entire stack
class SimulatedStack {
  state: number
  stack: number[]
  base: number

  constructor(readonly start: Stack) {
    this.state = start.state
    this.stack = start.stack
    this.base = this.stack.length
  }

  reduce(action: number) {
    let term = action & Action.ValueMask, depth = action >> Action.ReduceDepthShift
    if (depth == 0) {
      if (this.stack == this.start.stack) this.stack = this.stack.slice()
      this.stack.push(this.state, 0, 0)
      this.base += 3
    } else {
      this.base -= (depth - 1) * 3
    }
    let goto = this.start.p.parser.getGoto(this.stack[this.base - 3], term, true)
    this.state = goto
  }
}
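// Illustrative sketch (not part of this file): a ContextTracker keeps
// a value alongside each parse stack, and the methods above call its
// `shift`/`reduce`/`reuse`/`hash` hooks. The term ids (`indent`,
// `dedent`) and the grammar they belong to are assumptions.
//
//   import {ContextTracker} from "@lezer/lr"
//   const indentation = new ContextTracker<number>({
//     start: 0,
//     shift(context, term, stack, input) {
//       return term == indent ? context + 1 : term == dedent ? context - 1 : context
//     },
//     hash: context => context
//   })
//   // Attached via `@context` in the grammar, or with
//   // `parser.configure({contextTracker: indentation})`.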
// This is given to `Tree.build` to build a buffer, and encapsulates
// the parent-stack-walking necessary to read the nodes.
export class StackBufferCursor implements BufferCursor {
  buffer: number[]

  constructor(public stack: Stack, public pos: number, public index: number) {
    this.buffer = stack.buffer
    if (this.index == 0) this.maybeNext()
  }

  static create(stack: Stack, pos = stack.bufferBase + stack.buffer.length) {
    return new StackBufferCursor(stack, pos, pos - stack.bufferBase)
  }

  maybeNext() {
    let next = this.stack.parent
    if (next != null) {
      this.index = this.stack.bufferBase - next.bufferBase
      this.stack = next
      this.buffer = next.buffer
    }
  }

  get id() { return this.buffer[this.index - 4] }
  get start() { return this.buffer[this.index - 3] }
  get end() { return this.buffer[this.index - 2] }
  get size() { return this.buffer[this.index - 1] }

  next() {
    this.index -= 4
    this.pos -= 4
    if (this.index == 0) this.maybeNext()
  }

  fork() {
    return new StackBufferCursor(this.stack, this.pos, this.index)
  }
}

lr-1.4.0/src/token.ts

import {Input} from "@lezer/common"
import {Stack} from "./stack"
import {Seq} from "./constants"
import {decodeArray} from "./decode"

export class CachedToken {
  start = -1
  value = -1
  end = -1
  extended = -1
  lookAhead = 0
  mask = 0
  context = 0
}

const nullToken = new CachedToken

/// [Tokenizers](#lr.ExternalTokenizer) interact with the input
/// through this interface. It presents the input as a stream of
/// characters, tracking lookahead and hiding the complexity of
/// [ranges](#common.Parser.parse^ranges) from tokenizer code.
export class InputStream {
  /// @internal
  chunk = ""
  /// @internal
  chunkOff = 0
  /// @internal
  chunkPos: number

  /// Backup chunk
  private chunk2 = ""
  private chunk2Pos = 0

  /// The character code of the next code unit in the input, or -1
  /// when the stream is at the end of the input.
  next: number = -1

  /// @internal
  token = nullToken

  /// The current position of the stream. Note that, due to parses
  /// being able to cover non-contiguous
  /// [ranges](#common.Parser.startParse), advancing the stream does
  /// not always mean its position moves a single unit.
  pos: number

  /// @internal
  end: number

  private rangeIndex = 0
  private range: {from: number, to: number}

  /// @internal
  constructor(
    /// @internal
    readonly input: Input,
    /// @internal
    readonly ranges: readonly {from: number, to: number}[]
  ) {
    this.pos = this.chunkPos = ranges[0].from
    this.range = ranges[0]
    this.end = ranges[ranges.length - 1].to
    this.readNext()
  }

  /// @internal
  resolveOffset(offset: number, assoc: -1 | 1) {
    let range = this.range, index = this.rangeIndex
    let pos = this.pos + offset
    while (pos < range.from) {
      if (!index) return null
      let next = this.ranges[--index]
      pos -= range.from - next.to
      range = next
    }
    while (assoc < 0 ? pos > range.to : pos >= range.to) {
      if (index == this.ranges.length - 1) return null
      let next = this.ranges[++index]
      pos += next.from - range.to
      range = next
    }
    return pos
  }

  /// @internal
  clipPos(pos: number) {
    if (pos >= this.range.from && pos < this.range.to) return pos
    for (let range of this.ranges) if (range.to > pos) return Math.max(pos, range.from)
    return this.end
  }
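  // Illustrative sketch (not part of the library): a parse can cover
  // non-contiguous ranges, which this class hides from tokenizers.
  // `myParser` is a stand-in for a generated parser.
  //
  //   // Only parse the bracketed parts of "a{b}c{d}":
  //   const tree = myParser.parse("a{b}c{d}", undefined, [
  //     {from: 2, to: 3}, {from: 6, to: 7}
  //   ])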
  /// Look at a code unit near the stream position. `.peek(0)` equals
  /// `.next`, `.peek(-1)` gives you the previous character, and so
  /// on.
  ///
  /// Note that looking around during tokenizing creates dependencies
  /// on potentially far-away content, which may reduce the
  /// effectiveness of incremental parsing—when looking forward—or
  /// even cause invalid reparses when looking backward more than 25
  /// code units, since the library does not track lookbehind.
  peek(offset: number) {
    let idx = this.chunkOff + offset, pos, result
    if (idx >= 0 && idx < this.chunk.length) {
      pos = this.pos + offset
      result = this.chunk.charCodeAt(idx)
    } else {
      let resolved = this.resolveOffset(offset, 1)
      if (resolved == null) return -1
      pos = resolved
      if (pos >= this.chunk2Pos && pos < this.chunk2Pos + this.chunk2.length) {
        result = this.chunk2.charCodeAt(pos - this.chunk2Pos)
      } else {
        let i = this.rangeIndex, range = this.range
        while (range.to <= pos) range = this.ranges[++i]
        this.chunk2 = this.input.chunk(this.chunk2Pos = pos)
        if (pos + this.chunk2.length > range.to) this.chunk2 = this.chunk2.slice(0, range.to - pos)
        result = this.chunk2.charCodeAt(0)
      }
    }
    if (pos >= this.token.lookAhead) this.token.lookAhead = pos + 1
    return result
  }

  /// Accept a token. By default, the end of the token is set to the
  /// current stream position, but you can pass an offset (relative to
  /// the stream position) to change that.
  acceptToken(token: number, endOffset = 0) {
    let end = endOffset ? this.resolveOffset(endOffset, -1) : this.pos
    if (end == null || end < this.token.start) throw new RangeError("Token end out of bounds")
    this.token.value = token
    this.token.end = end
  }

  /// Accept a token ending at a specific given position.
  acceptTokenTo(token: number, endPos: number) {
    this.token.value = token
    this.token.end = endPos
  }

  private getChunk() {
    if (this.pos >= this.chunk2Pos && this.pos < this.chunk2Pos + this.chunk2.length) {
      let {chunk, chunkPos} = this
      this.chunk = this.chunk2; this.chunkPos = this.chunk2Pos
      this.chunk2 = chunk; this.chunk2Pos = chunkPos
      this.chunkOff = this.pos - this.chunkPos
    } else {
      this.chunk2 = this.chunk; this.chunk2Pos = this.chunkPos
      let nextChunk = this.input.chunk(this.pos)
      let end = this.pos + nextChunk.length
      this.chunk = end > this.range.to ? nextChunk.slice(0, this.range.to - this.pos) : nextChunk
      this.chunkPos = this.pos
      this.chunkOff = 0
    }
  }

  private readNext() {
    if (this.chunkOff >= this.chunk.length) {
      this.getChunk()
      if (this.chunkOff == this.chunk.length) return this.next = -1
    }
    return this.next = this.chunk.charCodeAt(this.chunkOff)
  }
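  // Illustrative sketch (not part of the library): a typical external
  // tokenizer loop reads `next`, calls `advance`, and finishes with
  // `acceptToken`. `quotedString` stands in for a generated term id.
  //
  //   const strings = new ExternalTokenizer(input => {
  //     if (input.next != 39 /* ' */) return
  //     while (input.advance() >= 0 && input.next != 39) {}
  //     if (input.next == 39) { input.advance(); input.acceptToken(quotedString) }
  //   })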
  /// Move the stream forward N (defaults to 1) code units. Returns
  /// the new value of [`next`](#lr.InputStream.next).
  advance(n = 1) {
    this.chunkOff += n
    while (this.pos + n >= this.range.to) {
      if (this.rangeIndex == this.ranges.length - 1) return this.setDone()
      n -= this.range.to - this.pos
      this.range = this.ranges[++this.rangeIndex]
      this.pos = this.range.from
    }
    this.pos += n
    if (this.pos >= this.token.lookAhead) this.token.lookAhead = this.pos + 1
    return this.readNext()
  }

  private setDone() {
    this.pos = this.chunkPos = this.end
    this.range = this.ranges[this.rangeIndex = this.ranges.length - 1]
    this.chunk = ""
    return this.next = -1
  }

  /// @internal
  reset(pos: number, token?: CachedToken) {
    if (token) {
      this.token = token
      token.start = pos
      token.lookAhead = pos + 1
      token.value = token.extended = -1
    } else {
      this.token = nullToken
    }
    if (this.pos != pos) {
      this.pos = pos
      if (pos == this.end) {
        this.setDone()
        return this
      }
      while (pos < this.range.from) this.range = this.ranges[--this.rangeIndex]
      while (pos >= this.range.to) this.range = this.ranges[++this.rangeIndex]
      if (pos >= this.chunkPos && pos < this.chunkPos + this.chunk.length) {
        this.chunkOff = pos - this.chunkPos
      } else {
        this.chunk = ""
        this.chunkOff = 0
      }
      this.readNext()
    }
    return this
  }

  /// @internal
  read(from: number, to: number) {
    if (from >= this.chunkPos && to <= this.chunkPos + this.chunk.length)
      return this.chunk.slice(from - this.chunkPos, to - this.chunkPos)
    if (from >= this.chunk2Pos && to <= this.chunk2Pos + this.chunk2.length)
      return this.chunk2.slice(from - this.chunk2Pos, to - this.chunk2Pos)
    if (from >= this.range.from && to <= this.range.to)
      return this.input.read(from, to)
    let result = ""
    for (let r of this.ranges) {
      if (r.from >= to) break
      if (r.to > from) result += this.input.read(Math.max(r.from, from), Math.min(r.to, to))
    }
    return result
  }
}

export interface Tokenizer {
  /// @internal
  token(input: InputStream, stack: Stack): void
  /// @internal
  contextual: boolean
  /// @internal
  fallback: boolean
  /// @internal
  extend: boolean
}

/// @internal
export class TokenGroup implements Tokenizer {
  contextual!: boolean
  fallback!: boolean
  extend!: boolean

  constructor(readonly data: Readonly<Uint16Array>, readonly id: number) {}

  token(input: InputStream, stack: Stack) {
    let {parser} = stack.p
    readToken(this.data, input, stack, this.id, parser.data, parser.tokenPrecTable)
  }
}

TokenGroup.prototype.contextual = TokenGroup.prototype.fallback = TokenGroup.prototype.extend = false

/// @hide
export class LocalTokenGroup implements Tokenizer {
  contextual!: boolean
  fallback!: boolean
  extend!: boolean

  readonly data: Readonly<Uint16Array>

  constructor(data: Readonly<Uint16Array> | string, readonly precTable: number, readonly elseToken?: number) {
    this.data = typeof data == "string" ? decodeArray<Uint16Array>(data) : data
  }

  token(input: InputStream, stack: Stack) {
    let start = input.pos, skipped = 0
    for (;;) {
      let atEof = input.next < 0, nextPos = input.resolveOffset(1, 1)
      readToken(this.data, input, stack, 0, this.data, this.precTable)
      if (input.token.value > -1) break
      if (this.elseToken == null) return
      if (!atEof) skipped++
      if (nextPos == null) break
      input.reset(nextPos, input.token)
    }
    if (skipped) {
      input.reset(start, input.token)
      input.acceptToken(this.elseToken!, skipped)
    }
  }
}

LocalTokenGroup.prototype.contextual = LocalTokenGroup.prototype.fallback = LocalTokenGroup.prototype.extend = false
interface ExternalOptions {
  /// When set to true, mark this tokenizer as depending on the
  /// current parse stack, which prevents its result from being cached
  /// between parser actions at the same positions.
  contextual?: boolean,

  /// By default, when a tokenizer returns a token, that prevents
  /// tokenizers with lower precedence from even running. When
  /// `fallback` is true, the tokenizer is allowed to run when a
  /// previous tokenizer returned a token that didn't match any of the
  /// current state's actions.
  fallback?: boolean

  /// When set to true, tokenizing will not stop after this tokenizer
  /// has produced a token. (But it will still fail to reach this one
  /// if a higher-precedence tokenizer produced a token.)
  extend?: boolean
}

/// `@external tokens` declarations in the grammar should resolve to
/// an instance of this class.
export class ExternalTokenizer {
  /// @internal
  contextual: boolean
  /// @internal
  fallback: boolean
  /// @internal
  extend: boolean

  /// Create a tokenizer. The first argument is the function that,
  /// given an input stream, scans for the types of tokens it
  /// recognizes at the stream's position, and calls
  /// [`acceptToken`](#lr.InputStream.acceptToken) when it finds
  /// one.
  constructor(
    /// @internal
    readonly token: (input: InputStream, stack: Stack) => void,
    options: ExternalOptions = {}
  ) {
    this.contextual = !!options.contextual
    this.fallback = !!options.fallback
    this.extend = !!options.extend
  }
}
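// Illustrative sketch (not part of the library): wiring the options
// together. The scan function body and term id (`word`) are
// assumptions.
//
//   const words = new ExternalTokenizer((input, stack) => {
//     // ... scan, and possibly call input.acceptToken(word) ...
//   }, {contextual: true, fallback: true})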
// Tokenizer data is stored in a big uint16 array containing, for each
// state:
//
//  - A group bitmask, indicating what token groups are reachable from
//    this state, so that paths that can only lead to tokens not in
//    any of the current groups can be cut off early.
//
//  - The position of the end of the state's sequence of accepting
//    tokens
//
//  - The number of outgoing edges for the state
//
//  - The accepting tokens, as (token id, group mask) pairs
//
//  - The outgoing edges, as (start character, end character, state
//    index) triples, with end character being exclusive
//
// This function interprets that data, running through a stream as
// long as new states with a matching group mask can be reached,
// and updating `input.token` when it matches a token.
function readToken(data: Readonly<Uint16Array>, input: InputStream, stack: Stack, group: number,
                   precTable: Readonly<Uint16Array>, precOffset: number) {
  let state = 0, groupMask = 1 << group, {dialect} = stack.p.parser
  scan: for (;;) {
    // Check whether this state can lead to a token in the current group
    if ((groupMask & data[state]) == 0) break
    let accEnd = data[state + 1]
    // Accept tokens in this state, possibly overwriting
    // lower-precedence / shorter tokens
    for (let i = state + 3; i < accEnd; i += 2) if ((data[i + 1] & groupMask) > 0) {
      let term = data[i]
      if (dialect.allows(term) &&
          (input.token.value == -1 || input.token.value == term || overrides(term, input.token.value, precTable, precOffset))) {
        input.acceptToken(term)
        break
      }
    }
    let next = input.next, low = 0, high = data[state + 2]
    // Special case for EOF
    if (input.next < 0 && high > low && data[accEnd + high * 3 - 3] == Seq.End) {
      state = data[accEnd + high * 3 - 1]
      continue scan
    }
    // Do a binary search on the state's edges
    for (; low < high;) {
      let mid = (low + high) >> 1
      let index = accEnd + mid + (mid << 1)
      let from = data[index], to = data[index + 1] || 0x10000
      if (next < from) high = mid
      else if (next >= to) low = mid + 1
      else { state = data[index + 2]; input.advance(); continue scan }
    }
    break
  }
}

function findOffset(data: Readonly<Uint16Array>, start: number, term: number) {
  for (let i = start, next; (next = data[i]) != Seq.End; i++)
    if (next == term) return i - start
  return -1
}

function overrides(token: number, prev: number, tableData: Readonly<Uint16Array>, tableOffset: number) {
  let iPrev = findOffset(tableData, tableOffset, prev)
  return iPrev < 0 || findOffset(tableData, tableOffset, token) < iPrev
}

lr-1.4.0/tsconfig.json

{
  "compilerOptions": {
    "lib": ["es2017"],
    "noImplicitReturns": true,
    "noUnusedLocals": true,
    "strict": true,
    "target": "es2015",
    "module": "es2015",
    "newLine": "lf",
    "preserveConstEnums": true,
    "stripInternal": true,
    "moduleResolution": "node"
  },
  "include": ["src/*.ts"]
}