pax_global_header00006660000000000000000000000064145757615100014525gustar00rootroot0000000000000052 comment=ead67a397f7c7cb688e36d941a32c1527d7cc409 python-1.1.13/000077500000000000000000000000001457576151000131315ustar00rootroot00000000000000python-1.1.13/.gitignore000066400000000000000000000000531457576151000151170ustar00rootroot00000000000000/node_modules/ /src/parser.* .tern-* /dist python-1.1.13/CHANGELOG.md000066400000000000000000000073421457576151000147500ustar00rootroot00000000000000## 1.1.13 (2024-03-18) ### Bug fixes Fix an issue where single-quoted strings inside format strings weren't tokenized properly. ## 1.1.12 (2024-03-12) ### Bug fixes Emit syntax nodes for backslash escapes in strings. ## 1.1.11 (2024-01-16) ### Bug fixes Support the `=` flag to values in format strings. ## 1.1.10 (2023-12-28) ### Bug fixes Tag comments and strings as isolating for the purpose of bidirectional text. ## 1.1.9 (2023-10-20) ### Bug fixes Fix parsing of argument lists that are a comprehension that start with an assignment. Add support for PEP 654 `except*` syntax. Implement PEP 695 type parameter syntax. ## 1.1.8 (2023-07-03) ### Bug fixes Make the package work with new TS resolution styles. ## 1.1.7 (2023-05-27) ### Bug fixes Fix a bug that broke handling of escaped quotes in format strings. ## 1.1.6 (2023-05-18) ### Bug fixes Fix parsing of blank lines at the start of the document. ## 1.1.5 (2023-04-28) ### Bug fixes Fix a bug that caused triple-quoted format strings with quotes in them to be parsed incorrectly. ## 1.1.4 (2023-03-30) ### Bug fixes Make sure blocks stop at their final newline, and don't include any following blank lines. ## 1.1.3 (2023-03-13) ### Bug fixes Fix parsing of empty tuple expressions. Allow `:=` in subscript index expressions. ## 1.1.2 (2023-02-03) ### Bug fixes Fix a bug that caused binary * and ** operators to be highlighted as keywords/modifiers. ## 1.1.1 (2022-10-31) ### Bug fixes Add missing highlighting info for `match` and `case` keywords. ## 1.1.0 (2022-07-27) ### New features Parse Python 3.10 match statements (PEP 634). ## 1.0.0 (2022-06-06) ### New features First stable version. ## 0.16.1 (2022-05-24) ### Bug fixes Fix a bug that caused `f"""` strings to be terminated at the first double quote in their content. ## 0.16.0 (2022-04-20) ### Breaking changes Move to 0.16 serialized parser format. ### New features The parser now includes syntax highlighting information in its node types. ## 0.15.1 (2022-03-16) ### Bug fixes Fix a bug where indentation with tabs was tracked incorrectly, leading to spurious indent tokens and malformed trees. ## 0.15.0 (2021-08-11) ### Breaking changes The module's name changed from `lezer-python` to `@lezer/python`. Upgrade to the 0.15.0 lezer interfaces. ## 0.13.7 (2021-07-12) ### Bug fixes Fix a bug that caused newlines to be disallowed in argument and parameter lists. ## 0.13.6 (2021-02-17) ### Bug fixes Fix a bug where incremental parses could get confused about block nesting. ## 0.13.5 (2021-02-11) ### Bug fixes Fixes an inefficiency in the parsing of large strings. ## 0.13.4 (2021-01-27) ### Bug fixes Fix a bug where keywords like `else` or `except` would be consumed even if they don't match the indentation of the parent statement. ## 0.13.3 (2021-01-25) ### Bug fixes Fix an issue where non-indented lines after a colon were parsed as part of the body. ## 0.13.2 (2021-01-19) ### Bug fixes Add support for return statements without expressions. ## 0.13.1 (2020-12-04) ### Bug fixes Fix versions of lezer packages depended on. ## 0.13.0 (2020-12-04) ## 0.12.0 (2020-10-23) ### Breaking changes Adjust to changed serialized parser format. ## 0.11.1 (2020-09-26) ### Bug fixes Fix lezer depencency versions ## 0.11.0 (2020-09-26) ### Breaking changes Follow change in serialized parser format. ## 0.10.0 (2020-08-07) ### Breaking changes Upgrade to 0.10 parser serialization ## 0.9.0 (2020-06-08) ### Breaking changes Upgrade to 0.9 parser serialization ## 0.8.1 (2020-04-15) ### Bug fixes Include TypeScript definition file. Only treat `print` as a keyword when it looks like a Python 2 style print statement. ## 0.8.0 (2020-04-15) ### New Features First numbered release. python-1.1.13/LICENSE000066400000000000000000000021311457576151000141330ustar00rootroot00000000000000MIT License Copyright (C) 2020 by Marijn Haverbeke and others Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. python-1.1.13/README.md000066400000000000000000000002241457576151000144060ustar00rootroot00000000000000# @lezer/python This is a Python grammar for the [Lezer](https://lezer.codemirror.net/) parser system. The code is licensed under an MIT license. python-1.1.13/dist/000077500000000000000000000000001457576151000140745ustar00rootroot00000000000000python-1.1.13/dist/index.d.cts000066400000000000000000000001021457576151000161310ustar00rootroot00000000000000import {LRParser} from "@lezer/lr" export const parser: LRParser python-1.1.13/dist/index.d.ts000066400000000000000000000001021457576151000157660ustar00rootroot00000000000000import {LRParser} from "@lezer/lr" export const parser: LRParser python-1.1.13/package.json000066400000000000000000000017501457576151000154220ustar00rootroot00000000000000{ "name": "@lezer/python", "version": "1.1.13", "description": "Lezer-based Python grammar", "main": "dist/index.cjs", "type": "module", "exports": { "import": "./dist/index.js", "require": "./dist/index.cjs" }, "module": "dist/index.js", "types": "dist/index.d.ts", "author": "Marijn Haverbeke ", "license": "MIT", "devDependencies": { "@lezer/generator": "^1.0.0", "mocha": "^10.2.0", "rollup": "^2.52.2", "@rollup/plugin-node-resolve": "^9.0.0" }, "dependencies": { "@lezer/common": "^1.2.0", "@lezer/lr": "^1.0.0", "@lezer/highlight": "^1.0.0" }, "repository": { "type" : "git", "url" : "https://github.com/lezer-parser/python.git" }, "scripts": { "build": "lezer-generator src/python.grammar -o src/parser && rollup -c", "build-debug": "lezer-generator src/python.grammar --names -o src/parser && rollup -c", "prepare": "npm run build", "test": "mocha test/test-*.js" } } python-1.1.13/rollup.config.js000066400000000000000000000004621457576151000162520ustar00rootroot00000000000000import {nodeResolve} from "@rollup/plugin-node-resolve" export default { input: "./src/parser.js", output: [{ format: "cjs", file: "./dist/index.cjs" }, { format: "es", file: "./dist/index.js" }], external(id) { return !/^[\.\/]/.test(id) }, plugins: [ nodeResolve() ] } python-1.1.13/src/000077500000000000000000000000001457576151000137205ustar00rootroot00000000000000python-1.1.13/src/highlight.js000066400000000000000000000023741457576151000162330ustar00rootroot00000000000000import {styleTags, tags as t} from "@lezer/highlight" export const pythonHighlighting = styleTags({ "async \"*\" \"**\" FormatConversion FormatSpec": t.modifier, "for while if elif else try except finally return raise break continue with pass assert await yield match case": t.controlKeyword, "in not and or is del": t.operatorKeyword, "from def class global nonlocal lambda": t.definitionKeyword, import: t.moduleKeyword, "with as print": t.keyword, Boolean: t.bool, None: t.null, VariableName: t.variableName, "CallExpression/VariableName": t.function(t.variableName), "FunctionDefinition/VariableName": t.function(t.definition(t.variableName)), "ClassDefinition/VariableName": t.definition(t.className), PropertyName: t.propertyName, "CallExpression/MemberExpression/PropertyName": t.function(t.propertyName), Comment: t.lineComment, Number: t.number, String: t.string, FormatString: t.special(t.string), Escape: t.escape, UpdateOp: t.updateOperator, "ArithOp!": t.arithmeticOperator, BitOp: t.bitwiseOperator, CompareOp: t.compareOperator, AssignOp: t.definitionOperator, Ellipsis: t.punctuation, At: t.meta, "( )": t.paren, "[ ]": t.squareBracket, "{ }": t.brace, ".": t.derefOperator, ", ;": t.separator }) python-1.1.13/src/python.grammar000066400000000000000000000261661457576151000166240ustar00rootroot00000000000000@precedence { cond, trail, power @right, prefix, times @left, plus @left, shift @left, bitand @left, xor @left, bitor @left, compare @left, as @left, and @left, or @left } @top Script { statement+ } @skip { space | newlineBracketed | Comment | blankLine } Decorator { At dottedName ArgList? newline } FunctionDefinition { kw<"async">? kw<"def"> VariableName TypeParamList? ParamList TypeDef { "->" test }? Body } ParamList { "(" commaSep? ")" } TypeParamList { "[" commaSep "]" } TypeParam { ("*" | "**") VariableName | VariableName TypeDef? } MatchStatement { skw<"match"> "*"? expression MatchBody { ":" newline indent MatchClause+ (dedent | eof) } } MatchClause { skw<"case"> commaSep Guard { kw<"if"> expression }? Body } pattern[@isGroup=Pattern] { CapturePattern { VariableName } | LiteralPattern | AsPattern { pattern !as kw<"as"> VariableName } | OrPattern { pattern (!or LogicOp{"|"} pattern)+ } | AttributePattern | SequencePattern | MappingPattern | StarPattern { "*" !prefix pattern } | ClassPattern { (VariableName | AttributePattern) PatternArgList } } AttributePattern { VariableName ("." PropertyName)+ } LiteralPattern { ArithOp{"-"}? Number (ArithOp{"+"|"-"} Number)? | String | kw<"None"> | @specialize[@name=Boolean] } PatternArgList { "(" commaSep "]" | "(" commaSep ")" } MappingPattern { "{" commaSep<"**" pattern | (VariableName | LiteralPattern) ":" pattern> "}" } ClassDefinition { kw<"class"> VariableName TypeParamList? ArgList? Body } param { VariableName TypeDef? (AssignOp{"="} test)? | "*" VariableName? | "**" VariableName | "/" } TypeDef { ":" test } statement[@isGroup=Statement] { simpleStatement | compoundStatement } simpleStatement { smallStatement (newline | eof) | StatementGroup { smallStatement (";" smallStatement?)+ (newline | eof) } } smallStatement { AssignStatement { expressions (TypeDef? (AssignOp{"="} (YieldExpression | expressions))+ | TypeDef) } | UpdateStatement { expressions UpdateOp (YieldExpression | commaSep) } | ExpressionStatement { expressions } | DeleteStatement { kw<"del"> commaSep } | PassStatement { kw<"pass"> } | BreakStatement { kw<"break"> } | ContinueStatement { kw<"continue"> } | ReturnStatement { kw<"return"> commaSep? } | YieldStatement { yield } | PrintStatement { printKeyword test } | RaiseStatement { kw<"raise"> (test (kw<"from"> test | ("," test ("," test)?))?)? } | ImportStatement | ScopeStatement { (kw<"global"> | kw<"nonlocal">) commaSep } | AssertStatement { kw<"assert"> commaSep } | TypeDefinition { skw<"type"> VariableName TypeParamList? "=" test } } expressions { commaSep<"*" expression | test> } ImportStatement { kw<"import"> dottedName (kw<"as"> VariableName)? | kw<"from"> (("." | "...")+ dottedName? | dottedName) kw<"import"> ("*" | importList | importedNames) } importedNames { commaSep VariableName> } importList[@export] { "(" importedNames ")" } commaSep { expr ("," expr)* ","? } compoundStatement { IfStatement | WhileStatement { kw<"while"> testNamed Body elseClause? } | ForStatement { kw<"async">? kw<"for"> commaSep<"*"? expression> kw<"in"> commaSep Body elseClause? } | TryStatement | WithStatement { kw<"async">? kw<"with"> commaSep VariableName)?> Body } | FunctionDefinition | ClassDefinition | DecoratedStatement { Decorator+ (ClassDefinition | FunctionDefinition) } | MatchStatement } elseClause { kw<"else"> Body } IfStatement { kw<"if"> testNamed Body (kw<"elif"> testNamed? Body)* elseClause? } TryStatement { kw<"try"> Body (kw<"except"> "*"? (test ((kw<"as"> | ",") VariableName)?)? Body)* elseClause? (kw<"finally"> Body)? } Body { ":" (simpleStatement | newline indent statement+ (dedent | eof)) } lambdaParam { VariableName (AssignOp{"="} test)? | "*" VariableName? | "**" VariableName } lambdaParams[@name="ParamList"] { (lambdaParam ("," lambdaParam)*)? } test { testInner | ConditionalExpression { testInner !cond kw<"if"> testInner kw<"else"> test } | LambdaExpression { kw<"lambda"> lambdaParams ":" test } } testNoCond { testInner | LambdaExpression { kw<"lambda"> lambdaParams ":" testNoCond } } testNamed { test | NamedExpression { test AssignOp{":="} test } } testInner { binaryTest | unaryTest | expression } binaryTest[@name="BinaryExpression"] { testInner !or kw<"or"> testInner | testInner !and kw<"and"> testInner | testInner !compare (CompareOp | kw<"in"> | kw<"not"> kw<"in"> | kw<"is"> kw<"not">?) testInner } unaryTest[@name="UnaryExpression"] { kw<"not"> testInner } expression[@isGroup=Expression] { BinaryExpression | UnaryExpression { !prefix (ArithOp{"+" | "-"} | BitOp{"~"}) expression } | AwaitExpression { kw<"await"> expression } | ParenthesizedExpression | TupleExpression | ComprehensionExpression | ArrayExpression | ArrayComprehensionExpression | DictionaryExpression | DictionaryComprehensionExpression | SetExpression | SetComprehensionExpression | CallExpression { expression !trail ArgList } | MemberExpression { expression !trail (subscript | "." PropertyName) } | VariableName | Number | String | FormatString | ContinuedString { (String | FormatString) (String | FormatString)+ } | "..." | kw<"None"> | @specialize[@name=Boolean] } subscript[@export] { "[" commaSep "]" } ParenthesizedExpression { "(" (testNamed | "*" expression | YieldExpression) ")" } TupleExpression { "(" ((testNamed | "*" expression) (("," (testNamed | "*" expression))+ ","? | ","))? ")" } ComprehensionExpression { "(" (testNamed | "*" expression) compFor ")" } ArrayExpression { "[" commaSep? "]" } ArrayComprehensionExpression { "[" (testNamed | "*" expression) compFor "]" } DictionaryExpression { "{" commaSep? "}" } DictionaryComprehensionExpression { "{" (test ":" test | "**" expression) compFor "}" } SetExpression { "{" commaSep "}" } SetComprehensionExpression { "{" (test | "*" expression) compFor "}" } yield { kw<"yield"> (kw<"from"> test | commaSep) } YieldExpression { yield } BinaryExpression { expression !bitor BitOp{"|"} expression | expression !xor BitOp{"^"} expression | expression !bitand BitOp{"&"} expression | expression !shift BitOp{"<<" | ">>"} expression | expression !plus ArithOp{"+" | "-"} expression | expression !times ArithOp{"*" | "@" | "/" | "%" | "//"} expression | expression !power ArithOp{"**"} expression } ArgList { "(" commaSep? ")" } argument { test compFor? | VariableName AssignOp{"=" | ":="} test compFor? | "**" test | "*" test } compFor { kw<"async">? kw<"for"> commaSep kw<"in"> testInner (compFor | compIf)? } compIf { kw<"if"> testNoCond (compFor | compIf)? } // FIXME Is it possible to distinguish between VariableName and VariableDefinition? VariableName { identifier } PropertyName { word } dottedName { VariableName ("." VariableName)* } kw { @specialize[@name={term}] } skw { @extend[@name={term}] } @skip {} { String[isolate] { (stringStart | stringStartD | stringStartL | stringStartLD | stringStartR | stringStartRD | stringStartRL | stringStartRLD) (stringContent | Escape)* stringEnd } FormatString[isolate] { (stringStartF | stringStartFD | stringStartFL | stringStartFLD | stringStartFR | stringStartFRD | stringStartFRL | stringStartFRLD) (stringContent | Escape | FormatReplacement)* stringEnd } formatStringSpec { FormatSpec { ":" (formatStringSpecChars | nestedFormatReplacement)* } "}" } blankLine { blankLineStart space? Comment? newline } } formatReplacement { start (YieldExpression | commaSep<"*"? test>) FormatSelfDoc {"="}? FormatConversion? (formatStringSpec | "}") } FormatReplacement[isolate] { formatReplacement } nestedFormatReplacement[isolate,@name=FormatReplacement,@export] { formatReplacement<"{"> } @context trackIndent from "./tokens.js" @external tokens legacyPrint from "./tokens.js" { printKeyword[@name="print"] } @external tokens indentation from "./tokens" { indent, dedent } @external tokens newlines from "./tokens" { newline, blankLineStart, newlineBracketed, eof } @external tokens strings from "./tokens" { stringContent Escape replacementStart[@name="{"] stringEnd } @tokens { CompareOp { "<" | ">" | $[<>=!] "=" | "<>" } UpdateOp { ($[+\-@%&|^] | "<<" | ">>" | "*" "*"? | "/" "/"?) "=" } // String types are identified by letter suffixes: // D: double quoted // L: long string // R: raw string // F: format string stringStart[@export] { stringPrefix? "'" } stringStartD[@export] { stringPrefix? '"' } stringStartL[@export] { stringPrefix? "'''" } stringStartLD[@export] { stringPrefix? '"""' } stringStartR[@export] { stringPrefixR "'" } stringStartRD[@export] { stringPrefixR '"' } stringStartRL[@export] { stringPrefixR "'''" } stringStartRLD[@export] { stringPrefixR '"""' } stringStartF[@export] { stringPrefixF "'" } stringStartFD[@export] { stringPrefixF '"' } stringStartFL[@export] { stringPrefixF "'''" } stringStartFLD[@export] { stringPrefixF '"""' } stringStartFR[@export] { stringPrefixFR "'" } stringStartFRD[@export] { stringPrefixFR '"' } stringStartFRL[@export] { stringPrefixFR "'''" } stringStartFRLD[@export] { stringPrefixFR '"""' } stringPrefix { $[uUbB] } stringPrefixR { $[rR] $[bB]? | $[bB] $[rR] } stringPrefixF { $[fF] } stringPrefixFR { $[rR] $[fF] | $[fF] $[rR] } // Give string quotes a higher precedence than identifiers, and long // strings a higher precedence than short strings @precedence { stringStartL, stringStart, identifier } @precedence { stringStartLD, stringStartD, identifier } @precedence { stringStartRL, stringStartR, identifier } @precedence { stringStartRLD, stringStartRD, identifier } @precedence { stringStartFL, stringStartF, identifier } @precedence { stringStartFLD, stringStartFD, identifier } @precedence { stringStartFRL, stringStartFR, identifier } @precedence { stringStartFRLD, stringStartFRD, identifier } identifierChar { @asciiLetter | $[_\u{a1}-\u{10ffff}] } word { identifierChar (@digit | identifierChar)* } identifier { word } FormatConversion { "!" $[sra] } formatStringSpecChars { ![{}]+ } Number { (@digit ("_" | @digit)* ("." @digit ("_" | @digit)*)? | "." @digit ("_" | @digit)*) ($[eE] $[+\-]? @digit ("_" | @digit)*)? $[jJ]? | "0" $[bB] $[_01]+ | "0" $[oO] $[_0-7]+ | "0" $[xX] $[_0-9a-fA-F]+ } Comment[isolate] { "#" ![\n\r]* } space { ($[ \t\f] | "\\" $[\n\r])+ } At { "@" } "..."[@name=Ellipsis] "("[@export=ParenL] ")" "["[@export=BracketL] "]" "{"[@export=BraceL] "}" "." "," ";" ":" "@" "*" "**" } @external propSource pythonHighlighting from "./highlight" @detectDelim python-1.1.13/src/tokens.js000066400000000000000000000175421457576151000155720ustar00rootroot00000000000000import {ExternalTokenizer, ContextTracker} from "@lezer/lr" import { newline as newlineToken, eof, newlineBracketed, blankLineStart, indent, dedent, printKeyword, ParenthesizedExpression, TupleExpression, ComprehensionExpression, PatternArgList, SequencePattern, MappingPattern, TypeParamList, ArrayExpression, ArrayComprehensionExpression, ArgList, ParamList, importList, subscript, DictionaryExpression, DictionaryComprehensionExpression, SetExpression, SetComprehensionExpression, String as StringTerm, FormatString, FormatReplacement, nestedFormatReplacement, stringStart, stringStartD, stringStartL, stringStartLD, stringStartR, stringStartRD, stringStartRL, stringStartRLD, stringStartF, stringStartFD, stringStartFL, stringStartFLD, stringStartFR, stringStartFRD, stringStartFRL, stringStartFRLD, stringContent, Escape, replacementStart, stringEnd, ParenL, BraceL, BracketL } from "./parser.terms.js" const newline = 10, carriageReturn = 13, space = 32, tab = 9, hash = 35, parenOpen = 40, dot = 46, braceOpen = 123, braceClose = 125, singleQuote = 39, doubleQuote = 34, backslash = 92, letter_o = 111, letter_x = 120, letter_N = 78, letter_u = 117, letter_U = 85 const bracketed = new Set([ ParenthesizedExpression, TupleExpression, ComprehensionExpression, importList, ArgList, ParamList, ArrayExpression, ArrayComprehensionExpression, subscript, SetExpression, SetComprehensionExpression, FormatString, FormatReplacement, nestedFormatReplacement, DictionaryExpression, DictionaryComprehensionExpression, SequencePattern, MappingPattern, PatternArgList, TypeParamList ]) function isLineBreak(ch) { return ch == newline || ch == carriageReturn } function isHex(ch) { return ch >= 48 && ch <= 57 || ch >= 65 && ch <= 70 || ch >= 97 && ch <= 102 } export const newlines = new ExternalTokenizer((input, stack) => { let prev if (input.next < 0) { input.acceptToken(eof) } else if (stack.context.flags & cx_Bracketed) { if (isLineBreak(input.next)) input.acceptToken(newlineBracketed, 1) } else if (((prev = input.peek(-1)) < 0 || isLineBreak(prev)) && stack.canShift(blankLineStart)) { let spaces = 0 while (input.next == space || input.next == tab) { input.advance(); spaces++ } if (input.next == newline || input.next == carriageReturn || input.next == hash) input.acceptToken(blankLineStart, -spaces) } else if (isLineBreak(input.next)) { input.acceptToken(newlineToken, 1) } }, {contextual: true}) export const indentation = new ExternalTokenizer((input, stack) => { let context = stack.context if (context.flags) return let prev = input.peek(-1), depth if (prev == newline || prev == carriageReturn) { let depth = 0, chars = 0 for (;;) { if (input.next == space) depth++ else if (input.next == tab) depth += 8 - (depth % 8) else break input.advance() chars++ } if (depth != context.indent && input.next != newline && input.next != carriageReturn && input.next != hash) { if (depth < context.indent) input.acceptToken(dedent, -chars) else input.acceptToken(indent) } } }) // Flags used in Context objects const cx_Bracketed = 1, cx_String = 2, cx_DoubleQuote = 4, cx_Long = 8, cx_Raw = 16, cx_Format = 32 function Context(parent, indent, flags) { this.parent = parent this.indent = indent this.flags = flags this.hash = (parent ? parent.hash + parent.hash << 8 : 0) + indent + (indent << 4) + flags + (flags << 6) } const topIndent = new Context(null, 0, 0) function countIndent(space) { let depth = 0 for (let i = 0; i < space.length; i++) depth += space.charCodeAt(i) == tab ? 8 - (depth % 8) : 1 return depth } const stringFlags = new Map([ [stringStart, 0], [stringStartD, cx_DoubleQuote], [stringStartL, cx_Long], [stringStartLD, cx_Long | cx_DoubleQuote], [stringStartR, cx_Raw], [stringStartRD, cx_Raw | cx_DoubleQuote], [stringStartRL, cx_Raw | cx_Long], [stringStartRLD, cx_Raw | cx_Long | cx_DoubleQuote], [stringStartF, cx_Format], [stringStartFD, cx_Format | cx_DoubleQuote], [stringStartFL, cx_Format | cx_Long], [stringStartFLD, cx_Format | cx_Long | cx_DoubleQuote], [stringStartFR, cx_Format | cx_Raw], [stringStartFRD, cx_Format | cx_Raw | cx_DoubleQuote], [stringStartFRL, cx_Format | cx_Raw | cx_Long], [stringStartFRLD, cx_Format | cx_Raw | cx_Long | cx_DoubleQuote] ].map(([term, flags]) => [term, flags | cx_String])) export const trackIndent = new ContextTracker({ start: topIndent, reduce(context, term, _, input) { if ((context.flags & cx_Bracketed) && bracketed.has(term) || (term == StringTerm || term == FormatString) && (context.flags & cx_String)) return context.parent return context }, shift(context, term, stack, input) { if (term == indent) return new Context(context, countIndent(input.read(input.pos, stack.pos)), 0) if (term == dedent) return context.parent if (term == ParenL || term == BracketL || term == BraceL || term == replacementStart) return new Context(context, 0, cx_Bracketed) if (stringFlags.has(term)) return new Context(context, 0, stringFlags.get(term) | (context.flags & cx_Bracketed)) return context }, hash(context) { return context.hash } }) export const legacyPrint = new ExternalTokenizer(input => { for (let i = 0; i < 5; i++) { if (input.next != "print".charCodeAt(i)) return input.advance() } if (/\w/.test(String.fromCharCode(input.next))) return for (let off = 0;; off++) { let next = input.peek(off) if (next == space || next == tab) continue if (next != parenOpen && next != dot && next != newline && next != carriageReturn && next != hash) input.acceptToken(printKeyword) return } }) export const strings = new ExternalTokenizer((input, stack) => { let {flags} = stack.context let quote = (flags & cx_DoubleQuote) ? doubleQuote : singleQuote let long = (flags & cx_Long) > 0 let escapes = !(flags & cx_Raw) let format = (flags & cx_Format) > 0 let start = input.pos for (;;) { if (input.next < 0) { break } else if (format && input.next == braceOpen) { if (input.peek(1) == braceOpen) { input.advance(2) } else { if (input.pos == start) { input.acceptToken(replacementStart, 1) return } break } } else if (escapes && input.next == backslash) { if (input.pos == start) { input.advance() let escaped = input.next if (escaped >= 0) { input.advance() skipEscape(input, escaped) } input.acceptToken(Escape) return } break } else if (input.next == quote && (!long || input.peek(1) == quote && input.peek(2) == quote)) { if (input.pos == start) { input.acceptToken(stringEnd, long ? 3 : 1) return } break } else if (input.next == newline) { if (long) { input.advance() } else if (input.pos == start) { input.acceptToken(stringEnd) return } break } else { input.advance() } } if (input.pos > start) input.acceptToken(stringContent) }) function skipEscape(input, ch) { if (ch == letter_o) { for (let i = 0; i < 2 && input.next >= 48 && input.next <= 55; i++) input.advance() } else if (ch == letter_x) { for (let i = 0; i < 2 && isHex(input.next); i++) input.advance() } else if (ch == letter_u) { for (let i = 0; i < 4 && isHex(input.next); i++) input.advance() } else if (ch == letter_U) { for (let i = 0; i < 8 && isHex(input.next); i++) input.advance() } else if (ch == letter_N) { if (input.next == braceOpen) { input.advance() while (input.next >= 0 && input.next != braceClose && input.next != singleQuote && input.next != doubleQuote && input.next != newline) input.advance() if (input.next == braceClose) input.advance() } } } python-1.1.13/test/000077500000000000000000000000001457576151000141105ustar00rootroot00000000000000python-1.1.13/test/expression.txt000066400000000000000000000136551457576151000170620ustar00rootroot00000000000000# Operator precedence a + b * c - d**3 a or b > 2 and c or d == None a + b | c & d ==> Script( ExpressionStatement(BinaryExpression(BinaryExpression(VariableName, ArithOp, BinaryExpression(VariableName, ArithOp, VariableName)), ArithOp, BinaryExpression(VariableName, ArithOp, Number))), ExpressionStatement(BinaryExpression(BinaryExpression(VariableName, or, BinaryExpression(BinaryExpression(VariableName, CompareOp, Number), and, VariableName)), or, BinaryExpression(VariableName, CompareOp, None))), ExpressionStatement(BinaryExpression(BinaryExpression(VariableName, ArithOp, VariableName), BitOp, BinaryExpression(VariableName, BitOp, VariableName)))) # Strings 'foo' "bar" b'baz' '''long string on two lines''' """also with double quotes""" ==> Script(ExpressionStatement(ContinuedString(String, String)), ExpressionStatement(String), ExpressionStatement(String), ExpressionStatement(String)) # Bracketed continued string print('00300:' '03630:') ==> Script(ExpressionStatement(CallExpression(VariableName, ArgList(ContinuedString(String, String))))) # Format strings f'hello{22} abc\' {{ }} {d-1}' f"double\" {quoted !s}" f"""big long format {string :foo}""" f'''well {{ \x }} {2 :{bar}}''' f"""one"{two}"three""" f'{user=!s} {delta.days=:,d}' ==> Script(ExpressionStatement(FormatString(FormatReplacement(Number),Escape, FormatReplacement(BinaryExpression(VariableName, ArithOp, Number)))), ExpressionStatement(FormatString(Escape,FormatReplacement(VariableName, FormatConversion))), ExpressionStatement(FormatString(FormatReplacement(VariableName, FormatSpec))), ExpressionStatement(FormatString(Escape,FormatReplacement(Number, FormatSpec(FormatReplacement(VariableName))))), ExpressionStatement(FormatString(FormatReplacement(VariableName))), ExpressionStatement(FormatString( FormatReplacement(VariableName,FormatSelfDoc,FormatConversion), FormatReplacement(MemberExpression(VariableName,PropertyName),FormatSelfDoc,FormatSpec)))) # Nested quote types f"a{'b'}c" ==> Script(ExpressionStatement(FormatString(FormatReplacement(String)))) # Lambda something.map(lambda x: x + 1) foo = lambda a, b = 0: a ^ b ==> Script( ExpressionStatement(CallExpression(MemberExpression(VariableName, PropertyName), ArgList( LambdaExpression(lambda, ParamList(VariableName), BinaryExpression(VariableName, ArithOp, Number))))), AssignStatement(VariableName, AssignOp, LambdaExpression(lambda, ParamList(VariableName, VariableName, AssignOp, Number), BinaryExpression(VariableName, BitOp, VariableName)))) # Member expressions x[1] x.foo x.if.True ==> Script( ExpressionStatement(MemberExpression(VariableName, Number)), ExpressionStatement(MemberExpression(VariableName, PropertyName)), ExpressionStatement(MemberExpression(MemberExpression(VariableName, PropertyName), PropertyName))) # Call expressions foo(x, y, **z) + bar(blah=20) ==> Script(ExpressionStatement(BinaryExpression( CallExpression(VariableName, ArgList(VariableName, VariableName, VariableName)), ArithOp, CallExpression(VariableName, ArgList(VariableName, AssignOp, Number))))) # Collection expressions [True, False, None] {foo: 22, bar: False, **other} {1, 2, 3} (3) (3,) (3, 4) ==> Script( ExpressionStatement(ArrayExpression(Boolean, Boolean, None)), ExpressionStatement(DictionaryExpression(VariableName, Number, VariableName, Boolean, VariableName)), ExpressionStatement(SetExpression(Number, Number, Number)), ExpressionStatement(ParenthesizedExpression(Number)), ExpressionStatement(TupleExpression(Number)), ExpressionStatement(TupleExpression(Number, Number))) # Comprehension expressions [i + 1 for i in range(1, 10) if x % 2 == 0] (x for x in [3, 4]) {key: value for (key, value) in blah} {a - b for a in foo for b in bar} ==> Script( ExpressionStatement(ArrayComprehensionExpression( BinaryExpression(VariableName, ArithOp, Number), for VariableName in CallExpression(VariableName, ArgList(Number, Number)), if BinaryExpression(BinaryExpression(VariableName, ArithOp, Number), CompareOp, Number))), ExpressionStatement(ComprehensionExpression( VariableName, for, VariableName, in, ArrayExpression(Number, Number))), ExpressionStatement(DictionaryComprehensionExpression( VariableName, VariableName, for, TupleExpression(VariableName, VariableName) in VariableName)), ExpressionStatement(SetComprehensionExpression( BinaryExpression(VariableName, ArithOp, VariableName), for, VariableName, in, VariableName, for, VariableName, in, VariableName))) # Yield expressions def foo(): yield 1 return 1 + (yield 2) ==> Script(FunctionDefinition(def, VariableName, ParamList, Body( YieldStatement(yield, Number), ReturnStatement(return, BinaryExpression(Number, ArithOp, ParenthesizedExpression(YieldExpression(yield, Number))))))) # Unary expressions [-1, +2 * 3, ~2**2] ==> Script(ExpressionStatement(ArrayExpression( UnaryExpression(ArithOp, Number), BinaryExpression(UnaryExpression(ArithOp, Number), ArithOp, Number), UnaryExpression(BitOp, BinaryExpression(Number, ArithOp, Number))))) # Await await something() ==> Script(ExpressionStatement(AwaitExpression(await, CallExpression(VariableName, ArgList)))) # Newlines in brackets x = [ 1, 2, # And 3, 4, ] ==> Script(AssignStatement(VariableName, AssignOp, ArrayExpression(Number, Number, Comment, Number, Number))) # Too many commas in brackets x = [ 1, 2,, ] ==> Script(AssignStatement(VariableName, AssignOp, ArrayExpression(Number, Number, ⚠))) # Conditional expression x = 5 if True else 1 if False else 0 ==> Script(AssignStatement(VariableName, AssignOp, ConditionalExpression(Number, if, Boolean, else, ConditionalExpression(Number, if, Boolean, else, Number)))) # Exponent is R-to-L associative 2 ** 3 ** 2 ==> Script(ExpressionStatement(BinaryExpression( Number,ArithOp("**"), BinaryExpression(Number,ArithOp("**"),Number)))) python-1.1.13/test/statement.txt000066400000000000000000000220761457576151000166640ustar00rootroot00000000000000# Function definition def foo(): pass def bar(a: str, b = 22, **c) -> num: pass ==> Script( FunctionDefinition(def,VariableName,ParamList,Body(PassStatement(pass))), FunctionDefinition(def,VariableName,ParamList(VariableName, TypeDef(VariableName), VariableName, AssignOp, Number, VariableName), TypeDef(VariableName), Body(PassStatement(pass)))) # Single-line function definition def foo(a, b): return a + b ==> Script(FunctionDefinition(def,VariableName,ParamList(VariableName, VariableName), Body(ReturnStatement(return, BinaryExpression(VariableName, ArithOp, VariableName))))) # Return with no body def foo(a, b): a = b return ==> Script(FunctionDefinition(def,VariableName,ParamList(VariableName, VariableName), Body(AssignStatement(VariableName, AssignOp, VariableName), ReturnStatement(return)))) # Conditional if a: b() if 1 + 3: pass elif 55 < 2: pass else: pass ==> Script( IfStatement(if, VariableName, Body(ExpressionStatement(CallExpression(VariableName, ArgList)))) IfStatement(if, BinaryExpression(Number, ArithOp, Number), Body(PassStatement(pass)), elif, BinaryExpression(Number, CompareOp, Number), Body(PassStatement(pass)), else, Body(PassStatement(pass)))) # Assignment a = 4 b: str = "hi" c, d, e = None f = g = False h += 1 ==> Script( AssignStatement(VariableName, AssignOp, Number), AssignStatement(VariableName, TypeDef(VariableName), AssignOp, String), AssignStatement(VariableName, VariableName, VariableName, AssignOp, None), AssignStatement(VariableName, AssignOp, VariableName, AssignOp, Boolean), UpdateStatement(VariableName, UpdateOp, Number)) # For loops for a, b in woop(): doStuff(b, a) ==> Script(ForStatement(for, VariableName, VariableName, in CallExpression(VariableName, ArgList), Body(ExpressionStatement(CallExpression(VariableName, ArgList(VariableName, VariableName)))))) # Try statements try: pass except SomeException as e: pass except OtherException: pass else: pass finally: pass ==> Script(TryStatement( try, Body(PassStatement(pass)), except, VariableName, as, VariableName, Body(PassStatement(pass)), except VariableName, Body(PassStatement(pass)), else Body(PassStatement(pass)), finally Body(PassStatement(pass)))) # With statements with open("x") as file: pass async with foo as bar: pass ==> Script( WithStatement(with, CallExpression(VariableName, ArgList(String)), as, VariableName, Body(PassStatement(pass))), WithStatement(async, with, VariableName, as, VariableName, Body(PassStatement(pass)))) # Class definition class Foo: prop = 0 def __init__(self): pass def plus(self): self.prop += 1 class Bar(Foo): pass ==> Script( ClassDefinition(class, VariableName, Body( AssignStatement(VariableName, AssignOp, Number), FunctionDefinition(def, VariableName, ParamList(VariableName), Body(PassStatement(pass))), FunctionDefinition(def, VariableName, ParamList(VariableName), Body( UpdateStatement(MemberExpression(VariableName, PropertyName), UpdateOp, Number))))), ClassDefinition(class, VariableName, ArgList(VariableName), Body(PassStatement(pass)))) # Scope statements global a nonlocal b, c ==> Script( ScopeStatement(global, VariableName), ScopeStatement(nonlocal, VariableName, VariableName)) # Decorators @Something.X def f(): pass @Other(arg1, arg2) class C: pass ==> Script( DecoratedStatement(Decorator(At, VariableName, VariableName), FunctionDefinition(def, VariableName, ParamList, Body(PassStatement(pass)))), DecoratedStatement(Decorator(At, VariableName, ArgList(VariableName, VariableName)), ClassDefinition(class, VariableName, Body(PassStatement(pass))))) # Small statements def x(): return 5 raise Exception("woop") while False: break continue assert 1 == 2 del x[2] ==> Script( FunctionDefinition(def, VariableName, ParamList, Body(ReturnStatement(return, Number))), RaiseStatement(raise, CallExpression(VariableName, ArgList(String))), WhileStatement(while, Boolean, Body(BreakStatement(break), ContinueStatement(continue))), AssertStatement(assert, BinaryExpression(Number, CompareOp, Number)), DeleteStatement(del, MemberExpression(VariableName, Number))) # Import statements import datetime from something.other import one, two ==> Script( ImportStatement(import, VariableName), ImportStatement(from, VariableName, VariableName, import VariableName, VariableName)) # One-line small statements x; y(); z = 2 raise "oh" ==> Script( StatementGroup( ExpressionStatement(VariableName), ExpressionStatement(CallExpression(VariableName, ArgList)), AssignStatement(VariableName, AssignOp, Number)), RaiseStatement(raise, String)) # Nested bodies def x(): ok if not ok: while True: one two three if None: four else: five six seven ==> Script( FunctionDefinition(def, VariableName, ParamList, Body( ExpressionStatement(VariableName), IfStatement(if, UnaryExpression(not, VariableName), Body( WhileStatement(while, Boolean, Body( ExpressionStatement(VariableName), ExpressionStatement(VariableName))))), ExpressionStatement(VariableName), IfStatement(if, None, Body( ExpressionStatement(VariableName) ), else, Body( ExpressionStatement(VariableName))), ExpressionStatement(VariableName))), ExpressionStatement(VariableName)) # Empty and commented lines if None: one two four # comment five six ==> Script( IfStatement(if, None, Body( ExpressionStatement(VariableName), ExpressionStatement(VariableName), ExpressionStatement(VariableName), Comment, ExpressionStatement(VariableName))), ExpressionStatement(VariableName)) # Escaped newlines x = 1 + \ 2 ==> Script(AssignStatement(VariableName, AssignOp, BinaryExpression(Number, ArithOp, Number))) # Python 2 compatibility print "hi" print(print.something) try: raise Exception, "foo" except Exception, foo: pass ==> Script( PrintStatement(print, String), ExpressionStatement(CallExpression(VariableName, ArgList(MemberExpression(VariableName, PropertyName)))), TryStatement(try, Body(RaiseStatement(raise, VariableName, String)), except, VariableName, VariableName, Body(PassStatement(pass)))) # Require indentation on body def foo(): pass ==> Script(FunctionDefinition(def,VariableName,ParamList,Body(⚠)), PassStatement(pass)) # Nested else if a: if b: pass else: pass ==> Script(IfStatement(if, VariableName, Body( IfStatement(if, VariableName, Body(PassStatement(pass)))), else, Body(PassStatement(pass)))) # Self not reserved self = True ==> Script(AssignStatement(VariableName,AssignOp,Boolean)) # Trailing whitespace in block def x(): one two ==> Script(FunctionDefinition(def,VariableName,ParamList,Body(ExpressionStatement(VariableName),ExpressionStatement(VariableName)))) # Can handle tab indentation class Employee: first_name: str last_name: str def __init__(self, a): self.first_name = a self.last_name = a ==> Script(ClassDefinition(class,VariableName,Body( AssignStatement(VariableName,TypeDef(VariableName)), AssignStatement(VariableName,TypeDef(VariableName)), FunctionDefinition(def,VariableName,ParamList(VariableName,VariableName),Body( AssignStatement(MemberExpression(VariableName,PropertyName),AssignOp,VariableName), AssignStatement(MemberExpression(VariableName,PropertyName),AssignOp,VariableName)))))) # Parses match statements match foo: case 1: pass case Point("a", True) as z | a.b | {x: None, **y}: pass case [a, b, *rest] | (p, q): pass ==> Script(MatchStatement(match,VariableName,MatchBody( MatchClause(case, LiteralPattern(Number), Body(PassStatement(pass))), MatchClause(case, OrPattern( AsPattern( ClassPattern(VariableName,PatternArgList(LiteralPattern(String),LiteralPattern(Boolean))), as,VariableName), LogicOp, AttributePattern(VariableName,PropertyName), LogicOp, MappingPattern(VariableName,LiteralPattern(None),CapturePattern(VariableName))), Body(PassStatement(pass))), MatchClause(case, OrPattern(SequencePattern( CapturePattern(VariableName), CapturePattern(VariableName), StarPattern(CapturePattern(VariableName))), LogicOp, SequencePattern(CapturePattern(VariableName),CapturePattern(VariableName))), Body(PassStatement(pass)))))) # Type Params class ClassA[T: str]: def method1(self) -> T: pass def func[**T](a: T, b: T) -> T: pass ==> Script( ClassDefinition(class,VariableName, TypeParamList(TypeParam(VariableName,TypeDef(":",VariableName))), Body(FunctionDefinition(def,VariableName,ParamList(VariableName),TypeDef(VariableName), Body(PassStatement(pass))))), FunctionDefinition(def,VariableName,TypeParamList(TypeParam(VariableName)), ParamList(VariableName,TypeDef(VariableName),VariableName,TypeDef(VariableName)), TypeDef(VariableName), Body(PassStatement(pass)))) # Type Definition type Point = tuple[float, float] ==> Script(TypeDefinition(type,VariableName,MemberExpression(VariableName,VariableName,",",VariableName))) python-1.1.13/test/test-incremental.js000066400000000000000000000021211457576151000177200ustar00rootroot00000000000000import {parser} from "../dist/index.js" import {fileTests} from "@lezer/generator/dist/test" import {Tree, TreeFragment} from "@lezer/common" describe("Incremental parsing", () => { // See https://github.com/codemirror/codemirror.next/issues/394 it("doesn't reuse statements in the wrong body", () => { let input = `class StreamWriter: def __init__(self): pass def a(): pass def b(self): """ ${"big block comment to fill up the reuse size quota\n ".repeat(150)} """ pass ` let ast = parser.parse(input) let at = input.indexOf("pass") input = input.slice(0, at) + " " + input.slice(at) let cache = TreeFragment.applyChanges(TreeFragment.addTree(ast), [{fromA: at, toA: at, fromB: at, toB: at + 1}]) let ast2 = parser.parse(input, cache) if (ast2.toString() != ast.toString()) throw new Error("Malformed tree") let lastFunc = ast => { let cur = ast.cursor(ast.length) while (cur.type.name != "FunctionDefinition") cur.prev() return cur.tree } if (lastFunc(ast) != lastFunc(ast2)) throw new Error("No reuse") }) }) python-1.1.13/test/test-python.js000066400000000000000000000010131457576151000167370ustar00rootroot00000000000000import {parser} from "../dist/index.js" import {fileTests} from "@lezer/generator/dist/test" import * as fs from "fs" import * as path from "path" import {fileURLToPath} from "url" let caseDir = path.dirname(fileURLToPath(import.meta.url)) for (let file of fs.readdirSync(caseDir)) { if (!/\.txt$/.test(file)) continue let name = /^[^\.]*/.exec(file)[0] describe(name, () => { for (let {name, run} of fileTests(fs.readFileSync(path.join(caseDir, file), "utf8"), file)) it(name, () => run(parser)) }) }