pax_global_header00006660000000000000000000000064145713006010014507gustar00rootroot0000000000000052 comment=331ab3dd94446f3b2594e71e8ff68be57c2e33a4 xml-1.0.5/000077500000000000000000000000001457130060100123125ustar00rootroot00000000000000xml-1.0.5/.gitignore000066400000000000000000000000531457130060100143000ustar00rootroot00000000000000/node_modules/ /src/parser.* .tern-* /dist xml-1.0.5/.npmignore000066400000000000000000000000161457130060100143060ustar00rootroot00000000000000/node_modules xml-1.0.5/CHANGELOG.md000066400000000000000000000061471457130060100141330ustar00rootroot00000000000000## 1.0.5 (2024-03-04) ### Bug fixes Fix a mistake that broke highlighting for mismatched tag names. ## 1.0.4 (2023-12-28) ### Bug fixes Mark tags, attributes, and comments as isolating for bidirectional text. ## 1.0.3 (2023-10-26) ### Bug fixes The parser is now more precise about what is parsed as an entity. Fix a bug that caused the end of a CDATA block, comment, or processing instruction to not be recognized when the character starting the end marker occured right before it. ## 1.0.2 (2023-07-03) ### Bug fixes Make the package work with new TS resolution styles. ## 1.0.1 (2022-12-19) ### Bug fixes Fix a bug where single-character comments, processing instructions, and CDATA blocks were not parsed correctly. ## 1.0.0 (2022-06-06) ### Bug fixes Fix a bug that would cause the tokenizer to miss `]]>` and `-->` tokens when they were preceded by `]` or `-`. ### New features First stable version. ## 0.16.0 (2022-04-20) ### Breaking changes Move to 0.16 serialized parser format. ### New features The parser now includes syntax highlighting information in its node types. ## 0.15.1 (2021-10-30) ### Bug fixes Fix parsing of CDATA sections. ## 0.15.0 (2021-08-11) ### Breaking changes The module's name changed from `lezer-xml` to `@lezer/xml`. Upgrade to the 0.15.0 lezer interfaces. ## 0.13.4 (2021-02-17) ### Bug fixes Fix an issue that cause improper tokenizing during some types of error recovery. ## 0.13.3 (2021-02-17) ### Bug fixes Optimize tokenizing with a context tracker. ## 0.13.2 (2021-01-22) ### Bug fixes Make comments, processing instructions, and cdata consist of multiple tokens to avoid freezing the parser on huge inputs with unfinished elements of those types. ## 0.13.1 (2020-12-04) ### Bug fixes Fix versions of lezer packages depended on. ## 0.13.0 (2020-12-04) ## 0.12.0 (2020-10-23) ### Breaking changes Adjust to changed serialized parser format. ## 0.11.1 (2020-09-26) ### Bug fixes Fix lezer depencency versions ## 0.11.0 (2020-09-26) ### Breaking changes Follow change in serialized parser format. ## 0.10.1 (2020-09-16) ### New features Make sure mismatched close tags name tokens have a different name than matching ones. ## 0.10.0 (2020-08-07) ### Breaking changes Upgrade to 0.10 parser serialization ## 0.9.0 (2020-06-08) ### Breaking changes Upgrade to 0.9 parser serialization ### New features Tag start/end tokens now have `NodeProp.openedBy`/`closedBy` props. ## 0.8.2 (2020-04-09) ### Bug fixes Regenerate parser with a fix in lezer-generator so that the top node prop is properly assigned. ## 0.8.1 (2020-04-01) ### Bug fixes Make the package load as an ES module on node ## 0.8.0 (2020-02-03) ### New features Follow 0.8.0 release of the library. ## 0.7.0 (2020-01-20) ### Breaking changes Use the lezer 0.7.0 parser format. ## 0.5.1 (2019-10-22) ### Bug fixes Fix top prop missing from build output. ## 0.5.0 (2019-10-22) ### Breaking changes Move from `lang` to `top` prop on document node. ## 0.4.0 (2019-09-10) ### Breaking changes Adjust to 0.4.0 parse table format. ## 0.3.0 (2019-08-22) ### New features First numbered release. xml-1.0.5/LICENSE000066400000000000000000000021311457130060100133140ustar00rootroot00000000000000MIT License Copyright (C) 2018 by Marijn Haverbeke and others Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. xml-1.0.5/README.md000066400000000000000000000002171457130060100135710ustar00rootroot00000000000000# @lezer/xml This is an XML grammar for the [lezer](https://lezer.codemirror.net/) parser system. The code is licensed under an MIT license. xml-1.0.5/dist/000077500000000000000000000000001457130060100132555ustar00rootroot00000000000000xml-1.0.5/dist/index.d.cts000066400000000000000000000001021457130060100153120ustar00rootroot00000000000000import {LRParser} from "@lezer/lr" export const parser: LRParser xml-1.0.5/dist/index.d.ts000066400000000000000000000001021457130060100151470ustar00rootroot00000000000000import {LRParser} from "@lezer/lr" export const parser: LRParser xml-1.0.5/package.json000066400000000000000000000017301457130060100146010ustar00rootroot00000000000000{ "name": "@lezer/xml", "version": "1.0.5", "description": "lezer-based XML grammar", "main": "dist/index.cjs", "type": "module", "exports": { "import": "./dist/index.js", "require": "./dist/index.cjs" }, "module": "dist/index.js", "types": "dist/index.d.ts", "author": "Marijn Haverbeke ", "license": "MIT", "devDependencies": { "@lezer/generator": "^1.0.0", "mocha": "^10.2.0", "rollup": "^2.52.2", "@rollup/plugin-node-resolve": "^9.0.0" }, "dependencies": { "@lezer/common": "^1.2.0", "@lezer/lr": "^1.0.0", "@lezer/highlight": "^1.0.0" }, "repository": { "type" : "git", "url" : "https://github.com/lezer-parser/xml.git" }, "scripts": { "build": "lezer-generator src/xml.grammar -o src/parser && rollup -c", "build-debug": "lezer-generator src/xml.grammar --names -o src/parser && rollup -c", "prepare": "npm run build", "test": "mocha test/test-*.js" } } xml-1.0.5/rollup.config.js000066400000000000000000000004621457130060100154330ustar00rootroot00000000000000import {nodeResolve} from "@rollup/plugin-node-resolve" export default { input: "./src/parser.js", output: [{ format: "cjs", file: "./dist/index.cjs" }, { format: "es", file: "./dist/index.js" }], external(id) { return !/^[\.\/]/.test(id) }, plugins: [ nodeResolve() ] } xml-1.0.5/src/000077500000000000000000000000001457130060100131015ustar00rootroot00000000000000xml-1.0.5/src/highlight.js000066400000000000000000000010421457130060100154030ustar00rootroot00000000000000import {styleTags, tags as t} from "@lezer/highlight" export const xmlHighlighting = styleTags({ Text: t.content, "StartTag StartCloseTag EndTag SelfCloseEndTag": t.angleBracket, TagName: t.tagName, "MismatchedCloseTag/TagName": [t.tagName, t.invalid], AttributeName: t.attributeName, AttributeValue: t.attributeValue, Is: t.definitionOperator, "EntityReference CharacterReference": t.character, Comment: t.blockComment, ProcessingInst: t.processingInstruction, DoctypeDecl: t.documentMeta, Cdata: t.special(t.string) }) xml-1.0.5/src/tokens.js000066400000000000000000000060561457130060100147510ustar00rootroot00000000000000/* Hand-written tokenizer for XML tag matching. */ import {ExternalTokenizer, ContextTracker} from "@lezer/lr" import {StartTag, StartCloseTag, mismatchedStartCloseTag, incompleteStartCloseTag, MissingCloseTag, Element, OpenTag, commentContent as _commentContent, piContent as _piContent, cdataContent as _cdataContent} from "./parser.terms.js" function nameChar(ch) { return ch == 45 || ch == 46 || ch == 58 || ch >= 65 && ch <= 90 || ch == 95 || ch >= 97 && ch <= 122 || ch >= 161 } function isSpace(ch) { return ch == 9 || ch == 10 || ch == 13 || ch == 32 } let cachedName = null, cachedInput = null, cachedPos = 0 function tagNameAfter(input, offset) { let pos = input.pos + offset if (cachedInput == input && cachedPos == pos) return cachedName while (isSpace(input.peek(offset))) offset++ let name = "" for (;;) { let next = input.peek(offset) if (!nameChar(next)) break name += String.fromCharCode(next) offset++ } cachedInput = input; cachedPos = pos return cachedName = name || null } function ElementContext(name, parent) { this.name = name this.parent = parent this.hash = parent ? parent.hash : 0 for (let i = 0; i < name.length; i++) this.hash += (this.hash << 4) + name.charCodeAt(i) + (name.charCodeAt(i) << 8) } export const elementContext = new ContextTracker({ start: null, shift(context, term, stack, input) { return term == StartTag ? new ElementContext(tagNameAfter(input, 1) || "", context) : context }, reduce(context, term) { return term == Element && context ? context.parent : context }, reuse(context, node, _stack, input) { let type = node.type.id return type == StartTag || type == OpenTag ? new ElementContext(tagNameAfter(input, 1) || "", context) : context }, hash(context) { return context ? context.hash : 0 }, strict: false }) export const startTag = new ExternalTokenizer((input, stack) => { if (input.next != 60 /* '<' */) return input.advance() if (input.next == 47 /* '/' */) { input.advance() let name = tagNameAfter(input, 0) if (!name) return input.acceptToken(incompleteStartCloseTag) if (stack.context && name == stack.context.name) return input.acceptToken(StartCloseTag) for (let cx = stack.context; cx; cx = cx.parent) if (cx.name == name) return input.acceptToken(MissingCloseTag, -2) input.acceptToken(mismatchedStartCloseTag) } else if (input.next != 33 /* '!' */ && input.next != 63 /* '?' */) { return input.acceptToken(StartTag) } }, {contextual: true}) function scanTo(type, end) { return new ExternalTokenizer(input => { let len = 0, first = end.charCodeAt(0) scan: for (;; input.advance(), len++) { if (input.next < 0) break if (input.next == first) { for (let i = 1; i < end.length; i++) if (input.peek(i) != end.charCodeAt(i)) continue scan break } } if (len) input.acceptToken(type) }) } export const commentContent = scanTo(_commentContent, "-->") export const piContent = scanTo(_piContent, "?>") export const cdataContent = scanTo(_cdataContent, "]]>") xml-1.0.5/src/xml.grammar000066400000000000000000000045731457130060100152620ustar00rootroot00000000000000@top Document { (entity | DoctypeDecl)+ } entity { Text | EntityReference | CharacterReference | Cdata | Element | Comment | ProcessingInst | MismatchedCloseTag | incompleteStartCloseTag } Element { OpenTag entity* (CloseTag | MissingCloseTag) | SelfClosingTag } OpenTag[closedBy="CloseTag MissingCloseTag",isolate] { StartTag space* TagName space* (Attribute space*)* EndTag } SelfClosingTag[isolate] { StartTag space* TagName space* (Attribute space*)* SelfCloseEndTag } CloseTag[openedBy=OpenTag,isolate] { StartCloseTag space* TagName space* EndTag } MismatchedCloseTag[isolate] { mismatchedStartCloseTag space* TagName space* EndTag } Attribute { AttributeName space* Is space* AttributeValue } AttributeValue[isolate] { "\"" (attributeContent | EntityReference | CharacterReference)* "\"" } Comment[isolate] { "" } ProcessingInst { "" } Cdata { cdataStart cdataContent* "]]>" } @context elementContext from "./tokens.js" @external tokens startTag from "./tokens.js" { StartTag[closedBy="SelfCloseEndTag EndTag"] StartCloseTag MissingCloseTag mismatchedStartCloseTag[@name=StartCloseTag] incompleteStartCloseTag[@name=StartCloseTag] } @external tokens commentContent from "./tokens.js" { commentContent } @external tokens piContent from "./tokens.js" { piContent } @external tokens cdataContent from "./tokens.js" { cdataContent } @tokens { EndTag[openedBy="StartTag StartCloseTag"] { ">" } SelfCloseEndTag[openedBy=StartTag] { "/>" } nameStart { ":" | @asciiLetter | "_" | $[\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D] | $[\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD\u{10000}-\u{EFFFF}] } nameChar { nameStart | "-" | "." | @digit | $[\u00B7\u0300-\u036F\u203F-\u2040] } identifier { nameStart nameChar* } TagName { identifier } AttributeName { identifier } attributeContent { !["&]+ } Is { "=" } // See https://www.w3.org/TR/2006/REC-xml11-20060816/#sec-references EntityReference { "&" identifier ";" } CharacterReference { "&#" ("x" $[0-9a-fA-F]+ | $[0-9]+) ";" } Text { ![<&]+ } DoctypeDecl { "]* ">" } cdataStart { " ==> Document(Element(SelfClosingTag(StartTag,TagName,SelfCloseEndTag))) # Regular tag bar ==> Document(Element(OpenTag(StartTag,TagName,EndTag),Text,CloseTag(StartCloseTag,TagName,EndTag))) # Nested tag c ==> Document(Element(OpenTag(StartTag,TagName,EndTag), Element(OpenTag(StartTag,TagName,EndTag),Text,CloseTag(StartCloseTag,TagName,EndTag)), Element(SelfClosingTag(StartTag,TagName,SelfCloseEndTag)), CloseTag(StartCloseTag,TagName,EndTag))) # Attribute ==> Document(Element(SelfClosingTag(StartTag,TagName,Attribute(AttributeName,Is,AttributeValue),SelfCloseEndTag))) # Multiple attributes ==> Document(Element(OpenTag(StartTag,TagName, Attribute(AttributeName,Is,AttributeValue), Attribute(AttributeName,Is,AttributeValue), Attribute(AttributeName,Is,AttributeValue),EndTag), CloseTag(StartCloseTag,TagName,EndTag))) # Entities &C ==> Document(Element(OpenTag(StartTag,TagName, Attribute(AttributeName,Is,AttributeValue(EntityReference)),EndTag), EntityReference, CharacterReference, CloseTag(StartCloseTag,TagName,EndTag))) # Invalid Entities &&; ==> Document(⚠,Text,⚠,Text) # Doctype ==> Document(DoctypeDecl,Text,Element(SelfClosingTag(StartTag,TagName,SelfCloseEndTag))) # Processing instructions ==> Document(ProcessingInst,Element(OpenTag(StartTag,TagName,EndTag),ProcessingInst,CloseTag(StartCloseTag,TagName,EndTag))) # Comments text ==> Document(Comment,Text,Element(OpenTag(StartTag,TagName,EndTag),Comment,Text,CloseTag(StartCloseTag,TagName,EndTag)),Text,Comment) # Mismatched tag ==> Document(Element(OpenTag(StartTag,TagName,EndTag),MismatchedCloseTag(StartCloseTag,TagName,EndTag),⚠)) # Nested mismatched tag ==> Document(Element(OpenTag(StartTag,TagName,EndTag), Element(OpenTag(StartTag,TagName,EndTag), Element(OpenTag(StartTag,TagName,EndTag),CloseTag(StartCloseTag,TagName,EndTag)), MismatchedCloseTag(StartCloseTag,TagName,EndTag), MissingCloseTag), CloseTag(StartCloseTag,TagName,EndTag))) # Mismatched tag with whitespace < foo bar="10"> blah ==> Document(Element(OpenTag(StartTag,TagName,EndTag), Text, Element(OpenTag(StartTag,TagName,Attribute(AttributeName,Is,AttributeValue),EndTag), Text, MismatchedCloseTag(StartCloseTag,TagName,EndTag), Text, MissingCloseTag), CloseTag(StartCloseTag,TagName,EndTag))) # Cdata ==> Document(Element(OpenTag(StartTag,TagName,EndTag),Cdata,CloseTag(StartCloseTag,TagName,EndTag))) xml-1.0.5/test/test-xml.js000066400000000000000000000010241457130060100154010ustar00rootroot00000000000000import {parser} from "../dist/index.js" import {fileTests, testTree} from "@lezer/generator/dist/test" import * as fs from "fs" import * as path from "path" import {fileURLToPath} from "url" let caseDir = path.dirname(fileURLToPath(import.meta.url)) for (let file of fs.readdirSync(caseDir)) { if (!/\.txt$/.test(file)) continue let name = /^[^\.]*/.exec(file)[0] describe(name, () => { for (let {name, run} of fileTests(fs.readFileSync(path.join(caseDir, file), "utf8"), file)) it(name, () => run(parser)) }) }