pax_global_header00006660000000000000000000000064132767775360014541gustar00rootroot0000000000000052 comment=f57074a1af76697a2b43b62d0c1e27d1af8a559b character-parser-3.1.0/000077500000000000000000000000001327677753600147705ustar00rootroot00000000000000character-parser-3.1.0/.gitignore000066400000000000000000000002021327677753600167520ustar00rootroot00000000000000lib-cov *.seed *.log *.csv *.dat *.out *.pid *.gz pids logs results npm-debug.log node_modules coverage /lib package-lock.json character-parser-3.1.0/.travis.yml000066400000000000000000000001001327677753600170700ustar00rootroot00000000000000sudo: false language: node_js node_js: - "6" - "8" - "10" character-parser-3.1.0/LICENSE000066400000000000000000000020431327677753600157740ustar00rootroot00000000000000Copyright (c) 2013 Forbes Lindesay Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
character-parser-3.1.0/README.md000066400000000000000000000144501327677753600162530ustar00rootroot00000000000000# character-parser Parse JavaScript one character at a time to look for snippets in Templates. This is not a validator, it's just designed to allow you to have sections of JavaScript delimited by brackets robustly. [![Build Status](https://img.shields.io/travis/ForbesLindesay/character-parser/master.svg)](https://travis-ci.org/ForbesLindesay/character-parser) ## Installation npm install character-parser ## Usage ### Parsing Work out how much depth changes: ```js var state = parse('foo(arg1, arg2, {\n foo: [a, b\n'); assert.deepEqual(state.stack, [TOKEN_TYPES.ROUND_BRACKET, TOKEN_TYPES.CURLY_BRACKET, TOKEN_TYPES.SQUARE_BRACKET]); parse(' c, d]\n })', state); assert.deepEqual(state.stack, []); ``` ### Custom Delimited Expressions Find code up to a custom delimiter: ```js // EJS-style var section = parser.parseUntil('foo.bar("%>").baz%> bing bong', '%>'); assert(section.start === 0); assert(section.end === 17); // exclusive end of string assert(section.src === 'foo.bar("%>").baz'); var section = parser.parseUntil('<%foo.bar("%>").baz%> bing bong', '%>', {start: 2}); assert(section.start === 2); assert(section.end === 19); // exclusive end of string assert(section.src === 'foo.bar("%>").baz'); // Jade-style var section = parser.parseUntil('#[p= [1, 2][i]]', ']', {start: 2}) assert(section.start === 2); assert(section.end === 14); // exclusive end of string assert(section.src === 'p= [1, 2][i]') // Dumb parsing // Stop at first delimiter encountered, doesn't matter if it's nested or not // This is the character-parser@1 default behavior. var section = parser.parseUntil('#[p= [1, 2][i]]', ']', {start: 2, ignoreNesting: true}) assert(section.start === 2); assert(section.end === 10); // exclusive end of string assert(section.src === 'p= [1, 2') ``` Delimiters are ignored if they are inside strings or comments. ## API All methods may throw an exception in the case of syntax errors. 
The exception contains an additional `code` property, unique to the kind of error, that always starts with `CHARACTER_PARSER:`. ### parse(src, state = defaultState(), options = {start: 0, end: src.length}) Parse `src` from index `start` up to (but not including) index `end`, and return the state after parsing that string. If you want to parse one string in multiple sections you should keep passing the resulting state to the next parse operation. Returns a `State` object. ### parseUntil(src, delimiter, options = {start: 0, ignoreLineComment: false, ignoreNesting: false}) Parses the source until the first occurrence of `delimiter` which is not in a string or a comment. If `ignoreLineComment` is `true`, a delimiter that occurs inside a top-level line comment still counts. If `ignoreNesting` is `true`, it will stop at the first occurrence of the delimiter, regardless of whether that occurrence is nested or not. See example above. It returns an object with the structure: ```js { start: 0,//index of first character of string end: 13,//index of first character after the end of string src: 'source string' } ``` ### parseChar(character, state = defaultState()) Parses the single character and returns the state. See `parse` for the structure of the returned state object. N.B. `character` must be a single character, not a multi-character string. ### defaultState() Get a default starting state. ### isPunctuator(character) Returns `true` if `character` represents punctuation in JavaScript. ### isKeyword(name) Returns `true` if `name` is a keyword in JavaScript. ### TOKEN_TYPES An enum whose values can be a frame in the `stack` property of a State (documented below). Bracket frames are represented by `TOKEN_TYPES.ROUND_BRACKET`, `TOKEN_TYPES.CURLY_BRACKET` and `TOKEN_TYPES.SQUARE_BRACKET`; there is no separate `BRACKETS` export. 
## State A state is an object with the following structure ```js { stack: [], // stack of detected brackets; the outermost is [0] regexpStart: false, // true if a slash is just encountered and a REGEXP state has just been added to the stack escaped: false, // true if in a string and the last character was an escape character hasDollar: false, // true if in a template string and the last character was a dollar sign src: '', // the concatenated source string history: '', // reversed `src`, with comments left out lastChar: '' // last parsed character } ``` The `stack` property can contain any of the property values of `characterParser.TOKEN_TYPES`; an open bracket is recorded as `ROUND_BRACKET`, `CURLY_BRACKET` or `SQUARE_BRACKET` (there is no separate `characterParser.BRACKETS` object). It also has the following useful methods: - `.current()` returns the innermost bracket (i.e. the last stack frame). - `.isString()` returns `true` if the current location is inside a string. - `.isComment()` returns `true` if the current location is inside a comment. - `.isNesting([opts])` returns `true` if the current location is not at the top level, i.e. if the stack is not empty. If `opts.ignoreLineComment` is `true`, line comments are not counted as a level, so for `// a` it will still return false. ### Errors All errors thrown by character-parser have a `code` property attached that allows one to identify what sort of error was thrown. For errors thrown from `parse` and `parseUntil`, an additional `index` property is available. ## Transition from v1 In character-parser@2, we have changed the APIs quite a bit. These are some notes that will help you transition to the new version. ### State Object Changes Instead of keeping depths of different brackets, we are now keeping a stack. 
We also removed some properties: ```js state.lineComment → state.current() === parser.TOKEN_TYPES.LINE_COMMENT state.blockComment → state.current() === parser.TOKEN_TYPES.BLOCK_COMMENT state.singleQuote → state.current() === parser.TOKEN_TYPES.SINGLE_QUOTE state.doubleQuote → state.current() === parser.TOKEN_TYPES.DOUBLE_QUOTE state.regexp → state.current() === parser.TOKEN_TYPES.REGEXP ``` ### `parseMax` This function has been removed since the usefulness of this function has been questioned. You should find that `parseUntil` is a better choice for your task. ### `parseUntil` The default behavior when the delimiter is a bracket has been changed so that nesting is taken into account to determine if the end is reached. To preserve the original behavior, pass `ignoreNesting: true` as an option. To see the difference between the new and old behaviors, see the "Usage" section earlier. ### `parseMaxBracket` This function has been merged into `parseUntil`. You can directly rename the function call without any repercussions. ## License MIT character-parser-3.1.0/package.json000066400000000000000000000020051327677753600172530ustar00rootroot00000000000000{ "name": "character-parser", "version": "3.1.0", "description": "Parse JavaScript one character at a time to look for snippets in Templates. 
This is not a validator, it's just designed to allow you to have sections of JavaScript delimited by brackets robustly.", "main": "lib/index.js", "types": "lib/index.d.ts", "files": [ "lib" ], "scripts": { "pretest": "npm run build", "prepublishOnly": "npm run build", "build": "tsc && flowgen lib/**/*", "coverage": "istanbul cover test/index.js", "test": "node test/index.js" }, "repository": { "type": "git", "url": "https://github.com/ForbesLindesay/character-parser.git" }, "keywords": [ "parser", "JavaScript", "bracket", "nesting", "comment", "string", "escape", "escaping" ], "author": "ForbesLindesay", "license": "MIT", "devDependencies": { "flowgen2": "^2.0.0-alpha.12", "istanbul": "~0.4.5", "testit": "~3.0.0", "typescript": "^2.4.0" }, "dependencies": {} }character-parser-3.1.0/src/000077500000000000000000000000001327677753600155575ustar00rootroot00000000000000character-parser-3.1.0/src/index.ts000066400000000000000000000241651327677753600172460ustar00rootroot00000000000000
/**
 * Kinds of frame that can appear on a `State`'s stack: comments, string
 * literals, regular expression literals and the three bracket types.
 */
export enum TOKEN_TYPES {
  LINE_COMMENT = 'LINE_COMMENT',
  BLOCK_COMMENT = 'BLOCK_COMMENT',
  SINGLE_QUOTE = 'SINGLE_QUOTE',
  DOUBLE_QUOTE = 'DOUBLE_QUOTE',
  TEMPLATE_QUOTE = 'TEMPLATE_QUOTE',
  REGEXP = 'REGEXP',
  ROUND_BRACKET = 'ROUND_BRACKET',
  CURLY_BRACKET = 'CURLY_BRACKET',
  SQUARE_BRACKET = 'SQUARE_BRACKET',
}

type OpenBracket = '(' | '{' | '[';
type CloseBracket = ')' | '}' | ']';

/** Type guard: is `character` one of `(`, `{` or `[`? */
function isOpenBracket(character: string): character is OpenBracket {
  return character === '(' || character === '{' || character === '[';
}

/** Type guard: is `character` one of `)`, `}` or `]`? */
function isCloseBracket(character: string): character is CloseBracket {
  return character === ')' || character === '}' || character === ']';
}

/** Maps an opening bracket to the stack frame type that represents it. */
function getBracketType(openBracket: OpenBracket): TOKEN_TYPES {
  switch (openBracket) {
    case '(': return TOKEN_TYPES.ROUND_BRACKET;
    case '{': return TOKEN_TYPES.CURLY_BRACKET;
    case '[': return TOKEN_TYPES.SQUARE_BRACKET;
  }
}

/**
 * Returns `true` when `closeBracket` closes a frame of type `bracketType`.
 * Any non-bracket frame type (including `undefined` for an empty stack)
 * never matches.
 */
function isMatchingBracket(closeBracket: CloseBracket, bracketType: TOKEN_TYPES): boolean {
  switch (bracketType) {
    case TOKEN_TYPES.ROUND_BRACKET: return closeBracket === ')';
    case TOKEN_TYPES.CURLY_BRACKET: return closeBracket === '}';
    case TOKEN_TYPES.SQUARE_BRACKET: return closeBracket === ']';
    default: return false;
  }
}

/**
 * Mutable parser state that is threaded through `parseChar` / `parse`.
 */
export class State {
  // Open frames, outermost first. The element type is spelled out because a
  // bare `Array` does not compile under `"strict": true`.
  stack: Array<TOKEN_TYPES> = [];
  // true when the previous character was a `/` that was pushed as a REGEXP
  regexpStart: boolean = false;
  // true when the previous character was an unescaped backslash
  escaped: boolean = false;
  // true when, inside a template string, the previous character was `$`
  hasDollar: boolean = false;
  // every character parsed so far, in order
  src: string = '';
  // parsed characters in reverse order; comments are left out
  history: string = '';
  // the most recently parsed character
  lastChar: string = '';

  /** Returns the innermost frame, or `undefined` when the stack is empty. */
  current(): TOKEN_TYPES {
    return this.stack[this.stack.length - 1];
  }

  /** Returns `true` when the innermost frame is a string or template literal. */
  isString(): boolean {
    return (
      this.current() === TOKEN_TYPES.SINGLE_QUOTE ||
      this.current() === TOKEN_TYPES.DOUBLE_QUOTE ||
      this.current() === TOKEN_TYPES.TEMPLATE_QUOTE
    );
  }

  /** Returns `true` when the innermost frame is a line or block comment. */
  isComment(): boolean {
    return this.current() === TOKEN_TYPES.LINE_COMMENT || this.current() === TOKEN_TYPES.BLOCK_COMMENT;
  }

  /**
   * Returns `true` when the stack is not empty.
   *
   * @param opts.ignoreLineComment when true, a stack that consists of a
   *   single line-comment frame is reported as not nesting.
   */
  isNesting(opts?: {readonly ignoreLineComment?: boolean}): boolean {
    if (
      opts && opts.ignoreLineComment &&
      this.stack.length === 1 && this.stack[0] === TOKEN_TYPES.LINE_COMMENT
    ) {
      // if we are only inside a line comment, and line comments are ignored
      // don't count it as nesting
      return false;
    }
    return !!this.stack.length;
  }
}

/** Creates a fresh, empty `State`. */
export function defaultState() {
  return new State();
}

/**
 * Feeds `src[start..end)` through `parseChar` one character at a time.
 *
 * @param src the source text
 * @param state state to continue from; it is mutated and returned
 * @param options.start index of the first character to parse (default 0)
 * @param options.end index after the last character to parse
 *   (default `src.length`)
 * @returns the state after the last parsed character
 * @throws whatever `parseChar` throws, with an added `index` property holding
 *   the position of the offending character
 */
export function parse(src: string, state: State = defaultState(), options: {readonly start?: number, readonly end?: number} = {}): State {
  options = options || {};
  state = state || defaultState();
  const start = options.start || 0;
  // Only fall back to the full length when `end` is missing. `||` would also
  // discard an explicit `end: 0`, which means "parse nothing".
  const end = options.end == null ? src.length : options.end;
  for (let index = start; index < end; index++) {
    try {
      state = parseChar(src[index], state);
    } catch (ex) {
      (ex as any).index = index;
      throw ex;
    }
  }
  return state;
}

export default parse;

/**
 * Scans `src` from `options.start` until `delimiter` matches at a position
 * that is not nested (or at any position when `options.ignoreNesting` is set).
 *
 * NOTE: `options.end` is accepted by the type but is not consulted; the scan
 * always runs to `src.length`.
 *
 * @param src the source text
 * @param delimiter a literal string, or a RegExp that is tested against the
 *   remainder of `src` at each position
 * @param options.start index to start scanning from (default 0)
 * @param options.ignoreLineComment passed to `State#isNesting`
 * @param options.ignoreNesting match the delimiter without checking nesting
 * @returns `{start, end, src}` where `end` is the index of the delimiter and
 *   `src` is the text between `start` and `end`
 * @throws an Error with code `CHARACTER_PARSER:END_OF_STRING_REACHED` when no
 *   match is found, or whatever `parseChar` throws; both carry an `index`
 */
export function parseUntil(src: string, delimiter: string | RegExp, options: {readonly start?: number, readonly end?: number, readonly ignoreLineComment?: boolean, readonly ignoreNesting?: boolean} = {}) {
  options = options || {};
  const start = options.start || 0;
  const state = defaultState();
  // Declared outside the loop so that the end-of-string error below reports
  // where scanning actually stopped. A loop-scoped `let index` used to shadow
  // an outer constant, so the error always reported `start`.
  let index = start;
  for (; index < src.length; index++) {
    if ((options.ignoreNesting || !state.isNesting(options)) && matches(src, delimiter, index)) {
      const end = index;
      return {
        start: start,
        end: end,
        src: src.substring(start, end)
      };
    }
    try {
      parseChar(src[index], state);
    } catch (ex) {
      (ex as any).index = index;
      throw ex;
    }
  }
  const err = new Error('The end of the string was reached with no closing bracket found.');
  (err as any).code = 'CHARACTER_PARSER:END_OF_STRING_REACHED';
  (err as any).index = index;
  throw err;
}

/**
 * Advances `state` by exactly one character.
 *
 * @param character a string of length 1
 * @param state state to update; it is mutated and returned
 * @returns the updated state
 * @throws an Error with code `CHARACTER_PARSER:CHAR_LENGTH_NOT_ONE` when
 *   `character` is not a single character
 * @throws a SyntaxError with code `CHARACTER_PARSER:MISMATCHED_BRACKET` when a
 *   closing bracket does not match the innermost open frame
 */
export function parseChar(character: string, state: State = defaultState()): State {
  if (character.length !== 1) {
    const err = new Error('Character must be a string of length 1');
    err.name = 'InvalidArgumentError';
    (err as any).code = 'CHARACTER_PARSER:CHAR_LENGTH_NOT_ONE';
    throw err;
  }
  state = state || defaultState();
  state.src += character;
  const wasComment = state.isComment();
  // most recent character outside comments (history is stored reversed)
  const lastChar = state.history ? state.history[0] : '';

  if (state.regexpStart) {
    // The previous `/` was provisionally pushed as a REGEXP. `//` and `/*`
    // start comments instead, so undo the push and let the default branch
    // below open the comment.
    if (character === '/' || character === '*') {
      state.stack.pop();
    }
    state.regexpStart = false;
  }
  switch (state.current()) {
    case TOKEN_TYPES.LINE_COMMENT:
      if (character === '\n') {
        state.stack.pop();
      }
      break;
    case TOKEN_TYPES.BLOCK_COMMENT:
      if (state.lastChar === '*' && character === '/') {
        state.stack.pop();
      }
      break;
    case TOKEN_TYPES.SINGLE_QUOTE:
      if (character === '\'' && !state.escaped) {
        state.stack.pop();
      } else if (character === '\\' && !state.escaped) {
        state.escaped = true;
      } else {
        state.escaped = false;
      }
      break;
    case TOKEN_TYPES.DOUBLE_QUOTE:
      if (character === '"' && !state.escaped) {
        state.stack.pop();
      } else if (character === '\\' && !state.escaped) {
        state.escaped = true;
      } else {
        state.escaped = false;
      }
      break;
    case TOKEN_TYPES.TEMPLATE_QUOTE:
      if (character === '`' && !state.escaped) {
        state.stack.pop();
        state.hasDollar = false;
      } else if (character === '\\' && !state.escaped) {
        state.escaped = true;
        state.hasDollar = false;
      } else if (character === '$' && !state.escaped) {
        state.hasDollar = true;
      } else if (character === '{' && state.hasDollar) {
        // Entering `${`: the dollar has been consumed. Without this reset the
        // flag would still be set when the interpolation closes, and a literal
        // `{` right after it (as in `${a}{`) would open a second interpolation.
        state.hasDollar = false;
        state.stack.push(TOKEN_TYPES.CURLY_BRACKET);
      } else {
        state.escaped = false;
        state.hasDollar = false;
      }
      break;
    case TOKEN_TYPES.REGEXP:
      if (character === '/' && !state.escaped) {
        state.stack.pop();
      } else if (character === '\\' && !state.escaped) {
        state.escaped = true;
      } else {
        state.escaped = false;
      }
      break;
    default:
      // top level or inside a bracket frame
      if (isOpenBracket(character)) {
        state.stack.push(getBracketType(character));
      } else if (isCloseBracket(character)) {
        if (!isMatchingBracket(character, state.current())) {
          const err = new SyntaxError('Mismatched Bracket: ' + character);
          (err as any).code = 'CHARACTER_PARSER:MISMATCHED_BRACKET';
          throw err;
        }
        state.stack.pop();
      } else if (lastChar === '/' && character === '/') {
        // Don't include comments in history
        state.history = state.history.substr(1);
        state.stack.push(TOKEN_TYPES.LINE_COMMENT);
      } else if (lastChar === '/' && character === '*') {
        // Don't include comment in history
        state.history = state.history.substr(1);
        state.stack.push(TOKEN_TYPES.BLOCK_COMMENT);
      } else if (character === '/' && isRegexp(state.history)) {
        state.stack.push(TOKEN_TYPES.REGEXP);
        // N.B. if the next character turns out to be a `*` or a `/`
        // then this isn't actually a regexp
        state.regexpStart = true;
      } else if (character === '\'') {
        state.stack.push(TOKEN_TYPES.SINGLE_QUOTE);
      } else if (character === '"') {
        state.stack.push(TOKEN_TYPES.DOUBLE_QUOTE);
      } else if (character === '`') {
        state.stack.push(TOKEN_TYPES.TEMPLATE_QUOTE);
      }
      break;
  }
  // characters that start, end or sit inside a comment stay out of history
  if (!state.isComment() && !wasComment) {
    state.history = character + state.history;
  }
  state.lastChar = character; // store last character for ending block comments
  return state;
}

/**
 * Tests whether `matcher` matches `str` at offset `i`. A string matcher must
 * equal the text at that offset; a RegExp matcher is tested against the
 * remainder of `str` from that offset.
 */
function matches(str: string, matcher: string | RegExp, i: number = 0): boolean {
  if (typeof matcher === 'string') {
    return str.substr(i || 0, matcher.length) === matcher;
  }
  // Global and sticky expressions resume from `lastIndex`. Reset it so that
  // every probe starts at the beginning of the substring.
  matcher.lastIndex = 0;
  return matcher.test(str.substr(i || 0));
}

/**
 * Returns `true` if the first character of `c` is one of the punctuation
 * characters listed below, or if `c` is empty.
 */
export function isPunctuator(c: string): boolean {
  if (!c) return true; // the start of a string is a punctuator
  const code = c.charCodeAt(0);

  switch (code) {
    case 46:   // . dot
    case 40:   // ( open bracket
    case 41:   // ) close bracket
    case 59:   // ; semicolon
    case 44:   // , comma
    case 123:  // { open curly brace
    case 125:  // } close curly brace
    case 91:   // [
    case 93:   // ]
    case 58:   // :
    case 63:   // ?
    case 126:  // ~
    case 37:   // %
    case 38:   // &
    case 42:   // *
    case 43:   // +
    case 45:   // -
    case 47:   // /
    case 60:   // <
    case 62:   // >
    case 94:   // ^
    case 124:  // |
    case 33:   // !
    case 61:   // =
      return true;
    default:
      return false;
  }
}

/** Returns `true` if `id` is one of the JavaScript keywords listed below. */
export function isKeyword(id: string): boolean {
  return (id === 'if') || (id === 'in') || (id === 'do') || (id === 'var') || (id === 'for') || (id === 'new') ||
         (id === 'try') || (id === 'let') || (id === 'this') || (id === 'else') || (id === 'case') ||
         (id === 'void') || (id === 'with') || (id === 'enum') || (id === 'while') || (id === 'break') || (id === 'catch') ||
         (id === 'throw') || (id === 'const') || (id === 'yield') || (id === 'class') || (id === 'super') ||
         (id === 'return') || (id === 'typeof') || (id === 'delete') || (id === 'switch') || (id === 'export') ||
         (id === 'import') || (id === 'default') || (id === 'finally') || (id === 'extends') || (id === 'function') ||
         (id === 'continue') || (id === 'debugger') || (id === 'package') || (id === 'private') || (id === 'interface') ||
         (id === 'instanceof') || (id === 'implements') || (id === 'protected') || (id === 'public') || (id === 'static');
}

/**
 * Decides whether a `/` that follows `history` starts a regular expression
 * literal rather than a division. `history` is the reversed source, so
 * `history[0]` is the most recent character before the slash.
 */
function isRegexp(history: string): boolean {
  //could be start of regexp or divide sign

  history = history.replace(/^\s*/, '');

  //unless its an `if`, `while`, `for` or `with` it's a divide, so we assume it's a divide
  if (history[0] === ')') return false;
  //unless it's a function expression, it's a regexp, so we assume it's a regexp
  if (history[0] === '}') return true;
  //any punctuation means it's a regexp
  if (isPunctuator(history[0])) return true;
  //if the last thing was a keyword then it must be a regexp (e.g. `typeof /foo/`)
  const match = /^\w+\b/.exec(history);
  // the matched word is reversed, so flip it back before the keyword lookup
  if (match && isKeyword(match[0].split('').reverse().join(''))) return true;

  return false;
}
character-parser-3.1.0/test/000077500000000000000000000000001327677753600157475ustar00rootroot00000000000000character-parser-3.1.0/test/index.js000066400000000000000000000062331327677753600174200ustar00rootroot00000000000000
var assert = require('assert');
var test = require('testit');
var parser = require('../');
var parse = parser.parse;
var TOKEN_TYPES = parser.TOKEN_TYPES;

test('parse', function () {
  test('works out how much depth changes', function () {
    var state = parse('foo(arg1, arg2, {\n foo: [a, b\n');
    assert.deepEqual(state.stack, [
      TOKEN_TYPES.ROUND_BRACKET,
      TOKEN_TYPES.CURLY_BRACKET,
      TOKEN_TYPES.SQUARE_BRACKET
    ]);

    parse(' c, d]\n })', state);
    assert.deepEqual(state.stack, []);
  });
});

test('parseUntil', function () {
  test('finds contents of bracketed expressions with specified bracket', function () {
    var section = parser.parseUntil('foo="(", bar="}"] bing bong', ']');
    assert(section.start === 0);
    assert(section.end === 16);//exclusive end of string
    assert(section.src === 'foo="(", bar="}"');

    var section = parser.parseUntil('foo="(", bar="}")] bing bong', ')');
    assert(section.start === 0);
    assert(section.end === 16);//exclusive end of string
    assert(section.src === 'foo="(", bar="}"');
  });
  test('finds code up to a custom delimiter', function () {
    var section = parser.parseUntil('foo.bar("%>").baz%> bing bong', '%>');
    assert(section.start === 0);
    assert(section.end === 17);//exclusive end of string
    assert(section.src === 'foo.bar("%>").baz');

    var section = parser.parseUntil('<%foo.bar("%>").baz%> bing bong', '%>', {start: 2});
    assert(section.start === 2);
    assert(section.end === 19);//exclusive end of string
    assert(section.src === 'foo.bar("%>").baz');

    var section = parser.parseUntil('x = `foo${`)`}`)', ')');
    assert.deepEqual(section, {
      start: 0,
      end: 15,
      src: 'x = `foo${`)`}`'
    });

    var section = parser.parseUntil('x = `foo${`)`}`])', /^[\])]/);
    assert.deepEqual(section, {
      start: 0,
      end: 15,
      src: 'x = `foo${`)`}`'
    });

    try {
      var section = parser.parseUntil('x = `foo${)}`)', ')');
    } catch (ex) {
      assert(ex.code === 'CHARACTER_PARSER:MISMATCHED_BRACKET');
      return;
    }
    throw new Error('Expected mismatched brackets');
  });
});

test('regressions', function () {
  test('#1', function () {
    test('parses regular expressions', function () {
      var section = parser.parseUntil('foo=/\\//g, bar="}") bing bong', ')');
      assert(section.start === 0);
      assert(section.end === 18);//exclusive end of string
      assert(section.src === 'foo=/\\//g, bar="}"');

      var section = parser.parseUntil('foo = typeof /\\//g, bar="}") bing bong', ')');
      assert(section.start === 0);
      //assert(section.end === 18);//exclusive end of string
      assert(section.src === 'foo = typeof /\\//g, bar="}"');
    })
  })
  test('#6', function () {
    test('parses block comments', function () {
      var section = parser.parseUntil('/* ) */) bing bong', ')');
      assert(section.start === 0);
      assert(section.end === 7);//exclusive end of string
      assert(section.src === '/* ) */');

      var section = parser.parseUntil('/* /) */) bing bong', ')');
      assert(section.start === 0);
      assert(section.end === 8);//exclusive end of string
      assert(section.src === '/* /) */');
    })
  })
})
character-parser-3.1.0/tsconfig.json000066400000000000000000000001701327677753600174750ustar00rootroot00000000000000{ "compileOnSave": true, "compilerOptions": { "declaration": true, "outDir": "lib", "strict": true } }