pax_global_header00006660000000000000000000000064123162640570014520gustar00rootroot0000000000000052 comment=28aa5ac329310ac74eff0d4259fcf594429134f7 jison-lex-master/000077500000000000000000000000001231626405700142655ustar00rootroot00000000000000jison-lex-master/.gitignore000066400000000000000000000000621231626405700162530ustar00rootroot00000000000000node_modules/ # Editor bak files *~ *.bak *.orig jison-lex-master/README.md000066400000000000000000000020231231626405700155410ustar00rootroot00000000000000# jison-lex A lexical analyzer generator used by [jison](http://jison.org). It takes a lexical grammar definition (either in JSON or Bison's lexical grammar format) and outputs a JavaScript lexer. ## install npm install jison-lex -g ## usage ``` Usage: jison-lex [file] [options] file file containing a lexical grammar Options: -o FILE, --outfile FILE Filename and base module name of the generated parser -t TYPE, --module-type TYPE The type of module to generate (commonjs, js) --version print version and exit ``` ## programatic usage ``` var JisonLex = require('jison-lex'); var grammar = { rules: [ ["x", "return 'X';" ], ["y", "return 'Y';" ], ["$", "return 'EOF';" ] ] }; // or load from a file // var grammar = fs.readFileSync('mylexer.l', 'utf8'); // generate source var lexerSource = JisonLex.generate(grammar); // or create a parser in memory var lexer = new JisonLex(grammar); lexer.setInput('xyxxy'); lexer.lex(); // => 'X' lexer.lex(); // => 'Y' ## license MIT jison-lex-master/cli.js000077500000000000000000000040701231626405700153760ustar00rootroot00000000000000#!/usr/bin/env node var version = require('./package.json').version; var path = require('path'); var fs = require('fs'); var lexParser = require('lex-parser'); var RegExpLexer = require('./regexp-lexer.js'); var opts = require("nomnom") .script('jison-lex') .option('file', { flag: true, position: 0, help: 'file containing a lexical grammar' }) .option('outfile', { abbr: 'o', metavar: 'FILE', help: 'Filename and base module name of the generated parser' }) .option('module-type', { abbr: 't', default: 'commonjs', metavar: 'TYPE', help: 'The type of module to generate (commonjs, js)' }) .option('version', { abbr: 'V', flag: true, help: 'print version and exit', callback: function() { return version; } }); exports.main = function (opts) { if (opts.file) { var raw = fs.readFileSync(path.normalize(opts.file), 'utf8'), name = path.basename((opts.outfile||opts.file)).replace(/\..*$/g,''); fs.writeFileSync(opts.outfile||(name + '.js'), processGrammar(raw, name)); } else { readin(function (raw) { console.log(processGrammar(raw)); }); } }; function processGrammar (file, name) { var grammar; try { grammar = lexParser.parse(file); } catch (e) { try { grammar = JSON.parse(file); } catch (e2) { throw e; } } var settings = grammar.options || {}; if (!settings.moduleType) settings.moduleType = opts['module-type']; if (!settings.moduleName && name) settings.moduleName = name.replace(/-\w/g, function (match){ return match.charAt(1).toUpperCase(); }); grammar.options = settings; return RegExpLexer.generate(grammar); } function readin (cb) { var stdin = process.openStdin(), data = ''; stdin.setEncoding('utf8'); stdin.addListener('data', function (chunk) { data += chunk; }); stdin.addListener('end', function () { cb(data); }); } if (require.main === module) exports.main(opts.parse()); jison-lex-master/examples/000077500000000000000000000000001231626405700161035ustar00rootroot00000000000000jison-lex-master/examples/lex.l000066400000000000000000000103321231626405700170470ustar00rootroot00000000000000 NAME [a-zA-Z_][a-zA-Z0-9_-]* BR \r\n|\n|\r %s indented trail rules %x code start_condition options conditions action %% "/*"(.|\n|\r)*?"*/" return 'ACTION_BODY'; "//".* return 'ACTION_BODY'; "/"[^ /]*?['"{}'][^ ]*?"/" return 'ACTION_BODY'; // regexp with braces or quotes (and no spaces) \"("\\\\"|'\"'|[^"])*\" return 'ACTION_BODY'; "'"("\\\\"|"\'"|[^'])*"'" return 'ACTION_BODY'; [/"'][^{}/"']+ return 'ACTION_BODY'; [^{}/"']+ return 'ACTION_BODY'; "{" yy.depth++; return '{' "}" yy.depth == 0 ? this.begin('trail') : yy.depth--; return '}' {NAME} return 'NAME'; ">" this.popState(); return '>'; "," return ','; "*" return '*'; {BR}+ /* */ \s+{BR}+ /* */ \s+ this.begin('indented') "%%" this.begin('code'); return '%%' [a-zA-Z0-9_]+ return 'CHARACTER_LIT' {NAME} yy.options[yytext] = true {BR}+ this.begin('INITIAL') \s+{BR}+ this.begin('INITIAL') \s+ /* empty */ {NAME} return 'START_COND' {BR}+ this.begin('INITIAL') \s+{BR}+ this.begin('INITIAL') \s+ /* empty */ .*{BR}+ this.begin('rules') "{" yy.depth = 0; this.begin('action'); return '{' "%{"(.|{BR})*?"%}" this.begin('trail'); yytext = yytext.substr(2, yytext.length-4);return 'ACTION' "%{"(.|{BR})*?"%}" yytext = yytext.substr(2, yytext.length-4); return 'ACTION' .+ this.begin('rules'); return 'ACTION' "/*"(.|\n|\r)*?"*/" /* ignore */ "//".* /* ignore */ {BR}+ /* */ \s+ /* */ {NAME} return 'NAME'; \"("\\\\"|'\"'|[^"])*\" yytext = yytext.replace(/\\"/g,'"'); return 'STRING_LIT'; "'"("\\\\"|"\'"|[^'])*"'" yytext = yytext.replace(/\\'/g,"'"); return 'STRING_LIT'; "|" return '|'; "["("\\\\"|"\]"|[^\]])*"]" return 'ANY_GROUP_REGEX'; "(?:" return 'SPECIAL_GROUP'; "(?=" return 'SPECIAL_GROUP'; "(?!" return 'SPECIAL_GROUP'; "(" return '('; ")" return ')'; "+" return '+'; "*" return '*'; "?" return '?'; "^" return '^'; "," return ','; "<>" return '$'; "<" this.begin('conditions'); return '<'; "/!" return '/!'; "/" return '/'; "\\"([0-7]{1,3}|[rfntvsSbBwWdD\\*+()${}|[\]\/.^?]|"c"[A-Z]|"x"[0-9A-F]{2}|"u"[a-fA-F0-9]{4}) return 'ESCAPE_CHAR'; "\\". yytext = yytext.replace(/^\\/g,''); return 'ESCAPE_CHAR'; "$" return '$'; "." return '.'; "%options" yy.options = {}; this.begin('options'); "%s" this.begin('start_condition'); return 'START_INC'; "%x" this.begin('start_condition'); return 'START_EXC'; "%%" this.begin('rules'); return '%%'; "{"\d+(","\s?\d+|",")?"}" return 'RANGE_REGEX'; "{"{NAME}"}" return 'NAME_BRACE'; "{" return '{'; "}" return '}'; . /* ignore bad characters */ <*><> return 'EOF'; (.|{BR})+ return 'CODE'; %% jison-lex-master/package.json000066400000000000000000000015061231626405700165550ustar00rootroot00000000000000{ "author": "Zach Carter (http://zaa.ch)", "name": "jison-lex", "description": "lexical analyzer generator used by jison", "version": "0.3.4", "keywords": [ "jison", "parser", "generator", "lexer", "flex", "tokenizer" ], "repository": { "type": "git", "url": "git://github.com/zaach/jison-lex.git" }, "bugs": { "email": "jison@librelist.com", "url": "http://github.com/zaach/jison-lex/issues" }, "main": "regexp-lexer", "bin": "cli.js", "engines": { "node": ">=0.4" }, "dependencies": { "lex-parser": "0.1.x", "nomnom": "1.5.2" }, "devDependencies": { "test": "0.4.4" }, "scripts": { "test": "node tests/all-tests.js" }, "directories": { "lib": "lib", "tests": "tests" }, "homepage": "http://jison.org" } jison-lex-master/regexp-lexer.js000066400000000000000000000473111231626405700172400ustar00rootroot00000000000000// Basic Lexer implemented using JavaScript regular expressions // MIT Licensed "use strict"; var lexParser = require('lex-parser'); var version = require('./package.json').version; // expand macros and convert matchers to RegExp's function prepareRules(rules, macros, actions, tokens, startConditions, caseless) { var m,i,k,action,conditions, newRules = []; if (macros) { macros = prepareMacros(macros); } function tokenNumberReplacement (str, token) { return "return " + (tokens[token] || "'" + token + "'"); } actions.push('switch($avoiding_name_collisions) {'); for (i=0;i < rules.length; i++) { if (Object.prototype.toString.apply(rules[i][0]) !== '[object Array]') { // implicit add to all inclusive start conditions for (k in startConditions) { if (startConditions[k].inclusive) { startConditions[k].rules.push(i); } } } else if (rules[i][0][0] === '*') { // Add to ALL start conditions for (k in startConditions) { startConditions[k].rules.push(i); } rules[i].shift(); } else { // Add to explicit start conditions conditions = rules[i].shift(); for (k=0;k 20 ? '...':'') + past.substr(-20).replace(/\n/g, ""); }, // displays upcoming input, i.e. for error messages upcomingInput: function () { var next = this.match; if (next.length < 20) { next += this._input.substr(0, 20-next.length); } return (next.substr(0,20) + (next.length > 20 ? '...' : '')).replace(/\n/g, ""); }, // displays the character position where the lexing error occurred, i.e. for error messages showPosition: function () { var pre = this.pastInput(); var c = new Array(pre.length + 1).join("-"); return pre + this.upcomingInput() + "\n" + c + "^"; }, // test the lexed token: return FALSE when not a match, otherwise return token test_match: function(match, indexed_rule) { var token, lines, backup; if (this.options.backtrack_lexer) { // save context backup = { yylineno: this.yylineno, yylloc: { first_line: this.yylloc.first_line, last_line: this.last_line, first_column: this.yylloc.first_column, last_column: this.yylloc.last_column }, yytext: this.yytext, match: this.match, matches: this.matches, matched: this.matched, yyleng: this.yyleng, offset: this.offset, _more: this._more, _input: this._input, yy: this.yy, conditionStack: this.conditionStack.slice(0), done: this.done }; if (this.options.ranges) { backup.yylloc.range = this.yylloc.range.slice(0); } } lines = match[0].match(/(?:\r\n?|\n).*/g); if (lines) { this.yylineno += lines.length; } this.yylloc = { first_line: this.yylloc.last_line, last_line: this.yylineno + 1, first_column: this.yylloc.last_column, last_column: lines ? lines[lines.length - 1].length - lines[lines.length - 1].match(/\r?\n?/)[0].length : this.yylloc.last_column + match[0].length }; this.yytext += match[0]; this.match += match[0]; this.matches = match; this.yyleng = this.yytext.length; if (this.options.ranges) { this.yylloc.range = [this.offset, this.offset += this.yyleng]; } this._more = false; this._backtrack = false; this._input = this._input.slice(match[0].length); this.matched += match[0]; token = this.performAction.call(this, this.yy, this, indexed_rule, this.conditionStack[this.conditionStack.length - 1]); if (this.done && this._input) { this.done = false; } if (token) { return token; } else if (this._backtrack) { // recover context for (var k in backup) { this[k] = backup[k]; } return false; // rule action called reject() implying the next rule should be tested instead. } return false; }, // return next match in input next: function () { if (this.done) { return this.EOF; } if (!this._input) { this.done = true; } var token, match, tempMatch, index; if (!this._more) { this.yytext = ''; this.match = ''; } var rules = this._currentRules(); for (var i = 0; i < rules.length; i++) { tempMatch = this._input.match(this.rules[rules[i]]); if (tempMatch && (!match || tempMatch[0].length > match[0].length)) { match = tempMatch; index = i; if (this.options.backtrack_lexer) { token = this.test_match(tempMatch, rules[i]); if (token !== false) { return token; } else if (this._backtrack) { match = false; continue; // rule action called reject() implying a rule MISmatch. } else { // else: this is a lexer rule which consumes input without producing a token (e.g. whitespace) return false; } } else if (!this.options.flex) { break; } } } if (match) { token = this.test_match(match, rules[index]); if (token !== false) { return token; } // else: this is a lexer rule which consumes input without producing a token (e.g. whitespace) return false; } if (this._input === "") { return this.EOF; } else { return this.parseError('Lexical error on line ' + (this.yylineno + 1) + '. Unrecognized text.\n' + this.showPosition(), { text: "", token: null, line: this.yylineno }); } }, // return next match that has a token lex: function lex () { var r = this.next(); if (r) { return r; } else { return this.lex(); } }, // activates a new lexer condition state (pushes the new lexer condition state onto the condition stack) begin: function begin (condition) { this.conditionStack.push(condition); }, // pop the previously active lexer condition state off the condition stack popState: function popState () { var n = this.conditionStack.length - 1; if (n > 0) { return this.conditionStack.pop(); } else { return this.conditionStack[0]; } }, // produce the lexer rule set which is active for the currently active lexer condition state _currentRules: function _currentRules () { if (this.conditionStack.length && this.conditionStack[this.conditionStack.length - 1]) { return this.conditions[this.conditionStack[this.conditionStack.length - 1]].rules; } else { return this.conditions["INITIAL"].rules; } }, // return the currently active lexer condition state; when an index argument is provided it produces the N-th previous condition state, if available topState: function topState (n) { n = this.conditionStack.length - 1 - Math.abs(n || 0); if (n >= 0) { return this.conditionStack[n]; } else { return "INITIAL"; } }, // alias for begin(condition) pushState: function pushState (condition) { this.begin(condition); }, // return the number of states pushed stateStackSize: function stateStackSize() { return this.conditionStack.length; } }; // generate lexer source from a grammar function generate (dict, tokens) { var opt = processGrammar(dict, tokens); return generateFromOpts(opt); } // process the grammar and build final data structures and functions function processGrammar(dict, tokens) { var opts = {}; if (typeof dict === 'string') { dict = lexParser.parse(dict); } dict = dict || {}; opts.options = dict.options || {}; opts.moduleType = opts.options.moduleType; opts.moduleName = opts.options.moduleName; opts.conditions = prepareStartConditions(dict.startConditions); opts.conditions.INITIAL = {rules:[],inclusive:true}; opts.performAction = buildActions.call(opts, dict, tokens); opts.conditionStack = ['INITIAL']; opts.moduleInclude = (dict.moduleInclude || '').trim(); return opts; } // Assemble the final source from the processed grammar function generateFromOpts (opt) { var code = ""; if (opt.moduleType === 'commonjs') { code = generateCommonJSModule(opt); } else if (opt.moduleType === 'amd') { code = generateAMDModule(opt); } else { code = generateModule(opt); } return code; } function generateModuleBody (opt) { var functionDescriptions = { setInput: "resets the lexer, sets new input", input: "consumes and returns one char from the input", unput: "unshifts one char (or a string) into the input", more: "When called from action, caches matched text and appends it on next action", reject: "When called from action, signals the lexer that this rule fails to match the input, so the next matching rule (regex) should be tested instead.", less: "retain first n characters of the match", pastInput: "displays already matched input, i.e. for error messages", upcomingInput: "displays upcoming input, i.e. for error messages", showPosition: "displays the character position where the lexing error occurred, i.e. for error messages", test_match: "test the lexed token: return FALSE when not a match, otherwise return token", next: "return next match in input", lex: "return next match that has a token", begin: "activates a new lexer condition state (pushes the new lexer condition state onto the condition stack)", popState: "pop the previously active lexer condition state off the condition stack", _currentRules: "produce the lexer rule set which is active for the currently active lexer condition state", topState: "return the currently active lexer condition state; when an index argument is provided it produces the N-th previous condition state, if available", pushState: "alias for begin(condition)", stateStackSize: "return the number of states currently on the stack" }; var out = "({\n"; var p = []; var descr; for (var k in RegExpLexer.prototype) { if (RegExpLexer.prototype.hasOwnProperty(k) && k.indexOf("generate") === -1) { // copy the function description as a comment before the implementation; supports multi-line descriptions descr = "\n"; if (functionDescriptions[k]) { descr += "// " + functionDescriptions[k].replace(/\n/g, "\n\/\/ ") + "\n"; } p.push(descr + k + ":" + (RegExpLexer.prototype[k].toString() || '""')); } } out += p.join(",\n"); if (opt.options) { out += ",\noptions: " + JSON.stringify(opt.options); } out += ",\nperformAction: " + String(opt.performAction); out += ",\nrules: [" + opt.rules + "]"; out += ",\nconditions: " + JSON.stringify(opt.conditions); out += "\n})"; return out; } function generateModule(opt) { opt = opt || {}; var out = "/* generated by jison-lex " + version + " */"; var moduleName = opt.moduleName || "lexer"; out += "\nvar " + moduleName + " = (function(){\nvar lexer = " + generateModuleBody(opt); if (opt.moduleInclude) { out += ";\n" + opt.moduleInclude; } out += ";\nreturn lexer;\n})();"; return out; } function generateAMDModule(opt) { var out = "/* generated by jison-lex " + version + " */"; out += "define([], function(){\nvar lexer = " + generateModuleBody(opt); if (opt.moduleInclude) { out += ";\n" + opt.moduleInclude; } out += ";\nreturn lexer;" + "\n});"; return out; } function generateCommonJSModule(opt) { opt = opt || {}; var out = ""; var moduleName = opt.moduleName || "lexer"; out += generateModule(opt); out += "\nexports.lexer = " + moduleName; out += ";\nexports.lex = function () { return " + moduleName + ".lex.apply(lexer, arguments); };"; return out; } RegExpLexer.generate = generate; module.exports = RegExpLexer; jison-lex-master/tests/000077500000000000000000000000001231626405700154275ustar00rootroot00000000000000jison-lex-master/tests/all-tests.js000077500000000000000000000002021231626405700176720ustar00rootroot00000000000000exports.testRegExpLexer = require("./regexplexer"); if (require.main === module) process.exit(require("test").run(exports)); jison-lex-master/tests/regexplexer.js000066400000000000000000000666171231626405700203370ustar00rootroot00000000000000var RegExpLexer = require("../regexp-lexer"), assert = require("assert"); exports["test basic matchers"] = function() { var dict = { rules: [ ["x", "return 'X';" ], ["y", "return 'Y';" ], ["$", "return 'EOF';" ] ] }; var input = "xxyx"; var lexer = new RegExpLexer(dict, input); assert.equal(lexer.lex(), "X"); assert.equal(lexer.lex(), "X"); assert.equal(lexer.lex(), "Y"); assert.equal(lexer.lex(), "X"); assert.equal(lexer.lex(), "EOF"); }; exports["test set yy"] = function() { var dict = { rules: [ ["x", "return yy.x;" ], ["y", "return 'Y';" ], ["$", "return 'EOF';" ] ] }; var input = "xxyx"; var lexer = new RegExpLexer(dict); lexer.setInput(input, { x: 'EX' }); assert.equal(lexer.lex(), "EX"); }; exports["test set input after"] = function() { var dict = { rules: [ ["x", "return 'X';" ], ["y", "return 'Y';" ], ["$", "return 'EOF';" ] ] }; var input = "xxyx"; var lexer = new RegExpLexer(dict); lexer.setInput(input); assert.equal(lexer.lex(), "X"); assert.equal(lexer.lex(), "X"); assert.equal(lexer.lex(), "Y"); assert.equal(lexer.lex(), "X"); assert.equal(lexer.lex(), "EOF"); }; exports["test unrecognized char"] = function() { var dict = { rules: [ ["x", "return 'X';" ], ["y", "return 'Y';" ], ["$", "return 'EOF';" ] ] }; var input = "xa"; var lexer = new RegExpLexer(dict, input); assert.equal(lexer.lex(), "X"); assert.throws(function(){lexer.lex()}, "bad char"); }; exports["test macro"] = function() { var dict = { macros: { "digit": "[0-9]" }, rules: [ ["x", "return 'X';" ], ["y", "return 'Y';" ], ["{digit}+", "return 'NAT';" ], ["$", "return 'EOF';" ] ] }; var input = "x12234y42"; var lexer = new RegExpLexer(dict, input); assert.equal(lexer.lex(), "X"); assert.equal(lexer.lex(), "NAT"); assert.equal(lexer.lex(), "Y"); assert.equal(lexer.lex(), "NAT"); assert.equal(lexer.lex(), "EOF"); }; exports["test macro precedence"] = function() { var dict = { macros: { "hex": "[0-9]|[a-f]" }, rules: [ ["-", "return '-';" ], ["{hex}+", "return 'HEX';" ], ["$", "return 'EOF';" ] ] }; var input = "129-abfe-42dc-ea12"; var lexer = new RegExpLexer(dict, input); assert.equal(lexer.lex(), "HEX"); assert.equal(lexer.lex(), "-"); assert.equal(lexer.lex(), "HEX"); assert.equal(lexer.lex(), "-"); assert.equal(lexer.lex(), "HEX"); assert.equal(lexer.lex(), "-"); assert.equal(lexer.lex(), "HEX"); assert.equal(lexer.lex(), "EOF"); }; exports["test nested macros"] = function () { var dict = { macros: { "digit": "[0-9]", "2digit": "{digit}{digit}", "3digit": "{2digit}{digit}" }, rules: [ ["x", "return 'X';" ], ["y", "return 'Y';" ], ["{3digit}", "return 'NNN';" ], ["{2digit}", "return 'NN';" ], ["{digit}", "return 'N';" ], ["$", "return 'EOF';" ] ] }; var input = "x1y42y123"; var lexer = new RegExpLexer(dict, input); assert.equal(lexer.lex(), "X"); assert.equal(lexer.lex(), "N"); assert.equal(lexer.lex(), "Y"); assert.equal(lexer.lex(), "NN"); assert.equal(lexer.lex(), "Y"); assert.equal(lexer.lex(), "NNN"); assert.equal(lexer.lex(), "EOF"); }; exports["test nested macro precedence"] = function() { var dict = { macros: { "hex": "[0-9]|[a-f]", "col": "#{hex}+" }, rules: [ ["-", "return '-';" ], ["{col}", "return 'HEX';" ], ["$", "return 'EOF';" ] ] }; var input = "#129-#abfe-#42dc-#ea12"; var lexer = new RegExpLexer(dict, input); assert.equal(lexer.lex(), "HEX"); assert.equal(lexer.lex(), "-"); assert.equal(lexer.lex(), "HEX"); assert.equal(lexer.lex(), "-"); assert.equal(lexer.lex(), "HEX"); assert.equal(lexer.lex(), "-"); assert.equal(lexer.lex(), "HEX"); assert.equal(lexer.lex(), "EOF"); }; exports["test action include"] = function() { var dict = { rules: [ ["x", "return included ? 'Y' : 'N';" ], ["$", "return 'EOF';" ] ], actionInclude: "var included = true;" }; var input = "x"; var lexer = new RegExpLexer(dict, input); assert.equal(lexer.lex(), "Y"); assert.equal(lexer.lex(), "EOF"); }; exports["test ignored"] = function() { var dict = { rules: [ ["x", "return 'X';" ], ["y", "return 'Y';" ], ["\\s+", "/* skip whitespace */" ], ["$", "return 'EOF';" ] ] }; var input = "x x y x"; var lexer = new RegExpLexer(dict, input); assert.equal(lexer.lex(), "X"); assert.equal(lexer.lex(), "X"); assert.equal(lexer.lex(), "Y"); assert.equal(lexer.lex(), "X"); assert.equal(lexer.lex(), "EOF"); }; exports["test disambiguate"] = function() { var dict = { rules: [ ["for\\b", "return 'FOR';" ], ["if\\b", "return 'IF';" ], ["[a-z]+", "return 'IDENTIFIER';" ], ["\\s+", "/* skip whitespace */" ], ["$", "return 'EOF';" ] ] }; var input = "if forever for for"; var lexer = new RegExpLexer(dict, input); assert.equal(lexer.lex(), "IF"); assert.equal(lexer.lex(), "IDENTIFIER"); assert.equal(lexer.lex(), "FOR"); assert.equal(lexer.lex(), "FOR"); assert.equal(lexer.lex(), "EOF"); }; exports["test yytext overwrite"] = function() { var dict = { rules: [ ["x", "yytext = 'hi der'; return 'X';" ] ] }; var input = "x"; var lexer = new RegExpLexer(dict, input); lexer.lex(); assert.equal(lexer.yytext, "hi der"); }; exports["test yylineno"] = function() { var dict = { rules: [ ["\\s+", "/* skip whitespace */" ], ["x", "return 'x';" ], ["y", "return 'y';" ] ] }; var input = "x\nxy\n\n\nx"; var lexer = new RegExpLexer(dict, input); assert.equal(lexer.yylineno, 0); assert.equal(lexer.lex(), "x"); assert.equal(lexer.lex(), "x"); assert.equal(lexer.yylineno, 1); assert.equal(lexer.lex(), "y"); assert.equal(lexer.yylineno, 1); assert.equal(lexer.lex(), "x"); assert.equal(lexer.yylineno, 4); }; exports["test yylloc"] = function() { var dict = { rules: [ ["\\s+", "/* skip whitespace */" ], ["x", "return 'x';" ], ["y", "return 'y';" ] ] }; var input = "x\nxy\n\n\nx"; var lexer = new RegExpLexer(dict, input); assert.equal(lexer.lex(), "x"); assert.equal(lexer.yylloc.first_column, 0); assert.equal(lexer.yylloc.last_column, 1); assert.equal(lexer.lex(), "x"); assert.equal(lexer.yylloc.first_line, 2); assert.equal(lexer.yylloc.last_line, 2); assert.equal(lexer.yylloc.first_column, 0); assert.equal(lexer.yylloc.last_column, 1); assert.equal(lexer.lex(), "y"); assert.equal(lexer.yylloc.first_line, 2); assert.equal(lexer.yylloc.last_line, 2); assert.equal(lexer.yylloc.first_column, 1); assert.equal(lexer.yylloc.last_column, 2); assert.equal(lexer.lex(), "x"); assert.equal(lexer.yylloc.first_line, 5); assert.equal(lexer.yylloc.last_line, 5); assert.equal(lexer.yylloc.first_column, 0); assert.equal(lexer.yylloc.last_column, 1); }; exports["test more()"] = function() { var dict = { rules: [ ["x", "return 'X';" ], ['"[^"]*', function(){ if(yytext.charAt(yyleng-1) == '\\') { this.more(); } else { yytext += this.input(); // swallow end quote return "STRING"; } } ], ["$", "return 'EOF';" ] ] }; var input = 'x"fgjdrtj\\"sdfsdf"x'; var lexer = new RegExpLexer(dict, input); assert.equal(lexer.lex(), "X"); assert.equal(lexer.lex(), "STRING"); assert.equal(lexer.lex(), "X"); assert.equal(lexer.lex(), "EOF"); }; exports["test defined token returns"] = function() { var tokens = {"2":"X", "3":"Y", "4":"EOF"}; var dict = { rules: [ ["x", "return 'X';" ], ["y", "return 'Y';" ], ["$", "return 'EOF';" ] ] }; var input = "xxyx"; var lexer = new RegExpLexer(dict, input, tokens); assert.equal(lexer.lex(), 2); assert.equal(lexer.lex(), 2); assert.equal(lexer.lex(), 3); assert.equal(lexer.lex(), 2); assert.equal(lexer.lex(), 4); }; exports["test module generator from constructor"] = function() { var dict = { rules: [ ["x", "return 'X';" ], ["y", "return 'Y';" ], ["$", "return 'EOF';" ] ] }; var input = "xxyx"; var lexerSource = RegExpLexer.generate(dict); eval(lexerSource); lexer.setInput(input); assert.equal(lexer.lex(), "X"); assert.equal(lexer.lex(), "X"); assert.equal(lexer.lex(), "Y"); assert.equal(lexer.lex(), "X"); assert.equal(lexer.lex(), "EOF"); }; exports["test module generator"] = function() { var dict = { rules: [ ["x", "return 'X';" ], ["y", "return 'Y';" ], ["$", "return 'EOF';" ] ] }; var input = "xxyx"; var lexer_ = new RegExpLexer(dict); var lexerSource = lexer_.generateModule(); eval(lexerSource); lexer.setInput(input); assert.equal(lexer.lex(), "X"); assert.equal(lexer.lex(), "X"); assert.equal(lexer.lex(), "Y"); assert.equal(lexer.lex(), "X"); assert.equal(lexer.lex(), "EOF"); }; exports["test generator with more complex lexer"] = function() { var dict = { rules: [ ["x", "return 'X';" ], ['"[^"]*', function(){ if(yytext.charAt(yyleng-1) == '\\') { this.more(); } else { yytext += this.input(); // swallow end quote return "STRING"; } } ], ["$", "return 'EOF';" ] ] }; var input = 'x"fgjdrtj\\"sdfsdf"x'; var lexer_ = new RegExpLexer(dict); var lexerSource = lexer_.generateModule(); eval(lexerSource); lexer.setInput(input); assert.equal(lexer.lex(), "X"); assert.equal(lexer.lex(), "STRING"); assert.equal(lexer.lex(), "X"); assert.equal(lexer.lex(), "EOF"); }; exports["test commonjs module generator"] = function() { var dict = { rules: [ ["x", "return 'X';" ], ["y", "return 'Y';" ], ["$", "return 'EOF';" ] ] }; var input = "xxyx"; var lexer_ = new RegExpLexer(dict); var lexerSource = lexer_.generateCommonJSModule(); var exports = {}; eval(lexerSource); exports.lexer.setInput(input); assert.equal(exports.lex(), "X"); assert.equal(exports.lex(), "X"); assert.equal(exports.lex(), "Y"); assert.equal(exports.lex(), "X"); assert.equal(exports.lex(), "EOF"); }; exports["test amd module generator"] = function() { var dict = { rules: [ ["x", "return 'X';" ], ["y", "return 'Y';" ], ["$", "return 'EOF';" ] ] }; var input = "xxyx"; var lexer_ = new RegExpLexer(dict); var lexerSource = lexer_.generateAMDModule(); var lexer; var define = function (_, fn) { lexer = fn(); }; eval(lexerSource); lexer.setInput(input); assert.equal(lexer.lex(), "X"); assert.equal(lexer.lex(), "X"); assert.equal(lexer.lex(), "Y"); assert.equal(lexer.lex(), "X"); assert.equal(lexer.lex(), "EOF"); }; exports["test DJ lexer"] = function() { var dict = { "lex": { "macros": { "digit": "[0-9]", "id": "[a-zA-Z][a-zA-Z0-9]*" }, "rules": [ ["\\/\\/.*", "/* ignore comment */"], ["main\\b", "return 'MAIN';"], ["class\\b", "return 'CLASS';"], ["extends\\b", "return 'EXTENDS';"], ["nat\\b", "return 'NATTYPE';"], ["if\\b", "return 'IF';"], ["else\\b", "return 'ELSE';"], ["for\\b", "return 'FOR';"], ["printNat\\b", "return 'PRINTNAT';"], ["readNat\\b", "return 'READNAT';"], ["this\\b", "return 'THIS';"], ["new\\b", "return 'NEW';"], ["var\\b", "return 'VAR';"], ["null\\b", "return 'NUL';"], ["{digit}+", "return 'NATLITERAL';"], ["{id}", "return 'ID';"], ["==", "return 'EQUALITY';"], ["=", "return 'ASSIGN';"], ["\\+", "return 'PLUS';"], ["-", "return 'MINUS';"], ["\\*", "return 'TIMES';"], [">", "return 'GREATER';"], ["\\|\\|", "return 'OR';"], ["!", "return 'NOT';"], ["\\.", "return 'DOT';"], ["\\{", "return 'LBRACE';"], ["\\}", "return 'RBRACE';"], ["\\(", "return 'LPAREN';"], ["\\)", "return 'RPAREN';"], [";", "return 'SEMICOLON';"], ["\\s+", "/* skip whitespace */"], [".", "print('Illegal character');throw 'Illegal character';"], ["$", "return 'ENDOFFILE';"] ] } }; var input = "class Node extends Object { \ var nat value var nat value;\ var Node next;\ var nat index;\ }\ \ class List extends Object {\ var Node start;\ \ Node prepend(Node startNode) {\ startNode.next = start;\ start = startNode;\ }\ \ nat find(nat index) {\ var nat value;\ var Node node;\ \ for(node = start;!(node == null);node = node.next){\ if(node.index == index){\ value = node.value;\ } else { 0; };\ };\ \ value;\ }\ }\ \ main {\ var nat index;\ var nat value;\ var List list;\ var Node startNode;\ \ index = readNat();\ list = new List;\ \ for(0;!(index==0);0){\ value = readNat();\ startNode = new Node;\ startNode.index = index;\ startNode.value = value;\ list.prepend(startNode);\ index = readNat();\ };\ \ index = readNat();\ \ for(0;!(index==0);0){\ printNat(list.find(index));\ index = readNat();\ };\ }"; var lexer = new RegExpLexer(dict.lex); lexer.setInput(input); var tok; while (tok = lexer.lex(), tok!==1) { assert.equal(typeof tok, "string"); } }; exports["test instantiation from string"] = function() { var dict = "%%\n'x' {return 'X';}\n'y' {return 'Y';}\n<> {return 'EOF';}"; var input = "x"; var lexer = new RegExpLexer(dict); lexer.setInput(input); assert.equal(lexer.lex(), "X"); assert.equal(lexer.lex(), "EOF"); }; exports["test inclusive start conditions"] = function() { var dict = { startConditions: { "TEST": 0, }, rules: [ ["enter-test", "this.begin('TEST');" ], [["TEST"], "x", "return 'T';" ], [["TEST"], "y", "this.begin('INITIAL'); return 'TY';" ], ["x", "return 'X';" ], ["y", "return 'Y';" ], ["$", "return 'EOF';" ] ] }; var input = "xenter-testxyy"; var lexer = new RegExpLexer(dict); lexer.setInput(input); assert.equal(lexer.lex(), "X"); assert.equal(lexer.lex(), "T"); assert.equal(lexer.lex(), "TY"); assert.equal(lexer.lex(), "Y"); assert.equal(lexer.lex(), "EOF"); }; exports["test exclusive start conditions"] = function() { var dict = { startConditions: { "EAT": 1, }, rules: [ ["\\/\\/", "this.begin('EAT');" ], [["EAT"], ".", "" ], [["EAT"], "\\n", "this.begin('INITIAL');" ], ["x", "return 'X';" ], ["y", "return 'Y';" ], ["$", "return 'EOF';" ] ] }; var input = "xy//yxteadh//ste\ny"; var lexer = new RegExpLexer(dict); lexer.setInput(input); assert.equal(lexer.lex(), "X"); assert.equal(lexer.lex(), "Y"); assert.equal(lexer.lex(), "Y"); assert.equal(lexer.lex(), "EOF"); }; exports["test pop start condition stack"] = function() { var dict = { startConditions: { "EAT": 1, }, rules: [ ["\\/\\/", "this.begin('EAT');" ], [["EAT"], ".", "" ], [["EAT"], "\\n", "this.popState();" ], ["x", "return 'X';" ], ["y", "return 'Y';" ], ["$", "return 'EOF';" ] ] }; var input = "xy//yxteadh//ste\ny"; var lexer = new RegExpLexer(dict); lexer.setInput(input); assert.equal(lexer.lex(), "X"); assert.equal(lexer.lex(), "Y"); assert.equal(lexer.lex(), "Y"); assert.equal(lexer.lex(), "EOF"); }; exports["test star start condition"] = function() { var dict = { startConditions: { "EAT": 1, }, rules: [ ["\\/\\/", "this.begin('EAT');" ], [["EAT"], ".", "" ], ["x", "return 'X';" ], ["y", "return 'Y';" ], [["*"],"$", "return 'EOF';" ] ] }; var input = "xy//yxteadh//stey"; var lexer = new RegExpLexer(dict); lexer.setInput(input); assert.equal(lexer.lex(), "X"); assert.equal(lexer.lex(), "Y"); assert.equal(lexer.lex(), "EOF"); }; exports["test start condition constants"] = function() { var dict = { startConditions: { "EAT": 1, }, rules: [ ["\\/\\/", "this.begin('EAT');" ], [["EAT"], ".", "if (YYSTATE==='EAT') return 'E';" ], ["x", "if (YY_START==='INITIAL') return 'X';" ], ["y", "return 'Y';" ], [["*"],"$", "return 'EOF';" ] ] }; var input = "xy//y"; var lexer = new RegExpLexer(dict); lexer.setInput(input); assert.equal(lexer.lex(), "X"); assert.equal(lexer.lex(), "Y"); assert.equal(lexer.lex(), "E"); assert.equal(lexer.lex(), "EOF"); }; exports["test unicode encoding"] = function() { var dict = { rules: [ ["\\u2713", "return 'CHECK';" ], ["\\u03c0", "return 'PI';" ], ["y", "return 'Y';" ] ] }; var input = "\u2713\u03c0y"; var lexer = new RegExpLexer(dict); lexer.setInput(input); assert.equal(lexer.lex(), "CHECK"); assert.equal(lexer.lex(), "PI"); assert.equal(lexer.lex(), "Y"); }; exports["test unicode"] = function() { var dict = { rules: [ ["π", "return 'PI';" ], ["y", "return 'Y';" ] ] }; var input = "πy"; var lexer = new RegExpLexer(dict); lexer.setInput(input); assert.equal(lexer.lex(), "PI"); assert.equal(lexer.lex(), "Y"); }; exports["test longest match returns"] = function() { var dict = { rules: [ [".", "return 'DOT';" ], ["cat", "return 'CAT';" ] ], options: {flex: true} }; var input = "cat!"; var lexer = new RegExpLexer(dict); lexer.setInput(input); assert.equal(lexer.lex(), "CAT"); assert.equal(lexer.lex(), "DOT"); }; exports["test case insensitivity"] = function() { var dict = { rules: [ ["cat", "return 'CAT';" ] ], options: {'case-insensitive': true} }; var input = "Cat"; var lexer = new RegExpLexer(dict); lexer.setInput(input); assert.equal(lexer.lex(), "CAT"); }; exports["test less"] = function() { var dict = { rules: [ ["cat", "this.less(2); return 'CAT';" ], ["t", "return 'T';" ] ], }; var input = "cat"; var lexer = new RegExpLexer(dict); lexer.setInput(input); assert.equal(lexer.lex(), "CAT"); assert.equal(lexer.lex(), "T"); }; exports["test EOF unput"] = function() { var dict = { startConditions: { "UN": 1, }, rules: [ ["U", "this.begin('UN');return 'U';" ], [["UN"],"$", "this.unput('X')" ], [["UN"],"X", "this.popState();return 'X';" ], ["$", "return 'EOF'" ] ] }; var input = "U"; var lexer = new RegExpLexer(dict); lexer.setInput(input); assert.equal(lexer.lex(), "U"); assert.equal(lexer.lex(), "X"); assert.equal(lexer.lex(), "EOF"); }; exports["test flex mode default rule"] = function() { var dict = { rules: [ ["x", "return 'X';" ] ], options: {flex: true} }; var input = "xyx"; var lexer = new RegExpLexer(dict); lexer.setInput(input); assert.equal(lexer.lex(), "X"); assert.equal(lexer.lex(), "X"); }; exports["test pipe precedence"] = function() { var dict = { rules: [ ["x|y", "return 'X_Y';" ], [".", "return 'N';"] ] }; var input = "xny"; var lexer = new RegExpLexer(dict); lexer.setInput(input); assert.equal(lexer.lex(), "X_Y"); assert.equal(lexer.lex(), "N"); assert.equal(lexer.lex(), "X_Y"); }; exports["test ranges"] = function() { var dict = { rules: [ ["x+", "return 'X';" ], [".", "return 'N';"] ], options: {ranges: true} }; var input = "xxxyy"; var lexer = new RegExpLexer(dict); lexer.setInput(input); assert.equal(lexer.lex(), "X"); assert.deepEqual(lexer.yylloc.range, [0, 3]); }; exports["test unput location"] = function() { var dict = { rules: [ ["x+", "return 'X';" ], ["y\\n", "this.unput('\\n'); return 'Y';" ], ["\\ny", "this.unput('y'); return 'BR';" ], ["y", "return 'Y';" ], [".", "return 'N';"] ], options: {ranges: true} }; var input = "xxxy\ny"; var lexer = new RegExpLexer(dict); lexer.setInput(input); console.log(lexer.rules); assert.equal(lexer.next(), "X"); assert.deepEqual(lexer.yylloc, {first_line: 1, first_column: 0, last_line: 1, last_column: 3, range: [0, 3]}); assert.equal(lexer.next(), "Y"); assert.deepEqual(lexer.yylloc, {first_line: 1, first_column: 3, last_line: 1, last_column: 4, range: [3, 4]}); assert.equal(lexer.next(), "BR"); assert.deepEqual(lexer.yylloc, {first_line: 1, first_column: 4, last_line: 2, last_column: 0, range: [4, 5]}); assert.equal(lexer.next(), "Y"); assert.deepEqual(lexer.yylloc, {first_line: 2, first_column: 0, last_line: 2, last_column: 1, range: [5, 6]}); }; exports["test unput location again"] = function() { var dict = { rules: [ ["x+", "return 'X';" ], ["y\\ny\\n", "this.unput('\\n'); return 'YY';" ], ["\\ny", "this.unput('y'); return 'BR';" ], ["y", "return 'Y';" ], [".", "return 'N';"] ], options: {ranges: true} }; var input = "xxxy\ny\ny"; var lexer = new RegExpLexer(dict); lexer.setInput(input); console.log(lexer.rules); assert.equal(lexer.next(), "X"); assert.deepEqual(lexer.yylloc, {first_line: 1, first_column: 0, last_line: 1, last_column: 3, range: [0, 3]}); assert.equal(lexer.next(), "YY"); assert.deepEqual(lexer.yylloc, {first_line: 1, first_column: 3, last_line: 2, last_column: 1, range: [3, 6]}); assert.equal(lexer.next(), "BR"); assert.deepEqual(lexer.yylloc, {first_line: 2, first_column: 1, last_line: 3, last_column: 0, range: [6, 7]}); assert.equal(lexer.next(), "Y"); assert.deepEqual(lexer.yylloc, {first_line: 3, first_column: 0, last_line: 3, last_column: 1, range: [7, 8]}); }; exports["test backtracking lexer reject() method"] = function() { var dict = { rules: [ ["[A-Z]+([0-9]+)", "if (this.matches[1].length) this.reject(); else return 'ID';" ], ["[A-Z]+", "return 'WORD';" ], ["[0-9]+", "return 'NUM';" ] ], options: {backtrack_lexer: true} }; var input = "A5"; var lexer = new RegExpLexer(dict); lexer.setInput(input); assert.equal(lexer.lex(), "WORD"); assert.equal(lexer.lex(), "NUM"); }; exports["test lexer reject() exception when not in backtracking mode"] = function() { var dict = { rules: [ ["[A-Z]+([0-9]+)", "if (this.matches[1].length) this.reject(); else return 'ID';" ], ["[A-Z]+", "return 'WORD';" ], ["[0-9]+", "return 'NUM';" ] ], options: {backtrack_lexer: false} }; var input = "A5"; var lexer = new RegExpLexer(dict); lexer.setInput(input); assert.throws(function() { lexer.lex(); }, function(err) { return (err instanceof Error) && /You can only invoke reject/.test(err); }); }; exports["test yytext state after unput"] = function() { var dict = { rules: [ ["cat4", "this.unput('4'); return 'CAT';" ], ["4", "return 'NUMBER';" ], ["$", "return 'EOF';"] ] }; var input = "cat4"; var lexer = new RegExpLexer(dict); lexer.setInput(input); assert.equal(lexer.lex(), "CAT"); /*the yytext should be 'cat' since we unput '4' from 'cat4' */ assert.equal(lexer.yytext, "cat"); assert.equal(lexer.lex(), "NUMBER"); assert.equal(lexer.lex(), "EOF"); };