pax_global_header00006660000000000000000000000064121432644320014513gustar00rootroot0000000000000052 comment=fe56cfc6a54866620c5827648cd438a4ec7f7636 joni-2.0.0/000077500000000000000000000000001214326443200124515ustar00rootroot00000000000000joni-2.0.0/.gitignore000066400000000000000000000000071214326443200144360ustar00rootroot00000000000000target joni-2.0.0/.travis.yml000066400000000000000000000003601214326443200145610ustar00rootroot00000000000000language: java notifications: # Email notifications are disabled to not annoy anybody. # See http://about.travis-ci.org/docs/user/build-configuration/ to learn more # about configuring notification recipients and more. email: false joni-2.0.0/MANIFEST.MF000066400000000000000000000001221214326443200140760ustar00rootroot00000000000000Implementation-Title: Joni (java port of Oniguruma) Implementation-Version: 1.1.7 joni-2.0.0/build.xml000066400000000000000000000022171214326443200142740ustar00rootroot00000000000000 joni-2.0.0/pom.xml000066400000000000000000000114501214326443200137670ustar00rootroot00000000000000 4.0.0 org.jruby.joni joni jar 2.0.0 Joni Java port of Oniguruma: http://www.geocities.jp/kosako3/oniguruma that uses byte arrays directly instead of java Strings and chars UTF-8 JIRA http://jira.codehaus.org/browse/JRUBY scm:git:https://github.com/jruby/joni.git scm:git:git@github.com:jruby/joni.git https://github.com/jruby/joni MIT License http://www.opensource.org/licenses/mit-license.php repo codehaus-jruby-repository JRuby Central Repository dav:https://dav.codehaus.org/repository/jruby codehaus-jruby-snapshot-repository JRuby Central Development Repository dav:https://dav.codehaus.org/snapshots.repository/jruby codehaus-jruby-site JRuby Maven site dav:https://dav.codehaus.org/jruby/info codehaus Codehaus Repository true false http://repository.codehaus.org lopex Marcin Mielzynski lopx@gazeta.pl org.jruby.jcodings jcodings 1.0.8 junit junit 3.8.1 test org.ow2.asm asm 4.0 provided src test joni org.apache.maven.wagon 
wagon-webdav maven-compiler-plugin 2.3.2 1.5 1.5 org.apache.maven.plugins maven-surefire-plugin 2.4.3 **/TestJoni.java maven-jar-plugin 2.2 MANIFEST.MF org.apache.maven.plugins maven-source-plugin 2.1.2 attach-sources verify jar-no-fork org.apache.maven.plugins maven-javadoc-plugin 2.8.1 private true attach-sources verify jar joni-2.0.0/src/000077500000000000000000000000001214326443200132405ustar00rootroot00000000000000joni-2.0.0/src/org/000077500000000000000000000000001214326443200140275ustar00rootroot00000000000000joni-2.0.0/src/org/joni/000077500000000000000000000000001214326443200147665ustar00rootroot00000000000000joni-2.0.0/src/org/joni/Analyser.java000066400000000000000000002226451214326443200174220ustar00rootroot00000000000000/* * Permission is hereby granted, free of charge, to any person obtaining a copy of * this software and associated documentation files (the "Software"), to deal in * the Software without restriction, including without limitation the rights to * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do * so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ package org.joni; import static org.joni.BitStatus.bsAll; import static org.joni.BitStatus.bsAt; import static org.joni.BitStatus.bsClear; import static org.joni.BitStatus.bsOnAt; import static org.joni.BitStatus.bsOnAtSimple; import static org.joni.Option.isCaptureGroup; import static org.joni.Option.isFindCondition; import static org.joni.Option.isIgnoreCase; import static org.joni.Option.isMultiline; import static org.joni.ast.ConsAltNode.newAltNode; import static org.joni.ast.ConsAltNode.newListNode; import static org.joni.ast.QuantifierNode.isRepeatInfinite; import java.util.HashSet; import org.jcodings.CaseFoldCodeItem; import org.jcodings.ObjPtr; import org.jcodings.Ptr; import org.jcodings.constants.CharacterType; import org.joni.ast.AnchorNode; import org.joni.ast.BackRefNode; import org.joni.ast.CClassNode; import org.joni.ast.CTypeNode; import org.joni.ast.CallNode; import org.joni.ast.ConsAltNode; import org.joni.ast.EncloseNode; import org.joni.ast.Node; import org.joni.ast.QuantifierNode; import org.joni.ast.StringNode; import org.joni.constants.AnchorType; import org.joni.constants.EncloseType; import org.joni.constants.NodeType; import org.joni.constants.RegexState; import org.joni.constants.StackPopLevel; import org.joni.constants.TargetInfo; final class Analyser extends Parser { protected Analyser(ScanEnvironment env, byte[]bytes, int p, int end) { super(env, bytes, p, end); } protected final void compile() { regex.state = RegexState.COMPILING; if (Config.DEBUG) { Config.log.println(regex.encStringToString(bytes, getBegin(), getEnd())); } reset(); regex.numMem = 0; regex.numRepeat = 0; regex.numNullCheck = 0; //regex.repeatRangeAlloc = 0; regex.repeatRangeLo = null; regex.repeatRangeHi = null; regex.numCombExpCheck = 0; if (Config.USE_COMBINATION_EXPLOSION_CHECK) regex.numCombExpCheck = 0; parse(); if (Config.USE_NAMED_GROUP) { /* mixed use named group and no-named group */ if (env.numNamed > 0 && syntax.captureOnlyNamedGroup() && 
!isCaptureGroup(regex.options)) { if (env.numNamed != env.numMem) { root = disableNoNameGroupCapture(root); } else { numberedRefCheck(root); } } } // USE_NAMED_GROUP if (Config.USE_NAMED_GROUP) { if (env.numCall > 0) { env.unsetAddrList = new UnsetAddrList(env.numCall); setupSubExpCall(root); // r != 0 ??? subexpRecursiveCheckTrav(root); // r < 0 -< err, FOUND_CALLED_NODE = 1 subexpInfRecursiveCheckTrav(root); // r != 0 recursion infinite ??? regex.numCall = env.numCall; } else { regex.numCall = 0; } } // USE_NAMED_GROUP if (Config.DEBUG_PARSE_TREE_RAW && Config.DEBUG_PARSE_TREE) { Config.log.println(""); Config.log.println(root + "\n"); } root = setupTree(root, 0); if (Config.DEBUG_PARSE_TREE) { if (Config.DEBUG_PARSE_TREE_RAW) Config.log.println(""); root.verifyTree(new HashSet(), env.reg.warnings); Config.log.println(root + "\n"); } regex.captureHistory = env.captureHistory; regex.btMemStart = env.btMemStart; regex.btMemEnd = env.btMemEnd; if (isFindCondition(regex.options)) { regex.btMemEnd = bsAll(); } else { regex.btMemEnd = env.btMemEnd; regex.btMemEnd |= regex.captureHistory; } if (Config.USE_COMBINATION_EXPLOSION_CHECK) { if (env.backrefedMem == 0 || (Config.USE_SUBEXP_CALL && env.numCall == 0)) { setupCombExpCheck(root, 0); if (Config.USE_SUBEXP_CALL && env.hasRecursion) { env.numCombExpCheck = 0; } else { // USE_SUBEXP_CALL if (env.combExpMaxRegNum > 0) { for (int i=1; ic)/ node = noNameDisableMap(node, map, counter); } } else { //en.target = noNameDisableMap(en.target, map, counter); en.setTarget(noNameDisableMap(en.target, map, counter)); // ??? 
} return node; } private void noNameDisableMapFor_anchor(Node node, int[]map, Ptr counter) { AnchorNode an = (AnchorNode)node; switch (an.type) { case AnchorNode.PREC_READ: case AnchorNode.PREC_READ_NOT: case AnchorNode.LOOK_BEHIND: case AnchorNode.LOOK_BEHIND_NOT: an.setTarget(noNameDisableMap(an.target, map, counter)); } } private Node noNameDisableMap(Node node, int[]map, Ptr counter) { switch (node.getType()) { case NodeType.LIST: case NodeType.ALT: noNameDisableMapFor_cosAlt(node, map, counter); break; case NodeType.QTFR: noNameDisableMapFor_quantifier(node, map, counter); break; case NodeType.ENCLOSE: node = noNameDisableMapFor_enclose(node, map, counter); break; case NodeType.ANCHOR: noNameDisableMapFor_anchor(node, map, counter); break; } // switch return node; } private void renumberByMap(Node node, int[]map) { switch (node.getType()) { case NodeType.LIST: case NodeType.ALT: ConsAltNode can = (ConsAltNode)node; do { renumberByMap(can.car, map); } while ((can = can.cdr) != null); break; case NodeType.QTFR: renumberByMap(((QuantifierNode)node).target, map); break; case NodeType.ENCLOSE: renumberByMap(((EncloseNode)node).target, map); break; case NodeType.BREF: ((BackRefNode)node).renumber(map); break; } // switch } protected final void numberedRefCheck(Node node) { switch (node.getType()) { case NodeType.LIST: case NodeType.ALT: ConsAltNode can = (ConsAltNode)node; do { numberedRefCheck(can.car); } while ((can = can.cdr) != null); break; case NodeType.QTFR: numberedRefCheck(((QuantifierNode)node).target); break; case NodeType.ENCLOSE: numberedRefCheck(((EncloseNode)node).target); break; case NodeType.BREF: BackRefNode br = (BackRefNode)node; if (!br.isNameRef()) newValueException(ERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED); break; } // switch } protected final Node disableNoNameGroupCapture(Node root) { int[]map = new int[env.numMem + 1]; for (int i=1; i<=env.numMem; i++) map[i] = 0; root = noNameDisableMap(root, map, new Ptr(0)); renumberByMap(root, map); for 
(int i=1, pos=1; i<=env.numMem; i++) { if (map[i] > 0) { env.memNodes[pos] = env.memNodes[i]; pos++; } } int loc = env.captureHistory; env.captureHistory = bsClear(); for (int i=1; i<=Config.MAX_CAPTURE_HISTORY_GROUP; i++) { if (bsAt(loc, i)) { env.captureHistory = bsOnAtSimple(env.captureHistory, map[i]); } } env.numMem = env.numNamed; regex.numMem = env.numNamed; regex.renumberNameTable(map); return root; } private void swap(Node a, Node b) { a.swap(b); if (root == b) { root = a; } else if (root == a) { root = b; } } // USE_INFINITE_REPEAT_MONOMANIAC_MEM_STATUS_CHECK private int quantifiersMemoryInfo(Node node) { int info = 0; switch(node.getType()) { case NodeType.LIST: case NodeType.ALT: ConsAltNode can = (ConsAltNode)node; do { int v = quantifiersMemoryInfo(can.car); if (v > info) info = v; } while ((can = can.cdr) != null); break; case NodeType.CALL: if (Config.USE_SUBEXP_CALL) { CallNode cn = (CallNode)node; if (cn.isRecursion()) { return TargetInfo.IS_EMPTY_REC; /* tiny version */ } else { info = quantifiersMemoryInfo(cn.target); } } // USE_SUBEXP_CALL break; case NodeType.QTFR: QuantifierNode qn = (QuantifierNode)node; if (qn.upper != 0) { info = quantifiersMemoryInfo(qn.target); } break; case NodeType.ENCLOSE: EncloseNode en = (EncloseNode)node; switch (en.type) { case EncloseType.MEMORY: return TargetInfo.IS_EMPTY_MEM; case EncloseType.OPTION: case EncloseNode.STOP_BACKTRACK: info = quantifiersMemoryInfo(en.target); break; default: break; } // inner switch break; case NodeType.BREF: case NodeType.STR: case NodeType.CTYPE: case NodeType.CCLASS: case NodeType.CANY: case NodeType.ANCHOR: default: break; } // switch return info; } private int getMinMatchLength(Node node) { int min = 0; switch (node.getType()) { case NodeType.BREF: BackRefNode br = (BackRefNode)node; if (br.isRecursion()) break; if (br.back[0] > env.numMem) newValueException(ERR_INVALID_BACKREF); min = getMinMatchLength(env.memNodes[br.back[0]]); for (int i=1; i env.numMem) 
newValueException(ERR_INVALID_BACKREF); int tmin = getMinMatchLength(env.memNodes[br.back[i]]); if (min > tmin) min = tmin; } break; case NodeType.CALL: if (Config.USE_SUBEXP_CALL) { CallNode cn = (CallNode)node; if (cn.isRecursion()) { EncloseNode en = (EncloseNode)cn.target; if (en.isMinFixed()) min = en.minLength; } else { min = getMinMatchLength(cn.target); } } // USE_SUBEXP_CALL break; case NodeType.LIST: ConsAltNode can = (ConsAltNode)node; do { min += getMinMatchLength(can.car); } while ((can = can.cdr) != null); break; case NodeType.ALT: ConsAltNode y = (ConsAltNode)node; do { Node x = y.car; int tmin = getMinMatchLength(x); if (y == node) { min = tmin; } else if (min > tmin) { min = tmin; } } while ((y = y.cdr) != null); break; case NodeType.STR: min = ((StringNode)node).length(); break; case NodeType.CTYPE: min = 1; break; case NodeType.CCLASS: case NodeType.CANY: min = 1; break; case NodeType.QTFR: QuantifierNode qn = (QuantifierNode)node; if (qn.lower > 0) { min = getMinMatchLength(qn.target); min = MinMaxLen.distanceMultiply(min, qn.lower); } break; case NodeType.ENCLOSE: EncloseNode en = (EncloseNode)node; switch (en.type) { case EncloseType.MEMORY: if (Config.USE_SUBEXP_CALL) { if (en.isMinFixed()) { min = en.minLength; } else { min = getMinMatchLength(en.target); en.minLength = min; en.setMinFixed(); } } // USE_SUBEXP_CALL break; case EncloseType.OPTION: case EncloseType.STOP_BACKTRACK: min = getMinMatchLength(en.target); break; } // inner switch break; case NodeType.ANCHOR: default: break; } // switch return min; } private int getMaxMatchLength(Node node) { int max = 0; switch (node.getType()) { case NodeType.LIST: ConsAltNode ln = (ConsAltNode)node; do { int tmax = getMaxMatchLength(ln.car); max = MinMaxLen.distanceAdd(max, tmax); } while ((ln = ln.cdr) != null); break; case NodeType.ALT: ConsAltNode an = (ConsAltNode)node; do { int tmax = getMaxMatchLength(an.car); if (max < tmax) max = tmax; } while ((an = an.cdr) != null); break; case 
NodeType.STR: max = ((StringNode)node).length(); break; case NodeType.CTYPE: max = enc.maxLengthDistance(); break; case NodeType.CCLASS: case NodeType.CANY: max = enc.maxLengthDistance(); break; case NodeType.BREF: BackRefNode br = (BackRefNode)node; if (br.isRecursion()) { max = MinMaxLen.INFINITE_DISTANCE; break; } for (int i=0; i env.numMem) newValueException(ERR_INVALID_BACKREF); int tmax = getMaxMatchLength(env.memNodes[br.back[i]]); if (max < tmax) max = tmax; } break; case NodeType.CALL: if (Config.USE_SUBEXP_CALL) { CallNode cn = (CallNode)node; if (!cn.isRecursion()) { max = getMaxMatchLength(cn.target); } else { max = MinMaxLen.INFINITE_DISTANCE; } } // USE_SUBEXP_CALL break; case NodeType.QTFR: QuantifierNode qn = (QuantifierNode)node; if (qn.upper != 0) { max = getMaxMatchLength(qn.target); if (max != 0) { if (!isRepeatInfinite(qn.upper)) { max = MinMaxLen.distanceMultiply(max, qn.upper); } else { max = MinMaxLen.INFINITE_DISTANCE; } } } break; case NodeType.ENCLOSE: EncloseNode en = (EncloseNode)node; switch (en.type) { case EncloseType.MEMORY: if (Config.USE_SUBEXP_CALL) { if (en.isMaxFixed()) { max = en.maxLength; } else { max = getMaxMatchLength(en.target); en.maxLength = max; en.setMaxFixed(); } } // USE_SUBEXP_CALL break; case EncloseType.OPTION: case EncloseType.STOP_BACKTRACK: max = getMaxMatchLength(en.target); break; } // inner switch break; case NodeType.ANCHOR: default: break; } // switch return max; } private static final int GET_CHAR_LEN_VARLEN = -1; private static final int GET_CHAR_LEN_TOP_ALT_VARLEN = -2; protected final int getCharLengthTree(Node node) { return getCharLengthTree(node, 0); } private int getCharLengthTree(Node node, int level) { level++; int len = 0; returnCode = 0; switch(node.getType()) { case NodeType.LIST: ConsAltNode ln = (ConsAltNode)node; do { int tlen = getCharLengthTree(ln.car, level); if (returnCode == 0) len = MinMaxLen.distanceAdd(len, tlen); } while (returnCode == 0 && (ln = ln.cdr) != null); break; case 
NodeType.ALT: ConsAltNode an = (ConsAltNode)node; boolean varLen = false; int tlen = getCharLengthTree(an.car, level); while (returnCode == 0 && (an = an.cdr) != null) { int tlen2 = getCharLengthTree(an.car, level); if (returnCode == 0) { if (tlen != tlen2) varLen = true; } } if (returnCode == 0) { if (varLen) { if (level == 1) { returnCode = GET_CHAR_LEN_TOP_ALT_VARLEN; } else { returnCode = GET_CHAR_LEN_VARLEN; } } else { len = tlen; } } break; case NodeType.STR: StringNode sn = (StringNode)node; len = sn.length(enc); break; case NodeType.QTFR: QuantifierNode qn = (QuantifierNode)node; if (qn.lower == qn.upper) { tlen = getCharLengthTree(qn.target, level); if (returnCode == 0) len = MinMaxLen.distanceMultiply(tlen, qn.lower); } else { returnCode = GET_CHAR_LEN_VARLEN; } break; case NodeType.CALL: if (Config.USE_SUBEXP_CALL) { CallNode cn = (CallNode)node; if (!cn.isRecursion()) { len = getCharLengthTree(cn.target, level); } else { returnCode = GET_CHAR_LEN_VARLEN; } } // USE_SUBEXP_CALL break; case NodeType.CTYPE: len = 1; case NodeType.CCLASS: case NodeType.CANY: len = 1; break; case NodeType.ENCLOSE: EncloseNode en = (EncloseNode)node; switch(en.type) { case EncloseType.MEMORY: if (Config.USE_SUBEXP_CALL) { if (en.isCLenFixed()) { len = en.charLength; } else { len = getCharLengthTree(en.target, level); if (returnCode == 0) { en.charLength = len; en.setCLenFixed(); } } } // USE_SUBEXP_CALL break; case EncloseType.OPTION: case EncloseType.STOP_BACKTRACK: len = getCharLengthTree(en.target, level); break; } // inner switch break; case NodeType.ANCHOR: break; default: returnCode = GET_CHAR_LEN_VARLEN; } // switch return len; } /* x is not included y ==> 1 : 0 */ private boolean isNotIncluded(Node x, Node y) { Node tmp; // !retry:! 
retry: while(true) { int yType = y.getType(); switch(x.getType()) { case NodeType.CTYPE: switch(yType) { case NodeType.CTYPE: CTypeNode cny = (CTypeNode)y; CTypeNode cnx = (CTypeNode)x; return cny.ctype == cnx.ctype && cny.not != cnx.not; case NodeType.CCLASS: // !swap:! tmp = x; x = y; y = tmp; // !goto retry;! continue retry; case NodeType.STR: // !goto swap;! tmp = x; x = y; y = tmp; continue retry; default: break; } // inner switch break; case NodeType.CCLASS: CClassNode xc = (CClassNode)x; switch(yType) { case NodeType.CTYPE: switch(((CTypeNode)y).ctype) { case CharacterType.WORD: if (!((CTypeNode)y).not) { if (xc.mbuf == null && !xc.isNot()) { for (int i=0; i ys.length()) len = ys.length(); if (xs.isAmbig() || ys.isAmbig()) { /* tiny version */ return false; } else { for (int i=0, p=ys.p, q=xs.p; i 0) { if (qn.headExact != null) { n = qn.headExact; } else { n = getHeadValueNode(qn.target, exact); } } break; case NodeType.ENCLOSE: EncloseNode en = (EncloseNode)node; switch (en.type) { case EncloseType.OPTION: int options = regex.options; regex.options = en.option; n = getHeadValueNode(en.target, exact); regex.options = options; break; case EncloseType.MEMORY: case EncloseType.STOP_BACKTRACK: n = getHeadValueNode(en.target, exact); break; } // inner switch break; case NodeType.ANCHOR: AnchorNode an = (AnchorNode)node; if (an.type == AnchorType.PREC_READ) n = getHeadValueNode(an.target, exact); break; default: break; } // switch return n; } // true: invalid private boolean checkTypeTree(Node node, int typeMask, int encloseMask, int anchorMask) { if ((node.getType2Bit() & typeMask) == 0) return true; boolean invalid = false; switch(node.getType()) { case NodeType.LIST: case NodeType.ALT: ConsAltNode can = (ConsAltNode)node; do { invalid = checkTypeTree(can.car, typeMask, encloseMask, anchorMask); } while (!invalid && (can = can.cdr) != null); break; case NodeType.QTFR: invalid = checkTypeTree(((QuantifierNode)node).target, typeMask, encloseMask, anchorMask); 
break; case NodeType.ENCLOSE: EncloseNode en = (EncloseNode)node; if ((en.type & encloseMask) == 0) return true; invalid = checkTypeTree(en.target, typeMask, encloseMask, anchorMask); break; case NodeType.ANCHOR: AnchorNode an = (AnchorNode)node; if ((an.type & anchorMask) == 0) return true; if (an.target != null) invalid = checkTypeTree(an.target, typeMask, encloseMask, anchorMask); break; default: break; } // switch return invalid; } private static final int RECURSION_EXIST = 1; private static final int RECURSION_INFINITE = 2; private int subexpInfRecursiveCheck(Node node, boolean head) { int r = 0; switch (node.getType()) { case NodeType.LIST: int min; ConsAltNode x = (ConsAltNode)node; do { int ret = subexpInfRecursiveCheck(x.car, head); if (ret == RECURSION_INFINITE) return ret; r |= ret; if (head) { min = getMinMatchLength(x.car); if (min != 0) head = false; } } while ((x = x.cdr) != null); break; case NodeType.ALT: ConsAltNode can = (ConsAltNode)node; r = RECURSION_EXIST; do { int ret = subexpInfRecursiveCheck(can.car, head); if (ret == RECURSION_INFINITE) return ret; r &= ret; } while ((can = can.cdr) != null); break; case NodeType.QTFR: QuantifierNode qn = (QuantifierNode)node; r = subexpInfRecursiveCheck(qn.target, head); if (r == RECURSION_EXIST) { if (qn.lower == 0) r = 0; } break; case NodeType.ANCHOR: AnchorNode an = (AnchorNode)node; switch (an.type) { case AnchorType.PREC_READ: case AnchorType.PREC_READ_NOT: case AnchorType.LOOK_BEHIND: case AnchorType.LOOK_BEHIND_NOT: r = subexpInfRecursiveCheck(an.target, head); break; } // inner switch break; case NodeType.CALL: r = subexpInfRecursiveCheck(((CallNode)node).target, head); break; case NodeType.ENCLOSE: EncloseNode en = (EncloseNode)node; if (en.isMark2()) { return 0; } else if (en.isMark1()) { return !head ? RECURSION_EXIST : RECURSION_INFINITE; // throw exception here ??? 
} else { en.setMark2(); r = subexpInfRecursiveCheck(en.target, head); en.clearMark2(); } break; default: break; } // switch return r; } protected final int subexpInfRecursiveCheckTrav(Node node) { int r = 0; switch (node.getType()) { case NodeType.LIST: case NodeType.ALT: ConsAltNode can = (ConsAltNode)node; do { r = subexpInfRecursiveCheckTrav(can.car); } while (r == 0 && (can = can.cdr) != null); break; case NodeType.QTFR: r = subexpInfRecursiveCheckTrav(((QuantifierNode)node).target); break; case NodeType.ANCHOR: AnchorNode an = (AnchorNode)node; switch (an.type) { case AnchorType.PREC_READ: case AnchorType.PREC_READ_NOT: case AnchorType.LOOK_BEHIND: case AnchorType.LOOK_BEHIND_NOT: r = subexpInfRecursiveCheckTrav(an.target); break; } // inner switch break; case NodeType.ENCLOSE: EncloseNode en = (EncloseNode)node; if (en.isRecursion()) { en.setMark1(); r = subexpInfRecursiveCheck(en.target, true); if (r > 0) newValueException(ERR_NEVER_ENDING_RECURSION); en.clearMark1(); } r = subexpInfRecursiveCheckTrav(en.target); break; default: break; } // switch return r; } private int subexpRecursiveCheck(Node node) { int r = 0; switch (node.getType()) { case NodeType.LIST: case NodeType.ALT: ConsAltNode can = (ConsAltNode)node; do { r |= subexpRecursiveCheck(can.car); } while ((can = can.cdr) != null); break; case NodeType.QTFR: r = subexpRecursiveCheck(((QuantifierNode)node).target); break; case NodeType.ANCHOR: AnchorNode an = (AnchorNode)node; switch (an.type) { case AnchorType.PREC_READ: case AnchorType.PREC_READ_NOT: case AnchorType.LOOK_BEHIND: case AnchorType.LOOK_BEHIND_NOT: r = subexpRecursiveCheck(an.target); break; } // inner switch break; case NodeType.CALL: CallNode cn = (CallNode)node; r = subexpRecursiveCheck(cn.target); if (r != 0) cn.setRecursion(); break; case NodeType.ENCLOSE: EncloseNode en = (EncloseNode)node; if (en.isMark2()) { return 0; } else if (en.isMark1()) { return 1; /* recursion */ } else { en.setMark2(); r = 
subexpRecursiveCheck(en.target); en.clearMark2(); } break; default: break; } // switch return r; } private static final int FOUND_CALLED_NODE = 1; protected final int subexpRecursiveCheckTrav(Node node) { int r = 0; switch (node.getType()) { case NodeType.LIST: case NodeType.ALT: ConsAltNode can = (ConsAltNode)node; do { int ret = subexpRecursiveCheckTrav(can.car); if (ret == FOUND_CALLED_NODE) { r = FOUND_CALLED_NODE; } // else if (ret < 0) return ret; ??? } while ((can = can.cdr) != null); break; case NodeType.QTFR: QuantifierNode qn = (QuantifierNode)node; r = subexpRecursiveCheckTrav(qn.target); if (qn.upper == 0) { if (r == FOUND_CALLED_NODE) qn.isRefered = true; } break; case NodeType.ANCHOR: AnchorNode an = (AnchorNode)node; switch (an.type) { case AnchorType.PREC_READ: case AnchorType.PREC_READ_NOT: case AnchorType.LOOK_BEHIND: case AnchorType.LOOK_BEHIND_NOT: r = subexpRecursiveCheckTrav(an.target); break; } // inner switch break; case NodeType.ENCLOSE: EncloseNode en = (EncloseNode)node; if (!en.isRecursion()) { if (en.isCalled()) { en.setMark1(); r = subexpRecursiveCheck(en.target); if (r != 0) en.setRecursion(); en.clearMark1(); } } r = subexpRecursiveCheckTrav(en.target); if (en.isCalled()) r |= FOUND_CALLED_NODE; break; default: break; } // switch return r; } private void setCallAttr(CallNode cn) { cn.target = env.memNodes[cn.groupNum]; // no setTarget in call nodes! 
if (cn.target == null) newValueException(ERR_UNDEFINED_NAME_REFERENCE, cn.nameP, cn.nameEnd); ((EncloseNode)cn.target).setCalled(); env.btMemStart = BitStatus.bsOnAt(env.btMemStart, cn.groupNum); cn.unsetAddrList = env.unsetAddrList; } protected final void setupSubExpCall(Node node) { switch(node.getType()) { case NodeType.LIST: ConsAltNode ln = (ConsAltNode)node; do { setupSubExpCall(ln.car); } while ((ln = ln.cdr) != null); break; case NodeType.ALT: ConsAltNode can = (ConsAltNode)node; do { setupSubExpCall(can.car); } while ((can = can.cdr) != null); break; case NodeType.QTFR: setupSubExpCall(((QuantifierNode)node).target); break; case NodeType.ENCLOSE: setupSubExpCall(((EncloseNode)node).target); break; case NodeType.CALL: CallNode cn = (CallNode)node; if (cn.groupNum != 0) { int gNum = cn.groupNum; if (Config.USE_NAMED_GROUP) { if (env.numNamed > 0 && syntax.captureOnlyNamedGroup() && !isCaptureGroup(env.option)) { newValueException(ERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED); } } // USE_NAMED_GROUP if (gNum > env.numMem) newValueException(ERR_UNDEFINED_GROUP_REFERENCE, cn.nameP, cn.nameEnd); setCallAttr(cn); } else { if (Config.USE_NAMED_GROUP) { NameEntry ne = regex.nameToGroupNumbers(cn.name, cn.nameP, cn.nameEnd); if (ne == null) { newValueException(ERR_UNDEFINED_NAME_REFERENCE, cn.nameP, cn.nameEnd); } else if (ne.backNum > 1) { newValueException(ERR_MULTIPLEX_DEFINITION_NAME_CALL, cn.nameP, cn.nameEnd); } else { cn.groupNum = ne.backRef1; // ne.backNum == 1 ? ne.backRef1 : ne.backRefs[0]; // ??? need to check ? setCallAttr(cn); } } } break; case NodeType.ANCHOR: AnchorNode an = (AnchorNode)node; switch (an.type) { case AnchorType.PREC_READ: case AnchorType.PREC_READ_NOT: case AnchorType.LOOK_BEHIND: case AnchorType.LOOK_BEHIND_NOT: setupSubExpCall(an.target); break; } break; } // switch } /* divide different length alternatives in look-behind. (?<=A|B) ==> (?<=A)|(?<=B) (? (? 
list */ } while ((np = ((ConsAltNode)np).cdr) != null); } return node; } private Node setupLookBehind(Node node) { AnchorNode an = (AnchorNode)node; int len = getCharLengthTree(an.target); switch(returnCode) { case 0: an.charLength = len; break; case GET_CHAR_LEN_VARLEN: newSyntaxException(ERR_INVALID_LOOK_BEHIND_PATTERN); break; case GET_CHAR_LEN_TOP_ALT_VARLEN: if (syntax.differentLengthAltLookBehind()) { return divideLookBehindAlternatives(node); } else { newSyntaxException(ERR_INVALID_LOOK_BEHIND_PATTERN); } } return node; } private void nextSetup(Node node, Node nextNode) { // retry: retry: while(true) { int type = node.getType(); if (type == NodeType.QTFR) { QuantifierNode qn = (QuantifierNode)node; if (qn.greedy && isRepeatInfinite(qn.upper)) { if (Config.USE_QTFR_PEEK_NEXT) { StringNode n = (StringNode)getHeadValueNode(nextNode, true); /* '\0': for UTF-16BE etc... */ if (n != null && n.bytes[n.p] != 0) { // ????????? qn.nextHeadExact = n; } } // USE_QTFR_PEEK_NEXT /* automatic posseivation a*b ==> (?>a*)b */ if (qn.lower <= 1) { if (qn.target.isSimple()) { Node x = getHeadValueNode(qn.target, false); if (x != null) { Node y = getHeadValueNode(nextNode, false); if (y != null && isNotIncluded(x, y)) { EncloseNode en = new EncloseNode(EncloseType.STOP_BACKTRACK); //onig_node_new_enclose en.setStopBtSimpleRepeat(); //en.setTarget(qn.target); // optimize it ?? swap(node, en); en.setTarget(node); } } } } } } else if (type == NodeType.ENCLOSE) { EncloseNode en = (EncloseNode)node; if (en.isMemory()) { node = en.target; // !goto retry;! 
continue retry; } } break; } // while } private void updateStringNodeCaseFoldSingleByte(StringNode sn, byte[]toLower) { int end = sn.end; byte[]bytes = sn.bytes; int sp = 0; int p = sn.p; while (p < end) { byte lower = toLower[bytes[p] & 0xff]; if (lower != bytes[p]) { byte[]sbuf = new byte[end - sn.p]; System.arraycopy(bytes, sn.p, sbuf, 0, sp); while (p < end) sbuf[sp++] = toLower[bytes[p++] & 0xff]; sn.set(sbuf, 0, sp); break; } else { sp++; p++; } } } private void updateStringNodeCaseFoldMultiByte(StringNode sn) { byte[]bytes = sn.bytes; int end = sn.end; value = sn.p; int sp = 0; byte[]buf = new byte[Config.ENC_MBC_CASE_FOLD_MAXLEN]; while (value < end) { int ovalue = value; int len = enc.mbcCaseFold(regex.caseFoldFlag, bytes, this, end, buf); for (int i = 0; i < len; i++) { if (bytes[ovalue + i] != buf[i]) { byte[]sbuf = new byte[sn.length() << 1]; System.arraycopy(bytes, sn.p, sbuf, 0, ovalue - sn.p); value = ovalue; while (value < end) { len = enc.mbcCaseFold(regex.caseFoldFlag, bytes, this, end, buf); for (i = 0; i < len; i++) { if (sp >= sbuf.length) { byte[]tmp = new byte[sbuf.length << 1]; System.arraycopy(sbuf, 0, tmp, 0, sbuf.length); sbuf = tmp; } sbuf[sp++] = buf[i]; } } sn.set(sbuf, 0, sp); return; } sp++; } } } private void updateStringNodeCaseFold(Node node) { StringNode sn = (StringNode)node; byte[] toLower = enc.toLowerCaseTable(); if (toLower != null) { updateStringNodeCaseFoldSingleByte(sn, toLower); } else { updateStringNodeCaseFoldMultiByte(sn); } } private Node expandCaseFoldMakeRemString(byte[]bytes, int p, int end) { StringNode node = new StringNode(bytes, p, end); updateStringNodeCaseFold(node); node.setAmbig(); node.setDontGetOptInfo(); return node; } private boolean expandCaseFoldStringAlt(int itemNum, CaseFoldCodeItem[]items, byte[]bytes, int p, int slen, int end, ObjPtr node) { boolean varlen = false; for (int i=0; i prevNode = new ObjPtr(); StringNode stringNode = null; while (p < end) { CaseFoldCodeItem[]items = 
enc.caseFoldCodesByString(regex.caseFoldFlag, bytes, p, end); int len = enc.length(bytes, p, end); if (items.length == 0) { if (stringNode == null) { if (root == null && prevNode.p != null) { topRoot = root = ConsAltNode.listAdd(null, prevNode.p); } prevNode.p = stringNode = new StringNode(); // onig_node_new_str(NULL, NULL); if (root != null) ConsAltNode.listAdd(root, stringNode); } stringNode.cat(bytes, p, p + len); } else { altNum *= (items.length + 1); if (altNum > THRESHOLD_CASE_FOLD_ALT_FOR_EXPANSION) break; if (root == null && prevNode.p != null) { topRoot = root = ConsAltNode.listAdd(null, prevNode.p); } if (expandCaseFoldStringAlt(items.length, items, bytes, p, len, end, prevNode)) { // if (r == 1) if (root == null) { topRoot = (ConsAltNode)prevNode.p; } else { ConsAltNode.listAdd(root, prevNode.p); } root = (ConsAltNode)((ConsAltNode)prevNode.p).car; } else { /* r == 0 */ if (root != null) ConsAltNode.listAdd(root, prevNode.p); } stringNode = null; } p += len; } if (p < end) { Node srem = expandCaseFoldMakeRemString(bytes, p, end); if (prevNode.p != null && root == null) { topRoot = root = ConsAltNode.listAdd(null, prevNode.p); } if (root == null) { prevNode.p = srem; } else { ConsAltNode.listAdd(root, srem); } } /* ending */ Node xnode = topRoot != null ? 
topRoot : prevNode.p;
        swap(node, xnode);
        return xnode;
    }

    // Flags and limits used by setupCombExpCheck ("CEC" presumably stands for
    // combination-explosion check -- TODO confirm against the upstream Oniguruma sources).
    // A quantifier whose repeat spread (upper - lower) reaches this value is treated as a "big" repeat.
    private static final int CEC_THRES_NUM_BIG_REPEAT       = 512;
    // Spread value used for a quantifier without an upper bound.
    private static final int CEC_INFINITE_NUM               = 0x7fffffff;

    // State bit handed to the target of an unbounded quantifier.
    private static final int CEC_IN_INFINITE_REPEAT         = (1<<0);
    // State bit handed to the target of a bounded quantifier whose upper bound is > 1.
    private static final int CEC_IN_FINITE_REPEAT           = (1<<1);
    // Result bit reported by a quantifier whose spread is >= CEC_THRES_NUM_BIG_REPEAT.
    private static final int CEC_CONT_BIG_REPEAT            = (1<<2);

    /**
     * Walks the pattern tree and assigns {@code combExpCheckNum} to the quantifiers
     * that need one, updating {@code env.numCombExpCheck}, {@code env.currMaxRegNum},
     * {@code env.combExpMaxRegNum} and {@code env.hasRecursion} on the way.
     *
     * @param node  subtree to inspect
     * @param state CEC_* bit flags describing the context {@code node} appears in
     * @return the CEC_* flags after this subtree; list elements receive the value
     *         returned by their predecessor, alternatives all receive {@code state}
     *         and their results are OR-ed together. Iteration over list elements and
     *         alternatives stops as soon as a negative value is returned.
     */
    protected final int setupCombExpCheck(Node node, int state) {
        int r = state;
        int ret;

        switch (node.getType()) {
        case NodeType.LIST:
            // sequence: the flags produced by one element are the input of the next one
            ConsAltNode ln = (ConsAltNode)node;

            do {
                r = setupCombExpCheck(ln.car, r);
                //prev = ((ConsAltNode)node).car;
            } while (r >= 0 && (ln = ln.cdr) != null);
            break;

        case NodeType.ALT:
            // alternation: every branch starts from the incoming state, results are merged
            ConsAltNode an = (ConsAltNode)node;
            do {
                ret = setupCombExpCheck(an.car, state);
                r |= ret;
            } while (ret >= 0 && (an = an.cdr) != null);
            break;

        case NodeType.QTFR:
            QuantifierNode qn = (QuantifierNode)node;
            int childState = state;   // state passed down to the quantified target
            int addState = 0;         // extra bits reported to the caller
            int varNum;               // repeat spread: upper - lower, or CEC_INFINITE_NUM

            if (!isRepeatInfinite(qn.upper)) {
                if (qn.upper > 1) {
                    /* {0,1}, {1,1} are allowed */
                    childState |= CEC_IN_FINITE_REPEAT;

                    /* check (a*){n,m}, (a+){n,m} => (a*){n,n}, (a+){n,n} */
                    // only done while no group is back-referenced (env.backrefedMem == 0);
                    // note that this rewrites qn.upper in place
                    if (env.backrefedMem == 0) {
                        if (qn.target.getType() == NodeType.ENCLOSE) {
                            EncloseNode en = (EncloseNode)qn.target;
                            if (en.type == EncloseType.MEMORY) {
                                if (en.target.getType() == NodeType.QTFR) {
                                    QuantifierNode q = (QuantifierNode)en.target;
                                    if (isRepeatInfinite(q.upper) && q.greedy == qn.greedy) {
                                        qn.upper = qn.lower == 0 ? 1 : qn.lower;
                                        // the quantifier collapsed to {0,1}/{1,1}: no finite-repeat flag for the child
                                        if (qn.upper == 1) childState = state;
                                    }
                                }
                            }
                        }
                    }
                }
            }

            if ((state & CEC_IN_FINITE_REPEAT) != 0) {
                // nested inside a bounded repeat: marked with -1 instead of receiving a check number
                qn.combExpCheckNum = -1;
            } else {
                if (isRepeatInfinite(qn.upper)) {
                    varNum = CEC_INFINITE_NUM;
                    childState |= CEC_IN_INFINITE_REPEAT;
                } else {
                    varNum = qn.upper - qn.lower;
                }

                if (varNum >= CEC_THRES_NUM_BIG_REPEAT) addState |= CEC_CONT_BIG_REPEAT;

                // a check number is handed out when this quantifier varies inside an unbounded
                // repeat, or when it is itself a big repeat in a context flagged CEC_CONT_BIG_REPEAT
                if (((state & CEC_IN_INFINITE_REPEAT) != 0 && varNum != 0) ||
                    ((state & CEC_CONT_BIG_REPEAT) != 0 && varNum >= CEC_THRES_NUM_BIG_REPEAT)) {
                    if (qn.combExpCheckNum == 0) {
                        env.numCombExpCheck++;
                        qn.combExpCheckNum = env.numCombExpCheck;
                        if (env.currMaxRegNum > env.combExpMaxRegNum) {
                            env.combExpMaxRegNum = env.currMaxRegNum;
                        }
                    }
                }
            }
            r = setupCombExpCheck(qn.target, childState);
            r |= addState;
            break;

        case NodeType.ENCLOSE:
            EncloseNode en = (EncloseNode)node;
            switch (en.type) {
            case EncloseNode.MEMORY:
                // remember the highest register number seen so far
                if (env.currMaxRegNum < en.regNum) {
                    env.currMaxRegNum = en.regNum;
                }
                r = setupCombExpCheck(en.target, state);
                break;

            default:
                r = setupCombExpCheck(en.target, state);
            } // inner switch
            break;

        case NodeType.CALL:
            if (Config.USE_SUBEXP_CALL) {
                CallNode cn = (CallNode)node;
                if (cn.isRecursion()) {
                    // recursive calls are not followed, only recorded
                    env.hasRecursion = true;
                } else {
                    r = setupCombExpCheck(cn.target, state);
                }
            } // USE_SUBEXP_CALL
            break;

        default:
            break;

        } // switch

        return r;
    }

    // State bits passed down by setupTree.
    private static final int IN_ALT                     = (1<<0);   // below an alternation
    private static final int IN_NOT                     = (1<<1);   // below a negative look-ahead / look-behind
    private static final int IN_REPEAT                  = (1<<2);   // below a quantifier
    private static final int IN_VAR_REPEAT              = (1<<3);   // below a quantifier with lower != upper

    // Upper limit checked by setupTree before it unrolls a fixed-count repeated string.
    private static final int EXPAND_STRING_MAX_LENGTH   = 100;

    /* setup_tree does the following work.
    1. check empty loop. (set qn->target_empty_info)
    2. expand ignore-case in char class.
    3. set memory status bit flags. (reg->mem_stats)
    4. set qn->head_exact for [push, exact] -> [push_or_jump_exact1, exact].
    5. find invalid patterns in look-behind.
    6. expand repeated string.
*/ protected final Node setupTree(Node node, int state) { restart: while (true) { switch (node.getType()) { case NodeType.LIST: ConsAltNode lin = (ConsAltNode)node; Node prev = null; do { setupTree(lin.car, state); if (prev != null) { nextSetup(prev, lin.car); } prev = lin.car; } while ((lin = lin.cdr) != null); break; case NodeType.ALT: ConsAltNode aln = (ConsAltNode)node; do { setupTree(aln.car, (state | IN_ALT)); } while ((aln = aln.cdr) != null); break; case NodeType.CCLASS: break; case NodeType.STR: if (isIgnoreCase(regex.options) && !((StringNode)node).isRaw()) { node = expandCaseFoldString(node); } break; case NodeType.CTYPE: case NodeType.CANY: break; case NodeType.CALL: // if (Config.USE_SUBEXP_CALL) ? break; case NodeType.BREF: BackRefNode br = (BackRefNode)node; for (int i=0; i env.numMem) newValueException(ERR_INVALID_BACKREF); env.backrefedMem = bsOnAt(env.backrefedMem, br.back[i]); env.btMemStart = bsOnAt(env.btMemStart, br.back[i]); if (Config.USE_BACKREF_WITH_LEVEL) { if (br.isNestLevel()) { env.btMemEnd = bsOnAt(env.btMemEnd, br.back[i]); } } // USE_BACKREF_AT_LEVEL ((EncloseNode)env.memNodes[br.back[i]]).setMemBackrefed(); } break; case NodeType.QTFR: QuantifierNode qn = (QuantifierNode)node; Node target = qn.target; if ((state & IN_REPEAT) != 0) qn.setInRepeat(); if (isRepeatInfinite(qn.upper) || qn.lower >= 1) { int d = getMinMatchLength(target); if (d == 0) { qn.targetEmptyInfo = TargetInfo.IS_EMPTY; if (Config.USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT) { int info = quantifiersMemoryInfo(target); if (info > 0) qn.targetEmptyInfo = info; } // USE_INFINITE_REPEAT_MONOMANIAC_MEM_STATUS_CHECK // strange stuff here (turned off) } } state |= IN_REPEAT; if (qn.lower != qn.upper) state |= IN_VAR_REPEAT; target = setupTree(target, state); /* expand string */ if (target.getType() == NodeType.STR) { if (!isRepeatInfinite(qn.lower) && qn.lower == qn.upper && qn.lower > 1 && qn.lower <= EXPAND_STRING_MAX_LENGTH) { StringNode sn = (StringNode)target; 
int len = sn.length(); if (len * qn.lower <= EXPAND_STRING_MAX_LENGTH) { StringNode str = qn.convertToString(sn.flag); int n = qn.lower; for (int i = 0; i < n; i++) { str.cat(sn.bytes, sn.p, sn.end); } break; /* break case NT_QTFR: */ } } } if (Config.USE_OP_PUSH_OR_JUMP_EXACT) { if (qn.greedy && qn.targetEmptyInfo != 0) { if (target.getType() == NodeType.QTFR) { QuantifierNode tqn = (QuantifierNode)target; if (tqn.headExact != null) { qn.headExact = tqn.headExact; tqn.headExact = null; } } else { qn.headExact = getHeadValueNode(qn.target, true); } } } // USE_OP_PUSH_OR_JUMP_EXACT break; case NodeType.ENCLOSE: EncloseNode en = (EncloseNode)node; switch (en.type) { case EncloseType.OPTION: int options = regex.options; regex.options = en.option; setupTree(en.target, state); regex.options = options; break; case EncloseType.MEMORY: if ((state & (IN_ALT | IN_NOT | IN_VAR_REPEAT)) != 0) { env.btMemStart = bsOnAt(env.btMemStart, en.regNum); /* SET_ENCLOSE_STATUS(node, NST_MEM_IN_ALT_NOT); */ } setupTree(en.target, state); break; case EncloseType.STOP_BACKTRACK: setupTree(en.target, state); if (en.target.getType() == NodeType.QTFR) { QuantifierNode tqn = (QuantifierNode)en.target; if (isRepeatInfinite(tqn.upper) && tqn.lower <= 1 && tqn.greedy) { /* (?>a*), a*+ etc... 
*/ if (tqn.target.isSimple()) en.setStopBtSimpleRepeat(); } } break; } // inner switch break; case NodeType.ANCHOR: AnchorNode an = (AnchorNode)node; switch (an.type) { case AnchorType.PREC_READ: setupTree(an.target, state); break; case AnchorType.PREC_READ_NOT: setupTree(an.target, (state | IN_NOT)); break; case AnchorType.LOOK_BEHIND: if (checkTypeTree(an.target, NodeType.ALLOWED_IN_LB, EncloseType.ALLOWED_IN_LB, AnchorType.ALLOWED_IN_LB)) newSyntaxException(ERR_INVALID_LOOK_BEHIND_PATTERN); node = setupLookBehind(node); if (node.getType() != NodeType.ANCHOR) continue restart; setupTree(((AnchorNode)node).target, state); break; case AnchorType.LOOK_BEHIND_NOT: if (checkTypeTree(an.target, NodeType.ALLOWED_IN_LB, EncloseType.ALLOWED_IN_LB, AnchorType.ALLOWED_IN_LB)) newSyntaxException(ERR_INVALID_LOOK_BEHIND_PATTERN); node = setupLookBehind(node); if (node.getType() != NodeType.ANCHOR) continue restart; setupTree(((AnchorNode)node).target, (state | IN_NOT)); break; } // inner switch break; } // switch return node; } // restart: while } private static final int MAX_NODE_OPT_INFO_REF_COUNT = 5; private void optimizeNodeLeft(Node node, NodeOptInfo opt, OptEnvironment oenv) { // oenv remove, pass mmd opt.clear(); opt.setBoundNode(oenv.mmd); switch (node.getType()) { case NodeType.LIST: { OptEnvironment nenv = new OptEnvironment(); NodeOptInfo nopt = new NodeOptInfo(); nenv.copy(oenv); ConsAltNode lin = (ConsAltNode)node; do { optimizeNodeLeft(lin.car, nopt, nenv); nenv.mmd.add(nopt.length); opt.concatLeftNode(nopt, enc); } while ((lin = lin.cdr) != null); break; } case NodeType.ALT: { NodeOptInfo nopt = new NodeOptInfo(); ConsAltNode aln = (ConsAltNode)node; do { optimizeNodeLeft(aln.car, nopt, oenv); if (aln == node) { opt.copy(nopt); } else { opt.altMerge(nopt, oenv); } } while ((aln = aln.cdr) != null); break; } case NodeType.STR: { StringNode sn = (StringNode)node; int slen = sn.length(); if (!sn.isAmbig()) { opt.exb.concatStr(sn.bytes, sn.p, sn.end, sn.isRaw(), 
enc); if (slen > 0) { opt.map.addChar(sn.bytes[sn.p], enc); } opt.length.set(slen, slen); } else { int max; if (sn.isDontGetOptInfo()) { int n = sn.length(enc); max = enc.maxLengthDistance() * n; } else { opt.exb.concatStr(sn.bytes, sn.p, sn.end, sn.isRaw(), enc); opt.exb.ignoreCase = true; if (slen > 0) { opt.map.addCharAmb(sn.bytes, sn.p, sn.end, enc, oenv.caseFoldFlag); } max = slen; } opt.length.set(slen, max); } if (opt.exb.length == slen) { opt.exb.reachEnd = true; } break; } case NodeType.CCLASS: { CClassNode cc = (CClassNode)node; /* no need to check ignore case. (setted in setup_tree()) */ if (cc.mbuf != null || cc.isNot()) { int min = enc.minLength(); int max = enc.maxLengthDistance(); opt.length.set(min, max); } else { for (int i=0; i 0) { opt.expr.copy(nopt.exb); } else if (nopt.exm.length > 0) { opt.expr.copy(nopt.exm); } opt.expr.reachEnd = false; if (nopt.map.value > 0) opt.map.copy(nopt.map); break; case AnchorType.PREC_READ_NOT: case AnchorType.LOOK_BEHIND: /* Sorry, I can't make use of it. 
*/ case AnchorType.LOOK_BEHIND_NOT: break; } // inner switch break; } case NodeType.BREF: { BackRefNode br = (BackRefNode)node; if (br.isRecursion()) { opt.length.set(0, MinMaxLen.INFINITE_DISTANCE); break; } Node[]nodes = oenv.scanEnv.memNodes; int min = getMinMatchLength(nodes[br.back[0]]); int max = getMaxMatchLength(nodes[br.back[0]]); for (int i=1; i tmin) min = tmin; if (max < tmax) max = tmax; } opt.length.set(min, max); break; } case NodeType.CALL: { if (Config.USE_SUBEXP_CALL) { CallNode cn = (CallNode)node; if (cn.isRecursion()) { opt.length.set(0, MinMaxLen.INFINITE_DISTANCE); } else { int safe = oenv.options; oenv.options = ((EncloseNode)cn.target).option; optimizeNodeLeft(cn.target, opt, oenv); oenv.options = safe; } } // USE_SUBEXP_CALL break; } case NodeType.QTFR: { NodeOptInfo nopt = new NodeOptInfo(); QuantifierNode qn = (QuantifierNode)node; optimizeNodeLeft(qn.target, nopt, oenv); if (qn.lower == 0 && isRepeatInfinite(qn.upper)) { if (oenv.mmd.max == 0 && qn.target.getType() == NodeType.CANY && qn.greedy) { if (isMultiline(oenv.options)) { opt.anchor.add(AnchorType.ANYCHAR_STAR_ML); } else { opt.anchor.add(AnchorType.ANYCHAR_STAR); } } } else { if (qn.lower > 0) { opt.copy(nopt); if (nopt.exb.length > 0) { if (nopt.exb.reachEnd) { int i; for (i = 2; i <= qn.lower && !opt.exb.isFull(); i++) { opt.exb.concat(nopt.exb, enc); } if (i < qn.lower) { opt.exb.reachEnd = false; } } } if (qn.lower != qn.upper) { opt.exb.reachEnd = false; opt.exm.reachEnd = false; } if (qn.lower > 1) { opt.exm.reachEnd = false; } } } int min = MinMaxLen.distanceMultiply(nopt.length.min, qn.lower); int max; if (isRepeatInfinite(qn.upper)) { max = nopt.length.max > 0 ? 
MinMaxLen.INFINITE_DISTANCE : 0; } else { max = MinMaxLen.distanceMultiply(nopt.length.max, qn.upper); } opt.length.set(min, max); break; } case NodeType.ENCLOSE: { EncloseNode en = (EncloseNode)node; switch (en.type) { case EncloseType.OPTION: int save = oenv.options; oenv.options = en.option; optimizeNodeLeft(en.target, opt, oenv); oenv.options = save; break; case EncloseType.MEMORY: if (Config.USE_SUBEXP_CALL && ++en.optCount > MAX_NODE_OPT_INFO_REF_COUNT) { int min = 0; int max = MinMaxLen.INFINITE_DISTANCE; if (en.isMinFixed()) min = en.minLength; if (en.isMaxFixed()) max = en.maxLength; opt.length.set(min, max); } else { // USE_SUBEXP_CALL optimizeNodeLeft(en.target, opt, oenv); if (opt.anchor.isSet(AnchorType.ANYCHAR_STAR_MASK)) { if (bsAt(oenv.scanEnv.backrefedMem, en.regNum)) { opt.anchor.remove(AnchorType.ANYCHAR_STAR_MASK); } } } break; case EncloseType.STOP_BACKTRACK: optimizeNodeLeft(en.target, opt, oenv); break; } // inner switch break; } default: newInternalException(ERR_PARSER_BUG); } // switch } protected final void setOptimizedInfoFromTree(Node node) { NodeOptInfo opt = new NodeOptInfo(); OptEnvironment oenv = new OptEnvironment(); oenv.enc = regex.enc; oenv.options = regex.options; oenv.caseFoldFlag = regex.caseFoldFlag; oenv.scanEnv = env; oenv.mmd.clear(); // ?? optimizeNodeLeft(node, opt, oenv); regex.anchor = opt.anchor.leftAnchor & (AnchorType.BEGIN_BUF | AnchorType.BEGIN_POSITION | AnchorType.ANYCHAR_STAR | AnchorType.ANYCHAR_STAR_ML); regex.anchor |= opt.anchor.rightAnchor & (AnchorType.END_BUF | AnchorType.SEMI_END_BUF); if ((regex.anchor & (AnchorType.END_BUF | AnchorType.SEMI_END_BUF)) != 0) { regex.anchorDmin = opt.length.min; regex.anchorDmax = opt.length.max; } if (opt.exb.length > 0 || opt.exm.length > 0) { opt.exb.select(opt.exm, enc); if (opt.map.value > 0 && opt.exb.compare(opt.map) > 0) { // !goto set_map;! 
regex.setOptimizeMapInfo(opt.map); regex.setSubAnchor(opt.map.anchor); } else { regex.setExactInfo(opt.exb); regex.setSubAnchor(opt.exb.anchor); } } else if (opt.map.value > 0) { // !set_map:! regex.setOptimizeMapInfo(opt.map); regex.setSubAnchor(opt.map.anchor); } else { regex.subAnchor |= opt.anchor.leftAnchor & AnchorType.BEGIN_LINE; if (opt.length.max == 0) regex.subAnchor |= opt.anchor.rightAnchor & AnchorType.END_LINE; } if (Config.DEBUG_COMPILE || Config.DEBUG_MATCH) { Config.log.println(regex.optimizeInfoToString()); } } } joni-2.0.0/src/org/joni/ApplyCaseFold.java000066400000000000000000000071021214326443200203170ustar00rootroot00000000000000/* * Permission is hereby granted, free of charge, to any person obtaining a copy of * this software and associated documentation files (the "Software"), to deal in * the Software without restriction, including without limitation the rights to * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do * so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ package org.joni; import org.jcodings.ApplyAllCaseFoldFunction; import org.jcodings.Encoding; import org.joni.ast.CClassNode; import org.joni.ast.ConsAltNode; import org.joni.ast.StringNode; final class ApplyCaseFold implements ApplyAllCaseFoldFunction { // i_apply_case_fold public void apply(int from, int[]to, int length, Object o) { ApplyCaseFoldArg arg = (ApplyCaseFoldArg)o; ScanEnvironment env = arg.env; Encoding enc = env.enc; CClassNode cc = arg.cc; BitSet bs = cc.bs; if (length == 1) { boolean inCC = cc.isCodeInCC(enc, from); if (Config.CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS) { if ((inCC && !cc.isNot()) || (!inCC && cc.isNot())) { if (enc.minLength() > 1 || to[0] >= BitSet.SINGLE_BYTE_SIZE) { cc.addCodeRange(env, to[0], to[0]); } else { /* /(?i:[^A-C])/.match("a") ==> fail. */ bs.set(to[0]); } } } else { if (inCC) { if (enc.minLength() > 1 || to[0] >= BitSet.SINGLE_BYTE_SIZE) { if (cc.isNot()) cc.clearNotFlag(enc); cc.addCodeRange(env, to[0], to[0]); } else { if (cc.isNot()) { bs.clear(to[0]); } else { bs.set(to[0]); } } } } // CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS } else { if (cc.isCodeInCC(enc, from) && (!Config.CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS || !cc.isNot())) { StringNode node = null; for (int i=0; i 1 || cc.bs.isEmpty()) { len = OPSize.OPCODE; } else { len = OPSize.OPCODE + BitSet.BITSET_SIZE; } len += OPSize.LENGTH + cc.mbuf.used; } return len; } @Override protected void compileCClassNode(CClassNode cc) { if (cc.isShare()) { // shared char class addOpcode(OPCode.CCLASS_NODE); addPointer(cc); return; } if (cc.mbuf == null) { if (cc.isNot()) { addOpcode(enc.isSingleByte() ? OPCode.CCLASS_NOT_SB : OPCode.CCLASS_NOT); } else { addOpcode(enc.isSingleByte() ? 
OPCode.CCLASS_SB : OPCode.CCLASS); } addInts(cc.bs.bits, BitSet.BITSET_SIZE); // add_bitset } else { if (enc.minLength() > 1 || cc.bs.isEmpty()) { if (cc.isNot()) { addOpcode(OPCode.CCLASS_MB_NOT); } else { addOpcode(OPCode.CCLASS_MB); } addMultiByteCClass(cc.mbuf); } else { if (cc.isNot()) { addOpcode(OPCode.CCLASS_MIX_NOT); } else { addOpcode(OPCode.CCLASS_MIX); } // store the bit set and mbuf themself! addInts(cc.bs.bits, BitSet.BITSET_SIZE); // add_bitset addMultiByteCClass(cc.mbuf); } } } @Override protected void compileCTypeNode(CTypeNode node) { CTypeNode cn = node; int op; switch (cn.ctype) { case CharacterType.WORD: if (cn.not) { op = enc.isSingleByte() ? OPCode.NOT_WORD_SB : OPCode.NOT_WORD; } else { op = enc.isSingleByte() ? OPCode.WORD_SB : OPCode.WORD; } break; default: newInternalException(ERR_PARSER_BUG); return; // not reached } // inner switch addOpcode(op); } @Override protected void compileAnyCharNode() { if (isMultiline(regex.options)) { addOpcode(enc.isSingleByte() ? OPCode.ANYCHAR_ML_SB : OPCode.ANYCHAR_ML); } else { addOpcode(enc.isSingleByte() ? OPCode.ANYCHAR_SB : OPCode.ANYCHAR); } } @Override protected void compileCallNode(CallNode node) { addOpcode(OPCode.CALL); node.unsetAddrList.add(codeLength, node.target); addAbsAddr(0); /*dummy addr.*/ } @Override protected void compileBackrefNode(BackRefNode node) { BackRefNode br = node; if (Config.USE_BACKREF_WITH_LEVEL && br.isNestLevel()) { addOpcode(OPCode.BACKREF_WITH_LEVEL); addOption(regex.options & Option.IGNORECASE); addLength(br.nestLevel); // !goto add_bacref_mems;! 
addLength(br.backNum); for (int i=br.backNum-1; i>=0; i--) addMemNum(br.back[i]); return; } else { // USE_BACKREF_AT_LEVEL if (br.backNum == 1) { if (isIgnoreCase(regex.options)) { addOpcode(OPCode.BACKREFN_IC); addMemNum(br.back[0]); } else { switch (br.back[0]) { case 1: addOpcode(OPCode.BACKREF1); break; case 2: addOpcode(OPCode.BACKREF2); break; default: addOpcode(OPCode.BACKREFN); addOpcode(br.back[0]); break; } // switch } } else { if (isIgnoreCase(regex.options)) { addOpcode(OPCode.BACKREF_MULTI_IC); } else { addOpcode(OPCode.BACKREF_MULTI); } // !add_bacref_mems:! addLength(br.backNum); for (int i=br.backNum-1; i>=0; i--) addMemNum(br.back[i]); } } } private static final int REPEAT_RANGE_ALLOC = 8; private void entryRepeatRange(int id, int lower, int upper) { if (regex.repeatRangeLo == null) { regex.repeatRangeLo = new int[REPEAT_RANGE_ALLOC]; regex.repeatRangeHi = new int[REPEAT_RANGE_ALLOC]; } else if (id >= regex.repeatRangeLo.length){ int[]tmp = new int[regex.repeatRangeLo.length + REPEAT_RANGE_ALLOC]; System.arraycopy(regex.repeatRangeLo, 0, tmp, 0, regex.repeatRangeLo.length); regex.repeatRangeLo = tmp; tmp = new int[regex.repeatRangeHi.length + REPEAT_RANGE_ALLOC]; System.arraycopy(regex.repeatRangeHi, 0, tmp, 0, regex.repeatRangeHi.length); regex.repeatRangeHi = tmp; } regex.repeatRangeLo[id] = lower; regex.repeatRangeHi[id] = isRepeatInfinite(upper) ? 0x7fffffff : upper; } private void compileRangeRepeatNode(QuantifierNode qn, int targetLen, int emptyInfo) { int numRepeat = regex.numRepeat; addOpcode(qn.greedy ? OPCode.REPEAT : OPCode.REPEAT_NG); addMemNum(numRepeat); /* OP_REPEAT ID */ regex.numRepeat++; addRelAddr(targetLen + OPSize.REPEAT_INC); entryRepeatRange(numRepeat, qn.lower, qn.upper); compileTreeEmptyCheck(qn.target, emptyInfo); if ((Config.USE_SUBEXP_CALL && regex.numCall > 0) || qn.isInRepeat()) { addOpcode(qn.greedy ? OPCode.REPEAT_INC_SG : OPCode.REPEAT_INC_NG_SG); } else { addOpcode(qn.greedy ? 
OPCode.REPEAT_INC : OPCode.REPEAT_INC_NG); } addMemNum(numRepeat); /* OP_REPEAT ID */ } private static final int QUANTIFIER_EXPAND_LIMIT_SIZE = 50; // was 50 private static boolean cknOn(int ckn) { return ckn > 0; } private int compileCECLengthQuantifierNode(QuantifierNode qn) { boolean infinite = isRepeatInfinite(qn.upper); int emptyInfo = qn.targetEmptyInfo; int tlen = compileLengthTree(qn.target); int ckn = regex.numCombExpCheck > 0 ? qn.combExpCheckNum : 0; int cklen = cknOn(ckn) ? OPSize.STATE_CHECK_NUM : 0; /* anychar repeat */ if (qn.target.getType() == NodeType.CANY) { if (qn.greedy && infinite) { if (qn.nextHeadExact != null && !cknOn(ckn)) { return OPSize.ANYCHAR_STAR_PEEK_NEXT + tlen * qn.lower + cklen; } else { return OPSize.ANYCHAR_STAR + tlen * qn.lower + cklen; } } } int modTLen; if (emptyInfo != 0) { modTLen = tlen + (OPSize.NULL_CHECK_START + OPSize.NULL_CHECK_END); } else { modTLen = tlen; } int len; if (infinite && qn.lower <= 1) { if (qn.greedy) { if (qn.lower == 1) { len = OPSize.JUMP; } else { len = 0; } len += OPSize.PUSH + cklen + modTLen + OPSize.JUMP; } else { if (qn.lower == 0) { len = OPSize.JUMP; } else { len = 0; } len += modTLen + OPSize.PUSH + cklen; } } else if (qn.upper == 0) { if (qn.isRefered) { /* /(?..){0}/ */ len = OPSize.JUMP + tlen; } else { len = 0; } } else if (qn.upper == 1 && qn.greedy) { if (qn.lower == 0) { if (cknOn(ckn)) { len = OPSize.STATE_CHECK_PUSH + tlen; } else { len = OPSize.PUSH + tlen; } } else { len = tlen; } } else if (!qn.greedy && qn.upper == 1 && qn.lower == 0) { /* '??' */ len = OPSize.PUSH + cklen + OPSize.JUMP + tlen; } else { len = OPSize.REPEAT_INC + modTLen + OPSize.OPCODE + OPSize.RELADDR + OPSize.MEMNUM; if (cknOn(ckn)) { len += OPSize.STATE_CHECK; } } return len; } @Override protected void compileCECQuantifierNode(QuantifierNode qn) { boolean infinite = isRepeatInfinite(qn.upper); int emptyInfo = qn.targetEmptyInfo; int tlen = compileLengthTree(qn.target); int ckn = regex.numCombExpCheck > 0 ? 
qn.combExpCheckNum : 0; if (qn.isAnyCharStar()) { compileTreeNTimes(qn.target, qn.lower); if (qn.nextHeadExact != null && !cknOn(ckn)) { if (isMultiline(regex.options)) { addOpcode(enc.isSingleByte() ? OPCode.ANYCHAR_ML_STAR_PEEK_NEXT_SB : OPCode.ANYCHAR_ML_STAR_PEEK_NEXT); } else { addOpcode(enc.isSingleByte() ? OPCode.ANYCHAR_STAR_PEEK_NEXT_SB : OPCode.ANYCHAR_STAR_PEEK_NEXT); } if (cknOn(ckn)) { addStateCheckNum(ckn); } StringNode sn = (StringNode)qn.nextHeadExact; addBytes(sn.bytes, sn.p, 1); return; } else { if (isMultiline(regex.options)) { if (cknOn(ckn)) { addOpcode(enc.isSingleByte() ? OPCode.STATE_CHECK_ANYCHAR_ML_STAR_SB : OPCode.STATE_CHECK_ANYCHAR_ML_STAR); } else { addOpcode(enc.isSingleByte() ? OPCode.ANYCHAR_ML_STAR_SB : OPCode.ANYCHAR_ML_STAR); } } else { if (cknOn(ckn)) { addOpcode(enc.isSingleByte() ? OPCode.STATE_CHECK_ANYCHAR_STAR_SB : OPCode.STATE_CHECK_ANYCHAR_STAR); } else { addOpcode(enc.isSingleByte() ? OPCode.ANYCHAR_STAR_SB : OPCode.ANYCHAR_STAR); } } if (cknOn(ckn)) { addStateCheckNum(ckn); } return; } } int modTLen; if (emptyInfo != 0) { modTLen = tlen + (OPSize.NULL_CHECK_START + OPSize.NULL_CHECK_END); } else { modTLen = tlen; } if (infinite && qn.lower <= 1) { if (qn.greedy) { if (qn.lower == 1) { addOpcodeRelAddr(OPCode.JUMP, cknOn(ckn) ? OPSize.STATE_CHECK_PUSH : OPSize.PUSH); } if (cknOn(ckn)) { addOpcode(OPCode.STATE_CHECK_PUSH); addStateCheckNum(ckn); addRelAddr(modTLen + OPSize.JUMP); } else { addOpcodeRelAddr(OPCode.PUSH, modTLen + OPSize.JUMP); } compileTreeEmptyCheck(qn.target, emptyInfo); addOpcodeRelAddr(OPCode.JUMP, -(modTLen + OPSize.JUMP + (cknOn(ckn) ? 
OPSize.STATE_CHECK_PUSH : OPSize.PUSH))); } else { if (qn.lower == 0) { addOpcodeRelAddr(OPCode.JUMP, modTLen); } compileTreeEmptyCheck(qn.target, emptyInfo); if (cknOn(ckn)) { addOpcode(OPCode.STATE_CHECK_PUSH_OR_JUMP); addStateCheckNum(ckn); addRelAddr(-(modTLen + OPSize.STATE_CHECK_PUSH_OR_JUMP)); } else { addOpcodeRelAddr(OPCode.PUSH, -(modTLen + OPSize.PUSH)); } } } else if (qn.upper == 0) { if (qn.isRefered) { /* /(?..){0}/ */ addOpcodeRelAddr(OPCode.JUMP, tlen); compileTree(qn.target); } // else r=0 ??? } else if (qn.upper == 1 && qn.greedy) { if (qn.lower == 0) { if (cknOn(ckn)) { addOpcode(OPCode.STATE_CHECK_PUSH); addStateCheckNum(ckn); addRelAddr(tlen); } else { addOpcodeRelAddr(OPCode.PUSH, tlen); } } compileTree(qn.target); } else if (!qn.greedy && qn.upper == 1 && qn.lower == 0){ /* '??' */ if (cknOn(ckn)) { addOpcode(OPCode.STATE_CHECK_PUSH); addStateCheckNum(ckn); addRelAddr(OPSize.JUMP); } else { addOpcodeRelAddr(OPCode.PUSH, OPSize.JUMP); } addOpcodeRelAddr(OPCode.JUMP, tlen); compileTree(qn.target); } else { compileRangeRepeatNode(qn, modTLen, emptyInfo); if (cknOn(ckn)) { addOpcode(OPCode.STATE_CHECK); addStateCheckNum(ckn); } } } private int compileNonCECLengthQuantifierNode(QuantifierNode qn) { boolean infinite = isRepeatInfinite(qn.upper); int emptyInfo = qn.targetEmptyInfo; int tlen = compileLengthTree(qn.target); /* anychar repeat */ if (qn.target.getType() == NodeType.CANY) { if (qn.greedy && infinite) { if (qn.nextHeadExact != null) { return OPSize.ANYCHAR_STAR_PEEK_NEXT + tlen * qn.lower; } else { return OPSize.ANYCHAR_STAR + tlen * qn.lower; } } } int modTLen = 0; if (emptyInfo != 0) { modTLen = tlen + (OPSize.NULL_CHECK_START + OPSize.NULL_CHECK_END); } else { modTLen = tlen; } int len; if (infinite && (qn.lower <= 1 || tlen * qn.lower <= QUANTIFIER_EXPAND_LIMIT_SIZE)) { if (qn.lower == 1 && tlen > QUANTIFIER_EXPAND_LIMIT_SIZE) { len = OPSize.JUMP; } else { len = tlen * qn.lower; } if (qn.greedy) { if (qn.headExact != null) { len += 
OPSize.PUSH_OR_JUMP_EXACT1 + modTLen + OPSize.JUMP; } else if (qn.nextHeadExact != null) { len += OPSize.PUSH_IF_PEEK_NEXT + modTLen + OPSize.JUMP; } else { len += OPSize.PUSH + modTLen + OPSize.JUMP; } } else { len += OPSize.JUMP + modTLen + OPSize.PUSH; } } else if (qn.upper == 0 && qn.isRefered) { /* /(?..){0}/ */ len = OPSize.JUMP + tlen; } else if (!infinite && qn.greedy && (qn.upper == 1 || (tlen + OPSize.PUSH) * qn.upper <= QUANTIFIER_EXPAND_LIMIT_SIZE )) { len = tlen * qn.lower; len += (OPSize.PUSH + tlen) * (qn.upper - qn.lower); } else if (!qn.greedy && qn.upper == 1 && qn.lower == 0) { /* '??' */ len = OPSize.PUSH + OPSize.JUMP + tlen; } else { len = OPSize.REPEAT_INC + modTLen + OPSize.OPCODE + OPSize.RELADDR + OPSize.MEMNUM; } return len; } @Override protected void compileNonCECQuantifierNode(QuantifierNode qn) { boolean infinite = isRepeatInfinite(qn.upper); int emptyInfo = qn.targetEmptyInfo; int tlen = compileLengthTree(qn.target); if (qn.isAnyCharStar()) { compileTreeNTimes(qn.target, qn.lower); if (qn.nextHeadExact != null) { if (isMultiline(regex.options)) { addOpcode(enc.isSingleByte() ? OPCode.ANYCHAR_ML_STAR_PEEK_NEXT_SB : OPCode.ANYCHAR_ML_STAR_PEEK_NEXT); } else { addOpcode(enc.isSingleByte() ? OPCode.ANYCHAR_STAR_PEEK_NEXT_SB : OPCode.ANYCHAR_STAR_PEEK_NEXT); } StringNode sn = (StringNode)qn.nextHeadExact; addBytes(sn.bytes, sn.p, 1); return; } else { if (isMultiline(regex.options)) { addOpcode(enc.isSingleByte() ? OPCode.ANYCHAR_ML_STAR_SB : OPCode.ANYCHAR_ML_STAR); } else { addOpcode(enc.isSingleByte() ? 
OPCode.ANYCHAR_STAR_SB : OPCode.ANYCHAR_STAR); } return; } } int modTLen; if (emptyInfo != 0) { modTLen = tlen + (OPSize.NULL_CHECK_START + OPSize.NULL_CHECK_END); } else { modTLen = tlen; } if (infinite && (qn.lower <= 1 || tlen * qn.lower <= QUANTIFIER_EXPAND_LIMIT_SIZE)) { if (qn.lower == 1 && tlen > QUANTIFIER_EXPAND_LIMIT_SIZE) { if (qn.greedy) { if (qn.headExact != null) { addOpcodeRelAddr(OPCode.JUMP, OPSize.PUSH_OR_JUMP_EXACT1); } else if (qn.nextHeadExact != null) { addOpcodeRelAddr(OPCode.JUMP, OPSize.PUSH_IF_PEEK_NEXT); } else { addOpcodeRelAddr(OPCode.JUMP, OPSize.PUSH); } } else { addOpcodeRelAddr(OPCode.JUMP, OPSize.JUMP); } } else { compileTreeNTimes(qn.target, qn.lower); } if (qn.greedy) { if (qn.headExact != null) { addOpcodeRelAddr(OPCode.PUSH_OR_JUMP_EXACT1, modTLen + OPSize.JUMP); StringNode sn = (StringNode)qn.headExact; addBytes(sn.bytes, sn.p, 1); compileTreeEmptyCheck(qn.target, emptyInfo); addOpcodeRelAddr(OPCode.JUMP, -(modTLen + OPSize.JUMP + OPSize.PUSH_OR_JUMP_EXACT1)); } else if (qn.nextHeadExact != null) { addOpcodeRelAddr(OPCode.PUSH_IF_PEEK_NEXT, modTLen + OPSize.JUMP); StringNode sn = (StringNode)qn.nextHeadExact; addBytes(sn.bytes, sn.p, 1); compileTreeEmptyCheck(qn.target, emptyInfo); addOpcodeRelAddr(OPCode.JUMP, -(modTLen + OPSize.JUMP + OPSize.PUSH_IF_PEEK_NEXT)); } else { addOpcodeRelAddr(OPCode.PUSH, modTLen + OPSize.JUMP); compileTreeEmptyCheck(qn.target, emptyInfo); addOpcodeRelAddr(OPCode.JUMP, -(modTLen + OPSize.JUMP + OPSize.PUSH)); } } else { addOpcodeRelAddr(OPCode.JUMP, modTLen); compileTreeEmptyCheck(qn.target, emptyInfo); addOpcodeRelAddr(OPCode.PUSH, -(modTLen + OPSize.PUSH)); } } else if (qn.upper == 0 && qn.isRefered) { /* /(?..){0}/ */ addOpcodeRelAddr(OPCode.JUMP, tlen); compileTree(qn.target); } else if (!infinite && qn.greedy && (qn.upper == 1 || (tlen + OPSize.PUSH) * qn.upper <= QUANTIFIER_EXPAND_LIMIT_SIZE)) { int n = qn.upper - qn.lower; compileTreeNTimes(qn.target, qn.lower); for (int i=0; i= 
code.length) { int length = code.length << 1; while (length <= size) length <<= 1; int[]tmp = new int[length]; System.arraycopy(code, 0, tmp, 0, code.length); code = tmp; } } private void addInt(int i) { if (codeLength >= code.length) { int[]tmp = new int[code.length << 1]; System.arraycopy(code, 0, tmp, 0, code.length); code = tmp; } code[codeLength++] = i; } void setInt(int i, int offset) { ensure(offset); regex.code[offset] = i; } private void addObject(Object o) { if (regex.operands == null) { regex.operands = new Object[4]; } else if (regex.operandLength >= regex.operands.length) { Object[]tmp = new Object[regex.operands.length << 1]; System.arraycopy(regex.operands, 0, tmp, 0, regex.operands.length); regex.operands = tmp; } addInt(regex.operandLength); regex.operands[regex.operandLength++] = o; } private void addBytes(byte[]bytes, int p ,int length) { ensure(codeLength + length); int end = p + length; while (p < end) code[codeLength++] = bytes[p++]; } private void addInts(int[]ints, int length) { ensure(codeLength + length); System.arraycopy(ints, 0, code, codeLength, length); codeLength += length; } private void addOpcode(int opcode) { addInt(opcode); switch(opcode) { case OPCode.ANYCHAR_STAR: case OPCode.ANYCHAR_STAR_SB: case OPCode.ANYCHAR_ML_STAR: case OPCode.ANYCHAR_ML_STAR_SB: case OPCode.ANYCHAR_STAR_PEEK_NEXT: case OPCode.ANYCHAR_STAR_PEEK_NEXT_SB: case OPCode.ANYCHAR_ML_STAR_PEEK_NEXT: case OPCode.ANYCHAR_ML_STAR_PEEK_NEXT_SB: case OPCode.STATE_CHECK_ANYCHAR_STAR: case OPCode.STATE_CHECK_ANYCHAR_STAR_SB: case OPCode.STATE_CHECK_ANYCHAR_ML_STAR: case OPCode.MEMORY_START_PUSH: case OPCode.MEMORY_END_PUSH: case OPCode.MEMORY_END_PUSH_REC: case OPCode.MEMORY_END_REC: case OPCode.NULL_CHECK_START: case OPCode.NULL_CHECK_END_MEMST_PUSH: case OPCode.PUSH: case OPCode.STATE_CHECK_PUSH: case OPCode.STATE_CHECK_PUSH_OR_JUMP: case OPCode.STATE_CHECK: case OPCode.PUSH_OR_JUMP_EXACT1: case OPCode.PUSH_IF_PEEK_NEXT: case OPCode.REPEAT: case OPCode.REPEAT_NG: case 
OPCode.REPEAT_INC_SG: case OPCode.REPEAT_INC_NG: case OPCode.REPEAT_INC_NG_SG: case OPCode.PUSH_POS: case OPCode.PUSH_POS_NOT: case OPCode.PUSH_STOP_BT: case OPCode.PUSH_LOOK_BEHIND_NOT: case OPCode.CALL: case OPCode.RETURN: // it will appear only with CALL though regex.stackNeeded = true; } } private void addStateCheckNum(int num) { addInt(num); } private void addRelAddr(int addr) { addInt(addr); } private void addAbsAddr(int addr) { addInt(addr); } private void addLength(int length) { addInt(length); } private void addMemNum(int num) { addInt(num); } private void addPointer(Object o) { addObject(o); } private void addOption(int option) { addInt(option); } private void addOpcodeRelAddr(int opcode, int addr) { addOpcode(opcode); addRelAddr(addr); } private void addOpcodeOption(int opcode, int option) { addOpcode(opcode); addOption(option); } private void addTemplate(byte[]bytes) { if (templateNum == 0) { templates = new byte[2][]; } else if (templateNum == templates.length) { byte[][]tmp = new byte[templateNum * 2][]; System.arraycopy(templates, 0, tmp, 0, templateNum); templates = tmp; } templates[templateNum++] = bytes; } } joni-2.0.0/src/org/joni/AsmCompiler.java000066400000000000000000000057101214326443200200470ustar00rootroot00000000000000/* * Permission is hereby granted, free of charge, to any person obtaining a copy of * this software and associated documentation files (the "Software"), to deal in * the Software without restriction, including without limitation the rights to * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do * so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. 
* * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ package org.joni; import org.joni.ast.AnchorNode; import org.joni.ast.BackRefNode; import org.joni.ast.CClassNode; import org.joni.ast.CTypeNode; import org.joni.ast.CallNode; import org.joni.ast.ConsAltNode; import org.joni.ast.EncloseNode; import org.joni.ast.QuantifierNode; final class AsmCompiler extends AsmCompilerSupport { public AsmCompiler(Analyser analyser) { super(analyser); } @Override protected void prepare() { REG_NUM++; prepareMachine(); prepareMachineInit(); prepareMachineMatch(); prepareFactory(); prepareFactoryInit(); } @Override protected void finish() { setupFactoryInit(); setupMachineInit(); setupMachineMatch(); setupClasses(); } @Override protected void compileAltNode(ConsAltNode node) { } @Override protected void addCompileString(byte[]bytes, int p, int mbLength, int strLength, boolean ignoreCase) { String template = installTemplate(bytes, p, strLength); } @Override protected void compileCClassNode(CClassNode node) { if (node.bs != null) { String bitsetName = installBitSet(node.bs.bits); } } @Override protected void compileCTypeNode(CTypeNode node) { } @Override protected void compileAnyCharNode() { } @Override protected void compileBackrefNode(BackRefNode node) { } @Override protected void compileCallNode(CallNode node) { } @Override protected void compileCECQuantifierNode(QuantifierNode node) { } @Override protected void compileNonCECQuantifierNode(QuantifierNode node) { } @Override protected void compileOptionNode(EncloseNode node) { } @Override protected void 
compileEncloseNode(EncloseNode node) { } @Override protected void compileAnchorNode(AnchorNode node) { } } joni-2.0.0/src/org/joni/AsmCompilerSupport.java000066400000000000000000000245111214326443200214440ustar00rootroot00000000000000/* * Permission is hereby granted, free of charge, to any person obtaining a copy of * this software and associated documentation files (the "Software"), to deal in * the Software without restriction, including without limitation the rights to * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do * so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ package org.joni; import java.io.FileOutputStream; import java.io.IOException; import org.joni.constants.AsmConstants; import org.objectweb.asm.ClassWriter; import org.objectweb.asm.MethodVisitor; import org.objectweb.asm.Opcodes; abstract class AsmCompilerSupport extends Compiler implements Opcodes, AsmConstants { protected ClassWriter factory; // matcher allocator, also bit set, code rage and string template container protected MethodVisitor factoryInit;// factory constructor protected String factoryName; protected ClassWriter machine; // matcher protected MethodVisitor machineInit;// matcher constructor protected MethodVisitor match; // actual matcher implementation (the matchAt method) protected String machineName; // we will? try to manage visitMaxs ourselves for efficiency protected int maxStack = 1; protected int maxVars = LAST_INDEX; // for field generation protected int bitsets, ranges, templates; // simple class name postfix scheme for now static int REG_NUM = 0; // dummy class loader for now private static final class DummyClassLoader extends ClassLoader { public Class defineClass(String name, byte[] bytes) { return super.defineClass(name, bytes, 0, bytes.length); } }; private static final DummyClassLoader loader = new DummyClassLoader(); AsmCompilerSupport(Analyser analyser) { super(analyser); } protected final void prepareFactory() { factory = new ClassWriter(ClassWriter.COMPUTE_MAXS); factoryName = "org/joni/MatcherFactory" + REG_NUM; factory.visit(V1_4, ACC_PUBLIC + ACC_FINAL, factoryName, null, "org/joni/MatcherFactory", null); MethodVisitor create = factory.visitMethod(ACC_SYNTHETIC, "create", "(Lorg/joni/Regex;[BII)Lorg/joni/Matcher;", null, null); create.visitTypeInsn(NEW, machineName); create.visitInsn(DUP); // instance create.visitVarInsn(ALOAD, 1); // Regex create.visitVarInsn(ALOAD, 2); // bytes[] create.visitVarInsn(ILOAD, 3); // p create.visitVarInsn(ILOAD, 4); // end create.visitMethodInsn(INVOKESPECIAL, machineName, "", 
"(Lorg/joni/Regex;[BII)V"); create.visitInsn(ARETURN); create.visitMaxs(0, 0); //create.visitMaxs(6, 5); create.visitEnd(); } protected final void prepareFactoryInit() { factoryInit = factory.visitMethod(ACC_PUBLIC, "", "()V", null, null); factoryInit.visitVarInsn(ALOAD, 0); factoryInit.visitMethodInsn(INVOKESPECIAL, "org/joni/MatcherFactory", "", "()V"); } protected final void setupFactoryInit() { factoryInit.visitInsn(RETURN); factoryInit.visitMaxs(0, 0); //init.visitMaxs(1, 1); factoryInit.visitEnd(); } protected final void prepareMachine() { machine = new ClassWriter(ClassWriter.COMPUTE_MAXS); machineName = "org/joni/NativeMachine" + REG_NUM; } protected final void prepareMachineInit() { machine.visit(V1_4, ACC_PUBLIC + ACC_FINAL, machineName, null, "org/joni/NativeMachine", null); machineInit = machine.visitMethod(ACC_PROTECTED, "", "(Lorg/joni/Regex;[BII)V", null, null); machineInit.visitVarInsn(ALOAD, THIS); // this machineInit.visitVarInsn(ALOAD, 1); // Regex machineInit.visitVarInsn(ALOAD, 2); // bytes[] machineInit.visitVarInsn(ILOAD, 3); // p machineInit.visitVarInsn(ILOAD, 4); // end machineInit.visitMethodInsn(INVOKESPECIAL, "org/joni/NativeMachine", "", "(Lorg/joni/Regex;[BII)V"); } protected final void setupMachineInit() { if (bitsets + ranges + templates > 0) { // ok, some of these are in use, we'd like to cache the factory machine.visitField(ACC_PRIVATE + ACC_FINAL, "factory", "L" + factoryName + ";", null, null); machineInit.visitVarInsn(ALOAD, THIS); // this machineInit.visitVarInsn(ALOAD, 1); // this, Regex machineInit.visitFieldInsn(GETFIELD, "org/joni/Regex", "factory", "Lorg/joni/MatcherFactory;"); // this, factory machineInit.visitTypeInsn(CHECKCAST, factoryName); machineInit.visitFieldInsn(PUTFIELD, machineName, "factory", "L" + factoryName + ";"); // [] } machineInit.visitInsn(RETURN); machineInit.visitMaxs(0, 0); //init.visitMaxs(5, 5); machineInit.visitEnd(); } protected final void prepareMachineMatch() { match = 
machine.visitMethod(ACC_SYNTHETIC, "matchAt", "(III)I", null, null); move(S, SSTART); // s = sstart load("bytes", "[B"); // astore(BYTES); // byte[]bytes = this.bytes } protected final void setupMachineMatch() { match.visitInsn(ICONST_M1); match.visitInsn(IRETURN); match.visitMaxs(maxStack, maxVars); match.visitEnd(); } protected final void setupClasses() { byte[]factoryCode = factory.toByteArray(); byte[]machineCode = machine.toByteArray(); if (Config.DEBUG_ASM) { try { FileOutputStream fos; fos = new FileOutputStream(factoryName.substring(factoryName.lastIndexOf('/') + 1) + ".class"); fos.write(factoryCode); fos.close(); fos = new FileOutputStream(machineName.substring(machineName.lastIndexOf('/') + 1) + ".class"); fos.write(machineCode); fos.close(); } catch (IOException ioe) { ioe.printStackTrace(Config.err); } } loader.defineClass(machineName.replace('/', '.'), machineCode); Class cls = loader.defineClass(factoryName.replace('/', '.'), factoryCode); try { regex.factory = (MatcherFactory)cls.newInstance(); } catch(Exception e) { e.printStackTrace(Config.err); } } protected final void aload(int var) { match.visitVarInsn(ALOAD, var); } protected final void astore(int var) { match.visitVarInsn(ASTORE, var); } protected final void loadThis() { match.visitVarInsn(ALOAD, THIS); } protected final void load(int var) { match.visitVarInsn(ILOAD, var); } protected final void store(int var) { match.visitVarInsn(ISTORE, var); } protected final void move(int to, int from) { load(from); store(to); } protected final void load(String field, String singature) { loadThis(); match.visitFieldInsn(GETFIELD, machineName, field, singature); } protected final void load(String field) { load(field, "I"); } protected final void store(String field, String singature) { loadThis(); match.visitFieldInsn(PUTFIELD, machineName, field, singature); } protected final void store(String field) { store(field, "I"); } protected final String installTemplate(byte[]arr, int p, int length) { String 
templateName = TEMPLATE + ++templates; installArray(templateName, arr, p, length); return templateName; } protected final String installCodeRange(int[]arr) { String coreRangeName = CODERANGE + ++ranges; installArray(coreRangeName, arr); return coreRangeName; } protected final String installBitSet(int[]arr) { String bitsetName = BITSET + ++bitsets; installArray(bitsetName, arr); return bitsetName; } private void installArray(String name, int[]arr) { factory.visitField(ACC_PRIVATE + ACC_FINAL, name, "[I", null, null); factoryInit.visitVarInsn(ALOAD, THIS); // this; loadInt(factoryInit, arr.length); // this, length factoryInit.visitIntInsn(NEWARRAY, T_INT); // this, arr for (int i=0;i < arr.length; i++) buildArray(i, arr[i], IASTORE); factoryInit.visitFieldInsn(PUTFIELD, factoryName, name, "[I"); } private void installArray(String name, byte[]arr, int p, int length) { factory.visitField(ACC_PRIVATE + ACC_FINAL, name, "[B", null, null); factoryInit.visitVarInsn(ALOAD, THIS); // this; loadInt(factoryInit, arr.length); // this, length factoryInit.visitIntInsn(NEWARRAY, T_BYTE); // this, arr for (int i=p, j=0; i < p + length; i++, j++) buildArray(j, arr[i] & 0xff, BASTORE); factoryInit.visitFieldInsn(PUTFIELD, factoryName, name, "[B"); } private void buildArray(int index, int value, int type) { factoryInit.visitInsn(DUP); // ... arr, arr loadInt(factoryInit, index); // ... arr, arr, index loadInt(factoryInit, value); // ... arr, arr, index, value factoryInit.visitInsn(type); // ... 
arr } private void loadInt(MethodVisitor mv, int value) { if (value >= -1 && value <= 5) { mv.visitInsn(value + ICONST_0); // ICONST_0 == 3 } else if (value >= 6 && value <= 127 || value >= -128 && value <= -2) { mv.visitIntInsn(BIPUSH, value); } else if (value >= 128 && value <= 32767 || value >= -32768 && value <= -129) { mv.visitIntInsn(SIPUSH, value); } else { mv.visitLdcInsn(new Integer(value)); } } } joni-2.0.0/src/org/joni/BitSet.java000066400000000000000000000070131214326443200170240ustar00rootroot00000000000000/* * Permission is hereby granted, free of charge, to any person obtaining a copy of * this software and associated documentation files (the "Software"), to deal in * the Software without restriction, including without limitation the rights to * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do * so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ package org.joni; public final class BitSet { static final int BITS_PER_BYTE = 8; public static final int SINGLE_BYTE_SIZE = (1 << BITS_PER_BYTE); private static final int BITS_IN_ROOM = 4 * BITS_PER_BYTE; static final int BITSET_SIZE = (SINGLE_BYTE_SIZE / BITS_IN_ROOM); static final int ROOM_SHIFT = log2(BITS_IN_ROOM); final int[] bits = new int[BITSET_SIZE]; private static final int BITS_TO_STRING_WRAP = 4; public String toString() { StringBuilder buffer = new StringBuilder(); buffer.append("BitSet"); for (int i=0; i>> ROOM_SHIFT] & bit(pos)) != 0; } public void set(int pos) { bits[pos >>> ROOM_SHIFT] |= bit(pos); } public void clear(int pos) { bits[pos >>> ROOM_SHIFT] &= ~bit(pos); } public void invert(int pos) { bits[pos >>> ROOM_SHIFT] ^= bit(pos); } public void clear() { for (int i=0; i>>= 1) != 0) log++; return log; } } joni-2.0.0/src/org/joni/BitStatus.java000066400000000000000000000036401214326443200175560ustar00rootroot00000000000000/* * Permission is hereby granted, free of charge, to any person obtaining a copy of * this software and associated documentation files (the "Software"), to deal in * the Software without restriction, including without limitation the rights to * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do * so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
/**
 * Static helpers for a 32-bit status word in which bit {@code n} stands for
 * item {@code n}. Indexes at or above {@link #BIT_STATUS_BITS_NUM} do not fit
 * in the word; they are mapped onto bit 0 (or ignored by
 * {@link #bsOnAtSimple}).
 */
final class BitStatus {
    /** Number of bits available in a status word. */
    public static final int BIT_STATUS_BITS_NUM = 4 * 8;

    /** @return a status word with no bit set */
    public static int bsClear() {
        return 0;
    }

    /** @return a status word with every bit set */
    public static int bsAll() {
        return ~0;
    }

    /**
     * Tests the bit for index {@code n}; out-of-range indexes test bit 0.
     */
    public static boolean bsAt(int stats, int n) {
        final int mask = n < BIT_STATUS_BITS_NUM ? (1 << n) : 1;
        return (stats & mask) != 0;
    }

    /**
     * Returns {@code stats} with the bit for index {@code n} set;
     * out-of-range indexes set bit 0.
     */
    public static int bsOnAt(int stats, int n) {
        final int mask = n < BIT_STATUS_BITS_NUM ? (1 << n) : 1;
        return stats | mask;
    }

    /**
     * Returns {@code stats} with the bit for index {@code n} set;
     * out-of-range indexes leave the word unchanged.
     */
    public static int bsOnAtSimple(int stats, int n) {
        return n < BIT_STATUS_BITS_NUM ? stats | (1 << n) : stats;
    }

    /**
     * Clears the bits of {@code f} in {@code v} when {@code negative} is
     * true, otherwise sets them.
     */
    public static int bsOnOff(int v, int f, boolean negative) {
        return negative ? (v & ~f) : (v | f);
    }
}
IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ package org.joni; import static org.joni.BitStatus.bsAt; import static org.joni.Option.isFindCondition; import static org.joni.Option.isFindLongest; import static org.joni.Option.isFindNotEmpty; import static org.joni.Option.isNotBol; import static org.joni.Option.isNotEol; import static org.joni.Option.isPosixRegion; import org.jcodings.CodeRange; import org.jcodings.Encoding; import org.jcodings.IntHolder; import org.joni.ast.CClassNode; import org.joni.constants.OPCode; import org.joni.constants.OPSize; import org.joni.exception.ErrorMessages; import org.joni.exception.InternalException; class ByteCodeMachine extends StackMachine { private static final int INTERRUPT_CHECK_EVERY = 30000; int interruptCheckCounter = 0; // we modulos this to occasionally check for interrupts private int bestLen; // return value private int s = 0; // current char private int range; // right range private int sprev; private int sstart; private int sbegin; private final int[]code; // byte code private int ip; // instruction pointer ByteCodeMachine(Regex regex, byte[]bytes, int p, int end) { super(regex, bytes, p, end); this.code = regex.code; } protected int stkp; // a temporary private boolean makeCaptureHistoryTree(CaptureTreeNode node) { //CaptureTreeNode child; int k = stkp; //int k = kp; while (k < stk) { StackEntry e = stack[k]; if (e.type == MEM_START) { int n = e.getMemNum(); if (n <= Config.MAX_CAPTURE_HISTORY_GROUP && bsAt(regex.captureHistory, n)) { CaptureTreeNode child = new CaptureTreeNode(); child.group = n; child.beg = e.getMemPStr() - str; node.addChild(child); stkp = k + 1; if (makeCaptureHistoryTree(child)) return true; k = stkp; child.end = e.getMemPStr() - str; } } else if (e.type == MEM_END) { if 
(e.getMemNum() == node.group) { node.end = e.getMemPStr() - str; stkp = k; return false; } } } return true; /* 1: root node ending. */ } private void checkCaptureHistory(Region region) { CaptureTreeNode node; if (region.historyRoot == null) { node = region.historyRoot = new CaptureTreeNode(); } else { node = region.historyRoot; node.clear(); } // was clear ??? node.group = 0; node.beg = sstart - str; node.end = s - str; stkp = 0; makeCaptureHistoryTree(region.historyRoot); } private byte[]cfbuf; private byte[]cfbuf2; protected final byte[]cfbuf() { return cfbuf == null ? cfbuf = new byte[Config.ENC_MBC_CASE_FOLD_MAXLEN] : cfbuf; } protected final byte[]cfbuf2() { return cfbuf2 == null ? cfbuf2 = new byte[Config.ENC_MBC_CASE_FOLD_MAXLEN] : cfbuf2; } private boolean stringCmpIC(int caseFlodFlag, int s1, IntHolder ps2, int mbLen, int textEnd) { byte[]buf1 = cfbuf(); byte[]buf2 = cfbuf2(); int s2 = ps2.value; int end1 = s1 + mbLen; while (s1 < end1) { value = s1; int len1 = enc.mbcCaseFold(caseFlodFlag, bytes, this, textEnd, buf1); s1 = value; value = s2; int len2 = enc.mbcCaseFold(caseFlodFlag, bytes, this, textEnd, buf2); s2 = value; if (len1 != len2) return false; int p1 = 0; int p2 = 0; while (len1-- > 0) { if (buf1[p1] != buf2[p2]) return false; p1++; p2++; } } ps2.value = s2; return true; } private void debugMatchBegin() { Config.log.println("match_at: " + "str: " + str + ", end: " + end + ", start: " + this.sstart + ", sprev: " + this.sprev); Config.log.println("size: " + (end - str) + ", start offset: " + (this.sstart - str)); } private void debugMatchLoop() { if (Config.DEBUG_MATCH) { Config.log.printf("%4d", (s - str)).print("> \""); int q, i; for (i=0, q=s; i<7 && q=0; i++) { int len = enc.length(bytes, q, end); while (len-- > 0) if (q < end) Config.log.print(new String(new byte[]{bytes[q++]})); } String str = q < end ? 
"...\"" : "\""; q += str.length(); Config.log.print(str); for (i=0; i<20-(q-s);i++) Config.log.print(" "); StringBuilder sb = new StringBuilder(); new ByteCodePrinter(regex).compiledByteCodeToString(sb, ip); Config.log.println(sb.toString()); } } protected final int matchAt(int range, int sstart, int sprev) throws InterruptedException { this.range = range; this.sstart = sstart; this.sprev = sprev; stk = 0; ip = 0; if (Config.DEBUG_MATCH) debugMatchBegin(); init(); bestLen = -1; s = sstart; Thread currentThread = Thread.currentThread(); final int[]code = this.code; while (true) { if (interruptCheckCounter++ % INTERRUPT_CHECK_EVERY == 0 && currentThread.isInterrupted()) { currentThread.interrupted(); throw new InterruptedException(); } if (Config.DEBUG_MATCH) debugMatchLoop(); sbegin = s; switch (code[ip++]) { case OPCode.END: if (opEnd()) return finish(); break; case OPCode.EXACT1: opExact1(); break; case OPCode.EXACT2: opExact2(); continue; case OPCode.EXACT3: opExact3(); continue; case OPCode.EXACT4: opExact4(); continue; case OPCode.EXACT5: opExact5(); continue; case OPCode.EXACTN: opExactN(); continue; case OPCode.EXACTMB2N1: opExactMB2N1(); break; case OPCode.EXACTMB2N2: opExactMB2N2(); continue; case OPCode.EXACTMB2N3: opExactMB2N3(); continue; case OPCode.EXACTMB2N: opExactMB2N(); continue; case OPCode.EXACTMB3N: opExactMB3N(); continue; case OPCode.EXACTMBN: opExactMBN(); continue; case OPCode.EXACT1_IC: opExact1IC(); break; case OPCode.EXACTN_IC: opExactNIC(); continue; case OPCode.CCLASS: opCClass(); break; case OPCode.CCLASS_MB: opCClassMB(); break; case OPCode.CCLASS_MIX: opCClassMIX(); break; case OPCode.CCLASS_NOT: opCClassNot(); break; case OPCode.CCLASS_MB_NOT: opCClassMBNot(); break; case OPCode.CCLASS_MIX_NOT: opCClassMIXNot(); break; case OPCode.CCLASS_NODE: opCClassNode(); break; case OPCode.ANYCHAR: opAnyChar(); break; case OPCode.ANYCHAR_ML: opAnyCharML(); break; case OPCode.ANYCHAR_STAR: opAnyCharStar(); break; case OPCode.ANYCHAR_ML_STAR: 
opAnyCharMLStar(); break; case OPCode.ANYCHAR_STAR_PEEK_NEXT: opAnyCharStarPeekNext(); break; case OPCode.ANYCHAR_ML_STAR_PEEK_NEXT: opAnyCharMLStarPeekNext(); break; case OPCode.STATE_CHECK_ANYCHAR_STAR: opStateCheckAnyCharStar(); break; case OPCode.STATE_CHECK_ANYCHAR_ML_STAR:opStateCheckAnyCharMLStar();break; case OPCode.WORD: opWord(); break; case OPCode.NOT_WORD: opNotWord(); break; case OPCode.WORD_BOUND: opWordBound(); continue; case OPCode.NOT_WORD_BOUND: opNotWordBound(); continue; case OPCode.WORD_BEGIN: opWordBegin(); continue; case OPCode.WORD_END: opWordEnd(); continue; case OPCode.BEGIN_BUF: opBeginBuf(); continue; case OPCode.END_BUF: opEndBuf(); continue; case OPCode.BEGIN_LINE: opBeginLine(); continue; case OPCode.END_LINE: opEndLine(); continue; case OPCode.SEMI_END_BUF: opSemiEndBuf(); continue; case OPCode.BEGIN_POSITION: opBeginPosition(); continue; case OPCode.MEMORY_START_PUSH: opMemoryStartPush(); continue; case OPCode.MEMORY_START: opMemoryStart(); continue; case OPCode.MEMORY_END_PUSH: opMemoryEndPush(); continue; case OPCode.MEMORY_END: opMemoryEnd(); continue; case OPCode.MEMORY_END_PUSH_REC: opMemoryEndPushRec(); continue; case OPCode.MEMORY_END_REC: opMemoryEndRec(); continue; case OPCode.BACKREF1: opBackRef1(); continue; case OPCode.BACKREF2: opBackRef2(); continue; case OPCode.BACKREFN: opBackRefN(); continue; case OPCode.BACKREFN_IC: opBackRefNIC(); continue; case OPCode.BACKREF_MULTI: opBackRefMulti(); continue; case OPCode.BACKREF_MULTI_IC: opBackRefMultiIC(); continue; case OPCode.BACKREF_WITH_LEVEL: opBackRefAtLevel(); continue; case OPCode.NULL_CHECK_START: opNullCheckStart(); continue; case OPCode.NULL_CHECK_END: opNullCheckEnd(); continue; case OPCode.NULL_CHECK_END_MEMST: opNullCheckEndMemST(); continue; case OPCode.NULL_CHECK_END_MEMST_PUSH: opNullCheckEndMemSTPush(); continue; case OPCode.JUMP: opJump(); continue; case OPCode.PUSH: opPush(); continue; // CEC case OPCode.STATE_CHECK_PUSH: opStateCheckPush(); continue; case 
OPCode.STATE_CHECK_PUSH_OR_JUMP: opStateCheckPushOrJump(); continue; case OPCode.STATE_CHECK: opStateCheck(); continue; case OPCode.POP: opPop(); continue; case OPCode.PUSH_OR_JUMP_EXACT1: opPushOrJumpExact1(); continue; case OPCode.PUSH_IF_PEEK_NEXT: opPushIfPeekNext(); continue; case OPCode.REPEAT: opRepeat(); continue; case OPCode.REPEAT_NG: opRepeatNG(); continue; case OPCode.REPEAT_INC: opRepeatInc(); continue; case OPCode.REPEAT_INC_SG: opRepeatIncSG(); continue; case OPCode.REPEAT_INC_NG: opRepeatIncNG(); continue; case OPCode.REPEAT_INC_NG_SG: opRepeatIncNGSG(); continue; case OPCode.PUSH_POS: opPushPos(); continue; case OPCode.POP_POS: opPopPos(); continue; case OPCode.PUSH_POS_NOT: opPushPosNot(); continue; case OPCode.FAIL_POS: opFailPos(); continue; case OPCode.PUSH_STOP_BT: opPushStopBT(); continue; case OPCode.POP_STOP_BT: opPopStopBT(); continue; case OPCode.LOOK_BEHIND: opLookBehind(); continue; case OPCode.PUSH_LOOK_BEHIND_NOT: opPushLookBehindNot(); continue; case OPCode.FAIL_LOOK_BEHIND_NOT: opFailLookBehindNot(); continue; // USE_SUBEXP_CALL case OPCode.CALL: opCall(); continue; case OPCode.RETURN: opReturn(); continue; // single byte implementations case OPCode.CCLASS_SB: opCClassSb(); break; case OPCode.CCLASS_NOT_SB: opCClassNotSb(); break; case OPCode.ANYCHAR_SB: opAnyCharSb(); break; case OPCode.ANYCHAR_ML_SB: opAnyCharMLSb(); break; case OPCode.ANYCHAR_STAR_SB: opAnyCharStarSb(); break; case OPCode.ANYCHAR_ML_STAR_SB: opAnyCharMLStarSb(); break; case OPCode.ANYCHAR_STAR_PEEK_NEXT_SB: opAnyCharStarPeekNextSb(); break; case OPCode.ANYCHAR_ML_STAR_PEEK_NEXT_SB: opAnyCharMLStarPeekNextSb(); break; case OPCode.STATE_CHECK_ANYCHAR_STAR_SB: opStateCheckAnyCharStarSb(); break; case OPCode.STATE_CHECK_ANYCHAR_ML_STAR_SB: opStateCheckAnyCharMLStarSb();break; case OPCode.WORD_SB: opWordSb(); break; case OPCode.NOT_WORD_SB: opNotWordSb(); break; case OPCode.WORD_BOUND_SB: opWordBoundSb(); continue; case OPCode.NOT_WORD_BOUND_SB: opNotWordBoundSb(); 
continue; case OPCode.WORD_BEGIN_SB: opWordBeginSb(); continue; case OPCode.WORD_END_SB: opWordEndSb(); continue; case OPCode.LOOK_BEHIND_SB: opLookBehindSb(); continue; case OPCode.EXACT1_IC_SB: opExact1ICSb(); break; case OPCode.EXACTN_IC_SB: opExactNICSb(); continue; case OPCode.FINISH: return finish(); case OPCode.FAIL: opFail(); continue; default: throw new InternalException(ErrorMessages.ERR_UNDEFINED_BYTECODE); } // main switch } // main while } private boolean opEnd() { int n = s - sstart; if (n > bestLen) { if (Config.USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE) { if (isFindLongest(regex.options)) { if (n > msaBestLen) { msaBestLen = n; msaBestS = sstart; } else { // goto end_best_len; return endBestLength(); } } } // USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE bestLen = n; final Region region = msaRegion; if (region != null) { // USE_POSIX_REGION_OPTION ... else ... region.beg[0] = msaBegin = sstart - str; region.end[0] = msaEnd = s - str; for (int i = 1; i <= regex.numMem; i++) { // opt! if (repeatStk[memEndStk + i] != INVALID_INDEX) { region.beg[i] = bsAt(regex.btMemStart, i) ? stack[repeatStk[memStartStk + i]].getMemPStr() - str : repeatStk[memStartStk + i] - str; region.end[i] = bsAt(regex.btMemEnd, i) ? stack[repeatStk[memEndStk + i]].getMemPStr() : repeatStk[memEndStk + i] - str; } else { region.beg[i] = region.end[i] = Region.REGION_NOTPOS; } } if (Config.USE_CAPTURE_HISTORY) { if (regex.captureHistory != 0) checkCaptureHistory(region); } } else { msaBegin = sstart - str; msaEnd = s - str; } } else { Region region = msaRegion; if (Config.USE_POSIX_API_REGION_OPTION) { if (!isPosixRegion(regex.options)) { if (region != null) { region.clear(); } else { msaBegin = msaEnd = 0; } } } else { if (region != null) { region.clear(); } else { msaBegin = msaEnd = 0; } } // USE_POSIX_REGION_OPTION } // end_best_len: /* default behavior: return first-matching result. 
*/ return endBestLength(); } private boolean endBestLength() { if (isFindCondition(regex.options)) { if (isFindNotEmpty(regex.options) && s == sstart) { bestLen = -1; {opFail(); return false;} /* for retry */ } if (isFindLongest(regex.options) && s < range) { {opFail(); return false;} /* for retry */ } } // goto finish; return true; } private void opExact1() { if (s >= range || code[ip] != bytes[s++]) {opFail(); return;} //if (s > range) {opFail(); return;} ip++; sprev = sbegin; // break; } private void opExact2() { if (s + 2 > range) {opFail(); return;} if (code[ip] != bytes[s]) {opFail(); return;} ip++; s++; if (code[ip] != bytes[s]) {opFail(); return;} sprev = s; ip++; s++; } private void opExact3() { if (s + 3 > range) {opFail(); return;} if (code[ip] != bytes[s]) {opFail(); return;} ip++; s++; if (code[ip] != bytes[s]) {opFail(); return;} ip++; s++; if (code[ip] != bytes[s]) {opFail(); return;} sprev = s; ip++; s++; } private void opExact4() { if (s + 4 > range) {opFail(); return;} if (code[ip] != bytes[s]) {opFail(); return;} ip++; s++; if (code[ip] != bytes[s]) {opFail(); return;} ip++; s++; if (code[ip] != bytes[s]) {opFail(); return;} ip++; s++; if (code[ip] != bytes[s]) {opFail(); return;} sprev = s; ip++; s++; } private void opExact5() { if (s + 5 > range) {opFail(); return;} if (code[ip] != bytes[s]) {opFail(); return;} ip++; s++; if (code[ip] != bytes[s]) {opFail(); return;} ip++; s++; if (code[ip] != bytes[s]) {opFail(); return;} ip++; s++; if (code[ip] != bytes[s]) {opFail(); return;} ip++; s++; if (code[ip] != bytes[s]) {opFail(); return;} sprev = s; ip++; s++; } private void opExactN() { int tlen = code[ip++]; if (s + tlen > range) {opFail(); return;} if (Config.USE_STRING_TEMPLATES) { byte[]bs = regex.templates[code[ip++]]; int ps = code[ip++]; while (tlen-- > 0) if (bs[ps++] != bytes[s++]) {opFail(); return;} } else { while (tlen-- > 0) if (code[ip++] != bytes[s++]) {opFail(); return;} } sprev = s - 1; } private void opExactMB2N1() { if (s + 2 > 
range) {opFail(); return;} if (code[ip] != bytes[s]) {opFail(); return;} ip++; s++; if (code[ip] != bytes[s]) {opFail(); return;} ip++; s++; sprev = sbegin; // break; } private void opExactMB2N2() { if (s + 4 > range) {opFail(); return;} if (code[ip] != bytes[s]) {opFail(); return;} ip++; s++; if (code[ip] != bytes[s]) {opFail(); return;} ip++; s++; sprev = s; if (code[ip] != bytes[s]) {opFail(); return;} ip++; s++; if (code[ip] != bytes[s]) {opFail(); return;} ip++; s++; } private void opExactMB2N3() { if (s + 6 > range) {opFail(); return;} if (code[ip] != bytes[s]) {opFail(); return;} ip++; s++; if (code[ip] != bytes[s]) {opFail(); return;} ip++; s++; if (code[ip] != bytes[s]) {opFail(); return;} ip++; s++; if (code[ip] != bytes[s]) {opFail(); return;} ip++; s++; sprev = s; if (code[ip] != bytes[s]) {opFail(); return;} ip++; s++; if (code[ip] != bytes[s]) {opFail(); return;} ip++; s++; } private void opExactMB2N() { int tlen = code[ip++]; if (s + tlen * 2 > range) {opFail(); return;} if (Config.USE_STRING_TEMPLATES) { byte[]bs = regex.templates[code[ip++]]; int ps = code[ip++]; while(tlen-- > 0) { if (bs[ps] != bytes[s]) {opFail(); return;} ps++; s++; if (bs[ps] != bytes[s]) {opFail(); return;} ps++; s++; } } else { while(tlen-- > 0) { if (code[ip] != bytes[s]) {opFail(); return;} ip++; s++; if (code[ip] != bytes[s]) {opFail(); return;} ip++; s++; } } sprev = s - 2; } private void opExactMB3N() { int tlen = code[ip++]; if (s + tlen * 3 > range) {opFail(); return;} if (Config.USE_STRING_TEMPLATES) { byte[]bs = regex.templates[code[ip++]]; int ps = code[ip++]; while (tlen-- > 0) { if (bs[ps] != bytes[s]) {opFail(); return;} ps++; s++; if (bs[ps] != bytes[s]) {opFail(); return;} ps++; s++; if (bs[ps] != bytes[s]) {opFail(); return;} ps++; s++; } } else { while (tlen-- > 0) { if (code[ip] != bytes[s]) {opFail(); return;} ip++; s++; if (code[ip] != bytes[s]) {opFail(); return;} ip++; s++; if (code[ip] != bytes[s]) {opFail(); return;} ip++; s++; } } sprev = s - 3; } 
private void opExactMBN() { int tlen = code[ip++]; /* mb-len */ int tlen2= code[ip++]; /* string len */ tlen2 *= tlen; if (s + tlen2 > range) {opFail(); return;} if (Config.USE_STRING_TEMPLATES) { byte[]bs = regex.templates[code[ip++]]; int ps = code[ip++]; while (tlen2-- > 0) { if (bs[ps] != bytes[s]) {opFail(); return;} ps++; s++; } } else { while (tlen2-- > 0) { if (code[ip] != bytes[s]) {opFail(); return;} ip++; s++; } } sprev = s - tlen; } private void opExact1IC() { if (s >= range) {opFail(); return;} byte[]lowbuf = cfbuf(); value = s; int len = enc.mbcCaseFold(regex.caseFoldFlag, bytes, this, end, lowbuf); s = value; if (s > range) {opFail(); return;} int q = 0; while (len-- > 0) { if (code[ip] != lowbuf[q]) {opFail(); return;} ip++; q++; } sprev = sbegin; // break; } private void opExact1ICSb() { if (s >= range || code[ip] != enc.toLowerCaseTable()[bytes[s++] & 0xff]) {opFail(); return;} ip++; sprev = sbegin; // break; } private void opExactNIC() { int tlen = code[ip++]; byte[]lowbuf = cfbuf(); if (Config.USE_STRING_TEMPLATES) { byte[]bs = regex.templates[code[ip++]]; int ps = code[ip++]; int endp = ps + tlen; while (ps < endp) { sprev = s; if (s >= range) {opFail(); return;} value = s; int len = enc.mbcCaseFold(regex.caseFoldFlag, bytes, this, end, lowbuf); s = value; if (s > range) {opFail(); return;} int q = 0; while (len-- > 0) { if (bs[ps] != lowbuf[q]) {opFail(); return;} ps++; q++; } } } else { int endp = ip + tlen; while (ip < endp) { sprev = s; if (s >= range) {opFail(); return;} value = s; int len = enc.mbcCaseFold(regex.caseFoldFlag, bytes, this, end, lowbuf); s = value; if (s > range) {opFail(); return;} int q = 0; while (len-- > 0) { if (code[ip] != lowbuf[q]) {opFail(); return;} ip++; q++; } } } } private void opExactNICSb() { int tlen = code[ip++]; if (s + tlen > range) {opFail(); return;} if (Config.USE_STRING_TEMPLATES) { byte[]bs = regex.templates[code[ip++]]; int ps = code[ip++]; byte[]toLowerTable = enc.toLowerCaseTable(); while (tlen-- 
> 0) if (bs[ps++] != toLowerTable[bytes[s++] & 0xff]) {opFail(); return;} } else { byte[]toLowerTable = enc.toLowerCaseTable(); while (tlen-- > 0) if (code[ip++] != toLowerTable[bytes[s++] & 0xff]) {opFail(); return;} } sprev = s - 1; } private boolean isInBitSet() { int c = bytes[s] & 0xff; return ((code[ip + (c >>> BitSet.ROOM_SHIFT)] & (1 << c)) != 0); } private void opCClass() { if (s >= range || !isInBitSet()) {opFail(); return;} ip += BitSet.BITSET_SIZE; s += enc.length(bytes, s, end); /* OP_CCLASS can match mb-code. \D, \S */ if (s > end) s = end; sprev = sbegin; // break; } private void opCClassSb() { if (s >= range || !isInBitSet()) {opFail(); return;} ip += BitSet.BITSET_SIZE; s++; sprev = sbegin; // break; } private boolean isInClassMB() { int tlen = code[ip++]; if (s >= range) return false; int mbLen = enc.length(bytes, s, end); if (s + mbLen > range) return false; int ss = s; s += mbLen; int c = enc.mbcToCode(bytes, ss, s); if (!CodeRange.isInCodeRange(code, ip, c)) return false; ip += tlen; return true; } private void opCClassMB() { // beyond string check if (s >= range || !enc.isMbcHead(bytes, s, end)) {opFail(); return;} if (!isInClassMB()) {opFail(); return;} // not!!! 
sprev = sbegin; // break; } private void opCClassMIX() { if (s >= range) {opFail(); return;} if (enc.isMbcHead(bytes, s, end)) { ip += BitSet.BITSET_SIZE; if (!isInClassMB()) {opFail(); return;} } else { if (!isInBitSet()) {opFail(); return;} ip += BitSet.BITSET_SIZE; int tlen = code[ip++]; // by code range length ip += tlen; s++; } sprev = sbegin; // break; } private void opCClassNot() { if (s >= range || isInBitSet()) {opFail(); return;} ip += BitSet.BITSET_SIZE; s += enc.length(bytes, s, end); if (s > end) s = end; sprev = sbegin; // break; } private void opCClassNotSb() { if (s >= range || isInBitSet()) {opFail(); return;} ip += BitSet.BITSET_SIZE; s++; sprev = sbegin; // break; } private boolean isNotInClassMB() { int tlen = code[ip++]; int mbLen = enc.length(bytes, s, end); if (!(s + mbLen <= range)) { if (s >= range) return false; s = end; ip += tlen; return true; } int ss = s; s += mbLen; int c = enc.mbcToCode(bytes, ss, s); if (CodeRange.isInCodeRange(code, ip, c)) return false; ip += tlen; return true; } private void opCClassMBNot() { if (s >= range) {opFail(); return;} if (!enc.isMbcHead(bytes, s, end)) { s++; int tlen = code[ip++]; ip += tlen; sprev = sbegin; // break; return; } if (!isNotInClassMB()) {opFail(); return;} sprev = sbegin; // break; } private void opCClassMIXNot() { if (s >= range) {opFail(); return;} if (enc.isMbcHead(bytes, s, end)) { ip += BitSet.BITSET_SIZE; if (!isNotInClassMB()) {opFail(); return;} } else { if (isInBitSet()) {opFail(); return;} ip += BitSet.BITSET_SIZE; int tlen = code[ip++]; ip += tlen; s++; } sprev = sbegin; // break; } private void opCClassNode() { if (s >= range) {opFail(); return;} CClassNode cc = (CClassNode)regex.operands[code[ip++]]; int mbLen = enc.length(bytes, s, end); int ss = s; s += mbLen; if (s > range) {opFail(); return;} int c = enc.mbcToCode(bytes, ss, s); if (!cc.isCodeInCCLength(mbLen, c)) {opFail(); return;} sprev = sbegin; // break; } private void opAnyChar() { if (s >= range) {opFail(); 
return;} int n = enc.length(bytes, s, end); if (s + n > range) {opFail(); return;} if (enc.isNewLine(bytes, s, end)) {opFail(); return;} s += n; sprev = sbegin; // break; } private void opAnyCharSb() { if (s >= range) {opFail(); return;} if (bytes[s] == Encoding.NEW_LINE) {opFail(); return;} s++; sprev = sbegin; // break; } private void opAnyCharML() { if (s >= range) {opFail(); return;} int n = enc.length(bytes, s, end); if (s + n > range) {opFail(); return;} s += n; sprev = sbegin; // break; } private void opAnyCharMLSb() { if (s >= range) {opFail(); return;} s++; sprev = sbegin; // break; } private void opAnyCharStar() { final byte[]bytes = this.bytes; while (s < range) { pushAlt(ip, s, sprev); int n = enc.length(bytes, s, end); if (s + n > range) {opFail(); return;} if (enc.isNewLine(bytes, s, end)) {opFail(); return;} sprev = s; s += n; } sprev = sbegin; // break; } private void opAnyCharStarSb() { final byte[]bytes = this.bytes; while (s < range) { pushAlt(ip, s, sprev); if (bytes[s] == Encoding.NEW_LINE) {opFail(); return;} sprev = s; s++; } sprev = sbegin; // break; } private void opAnyCharMLStar() { final byte[]bytes = this.bytes; while (s < range) { pushAlt(ip, s, sprev); int n = enc.length(bytes, s, end); if (s + n > range) {opFail(); return;} sprev = s; s += n; } sprev = sbegin; // break; } private void opAnyCharMLStarSb() { while (s < range) { pushAlt(ip, s, sprev); sprev = s; s++; } sprev = sbegin; // break; } private void opAnyCharStarPeekNext() { final byte c = (byte)code[ip]; final byte[]bytes = this.bytes; while (s < range) { if (c == bytes[s]) pushAlt(ip + 1, s, sprev); int n = enc.length(bytes, s, end); if (s + n > range || enc.isNewLine(bytes, s, end)) {opFail(); return;} sprev = s; s += n; } ip++; sprev = sbegin; // break; } private void opAnyCharStarPeekNextSb() { final byte c = (byte)code[ip]; final byte[]bytes = this.bytes; while (s < range) { byte b = bytes[s]; if (c == b) pushAlt(ip + 1, s, sprev); if (b == Encoding.NEW_LINE) {opFail(); 
return;} sprev = s; s++; } ip++; sprev = sbegin; // break; } private void opAnyCharMLStarPeekNext() { final byte c = (byte)code[ip]; final byte[]bytes = this.bytes; while (s < range) { if (c == bytes[s]) pushAlt(ip + 1, s, sprev); int n = enc.length(bytes, s, end); if (s + n > range) {opFail(); return;} sprev = s; s += n; } ip++; sprev = sbegin; // break; } private void opAnyCharMLStarPeekNextSb() { final byte c = (byte)code[ip]; final byte[]bytes = this.bytes; while (s < range) { if (c == bytes[s]) pushAlt(ip + 1, s, sprev); sprev = s; s++; } ip++; sprev = sbegin; // break; } // CEC private void opStateCheckAnyCharStar() { int mem = code[ip++]; final byte[]bytes = this.bytes; while (s < range) { if (stateCheckVal(s, mem)) {opFail(); return;} pushAltWithStateCheck(ip, s, sprev, mem); int n = enc.length(bytes, s, end); if (s + n > range || enc.isNewLine(bytes, s, end)) {opFail(); return;} sprev = s; s += n; } sprev = sbegin; // break; } private void opStateCheckAnyCharStarSb() { int mem = code[ip++]; final byte[]bytes = this.bytes; while (s < range) { if (stateCheckVal(s, mem)) {opFail(); return;} pushAltWithStateCheck(ip, s, sprev, mem); if (bytes[s] == Encoding.NEW_LINE) {opFail(); return;} sprev = s; s++; } sprev = sbegin; // break; } // CEC private void opStateCheckAnyCharMLStar() { int mem = code[ip++]; final byte[]bytes = this.bytes; while (s < range) { if (stateCheckVal(s, mem)) {opFail(); return;} pushAltWithStateCheck(ip, s, sprev, mem); int n = enc.length(bytes, s, end); if (s + n > range) {opFail(); return;} sprev = s; s += n; } sprev = sbegin; // break; } private void opStateCheckAnyCharMLStarSb() { int mem = code[ip++]; while (s < range) { if (stateCheckVal(s, mem)) {opFail(); return;} pushAltWithStateCheck(ip, s, sprev, mem); sprev = s; s++; } sprev = sbegin; // break; } private void opWord() { if (s >= range || !enc.isMbcWord(bytes, s, end)) {opFail(); return;} s += enc.length(bytes, s, end); sprev = sbegin; // break; } private void opWordSb() { if (s 
>= range || !enc.isWord(bytes[s] & 0xff)) {opFail(); return;} s++; sprev = sbegin; // break; } private void opNotWord() { if (s >= range || enc.isMbcWord(bytes, s, end)) {opFail(); return;} s += enc.length(bytes, s, end); sprev = sbegin; // break; } private void opNotWordSb() { if (s >= range || enc.isWord(bytes[s] & 0xff)) {opFail(); return;} s++; sprev = sbegin; // break; } private void opWordBound() { if (s == str) { if (s >= range || !enc.isMbcWord(bytes, s, end)) {opFail(); return;} } else if (s == end) { if (!enc.isMbcWord(bytes, sprev, end)) {opFail(); return;} } else { if (enc.isMbcWord(bytes, s, end) == enc.isMbcWord(bytes, sprev, end)) {opFail(); return;} } } private void opWordBoundSb() { if (s == str) { if (s >= range || !enc.isWord(bytes[s] & 0xff)) {opFail(); return;} } else if (s == end) { if (sprev >= end || !enc.isWord(bytes[sprev] & 0xff)) {opFail(); return;} } else { if (enc.isWord(bytes[s] & 0xff) == enc.isWord(bytes[sprev] & 0xff)) {opFail(); return;} } } private void opNotWordBound() { if (s == str) { if (s < range && enc.isMbcWord(bytes, s, end)) {opFail(); return;} } else if (s == end) { if (enc.isMbcWord(bytes, sprev, end)) {opFail(); return;} } else { if (enc.isMbcWord(bytes, s, end) != enc.isMbcWord(bytes, sprev, end)) {opFail(); return;} } } private void opNotWordBoundSb() { if (s == str) { if (s < range && enc.isWord(bytes[s] & 0xff)) {opFail(); return;} } else if (s == end) { if (sprev < end && enc.isWord(bytes[sprev] & 0xff)) {opFail(); return;} } else { if (enc.isWord(bytes[s] & 0xff) != enc.isWord(bytes[sprev] & 0xff)) {opFail(); return;} } } private void opWordBegin() { if (s < range && enc.isMbcWord(bytes, s, end)) { if (s == str || !enc.isMbcWord(bytes, sprev, end)) return; } opFail(); } private void opWordBeginSb() { if (s < range && enc.isWord(bytes[s] & 0xff)) { if (s == str || !enc.isWord(bytes[sprev] & 0xff)) return; } opFail(); } private void opWordEnd() { if (s != str && enc.isMbcWord(bytes, sprev, end)) { if (s == end || 
!enc.isMbcWord(bytes, s, end)) return; } opFail(); } private void opWordEndSb() { if (s != str && enc.isWord(bytes[sprev] & 0xff)) { if (s == end || !enc.isWord(bytes[s] & 0xff)) return; } opFail(); } private void opBeginBuf() { if (s != str) opFail(); } private void opEndBuf() { if (s != end) opFail(); } private void opBeginLine() { if (s == str) { if (isNotBol(msaOptions)) opFail(); return; } else if (enc.isNewLine(bytes, sprev, end) && s != end) { return; } opFail(); } private void opEndLine() { if (s == end) { if (Config.USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE) { if (str == end || !enc.isNewLine(bytes, sprev, end)) { if (isNotEol(msaOptions)) opFail(); } return; } else { if (isNotEol(msaOptions)) opFail(); return; } } else if (enc.isNewLine(bytes, s, end) || (Config.USE_CRNL_AS_LINE_TERMINATOR && enc.isMbcCrnl(bytes, s, end))) { return; } opFail(); } private void opSemiEndBuf() { if (s == end) { if (Config.USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE) { if (str == end || !enc.isNewLine(bytes, sprev, end)) { if (isNotEol(msaOptions)) opFail(); } return; } else { if (isNotEol(msaOptions)) opFail(); return; } } else if (enc.isNewLine(bytes, s, end) && (s + enc.length(bytes, s, end)) == end) { return; } else if (Config.USE_CRNL_AS_LINE_TERMINATOR && enc.isMbcCrnl(bytes, s, end)) { int ss = s + enc.length(bytes, s, end); ss += enc.length(bytes, ss, end); if (ss == end) return; } opFail(); } private void opBeginPosition() { if (s != msaStart) opFail(); } private void opMemoryStartPush() { int mem = code[ip++]; pushMemStart(mem, s); } private void opMemoryStart() { int mem = code[ip++]; repeatStk[memStartStk + mem] = s; } private void opMemoryEndPush() { int mem = code[ip++]; pushMemEnd(mem, s); } private void opMemoryEnd() { int mem = code[ip++]; repeatStk[memEndStk + mem] = s; } private void opMemoryEndPushRec() { int mem = code[ip++]; int stkp = getMemStart(mem); /* should be before push mem-end. 
*/ pushMemEnd(mem, s); repeatStk[memStartStk + mem] = stkp; } private void opMemoryEndRec() { int mem = code[ip++]; repeatStk[memEndStk + mem] = s; int stkp = getMemStart(mem); if (BitStatus.bsAt(regex.btMemStart, mem)) { repeatStk[memStartStk + mem] = stkp; } else { repeatStk[memStartStk + mem] = stack[stkp].getMemPStr(); } pushMemEndMark(mem); } private boolean backrefInvalid(int mem) { return repeatStk[memEndStk + mem] == INVALID_INDEX || repeatStk[memStartStk + mem] == INVALID_INDEX; } private int backrefStart(int mem) { return bsAt(regex.btMemStart, mem) ? stack[repeatStk[memStartStk + mem]].getMemPStr() : repeatStk[memStartStk + mem]; } private int backrefEnd(int mem) { return bsAt(regex.btMemEnd, mem) ? stack[repeatStk[memEndStk + mem]].getMemPStr() : repeatStk[memEndStk + mem]; } private void backref(int mem) { /* if you want to remove following line, you should check in parse and compile time. (numMem) */ if (mem > regex.numMem || backrefInvalid(mem)) {opFail(); return;} int pstart = backrefStart(mem); int pend = backrefEnd(mem); int n = pend - pstart; if (s + n > range) {opFail(); return;} sprev = s; // STRING_CMP while(n-- > 0) if (bytes[pstart++] != bytes[s++]) {opFail(); return;} int len; // beyond string check if (sprev < range) { while (sprev + (len = enc.length(bytes, sprev, end)) < s) sprev += len; } } private void opBackRef1() { backref(1); } private void opBackRef2() { backref(2); } private void opBackRefN() { backref(code[ip++]); } private void opBackRefNIC() { int mem = code[ip++]; /* if you want to remove following line, you should check in parse and compile time. 
(numMem) */ if (mem > regex.numMem || backrefInvalid(mem)) {opFail(); return;} int pstart = backrefStart(mem); int pend = backrefEnd(mem); int n = pend - pstart; if (s + n > range) {opFail(); return;} sprev = s; value = s; if (!stringCmpIC(regex.caseFoldFlag, pstart, this, n, end)) {opFail(); return;} s = value; int len; // if (sprev < bytes.length) while (sprev + (len = enc.length(bytes, sprev, end)) < s) sprev += len; } private void opBackRefMulti() { int tlen = code[ip++]; int i; loop:for (i=0; i range) {opFail(); return;} sprev = s; int swork = s; while (n-- > 0) { if (bytes[pstart++] != bytes[swork++]) continue loop; } s = swork; int len; // beyond string check if (sprev < range) { while (sprev + (len = enc.length(bytes, sprev, end)) < s) sprev += len; } ip += tlen - i - 1; // * SIZE_MEMNUM (1) break; /* success */ } if (i == tlen) {opFail(); return;} } private void opBackRefMultiIC() { int tlen = code[ip++]; int i; loop:for (i=0; i range) {opFail(); return;} sprev = s; value = s; if (!stringCmpIC(regex.caseFoldFlag, pstart, this, n, end)) continue loop; // STRING_CMP_VALUE_IC s = value; int len; // if (sprev < bytes.length) while (sprev + (len = enc.length(bytes, sprev, end)) < s) sprev += len; ip += tlen - i - 1; // * SIZE_MEMNUM (1) break; /* success */ } if (i == tlen) {opFail(); return;} } private boolean memIsInMemp(int mem, int num, int memp) { for (int i=0; i= 0) { StackEntry e = stack[k]; if (e.type == CALL_FRAME) { level--; } else if (e.type == RETURN) { level++; } else if (level == nest) { if (e.type == MEM_START) { if (memIsInMemp(e.getMemNum(), memNum, memp)) { int pstart = e.getMemPStr(); if (pend != -1) { if (pend - pstart > end - s) return false; /* or goto next_mem; */ int p = pstart; value = s; if (ignoreCase) { if (!stringCmpIC(caseFoldFlag, pstart, this, pend - pstart, end)) { return false; /* or goto next_mem; */ } } else { while (p < pend) { if (bytes[p++] != bytes[value++]) return false; /* or goto next_mem; */ } } s = value; return 
true; } } } else if (e.type == MEM_END) { if (memIsInMemp(e.getMemNum(), memNum, memp)) { pend = e.getMemPStr(); } } } k--; } return false; } private void opBackRefAtLevel() { int ic = code[ip++]; int level = code[ip++]; int tlen = code[ip++]; sprev = s; if (backrefMatchAtNestedLevel(ic != 0, regex.caseFoldFlag, level, tlen, ip)) { // (s) and (end) implicit int len; while (sprev + (len = enc.length(bytes, sprev, end)) < s) sprev += len; ip += tlen; // * SIZE_MEMNUM } else { {opFail(); return;} } } /* no need: IS_DYNAMIC_OPTION() == 0 */ private void opSetOptionPush() { // option = code[ip++]; // final for now pushAlt(ip, s, sprev); ip += OPSize.SET_OPTION + OPSize.FAIL; } private void opSetOption() { // option = code[ip++]; // final for now } private void opNullCheckStart() { int mem = code[ip++]; pushNullCheckStart(mem, s); } private void nullCheckFound() { // null_check_found: /* empty loop founded, skip next instruction */ switch(code[ip++]) { case OPCode.JUMP: case OPCode.PUSH: ip++; // p += SIZE_RELADDR; break; case OPCode.REPEAT_INC: case OPCode.REPEAT_INC_NG: case OPCode.REPEAT_INC_SG: case OPCode.REPEAT_INC_NG_SG: ip++; // p += SIZE_MEMNUM; break; default: throw new InternalException(ErrorMessages.ERR_UNEXPECTED_BYTECODE); } // switch } private void opNullCheckEnd() { int mem = code[ip++]; int isNull = nullCheck(mem, s); /* mem: null check id */ if (isNull != 0) { if (Config.DEBUG_MATCH) { Config.log.println("NULL_CHECK_END: skip id:" + mem + ", s:" + s); } nullCheckFound(); } } // USE_INFINITE_REPEAT_MONOMANIAC_MEM_STATUS_CHECK private void opNullCheckEndMemST() { int mem = code[ip++]; /* mem: null check id */ int isNull = nullCheckMemSt(mem, s); if (isNull != 0) { if (Config.DEBUG_MATCH) { Config.log.println("NULL_CHECK_END_MEMST: skip id:" + mem + ", s:" + s); } if (isNull == -1) {opFail(); return;} nullCheckFound(); } } // USE_SUBEXP_CALL private void opNullCheckEndMemSTPush() { int mem = code[ip++]; /* mem: null check id */ int isNull; if 
(Config.USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT) { isNull = nullCheckMemStRec(mem, s); } else { isNull = nullCheckRec(mem, s); } if (isNull != 0) { if (Config.DEBUG_MATCH) { Config.log.println("NULL_CHECK_END_MEMST_PUSH: skip id:" + mem + ", s:" + s); } if (isNull == -1) {opFail(); return;} nullCheckFound(); } else { pushNullCheckEnd(mem); } } private void opJump() { ip += code[ip] + 1; } private void opPush() { int addr = code[ip++]; pushAlt(ip + addr, s, sprev); } // CEC private void opStateCheckPush() { int mem = code[ip++]; if (stateCheckVal(s, mem)) {opFail(); return;} int addr = code[ip++]; pushAltWithStateCheck(ip + addr, s, sprev, mem); } // CEC private void opStateCheckPushOrJump() { int mem = code[ip++]; int addr= code[ip++]; if (stateCheckVal(s, mem)) { ip += addr; } else { pushAltWithStateCheck(ip + addr, s, sprev, mem); } } // CEC private void opStateCheck() { int mem = code[ip++]; if (stateCheckVal(s, mem)) {opFail(); return;} pushStateCheck(s, mem); } private void opPop() { popOne(); } private void opPushOrJumpExact1() { int addr = code[ip++]; // beyond string check if (s < range && code[ip] == bytes[s]) { ip++; pushAlt(ip + addr, s, sprev); return; } ip += addr + 1; } private void opPushIfPeekNext() { int addr = code[ip++]; // beyond string check if (s < range && code[ip] == bytes[s]) { ip++; pushAlt(ip + addr, s, sprev); return; } ip++; } private void opRepeat() { int mem = code[ip++]; /* mem: OP_REPEAT ID */ int addr= code[ip++]; // ensure1(); repeatStk[mem] = stk; pushRepeat(mem, ip); if (regex.repeatRangeLo[mem] == 0) { // lower pushAlt(ip + addr, s, sprev); } } private void opRepeatNG() { int mem = code[ip++]; /* mem: OP_REPEAT ID */ int addr= code[ip++]; // ensure1(); repeatStk[mem] = stk; pushRepeat(mem, ip); if (regex.repeatRangeLo[mem] == 0) { pushAlt(ip, s, sprev); ip += addr; } } private void repeatInc(int mem, int si) { StackEntry e = stack[si]; e.increaseRepeatCount(); if (e.getRepeatCount() >= regex.repeatRangeHi[mem]) { /* end 
of repeat. Nothing to do. */ } else if (e.getRepeatCount() >= regex.repeatRangeLo[mem]) { pushAlt(ip, s, sprev); ip = e.getRepeatPCode(); /* Don't use stkp after PUSH. */ } else { ip = e.getRepeatPCode(); } pushRepeatInc(si); } private void opRepeatInc() { int mem = code[ip++]; /* mem: OP_REPEAT ID */ int si = repeatStk[mem]; repeatInc(mem, si); } private void opRepeatIncSG() { int mem = code[ip++]; /* mem: OP_REPEAT ID */ int si = getRepeat(mem); repeatInc(mem, si); } private void repeatIncNG(int mem, int si) { StackEntry e = stack[si]; e.increaseRepeatCount(); if (e.getRepeatCount() < regex.repeatRangeHi[mem]) { if (e.getRepeatCount() >= regex.repeatRangeLo[mem]) { int pcode = e.getRepeatPCode(); pushRepeatInc(si); pushAlt(pcode, s, sprev); } else { ip = e.getRepeatPCode(); pushRepeatInc(si); } } else if (e.getRepeatCount() == regex.repeatRangeHi[mem]) { pushRepeatInc(si); } } private void opRepeatIncNG() { int mem = code[ip++]; int si = repeatStk[mem]; repeatIncNG(mem, si); } private void opRepeatIncNGSG() { int mem = code[ip++]; int si = getRepeat(mem); repeatIncNG(mem, si); } private void opPushPos() { pushPos(s, sprev); } private void opPopPos() { StackEntry e = stack[posEnd()]; s = e.getStatePStr(); sprev= e.getStatePStrPrev(); } private void opPushPosNot() { int addr = code[ip++]; pushPosNot(ip + addr, s, sprev); } private void opFailPos() { popTilPosNot(); opFail(); } private void opPushStopBT() { pushStopBT(); } private void opPopStopBT() { stopBtEnd(); } private void opLookBehind() { int tlen = code[ip++]; s = enc.stepBack(bytes, str, s, end, tlen); if (s == -1) {opFail(); return;} sprev = enc.prevCharHead(bytes, str, s, end); } private void opLookBehindSb() { int tlen = code[ip++]; s -= tlen; if (s < str) {opFail(); return;} sprev = s == str ? -1 : s - 1; } private void opPushLookBehindNot() { int addr = code[ip++]; int tlen = code[ip++]; int q = enc.stepBack(bytes, str, s, end, tlen); if (q == -1) { /* too short case -> success. ex. /(? 
0) sb.append(new String(new byte[]{(byte)code[s++]})); } private void pStringFromTemplate(StringBuilder sb, int len, byte[]tm, int idx) { sb.append(":T:"); while (len-- > 0) sb.append(new String(new byte[]{tm[idx++]})); } private void pLenString(StringBuilder sb, int len, int mbLen, int s) { int x = len * mbLen; sb.append(":" + len + ":"); while (x-- > 0) sb.append(new String(new byte[]{(byte)code[s++]})); } private void pLenStringFromTemplate(StringBuilder sb, int len, int mbLen, byte[]tm, int idx) { int x = len * mbLen; sb.append(":T:" + len + ":"); while (x-- > 0) sb.append(new String(new byte[]{(byte)tm[idx++]})); } public int compiledByteCodeToString(StringBuilder sb, int bp) { int len, n, mem, addr, scn, cod; BitSet bs; CClassNode cc; int tm, idx; sb.append("[" + OPCode.OpCodeNames[code[bp]]); int argType = OPCode.OpCodeArgTypes[code[bp]]; int ip = bp; if (argType != Arguments.SPECIAL) { bp++; switch (argType) { case Arguments.NON: break; case Arguments.RELADDR: sb.append(":(" + code[bp] + ")"); bp += OPSize.RELADDR; break; case Arguments.ABSADDR: sb.append(":(" + code[bp] + ")"); bp += OPSize.ABSADDR; break; case Arguments.LENGTH: sb.append(":" + code[bp]); bp += OPSize.LENGTH; break; case Arguments.MEMNUM: sb.append(":" + code[bp]); bp += OPSize.MEMNUM; break; case Arguments.OPTION: sb.append(":" + code[bp]); bp += OPSize.OPTION; break; case Arguments.STATE_CHECK: sb.append(":" + code[bp]); bp += OPSize.STATE_CHECK; break; } } else { switch (code[bp++]) { case OPCode.EXACT1: case OPCode.ANYCHAR_STAR_PEEK_NEXT: case OPCode.ANYCHAR_ML_STAR_PEEK_NEXT: case OPCode.ANYCHAR_STAR_PEEK_NEXT_SB: case OPCode.ANYCHAR_ML_STAR_PEEK_NEXT_SB: pString(sb, 1, bp++); break; case OPCode.EXACT2: pString(sb, 2, bp); bp += 2; break; case OPCode.EXACT3: pString(sb, 3, bp); bp += 3; break; case OPCode.EXACT4: pString(sb, 4, bp); bp += 4; break; case OPCode.EXACT5: pString(sb, 5, bp); bp += 5; break; case OPCode.EXACTN: len = code[bp]; bp += OPSize.LENGTH; if 
(Config.USE_STRING_TEMPLATES) { tm = code[bp]; bp += OPSize.INDEX; idx = code[bp]; bp += OPSize.INDEX; pLenStringFromTemplate(sb, len, 1, templates[tm], idx); } else { pLenString(sb, len, 1, bp); bp += len; } break; case OPCode.EXACTMB2N1: pString(sb, 2, bp); bp += 2; break; case OPCode.EXACTMB2N2: pString(sb, 4, bp); bp += 4; break; case OPCode.EXACTMB2N3: pString(sb, 6, bp); bp += 6; break; case OPCode.EXACTMB2N: len = code[bp]; bp += OPSize.LENGTH; if (Config.USE_STRING_TEMPLATES) { tm = code[bp]; bp += OPSize.INDEX; idx = code[bp]; bp += OPSize.INDEX; pLenStringFromTemplate(sb, len, 2, templates[tm], idx); } else { pLenString(sb, len, 2, bp); bp += len * 2; } break; case OPCode.EXACTMB3N: len = code[bp]; bp += OPSize.LENGTH; if (Config.USE_STRING_TEMPLATES) { tm = code[bp]; bp += OPSize.INDEX; idx = code[bp]; bp += OPSize.INDEX; pLenStringFromTemplate(sb, len, 3, templates[tm], idx); } else { pLenString(sb, len, 3, bp); bp += len * 3; } break; case OPCode.EXACTMBN: int mbLen = code[bp]; bp += OPSize.LENGTH; len = code[bp]; bp += OPSize.LENGTH; n = len * mbLen; if (Config.USE_STRING_TEMPLATES) { tm = code[bp]; bp += OPSize.INDEX; idx = code[bp]; bp += OPSize.INDEX; sb.append(":T:" + mbLen + ":" + len + ":"); while (n-- > 0) sb.append(new String(new byte[]{templates[tm][idx++]})); } else { sb.append(":" + mbLen + ":" + len + ":"); while (n-- > 0) sb.append(new String(new byte[]{(byte)code[bp++]})); } break; case OPCode.EXACT1_IC: case OPCode.EXACT1_IC_SB: final int MAX_CHAR_LENGTH = 6; byte[]bytes = new byte[MAX_CHAR_LENGTH]; for (int i = 0; bp + i < code.length && i < MAX_CHAR_LENGTH; i++) bytes[i] = (byte)code[bp + i]; len = enc.length(bytes, 0, MAX_CHAR_LENGTH); pString(sb, len, bp); bp += len; break; case OPCode.EXACTN_IC: case OPCode.EXACTN_IC_SB: len = code[bp]; bp += OPSize.LENGTH; if (Config.USE_STRING_TEMPLATES) { tm = code[bp]; bp += OPSize.INDEX; idx = code[bp]; bp += OPSize.INDEX; pLenStringFromTemplate(sb, len, 1, templates[tm], idx); } else { 
pLenString(sb, len, 1, bp); bp += len; } break; case OPCode.CCLASS: case OPCode.CCLASS_SB: bs = new BitSet(); System.arraycopy(code, bp, bs.bits, 0, BitSet.BITSET_SIZE); n = bs.numOn(); bp += BitSet.BITSET_SIZE; sb.append(":" + n); break; case OPCode.CCLASS_NOT: case OPCode.CCLASS_NOT_SB: bs = new BitSet(); System.arraycopy(code, bp, bs.bits, 0, BitSet.BITSET_SIZE); n = bs.numOn(); bp += BitSet.BITSET_SIZE; sb.append(":" + n); break; case OPCode.CCLASS_MB: case OPCode.CCLASS_MB_NOT: len = code[bp]; bp += OPSize.LENGTH; cod = code[bp]; //bp += OPSize.CODE_POINT; bp += len; sb.append(":" + cod + ":" + len); break; case OPCode.CCLASS_MIX: case OPCode.CCLASS_MIX_NOT: bs = new BitSet(); System.arraycopy(code, bp, bs.bits, 0, BitSet.BITSET_SIZE); n = bs.numOn(); bp += BitSet.BITSET_SIZE; len = code[bp]; bp += OPSize.LENGTH; cod = code[bp]; //bp += OPSize.CODE_POINT; bp += len; sb.append(":" + n + ":" + cod + ":" + len); break; case OPCode.CCLASS_NODE: cc = (CClassNode)operands[code[bp]]; bp += OPSize.POINTER; n = cc.bs.numOn(); sb.append(":" + cc + ":" + n); break; case OPCode.BACKREFN_IC: mem = code[bp]; bp += OPSize.MEMNUM; sb.append(":" + mem); break; case OPCode.BACKREF_MULTI_IC: case OPCode.BACKREF_MULTI: sb.append(" "); len = code[bp]; bp += OPSize.LENGTH; for (int i=0; i 0) sb.append(", "); sb.append(mem); } break; case OPCode.BACKREF_WITH_LEVEL: { int option = code[bp]; bp += OPSize.OPTION; sb.append(":" + option); int level = code[bp]; bp += OPSize.LENGTH; sb.append(":" + level); sb.append(" "); len = code[bp]; bp += OPSize.LENGTH; for (int i=0; i 0) sb.append(", "); sb.append(mem); } break; } case OPCode.REPEAT: case OPCode.REPEAT_NG: mem = code[bp]; bp += OPSize.MEMNUM; addr = code[bp]; bp += OPSize.RELADDR; sb.append(":" + mem + ":" + addr); break; case OPCode.PUSH_OR_JUMP_EXACT1: case OPCode.PUSH_IF_PEEK_NEXT: addr = code[bp]; bp += OPSize.RELADDR; sb.append(":(" + addr + ")"); pString(sb, 1, bp); bp++; break; case OPCode.LOOK_BEHIND: case 
OPCode.LOOK_BEHIND_SB: len = code[bp]; bp += OPSize.LENGTH; sb.append(":" + len); break; case OPCode.PUSH_LOOK_BEHIND_NOT: addr = code[bp]; bp += OPSize.RELADDR; len = code[bp]; bp += OPSize.LENGTH; sb.append(":" + len + ":(" + addr + ")"); break; case OPCode.STATE_CHECK_PUSH: case OPCode.STATE_CHECK_PUSH_OR_JUMP: scn = code[bp]; bp += OPSize.STATE_CHECK_NUM; addr = code[bp]; bp += OPSize.RELADDR; sb.append(":" + scn + ":(" + addr + ")"); break; default: throw new InternalException("undefined code: " + code[--bp]); } } sb.append("]"); // @opcode_address(opcode_size) if (Config.DEBUG_COMPILE_BYTE_CODE_INFO) sb.append("@" + ip + "(" + (bp - ip) + ")"); return bp; } private String compiledByteCodeListToString() { StringBuilder sb = new StringBuilder(); sb.append("code length: " + codeLength + "\n"); int ncode = 0; int bp = 0; int end = codeLength; while (bp < end) { ncode++; if (bp > 0) sb.append(ncode % 5 == 0 ? "\n" : " "); bp = compiledByteCodeToString(sb, bp); } sb.append("\n"); return sb.toString(); } } joni-2.0.0/src/org/joni/CaptureTreeNode.java000066400000000000000000000045511214326443200206670ustar00rootroot00000000000000/* * Permission is hereby granted, free of charge, to any person obtaining a copy of * this software and associated documentation files (the "Software"), to deal in * the Software without restriction, including without limitation the rights to * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do * so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ package org.joni; public class CaptureTreeNode { int group; int beg; int end; // int allocated; int numChildren; CaptureTreeNode[]children; CaptureTreeNode() { beg = Region.REGION_NOTPOS; end = Region.REGION_NOTPOS; group = -1; } static final int HISTORY_TREE_INIT_ALLOC_SIZE = 8; void addChild(CaptureTreeNode child) { if (children == null) { children = new CaptureTreeNode[HISTORY_TREE_INIT_ALLOC_SIZE]; } else if (numChildren >= children.length) { CaptureTreeNode[]tmp = new CaptureTreeNode[children.length << 1]; System.arraycopy(children, 0, tmp, 0, children.length); children = tmp; } children[numChildren] = child; numChildren++; } void clear() { for (int i=0; i 0 && i % 6 == 0) buf.append("\n "); } return buf.toString(); } private static String rangeNumToString(int num){ return "0x" + Integer.toString(num, 16); } public void expand(int low) { int length = p.length; do { length <<= 1; } while (length < low); int[]tmp = new int[length]; System.arraycopy(p, 0, tmp, 0, used); p = tmp; } public void ensureSize(int size) { int length = p.length; while (length < size ) { length <<= 1; } if (p.length != length) { int[]tmp = new int[length]; System.arraycopy(p, 0, tmp, 0, used); p = tmp; } } private void moveRight(int from, int to, int n) { if (to + n > p.length) expand(to + n); System.arraycopy(p, from, p, to, n); if (to + n > used) used = to + n; } protected void moveLeft(int from, int to, int n) { System.arraycopy(p, from, p, to, n); } private void moveLeftAndReduce(int from, int to) { System.arraycopy(p, from, p, to, used - from); used -= from - to; } public void writeCodePoint(int pos, int b) { int u = pos + 1; if (p.length < u) expand(u); p[pos] = b; if (used < u) used = u; } public CodeRangeBuffer 
clone() { return new CodeRangeBuffer(this); } // ugly part: these methods should be made OO // add_code_range_to_buf public static CodeRangeBuffer addCodeRangeToBuff(CodeRangeBuffer pbuf, int from, int to) { if (from > to) { int n = from; from = to; to = n; } if (pbuf == null) pbuf = new CodeRangeBuffer(); // move to CClassNode int[]p = pbuf.p; int n = p[0]; int low = 0; int bound = n; while (low < bound) { int x = (low + bound) >>> 1; if (from > p[x * 2 + 2]) { low = x + 1; } else { bound = x; } } int high = low; bound = n; while (high < bound) { int x = (high + bound) >>> 1; if (to >= p[x * 2 + 1] - 1) { high = x + 1; } else { bound = x; } } int incN = low + 1 - high; if (n + incN > Config.MAX_MULTI_BYTE_RANGES_NUM) throw new ValueException(ErrorMessages.ERR_TOO_MANY_MULTI_BYTE_RANGES); if (incN != 1) { if (from > p[low * 2 + 1]) from = p[low * 2 + 1]; if (to < p[(high - 1) * 2 + 2]) to = p[(high - 1) * 2 + 2]; } if (incN != 0 && high < n) { int fromPos = 1 + high * 2; int toPos = 1 + (low + 1) * 2; int size = (n - high) * 2; if (incN > 0) { pbuf.moveRight(fromPos, toPos, size); } else { pbuf.moveLeftAndReduce(fromPos, toPos); } } int pos = 1 + low * 2; // pbuf.ensureSize(pos + 2); pbuf.writeCodePoint(pos, from); pbuf.writeCodePoint(pos + 1, to); n += incN; pbuf.writeCodePoint(0, n); return pbuf; } // add_code_range, be aware of it returning null! 
public static CodeRangeBuffer addCodeRange(CodeRangeBuffer pbuf, ScanEnvironment env, int from, int to) { if (from >to) { if (env.syntax.allowEmptyRangeInCC()) { return pbuf; } else { throw new ValueException(ErrorMessages.ERR_EMPTY_RANGE_IN_CHAR_CLASS); } } return addCodeRangeToBuff(pbuf, from, to); } // SET_ALL_MULTI_BYTE_RANGE protected static CodeRangeBuffer setAllMultiByteRange(Encoding enc, CodeRangeBuffer pbuf) { return addCodeRangeToBuff(pbuf, enc.mbcodeStartPosition(), ALL_MULTI_BYTE_RANGE); } // ADD_ALL_MULTI_BYTE_RANGE public static CodeRangeBuffer addAllMultiByteRange(Encoding enc, CodeRangeBuffer pbuf) { if (!enc.isSingleByte()) return setAllMultiByteRange(enc, pbuf); return pbuf; } // not_code_range_buf public static CodeRangeBuffer notCodeRangeBuff(Encoding enc, CodeRangeBuffer bbuf) { CodeRangeBuffer pbuf = null; if (bbuf == null) return setAllMultiByteRange(enc, pbuf); int[]p = bbuf.p; int n = p[0]; if (n <= 0) return setAllMultiByteRange(enc, pbuf); int pre = enc.mbcodeStartPosition(); int from; int to = 0; for (int i=0; i to1) break; } if (from1 <= to1) { pbuf = addCodeRangeToBuff(pbuf, from1, to1); } return pbuf; } // and_code_range_buf public static CodeRangeBuffer andCodeRangeBuff(CodeRangeBuffer bbuf1, boolean not1, CodeRangeBuffer bbuf2, boolean not2) { CodeRangeBuffer pbuf = null; if (bbuf1 == null) { if (not1 && bbuf2 != null) return bbuf2.clone(); /* not1 != 0 -> not2 == 0 */ return null; } else if (bbuf2 == null) { if (not2) return bbuf1.clone(); return null; } if (not1) { CodeRangeBuffer tbuf; boolean tnot; // swap tnot = not1; not1 = not2; not2 = tnot; tbuf = bbuf1; bbuf1 = bbuf2; bbuf2 = tbuf; } int[]p1 = bbuf1.p; int n1 = p1[0]; int[]p2 = bbuf2.p; int n2 = p2[0]; if (!not2 && !not1) { /* 1 AND 2 */ for (int i=0; i to1) break; if (to2 < from1) continue; int from = from1 > from2 ? from1 : from2; int to = to1 < to2 ? 
to1 : to2; pbuf = addCodeRangeToBuff(pbuf, from, to); } } } else if (!not1) { /* 1 AND (not 2) */ for (int i=0; i, \k */ final boolean USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT = true; /* /(?:()|())*\2/ */ final boolean USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE = true; /* /\n$/ =~ "\n" */ final boolean USE_WARNING_REDUNDANT_NESTED_REPEAT_OPERATOR = false; final boolean CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS = true; final boolean USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE = false; final boolean USE_CAPTURE_HISTORY = false; final boolean USE_VARIABLE_META_CHARS = true; final boolean USE_WORD_BEGIN_END = true; /* "\<": word-begin, "\>": word-end */ final boolean USE_POSIX_API_REGION_OPTION = true; /* needed for POSIX API support */ final boolean USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE = true; final boolean USE_COMBINATION_EXPLOSION_CHECK = false; final int NREGION = 10; final int MAX_BACKREF_NUM = 1000; final int MAX_REPEAT_NUM = 100000; final int MAX_MULTI_BYTE_RANGES_NUM = 10000; final boolean USE_WARN = true; // internal config final boolean USE_PARSE_TREE_NODE_RECYCLE = true; final boolean USE_OP_PUSH_OR_JUMP_EXACT = true; final boolean USE_SHARED_CCLASS_TABLE = false; final boolean USE_QTFR_PEEK_NEXT = true; final int INIT_MATCH_STACK_SIZE = 64; final int DEFAULT_MATCH_STACK_LIMIT_SIZE = 0; /* unlimited */ final int NUMBER_OF_POOLED_STACKS = 4; final boolean DONT_OPTIMIZE = false; final boolean USE_STRING_TEMPLATES = true; // use embeded string templates in Regex object as byte arrays instead of compiling them into int bytecode array final int MAX_CAPTURE_HISTORY_GROUP = 31; final int CHECK_STRING_THRESHOLD_LEN = 7; final int CHECK_BUFF_MAX_SIZE = 0x4000; final boolean NON_UNICODE_SDW = true; final PrintStream log = System.out; final PrintStream err = System.err; final boolean DEBUG_ALL = false; final boolean DEBUG = DEBUG_ALL; final boolean DEBUG_PARSE_TREE = DEBUG_ALL; final boolean DEBUG_PARSE_TREE_RAW = true; final boolean DEBUG_COMPILE = 
DEBUG_ALL; final boolean DEBUG_COMPILE_BYTE_CODE_INFO = DEBUG_ALL; final boolean DEBUG_SEARCH = DEBUG_ALL; final boolean DEBUG_MATCH = DEBUG_ALL; final boolean DEBUG_ASM = true; final boolean DEBUG_ASM_EXEC = true; } joni-2.0.0/src/org/joni/Lexer.java000066400000000000000000001263171214326443200167220ustar00rootroot00000000000000/* * Permission is hereby granted, free of charge, to any person obtaining a copy of * this software and associated documentation files (the "Software"), to deal in * the Software without restriction, including without limitation the rights to * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do * so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ package org.joni; import static org.joni.Option.isSingleline; import static org.joni.ast.QuantifierNode.isRepeatInfinite; import org.jcodings.Ptr; import org.jcodings.constants.CharacterType; import org.jcodings.exception.CharacterPropertyException; import org.joni.ast.QuantifierNode; import org.joni.constants.AnchorType; import org.joni.constants.MetaChar; import org.joni.constants.TokenType; import org.joni.exception.ErrorMessages; class Lexer extends ScannerSupport { protected final ScanEnvironment env; protected final Syntax syntax; // fast access to syntax protected final Token token = new Token(); // current token protected Lexer(ScanEnvironment env, byte[]bytes, int p, int end) { super(env.enc, bytes, p, end); this.env = env; this.syntax = env.syntax; } /** * @return 0: normal {n,m}, 2: fixed {n} * !introduce returnCode here */ private int fetchRangeQuantifier() { mark(); boolean synAllow = syntax.allowInvalidInterval(); if (!left()) { if (synAllow) { return 1; /* "....{" : OK! */ } else { newSyntaxException(ERR_END_PATTERN_AT_LEFT_BRACE); } } if (!synAllow) { c = peek(); if (c == ')' || c == '(' || c == '|') { newSyntaxException(ERR_END_PATTERN_AT_LEFT_BRACE); } } int low = scanUnsignedNumber(); if (low < 0) newSyntaxException(ErrorMessages.ERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE); if (low > Config.MAX_REPEAT_NUM) newSyntaxException(ErrorMessages.ERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE); boolean nonLow = false; if (p == _p) { /* can't read low */ if (syntax.allowIntervalLowAbbrev()) { low = 0; nonLow = true; } else { return invalidRangeQuantifier(synAllow); } } if (!left()) return invalidRangeQuantifier(synAllow); fetch(); int up; int ret = 0; if (c == ',') { int prev = p; // ??? 
last up = scanUnsignedNumber(); if (up < 0) newValueException(ERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE); if (up > Config.MAX_REPEAT_NUM) newValueException(ERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE); if (p == prev) { if (nonLow) return invalidRangeQuantifier(synAllow); up = QuantifierNode.REPEAT_INFINITE; /* {n,} : {n,infinite} */ } } else { if (nonLow) return invalidRangeQuantifier(synAllow); unfetch(); up = low; /* {n} : exact n times */ ret = 2; /* fixed */ } if (!left()) return invalidRangeQuantifier(synAllow); fetch(); if (syntax.opEscBraceInterval()) { if (c != syntax.metaCharTable.esc) return invalidRangeQuantifier(synAllow); fetch(); } if (c != '}') return invalidRangeQuantifier(synAllow); if (!isRepeatInfinite(up) && low > up) { newValueException(ERR_UPPER_SMALLER_THAN_LOWER_IN_REPEAT_RANGE); } token.type = TokenType.INTERVAL; token.setRepeatLower(low); token.setRepeatUpper(up); return ret; /* 0: normal {n,m}, 2: fixed {n} */ } private int invalidRangeQuantifier(boolean synAllow) { if (synAllow) { restore(); return 1; } else { newSyntaxException(ERR_INVALID_REPEAT_RANGE_PATTERN); return 0; // not reached } } /* \M-, \C-, \c, or \... 
*/ private int fetchEscapedValue() { if (!left()) newSyntaxException(ERR_END_PATTERN_AT_ESCAPE); fetch(); switch(c) { case 'M': if (syntax.op2EscCapitalMBarMeta()) { if (!left()) newSyntaxException(ERR_END_PATTERN_AT_META); fetch(); if (c != '-') newSyntaxException(ERR_META_CODE_SYNTAX); if (!left()) newSyntaxException(ERR_END_PATTERN_AT_META); fetch(); if (c == syntax.metaCharTable.esc) { c = fetchEscapedValue(); } c = ((c & 0xff) | 0x80); } else { fetchEscapedValueBackSlash(); } break; case 'C': if (syntax.op2EscCapitalCBarControl()) { if (!left()) newSyntaxException(ERR_END_PATTERN_AT_CONTROL); fetch(); if (c != '-') newSyntaxException(ERR_CONTROL_CODE_SYNTAX); fetchEscapedValueControl(); } else { fetchEscapedValueBackSlash(); } break; case 'c': if (syntax.opEscCControl()) { fetchEscapedValueControl(); } /* fall through */ default: fetchEscapedValueBackSlash(); } // switch return c; // ??? } private void fetchEscapedValueBackSlash() { c = env.convertBackslashValue(c); } private void fetchEscapedValueControl() { if (!left()) newSyntaxException(ERR_END_PATTERN_AT_CONTROL); fetch(); if (c == '?') { c = 0177; } else { if (c == syntax.metaCharTable.esc) { c = fetchEscapedValue(); } c &= 0x9f; } } private int nameEndCodePoint(int start) { switch(start) { case '<': return '>'; case '\'': return '\''; default: return 0; } } // USE_NAMED_GROUP && USE_BACKREF_AT_LEVEL /* \k, \k \k, \k \k<-num+n>, \k<-num-n> */ // value implicit (rnameEnd) private boolean fetchNameWithLevel(int startCode, Ptr rbackNum, Ptr rlevel) { int src = p; boolean existLevel = false; int isNum = 0; int sign = 1; int endCode = nameEndCodePoint(startCode); int pnumHead = p; int nameEnd = stop; String err = null; if (!left()) { newValueException(ERR_EMPTY_GROUP_NAME); } else { fetch(); if (c == endCode) newValueException(ERR_EMPTY_GROUP_NAME); if (enc.isDigit(c)) { isNum = 1; } else if (c == '-') { isNum = 2; sign = -1; pnumHead = p; } else if (!enc.isWord(c)) { err = ERR_INVALID_GROUP_NAME; } } while 
(left()) { nameEnd = p; fetch(); if (c == endCode || c == ')' || c == '+' || c == '-') { if (isNum == 2) err = ERR_INVALID_GROUP_NAME; break; } if (isNum != 0) { if (enc.isDigit(c)) { isNum = 1; } else { err = ERR_INVALID_GROUP_NAME; // isNum = 0; } } else if (!enc.isWord(c)) { err = ERR_INVALID_CHAR_IN_GROUP_NAME; } } boolean isEndCode = false; if (err == null && c != endCode) { if (c == '+' || c == '-') { int flag = c == '-' ? -1 : 1; fetch(); if (!enc.isDigit(c)) newValueException(ERR_INVALID_GROUP_NAME, src, stop); unfetch(); int level = scanUnsignedNumber(); if (level < 0) newValueException(ERR_TOO_BIG_NUMBER); rlevel.p = level * flag; existLevel = true; fetch(); isEndCode = c == endCode; } if (!isEndCode) { err = ERR_INVALID_GROUP_NAME; nameEnd = stop; } } if (err == null) { if (isNum != 0) { mark(); p = pnumHead; int backNum = scanUnsignedNumber(); restore(); if (backNum < 0) { newValueException(ERR_TOO_BIG_NUMBER); } else if (backNum == 0) { newValueException(ERR_INVALID_GROUP_NAME, src, stop); } rbackNum.p = backNum * sign; } value = nameEnd; return existLevel; } else { newValueException(ERR_INVALID_GROUP_NAME, src, nameEnd); return false; // not reached } } // USE_NAMED_GROUP // ref: 0 -> define name (don't allow number name) // 1 -> reference name (allow number name) private int fetchNameForNamedGroup(int startCode, boolean ref) { int src = p; value = 0; int isNum = 0; int sign = 1; int endCode = nameEndCodePoint(startCode); int pnumHead = p; int nameEnd = stop; String err = null; if (!left()) { newValueException(ERR_EMPTY_GROUP_NAME); } else { fetch(); if (c == endCode) newValueException(ERR_EMPTY_GROUP_NAME); if (enc.isDigit(c)) { if (ref) { isNum = 1; } else { err = ERR_INVALID_GROUP_NAME; // isNum = 0; } } else if (c == '-') { if (ref) { isNum = 2; sign = -1; pnumHead = p; } else { err = ERR_INVALID_GROUP_NAME; // isNum = 0; } } else if (!enc.isWord(c)) { err = ERR_INVALID_CHAR_IN_GROUP_NAME; } } if (err == null) { while (left()) { nameEnd = p; 
fetch(); if (c == endCode || c == ')') { if (isNum == 2) err = ERR_INVALID_GROUP_NAME; break; } if (isNum != 0) { if (enc.isDigit(c)) { isNum = 1; } else { if (!enc.isWord(c)) { err = ERR_INVALID_CHAR_IN_GROUP_NAME; } else { err = ERR_INVALID_GROUP_NAME; } // isNum = 0; } } else { if (!enc.isWord(c)) { err = ERR_INVALID_CHAR_IN_GROUP_NAME; } } } if (c != endCode) { err = ERR_INVALID_GROUP_NAME; nameEnd = stop; } int backNum = 0; if (isNum != 0) { mark(); p = pnumHead; backNum = scanUnsignedNumber(); restore(); if (backNum < 0) { newValueException(ERR_TOO_BIG_NUMBER); } else if (backNum == 0) { newValueException(ERR_INVALID_GROUP_NAME, src, nameEnd); } backNum *= sign; } value = nameEnd; return backNum; } else { while (left()) { nameEnd = p; fetch(); if (c == endCode || c == ')') break; } if (!left()) nameEnd = stop; newValueException(err, src, nameEnd); return 0; // not reached } } // #else USE_NAMED_GROUP // make it return nameEnd! private final int fetchNameForNoNamedGroup(int startCode, boolean ref) { int src = p; value = 0; int isNum = 0; int sign = 1; int endCode = nameEndCodePoint(startCode); int pnumHead = p; int nameEnd = stop; String err = null; if (!left()) { newValueException(ERR_EMPTY_GROUP_NAME); } else { fetch(); if (c == endCode) newValueException(ERR_EMPTY_GROUP_NAME); if (enc.isDigit(c)) { isNum = 1; } else if (c == '-') { isNum = 2; sign = -1; pnumHead = p; } else { err = ERR_INVALID_CHAR_IN_GROUP_NAME; } } while(left()) { nameEnd = p; fetch(); if (c == endCode || c == ')') break; if (!enc.isDigit(c)) err = ERR_INVALID_CHAR_IN_GROUP_NAME; } if (err == null && c != endCode) { err = ERR_INVALID_GROUP_NAME; nameEnd = stop; } if (err == null) { mark(); p = pnumHead; int backNum = scanUnsignedNumber(); restore(); if (backNum < 0) { newValueException(ERR_TOO_BIG_NUMBER); } else if (backNum == 0){ newValueException(ERR_INVALID_GROUP_NAME, src, nameEnd); } backNum *= sign; value = nameEnd; return backNum; } else { newValueException(err, src, nameEnd); 
return 0; // not reached } } protected final int fetchName(int startCode, boolean ref) { if (Config.USE_NAMED_GROUP) { return fetchNameForNamedGroup(startCode, ref); } else { return fetchNameForNoNamedGroup(startCode, ref); } } private boolean strExistCheckWithEsc(int[]s, int n, int bad) { int p = this.p; int to = this.stop; boolean inEsc = false; int i=0; while(p < to) { if (inEsc) { inEsc = false; p += enc.length(bytes, p, to); } else { int x = enc.mbcToCode(bytes, p, to); int q = p + enc.length(bytes, p, to); if (x == s[0]) { for (i=1; i= n) return true; p += enc.length(bytes, p, to); } else { x = enc.mbcToCode(bytes, p, to); if (x == bad) return false; else if (x == syntax.metaCharTable.esc) inEsc = true; p = q; } } } return false; } private static final int send[] = new int[]{':', ']'}; private void fetchTokenInCCFor_charType(boolean flag, int type) { token.type = TokenType.CHAR_TYPE; token.setPropCType(type); token.setPropNot(flag); } private void fetchTokenInCCFor_p() { int c2 = peek(); // !!! 
migrate to peekIs if (c2 == '{' && syntax.op2EscPBraceCharProperty()) { inc(); token.type = TokenType.CHAR_PROPERTY; token.setPropNot(c == 'P'); if (syntax.op2EscPBraceCircumflexNot()) { c2 = fetchTo(); if (c2 == '^') { token.setPropNot(!token.getPropNot()); } else { unfetch(); } } } else { syntaxWarn(Warnings.INVALID_UNICODE_PROPERTY, (char)c); } } private void fetchTokenInCCFor_x() { if (!left()) return; int last = p; if (peekIs('{') && syntax.opEscXBraceHex8()) { inc(); int num = scanUnsignedHexadecimalNumber(8); if (num < 0) newValueException(ERR_TOO_BIG_WIDE_CHAR_VALUE); if (left()) { int c2 = peek(); if (enc.isXDigit(c2)) newValueException(ERR_TOO_LONG_WIDE_CHAR_VALUE); } if (p > last + enc.length(bytes, last, stop) && left() && peekIs('}')) { inc(); token.type = TokenType.CODE_POINT; token.base = 16; token.setCode(num); } else { /* can't read nothing or invalid format */ p = last; } } else if (syntax.opEscXHex2()) { int num = scanUnsignedHexadecimalNumber(2); if (num < 0) newValueException(ERR_TOO_BIG_NUMBER); if (p == last) { /* can't read nothing. */ num = 0; /* but, it's not error */ } token.type = TokenType.RAW_BYTE; token.base = 16; token.setC(num); } } private void fetchTokenInCCFor_u() { if (!left()) return; int last = p; if (syntax.op2EscUHex4()) { int num = scanUnsignedHexadecimalNumber(4); if (num < 0) newValueException(ERR_TOO_BIG_NUMBER); if (p == last) { /* can't read nothing. */ num = 0; /* but, it's not error */ } token.type = TokenType.CODE_POINT; token.base = 16; token.setCode(num); } } private void fetchTokenInCCFor_digit() { if (syntax.opEscOctal3()) { unfetch(); int last = p; int num = scanUnsignedOctalNumber(3); if (num < 0) newValueException(ERR_TOO_BIG_NUMBER); if (p == last) { /* can't read nothing. 
*/ num = 0; /* but, it's not error */ } token.type = TokenType.RAW_BYTE; token.base = 8; token.setC(num); } } private void fetchTokenInCCFor_posixBracket() { if (syntax.opPosixBracket() && peekIs(':')) { token.backP = p; /* point at '[' is readed */ inc(); if (strExistCheckWithEsc(send, send.length, ']')) { token.type = TokenType.POSIX_BRACKET_OPEN; } else { unfetch(); // remove duplication, goto cc_in_cc; if (syntax.op2CClassSetOp()) { token.type = TokenType.CC_CC_OPEN; } else { env.ccEscWarn("["); } } } else { // cc_in_cc: if (syntax.op2CClassSetOp()) { token.type = TokenType.CC_CC_OPEN; } else { env.ccEscWarn("["); } } } private void fetchTokenInCCFor_and() { if (syntax.op2CClassSetOp() && left() && peekIs('&')) { inc(); token.type = TokenType.CC_AND; } } protected final TokenType fetchTokenInCC() { if (!left()) { token.type = TokenType.EOT; return token.type; } fetch(); token.type = TokenType.CHAR; token.base = 0; token.setC(c); token.escaped = false; if (c == ']') { token.type = TokenType.CC_CLOSE; } else if (c == '-') { token.type = TokenType.CC_RANGE; } else if (c == syntax.metaCharTable.esc) { if (!syntax.backSlashEscapeInCC()) return token.type; if (!left()) newSyntaxException(ERR_END_PATTERN_AT_ESCAPE); fetch(); token.escaped = true; token.setC(c); switch (c) { case 'w': fetchTokenInCCFor_charType(false, Config.NON_UNICODE_SDW ? CharacterType.W : CharacterType.WORD); break; case 'W': fetchTokenInCCFor_charType(true, Config.NON_UNICODE_SDW ? CharacterType.W : CharacterType.WORD); break; case 'd': fetchTokenInCCFor_charType(false, Config.NON_UNICODE_SDW ? CharacterType.D : CharacterType.DIGIT); break; case 'D': fetchTokenInCCFor_charType(true, Config.NON_UNICODE_SDW ? CharacterType.D : CharacterType.DIGIT); break; case 's': fetchTokenInCCFor_charType(false, Config.NON_UNICODE_SDW ? CharacterType.S : CharacterType.SPACE); break; case 'S': fetchTokenInCCFor_charType(true, Config.NON_UNICODE_SDW ? 
CharacterType.S : CharacterType.SPACE); break; case 'h': if (syntax.op2EscHXDigit()) fetchTokenInCCFor_charType(false, CharacterType.XDIGIT); break; case 'H': if (syntax.op2EscHXDigit()) fetchTokenInCCFor_charType(true, CharacterType.XDIGIT); break; case 'p': case 'P': fetchTokenInCCFor_p(); break; case 'x': fetchTokenInCCFor_x(); break; case 'u': fetchTokenInCCFor_u(); break; case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': fetchTokenInCCFor_digit(); break; default: unfetch(); int num = fetchEscapedValue(); if (token.getC() != num) { token.setCode(num); token.type = TokenType.CODE_POINT; } break; } // switch } else if (c == '[') { fetchTokenInCCFor_posixBracket(); } else if (c == '&') { fetchTokenInCCFor_and(); } return token.type; } protected final int backrefRelToAbs(int relNo) { return env.numMem + 1 + relNo; } private void fetchTokenFor_repeat(int lower, int upper) { token.type = TokenType.OP_REPEAT; token.setRepeatLower(lower); token.setRepeatUpper(upper); greedyCheck(); } private void fetchTokenFor_openBrace() { switch (fetchRangeQuantifier()) { case 0: greedyCheck(); break; case 2: if (syntax.fixedIntervalIsGreedyOnly()) { possessiveCheck(); } else { greedyCheck(); } break; default: /* 1 : normal char */ } // inner switch } private void fetchTokenFor_anchor(int subType) { token.type = TokenType.ANCHOR; token.setAnchor(subType); } private void fetchTokenFor_xBrace() { if (!left()) return; int last = p; if (peekIs('{') && syntax.opEscXBraceHex8()) { inc(); int num = scanUnsignedHexadecimalNumber(8); if (num < 0) newValueException(ERR_TOO_BIG_WIDE_CHAR_VALUE); if (left()) { if (enc.isXDigit(peek())) newValueException(ERR_TOO_LONG_WIDE_CHAR_VALUE); } if (p > last + enc.length(bytes, last, stop) && left() && peekIs('}')) { inc(); token.type = TokenType.CODE_POINT; token.setCode(num); } else { /* can't read nothing or invalid format */ p = last; } } else if (syntax.opEscXHex2()) { int num = scanUnsignedHexadecimalNumber(2); if (num 
< 0) newValueException(ERR_TOO_BIG_NUMBER); if (p == last) { /* can't read nothing. */ num = 0; /* but, it's not error */ } token.type = TokenType.RAW_BYTE; token.base = 16; token.setC(num); } } private void fetchTokenFor_uHex() { if (!left()) return; int last = p; if (syntax.op2EscUHex4()) { int num = scanUnsignedHexadecimalNumber(4); if (num < 0) newValueException(ERR_TOO_BIG_NUMBER); if (p == last) { /* can't read nothing. */ num = 0; /* but, it's not error */ } token.type = TokenType.CODE_POINT; token.base = 16; token.setCode(num); } } private void fetchTokenFor_digit() { unfetch(); int last = p; int num = scanUnsignedNumber(); if (num < 0 || num > Config.MAX_BACKREF_NUM) { // goto skip_backref } else if (syntax.opDecimalBackref() && (num <= env.numMem || num <= 9)) { /* This spec. from GNU regex */ if (syntax.strictCheckBackref()) { if (num > env.numMem || env.memNodes == null || env.memNodes[num] == null) newValueException(ERR_INVALID_BACKREF); } token.type = TokenType.BACKREF; token.setBackrefNum(1); token.setBackrefRef1(num); token.setBackrefByName(false); if (Config.USE_BACKREF_WITH_LEVEL) token.setBackrefExistLevel(false); return; } if (c == '8' || c == '9') { /* normal char */ // skip_backref: p = last; inc(); return; } p = last; fetchTokenFor_zero(); /* fall through */ } private void fetchTokenFor_zero() { if (syntax.opEscOctal3()) { int last = p; int num = scanUnsignedOctalNumber(c == '0' ? 2 : 3); if (num < 0) newValueException(ERR_TOO_BIG_NUMBER); if (p == last) { /* can't read nothing. 
*/ num = 0; /* but, it's not error */ } token.type = TokenType.RAW_BYTE; token.base = 8; token.setC(num); } else if (c != '0') { inc(); } } private void fetchTokenFor_namedBackref() { if (syntax.op2EscKNamedBackref()) { if (left()) { fetch(); if (c =='<' || c == '\'') { int last = p; int backNum; if (Config.USE_BACKREF_WITH_LEVEL) { Ptr rbackNum = new Ptr(); Ptr rlevel = new Ptr(); token.setBackrefExistLevel(fetchNameWithLevel(c, rbackNum, rlevel)); token.setBackrefLevel(rlevel.p); backNum = rbackNum.p; } else { backNum = fetchName(c, true); } // USE_BACKREF_AT_LEVEL int nameEnd = value; // set by fetchNameWithLevel/fetchName if (backNum != 0) { if (backNum < 0) { backNum = backrefRelToAbs(backNum); if (backNum <= 0) newValueException(ERR_INVALID_BACKREF); } if (syntax.strictCheckBackref() && (backNum > env.numMem || env.memNodes == null)) { newValueException(ERR_INVALID_BACKREF); } token.type = TokenType.BACKREF; token.setBackrefByName(false); token.setBackrefNum(1); token.setBackrefRef1(backNum); } else { NameEntry e = env.reg.nameToGroupNumbers(bytes, last, nameEnd); if (e == null) newValueException(ERR_UNDEFINED_NAME_REFERENCE, last, nameEnd); if (syntax.strictCheckBackref()) { if (e.backNum == 1) { if (e.backRef1 > env.numMem || env.memNodes == null || env.memNodes[e.backRef1] == null) newValueException(ERR_INVALID_BACKREF); } else { for (int i=0; i env.numMem || env.memNodes == null || env.memNodes[e.backRefs[i]] == null) newValueException(ERR_INVALID_BACKREF); } } } token.type = TokenType.BACKREF; token.setBackrefByName(true); if (e.backNum == 1) { token.setBackrefNum(1); token.setBackrefRef1(e.backRef1); } else { token.setBackrefNum(e.backNum); token.setBackrefRefs(e.backRefs); } } } else { unfetch(); syntaxWarn(Warnings.INVALID_BACKREFERENCE); } } else { syntaxWarn(Warnings.INVALID_BACKREFERENCE); } } } private void fetchTokenFor_subexpCall() { if (syntax.op2EscGSubexpCall()) { if (left()) { fetch(); if (c == '<' || c == '\'') { int last = p; int gNum = 
fetchName(c, true); int nameEnd = value; token.type = TokenType.CALL; token.setCallNameP(last); token.setCallNameEnd(nameEnd); token.setCallGNum(gNum); } else { unfetch(); syntaxWarn(Warnings.INVALID_SUBEXP_CALL); } } else { syntaxWarn(Warnings.INVALID_SUBEXP_CALL); } } } private void fetchTokenFor_charProperty() { if (peekIs('{') && syntax.op2EscPBraceCharProperty()) { inc(); token.type = TokenType.CHAR_PROPERTY; token.setPropNot(c == 'P'); if (syntax.op2EscPBraceCircumflexNot()) { fetch(); if (c == '^') { token.setPropNot(!token.getPropNot()); } else { unfetch(); } } } else { syntaxWarn(Warnings.INVALID_UNICODE_PROPERTY, (char)c); } } private void fetchTokenFor_metaChars() { if (c == syntax.metaCharTable.anyChar) { token.type = TokenType.ANYCHAR; } else if (c == syntax.metaCharTable.anyTime) { fetchTokenFor_repeat(0, QuantifierNode.REPEAT_INFINITE); } else if (c == syntax.metaCharTable.zeroOrOneTime) { fetchTokenFor_repeat(0, 1); } else if (c == syntax.metaCharTable.oneOrMoreTime) { fetchTokenFor_repeat(1, QuantifierNode.REPEAT_INFINITE); } else if (c == syntax.metaCharTable.anyCharAnyTime) { token.type = TokenType.ANYCHAR_ANYTIME; // goto out } } protected final TokenType fetchToken() { // mark(); // out start: while(true) { if (!left()) { token.type = TokenType.EOT; return token.type; } token.type = TokenType.STRING; token.base = 0; token.backP = p; fetch(); if (c == syntax.metaCharTable.esc && !syntax.op2IneffectiveEscape()) { // IS_MC_ESC_CODE(code, syn) if (!left()) newSyntaxException(ERR_END_PATTERN_AT_ESCAPE); token.backP = p; fetch(); token.setC(c); token.escaped = true; switch(c) { case '*': if (syntax.opEscAsteriskZeroInf()) fetchTokenFor_repeat(0, QuantifierNode.REPEAT_INFINITE); break; case '+': if (syntax.opEscPlusOneInf()) fetchTokenFor_repeat(1, QuantifierNode.REPEAT_INFINITE); break; case '?': if (syntax.opEscQMarkZeroOne()) fetchTokenFor_repeat(0, 1); break; case '{': if (syntax.opEscBraceInterval()) fetchTokenFor_openBrace(); break; case '|': if 
(syntax.opEscVBarAlt()) token.type = TokenType.ALT; break; case '(': if (syntax.opEscLParenSubexp()) token.type = TokenType.SUBEXP_OPEN; break; case ')': if (syntax.opEscLParenSubexp()) token.type = TokenType.SUBEXP_CLOSE; break; case 'w': if (syntax.opEscWWord()) fetchTokenInCCFor_charType(false, Config.NON_UNICODE_SDW ? CharacterType.W : CharacterType.WORD); break; case 'W': if (syntax.opEscWWord()) fetchTokenInCCFor_charType(true, Config.NON_UNICODE_SDW ? CharacterType.W : CharacterType.WORD); break; case 'b': if (syntax.opEscBWordBound()) fetchTokenFor_anchor(AnchorType.WORD_BOUND); break; case 'B': if (syntax.opEscBWordBound()) fetchTokenFor_anchor(AnchorType.NOT_WORD_BOUND); break; case '<': if (Config.USE_WORD_BEGIN_END && syntax.opEscLtGtWordBeginEnd()) fetchTokenFor_anchor(AnchorType.WORD_BEGIN); break; case '>': if (Config.USE_WORD_BEGIN_END && syntax.opEscLtGtWordBeginEnd()) fetchTokenFor_anchor(AnchorType.WORD_END); break; case 's': if (syntax.opEscSWhiteSpace()) fetchTokenInCCFor_charType(false, Config.NON_UNICODE_SDW ? CharacterType.S : CharacterType.SPACE); break; case 'S': if (syntax.opEscSWhiteSpace()) fetchTokenInCCFor_charType(true, Config.NON_UNICODE_SDW ? CharacterType.S : CharacterType.SPACE); break; case 'd': if (syntax.opEscDDigit()) fetchTokenInCCFor_charType(false, Config.NON_UNICODE_SDW ? CharacterType.D : CharacterType.DIGIT); break; case 'D': if (syntax.opEscDDigit()) fetchTokenInCCFor_charType(true, Config.NON_UNICODE_SDW ? 
CharacterType.D : CharacterType.DIGIT); break; case 'h': if (syntax.op2EscHXDigit()) fetchTokenInCCFor_charType(false, CharacterType.XDIGIT); break; case 'H': if (syntax.op2EscHXDigit()) fetchTokenInCCFor_charType(true, CharacterType.XDIGIT); break; case 'A': if (syntax.opEscAZBufAnchor()) fetchTokenFor_anchor(AnchorType.BEGIN_BUF); break; case 'Z': if (syntax.opEscAZBufAnchor()) fetchTokenFor_anchor(AnchorType.SEMI_END_BUF); break; case 'z': if (syntax.opEscAZBufAnchor()) fetchTokenFor_anchor(AnchorType.END_BUF); break; case 'G': if (syntax.opEscCapitalGBeginAnchor()) fetchTokenFor_anchor(AnchorType.BEGIN_POSITION); break; case '`': if (syntax.op2EscGnuBufAnchor()) fetchTokenFor_anchor(AnchorType.BEGIN_BUF); break; case '\'': if (syntax.op2EscGnuBufAnchor()) fetchTokenFor_anchor(AnchorType.END_BUF); break; case 'x': fetchTokenFor_xBrace(); break; case 'u': fetchTokenFor_uHex(); break; case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': fetchTokenFor_digit(); break; case '0': fetchTokenFor_zero(); break; case 'k': if (Config.USE_NAMED_GROUP) fetchTokenFor_namedBackref(); break; case 'g': if (Config.USE_SUBEXP_CALL) fetchTokenFor_subexpCall(); break; case 'Q': if (syntax.op2EscCapitalQQuote()) token.type = TokenType.QUOTE_OPEN; break; case 'p': case 'P': fetchTokenFor_charProperty(); break; default: unfetch(); int num = fetchEscapedValue(); /* set_raw: */ if (token.getC() != num) { token.type = TokenType.CODE_POINT; token.setCode(num); } else { /* string */ p = token.backP + enc.length(bytes, token.backP, stop); } break; } // switch (c) } else { token.setC(c); token.escaped = false; if (Config.USE_VARIABLE_META_CHARS && (c != MetaChar.INEFFECTIVE_META_CHAR && syntax.opVariableMetaCharacters())) { fetchTokenFor_metaChars(); break; } { switch(c) { case '.': if (syntax.opDotAnyChar()) token.type = TokenType.ANYCHAR; break; case '*': if (syntax.opAsteriskZeroInf()) fetchTokenFor_repeat(0, QuantifierNode.REPEAT_INFINITE); break; case 
'+': if (syntax.opPlusOneInf()) fetchTokenFor_repeat(1, QuantifierNode.REPEAT_INFINITE); break; case '?': if (syntax.opQMarkZeroOne()) fetchTokenFor_repeat(0, 1); break; case '{': if (syntax.opBraceInterval()) fetchTokenFor_openBrace(); break; case '|': if (syntax.opVBarAlt()) token.type = TokenType.ALT; break; case '(': if (peekIs('?') && syntax.op2QMarkGroupEffect()) { inc(); if (peekIs('#')) { fetch(); while (true) { if (!left()) newSyntaxException(ERR_END_PATTERN_IN_GROUP); fetch(); if (c == syntax.metaCharTable.esc) { if (left()) fetch(); } else { if (c == ')') break; } } continue start; // goto start } unfetch(); } if (syntax.opLParenSubexp()) token.type = TokenType.SUBEXP_OPEN; break; case ')': if (syntax.opLParenSubexp()) token.type = TokenType.SUBEXP_CLOSE; break; case '^': if (syntax.opLineAnchor()) fetchTokenFor_anchor(isSingleline(env.option) ? AnchorType.BEGIN_BUF : AnchorType.BEGIN_LINE); break; case '$': if (syntax.opLineAnchor()) fetchTokenFor_anchor(isSingleline(env.option) ? AnchorType.SEMI_END_BUF : AnchorType.END_LINE); break; case '[': if (syntax.opBracketCC()) token.type = TokenType.CC_CC_OPEN; break; case ']': //if (*src > env->pattern) /* /].../ is allowed. 
*/ //CLOSE_BRACKET_WITHOUT_ESC_WARN(env, (UChar* )"]"); break; case '#': if (Option.isExtend(env.option)) { while (left()) { fetch(); if (enc.isNewLine(c)) break; } continue start; // goto start } break; case ' ': case '\t': case '\n': case '\r': case '\f': if (Option.isExtend(env.option)) continue start; // goto start break; default: // string break; } // switch } } break; } // while return token.type; } private void greedyCheck() { if (left() && peekIs('?') && syntax.opQMarkNonGreedy()) { fetch(); token.setRepeatGreedy(false); token.setRepeatPossessive(false); } else { possessiveCheck(); } } private void possessiveCheck() { if (left() && peekIs('+') && (syntax.op2PlusPossessiveRepeat() && token.type != TokenType.INTERVAL || syntax.op2PlusPossessiveInterval() && token.type == TokenType.INTERVAL)) { fetch(); token.setRepeatGreedy(true); token.setRepeatPossessive(true); } else { token.setRepeatGreedy(true); token.setRepeatPossessive(false); } } protected final int fetchCharPropertyToCType() { mark(); while (left()) { int last = p; fetch(); if (c == '}') { return enc.propertyNameToCType(bytes, _p, last); } else if (c == '(' || c == ')' || c == '{' || c == '|') { throw new CharacterPropertyException(ERR_INVALID_CHAR_PROPERTY_NAME, bytes, _p, last); } } newInternalException(ERR_PARSER_BUG); return 0; // not reached } protected final void syntaxWarn(String message, char c) { syntaxWarn(message.replace("<%n>", Character.toString(c))); } protected final void syntaxWarn(String message) { if (Config.USE_WARN) { env.reg.warnings.warn(message + ": /" + new String(bytes, getBegin(), getEnd()) + "/"); } } } joni-2.0.0/src/org/joni/Matcher.java000066400000000000000000000547441214326443200172320ustar00rootroot00000000000000/* * Permission is hereby granted, free of charge, to any person obtaining a copy of * this software and associated documentation files (the "Software"), to deal in * the Software without restriction, including without limitation the rights to * use, copy, 
modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do * so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ package org.joni; import static org.joni.Option.isFindLongest; import org.jcodings.Encoding; import org.jcodings.IntHolder; import org.joni.constants.AnchorType; public abstract class Matcher extends IntHolder { public static final int FAILED = -1; public static final int INTERRUPTED = -2; protected final Regex regex; protected final Encoding enc; protected final byte[]bytes; protected final int str; protected final int end; protected int msaStart; protected int msaOptions; protected final Region msaRegion; protected int msaBestLen; protected int msaBestS; protected int msaBegin; protected int msaEnd; public Matcher(Regex regex, byte[]bytes) { this(regex, bytes, 0, bytes.length); } public Matcher(Regex regex, byte[]bytes, int p, int end) { this.regex = regex; this.enc = regex.enc; this.bytes = bytes; this.str = p; this.end = end; this.msaRegion = regex.numMem == 0 ? 
null : new Region(regex.numMem + 1); } // main matching method protected abstract int matchAt(int range, int sstart, int sprev) throws InterruptedException; protected abstract void stateCheckBuffInit(int strLength, int offset, int stateNum); protected abstract void stateCheckBuffClear(); public final Region getRegion() { return msaRegion; } public final Region getEagerRegion() { return msaRegion != null ? msaRegion : new Region(msaBegin, msaEnd); } public final int getBegin() { return msaBegin; } public final int getEnd() { return msaEnd; } protected final void msaInit(int option, int start) { msaOptions = option; msaStart = start; if (Config.USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE) msaBestLen = -1; } public final int match(int at, int range, int option) { try { return matchInterruptible(at, range, option); } catch (InterruptedException ex) { return INTERRUPTED; } } public final int matchInterruptible(int at, int range, int option) throws InterruptedException { msaInit(option, at); if (Config.USE_COMBINATION_EXPLOSION_CHECK) { int offset = at = str; stateCheckBuffInit(end - str, offset, regex.numCombExpCheck); // move it to construction? 
} // USE_COMBINATION_EXPLOSION_CHECK int prev = enc.prevCharHead(bytes, str, at, end); if (Config.USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE) { return matchAt(end /*range*/, at, prev); } else { return matchAt(range /*range*/, at, prev); } } int low, high; // these are the return values private boolean forwardSearchRange(byte[]bytes, int str, int end, int s, int range, IntHolder lowPrev) { int pprev = -1; int p = s; if (Config.DEBUG_SEARCH) { Config.log.println("forward_search_range: "+ "str: " + str + ", end: " + end + ", s: " + s + ", range: " + range); } if (regex.dMin > 0) { if (enc.isSingleByte()) { p += regex.dMin; } else { int q = p + regex.dMin; while (p < q && p < end) p += enc.length(bytes, p, end); } } retry:while (true) { p = regex.searchAlgorithm.search(regex, bytes, p, end, range); if (p != -1 && p < range) { if (p - regex.dMin < s) { // retry_gate: pprev = p; p += enc.length(bytes, p, end); continue retry; } if (regex.subAnchor != 0) { switch (regex.subAnchor) { case AnchorType.BEGIN_LINE: if (p != str) { int prev = enc.prevCharHead(bytes, (pprev != -1) ? pprev : str, p, end); if (!enc.isNewLine(bytes, prev, end)) { // goto retry_gate; pprev = p; p += enc.length(bytes, p, end); continue retry; } } break; case AnchorType.END_LINE: if (p == end) { if (!Config.USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE) { int prev = enc.prevCharHead(bytes, (pprev != -1) ? pprev : str, p, end); if (prev != -1 && enc.isNewLine(bytes, prev, end)) { // goto retry_gate; pprev = p; p += enc.length(bytes, p, end); continue retry; } } } else if (!enc.isNewLine(bytes, p, end) && (!Config.USE_CRNL_AS_LINE_TERMINATOR || !enc.isMbcCrnl(bytes, p, end))) { //if () break; // goto retry_gate; pprev = p; p += enc.length(bytes, p, end); continue retry; } break; } // switch } if (regex.dMax == 0) { low = p; if (lowPrev != null) { // ??? 
// remove null checks if (low > s) { lowPrev.value = enc.prevCharHead(bytes, s, p, end); } else { lowPrev.value = enc.prevCharHead(bytes, (pprev != -1) ? pprev : str, p, end); } } } else { if (regex.dMax != MinMaxLen.INFINITE_DISTANCE) { low = p - regex.dMax; if (low > s) { low = enc.rightAdjustCharHeadWithPrev(bytes, s, low, end, lowPrev); if (lowPrev != null && lowPrev.value == -1) { lowPrev.value = enc.prevCharHead(bytes, (pprev != -1) ? pprev : s, low, end); } } else { if (lowPrev != null) { lowPrev.value = enc.prevCharHead(bytes, (pprev != -1) ? pprev : str, low, end); } } } } /* no needs to adjust *high, *high is used as range check only */ high = p - regex.dMin; if (Config.DEBUG_SEARCH) { Config.log.println("forward_search_range success: "+ "low: " + (low - str) + ", high: " + (high - str) + ", dmin: " + regex.dMin + ", dmax: " + regex.dMax); } return true; /* success */ } return false; /* fail */ } //while } // low, high private boolean backwardSearchRange(byte[]bytes, int str, int end, int s, int range, int adjrange) { range += regex.dMin; int p = s; retry:while (true) { p = regex.searchAlgorithm.searchBackward(regex, bytes, range, adjrange, end, p, s, range); if (p != -1) { if (regex.subAnchor != 0) { switch (regex.subAnchor) { case AnchorType.BEGIN_LINE: if (p != str) { int prev = enc.prevCharHead(bytes, str, p, end); if (!enc.isNewLine(bytes, prev, end)) { p = prev; continue retry; } } break; case AnchorType.END_LINE: if (p == end) { if (!Config.USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE) { int prev = enc.prevCharHead(bytes, adjrange, p, end); if (prev == -1) return false; if (enc.isNewLine(bytes, prev, end)) { p = prev; continue retry; } } } else if (!enc.isNewLine(bytes, p, end) && (!Config.USE_CRNL_AS_LINE_TERMINATOR || !enc.isMbcCrnl(bytes, p, end))) { p = enc.prevCharHead(bytes, adjrange, p, end); if (p == -1) return false; continue retry; } break; } // switch } /* no needs to adjust *high, *high is used as range check only */ if (regex.dMax != 
MinMaxLen.INFINITE_DISTANCE) { low = p - regex.dMax; high = p - regex.dMin; high = enc.rightAdjustCharHead(bytes, adjrange, high, end); } if (Config.DEBUG_SEARCH) { Config.log.println("backward_search_range: "+ "low: " + (low - str) + ", high: " + (high - str)); } return true; } if (Config.DEBUG_SEARCH) Config.log.println("backward_search_range: fail."); return false; } // while } // MATCH_AND_RETURN_CHECK private boolean matchCheck(int upperRange, int s, int prev) throws InterruptedException { if (Config.USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE) { if (Config.USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE) { //range = upperRange; if (matchAt(upperRange, s, prev) != -1) { if (!isFindLongest(regex.options)) return true; } } else { //range = upperRange; if (matchAt(upperRange, s, prev) != -1) return true; } } else { if (Config.USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE) { if (matchAt(end, s, prev) != -1) { //range = upperRange; if (!isFindLongest(regex.options)) return true; } } else { //range = upperRange; if (matchAt(end, s, prev) != -1) return true; } } return false; } public final int search(int start, int range, int option) { try { return searchInterruptible(start, range, option); } catch (InterruptedException ex) { return INTERRUPTED; } } public final int searchInterruptible(int start, int range, int option) throws InterruptedException { int s, prev; int origStart = start; int origRange = range; if (Config.DEBUG_SEARCH) { Config.log.println("onig_search (entry point): "+ "str: " + str + ", end: " + (end - str) + ", start: " + (start - str) + ", range " + (range - str)); } if (start > end || start < str) return FAILED; /* anchor optimize: resume search range */ if (regex.anchor != 0 && str < end) { int minSemiEnd, maxSemiEnd; if ((regex.anchor & AnchorType.BEGIN_POSITION) != 0) { /* search start-position only */ // !begin_position:! 
if (range > start) { range = start + 1; } else { range = start; } } else if ((regex.anchor & AnchorType.BEGIN_BUF) != 0) { /* search str-position only */ if (range > start) { if (start != str) return FAILED; // mismatch_no_msa; range = str + 1; } else { if (range <= str) { start = str; range = str; } else { return FAILED; // mismatch_no_msa; } } } else if ((regex.anchor & AnchorType.END_BUF) != 0) { minSemiEnd = maxSemiEnd = end; // !end_buf:! if (endBuf(start, range, minSemiEnd, maxSemiEnd)) return FAILED; // mismatch_no_msa; } else if ((regex.anchor & AnchorType.SEMI_END_BUF) != 0) { int preEnd = enc.stepBack(bytes, str, end, end, 1); maxSemiEnd = end; if (enc.isNewLine(bytes, preEnd, end)) { minSemiEnd = preEnd; if (Config.USE_CRNL_AS_LINE_TERMINATOR) { preEnd = enc.stepBack(bytes, str, preEnd, end, 1); if (preEnd != -1 && enc.isMbcCrnl(bytes, preEnd, end)) { minSemiEnd = preEnd; } } if (minSemiEnd > str && start <= minSemiEnd) { // !goto end_buf;! if (endBuf(start, range, minSemiEnd, maxSemiEnd)) return FAILED; // mismatch_no_msa; } } else { minSemiEnd = end; // !goto end_buf;! if (endBuf(start, range, minSemiEnd, maxSemiEnd)) return FAILED; // mismatch_no_msa; } } else if ((regex.anchor & AnchorType.ANYCHAR_STAR_ML) != 0) { // goto !begin_position;! if (range > start) { range = start + 1; } else { range = start; } } } else if (str == end) { /* empty string */ // empty address ? 
if (Config.DEBUG_SEARCH) { Config.log.println("onig_search: empty string."); } if (regex.thresholdLength == 0) { s = start = str; prev = -1; msaInit(option, start); if (Config.USE_COMBINATION_EXPLOSION_CHECK) stateCheckBuffClear(); if (matchCheck(end, s, prev)) return match(s); return mismatch(); } return FAILED; // goto mismatch_no_msa; } if (Config.DEBUG_SEARCH) { Config.log.println("onig_search(apply anchor): " + "end: " + (end - str) + ", start " + (start - str) + ", range " + (range - str)); } msaInit(option, origStart); if (Config.USE_COMBINATION_EXPLOSION_CHECK) { int offset = Math.min(start, range) - str; stateCheckBuffInit(end - str, offset, regex.numCombExpCheck); } s = start; if (range > start) { /* forward search */ if (s > str) { prev = enc.prevCharHead(bytes, str, s, end); } else { prev = 0; // -1 } if (regex.searchAlgorithm != SearchAlgorithm.NONE) { int schRange = range; if (regex.dMax != 0) { if (regex.dMax == MinMaxLen.INFINITE_DISTANCE) { schRange = end; } else { schRange += regex.dMax; if (schRange > end) schRange = end; } } if ((end - start) < regex.thresholdLength) return mismatch(); if (regex.dMax != MinMaxLen.INFINITE_DISTANCE) { do { if (!forwardSearchRange(bytes, str, end, s, schRange, this)) return mismatch(); // low, high, lowPrev if (s < low) { s = low; prev = value; } while (s <= high) { if (matchCheck(origRange, s, prev)) return match(s); // ??? prev = s; s += enc.length(bytes, s, end); } } while (s < range); return mismatch(); } else { /* check only. */ if (!forwardSearchRange(bytes, str, end, s, schRange, null)) return mismatch(); if ((regex.anchor & AnchorType.ANYCHAR_STAR) != 0) { do { if (matchCheck(origRange, s, prev)) return match(s); prev = s; s += enc.length(bytes, s, end); } while (s < range); return mismatch(); } } } do { if (matchCheck(origRange, s, prev)) return match(s); prev = s; s += enc.length(bytes, s, end); } while (s < range); if (s == range) { /* because empty match with /$/. 
*/ if (matchCheck(origRange, s, prev)) return match(s); } } else { /* backward search */ if (Config.USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE) { if (origStart < end) { origStart += enc.length(bytes, origStart, end); // /* is upper range */ } } if (regex.searchAlgorithm != SearchAlgorithm.NONE) { int adjrange; if (range < end) { adjrange = enc.leftAdjustCharHead(bytes, str, range, end); } else { adjrange = end; } if (regex.dMax != MinMaxLen.INFINITE_DISTANCE && (end - range) >= regex.thresholdLength) { do { int schStart = s + regex.dMax; if (schStart > end) schStart = end; if (!backwardSearchRange(bytes, str, end, schStart, range, adjrange)) return mismatch(); // low, high if (s > high) s = high; while (s != -1 && s >= low) { prev = enc.prevCharHead(bytes, str, s, end); if (matchCheck(origStart, s, prev)) return match(s); s = prev; } } while (s >= range); return mismatch(); } else { /* check only. */ if ((end - range) < regex.thresholdLength) return mismatch(); int schStart = s; if (regex.dMax != 0) { if (regex.dMax == MinMaxLen.INFINITE_DISTANCE) { schStart = end; } else { schStart += regex.dMax; if (schStart > end) { schStart = end; } else { schStart = enc.leftAdjustCharHead(bytes, start, schStart, end); } } } if (!backwardSearchRange(bytes, str, end, schStart, range, adjrange)) return mismatch(); } } do { prev = enc.prevCharHead(bytes, str, s, end); if (matchCheck(origStart, s, prev)) return match(s); s = prev; } while (s >= range); } return mismatch(); } private boolean endBuf(int start, int range, int minSemiEnd, int maxSemiEnd) { if ((maxSemiEnd - str) < regex.anchorDmin) return true; // mismatch_no_msa; if (range > start) { if ((minSemiEnd - start) > regex.anchorDmax) { start = minSemiEnd - regex.anchorDmax; if (start < end) { start = enc.rightAdjustCharHead(bytes, str, start, end); } else { /* match with empty at end */ start = enc.prevCharHead(bytes, str, end, end); } } if ((maxSemiEnd - (range - 1)) < regex.anchorDmin) { range = maxSemiEnd - 
regex.anchorDmin + 1; } if (start >= range) return true; // mismatch_no_msa; } else { if ((minSemiEnd - range) > regex.anchorDmax) { range = minSemiEnd - regex.anchorDmax; } if ((maxSemiEnd - start) < regex.anchorDmin) { start = maxSemiEnd - regex.anchorDmin; start = enc.leftAdjustCharHead(bytes, str, start, end); } if (range > start) return true; // mismatch_no_msa; } return false; } private int match(int s) { return s - str; // sstart ??? } private int mismatch() { if (Config.USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE) { if (msaBestLen >= 0) { int s = msaBestS; return match(s); } } // falls through finish: return FAILED; } } joni-2.0.0/src/org/joni/MatcherFactory.java000066400000000000000000000026571214326443200205560ustar00rootroot00000000000000/* * Permission is hereby granted, free of charge, to any person obtaining a copy of * this software and associated documentation files (the "Software"), to deal in * the Software without restriction, including without limitation the rights to * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do * so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ package org.joni; public abstract class MatcherFactory { public abstract Matcher create(Regex regex, byte[]bytes, int p, int end); static final MatcherFactory DEFAULT = new MatcherFactory() { @Override public Matcher create(Regex regex, byte[] bytes, int p, int end) { return new ByteCodeMachine(regex, bytes, p, end); } }; } joni-2.0.0/src/org/joni/MinMaxLen.java000066400000000000000000000102771214326443200174700ustar00rootroot00000000000000/* * Permission is hereby granted, free of charge, to any person obtaining a copy of * this software and associated documentation files (the "Software"), to deal in * the Software without restriction, including without limitation the rights to * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do * so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ package org.joni; final class MinMaxLen { int min; /* min byte length */ int max; /* max byte length */ MinMaxLen() { } MinMaxLen(int min, int max) { this.min = min; this.max = max; } /* 1000 / (min-max-dist + 1) */ private static final short distValues[] = { 1000, 500, 333, 250, 200, 167, 143, 125, 111, 100, 91, 83, 77, 71, 67, 63, 59, 56, 53, 50, 48, 45, 43, 42, 40, 38, 37, 36, 34, 33, 32, 31, 30, 29, 29, 28, 27, 26, 26, 25, 24, 24, 23, 23, 22, 22, 21, 21, 20, 20, 20, 19, 19, 19, 18, 18, 18, 17, 17, 17, 16, 16, 16, 16, 15, 15, 15, 15, 14, 14, 14, 14, 14, 14, 13, 13, 13, 13, 13, 13, 12, 12, 12, 12, 12, 12, 11, 11, 11, 11, 11, 11, 11, 11, 11, 10, 10, 10, 10, 10 }; int distanceValue() { if (max == INFINITE_DISTANCE) return 0; int d = max - min; /* return dist_vals[d] * 16 / (mm->min + 12); */ return d < distValues.length ? distValues[d] : 1; } int compareDistanceValue(MinMaxLen other, int v1, int v2) { if (v2 <= 0) return -1; if (v1 <= 0) return 1; v1 *= distanceValue(); v2 *= other.distanceValue(); if (v2 > v1) return 1; if (v2 < v1) return -1; if (other.min < min) return 1; if (other.min > min) return -1; return 0; } boolean equal(MinMaxLen other) { return min == other.min && max == other.max; } void set(int min, int max) { this.min = min; this.max = max; } void clear() { min = max = 0; } void copy(MinMaxLen other) { min = other.min; max = other.max; } void add(MinMaxLen other) { min = distanceAdd(min, other.min); max = distanceAdd(max, other.max); } void addLength(int len) { min = distanceAdd(min, len); max = distanceAdd(max, len); } void altMerge(MinMaxLen other) { if (min > other.min) min = other.min; if (max < other.max) max = other.max; } static final int INFINITE_DISTANCE = 0x7FFFFFFF; static int distanceAdd(int d1, int d2) { if (d1 == INFINITE_DISTANCE || d2 == INFINITE_DISTANCE) { return INFINITE_DISTANCE; } else { if (d1 <= INFINITE_DISTANCE - d2) return d1 + d2; else return INFINITE_DISTANCE; } } static int distanceMultiply(int d, int m) { if (m == 0) 
return 0; if (d < INFINITE_DISTANCE / m) { return d * m; } else { return INFINITE_DISTANCE; } } static String distanceRangeToString(int a, int b) { String s = ""; if (a == INFINITE_DISTANCE) { s += "inf"; } else { s += "(" + a + ")"; } s += "-"; if (b == INFINITE_DISTANCE) { s += "inf"; } else { s += "(" + b + ")"; } return s; } } joni-2.0.0/src/org/joni/NameEntry.java000066400000000000000000000057071214326443200175440ustar00rootroot00000000000000/* * Permission is hereby granted, free of charge, to any person obtaining a copy of * this software and associated documentation files (the "Software"), to deal in * the Software without restriction, including without limitation the rights to * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do * so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
/**
 * A named capture group: the name (a slice of a byte array) together with the
 * numbers of the groups that carry it.
 *
 * <p>The first back reference is kept in {@code backRef1}; from the second one on,
 * all references (including the first) live in the {@code backRefs} array.
 */
public final class NameEntry {
    static final int INIT_NAME_BACKREFS_ALLOC_NUM = 8;

    public final byte[] name;
    public final int nameP;
    public final int nameEnd;

    int backNum;
    int backRef1;
    int backRefs[];

    /**
     * @param bytes array holding the name
     * @param p     index of the first name byte
     * @param end   index one past the last name byte
     */
    public NameEntry(byte[] bytes, int p, int end) {
        name = bytes;
        nameP = p;
        nameEnd = end;
    }

    /** Returns a fresh array with the group numbers registered under this name (possibly empty). */
    public int[] getBackRefs() {
        switch (backNum) {
        case 0:
            return new int[]{};
        case 1:
            return new int[]{backRef1};
        default:
            int[] result = new int[backNum];
            System.arraycopy(backRefs, 0, result, 0, backNum);
            return result;
        }
    }

    private void alloc() {
        backRefs = new int[INIT_NAME_BACKREFS_ALLOC_NUM];
    }

    /** Doubles {@code backRefs} when the (already incremented) count no longer fits. */
    private void ensureSize() {
        if (backNum > backRefs.length) {
            int[] tmp = new int[backRefs.length << 1];
            System.arraycopy(backRefs, 0, tmp, 0, backRefs.length);
            backRefs = tmp;
        }
    }

    /** Registers one more group number under this name. */
    public void addBackref(int backRef) {
        backNum++;

        switch (backNum) {
        case 1:
            backRef1 = backRef;
            break;
        case 2:
            // switching from the single slot to the array: carry the first reference over
            alloc();
            backRefs[0] = backRef1;
            backRefs[1] = backRef;
            break;
        default:
            ensureSize();
            backRefs[backNum - 1] = backRef;
        }
    }

    /**
     * Renders the entry as the name, a space, and then {@code "-"} (no references),
     * the single reference, or the comma-separated list of references.
     * The name bytes are decoded with the platform default charset.
     */
    @Override
    public String toString() {
        StringBuilder buff = new StringBuilder(new String(name, nameP, nameEnd - nameP) + " ");
        if (backNum == 0) {
            buff.append("-");
        } else if (backNum == 1) {
            buff.append(backRef1);
        } else {
            for (int i = 0; i < backNum; i++) {
                if (i > 0) buff.append(", ");
                buff.append(backRefs[i]);
            }
        }
        return buff.toString();
    }
}
substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ package org.joni; public abstract class NativeMachine extends Matcher { protected NativeMachine(Regex regex, byte[]bytes, int p, int end) { super(regex, bytes, p, end); } } joni-2.0.0/src/org/joni/NodeOptInfo.java000066400000000000000000000105641214326443200200230ustar00rootroot00000000000000/* * Permission is hereby granted, free of charge, to any person obtaining a copy of * this software and associated documentation files (the "Software"), to deal in * the Software without restriction, including without limitation the rights to * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do * so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ package org.joni; import org.jcodings.Encoding; public final class NodeOptInfo { final MinMaxLen length = new MinMaxLen(); final OptAnchorInfo anchor = new OptAnchorInfo(); final OptExactInfo exb = new OptExactInfo(); /* boundary */ final OptExactInfo exm = new OptExactInfo(); /* middle */ final OptExactInfo expr = new OptExactInfo(); /* prec read (?=...) */ final OptMapInfo map = new OptMapInfo(); /* boundary */ public void setBoundNode(MinMaxLen mmd) { exb.mmd.copy(mmd); expr.mmd.copy(mmd); map.mmd.copy(mmd); } public void clear() { length.clear(); anchor.clear(); exb.clear(); exm.clear(); expr.clear(); map.clear(); } public void copy(NodeOptInfo other) { length.copy(other.length); anchor.copy(other.anchor); exb.copy(other.exb); exm.copy(other.exm); expr.copy(other.expr); map.copy(other.map); } public void concatLeftNode(NodeOptInfo other, Encoding enc) { OptAnchorInfo tanchor = new OptAnchorInfo(); // remove it somehow ? tanchor.concat(anchor, other.anchor, length.max, other.length.max); anchor.copy(tanchor); if (other.exb.length > 0 && length.max == 0) { tanchor.concat(anchor, other.exb.anchor, length.max, other.length.max); other.exb.anchor.copy(tanchor); } if (other.map.value > 0 && length.max == 0) { if (other.map.mmd.max == 0) { other.map.anchor.leftAnchor |= anchor.leftAnchor; } } boolean exbReach = exb.reachEnd; boolean exmReach = exm.reachEnd; if (other.length.max != 0) { exb.reachEnd = exm.reachEnd = false; } if (other.exb.length > 0) { if (exbReach) { exb.concat(other.exb, enc); other.exb.clear(); } else if (exmReach) { exm.concat(other.exb, enc); other.exb.clear(); } } exm.select(other.exb, enc); exm.select(other.exm, enc); if (expr.length > 0) { if (other.length.max > 0) { // TODO: make sure it is not an Oniguruma bug (casting unsigned int to int for arithmetic comparison) int otherLengthMax = other.length.max; if (otherLengthMax == MinMaxLen.INFINITE_DISTANCE) otherLengthMax = -1; if (expr.length > otherLengthMax) expr.length = otherLengthMax; if 
(expr.mmd.max == 0) { exb.select(expr, enc); } else { exm.select(expr, enc); } } } else if (other.expr.length > 0) { expr.copy(other.expr); } map.select(other.map); length.add(other.length); } public void altMerge(NodeOptInfo other, OptEnvironment env) { anchor.altMerge(other.anchor); exb.altMerge(other.exb, env); exm.altMerge(other.exm, env); expr.altMerge(other.expr, env); map.altMerge(other.map, env.enc); length.altMerge(other.length); } public void setBound(MinMaxLen mmd) { exb.mmd.copy(mmd); expr.mmd.copy(mmd); map.mmd.copy(mmd); } } joni-2.0.0/src/org/joni/OptAnchorInfo.java000066400000000000000000000064171214326443200203520ustar00rootroot00000000000000/* * Permission is hereby granted, free of charge, to any person obtaining a copy of * this software and associated documentation files (the "Software"), to deal in * the Software without restriction, including without limitation the rights to * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do * so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ package org.joni; import org.joni.constants.AnchorType; final class OptAnchorInfo implements AnchorType { int leftAnchor; int rightAnchor; void clear() { leftAnchor = rightAnchor = 0; } void copy(OptAnchorInfo other) { leftAnchor = other.leftAnchor; rightAnchor = other.rightAnchor; } void concat(OptAnchorInfo left, OptAnchorInfo right, int leftLength, int rightLength) { leftAnchor = left.leftAnchor; if (leftLength == 0) leftAnchor |= right.leftAnchor; rightAnchor = right.rightAnchor; if (rightLength == 0) rightAnchor |= left.rightAnchor; } boolean isSet(int anchor) { if ((leftAnchor & anchor) != 0) return true; return (rightAnchor & anchor) != 0; } void add(int anchor) { if (isLeftAnchor(anchor)) { leftAnchor |= anchor; } else { rightAnchor |= anchor; } } void remove(int anchor) { if (isLeftAnchor(anchor)) { leftAnchor &= ~anchor; } else { rightAnchor &= ~anchor; } } void altMerge(OptAnchorInfo other) { leftAnchor &= other.leftAnchor; rightAnchor &= other.rightAnchor; } static boolean isLeftAnchor(int anchor) { // make a mask for it ? 
return !(anchor == END_BUF || anchor == SEMI_END_BUF || anchor == END_LINE || anchor == PREC_READ || anchor == PREC_READ_NOT); } static String anchorToString(int anchor) { StringBuffer s = new StringBuffer("["); if ((anchor & AnchorType.BEGIN_BUF) !=0 ) s.append("begin-buf "); if ((anchor & AnchorType.BEGIN_LINE) !=0 ) s.append("begin-line "); if ((anchor & AnchorType.BEGIN_POSITION) !=0 ) s.append("begin-pos "); if ((anchor & AnchorType.END_BUF) !=0 ) s.append("end-buf "); if ((anchor & AnchorType.SEMI_END_BUF) !=0 ) s.append("semi-end-buf "); if ((anchor & AnchorType.END_LINE) !=0 ) s.append("end-line "); if ((anchor & AnchorType.ANYCHAR_STAR) !=0 ) s.append("anychar-star "); if ((anchor & AnchorType.ANYCHAR_STAR_ML) !=0 ) s.append("anychar-star-pl "); s.append("]"); return s.toString(); } } joni-2.0.0/src/org/joni/OptEnvironment.java000066400000000000000000000030141214326443200206160ustar00rootroot00000000000000/* * Permission is hereby granted, free of charge, to any person obtaining a copy of * this software and associated documentation files (the "Software"), to deal in * the Software without restriction, including without limitation the rights to * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do * so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ package org.joni; import org.jcodings.Encoding; // remove this one in future and pass mmd directly final class OptEnvironment { final MinMaxLen mmd = new MinMaxLen(); Encoding enc; int options; int caseFoldFlag; ScanEnvironment scanEnv; void copy(OptEnvironment other) { mmd.copy(other.mmd); enc = other.enc; options = other.options; caseFoldFlag = other.caseFoldFlag; scanEnv = other.scanEnv; } } joni-2.0.0/src/org/joni/OptExactInfo.java000066400000000000000000000117221214326443200201770ustar00rootroot00000000000000/* * Permission is hereby granted, free of charge, to any person obtaining a copy of * this software and associated documentation files (the "Software"), to deal in * the Software without restriction, including without limitation the rights to * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do * so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ package org.joni; import org.jcodings.Encoding; final class OptExactInfo { static final int OPT_EXACT_MAXLEN = 24; final MinMaxLen mmd = new MinMaxLen(); final OptAnchorInfo anchor = new OptAnchorInfo(); boolean reachEnd; boolean ignoreCase; final byte bytes[] = new byte[OPT_EXACT_MAXLEN]; int length; boolean isFull() { return length >= OPT_EXACT_MAXLEN; } void clear() { mmd.clear(); anchor.clear(); reachEnd = false; ignoreCase = false; length = 0; } void copy(OptExactInfo other) { mmd.copy(other.mmd); anchor.copy(other.anchor); reachEnd = other.reachEnd; ignoreCase = other.ignoreCase; length = other.length; System.arraycopy(other.bytes, 0, bytes, 0, OPT_EXACT_MAXLEN); } void concat(OptExactInfo other, Encoding enc) { if (!ignoreCase && other.ignoreCase) { if (length >= other.length) return; /* avoid */ ignoreCase = true; } int p = 0; // add->s; int end = p + other.length; int i; for (i=length; p < end;) { int len = enc.length(other.bytes, p, end); if (i + len > OPT_EXACT_MAXLEN) break; for (int j=0; j OPT_EXACT_MAXLEN) break; for (int j=0; j low price */ v2 = OptMapInfo.positionValue(enc, bytes[0] & 0xff); v1 = OptMapInfo.positionValue(enc, alt.bytes[0] & 0xff); if (length > 1) v1 += 5; if (alt.length > 1) v2 += 5; } if (!ignoreCase) v1 *= 2; if (!alt.ignoreCase) v2 *= 2; if (mmd.compareDistanceValue(alt.mmd, v1, v2) > 0) copy(alt); } // comp_opt_exact_or_map_info private static final int COMP_EM_BASE = 20; int compare(OptMapInfo m) { if (m.value <= 0) return -1; int ve = COMP_EM_BASE * length * (ignoreCase ? 
1 : 2); int vm = COMP_EM_BASE * 5 * 2 / m.value; return mmd.compareDistanceValue(m.mmd, ve, vm); } } joni-2.0.0/src/org/joni/OptMapInfo.java000066400000000000000000000105551214326443200176530ustar00rootroot00000000000000/* * Permission is hereby granted, free of charge, to any person obtaining a copy of * this software and associated documentation files (the "Software"), to deal in * the Software without restriction, including without limitation the rights to * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do * so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ package org.joni; import org.jcodings.CaseFoldCodeItem; import org.jcodings.Encoding; final class OptMapInfo { final MinMaxLen mmd = new MinMaxLen(); /* info position */ final OptAnchorInfo anchor = new OptAnchorInfo(); int value; /* weighted value */ final byte map[] = new byte[Config.CHAR_TABLE_SIZE]; void clear() { mmd.clear(); anchor.clear(); value = 0; for (int i=0; i 0) copy(alt); } // alt_merge_opt_map_info void altMerge(OptMapInfo other, Encoding enc) { /* if (! 
is_equal_mml(&to->mmd, &add->mmd)) return ; */ if (value == 0) return; if (other.value == 0 || mmd.max < other.mmd.max) { clear(); return; } mmd.altMerge(other.mmd); int val = 0; for (int i=0; i 1) { return 20; } else { return ByteValTable[i]; } } else { return 4; /* Take it easy. */ } } } joni-2.0.0/src/org/joni/Option.java000066400000000000000000000107301214326443200171020ustar00rootroot00000000000000/* * Permission is hereby granted, free of charge, to any person obtaining a copy of * this software and associated documentation files (the "Software"), to deal in * the Software without restriction, including without limitation the rights to * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do * so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ package org.joni; public class Option { /* options */ public static final int NONE = 0; public static final int IGNORECASE = (1<<0); public static final int EXTEND = (1<<1); public static final int MULTILINE = (1<<2); public static final int SINGLELINE = (1<<3); public static final int FIND_LONGEST = (1<<4); public static final int FIND_NOT_EMPTY = (1<<5); public static final int NEGATE_SINGLELINE = (1<<6); public static final int DONT_CAPTURE_GROUP = (1<<7); public static final int CAPTURE_GROUP = (1<<8); /* options (search time) */ public static final int NOTBOL = (1<<9); public static final int NOTEOL = (1<<10); public static final int POSIX_REGION = (1<<11); public static final int MAXBIT = (1<<12); /* limit */ public static final int DEFAULT = NONE; public static String toString(int option) { String options = ""; if (isIgnoreCase(option)) options += "IGNORECASE "; if (isExtend(option)) options += "EXTEND "; if (isMultiline(option)) options += "MULTILINE "; if (isSingleline(option)) options += "SINGLELINE "; if (isFindLongest(option)) options += "FIND_LONGEST "; if (isFindNotEmpty(option)) options += "FIND_NOT_EMPTY "; if (isNegateSingleline(option)) options += "NEGATE_SINGLELINE "; if (isDontCaptureGroup(option)) options += "DONT_CAPTURE_GROUP "; if (isCaptureGroup(option)) options += "CAPTURE_GROUP "; if (isNotBol(option)) options += "NOTBOL "; if (isNotEol(option)) options += "NOTEOL "; if (isPosixRegion(option)) options += "POSIX_REGION "; return options; } public static boolean isIgnoreCase(int option) { return (option & IGNORECASE) != 0; } public static boolean isExtend(int option) { return (option & EXTEND) != 0; } public static boolean isSingleline(int option) { return (option & SINGLELINE) != 0; } public static boolean isMultiline(int option) { return (option & MULTILINE) != 0; } public static boolean isFindLongest(int option) { return (option & FIND_LONGEST) != 0; } public static boolean isFindNotEmpty(int option) { return (option & FIND_NOT_EMPTY) 
!= 0; } public static boolean isFindCondition(int option) { return (option & (FIND_LONGEST | FIND_NOT_EMPTY)) != 0; } public static boolean isNegateSingleline(int option) { return (option & NEGATE_SINGLELINE) != 0; } public static boolean isDontCaptureGroup(int option) { return (option & DONT_CAPTURE_GROUP) != 0; } public static boolean isCaptureGroup(int option) { return (option & CAPTURE_GROUP) != 0; } public static boolean isNotBol(int option) { return (option & NOTBOL) != 0; } public static boolean isNotEol(int option) { return (option & NOTEOL) != 0; } public static boolean isPosixRegion(int option) { return (option & POSIX_REGION) != 0; } /* OP_SET_OPTION is required for these options. ??? */ // public static boolean isDynamic(int option) { // return (option & (MULTILINE | IGNORECASE)) != 0; // } public static boolean isDynamic(int option) { return false; } } joni-2.0.0/src/org/joni/Parser.java000066400000000000000000001062321214326443200170710ustar00rootroot00000000000000/* * Permission is hereby granted, free of charge, to any person obtaining a copy of * this software and associated documentation files (the "Software"), to deal in * the Software without restriction, including without limitation the rights to * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do * so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ package org.joni; import static org.joni.BitStatus.bsOnAtSimple; import static org.joni.BitStatus.bsOnOff; import static org.joni.Option.isDontCaptureGroup; import static org.joni.Option.isIgnoreCase; import org.jcodings.Ptr; import org.jcodings.constants.CharacterType; import org.jcodings.constants.PosixBracket; import org.joni.ast.AnchorNode; import org.joni.ast.AnyCharNode; import org.joni.ast.BackRefNode; import org.joni.ast.CClassNode; import org.joni.ast.CTypeNode; import org.joni.ast.CallNode; import org.joni.ast.ConsAltNode; import org.joni.ast.EncloseNode; import org.joni.ast.Node; import org.joni.ast.QuantifierNode; import org.joni.ast.StringNode; import org.joni.ast.CClassNode.CCStateArg; import org.joni.constants.AnchorType; import org.joni.constants.CCSTATE; import org.joni.constants.CCVALTYPE; import org.joni.constants.EncloseType; import org.joni.constants.NodeType; import org.joni.constants.TokenType; class Parser extends Lexer { protected final Regex regex; protected Node root; protected int returnCode; // return code used by parser methods (they itself return parsed nodes) // this approach will not affect recursive calls protected Parser(ScanEnvironment env, byte[]bytes, int p, int end) { super(env, bytes, p, end); regex = env.reg; } // onig_parse_make_tree protected final Node parse() { root = parseRegexp(); regex.numMem = env.numMem; return root; } private static final int POSIX_BRACKET_NAME_MIN_LEN = 4; private static final int POSIX_BRACKET_CHECK_LIMIT_LENGTH = 20; private static final byte BRACKET_END[] = ":]".getBytes(); private boolean parsePosixBracket(CClassNode cc) { mark(); boolean not; if (peekIs('^')) { inc(); not = true; } else { not = false; } if (enc.strLength(bytes, 
p, stop) >= POSIX_BRACKET_NAME_MIN_LEN + 3) { // else goto not_posix_bracket byte[][] pbs= PosixBracket.PBSNamesLower; for (int i=0; i POSIX_BRACKET_CHECK_LIMIT_LENGTH) break; } if (c == ':' && left()) { inc(); if (left()) { fetch(); if (c == ']') newSyntaxException(ERR_INVALID_POSIX_BRACKET_TYPE); } } restore(); return true; /* 1: is not POSIX bracket, but no error. */ } private CClassNode parseCharProperty() { int ctype = fetchCharPropertyToCType(); CClassNode n = new CClassNode(); n.addCType(ctype, false, env, this); if (token.getPropNot()) n.setNot(); return n; } private boolean codeExistCheck(int code, boolean ignoreEscaped) { mark(); boolean inEsc = false; while (left()) { if (ignoreEscaped && inEsc) { inEsc = false; } else { fetch(); if (c == code) { restore(); return true; } if (c == syntax.metaCharTable.esc) inEsc = true; } } restore(); return false; } private CClassNode parseCharClass() { fetchTokenInCC(); final boolean neg; if (token.type == TokenType.CHAR && token.getC() == '^' && !token.escaped) { neg = true; fetchTokenInCC(); } else { neg = false; } if (token.type == TokenType.CC_CLOSE) { if (!codeExistCheck(']', true)) newSyntaxException(ERR_EMPTY_CHAR_CLASS); env.ccEscWarn("]"); token.type = TokenType.CHAR; /* allow []...] 
*/ } CClassNode cc = new CClassNode(); CClassNode prevCC = null; CClassNode workCC = null; CCStateArg arg = new CCStateArg(); boolean andStart = false; arg.state = CCSTATE.START; while (token.type != TokenType.CC_CLOSE) { boolean fetched = false; switch (token.type) { case CHAR: final int len; if (Config.VANILLA) { len = enc.codeToMbcLength(token.getC()); if (len > 1) { arg.inType = CCVALTYPE.CODE_POINT; } else { arg.inType = CCVALTYPE.SB; // sb_char: } } else { if (token.getCode() >= BitSet.SINGLE_BYTE_SIZE || (len = enc.codeToMbcLength(token.getC())) > 1) { arg.inType = CCVALTYPE.CODE_POINT; } else { arg.inType = CCVALTYPE.SB; // sb_char: } } arg.v = token.getC(); arg.vIsRaw = false; parseCharClassValEntry2(cc, arg); // goto val_entry2 break; case RAW_BYTE: if (!enc.isSingleByte() && token.base != 0) { /* tok->base != 0 : octal or hexadec. */ byte[]buf = new byte[Config.ENC_MBC_CASE_FOLD_MAXLEN]; int psave = p; int base = token.base; buf[0] = (byte)token.getC(); int i; for (i=1; i len) { /* fetch back */ p = psave; for (i=1; i': /* (?>...) stop backtrack */ node = new EncloseNode(EncloseType.STOP_BACKTRACK); // node_new_enclose break; case '\'': if (Config.USE_NAMED_GROUP) { if (syntax.op2QMarkLtNamedGroup()) { listCapture = false; // goto named_group1 node = parseEncloseNamedGroup2(listCapture); break; } else { newSyntaxException(ERR_UNDEFINED_GROUP_OPTION); } } // USE_NAMED_GROUP break; case '<': /* look behind (?<=...), (?...) 
*/ } unfetch(); } } // USE_NAMED_GROUP EncloseNode en = new EncloseNode(env.option, false); // node_new_enclose_memory int num = env.addMemEntry(); if (num >= BitStatus.BIT_STATUS_BITS_NUM) newValueException(ERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY); en.regNum = num; node = en; } else { newSyntaxException(ERR_UNDEFINED_GROUP_OPTION); } break; // case 'p': #ifdef USE_POSIXLINE_OPTION case '-': case 'i': case 'm': case 's': case 'x': boolean neg = false; while (true) { switch(c) { case ':': case ')': break; case '-': neg = true; break; case 'x': option = bsOnOff(option, Option.EXTEND, neg); break; case 'i': option = bsOnOff(option, Option.IGNORECASE, neg); break; case 's': if (syntax.op2OptionPerl()) { option = bsOnOff(option, Option.MULTILINE, neg); } else { newSyntaxException(ERR_UNDEFINED_GROUP_OPTION); } break; case 'm': if (syntax.op2OptionPerl()) { option = bsOnOff(option, Option.SINGLELINE, !neg); } else if (syntax.op2OptionRuby()) { option = bsOnOff(option, Option.MULTILINE, neg); } else { newSyntaxException(ERR_UNDEFINED_GROUP_OPTION); } break; // case 'p': #ifdef USE_POSIXLINE_OPTION // not defined // option = bsOnOff(option, Option.MULTILINE|Option.SINGLELINE, neg); // break; default: newSyntaxException(ERR_UNDEFINED_GROUP_OPTION); } // switch if (c == ')') { EncloseNode en = new EncloseNode(option, 0); // node_new_option node = en; returnCode = 2; /* option only */ return node; } else if (c == ':') { int prev = env.option; env.option = option; fetchToken(); Node target = parseSubExp(term); env.option = prev; EncloseNode en = new EncloseNode(option, 0); // node_new_option en.setTarget(target); node = en; returnCode = 0; return node; } if (!left()) newSyntaxException(ERR_END_PATTERN_IN_GROUP); fetch(); } // while default: newSyntaxException(ERR_UNDEFINED_GROUP_OPTION); } // switch } else { if (isDontCaptureGroup(env.option)) { fetchToken(); // goto group node = parseSubExp(term); returnCode = 1; /* group */ return node; } EncloseNode en = new 
EncloseNode(env.option, false); // node_new_enclose_memory int num = env.addMemEntry(); en.regNum = num; node = en; } fetchToken(); Node target = parseSubExp(term); if (node.getType() == NodeType.ANCHOR) { AnchorNode an = (AnchorNode) node; an.setTarget(target); } else { EncloseNode en = (EncloseNode)node; en.setTarget(target); if (en.type == EncloseType.MEMORY) { /* Don't move this to previous of parse_subexp() */ env.setMemNode(en.regNum, node); } } returnCode = 0; return node; // ?? } private Node parseEncloseNamedGroup2(boolean listCapture) { int nm = p; int num = fetchName(c, false); int nameEnd = value; num = env.addMemEntry(); if (listCapture && num >= BitStatus.BIT_STATUS_BITS_NUM) newValueException(ERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY); regex.nameAdd(bytes, nm, nameEnd, num, syntax); EncloseNode en = new EncloseNode(env.option, true); // node_new_enclose_memory en.regNum = num; Node node = en; if (listCapture) env.captureHistory = bsOnAtSimple(env.captureHistory, num); env.numNamed++; return node; } private int findStrPosition(int[]s, int n, int from, int to, Ptr nextChar) { int x; int q; int p = from; int i = 0; while (p < to) { x = enc.mbcToCode(bytes, p, to); q = p + enc.length(bytes, p, to); if (x == s[0]) { for (i=1; i= n) { if (bytes[nextChar.p] != 0) nextChar.p = q; // we may need zero term semantics... 
return p; } } p = q; } return -1; } private Node parseExp(TokenType term) { if (token.type == term) return StringNode.EMPTY; // goto end_of_token Node node = null; boolean group = false; switch(token.type) { case ALT: case EOT: return StringNode.EMPTY; // end_of_token:, node_new_empty case SUBEXP_OPEN: node = parseEnclose(TokenType.SUBEXP_CLOSE); if (returnCode == 1) { group = true; } else if (returnCode == 2) { /* option only */ int prev = env.option; EncloseNode en = (EncloseNode)node; env.option = en.option; fetchToken(); Node target = parseSubExp(term); env.option = prev; en.setTarget(target); return node; } break; case SUBEXP_CLOSE: if (!syntax.allowUnmatchedCloseSubexp()) newSyntaxException(ERR_UNMATCHED_CLOSE_PARENTHESIS); if (token.escaped) { return parseExpTkRawByte(group); // goto tk_raw_byte } else { return parseExpTkByte(group); // goto tk_byte } case STRING: return parseExpTkByte(group); // tk_byte: case RAW_BYTE: return parseExpTkRawByte(group); // tk_raw_byte: case CODE_POINT: byte[]buf = new byte[Config.ENC_CODE_TO_MBC_MAXLEN]; int num = enc.codeToMbc(token.getCode(), buf, 0); // #ifdef NUMBERED_CHAR_IS_NOT_CASE_AMBIG ... 
// setRaw() #else node = new StringNode(buf, 0, num); break; case QUOTE_OPEN: int[]endOp = new int[]{syntax.metaCharTable.esc, 'E'}; int qstart = p; Ptr nextChar = new Ptr(); int qend = findStrPosition(endOp, endOp.length, qstart, stop, nextChar); if (qend == -1) nextChar.p = qend = stop; node = new StringNode(bytes, qstart, qend); p = nextChar.p; break; case CHAR_TYPE: switch(token.getPropCType()) { case CharacterType.D: case CharacterType.S: case CharacterType.W: if (Config.NON_UNICODE_SDW) { CClassNode cc = new CClassNode(); cc.addCType(token.getPropCType(), false, env, this); if (token.getPropNot()) cc.setNot(); node = cc; } break; case CharacterType.WORD: node = new CTypeNode(token.getPropCType(), token.getPropNot()); break; case CharacterType.SPACE: case CharacterType.DIGIT: case CharacterType.XDIGIT: // #ifdef USE_SHARED_CCLASS_TABLE ... #endif CClassNode ccn = new CClassNode(); ccn.addCType(token.getPropCType(), false, env, this); if (token.getPropNot()) ccn.setNot(); node = ccn; break; default: newInternalException(ERR_PARSER_BUG); } // inner switch break; case CHAR_PROPERTY: node = parseCharProperty(); break; case CC_CC_OPEN: CClassNode cc = parseCharClass(); node = cc; if (isIgnoreCase(env.option)) { ApplyCaseFoldArg arg = new ApplyCaseFoldArg(env, cc); enc.applyAllCaseFold(env.caseFoldFlag, ApplyCaseFold.INSTANCE, arg); if (arg.altRoot != null) { node = ConsAltNode.newAltNode(node, arg.altRoot); } } break; case ANYCHAR: node = new AnyCharNode(); break; case ANYCHAR_ANYTIME: node = new AnyCharNode(); QuantifierNode qn = new QuantifierNode(0, QuantifierNode.REPEAT_INFINITE, false); qn.setTarget(node); node = qn; break; case BACKREF: int[]backRefs = token.getBackrefNum() > 1 ? token.getBackrefRefs() : new int[]{token.getBackrefRef1()}; node = new BackRefNode(token.getBackrefNum(), backRefs, token.getBackrefByName(), token.getBackrefExistLevel(), // #ifdef USE_BACKREF_AT_LEVEL token.getBackrefLevel(), // ... 
env); break; case CALL: if (Config.USE_SUBEXP_CALL) { int gNum = token.getCallGNum(); if (gNum < 0) { gNum = backrefRelToAbs(gNum); if (gNum <= 0) newValueException(ERR_INVALID_BACKREF); } node = new CallNode(bytes, token.getCallNameP(), token.getCallNameEnd(), gNum); env.numCall++; } // USE_SUBEXP_CALL break; case ANCHOR: node = new AnchorNode(token.getAnchor()); // possible bug in oniguruma break; case OP_REPEAT: case INTERVAL: if (syntax.contextIndepRepeatOps()) { if (syntax.contextInvalidRepeatOps()) { newSyntaxException(ERR_TARGET_OF_REPEAT_OPERATOR_NOT_SPECIFIED); } else { node = StringNode.EMPTY; // node_new_empty } } else { return parseExpTkByte(group); // goto tk_byte } break; default: newInternalException(ERR_PARSER_BUG); } //switch //targetp = node; fetchToken(); // re_entry: return parseExpRepeat(node, group); // repeat: } private Node parseExpTkByte(boolean group) { StringNode node = new StringNode(bytes, token.backP, p); // tk_byte: while (true) { fetchToken(); if (token.type != TokenType.STRING) break; if (token.backP == node.end) { node.end = p; // non escaped character, remain shared, just increase shared range } else { node.cat(bytes, token.backP, p); // non continuous string stream, need to COW } } // targetp = node; return parseExpRepeat(node, group); // string_end:, goto repeat } private Node parseExpTkRawByte(boolean group) { // tk_raw_byte: // important: we don't use 0xff mask here neither in the compiler // (in the template string) so we won't have to mask target // strings when comparing against them in the matcher StringNode node = new StringNode((byte)token.getC()); node.setRaw(); int len = 1; while (true) { if (len >= enc.minLength()) { if (len == enc.length(node.bytes, node.p, node.end)) { fetchToken(); node.clearRaw(); // !goto string_end;! return parseExpRepeat(node, group); } } fetchToken(); if (token.type != TokenType.RAW_BYTE) { /* Don't use this, it is wrong for little endian encodings. */ // USE_PAD_TO_SHORT_BYTE_CHAR ... 
newValueException(ERR_TOO_SHORT_MULTI_BYTE_STRING); } // important: we don't use 0xff mask here neither in the compiler // (in the template string) so we won't have to mask target // strings when comparing against them in the matcher node.cat((byte)token.getC()); len++; } // while } private Node parseExpRepeat(Node target, boolean group) { while (token.type == TokenType.OP_REPEAT || token.type == TokenType.INTERVAL) { // repeat: if (target.isInvalidQuantifier()) newSyntaxException(ERR_TARGET_OF_REPEAT_OPERATOR_INVALID); QuantifierNode qtfr = new QuantifierNode(token.getRepeatLower(), token.getRepeatUpper(), token.type == TokenType.INTERVAL); qtfr.greedy = token.getRepeatGreedy(); int ret = qtfr.setQuantifier(target, group, env, bytes, getBegin(), getEnd()); Node qn = qtfr; if (token.getRepeatPossessive()) { EncloseNode en = new EncloseNode(EncloseType.STOP_BACKTRACK); // node_new_enclose en.setTarget(qn); qn = en; } if (ret == 0) { target = qn; } else if (ret == 2) { /* split case: /abc+/ */ target = ConsAltNode.newListNode(target, null); ConsAltNode tmp = ((ConsAltNode)target).setCdr(ConsAltNode.newListNode(qn, null)); fetchToken(); return parseExpRepeatForCar(target, tmp, group); } fetchToken(); // goto re_entry } return target; } private Node parseExpRepeatForCar(Node top, ConsAltNode target, boolean group) { while (token.type == TokenType.OP_REPEAT || token.type == TokenType.INTERVAL) { // repeat: if (target.car.isInvalidQuantifier()) newSyntaxException(ERR_TARGET_OF_REPEAT_OPERATOR_INVALID); QuantifierNode qtfr = new QuantifierNode(token.getRepeatLower(), token.getRepeatUpper(), token.type == TokenType.INTERVAL); qtfr.greedy = token.getRepeatGreedy(); int ret = qtfr.setQuantifier(target.car, group, env, bytes, getBegin(), getEnd()); Node qn = qtfr; if (token.getRepeatPossessive()) { EncloseNode en = new EncloseNode(EncloseType.STOP_BACKTRACK); // node_new_enclose en.setTarget(qn); qn = en; } if (ret == 0) { target.setCar(qn); } else if (ret == 2) { /* split 
case: /abc+/ */ assert false; } fetchToken(); // goto re_entry } return top; } private Node parseBranch(TokenType term) { Node node = parseExp(term); if (token.type == TokenType.EOT || token.type == term || token.type == TokenType.ALT) { return node; } else { ConsAltNode top = ConsAltNode.newListNode(node, null); ConsAltNode t = top; while (token.type != TokenType.EOT && token.type != term && token.type != TokenType.ALT) { node = parseExp(term); if (node.getType() == NodeType.LIST) { t.setCdr((ConsAltNode)node); while (((ConsAltNode)node).cdr != null ) node = ((ConsAltNode)node).cdr; t = ((ConsAltNode)node); } else { t.setCdr(ConsAltNode.newListNode(node, null)); t = t.cdr; } } return top; } } /* term_tok: TK_EOT or TK_SUBEXP_CLOSE */ private Node parseSubExp(TokenType term) { Node node = parseBranch(term); if (token.type == term) { return node; } else if (token.type == TokenType.ALT) { ConsAltNode top = ConsAltNode.newAltNode(node, null); ConsAltNode t = top; while (token.type == TokenType.ALT) { fetchToken(); node = parseBranch(term); t.setCdr(ConsAltNode.newAltNode(node, null)); t = t.cdr; } if (token.type != term) parseSubExpError(term); return top; } else { parseSubExpError(term); return null; //not reached } } private void parseSubExpError(TokenType term) { if (term == TokenType.SUBEXP_CLOSE) { newSyntaxException(ERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS); } else { newInternalException(ERR_PARSER_BUG); } } private Node parseRegexp() { fetchToken(); return parseSubExp(TokenType.EOT); } } joni-2.0.0/src/org/joni/Regex.java000066400000000000000000000362171214326443200167140ustar00rootroot00000000000000/* * Permission is hereby granted, free of charge, to any person obtaining a copy of * this software and associated documentation files (the "Software"), to deal in * the Software without restriction, including without limitation the rights to * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons 
to whom the Software is furnished to do * so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ package org.joni; import static org.joni.BitStatus.bsAt; import static org.joni.Option.isCaptureGroup; import static org.joni.Option.isDontCaptureGroup; import java.util.IllegalFormatConversionException; import java.util.Iterator; import org.jcodings.Encoding; import org.jcodings.EncodingDB; import org.jcodings.specific.ASCIIEncoding; import org.jcodings.specific.UTF8Encoding; import org.jcodings.util.BytesHash; import org.joni.constants.AnchorType; import org.joni.constants.RegexState; import org.joni.exception.ErrorMessages; import org.joni.exception.InternalException; import org.joni.exception.ValueException; public final class Regex implements RegexState { int[] code; /* compiled pattern */ int codeLength; boolean stackNeeded; Object[]operands; /* e.g. shared CClassNode */ int operandLength; int state; /* normal, searching, compiling */ // remove int numMem; /* used memory(...) num counted from 1 */ int numRepeat; /* OP_REPEAT/OP_REPEAT_NG id-counter */ int numNullCheck; /* OP_NULL_CHECK_START/END id counter */ int numCombExpCheck; /* combination explosion check */ int numCall; /* number of subexp call */ int captureHistory; /* (?@...) 
flag (1-31) */ int btMemStart; /* need backtrack flag */ int btMemEnd; /* need backtrack flag */ int stackPopLevel; int[]repeatRangeLo; int[]repeatRangeHi; public WarnCallback warnings; public MatcherFactory factory; final Encoding enc; int options; int userOptions; Object userObject; //final Syntax syntax; final int caseFoldFlag; BytesHash nameTable; // named entries /* optimization info (string search, char-map and anchors) */ SearchAlgorithm searchAlgorithm; /* optimize flag */ int thresholdLength; /* search str-length for apply optimize */ int anchor; /* BEGIN_BUF, BEGIN_POS, (SEMI_)END_BUF */ int anchorDmin; /* (SEMI_)END_BUF anchor distance */ int anchorDmax; /* (SEMI_)END_BUF anchor distance */ int subAnchor; /* start-anchor for exact or map */ byte[]exact; int exactP; int exactEnd; byte[]map; /* used as BM skip or char-map */ int[]intMap; /* BM skip for exact_len > 255 */ int[]intMapBackward; /* BM skip for backward search */ int dMin; /* min-distance of exact or map */ int dMax; /* max-distance of exact or map */ byte[][]templates; int templateNum; public Regex(CharSequence cs) { this(cs.toString()); } public Regex(CharSequence cs, Encoding enc) { this(cs.toString(), enc); } public Regex(String str) { this(str.getBytes(), 0, str.length(), 0, UTF8Encoding.INSTANCE); } public Regex(String str, Encoding enc) { this(str.getBytes(), 0, str.length(), 0, enc); } public Regex(byte[] bytes) { this(bytes, 0, bytes.length, 0, ASCIIEncoding.INSTANCE); } public Regex(byte[] bytes, int p, int end) { this(bytes, p, end, 0, ASCIIEncoding.INSTANCE); } public Regex(byte[] bytes, int p, int end, int option) { this(bytes, p, end, option, ASCIIEncoding.INSTANCE); } public Regex(byte[]bytes, int p, int end, int option, Encoding enc) { this(bytes, p, end, option, enc, Syntax.RUBY, WarnCallback.DEFAULT); } // onig_new public Regex(byte[]bytes, int p, int end, int option, Encoding enc, Syntax syntax) { this(bytes, p, end, option, Config.ENC_CASE_FOLD_DEFAULT, enc, syntax, 
WarnCallback.DEFAULT); } public Regex(byte[]bytes, int p, int end, int option, Encoding enc, WarnCallback warnings) { this(bytes, p, end, option, enc, Syntax.RUBY, warnings); } // onig_new public Regex(byte[]bytes, int p, int end, int option, Encoding enc, Syntax syntax, WarnCallback warnings) { this(bytes, p, end, option, Config.ENC_CASE_FOLD_DEFAULT, enc, syntax, warnings); } // onig_alloc_init public Regex(byte[]bytes, int p, int end, int option, int caseFoldFlag, Encoding enc, Syntax syntax, WarnCallback warnings) { if ((option & (Option.DONT_CAPTURE_GROUP | Option.CAPTURE_GROUP)) == (Option.DONT_CAPTURE_GROUP | Option.CAPTURE_GROUP)) { throw new ValueException(ErrorMessages.ERR_INVALID_COMBINATION_OF_OPTIONS); } if ((option & Option.NEGATE_SINGLELINE) != 0) { option |= syntax.options; option &= ~Option.SINGLELINE; } else { option |= syntax.options; } this.enc = enc; this.options = option; this.caseFoldFlag = caseFoldFlag; this.warnings = warnings; new Analyser(new ScanEnvironment(this, syntax), bytes, p, end).compile(); this.warnings = null; } public Matcher matcher(byte[]bytes) { return matcher(bytes, 0, bytes.length); } public Matcher matcher(byte[]bytes, int p, int end) { return factory.create(this, bytes, p, end); } public int numberOfCaptures() { return numMem; } public int numberOfCaptureHistories() { if (Config.USE_CAPTURE_HISTORY) { int n = 0; for (int i=0; i<=Config.MAX_CAPTURE_HISTORY_GROUP; i++) { if (bsAt(captureHistory, i)) n++; } return n; } else { return 0; } } String nameTableToString() { StringBuilder sb = new StringBuilder(); if (nameTable != null) { sb.append("name table\n"); for (NameEntry ne : nameTable) { sb.append(" " + ne + "\n"); } sb.append("\n"); } return sb.toString(); } NameEntry nameFind(byte[]name, int nameP, int nameEnd) { if (nameTable != null) return nameTable.get(name, nameP, nameEnd); return null; } void renumberNameTable(int[]map) { if (nameTable != null) { for (NameEntry e : nameTable) { if (e.backNum > 1) { for (int i=0; 
i(); // 13, oni defaults to 5 } else { e = nameFind(name, nameP, nameEnd); } if (e == null) { // dup the name here as oni does ?, what for ? (it has to manage it, we don't) e = new NameEntry(name, nameP, nameEnd); nameTable.putDirect(name, nameP, nameEnd, e); } else if (e.backNum >= 1 && !syntax.allowMultiplexDefinitionName()) { throw new ValueException(ErrorMessages.ERR_MULTIPLEX_DEFINED_NAME, new String(name, nameP, nameEnd - nameP)); } e.addBackref(backRef); } NameEntry nameToGroupNumbers(byte[]name, int nameP, int nameEnd) { return nameFind(name, nameP, nameEnd); } public int nameToBackrefNumber(byte[]name, int nameP, int nameEnd, Region region) { NameEntry e = nameToGroupNumbers(name, nameP, nameEnd); if (e == null) throw new ValueException(ErrorMessages.ERR_UNDEFINED_NAME_REFERENCE, new String(name, nameP, nameEnd - nameP)); switch(e.backNum) { case 0: throw new InternalException(ErrorMessages.ERR_PARSER_BUG); case 1: return e.backRef1; default: if (region != null) { for (int i = e.backNum - 1; i >= 0; i--) { if (region.beg[e.backRefs[i]] != Region.REGION_NOTPOS) return e.backRefs[i]; } } return e.backRefs[e.backNum - 1]; } } public Iterator namedBackrefIterator() { return nameTable.iterator(); } public boolean noNameGroupIsActive(Syntax syntax) { if (isDontCaptureGroup(options)) return false; if (Config.USE_NAMED_GROUP) { if (numberOfNames() > 0 && syntax.captureOnlyNamedGroup() && !isCaptureGroup(options)) return false; } return true; } /* set skip map for Boyer-Moor search */ void setupBMSkipMap() { byte[]bytes = exact; int p = exactP; int end = exactEnd; int len = end - p; if (len < Config.CHAR_TABLE_SIZE) { // map/skip if (map == null) map = new byte[Config.CHAR_TABLE_SIZE]; for (int i=0; i= 3 || (e.length >= 2 && allowReverse)) { setupBMSkipMap(); if (allowReverse) { searchAlgorithm = SearchAlgorithm.BM; } else { searchAlgorithm = SearchAlgorithm.BM_NOT_REV; } } else { searchAlgorithm = enc.isSingleByte() ? 
SearchAlgorithm.SLOW_SB : SearchAlgorithm.SLOW; } } dMin = e.mmd.min; dMax = e.mmd.max; if (dMin != MinMaxLen.INFINITE_DISTANCE) { thresholdLength = dMin + (exactEnd - exactP); } } void setOptimizeMapInfo(OptMapInfo m) { map = m.map; searchAlgorithm = enc.isSingleByte() ? SearchAlgorithm.MAP_SB : SearchAlgorithm.MAP; dMin = m.mmd.min; dMax = m.mmd.max; if (dMin != MinMaxLen.INFINITE_DISTANCE) { thresholdLength = dMin + 1; } } void setSubAnchor(OptAnchorInfo anc) { subAnchor |= anc.leftAnchor & AnchorType.BEGIN_LINE; subAnchor |= anc.rightAnchor & AnchorType.END_LINE; } void clearOptimizeInfo() { searchAlgorithm = SearchAlgorithm.NONE; anchor = 0; anchorDmax = 0; anchorDmin = 0; subAnchor = 0; exact = null; exactP = exactEnd = 0; } public String encStringToString(byte[]bytes, int p, int end) { StringBuilder sb = new StringBuilder("\nPATTERN: /"); if (enc.minLength() > 1) { int p_ = p; while (p_ < end) { int code = enc.mbcToCode(bytes, p_, end); if (code >= 0x80) { try { sb.append(String.format(" 0x%04x ", code)); } catch (IllegalFormatConversionException ifce) { sb.append(code); } } else { sb.append((char)code); } p_ += enc.length(bytes, p_, end); } } else { while (p < end) { sb.append(new String(new byte[]{bytes[p]})); p++; } } return sb.append("/").toString(); } public String optimizeInfoToString() { String s = ""; s += "optimize: " + searchAlgorithm.getName() + "\n"; s += " anchor: " + OptAnchorInfo.anchorToString(anchor); if ((anchor & AnchorType.END_BUF_MASK) != 0) { s += MinMaxLen.distanceRangeToString(anchorDmin, anchorDmax); } s += "\n"; if (searchAlgorithm != SearchAlgorithm.NONE) { s += " sub anchor: " + OptAnchorInfo.anchorToString(subAnchor) + "\n"; } s += "dmin: " + dMin + " dmax: " + dMax + "\n"; s += "threshold length: " + thresholdLength + "\n"; if (exact != null) { s += "exact: [" + new String(exact, exactP, exactEnd - exactP) + "]: length: " + (exactEnd - exactP) + "\n"; } else if (searchAlgorithm == SearchAlgorithm.MAP || searchAlgorithm == 
SearchAlgorithm.MAP_SB) { int n=0; for (int i=0; i 0) { int c=0; s += "["; for (int i=0; i 0) s += ", "; c++; if (enc.maxLength() == 1 && enc.isPrint(i)) s += ((char)i); else s += i; } } s += "]\n"; } } return s; } public Encoding getEncoding() { return enc; } public int getOptions() { return options; } public void setUserOptions(int options) { this.userOptions = options; } public int getUserOptions() { return userOptions; } public void setUserObject(Object object) { this.userObject = object; } public Object getUserObject() { return userObject; } } joni-2.0.0/src/org/joni/Region.java000066400000000000000000000044171214326443200170620ustar00rootroot00000000000000/* * Permission is hereby granted, free of charge, to any person obtaining a copy of * this software and associated documentation files (the "Software"), to deal in * the Software without restriction, including without limitation the rights to * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do * so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ package org.joni; public final class Region { static final int REGION_NOTPOS = -1; public final int numRegs; public final int[]beg; public final int[]end; public CaptureTreeNode historyRoot; public Region(int num) { this.numRegs = num; this.beg = new int[num]; this.end = new int[num]; } public Region(int begin, int end) { this.numRegs = 1; this.beg = new int[]{begin}; this.end = new int[]{end}; } public Region clone() { Region region = new Region(numRegs); System.arraycopy(beg, 0, region.beg, 0, beg.length); System.arraycopy(end, 0, region.end, 0, end.length); if (historyRoot != null) region.historyRoot = historyRoot.cloneTree(); return region; } public String toString() { StringBuilder sb = new StringBuilder(); sb.append("Region: \n"); for (int i=0; i= memNodes.length) { Node[]tmp = new Node[memNodes.length << 1]; System.arraycopy(memNodes, 0, tmp, 0, memNodes.length); memNodes = tmp; } return numMem; } public void setMemNode(int num, Node node) { if (numMem >= num) { memNodes[num] = node; } else { throw new InternalException(ErrorMessages.ERR_PARSER_BUG); } } public int convertBackslashValue(int c) { if (syntax.opEscControlChars()) { switch (c) { case 'n': return '\n'; case 't': return '\t'; case 'r': return '\r'; case 'f': return '\f'; case 'a': return '\007'; case 'b': return '\010'; case 'e': return '\033'; case 'v': if (syntax.op2EscVVtab()) return 11; // ??? 
break; default: break; } } return c; } void ccEscWarn(String s) { if (Config.USE_WARN) { if (syntax.warnCCOpNotEscaped() && syntax.backSlashEscapeInCC()) { reg.warnings.warn("character class has '" + s + "' without escape"); } } } void closeBracketWithoutEscapeWarn(String s) { if (Config.USE_WARN) { if (syntax.warnCCOpNotEscaped()) { reg.warnings.warn("regular expression has '" + s + "' without escape"); } } } } joni-2.0.0/src/org/joni/ScannerSupport.java000066400000000000000000000126231214326443200206230ustar00rootroot00000000000000/* * Permission is hereby granted, free of charge, to any person obtaining a copy of * this software and associated documentation files (the "Software"), to deal in * the Software without restriction, including without limitation the rights to * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do * so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ package org.joni; import org.jcodings.Encoding; import org.jcodings.IntHolder; import org.joni.exception.ErrorMessages; import org.joni.exception.InternalException; import org.joni.exception.SyntaxException; import org.joni.exception.ValueException; abstract class ScannerSupport extends IntHolder implements ErrorMessages { protected final Encoding enc; // fast access to encoding protected final byte[]bytes; // pattern protected int p; // current scanner position protected int stop; // pattern end (mutable) private int lastFetched; // last fetched value for unfetch support protected int c; // current code point private final int begin; // pattern begin position for reset() support private final int end; // pattern end position for reset() support protected int _p; // used by mark()/restore() to mark positions protected ScannerSupport(Encoding enc, byte[]bytes, int p, int end) { this.enc = enc; this.bytes = bytes; this.begin = p; this.end = end; reset(); } protected int getBegin() { return begin; } protected int getEnd() { return end; } private final int INT_SIGN_BIT = 1 << 31; protected final int scanUnsignedNumber() { int last = c; int num = 0; // long ??? 
while(left()) { fetch(); if (enc.isDigit(c)) { int onum = num; num = num * 10 + Encoding.digitVal(c); if (((onum ^ num) & INT_SIGN_BIT) != 0) return -1; } else { unfetch(); break; } } c = last; return num; } protected final int scanUnsignedHexadecimalNumber(int maxLength) { int last = c; int num = 0; while(left() && maxLength-- != 0) { fetch(); if (enc.isXDigit(c)) { int onum = num; int val = enc.xdigitVal(c); num = (num << 4) + val; if (((onum ^ num) & INT_SIGN_BIT) != 0) return -1; } else { unfetch(); break; } } c = last; return num; } protected final int scanUnsignedOctalNumber(int maxLength) { int last = c; int num = 0; while(left() && maxLength-- != 0) { fetch(); if (enc.isDigit(c) && c < '8') { int onum = num; int val = Encoding.odigitVal(c); num = (num << 3) + val; if (((onum ^ num) & INT_SIGN_BIT) != 0) return -1; } else { unfetch(); break; } } c = last; return num; } protected final void reset() { p = begin; stop = end; } protected final void mark() { _p = p; } protected final void restore() { p = _p; } protected final void inc() { lastFetched = p; p += enc.length(bytes, p, stop); } protected final void fetch() { c = enc.mbcToCode(bytes, p, stop); lastFetched = p; p += enc.length(bytes, p, stop); } protected int fetchTo() { int to = enc.mbcToCode(bytes, p, stop); lastFetched = p; p += enc.length(bytes, p, stop); return to; } protected final void unfetch() { p = lastFetched; } protected final int peek() { return p < stop ? 
enc.mbcToCode(bytes, p, stop) : 0; } protected final boolean peekIs(int c) { return peek() == c; } protected final boolean left() { return p < stop; } protected void newSyntaxException(String message) { throw new SyntaxException(message); } protected void newValueException(String message) { throw new ValueException(message); } protected void newValueException(String message, String str) { throw new ValueException(message, str); } protected void newValueException(String message, int p, int end) { throw new ValueException(message, new String(bytes, p, end - p)); } protected void newInternalException(String message) { throw new InternalException(message); } } joni-2.0.0/src/org/joni/SearchAlgorithm.java000066400000000000000000000435321214326443200207140ustar00rootroot00000000000000/* * Permission is hereby granted, free of charge, to any person obtaining a copy of * this software and associated documentation files (the "Software"), to deal in * the Software without restriction, including without limitation the rights to * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do * so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ package org.joni; import org.jcodings.Encoding; import org.jcodings.IntHolder; public abstract class SearchAlgorithm { public abstract String getName(); public abstract int search(Regex regex, byte[]text, int textP, int textEnd, int textRange); public abstract int searchBackward(Regex regex, byte[]text, int textP, int adjustText, int textEnd, int textStart, int s_, int range_); public static final SearchAlgorithm NONE = new SearchAlgorithm() { public final String getName() { return "NONE"; } public final int search(Regex regex, byte[]text, int textP, int textEnd, int textRange) { return textP; } public final int searchBackward(Regex regex, byte[]text, int textP, int adjustText, int textEnd, int textStart, int s_, int range_) { return textP; } }; public static final SearchAlgorithm SLOW = new SearchAlgorithm() { public final String getName() { return "EXACT"; } public final int search(Regex regex, byte[]text, int textP, int textEnd, int textRange) { Encoding enc = regex.enc; byte[]target = regex.exact; int targetP = regex.exactP; int targetEnd = regex.exactEnd; int end = textEnd; end -= targetEnd - targetP - 1; if (end > textRange) end = textRange; int s = textP; while (s < end) { if (text[s] == target[targetP]) { int p = s + 1; int t = targetP + 1; while (t < targetEnd) { if (target[t] != text[p++]) break; t++; } if (t == targetEnd) return s; } s += enc.length(text, s, textEnd); } return -1; } public final int searchBackward(Regex regex, byte[]text, int textP, int adjustText, int textEnd, int textStart, int s_, int range_) { Encoding enc = regex.enc; byte[]target = regex.exact; int targetP = regex.exactP; int targetEnd = regex.exactEnd; int s = textEnd; s -= targetEnd - targetP; if (s > textStart) { s = textStart; } else { s = enc.leftAdjustCharHead(text, adjustText, s, textEnd); } while (s >= textP) { if (text[s] == target[targetP]) { int p = s + 1; int t = targetP + 1; while (t < targetEnd) { if (target[t] != text[p++]) break; t++; } if (t == targetEnd) return 
s; } s = enc.prevCharHead(text, adjustText, s, textEnd); } return -1; } }; public static final SearchAlgorithm SLOW_SB = new SearchAlgorithm() { public final String getName() { return "EXACT_SB"; } public final int search(Regex regex, byte[]text, int textP, int textEnd, int textRange) { byte[]target = regex.exact; int targetP = regex.exactP; int targetEnd = regex.exactEnd; int end = textEnd; end -= targetEnd - targetP - 1; if (end > textRange) end = textRange; int s = textP; while (s < end) { if (text[s] == target[targetP]) { int p = s + 1; int t = targetP + 1; while (t < targetEnd) { if (target[t] != text[p++]) break; t++; } if (t == targetEnd) return s; } s++; } return -1; } public final int searchBackward(Regex regex, byte[]text, int textP, int adjustText, int textEnd, int textStart, int s_, int range_) { byte[]target = regex.exact; int targetP = regex.exactP; int targetEnd = regex.exactEnd; int s = textEnd; s -= targetEnd - targetP; if (s > textStart) s = textStart; while (s >= textP) { if (text[s] == target[targetP]) { int p = s + 1; int t = targetP + 1; while (t < targetEnd) { if (target[t] != text[p++]) break; t++; } if (t == targetEnd) return s; } //s = s <= adjustText ? 
-1 : s - 1; s--; } return -1; } }; public static final class SLOW_IC extends SearchAlgorithm { private final byte[]buf = new byte[Config.ENC_MBC_CASE_FOLD_MAXLEN]; private final IntHolder holder = new IntHolder(); private final int caseFoldFlag; private final Encoding enc; public SLOW_IC(Regex regex) { this.caseFoldFlag = regex.caseFoldFlag; this.enc = regex.enc; } public final String getName() { return "EXACT_IC"; } public final int search(Regex regex, byte[]text, int textP, int textEnd, int textRange) { byte[]target = regex.exact; int targetP = regex.exactP; int targetEnd = regex.exactEnd; int end = textEnd; end -= targetEnd - targetP - 1; if (end > textRange) end = textRange; int s = textP; while (s < end) { if (lowerCaseMatch(target, targetP, targetEnd, text, s, textEnd)) return s; s += enc.length(text, s, textEnd); } return -1; } public final int searchBackward(Regex regex, byte[]text, int textP, int adjustText, int textEnd, int textStart, int s_, int range_) { byte[]target = regex.exact; int targetP = regex.exactP; int targetEnd = regex.exactEnd; int s = textEnd; s -= targetEnd - targetP; if (s > textStart) { s = textStart; } else { s = enc.leftAdjustCharHead(text, adjustText, s, textEnd); } while (s >= textP) { if (lowerCaseMatch(target, targetP, targetEnd, text, s, textEnd)) return s; s = enc.prevCharHead(text, adjustText, s, textEnd); } return -1; } private boolean lowerCaseMatch(byte[]t, int tP, int tEnd, byte[]bytes, int p, int end) { holder.value = p; while (tP < tEnd) { int lowlen = enc.mbcCaseFold(caseFoldFlag, bytes, holder, end, buf); if (lowlen == 1) { if (t[tP++] != buf[0]) return false; } else { int q = 0; while (lowlen > 0) { if (t[tP++] != buf[q++]) return false; lowlen--; } } } return true; } }; public static final SearchAlgorithm SLOW_IC_SB = new SearchAlgorithm() { public final String getName() { return "EXACT_IC_SB"; } public final int search(Regex regex, byte[]text, int textP, int textEnd, int textRange) { final byte[]toLowerTable = 
regex.enc.toLowerCaseTable(); byte[]target = regex.exact; int targetP = regex.exactP; int targetEnd = regex.exactEnd; int end = textEnd; end -= targetEnd - targetP - 1; if (end > textRange) end = textRange; int s = textP; while (s < end) { if (target[targetP] == toLowerTable[text[s] & 0xff]) { int p = s + 1; int t = targetP + 1; while (t < targetEnd) { if (target[t] != toLowerTable[text[p++] & 0xff]) break; t++; } if (t == targetEnd) return s; } s++; } return -1; } public final int searchBackward(Regex regex, byte[]text, int textP, int adjustText, int textEnd, int textStart, int s_, int range_) { final byte[]toLowerTable = regex.enc.toLowerCaseTable(); byte[]target = regex.exact; int targetP = regex.exactP; int targetEnd = regex.exactEnd; int s = textEnd; s -= targetEnd - targetP; if (s > textStart) s = textStart; while (s >= textP) { if (target[targetP] == toLowerTable[text[s] & 0xff]) { int p = s + 1; int t = targetP + 1; while (t < targetEnd) { if (target[t] != toLowerTable[text[p++] & 0xff]) break; t++; } if (t == targetEnd) return s; } //s = s <= adjustText ? 
-1 : s - 1; s--; } return -1; } }; public static final SearchAlgorithm BM = new SearchAlgorithm() { public final String getName() { return "EXACT_BM"; } public final int search(Regex regex, byte[]text, int textP, int textEnd, int textRange) { byte[]target = regex.exact; int targetP = regex.exactP; int targetEnd = regex.exactEnd; int end = textRange + (targetEnd - targetP) - 1; if (end > textEnd) end = textEnd; int tail = targetEnd - 1; int s = textP + (targetEnd - targetP) - 1; if (regex.intMap == null) { while (s < end) { int p = s; int t = tail; while (text[p] == target[t]) { if (t == targetP) return p; p--; t--; } s += regex.map[text[s] & 0xff]; } } else { /* see int_map[] */ while (s < end) { int p = s; int t = tail; while (text[p] == target[t]) { if (t == targetP) return p; p--; t--; } s += regex.intMap[text[s] & 0xff]; } } return -1; } private static final int BM_BACKWARD_SEARCH_LENGTH_THRESHOLD = 100; public final int searchBackward(Regex regex, byte[]text, int textP, int adjustText, int textEnd, int textStart, int s_, int range_) { Encoding enc = regex.enc; byte[]target = regex.exact; int targetP = regex.exactP; int targetEnd = regex.exactEnd; if (regex.intMapBackward == null) { if (s_ - range_ < BM_BACKWARD_SEARCH_LENGTH_THRESHOLD) { // goto exact_method; return SLOW.searchBackward(regex, text, textP, adjustText, textEnd, textStart, s_, range_); } setBmBackwardSkip(regex, target, targetP, targetEnd); } int s = textEnd - (targetEnd - targetP); if (textStart < s) { s = textStart; } else { s = enc.leftAdjustCharHead(text, adjustText, s, textEnd); } while (s >= textP) { int p = s; int t = targetP; while (t < targetEnd && text[p] == target[t]) { p++; t++; } if (t == targetEnd) return s; s -= regex.intMapBackward[text[s] & 0xff]; s = enc.leftAdjustCharHead(text, adjustText, s, textEnd); } return -1; } private void setBmBackwardSkip(Regex regex, byte[]bytes, int p, int end) { int[] skip; if (regex.intMapBackward == null) { skip = new int[Config.CHAR_TABLE_SIZE]; 
regex.intMapBackward = skip; } else { skip = regex.intMapBackward; } int len = end - p; for (int i=0; i0; i--) skip[bytes[i] & 0xff] = i; } }; public static final SearchAlgorithm BM_NOT_REV = new SearchAlgorithm() { public final String getName() { return "EXACT_BM_NOT_REV"; } public final int search(Regex regex, byte[]text, int textP, int textEnd, int textRange) { Encoding enc = regex.enc; byte[]target = regex.exact; int targetP = regex.exactP; int targetEnd = regex.exactEnd; int tail = targetEnd - 1; int tlen1 = tail - targetP; int end = textRange; if (Config.DEBUG_SEARCH) { Config.log.println("bm_search_notrev: "+ "text: " + textP + ", text_end: " + textEnd + ", text_range: " + textRange); } if (end + tlen1 > textEnd) end = textEnd - tlen1; int s = textP; if (regex.intMap == null) { while (s < end) { int p, se; p = se = s + tlen1; int t = tail; while (text[p] == target[t]) { if (t == targetP) return s; p--; t--; } int skip = regex.map[text[se] & 0xff]; t = s; do { s += enc.length(text, s, textEnd); } while ((s - t) < skip && s < end); } } else { while (s < end) { int p, se; p = se = s + tlen1; int t = tail; while (text[p] == target[t]) { if (t == targetP) return s; p--; t--; } int skip = regex.intMap[text[se] & 0xff]; t = s; do { s += enc.length(text, s, textEnd); } while ((s - t) < skip && s < end); } } return -1; } public final int searchBackward(Regex regex, byte[]text, int textP, int adjustText, int textEnd, int textStart, int s_, int range_) { return BM.searchBackward(regex, text, textP, adjustText, textEnd, textStart, s_, range_); } }; public static final SearchAlgorithm MAP = new SearchAlgorithm() { public final String getName() { return "MAP"; } // TODO: check 1.9 inconsistent calls to map_search public final int search(Regex regex, byte[]text, int textP, int textEnd, int textRange) { Encoding enc = regex.enc; byte[]map = regex.map; int s = textP; while (s < textRange) { if (map[text[s] & 0xff] != 0) return s; s += enc.length(text, s, textEnd); } return 
-1; } public final int searchBackward(Regex regex, byte[]text, int textP, int adjustText, int textEnd, int textStart, int s_, int range_) { Encoding enc = regex.enc; byte[]map = regex.map; int s = textStart; if (s >= textEnd) s = textEnd - 1; // multibyte safe ? while (s >= textP) { if (map[text[s] & 0xff] != 0) return s; s = enc.prevCharHead(text, adjustText, s, textEnd); } return -1; } }; public static final SearchAlgorithm MAP_SB = new SearchAlgorithm() { public final String getName() { return "MAP_SB"; } public final int search(Regex regex, byte[]text, int textP, int textEnd, int textRange) { byte[]map = regex.map; int s = textP; while (s < textRange) { if (map[text[s] & 0xff] != 0) return s; s++; } return -1; } public final int searchBackward(Regex regex, byte[]text, int textP, int adjustText, int textEnd, int textStart, int s_, int range_) { byte[]map = regex.map; int s = textStart; if (s >= textEnd) s = textEnd - 1; while (s >= textP) { if (map[text[s] & 0xff] != 0) return s; s--; } return -1; } }; } joni-2.0.0/src/org/joni/StackEntry.java000066400000000000000000000075501214326443200177270ustar00rootroot00000000000000/* * Permission is hereby granted, free of charge, to any person obtaining a copy of * this software and associated documentation files (the "Software"), to deal in * the Software without restriction, including without limitation the rights to * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do * so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ package org.joni; final class StackEntry { int type; private int E1, E2, E3, E4; // first union member /* byte code position */ void setStatePCode(int pcode) { E1 = pcode; } int getStatePCode() { return E1; } /* string position */ void setStatePStr(int pstr) { E2 = pstr; } int getStatePStr() { return E2; } /* previous char position of pstr */ void setStatePStrPrev(int pstrPrev) { E3 = pstrPrev; } int getStatePStrPrev() { return E3; } void setStateCheck(int check) { E4 = check; } int getStateCheck() { return E4; } // second union member /* for OP_REPEAT_INC, OP_REPEAT_INC_NG */ void setRepeatCount(int count) { E1 = count; } int getRepeatCount() { return E1; } void decreaseRepeatCount() { E1--; } void increaseRepeatCount() { E1++; } /* byte code position (head of repeated target) */ void setRepeatPCode(int pcode) { E2 = pcode; } int getRepeatPCode() { return E2; } /* repeat id */ void setRepeatNum(int num) { E3 = num; } int getRepeatNum() { return E3; } // third union member /* index of stack */ /*int repeat_inc struct*/ void setSi(int si) { E1 = si; } int getSi() { return E1; } // fourth union member /* memory num */ void setMemNum(int num) { E1 = num; } int getMemNum() { return E1; } /* start/end position */ void setMemPstr(int pstr) { E2 = pstr; } int getMemPStr() { return E2; } /* Following information is set, if this stack type is MEM-START */ /* prev. info (for backtrack "(...)*" ) */ void setMemStart(int start) { E3 = start; } int getMemStart() { return E3; } /* prev. 
info (for backtrack "(...)*" ) */ void setMemEnd(int end) { E4 = end; } int getMemEnd() { return E4; } // fifth union member /* null check id */ void setNullCheckNum(int num) { E1 = num; } int getNullCheckNum() { return E1; } /* start position */ void setNullCheckPStr(int pstr) { E2 = pstr; } int getNullCheckPStr() { return E2; } // sixth union member /* byte code position */ void setCallFrameRetAddr(int addr) { E1 = addr; } int getCallFrameRetAddr() { return E1; } /* null check id */ void setCallFrameNum(int num) { E2 = num; } int getCallFrameNum() { return E2; } /* string position */ void setCallFramePStr(int pstr) { E3 = pstr; } int getCallFramePStr() { return E3; } } joni-2.0.0/src/org/joni/StackMachine.java000066400000000000000000000474031214326443200201730ustar00rootroot00000000000000/* * Permission is hereby granted, free of charge, to any person obtaining a copy of * this software and associated documentation files (the "Software"), to deal in * the Software without restriction, including without limitation the rights to * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do * so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ package org.joni; import static org.joni.BitStatus.bsAt; import java.lang.ref.WeakReference; import java.util.Arrays; import org.joni.constants.StackPopLevel; import org.joni.constants.StackType; abstract class StackMachine extends Matcher implements StackType { protected static final int INVALID_INDEX = -1; protected StackEntry[]stack; protected int stk; // stkEnd protected final int[]repeatStk; protected final int memStartStk, memEndStk; // CEC protected byte[] stateCheckBuff; // move to int[] ? int stateCheckBuffSize; protected StackMachine(Regex regex, byte[]bytes, int p , int end) { super(regex, bytes, p, end); this.stack = regex.stackNeeded ? fetchStack() : null; int n = regex.numRepeat + (regex.numMem << 1); this.repeatStk = n > 0 ? new int[n] : null; memStartStk = regex.numRepeat - 1; memEndStk = memStartStk + regex.numMem; /* for index start from 1, mem_start_stk[1]..mem_start_stk[num_mem] */ /* for index start from 1, mem_end_stk[1]..mem_end_stk[num_mem] */ } private static StackEntry[] allocateStack() { StackEntry[]stack = new StackEntry[Config.INIT_MATCH_STACK_SIZE]; stack[0] = new StackEntry(); return stack; } private void doubleStack() { StackEntry[] newStack = new StackEntry[stack.length << 1]; System.arraycopy(stack, 0, newStack, 0, stack.length); stack = newStack; } static final ThreadLocal> stacks = new ThreadLocal>() { @Override protected WeakReference initialValue() { return new WeakReference(allocateStack()); } }; private static StackEntry[] fetchStack() { WeakReference ref = stacks.get(); StackEntry[] stack = ref.get(); if (stack == null) { ref = new WeakReference(stack = allocateStack()); stacks.set(ref); } return stack; } protected final void init() { if (stack != null) pushEnsured(ALT, regex.codeLength - 1); /* bottom stack */ if (repeatStk != null) { for (int i=1; i<=regex.numMem; i++) { repeatStk[i + memStartStk] = repeatStk[i + memEndStk] = INVALID_INDEX; } } } protected final StackEntry ensure1() { if (stk >= stack.length) 
doubleStack(); StackEntry e = stack[stk]; if (e == null) stack[stk] = e = new StackEntry(); return e; } protected final void pushType(int type) { ensure1().type = type; stk++; } // CEC // STATE_CHECK_POS private int stateCheckPos(int s, int snum) { return (s - str) * regex.numCombExpCheck + (snum - 1); } // STATE_CHECK_VAL protected final boolean stateCheckVal(int s, int snum) { if (stateCheckBuff != null) { int x = stateCheckPos(s, snum); return (stateCheckBuff[x / 8] & (1 << (x % 8))) != 0; } return false; } // ELSE_IF_STATE_CHECK_MARK private void stateCheckMark() { StackEntry e = stack[stk]; int x = stateCheckPos(e.getStatePStr(), e.getStateCheck()); stateCheckBuff[x / 8] |= (1 << (x % 8)); } // STATE_CHECK_BUFF_INIT private static final int STATE_CHECK_BUFF_MALLOC_THRESHOLD_SIZE = 16; protected final void stateCheckBuffInit(int strLength, int offset, int stateNum) { if (stateNum > 0 && strLength >= Config.CHECK_STRING_THRESHOLD_LEN) { int size = ((strLength + 1) * stateNum + 7) >>> 3; offset = (offset * stateNum) >>> 3; if (size > 0 && offset < size && size < Config.CHECK_BUFF_MAX_SIZE) { if (size >= STATE_CHECK_BUFF_MALLOC_THRESHOLD_SIZE) { stateCheckBuff = new byte[size]; } else { // same impl, reduce... 
stateCheckBuff = new byte[size]; } Arrays.fill(stateCheckBuff, offset, (size - offset), (byte)0); stateCheckBuffSize = size; } else { stateCheckBuff = null; // reduce stateCheckBuffSize = 0; } } else { stateCheckBuff = null; // reduce stateCheckBuffSize = 0; } } protected final void stateCheckBuffClear() { stateCheckBuff = null; stateCheckBuffSize = 0; } private void push(int type, int pat, int s, int prev) { StackEntry e = ensure1(); e.type = type; e.setStatePCode(pat); e.setStatePStr(s); e.setStatePStrPrev(prev); if (Config.USE_COMBINATION_EXPLOSION_CHECK) e.setStateCheck(0); stk++; } protected final void pushEnsured(int type, int pat) { StackEntry e = stack[stk]; e.type = type; e.setStatePCode(pat); if (Config.USE_COMBINATION_EXPLOSION_CHECK) e.setStateCheck(0); stk++; } protected final void pushAltWithStateCheck(int pat, int s, int sprev, int snum) { StackEntry e = ensure1(); e.type = ALT; e.setStatePCode(pat); e.setStatePStr(s); e.setStatePStrPrev(sprev); if (Config.USE_COMBINATION_EXPLOSION_CHECK) e.setStateCheck(stateCheckBuff != null ? 
snum : 0); stk++; } protected final void pushStateCheck(int s, int snum) { if (stateCheckBuff != null) { StackEntry e = ensure1(); e.type = STATE_CHECK_MARK; e.setStatePStr(s); e.setStateCheck(snum); stk++; } } protected final void pushAlt(int pat, int s, int prev) { push(ALT, pat, s, prev); } protected final void pushPos(int s, int prev) { push(POS, -1 /*NULL_UCHARP*/, s, prev); } protected final void pushPosNot(int pat, int s, int prev) { push(POS_NOT, pat, s, prev); } protected final void pushStopBT() { pushType(STOP_BT); } protected final void pushLookBehindNot(int pat, int s, int sprev) { push(LOOK_BEHIND_NOT, pat, s, sprev); } protected final void pushRepeat(int id, int pat) { StackEntry e = ensure1(); e.type = REPEAT; e.setRepeatNum(id); e.setRepeatPCode(pat); e.setRepeatCount(0); stk++; } protected final void pushRepeatInc(int sindex) { StackEntry e = ensure1(); e.type = REPEAT_INC; e.setSi(sindex); stk++; } protected final void pushMemStart(int mnum, int s) { StackEntry e = ensure1(); e.type = MEM_START; e.setMemNum(mnum); e.setMemPstr(s); e.setMemStart(repeatStk[memStartStk + mnum]); e.setMemEnd(repeatStk[memEndStk + mnum]); repeatStk[memStartStk + mnum] = stk; repeatStk[memEndStk + mnum] = INVALID_INDEX; stk++; } protected final void pushMemEnd(int mnum, int s) { StackEntry e = ensure1(); e.type = MEM_END; e.setMemNum(mnum); e.setMemPstr(s); e.setMemStart(repeatStk[memStartStk + mnum]); e.setMemEnd(repeatStk[memEndStk + mnum]); repeatStk[memEndStk + mnum] = stk; stk++; } protected final void pushMemEndMark(int mnum) { StackEntry e = ensure1(); e.type = MEM_END_MARK; e.setMemNum(mnum); stk++; } protected final int getMemStart(int mnum) { int level = 0; int stkp = stk; while (stkp > 0) { stkp--; StackEntry e = stack[stkp]; if ((e.type & MASK_MEM_END_OR_MARK) != 0 && e.getMemNum() == mnum) { level++; } else if (e.type == MEM_START && e.getMemNum() == mnum) { if (level == 0) break; level--; } } return stkp; } protected final void pushNullCheckStart(int cnum, 
int s) { StackEntry e = ensure1(); e.type = NULL_CHECK_START; e.setNullCheckNum(cnum); e.setNullCheckPStr(s); stk++; } protected final void pushNullCheckEnd(int cnum) { StackEntry e = ensure1(); e.type = NULL_CHECK_END; e.setNullCheckNum(cnum); stk++; } protected final void pushCallFrame(int pat) { StackEntry e = ensure1(); e.type = CALL_FRAME; e.setCallFrameRetAddr(pat); stk++; } protected final void pushReturn() { StackEntry e = ensure1(); e.type = RETURN; stk++; } // stack debug routines here // ... protected final void popOne() { stk--; } protected final StackEntry pop() { switch (regex.stackPopLevel) { case StackPopLevel.FREE: return popFree(); case StackPopLevel.MEM_START: return popMemStart(); default: return popDefault(); } } private StackEntry popFree() { while (true) { StackEntry e = stack[--stk]; if ((e.type & MASK_POP_USED) != 0) { return e; } else if (Config.USE_COMBINATION_EXPLOSION_CHECK) { if (e.type == STATE_CHECK_MARK) stateCheckMark(); } } } private StackEntry popMemStart() { while (true) { StackEntry e = stack[--stk]; if ((e.type & MASK_POP_USED) != 0) { return e; } else if (e.type == MEM_START) { repeatStk[memStartStk + e.getMemNum()] = e.getMemStart(); repeatStk[memEndStk + e.getMemNum()] = e.getMemEnd(); } else if (Config.USE_COMBINATION_EXPLOSION_CHECK) { if (e.type == STATE_CHECK_MARK) stateCheckMark(); } } } private StackEntry popDefault() { while (true) { StackEntry e = stack[--stk]; if ((e.type & MASK_POP_USED) != 0) { return e; } else if (e.type == MEM_START) { repeatStk[memStartStk + e.getMemNum()] = e.getMemStart(); repeatStk[memEndStk + e.getMemNum()] = e.getMemEnd(); } else if (e.type == REPEAT_INC) { //int si = stack[stk + IREPEAT_INC_SI]; //stack[si + IREPEAT_COUNT]--; stack[e.getSi()].decreaseRepeatCount(); } else if (e.type == MEM_END) { repeatStk[memStartStk + e.getMemNum()] = e.getMemStart(); repeatStk[memEndStk + e.getMemNum()] = e.getMemEnd(); } else if (Config.USE_COMBINATION_EXPLOSION_CHECK) { if (e.type == 
STATE_CHECK_MARK) stateCheckMark();
            }
        }
    }

    /**
     * Pops entries until the nearest POS_NOT entry has been removed, undoing the
     * bookkeeping of every entry dropped on the way: group start/end slots are restored
     * from MEM_START and MEM_END entries and the repeat counter referenced by a
     * REPEAT_INC entry is decremented.
     */
    protected final void popTilPosNot() {
        while (true) {
            stk--;
            StackEntry e = stack[stk];

            if (e.type == POS_NOT) {
                break;
            } else if (e.type == MEM_START) {
                repeatStk[memStartStk + e.getMemNum()] = e.getMemStart();
                // the end slot must come from the saved END index (as in popDefault and
                // popTilLookBehindNot); restoring it from the saved start index corrupted
                // the group's end pointer after a negative look-ahead was unwound
                repeatStk[memEndStk + e.getMemNum()] = e.getMemEnd();
            } else if (e.type == REPEAT_INC) {
                //int si = stack[stk + IREPEAT_INC_SI];
                //stack[si + IREPEAT_COUNT]--;
                stack[e.getSi()].decreaseRepeatCount();
            } else if (e.type == MEM_END) {
                repeatStk[memStartStk + e.getMemNum()] = e.getMemStart();
                repeatStk[memEndStk + e.getMemNum()] = e.getMemEnd();
            } else if (Config.USE_COMBINATION_EXPLOSION_CHECK) {
                if (e.type == STATE_CHECK_MARK) stateCheckMark();
            }
        }
    }

    /**
     * Pops entries until the nearest LOOK_BEHIND_NOT entry has been removed, undoing
     * the same bookkeeping as {@code popTilPosNot} for every entry dropped on the way.
     */
    protected final void popTilLookBehindNot() {
        while (true) {
            stk--;
            StackEntry e = stack[stk];

            if (e.type == LOOK_BEHIND_NOT) {
                break;
            } else if (e.type == MEM_START) {
                repeatStk[memStartStk + e.getMemNum()] = e.getMemStart();
                repeatStk[memEndStk + e.getMemNum()] = e.getMemEnd();
            } else if (e.type == REPEAT_INC) {
                //int si = stack[stk + IREPEAT_INC_SI];
                //stack[si + IREPEAT_COUNT]--;
                stack[e.getSi()].decreaseRepeatCount();
            } else if (e.type == MEM_END) {
                repeatStk[memStartStk + e.getMemNum()] = e.getMemStart();
                repeatStk[memEndStk + e.getMemNum()] = e.getMemEnd();
            } else if (Config.USE_COMBINATION_EXPLOSION_CHECK) {
                if (e.type == STATE_CHECK_MARK) stateCheckMark();
            }
        }
    }

    /**
     * Scans down from the top of the stack (without popping) and retypes every entry
     * matching MASK_TO_VOID_TARGET as VOID, stopping at the nearest POS entry, which is
     * voided as well.
     *
     * @return the stack index of that POS entry
     */
    protected final int posEnd() {
        int k = stk;
        while (true) {
            k--;
            StackEntry e = stack[k];
            if ((e.type & MASK_TO_VOID_TARGET) != 0) {
                e.type = VOID;
            } else if (e.type == POS) {
                e.type = VOID;
                break;
            }
        }
        return k;
    }

    /**
     * Same scan as {@code posEnd}, but terminated by (and voiding) the nearest STOP_BT
     * entry; nothing is returned.
     */
    protected final void stopBtEnd() {
        int k = stk;
        while (true) {
            k--;
            StackEntry e = stack[k];
            if ((e.type & MASK_TO_VOID_TARGET) != 0) {
                e.type = VOID;
            } else if (e.type == STOP_BT) {
                e.type = VOID;
                break;
            }
        }
    }

    // int for consistency with other null check routines
    protected final int nullCheck(int id, int s) {
        int k = stk;
        while (true) {
            k--;
            StackEntry e = stack[k];
            if (e.type ==
NULL_CHECK_START) {
                if (e.getNullCheckNum() == id) {
                    // 1 when the position recorded at the start equals s, otherwise 0
                    return e.getNullCheckPStr() == s ? 1 : 0;
                }
            }
        }
    }

    /**
     * Nesting-aware variant of {@code nullCheck}: scans down from the top of the stack
     * for the NULL_CHECK_START entry numbered {@code id}. Every NULL_CHECK_END entry
     * passed on the way (whatever its number) raises {@code level}, and each matching
     * start entry seen while {@code level} is non-zero lowers it again.
     *
     * @return 1 when the position stored in the selected start entry equals {@code s},
     *         otherwise 0
     */
    // NOTE(review): the loop has no lower bound; presumably callers guarantee that a
    // matching NULL_CHECK_START entry is on the stack - confirm.
    protected final int nullCheckRec(int id, int s) {
        int level = 0;
        int k = stk;
        while (true) {
            k--;
            StackEntry e = stack[k];

            if (e.type == NULL_CHECK_START) {
                if (e.getNullCheckNum() == id) {
                    if (level == 0) {
                        return e.getNullCheckPStr() == s ? 1 : 0;
                    } else {
                        level--;
                    }
                }
            } else if (e.type == NULL_CHECK_END) {
                level++;
            }
        }
    }

    /**
     * Null check that also inspects the groups opened since the check started.
     * Scans down for the NULL_CHECK_START entry numbered {@code id}; if its stored
     * position differs from {@code s} the result is 0. Otherwise every MEM_START entry
     * between that entry and the top of the stack is examined.
     *
     * @return 0 when the position moved or some group is unfinished / non-empty,
     *         -1 when all groups are empty but one ends at a position other than
     *         {@code s}, and 1 otherwise
     */
    protected final int nullCheckMemSt(int id, int s) {
        int k = stk;
        int isNull;
        while (true) {
            k--;
            StackEntry e = stack[k];

            if (e.type == NULL_CHECK_START) {
                if (e.getNullCheckNum() == id) {
                    if (e.getNullCheckPStr() != s) {
                        isNull = 0;
                        break;
                    } else {
                        int endp;
                        isNull = 1;
                        // walk back up from the start entry towards the top of the stack
                        while (k < stk) {
                            if (e.type == MEM_START) {
                                // group has no recorded end yet
                                if (e.getMemEnd() == INVALID_INDEX) {
                                    isNull = 0;
                                    break;
                                }
                                // the saved end is either a stack index (groups flagged in
                                // regex.btMemEnd) or is used directly as the end position
                                if (bsAt(regex.btMemEnd, e.getMemNum())) {
                                    endp = stack[e.getMemEnd()].getMemPStr();
                                } else {
                                    endp = e.getMemEnd();
                                }
                                if (stack[e.getMemStart()].getMemPStr() != endp) {
                                    isNull = 0;
                                    break;
                                } else if (endp != s) {
                                    isNull = -1; /* empty, but position changed */
                                }
                            }
                            k++;
                            e = stack[k]; // !!
}
                        break;
                    }
                }
            }
        }
        return isNull;
    }

    /**
     * Nesting-aware variant of {@code nullCheckMemSt}. Scans down for the
     * NULL_CHECK_START entry numbered {@code id}; NULL_CHECK_END entries with the same
     * number raise {@code level} and matching start entries seen while {@code level} is
     * non-zero lower it. Once the start entry at level 0 is found, the same position and
     * MEM_START inspection as in {@code nullCheckMemSt} is performed.
     *
     * @return 0 when the position moved or some group is unfinished / non-empty,
     *         -1 when all groups are empty but one ends at a position other than
     *         {@code s}, and 1 otherwise
     */
    protected final int nullCheckMemStRec(int id, int s) {
        int level = 0;
        int k = stk;
        int isNull;
        while (true) {
            k--;
            StackEntry e = stack[k];

            if (e.type == NULL_CHECK_START) {
                if (e.getNullCheckNum() == id) {
                    if (level == 0) {
                        if (e.getNullCheckPStr() != s) {
                            isNull = 0;
                            break;
                        } else {
                            int endp;
                            isNull = 1;
                            // walk back up from the start entry towards the top of the stack
                            while (k < stk) {
                                if (e.type == MEM_START) {
                                    // group has no recorded end yet
                                    if (e.getMemEnd() == INVALID_INDEX) {
                                        isNull = 0;
                                        break;
                                    }
                                    // saved end is a stack index for groups flagged in
                                    // regex.btMemEnd, otherwise it is used directly
                                    if (bsAt(regex.btMemEnd, e.getMemNum())) {
                                        endp = stack[e.getMemEnd()].getMemPStr();
                                    } else {
                                        endp = e.getMemEnd();
                                    }
                                    if (stack[e.getMemStart()].getMemPStr() != endp) {
                                        isNull = 0;
                                        break;
                                    } else if (endp != s) {
                                        isNull = -1;; /* empty, but position changed */
                                    }
                                }
                                k++;
                                e = stack[k];
                            }
                            break;
                        }
                    } else {
                        level--;
                    }
                }
            } else if (e.type == NULL_CHECK_END) {
                if (e.getNullCheckNum() == id) level++;
            }
        }
        return isNull;
    }

    /**
     * Scans down from the top of the stack for the REPEAT entry numbered {@code id}.
     * RETURN entries raise {@code level} and CALL_FRAME entries lower it; REPEAT entries
     * are only considered while {@code level} is zero.
     *
     * @return the stack index of the matching REPEAT entry
     */
    // NOTE(review): no lower bound on the scan; presumably the entry is always present - confirm.
    protected final int getRepeat(int id) {
        int level = 0;
        int k = stk;
        while (true) {
            k--;
            StackEntry e = stack[k];

            if (e.type == REPEAT) {
                if (level == 0) {
                    if (e.getRepeatNum() == id) return k;
                }
            } else if (e.type == CALL_FRAME) {
                level--;
            } else if (e.type == RETURN) {
                level++;
            }
        }
    }

    /**
     * Scans down from the top of the stack for the CALL_FRAME entry that is not paired
     * with a RETURN entry seen on the way down.
     *
     * @return the return address stored in that CALL_FRAME entry
     */
    protected final int sreturn() {
        int level = 0;
        int k = stk;
        while (true) {
            k--;
            StackEntry e = stack[k];

            if (e.type == CALL_FRAME) {
                if (level == 0) {
                    return e.getCallFrameRetAddr();
                } else {
                    level--;
                }
            } else if (e.type == RETURN) {
                level++;
            }
        }
    }
}
joni-2.0.0/src/org/joni/Syntax.java000066400000000000000000000427531214326443200171320ustar00rootroot00000000000000/* * Permission is hereby granted, free of charge, to any person obtaining a copy of * this software and associated documentation files (the "Software"), to deal in * the Software without restriction, including without limitation the rights to * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do * so, subject to the following conditions: * * The above copyright notice and 
this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ package org.joni; import static org.joni.constants.MetaChar.INEFFECTIVE_META_CHAR; import org.joni.constants.SyntaxProperties; public final class Syntax implements SyntaxProperties{ private final int op; private final int op2; private final int behavior; public final int options; public final MetaCharTable metaCharTable; public Syntax(int op, int op2, int behavior, int options, MetaCharTable metaCharTable) { this.op = op; this.op2 = op2; this.behavior = behavior; this.options = options; this.metaCharTable = metaCharTable; } public static class MetaCharTable { public final int esc; public final int anyChar; public final int anyTime; public final int zeroOrOneTime; public final int oneOrMoreTime; public final int anyCharAnyTime; public MetaCharTable(int esc, int anyChar, int anyTime, int zeroOrOneTime, int oneOrMoreTime, int anyCharAnyTime) { this.esc = esc; this.anyChar = anyChar; this.anyTime = anyTime; this.zeroOrOneTime = zeroOrOneTime; this.oneOrMoreTime = oneOrMoreTime; this.anyCharAnyTime = anyCharAnyTime; } } /** * OP * */ protected boolean isOp(int opm) { return (op & opm) != 0; } public boolean opVariableMetaCharacters() { return isOp(OP_VARIABLE_META_CHARACTERS); } public boolean opDotAnyChar() { return isOp(OP_DOT_ANYCHAR); } public boolean opAsteriskZeroInf() { return isOp(OP_ASTERISK_ZERO_INF); } public boolean opEscAsteriskZeroInf() { return isOp(OP_ESC_ASTERISK_ZERO_INF); } public 
boolean opPlusOneInf() { return isOp(OP_PLUS_ONE_INF); } public boolean opEscPlusOneInf() { return isOp(OP_ESC_PLUS_ONE_INF); } public boolean opQMarkZeroOne() { return isOp(OP_QMARK_ZERO_ONE); } public boolean opEscQMarkZeroOne() { return isOp(OP_ESC_QMARK_ZERO_ONE); } public boolean opBraceInterval() { return isOp(OP_BRACE_INTERVAL); } public boolean opEscBraceInterval() { return isOp(OP_ESC_BRACE_INTERVAL); } public boolean opVBarAlt() { return isOp(OP_VBAR_ALT); } public boolean opEscVBarAlt() { return isOp(OP_ESC_VBAR_ALT); } public boolean opLParenSubexp() { return isOp(OP_LPAREN_SUBEXP); } public boolean opEscLParenSubexp() { return isOp(OP_ESC_LPAREN_SUBEXP); } public boolean opEscAZBufAnchor() { return isOp(OP_ESC_AZ_BUF_ANCHOR); } public boolean opEscCapitalGBeginAnchor() { return isOp(OP_ESC_CAPITAL_G_BEGIN_ANCHOR); } public boolean opDecimalBackref() { return isOp(OP_DECIMAL_BACKREF); } public boolean opBracketCC() { return isOp(OP_BRACKET_CC); } public boolean opEscWWord() { return isOp(OP_ESC_W_WORD); } public boolean opEscLtGtWordBeginEnd() { return isOp(OP_ESC_LTGT_WORD_BEGIN_END); } public boolean opEscBWordBound() { return isOp(OP_ESC_B_WORD_BOUND); } public boolean opEscSWhiteSpace() { return isOp(OP_ESC_S_WHITE_SPACE); } public boolean opEscDDigit() { return isOp(OP_ESC_D_DIGIT); } public boolean opLineAnchor() { return isOp(OP_LINE_ANCHOR); } public boolean opPosixBracket() { return isOp(OP_POSIX_BRACKET); } public boolean opQMarkNonGreedy() { return isOp(OP_QMARK_NON_GREEDY); } public boolean opEscControlChars() { return isOp(OP_ESC_CONTROL_CHARS); } public boolean opEscCControl() { return isOp(OP_ESC_C_CONTROL); } public boolean opEscOctal3() { return isOp(OP_ESC_OCTAL3); } public boolean opEscXHex2() { return isOp(OP_ESC_X_HEX2); } public boolean opEscXBraceHex8() { return isOp(OP_ESC_X_BRACE_HEX8); } /** * OP * */ protected boolean isOp2(int opm) { return (op2 & opm) != 0; } public boolean op2EscCapitalQQuote() { return 
isOp2(OP2_ESC_CAPITAL_Q_QUOTE); } public boolean op2QMarkGroupEffect() { return isOp2(OP2_QMARK_GROUP_EFFECT); } public boolean op2OptionPerl() { return isOp2(OP2_OPTION_PERL); } public boolean op2OptionRuby() { return isOp2(OP2_OPTION_RUBY); } public boolean op2PlusPossessiveRepeat() { return isOp2(OP2_PLUS_POSSESSIVE_REPEAT); } public boolean op2PlusPossessiveInterval() { return isOp2(OP2_PLUS_POSSESSIVE_INTERVAL); } public boolean op2CClassSetOp() { return isOp2(OP2_CCLASS_SET_OP); } public boolean op2QMarkLtNamedGroup() { return isOp2(OP2_QMARK_LT_NAMED_GROUP); } public boolean op2EscKNamedBackref() { return isOp2(OP2_ESC_K_NAMED_BACKREF); } public boolean op2EscGSubexpCall() { return isOp2(OP2_ESC_G_SUBEXP_CALL); } public boolean op2AtMarkCaptureHistory() { return isOp2(OP2_ATMARK_CAPTURE_HISTORY); } public boolean op2EscCapitalCBarControl() { return isOp2(OP2_ESC_CAPITAL_C_BAR_CONTROL); } public boolean op2EscCapitalMBarMeta() { return isOp2(OP2_ESC_CAPITAL_M_BAR_META); } public boolean op2EscVVtab() { return isOp2(OP2_ESC_V_VTAB); } public boolean op2EscUHex4() { return isOp2(OP2_ESC_U_HEX4); } public boolean op2EscGnuBufAnchor() { return isOp2(OP2_ESC_GNU_BUF_ANCHOR); } public boolean op2EscPBraceCharProperty() { return isOp2(OP2_ESC_P_BRACE_CHAR_PROPERTY); } public boolean op2EscPBraceCircumflexNot() { return isOp2(OP2_ESC_P_BRACE_CIRCUMFLEX_NOT); } public boolean op2EscHXDigit() { return isOp2(OP2_ESC_H_XDIGIT); } public boolean op2IneffectiveEscape() { return isOp2(OP2_INEFFECTIVE_ESCAPE); } /** * BEHAVIOR * */ protected boolean isBehavior(int bvm) { return (behavior & bvm) != 0; } public boolean contextIndepRepeatOps() { return isBehavior(CONTEXT_INDEP_REPEAT_OPS); } public boolean contextInvalidRepeatOps() { return isBehavior(CONTEXT_INVALID_REPEAT_OPS); } public boolean allowUnmatchedCloseSubexp() { return isBehavior(ALLOW_UNMATCHED_CLOSE_SUBEXP); } public boolean allowInvalidInterval() { return isBehavior(ALLOW_INVALID_INTERVAL); } public boolean 
allowIntervalLowAbbrev() { return isBehavior(ALLOW_INTERVAL_LOW_ABBREV); } public boolean strictCheckBackref() { return isBehavior(STRICT_CHECK_BACKREF); } public boolean differentLengthAltLookBehind() { return isBehavior(DIFFERENT_LEN_ALT_LOOK_BEHIND); } public boolean captureOnlyNamedGroup() { return isBehavior(CAPTURE_ONLY_NAMED_GROUP); } public boolean allowMultiplexDefinitionName() { return isBehavior(ALLOW_MULTIPLEX_DEFINITION_NAME); } public boolean fixedIntervalIsGreedyOnly() { return isBehavior(FIXED_INTERVAL_IS_GREEDY_ONLY); } public boolean notNewlineInNegativeCC() { return isBehavior(NOT_NEWLINE_IN_NEGATIVE_CC); } public boolean backSlashEscapeInCC() { return isBehavior(BACKSLASH_ESCAPE_IN_CC); } public boolean allowEmptyRangeInCC() { return isBehavior(ALLOW_EMPTY_RANGE_IN_CC); } public boolean allowDoubleRangeOpInCC() { return isBehavior(ALLOW_DOUBLE_RANGE_OP_IN_CC); } public boolean warnCCOpNotEscaped() { return isBehavior(WARN_CC_OP_NOT_ESCAPED); } public boolean warnReduntantNestedRepeat() { return isBehavior(WARN_REDUNDANT_NESTED_REPEAT); } public static final Syntax RUBY = new Syntax( (( GNU_REGEX_OP | OP_QMARK_NON_GREEDY | OP_ESC_OCTAL3 | OP_ESC_X_HEX2 | OP_ESC_X_BRACE_HEX8 | OP_ESC_CONTROL_CHARS | OP_ESC_C_CONTROL ) & ~OP_ESC_LTGT_WORD_BEGIN_END ), ( OP2_QMARK_GROUP_EFFECT | OP2_OPTION_RUBY | OP2_QMARK_LT_NAMED_GROUP | OP2_ESC_K_NAMED_BACKREF | OP2_ESC_G_SUBEXP_CALL | OP2_ESC_P_BRACE_CHAR_PROPERTY | OP2_ESC_P_BRACE_CIRCUMFLEX_NOT | OP2_PLUS_POSSESSIVE_REPEAT | OP2_CCLASS_SET_OP | OP2_ESC_CAPITAL_C_BAR_CONTROL | OP2_ESC_CAPITAL_M_BAR_META | OP2_ESC_V_VTAB | OP2_ESC_H_XDIGIT ), ( GNU_REGEX_BV | ALLOW_INTERVAL_LOW_ABBREV | DIFFERENT_LEN_ALT_LOOK_BEHIND | CAPTURE_ONLY_NAMED_GROUP | ALLOW_MULTIPLEX_DEFINITION_NAME | FIXED_INTERVAL_IS_GREEDY_ONLY | WARN_CC_OP_NOT_ESCAPED | WARN_REDUNDANT_NESTED_REPEAT ), Option.NONE, new MetaCharTable( '\\', /* esc */ INEFFECTIVE_META_CHAR, /* anychar '.' 
*/ INEFFECTIVE_META_CHAR, /* anytime '*' */ INEFFECTIVE_META_CHAR, /* zero or one time '?' */ INEFFECTIVE_META_CHAR, /* one or more time '+' */ INEFFECTIVE_META_CHAR /* anychar anytime */ ) ); public static final Syntax DEFAULT = RUBY; public static final Syntax ASIS = new Syntax( 0, OP2_INEFFECTIVE_ESCAPE, 0, Option.NONE, new MetaCharTable( '\\', /* esc */ INEFFECTIVE_META_CHAR, /* anychar '.' */ INEFFECTIVE_META_CHAR, /* anytime '*' */ INEFFECTIVE_META_CHAR, /* zero or one time '?' */ INEFFECTIVE_META_CHAR, /* one or more time '+' */ INEFFECTIVE_META_CHAR /* anychar anytime */ ) ); public static final Syntax PosixBasic = new Syntax( (POSIX_COMMON_OP | OP_ESC_LPAREN_SUBEXP | OP_ESC_BRACE_INTERVAL ), 0, 0, ( Option.SINGLELINE | Option.MULTILINE ), new MetaCharTable( '\\', /* esc */ INEFFECTIVE_META_CHAR, /* anychar '.' */ INEFFECTIVE_META_CHAR, /* anytime '*' */ INEFFECTIVE_META_CHAR, /* zero or one time '?' */ INEFFECTIVE_META_CHAR, /* one or more time '+' */ INEFFECTIVE_META_CHAR /* anychar anytime */ ) ); public static final Syntax PosixExtended = new Syntax( ( POSIX_COMMON_OP | OP_LPAREN_SUBEXP | OP_BRACE_INTERVAL | OP_PLUS_ONE_INF | OP_QMARK_ZERO_ONE |OP_VBAR_ALT ), 0, ( CONTEXT_INDEP_ANCHORS | CONTEXT_INDEP_REPEAT_OPS | CONTEXT_INVALID_REPEAT_OPS | ALLOW_UNMATCHED_CLOSE_SUBEXP | ALLOW_DOUBLE_RANGE_OP_IN_CC ), ( Option.SINGLELINE | Option.MULTILINE ), new MetaCharTable( '\\', /* esc */ INEFFECTIVE_META_CHAR, /* anychar '.' */ INEFFECTIVE_META_CHAR, /* anytime '*' */ INEFFECTIVE_META_CHAR, /* zero or one time '?' 
*/ INEFFECTIVE_META_CHAR, /* one or more time '+' */ INEFFECTIVE_META_CHAR /* anychar anytime */ ) ); public static final Syntax Emacs = new Syntax( ( OP_DOT_ANYCHAR | OP_BRACKET_CC | OP_ESC_BRACE_INTERVAL | OP_ESC_LPAREN_SUBEXP | OP_ESC_VBAR_ALT | OP_ASTERISK_ZERO_INF | OP_PLUS_ONE_INF | OP_QMARK_ZERO_ONE | OP_DECIMAL_BACKREF | OP_LINE_ANCHOR | OP_ESC_CONTROL_CHARS ), OP2_ESC_GNU_BUF_ANCHOR, ALLOW_EMPTY_RANGE_IN_CC, Option.NONE, new MetaCharTable( '\\', /* esc */ INEFFECTIVE_META_CHAR, /* anychar '.' */ INEFFECTIVE_META_CHAR, /* anytime '*' */ INEFFECTIVE_META_CHAR, /* zero or one time '?' */ INEFFECTIVE_META_CHAR, /* one or more time '+' */ INEFFECTIVE_META_CHAR /* anychar anytime */ ) ); public static final Syntax Grep = new Syntax( ( OP_DOT_ANYCHAR | OP_BRACKET_CC | OP_POSIX_BRACKET | OP_ESC_BRACE_INTERVAL | OP_ESC_LPAREN_SUBEXP | OP_ESC_VBAR_ALT | OP_ASTERISK_ZERO_INF | OP_ESC_PLUS_ONE_INF | OP_ESC_QMARK_ZERO_ONE | OP_LINE_ANCHOR | OP_ESC_W_WORD | OP_ESC_B_WORD_BOUND | OP_ESC_LTGT_WORD_BEGIN_END | OP_DECIMAL_BACKREF ), 0, ( ALLOW_EMPTY_RANGE_IN_CC | NOT_NEWLINE_IN_NEGATIVE_CC ), Option.NONE, new MetaCharTable( '\\', /* esc */ INEFFECTIVE_META_CHAR, /* anychar '.' */ INEFFECTIVE_META_CHAR, /* anytime '*' */ INEFFECTIVE_META_CHAR, /* zero or one time '?' */ INEFFECTIVE_META_CHAR, /* one or more time '+' */ INEFFECTIVE_META_CHAR /* anychar anytime */ ) ); public static final Syntax GnuRegex = new Syntax( GNU_REGEX_OP, 0, GNU_REGEX_BV, Option.NONE, new MetaCharTable( '\\', /* esc */ INEFFECTIVE_META_CHAR, /* anychar '.' */ INEFFECTIVE_META_CHAR, /* anytime '*' */ INEFFECTIVE_META_CHAR, /* zero or one time '?' 
*/ INEFFECTIVE_META_CHAR, /* one or more time '+' */ INEFFECTIVE_META_CHAR /* anychar anytime */ ) ); public static final Syntax Java = new Syntax( (( GNU_REGEX_OP | OP_QMARK_NON_GREEDY | OP_ESC_CONTROL_CHARS | OP_ESC_C_CONTROL | OP_ESC_OCTAL3 | OP_ESC_X_HEX2 ) & ~OP_ESC_LTGT_WORD_BEGIN_END ), ( OP2_ESC_CAPITAL_Q_QUOTE | OP2_QMARK_GROUP_EFFECT | OP2_OPTION_PERL | OP2_PLUS_POSSESSIVE_REPEAT | OP2_PLUS_POSSESSIVE_INTERVAL | OP2_CCLASS_SET_OP | OP2_ESC_V_VTAB | OP2_ESC_U_HEX4 | OP2_ESC_P_BRACE_CHAR_PROPERTY ), ( GNU_REGEX_BV | DIFFERENT_LEN_ALT_LOOK_BEHIND ), Option.SINGLELINE, new MetaCharTable( '\\', /* esc */ INEFFECTIVE_META_CHAR, /* anychar '.' */ INEFFECTIVE_META_CHAR, /* anytime '*' */ INEFFECTIVE_META_CHAR, /* zero or one time '?' */ INEFFECTIVE_META_CHAR, /* one or more time '+' */ INEFFECTIVE_META_CHAR /* anychar anytime */ ) ); public static final Syntax Perl = new Syntax( (( GNU_REGEX_OP | OP_QMARK_NON_GREEDY | OP_ESC_OCTAL3 | OP_ESC_X_HEX2 | OP_ESC_X_BRACE_HEX8 | OP_ESC_CONTROL_CHARS | OP_ESC_C_CONTROL ) & ~OP_ESC_LTGT_WORD_BEGIN_END ), ( OP2_ESC_CAPITAL_Q_QUOTE | OP2_QMARK_GROUP_EFFECT | OP2_OPTION_PERL | OP2_ESC_P_BRACE_CHAR_PROPERTY | OP2_ESC_P_BRACE_CIRCUMFLEX_NOT ), GNU_REGEX_BV, Option.SINGLELINE, new MetaCharTable( '\\', /* esc */ INEFFECTIVE_META_CHAR, /* anychar '.' */ INEFFECTIVE_META_CHAR, /* anytime '*' */ INEFFECTIVE_META_CHAR, /* zero or one time '?' 
*/ INEFFECTIVE_META_CHAR, /* one or more time '+' */ INEFFECTIVE_META_CHAR /* anychar anytime */ ) ); public static final Syntax PerlNG = new Syntax( (( GNU_REGEX_OP | OP_QMARK_NON_GREEDY | OP_ESC_OCTAL3 | OP_ESC_X_HEX2 | OP_ESC_X_BRACE_HEX8 | OP_ESC_CONTROL_CHARS | OP_ESC_C_CONTROL ) & ~OP_ESC_LTGT_WORD_BEGIN_END ), ( OP2_ESC_CAPITAL_Q_QUOTE | OP2_QMARK_GROUP_EFFECT | OP2_OPTION_PERL | OP2_ESC_P_BRACE_CHAR_PROPERTY | OP2_ESC_P_BRACE_CIRCUMFLEX_NOT | OP2_QMARK_LT_NAMED_GROUP | OP2_ESC_K_NAMED_BACKREF | OP2_ESC_G_SUBEXP_CALL ), ( GNU_REGEX_BV | CAPTURE_ONLY_NAMED_GROUP | ALLOW_MULTIPLEX_DEFINITION_NAME ), Option.SINGLELINE, new MetaCharTable( '\\', /* esc */ INEFFECTIVE_META_CHAR, /* anychar '.' */ INEFFECTIVE_META_CHAR, /* anytime '*' */ INEFFECTIVE_META_CHAR, /* zero or one time '?' */ INEFFECTIVE_META_CHAR, /* one or more time '+' */ INEFFECTIVE_META_CHAR /* anychar anytime */ ) ); } joni-2.0.0/src/org/joni/Token.java000066400000000000000000000074531214326443200167220ustar00rootroot00000000000000/* * Permission is hereby granted, free of charge, to any person obtaining a copy of * this software and associated documentation files (the "Software"), to deal in * the Software without restriction, including without limitation the rights to * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do * so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ package org.joni; import org.joni.constants.TokenType; final class Token { TokenType type; boolean escaped; int base; /* is number: 8, 16 (used in [....]) */ int backP; // union fields private int INT1, INT2, INT3, INT4, INT5; private int []INTA1; // union accessors int getC() { return INT1; } void setC(int c) { INT1 = c; } int getCode() { return INT1; } void setCode(int code) { INT1 = code; } int getAnchor() { return INT1; } void setAnchor(int anchor) { INT1 = anchor; } int getSubtype() { return INT1; } void setSubtype(int subtype) { INT1 = subtype; } // repeat union member int getRepeatLower() { return INT1; } void setRepeatLower(int lower) { INT1 = lower; } int getRepeatUpper() { return INT2; } void setRepeatUpper(int upper) { INT2 = upper; } boolean getRepeatGreedy() { return INT3 != 0; } void setRepeatGreedy(boolean greedy) { INT3 = greedy ? 1 : 0; } boolean getRepeatPossessive() { return INT4 != 0; } void setRepeatPossessive(boolean possessive) { INT4 = possessive ? 1 : 0; } // backref union member int getBackrefNum() { return INT1; } void setBackrefNum(int num) { INT1 = num; } int getBackrefRef1() { return INT2; } void setBackrefRef1(int ref1) { INT2 = ref1; } int[]getBackrefRefs() { return INTA1; } void setBackrefRefs(int[]refs) { INTA1 = refs; } boolean getBackrefByName() { return INT3 != 0; } void setBackrefByName(boolean byName) { INT3 = byName ? 1 : 0; } // USE_BACKREF_AT_LEVEL boolean getBackrefExistLevel() { return INT4 != 0; } void setBackrefExistLevel(boolean existLevel) { INT4 = existLevel ? 
1 : 0; } int getBackrefLevel() { return INT5; } void setBackrefLevel(int level) { INT5 = level; } // call union member int getCallNameP() { return INT1; } void setCallNameP(int nameP) { INT1 = nameP; } int getCallNameEnd() { return INT2; } void setCallNameEnd(int nameEnd) { INT2 = nameEnd; } int getCallGNum() { return INT3; } void setCallGNum(int gnum) { INT3 = gnum; } // prop union member int getPropCType() { return INT1; } void setPropCType(int ctype) { INT1 = ctype; } boolean getPropNot() { return INT2 != 0; } void setPropNot(boolean not) { INT2 = not ? 1 : 0; } } joni-2.0.0/src/org/joni/UnsetAddrList.java000066400000000000000000000047071214326443200203660ustar00rootroot00000000000000/* * Permission is hereby granted, free of charge, to any person obtaining a copy of * this software and associated documentation files (the "Software"), to deal in * the Software without restriction, including without limitation the rights to * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do * so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ package org.joni; import org.joni.ast.EncloseNode; import org.joni.ast.Node; import org.joni.exception.ErrorMessages; import org.joni.exception.InternalException; public final class UnsetAddrList { int num; Node[]targets; int[]offsets; public UnsetAddrList(int size) { targets = new Node[size]; offsets = new int[size]; } public void add(int offset, Node node) { if (num >= offsets.length) { Node []ttmp = new Node[targets.length << 1]; System.arraycopy(targets, 0, ttmp, 0, num); targets = ttmp; int[]otmp = new int[offsets.length << 1]; System.arraycopy(offsets, 0, otmp, 0, num); offsets = otmp; } targets[num] = node; offsets[num] = offset; num++; } public void fix(Regex regex) { for (int i=0; i 0) { for (int i=0; iOla Bini */ public interface WarnCallback { WarnCallback DEFAULT = new WarnCallback() { public void warn(String message) { System.err.println(message); } }; void warn(String message); } joni-2.0.0/src/org/joni/Warnings.java000066400000000000000000000025301214326443200174210ustar00rootroot00000000000000/* * Permission is hereby granted, free of charge, to any person obtaining a copy of * this software and associated documentation files (the "Software"), to deal in * the Software without restriction, including without limitation the rights to * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do * so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ package org.joni; public interface Warnings { final String INVALID_BACKREFERENCE = "invalid back reference"; final String INVALID_SUBEXP_CALL = "invalid subexp call"; final String INVALID_UNICODE_PROPERTY = "invalid Unicode Property \\<%n>"; } joni-2.0.0/src/org/joni/ast/000077500000000000000000000000001214326443200155555ustar00rootroot00000000000000joni-2.0.0/src/org/joni/ast/AnchorNode.java000066400000000000000000000062621214326443200204460ustar00rootroot00000000000000/* * Permission is hereby granted, free of charge, to any person obtaining a copy of * this software and associated documentation files (the "Software"), to deal in * the Software without restriction, including without limitation the rights to * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do * so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ package org.joni.ast; import org.joni.constants.AnchorType; public final class AnchorNode extends Node implements AnchorType { public int type; public Node target; public int charLength; public AnchorNode(int type) { this.type = type; charLength = -1; } @Override public int getType() { return ANCHOR; } @Override protected void setChild(Node newChild) { target = newChild; } @Override protected Node getChild() { return target; } public void setTarget(Node tgt) { target = tgt; tgt.parent = this; } @Override public String getName() { return "Anchor"; } @Override public String toString(int level) { StringBuilder value = new StringBuilder(); value.append("\n type: " + typeToString()); value.append("\n target: " + pad(target, level + 1)); return value.toString(); } public String typeToString() { StringBuilder type = new StringBuilder(); if (isType(BEGIN_BUF)) type.append("BEGIN_BUF "); if (isType(BEGIN_LINE)) type.append("BEGIN_LINE "); if (isType(BEGIN_POSITION)) type.append("BEGIN_POSITION "); if (isType(END_BUF)) type.append("END_BUF "); if (isType(SEMI_END_BUF)) type.append("SEMI_END_BUF "); if (isType(END_LINE)) type.append("END_LINE "); if (isType(WORD_BOUND)) type.append("WORD_BOUND "); if (isType(NOT_WORD_BOUND)) type.append("NOT_WORD_BOUND "); if (isType(WORD_BEGIN)) type.append("WORD_BEGIN "); if (isType(WORD_END)) type.append("WORD_END "); if (isType(PREC_READ)) type.append("PREC_READ "); if (isType(PREC_READ_NOT)) type.append("PREC_READ_NOT "); if (isType(LOOK_BEHIND)) type.append("LOOK_BEHIND "); if (isType(LOOK_BEHIND_NOT)) type.append("LOOK_BEHIND_NOT "); if (isType(ANYCHAR_STAR)) type.append("ANYCHAR_STAR "); if (isType(ANYCHAR_STAR_ML)) type.append("ANYCHAR_STAR_ML "); return type.toString(); } private boolean isType(int type) { return (this.type & type) != 0; } } joni-2.0.0/src/org/joni/ast/AnyCharNode.java000066400000000000000000000026321214326443200205560ustar00rootroot00000000000000/* * Permission is hereby granted, free of charge, to any person 
obtaining a copy of * this software and associated documentation files (the "Software"), to deal in * the Software without restriction, including without limitation the rights to * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do * so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ package org.joni.ast; public final class AnyCharNode extends Node { public AnyCharNode(){} @Override public int getType() { return CANY; } @Override public String getName() { return "Any Char"; } @Override public String toString(int level) { String value = ""; return value; } } joni-2.0.0/src/org/joni/ast/BackRefNode.java000066400000000000000000000062411214326443200205260ustar00rootroot00000000000000/* * Permission is hereby granted, free of charge, to any person obtaining a copy of * this software and associated documentation files (the "Software"), to deal in * the Software without restriction, including without limitation the rights to * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do * so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. 
* * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ package org.joni.ast; import org.joni.ScanEnvironment; import org.joni.exception.ErrorMessages; import org.joni.exception.ValueException; public final class BackRefNode extends StateNode { //private static int NODE_BACKREFS_SIZE = 6; //int state; public int backNum; public int back[]; public int nestLevel; public BackRefNode(int backNum, int[]backRefs, boolean byName, ScanEnvironment env) { this.backNum = backNum; if (byName) setNameRef(); for (int i=0; i 0) { back[pos] = n; pos++; } } backNum = pos; } } joni-2.0.0/src/org/joni/ast/CClassNode.java000066400000000000000000000424031214326443200204010ustar00rootroot00000000000000/* * Permission is hereby granted, free of charge, to any person obtaining a copy of * this software and associated documentation files (the "Software"), to deal in * the Software without restriction, including without limitation the rights to * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do * so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ package org.joni.ast; import org.jcodings.CodeRange; import org.jcodings.Encoding; import org.jcodings.IntHolder; import org.jcodings.constants.CharacterType; import org.jcodings.exception.EncodingException; import org.jcodings.specific.ASCIIEncoding; import org.joni.BitSet; import org.joni.CodeRangeBuffer; import org.joni.Config; import org.joni.ScanEnvironment; import org.joni.constants.CCSTATE; import org.joni.constants.CCVALTYPE; import org.joni.exception.ErrorMessages; import org.joni.exception.InternalException; import org.joni.exception.SyntaxException; import org.joni.exception.ValueException; public final class CClassNode extends Node { private static final int FLAG_NCCLASS_NOT = 1<<0; private static final int FLAG_NCCLASS_SHARE = 1<<1; int flags; public final BitSet bs = new BitSet(); // conditional creation ? public CodeRangeBuffer mbuf; /* multi-byte info or NULL */ private int ctype; // for hashing purposes private Encoding enc; // ... 
// node_new_cclass public CClassNode() {} public CClassNode(int ctype, Encoding enc, boolean not, int sbOut, int[]ranges) { this(not, sbOut, ranges); this.ctype = ctype; this.enc = enc; } public void clear() { bs.clear(); flags = 0; mbuf = null; } // node_new_cclass_by_codepoint_range, only used by shared Char Classes public CClassNode(boolean not, int sbOut, int[]ranges) { if (not) setNot(); // bs.clear(); if (sbOut > 0 && ranges != null) { int n = ranges[0]; for (int i=0; i= sbOut) { setupBuffer(ranges); return; } bs.set(j); } } } setupBuffer(ranges); } @Override public int getType() { return CCLASS; } @Override public String getName() { return "Character Class"; } @Override public boolean equals(Object other) { if (!(other instanceof CClassNode)) return false; CClassNode cc = (CClassNode)other; return ctype == cc.ctype && isNot() == cc.isNot() && enc == cc.enc; } @Override public int hashCode() { if (Config.USE_SHARED_CCLASS_TABLE) { int hash = 0; hash += ctype; hash += enc.hashCode(); if (isNot()) hash++; return hash + (hash >> 5); } else { return super.hashCode(); } } @Override public String toString(int level) { StringBuilder value = new StringBuilder(); value.append("\n flags: " + flagsToString()); value.append("\n bs: " + pad(bs, level + 1)); value.append("\n mbuf: " + pad(mbuf, level + 1)); return value.toString(); } public String flagsToString() { StringBuilder flags = new StringBuilder(); if (isNot()) flags.append("NOT "); if (isShare()) flags.append("SHARE "); return flags.toString(); } private void setupBuffer(int[]ranges) { if (ranges != null) { if (ranges[0] == 0) return; mbuf = new CodeRangeBuffer(ranges); } } public boolean isEmpty() { return mbuf == null && bs.isEmpty(); } public void addCodeRangeToBuf(int from, int to) { mbuf = CodeRangeBuffer.addCodeRangeToBuff(mbuf, from, to); } public void addCodeRange(ScanEnvironment env, int from, int to) { mbuf = CodeRangeBuffer.addCodeRange(mbuf, env, from, to); } public void addAllMultiByteRange(Encoding 
enc) { mbuf = CodeRangeBuffer.addAllMultiByteRange(enc, mbuf); } public void clearNotFlag(Encoding enc) { if (isNot()) { bs.invert(); if (!enc.isSingleByte()) { mbuf = CodeRangeBuffer.notCodeRangeBuff(enc, mbuf); } clearNot(); } } // and_cclass public void and(CClassNode other, Encoding enc) { boolean not1 = isNot(); BitSet bsr1 = bs; CodeRangeBuffer buf1 = mbuf; boolean not2 = other.isNot(); BitSet bsr2 = other.bs; CodeRangeBuffer buf2 = other.mbuf; if (not1) { BitSet bs1 = new BitSet(); bsr1.invertTo(bs1); bsr1 = bs1; } if (not2) { BitSet bs2 = new BitSet(); bsr2.invertTo(bs2); bsr2 = bs2; } bsr1.and(bsr2); if (bsr1 != bs) { bs.copy(bsr1); bsr1 = bs; } if (not1) { bs.invert(); } CodeRangeBuffer pbuf = null; if (!enc.isSingleByte()) { if (not1 && not2) { pbuf = CodeRangeBuffer.orCodeRangeBuff(enc, buf1, false, buf2, false); } else { pbuf = CodeRangeBuffer.andCodeRangeBuff(buf1, not1, buf2, not2); if (not1) { pbuf = CodeRangeBuffer.notCodeRangeBuff(enc, pbuf); } } mbuf = pbuf; } } // or_cclass public void or(CClassNode other, Encoding enc) { boolean not1 = isNot(); BitSet bsr1 = bs; CodeRangeBuffer buf1 = mbuf; boolean not2 = other.isNot(); BitSet bsr2 = other.bs; CodeRangeBuffer buf2 = other.mbuf; if (not1) { BitSet bs1 = new BitSet(); bsr1.invertTo(bs1); bsr1 = bs1; } if (not2) { BitSet bs2 = new BitSet(); bsr2.invertTo(bs2); bsr2 = bs2; } bsr1.or(bsr2); if (bsr1 != bs) { bs.copy(bsr1); bsr1 = bs; } if (not1) { bs.invert(); } if (!enc.isSingleByte()) { CodeRangeBuffer pbuf = null; if (not1 && not2) { pbuf = CodeRangeBuffer.andCodeRangeBuff(buf1, false, buf2, false); } else { pbuf = CodeRangeBuffer.orCodeRangeBuff(enc, buf1, not1, buf2, not2); if (not1) { pbuf = CodeRangeBuffer.notCodeRangeBuff(enc, pbuf); } } mbuf = pbuf; } } // add_ctype_to_cc_by_range // Encoding out! 
public void addCTypeByRange(int ctype, boolean not, Encoding enc, int sbOut, int mbr[]) { int n = mbr[0]; if (!not) { for (int i=0; i= sbOut) { if (Config.VANILLA) { if (j == mbr[i * 2 + 2]) { i++; } else if (j > mbr[i * 2 + 1]) { addCodeRangeToBuf(j, mbr[i * 2 + 2]); i++; } } else { if (j >= mbr[i * 2 + 1]) { addCodeRangeToBuf(j, mbr[i * 2 + 2]); i++; } } // !goto sb_end!, remove duplication! for (; i= sbOut) { // !goto sb_end2!, remove duplication prev = sbOut; for (i=0; i 0 && /* check invalid code point */ !enc.isWord(c)) bs.set(c); } catch (EncodingException ve) {}; } } break; default: throw new InternalException(ErrorMessages.ERR_PARSER_BUG); } // switch } public static final class CCStateArg { public int v; public int vs; public boolean vsIsRaw; public boolean vIsRaw; public CCVALTYPE inType; public CCVALTYPE type; public CCSTATE state; } public void nextStateClass(CCStateArg arg, ScanEnvironment env) { if (arg.state == CCSTATE.RANGE) throw new SyntaxException(ErrorMessages.ERR_CHAR_CLASS_VALUE_AT_END_OF_RANGE); if (arg.state == CCSTATE.VALUE && arg.type != CCVALTYPE.CLASS) { if (arg.type == CCVALTYPE.SB) { bs.set(arg.vs); } else if (arg.type == CCVALTYPE.CODE_POINT) { addCodeRange(env, arg.vs, arg.vs); } } arg.state = CCSTATE.VALUE; arg.type = CCVALTYPE.CLASS; } public void nextStateValue(CCStateArg arg, ScanEnvironment env) { switch(arg.state) { case VALUE: if (arg.type == CCVALTYPE.SB) { if (arg.vs > 0xff) throw new ValueException(ErrorMessages.ERR_INVALID_CODE_POINT_VALUE); bs.set(arg.vs); } else if (arg.type == CCVALTYPE.CODE_POINT) { addCodeRange(env, arg.vs, arg.vs); } break; case RANGE: if (arg.inType == arg.type) { if (arg.inType == CCVALTYPE.SB) { if (arg.vs > 0xff || arg.v > 0xff) throw new ValueException(ErrorMessages.ERR_INVALID_CODE_POINT_VALUE); if (arg.vs > arg.v) { if (env.syntax.allowEmptyRangeInCC()) { // goto ccs_range_end arg.state = CCSTATE.COMPLETE; break; } else { throw new ValueException(ErrorMessages.ERR_EMPTY_RANGE_IN_CHAR_CLASS); 
} } bs.setRange(arg.vs, arg.v); } else { addCodeRange(env, arg.vs, arg.v); } } else { if (arg.vs > arg.v) { if (env.syntax.allowEmptyRangeInCC()) { // goto ccs_range_end arg.state = CCSTATE.COMPLETE; break; } else { throw new ValueException(ErrorMessages.ERR_EMPTY_RANGE_IN_CHAR_CLASS); } } bs.setRange(arg.vs, arg.v < 0xff ? arg.v : 0xff); addCodeRange(env, arg.vs, arg.v); } // ccs_range_end: arg.state = CCSTATE.COMPLETE; break; case COMPLETE: case START: arg.state = CCSTATE.VALUE; break; default: break; } // switch arg.vsIsRaw = arg.vIsRaw; arg.vs = arg.v; arg.type = arg.inType; } // onig_is_code_in_cc_len public boolean isCodeInCCLength(int encLength, int code) { boolean found; if (encLength > 1 || code >= BitSet.SINGLE_BYTE_SIZE) { if (mbuf == null) { found = false; } else { found = CodeRange.isInCodeRange(mbuf.getCodeRange(), code); } } else { found = bs.at(code); } if (isNot()) { return !found; } else { return found; } } // onig_is_code_in_cc public boolean isCodeInCC(Encoding enc, int code) { int len; if (enc.minLength() > 1) { len = 2; } else { len = enc.codeToMbcLength(code); } return isCodeInCCLength(len, code); } public void setNot() { flags |= FLAG_NCCLASS_NOT; } public void clearNot() { flags &= ~FLAG_NCCLASS_NOT; } public boolean isNot() { return (flags & FLAG_NCCLASS_NOT) != 0; } public void setShare() { flags |= FLAG_NCCLASS_SHARE; } public void clearShare() { flags &= ~FLAG_NCCLASS_SHARE; } public boolean isShare() { return (flags & FLAG_NCCLASS_SHARE) != 0; } } joni-2.0.0/src/org/joni/ast/CTypeNode.java000066400000000000000000000032241214326443200202530ustar00rootroot00000000000000/* * Permission is hereby granted, free of charge, to any person obtaining a copy of * this software and associated documentation files (the "Software"), to deal in * the Software without restriction, including without limitation the rights to * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom 
the Software is furnished to do * so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ package org.joni.ast; public final class CTypeNode extends Node { public int ctype; public boolean not; public CTypeNode(int type, boolean not) { this.ctype= type; this.not = not; } @Override public int getType() { return CTYPE; } @Override public String getName() { return "Character Type"; } @Override public String toString(int level) { StringBuilder value = new StringBuilder(); value.append("\n ctype: " + ctype); value.append("\n not: " + not); return value.toString(); } } joni-2.0.0/src/org/joni/ast/CallNode.java000066400000000000000000000053721214326443200201100ustar00rootroot00000000000000/* * Permission is hereby granted, free of charge, to any person obtaining a copy of * this software and associated documentation files (the "Software"), to deal in * the Software without restriction, including without limitation the rights to * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do * so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. 
* * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ package org.joni.ast; import java.util.Set; import org.joni.UnsetAddrList; import org.joni.WarnCallback; public final class CallNode extends StateNode { public byte[]name; public int nameP; public int nameEnd; public int groupNum; public Node target; // is it an EncloseNode always ? public UnsetAddrList unsetAddrList; public CallNode(byte[]name, int nameP, int nameEnd, int gnum) { this.name = name; this.nameP = nameP; this.nameEnd = nameEnd; this.groupNum = gnum; /* call by number if gnum != 0 */ } @Override public int getType() { return CALL; } @Override protected void setChild(Node newChild) { target = newChild; } @Override protected Node getChild() { return target; } public void setTarget(Node tgt) { target = tgt; tgt.parent = this; } @Override public String getName() { return "Call"; } @Override public void verifyTree(Set set, WarnCallback warnings) { if (target == null || target.parent == this) warnings.warn(this.getAddressName() + " doesn't point to a target or the target has been stolen"); // do not recurse here } @Override public String toString(int level) { StringBuilder value = new StringBuilder(super.toString(level)); value.append("\n name: " + new String(name, nameP, nameEnd - nameP)); value.append("\n groupNum: " + groupNum); value.append("\n target: " + pad(target.getAddressName(), level + 1)); value.append("\n unsetAddrList: " + pad(unsetAddrList, level + 1)); return value.toString(); } } 
joni-2.0.0/src/org/joni/ast/ConsAltNode.java000066400000000000000000000105021214326443200205670ustar00rootroot00000000000000/* * Permission is hereby granted, free of charge, to any person obtaining a copy of * this software and associated documentation files (the "Software"), to deal in * the Software without restriction, including without limitation the rights to * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do * so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ package org.joni.ast; import java.util.Set; import org.joni.Config; import org.joni.WarnCallback; import org.joni.exception.ErrorMessages; import org.joni.exception.InternalException; public final class ConsAltNode extends Node { public Node car; public ConsAltNode cdr; private int type; // List or Alt private ConsAltNode(Node car, ConsAltNode cdr, int type) { this.car = car; if (car != null) car.parent = this; this.cdr = cdr; if (cdr != null) cdr.parent = this; this.type = type; } public static ConsAltNode newAltNode(Node left, ConsAltNode right) { return new ConsAltNode(left, right, ALT); } public static ConsAltNode newListNode(Node left, ConsAltNode right) { return new ConsAltNode(left, right, LIST); } public static ConsAltNode listAdd(ConsAltNode list, Node x) { ConsAltNode n = newListNode(x, null); if (list != null) { while (list.cdr != null) { list = list.cdr; } list.setCdr(n); } return n; } public void toListNode() { type = LIST; } public void toAltNode() { type = ALT; } @Override public int getType() { return type; } @Override protected void setChild(Node newChild) { car = newChild; } @Override protected Node getChild() { return car; } @Override public void swap(Node with) { if (cdr != null) { cdr.parent = with; if (with instanceof ConsAltNode) { ConsAltNode withCan = (ConsAltNode)with; withCan.cdr.parent = this; ConsAltNode tmp = cdr; cdr = withCan.cdr; withCan.cdr = tmp; } } super.swap(with); } @Override public void verifyTree(Set set, WarnCallback warnings) { if (!set.contains(this)) { set.add(this); if (car != null) { if (car.parent != this) { warnings.warn("broken list car: " + this.getAddressName() + " -> " + car.getAddressName()); } car.verifyTree(set,warnings); } if (cdr != null) { if (cdr.parent != this) { warnings.warn("broken list cdr: " + this.getAddressName() + " -> " + cdr.getAddressName()); } cdr.verifyTree(set,warnings); } } } public Node setCar(Node ca) { car = ca; ca.parent = this; return car; } public ConsAltNode setCdr(ConsAltNode cd) 
{ cdr = cd; cd.parent = this; return cdr; } @Override public String getName() { switch (type) { case ALT: return "Alt"; case LIST: return "List"; default: throw new InternalException(ErrorMessages.ERR_PARSER_BUG); } } @Override public String toString(int level) { StringBuilder value = new StringBuilder(); value.append("\n car: " + pad(car, level + 1)); value.append("\n cdr: " + (cdr == null ? "NULL" : cdr.toString())); return value.toString(); } } joni-2.0.0/src/org/joni/ast/EncloseNode.java000066400000000000000000000103551214326443200206220ustar00rootroot00000000000000/* * Permission is hereby granted, free of charge, to any person obtaining a copy of * this software and associated documentation files (the "Software"), to deal in * the Software without restriction, including without limitation the rights to * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do * so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ package org.joni.ast; import org.joni.Config; import org.joni.Option; import org.joni.constants.EncloseType; public final class EncloseNode extends StateNode implements EncloseType { public int type; // enclose type public int regNum; public int option; public Node target; /* EncloseNode : ENCLOSE_MEMORY */ public int callAddr; // AbsAddrType public int minLength; // OnigDistance public int maxLength; // OnigDistance public int charLength; public int optCount; // referenced count in optimize_node_left() // node_new_enclose / onig_node_new_enclose public EncloseNode(int type) { this.type = type; callAddr = -1; } // node_new_enclose_memory public EncloseNode(int option, boolean isNamed) { this(MEMORY); if (isNamed) setNamedGroup(); if (Config.USE_SUBEXP_CALL) this.option = option; } // node_new_option public EncloseNode(int option, int _) { this(OPTION); this.option = option; } @Override public int getType() { return ENCLOSE; } @Override protected void setChild(Node newChild) { target = newChild; } @Override protected Node getChild() { return target; } public void setTarget(Node tgt) { target = tgt; tgt.parent = this; } @Override public String getName() { return "Enclose"; } @Override public String toString(int level) { StringBuilder value = new StringBuilder(super.toString(level)); value.append("\n type: " + typeToString()); value.append("\n regNum: " + regNum); value.append("\n option: " + Option.toString(option)); value.append("\n target: " + pad(target, level + 1)); value.append("\n callAddr: " + callAddr); value.append("\n minLength: " + minLength); value.append("\n maxLength: " + maxLength); value.append("\n charLength: " + charLength); value.append("\n optCount: " + optCount); return value.toString(); } public String typeToString() { StringBuilder types = new StringBuilder(); if (isStopBacktrack()) types.append("STOP_BACKTRACK "); if (isMemory()) types.append("MEMORY "); if (isOption()) types.append("OPTION "); return types.toString(); } public void 
setEncloseStatus(int flag) { state |= flag; } public void clearEncloseStatus(int flag) { state &= ~flag; } public void clearMemory() { type &= ~MEMORY; } public void setMemory() { type |= MEMORY; } public boolean isMemory() { return (type & MEMORY) != 0; } public void clearOption() { type &= ~OPTION; } public void setOption() { type |= OPTION; } public boolean isOption() { return (type & OPTION) != 0; } public void clearStopBacktrack() { type &= ~STOP_BACKTRACK; } public void setStopBacktrack() { type |= STOP_BACKTRACK; } public boolean isStopBacktrack() { return (type & STOP_BACKTRACK) != 0; } } joni-2.0.0/src/org/joni/ast/Node.java000066400000000000000000000102421214326443200173040ustar00rootroot00000000000000/* * Permission is hereby granted, free of charge, to any person obtaining a copy of * this software and associated documentation files (the "Software"), to deal in * the Software without restriction, including without limitation the rights to * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do * so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ package org.joni.ast; import java.util.Set; import org.joni.Config; import org.joni.WarnCallback; import org.joni.constants.NodeType; public abstract class Node implements NodeType { public Node parent; public abstract int getType(); public final int getType2Bit() { return 1 << getType(); } protected void setChild(Node tgt){} // default definition protected Node getChild(){return null;}; // default definition public void swap(Node with) { Node tmp; //if (getChild() != null) getChild().parent = with; //if (with.getChild() != null) with.getChild().parent = this; //tmp = getChild(); //setChild(with.getChild()); //with.setChild(tmp); if (parent != null) parent.setChild(with); if (with.parent != null) with.parent.setChild(this); tmp = parent; parent = with.parent; with.parent = tmp; } // overridden by ConsAltNode and CallNode public void verifyTree(Set set, WarnCallback warnings) { if (!set.contains(this) && getChild() != null) { set.add(this); if (getChild().parent != this) { warnings.warn("broken link to child: " + this.getAddressName() + " -> " + getChild().getAddressName()); } getChild().verifyTree(set, warnings); } } public abstract String getName(); protected abstract String toString(int level); public String getAddressName() { return getName() + ":0x" + Integer.toHexString(System.identityHashCode(this)); } public final String toString() { StringBuilder s = new StringBuilder(); s.append("<" + getAddressName() + " (" + (parent == null ? 
"NULL" : parent.getAddressName()) + ")>"); return s + toString(0); } protected static String pad(Object value, int level) { if (value == null) return "NULL"; StringBuilder pad = new StringBuilder(" "); for (int i=0; itarget); */ break; case LIST: node = (ConsAltNode)this; do { if (!node.car.isInvalidQuantifier()) return false; } while ((node = node.cdr) != null); return false; case ALT: node = (ConsAltNode)this; do { if (node.car.isInvalidQuantifier()) return true; } while ((node = node.cdr) != null); break; default: break; } return false; } public final boolean isAllowedInLookBehind() { return (getType2Bit() & ALLOWED_IN_LB) != 0; } public final boolean isSimple() { return (getType2Bit() & SIMPLE) != 0; } } joni-2.0.0/src/org/joni/ast/QuantifierNode.java000066400000000000000000000207041214326443200213400ustar00rootroot00000000000000/* * Permission is hereby granted, free of charge, to any person obtaining a copy of * this software and associated documentation files (the "Software"), to deal in * the Software without restriction, including without limitation the rights to * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do * so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ package org.joni.ast; import org.joni.Config; import org.joni.ScanEnvironment; import org.joni.constants.Reduce; import org.joni.constants.TargetInfo; public final class QuantifierNode extends StateNode { public Node target; public int lower; public int upper; public boolean greedy; public int targetEmptyInfo; public Node headExact; public Node nextHeadExact; public boolean isRefered; /* include called node. don't eliminate even if {0} */ // USE_COMBINATION_EXPLOSION_CHECK public int combExpCheckNum; /* 1,2,3...: check, 0: no check */ public QuantifierNode(int lower, int upper, boolean byNumber) { this.lower = lower; this.upper = upper; greedy = true; targetEmptyInfo = TargetInfo.ISNOT_EMPTY; if (byNumber) setByNumber(); } @Override public int getType() { return QTFR; } @Override protected void setChild(Node newChild) { target = newChild; } @Override protected Node getChild() { return target; } public void setTarget(Node tgt) { target = tgt; tgt.parent = this; } public StringNode convertToString(int flag) { StringNode sn = new StringNode(); sn.flag = flag; sn.swap(this); return sn; } @Override public String getName() { return "Quantifier"; } @Override public String toString(int level) { StringBuilder value = new StringBuilder(super.toString(level)); value.append("\n target: " + pad(target, level + 1)); value.append("\n lower: " + lower); value.append("\n upper: " + upper); value.append("\n greedy: " + greedy); value.append("\n targetEmptyInfo: " + targetEmptyInfo); value.append("\n headExact: " + pad(headExact, level + 1)); value.append("\n nextHeadExact: " + pad(nextHeadExact, level + 1)); value.append("\n isRefered: " + isRefered); value.append("\n combExpCheckNum: " + combExpCheckNum); return value.toString(); } public boolean isAnyCharStar() { return greedy && isRepeatInfinite(upper) && target.getType() == CANY; } /* ?:0, *:1, +:2, ??:3, *?:4, +?:5 */ protected int popularNum() { if (greedy) { if (lower == 0) { if (upper == 1) return 0; else if 
(isRepeatInfinite(upper)) return 1; } else if (lower == 1) { if (isRepeatInfinite(upper)) return 2; } } else { if (lower == 0) { if (upper == 1) return 3; else if (isRepeatInfinite(upper)) return 4; } else if (lower == 1) { if (isRepeatInfinite(upper)) return 5; } } return -1; } protected void set(QuantifierNode other) { setTarget(other.target); other.target = null; lower = other.lower; upper = other.upper; greedy = other.greedy; targetEmptyInfo = other.targetEmptyInfo; //setHeadExact(other.headExact); //setNextHeadExact(other.nextHeadExact); headExact = other.headExact; nextHeadExact = other.nextHeadExact; isRefered = other.isRefered; combExpCheckNum = other.combExpCheckNum; } public void reduceNestedQuantifier(QuantifierNode other) { int pnum = popularNum(); int cnum = other.popularNum(); if (pnum < 0 || cnum < 0) return; switch(Reduce.REDUCE_TABLE[cnum][pnum]) { case DEL: // no need to set the parent here... // swap ? set(other); // *pnode = *cnode; ??? break; case A: setTarget(other.target); lower = 0; upper = REPEAT_INFINITE; greedy = true; break; case AQ: setTarget(other.target); lower = 0; upper = REPEAT_INFINITE; greedy = false; break; case QQ: setTarget(other.target); lower = 0; upper = 1; greedy = false; break; case P_QQ: setTarget(other); lower = 0; upper = 1; greedy = false; other.lower = 1; other.upper = REPEAT_INFINITE; other.greedy = true; return; case PQ_Q: setTarget(other); lower = 0; upper = 1; greedy = true; other.lower = 1; other.upper = REPEAT_INFINITE; other.greedy = false; return; case ASIS: setTarget(other); return; } // ??? remove the parent from target ??? 
other.target = null; // remove target from reduced quantifier } public int setQuantifier(Node tgt, boolean group, ScanEnvironment env, byte[]bytes, int p, int end) { if (lower == 1 && upper == 1) return 1; switch(tgt.getType()) { case STR: if (!group) { StringNode sn = (StringNode)tgt; if (sn.canBeSplit(env.enc)) { StringNode n = sn.splitLastChar(env.enc); if (n != null) { setTarget(n); return 2; } } } break; case QTFR: /* check redundant double repeat. */ /* verbose warn (?:.?)? etc... but not warn (.?)? etc... */ QuantifierNode qnt = (QuantifierNode)tgt; int nestQNum = popularNum(); int targetQNum = qnt.popularNum(); if (Config.USE_WARNING_REDUNDANT_NESTED_REPEAT_OPERATOR) { if (!isByNumber() && !qnt.isByNumber() && env.syntax.warnReduntantNestedRepeat()) { switch(Reduce.REDUCE_TABLE[targetQNum][nestQNum]) { case ASIS: break; case DEL: env.reg.warnings.warn(new String(bytes, p, end) + " redundant nested repeat operator"); break; default: env.reg.warnings.warn(new String(bytes, p, end) + " nested repeat operator " + Reduce.PopularQStr[targetQNum] + " and " + Reduce.PopularQStr[nestQNum] + " was replaced with '" + Reduce.ReduceQStr[Reduce.REDUCE_TABLE[targetQNum][nestQNum].ordinal()] + "'"); } } } // USE_WARNING_REDUNDANT_NESTED_REPEAT_OPERATOR if (targetQNum >= 0) { if (nestQNum >= 0) { reduceNestedQuantifier(qnt); return 0; } else if (targetQNum == 1 || targetQNum == 2) { /* * or + */ /* (?:a*){n,m}, (?:a+){n,m} => (?:a*){n,n}, (?:a+){n,n} */ if (!isRepeatInfinite(upper) && upper > 1 && greedy) { upper = lower == 0 ? 
1 : lower; } } } default: break; } setTarget(tgt); return 0; } public static final int REPEAT_INFINITE = -1; public static boolean isRepeatInfinite(int n) { return n == REPEAT_INFINITE; } } joni-2.0.0/src/org/joni/ast/StateNode.java000066400000000000000000000132361214326443200203130ustar00rootroot00000000000000/* * Permission is hereby granted, free of charge, to any person obtaining a copy of * this software and associated documentation files (the "Software"), to deal in * the Software without restriction, including without limitation the rights to * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do * so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ package org.joni.ast; import org.joni.constants.NodeStatus; public abstract class StateNode extends Node implements NodeStatus { protected int state; @Override public String toString(int level) { return "\n state: " + stateToString(); } public String stateToString() { StringBuilder states = new StringBuilder(); if (isMinFixed()) states.append("MIN_FIXED "); if (isMaxFixed()) states.append("MAX_FIXED "); if (isMark1()) states.append("MARK1 "); if (isMark2()) states.append("MARK2 "); if (isMemBackrefed()) states.append("MEM_BACKREFED "); if (isStopBtSimpleRepeat()) states.append("STOP_BT_SIMPLE_REPEAT "); if (isRecursion()) states.append("RECURSION "); if (isCalled()) states.append("CALLED "); if (isAddrFixed()) states.append("ADDR_FIXED "); if (isNamedGroup()) states.append("NAMED_GROUP "); if (isNameRef()) states.append("NAME_REF "); if (isInRepeat()) states.append("IN_REPEAT "); if (isNestLevel()) states.append("NEST_LEVEL "); if (isByNumber()) states.append("BY_NUMBER "); return states.toString(); } public boolean isMinFixed() { return (state & NST_MIN_FIXED) != 0; } public void setMinFixed() { state |= NST_MIN_FIXED; } public void clearMinFixed() { state &= ~NST_MIN_FIXED; } public boolean isMaxFixed() { return (state & NST_MAX_FIXED) != 0; } public void setMaxFixed() { state |= NST_MAX_FIXED; } public void clearMaxFixed() { state &= ~NST_MAX_FIXED; } public boolean isCLenFixed() { return (state & NST_CLEN_FIXED) != 0; } public void setCLenFixed() { state |= NST_CLEN_FIXED; } public void clearCLenFixed() { state &= ~NST_CLEN_FIXED; } public boolean isMark1() { return (state & NST_MARK1) != 0; } public void setMark1() { state |= NST_MARK1; } public void clearMark1() { state &= ~NST_MARK1; } public boolean isMark2() { return (state & NST_MARK2) != 0; } public void setMark2() { state |= NST_MARK2; } public void clearMark2() { state &= ~NST_MARK2; } public boolean isMemBackrefed() { return (state & NST_MEM_BACKREFED) != 0; } public void setMemBackrefed() { state 
|= NST_MEM_BACKREFED; } public void clearMemBackrefed() { state &= ~NST_MEM_BACKREFED; } public boolean isStopBtSimpleRepeat() { return (state & NST_STOP_BT_SIMPLE_REPEAT) != 0; } public void setStopBtSimpleRepeat() { state |= NST_STOP_BT_SIMPLE_REPEAT; } public void clearStopBtSimpleRepeat() { state &= ~NST_STOP_BT_SIMPLE_REPEAT; } public boolean isRecursion() { return (state & NST_RECURSION) != 0; } public void setRecursion() { state |= NST_RECURSION; } public void clearRecursion() { state &= ~NST_RECURSION; } public boolean isCalled() { return (state & NST_CALLED) != 0; } public void setCalled() { state |= NST_CALLED; } public void clearCAlled() { state &= ~NST_CALLED; } public boolean isAddrFixed() { return (state & NST_ADDR_FIXED) != 0; } public void setAddrFixed() { state |= NST_ADDR_FIXED; } public void clearAddrFixed() { state &= ~NST_ADDR_FIXED; } public boolean isNamedGroup() { return (state & NST_NAMED_GROUP) != 0; } public void setNamedGroup() { state |= NST_NAMED_GROUP; } public void clearNamedGroup() { state &= ~NST_NAMED_GROUP; } public boolean isNameRef() { return (state & NST_NAME_REF) != 0; } public void setNameRef() { state |= NST_NAME_REF; } public void clearNameRef() { state &= ~NST_NAME_REF; } public boolean isInRepeat() { return (state & NST_IN_REPEAT) != 0; } public void setInRepeat() { state |= NST_IN_REPEAT; } public void clearInRepeat() { state &= ~NST_IN_REPEAT; } public boolean isNestLevel() { return (state & NST_NEST_LEVEL) != 0; } public void setNestLevel() { state |= NST_NEST_LEVEL; } public void clearNestLevel() { state &= ~NST_NEST_LEVEL; } public boolean isByNumber() { return (state & NST_BY_NUMBER) != 0; } public void setByNumber() { state |= NST_BY_NUMBER; } public void clearByNumber() { state &= ~NST_BY_NUMBER; } } joni-2.0.0/src/org/joni/ast/StringNode.java000066400000000000000000000130751214326443200205020ustar00rootroot00000000000000/* * Permission is hereby granted, free of charge, to any person obtaining a copy of * this 
software and associated documentation files (the "Software"), to deal in
 * the Software without restriction, including without limitation the rights to
 * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
 * of the Software, and to permit persons to whom the Software is furnished to do
 * so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
package org.joni.ast;

import org.jcodings.Encoding;
import org.joni.Config;
import org.joni.constants.StringType;

/**
 * AST node holding a run of literal bytes, stored as the range
 * {@code [p, end)} of {@link #bytes}.
 *
 * A node is either <em>shared</em> (its array was handed in by the caller and
 * is only referenced, see {@link #isShared()}) or owns a private buffer. The
 * appending methods copy a shared range into a private buffer before writing.
 */
public final class StringNode extends Node implements StringType {

    /** Spare bytes added on top of the requested size whenever a buffer is allocated. */
    private static final int NODE_STR_MARGIN = 16;
    /** Size of the private buffer owned by a fresh or cleared node. */
    private static final int NODE_STR_BUF_SIZE = 24;

    /** Shared placeholder node without a backing array; it must not be modified. */
    public static final StringNode EMPTY = new StringNode(null, Integer.MAX_VALUE, Integer.MAX_VALUE);

    public byte[]bytes;
    public int p;
    public int end;
    /** Bitwise OR of the {@code NSTR_*} flags. */
    public int flag;

    /** Creates an empty node that owns a private buffer. */
    public StringNode() {
        this.bytes = new byte[NODE_STR_BUF_SIZE];
    }

    /**
     * Creates a shared node referencing {@code bytes[p..end)} without copying.
     *
     * @param bytes backing array, referenced as is
     * @param p     index of the first byte
     * @param end   index one past the last byte
     */
    public StringNode(byte[]bytes, int p, int end) {
        this.bytes = bytes;
        this.p = p;
        this.end = end;
        setShared();
    }

    /** Creates a node that owns a private buffer containing the single byte {@code c}. */
    public StringNode(byte c) {
        this();
        bytes[end++] = c;
    }

    /**
     * Ensures that {@code ahead} more bytes can be written at {@link #end}
     * (assumes that the node is not shared).
     *
     * When the buffer has to grow, the content is moved to the start of the
     * new array and {@code p}/{@code end} are rebased accordingly, so the
     * range stays valid even if {@code p} was not zero.
     *
     * @param ahead number of bytes about to be appended
     */
    public void ensure(int ahead) {
        int used = end - p;
        // Capacity is measured from the write position (end), not from the content length.
        if (end + ahead >= bytes.length) {
            byte[]tmp = new byte[used + ahead + NODE_STR_MARGIN];
            System.arraycopy(bytes, p, tmp, 0, used);
            bytes = tmp;
            end = used;
            p = 0;
        }
    }

    /* COW and/or ensure there is ahead bytes available in node's buffer */
    private void modifyEnsure(int ahead) {
        if (isShared()) {
            int len = (end - p) + ahead;
            byte[]tmp = new byte[len + NODE_STR_MARGIN];
            System.arraycopy(bytes, p, tmp, 0, end - p);
            bytes = tmp;
            end = end - p;
            p = 0;
            clearShared();
        } else {
            ensure(ahead);
        }
    }

    @Override
    public int getType() {
        return STR;
    }

    @Override
    public String getName() {
        return "String";
    }

    /**
     * Dumps the node content: bytes in the range 0x20..0x7e are printed as
     * characters, every other byte as {@code [0xNN]}.
     *
     * @param level nesting level of the dump; not used by this implementation
     */
    @Override
    public String toString(int level) {
        StringBuilder value = new StringBuilder();
        value.append("\n bytes: '");
        for (int i = p; i < end; i++) {
            if ((bytes[i] & 0xff) >= 0x20 && (bytes[i] & 0xff) < 0x7f) {
                value.append((char)bytes[i]);
            } else {
                value.append(String.format("[0x%02x]", bytes[i]));
            }
        }
        value.append("'");
        return value.toString();
    }

    /** @return the content length in bytes */
    public int length() {
        return end - p;
    }

    /** @return the content length as reported by {@code enc.strLength} */
    public int length(Encoding enc) {
        return enc.strLength(bytes, p, end);
    }

    /**
     * Splits the last character off this node.
     *
     * @return a new shared node covering the last character (carrying the raw
     *         flag if this node has it), or {@code null} when the node is
     *         empty or the last character starts at {@code p}
     */
    public StringNode splitLastChar(Encoding enc) {
        StringNode n = null;

        if (end > p) {
            int prev = enc.prevCharHead(bytes, p, end, end);
            if (prev != -1 && prev > p) { /* can be split */
                n = new StringNode(bytes, prev, end);
                if (isRaw()) n.setRaw();
                end = prev;
            }
        }
        return n;
    }

    /** @return {@code true} when the content is longer than its first character */
    public boolean canBeSplit(Encoding enc) {
        if (end > p) {
            return enc.length(bytes, p, end) < (end - p);
        }
        return false;
    }

    /** Makes this node a shared view of {@code bytes[p..end)}. */
    public void set(byte[]bytes, int p, int end) {
        this.bytes = bytes;
        this.p = p;
        this.end = end;
        setShared();
    }

    /** Appends {@code cat[catP..catEnd)}, un-sharing the node first if necessary. */
    public void cat(byte[]cat, int catP, int catEnd) {
        int len = catEnd - catP;
        modifyEnsure(len);
        System.arraycopy(cat, catP, bytes, end, len);
        end += len;
    }

    /** Appends a single byte, un-sharing the node first if necessary. */
    public void cat(byte c) {
        modifyEnsure(1);
        bytes[end++] = c;
    }

    /**
     * Appends the encoded form of {@code code}.
     *
     * Goes through the copy-on-write path like the other appenders, so a
     * shared array is never written to.
     */
    public void catCode(int code, Encoding enc) {
        modifyEnsure(Config.ENC_CODE_TO_MBC_MAXLEN);
        end += enc.codeToMbc(code, bytes, end);
    }

    /**
     * Resets the node to an empty, unflagged state with a private buffer.
     *
     * A shared node drops its reference to the caller's array (which may also
     * be {@code null}); otherwise later appends would write into that array
     * once the shared flag is gone.
     */
    public void clear() {
        if (isShared() || bytes.length > NODE_STR_BUF_SIZE) bytes = new byte[NODE_STR_BUF_SIZE];
        flag = 0;
        p = end = 0;
    }

    public void setRaw() {
        flag |= NSTR_RAW;
    }

    public void clearRaw() {
        flag &= ~NSTR_RAW;
    }

    public boolean isRaw() {
        return (flag & NSTR_RAW) != 0;
    }

    public void setAmbig() {
        flag |= NSTR_AMBIG;
    }

    public void clearAmbig() {
        flag &= ~NSTR_AMBIG;
    }

    public boolean isAmbig() {
        return (flag & NSTR_AMBIG) != 0;
    }

    public void setDontGetOptInfo() {
        flag |= NSTR_DONT_GET_OPT_INFO;
    }

    public void clearDontGetOptInfo() {
        flag &= ~NSTR_DONT_GET_OPT_INFO;
    }

    public boolean isDontGetOptInfo() {
        return (flag & NSTR_DONT_GET_OPT_INFO) != 0;
    }

    public void setShared() {
        flag |= NSTR_SHARED;
    }

    public void clearShared() {
        flag &= ~NSTR_SHARED;
    }

    public boolean isShared() {
        return (flag & NSTR_SHARED) != 0;
    }
}
joni-2.0.0/src/org/joni/bench/000077500000000000000000000000001214326443200160455ustar00rootroot00000000000000joni-2.0.0/src/org/joni/bench/AbstractBench.java000066400000000000000000000034231214326443200214150ustar00rootroot00000000000000package org.joni.bench; import org.jcodings.specific.ASCIIEncoding; import org.joni.Option; import org.joni.Regex; import org.joni.Syntax; public abstract class AbstractBench { protected void bench(String _reg, String _str, int warmup, int times) throws Exception { byte[] reg = _reg.getBytes(); byte[] str = _str.getBytes(); Regex p = new Regex(reg,0,reg.length,Option.DEFAULT,ASCIIEncoding.INSTANCE,Syntax.DEFAULT); System.err.println("::: /" + _reg + "/ =~ \"" + _str + "\", " + warmup + " * " + times + " times"); for(int j=0;j, \k */ final int MEMORY_START = 48; final int MEMORY_START_PUSH = 49; /* push back-tracker to stack */ final int MEMORY_END_PUSH = 50; /* push back-tracker to stack */ final int MEMORY_END_PUSH_REC = 51; /* push back-tracker to stack */ final int MEMORY_END = 52; final int MEMORY_END_REC = 53; /* push marker to stack */ final int FAIL = 54; /* pop stack and move */ final int JUMP = 55; final int PUSH = 56; final int POP = 57; final int PUSH_OR_JUMP_EXACT1 = 58; /* if match exact then push, else jump. */ final int PUSH_IF_PEEK_NEXT = 59; /* if match exact then push, else none. */ final int REPEAT = 60; /* {n,m} */ final int REPEAT_NG = 61; /* {n,m}?
(non greedy) */ final int REPEAT_INC = 62; final int REPEAT_INC_NG = 63; /* non greedy */ final int REPEAT_INC_SG = 64; /* search and get in stack */ final int REPEAT_INC_NG_SG = 65; /* search and get in stack (non greedy) */ final int NULL_CHECK_START = 66; /* null loop checker start */ final int NULL_CHECK_END = 67; /* null loop checker end */ final int NULL_CHECK_END_MEMST = 68; /* null loop checker end (with capture status) */ final int NULL_CHECK_END_MEMST_PUSH = 69; /* with capture status and push check-end */ final int PUSH_POS = 70; /* (?=...) start */ final int POP_POS = 71; /* (?=...) end */ final int PUSH_POS_NOT = 72; /* (?!...) start */ final int FAIL_POS = 73; /* (?!...) end */ final int PUSH_STOP_BT = 74; /* (?>...) start */ final int POP_STOP_BT = 75; /* (?>...) end */ final int LOOK_BEHIND = 76; /* (?<=...) start (no needs end opcode) */ final int PUSH_LOOK_BEHIND_NOT = 77; /* (? */ final int RETURN = 80; final int STATE_CHECK_PUSH = 81; /* combination explosion check and push */ final int STATE_CHECK_PUSH_OR_JUMP = 82; /* check ok -> push, else jump */ final int STATE_CHECK = 83; /* check only */ final int STATE_CHECK_ANYCHAR_STAR = 84; final int STATE_CHECK_ANYCHAR_ML_STAR = 85; /* no need: IS_DYNAMIC_OPTION() == 0 */ final int SET_OPTION_PUSH = 86; /* set option and push recover option */ final int SET_OPTION = 87; /* set option */ // single byte versions final int ANYCHAR_SB = 88; /* "." */ final int ANYCHAR_ML_SB = 89; /* "." 
multi-line */ final int ANYCHAR_STAR_SB = 90; /* ".*" */ final int ANYCHAR_ML_STAR_SB = 91; /* ".*" multi-line */ final int ANYCHAR_STAR_PEEK_NEXT_SB = 92; final int ANYCHAR_ML_STAR_PEEK_NEXT_SB = 93; final int STATE_CHECK_ANYCHAR_STAR_SB = 94; final int STATE_CHECK_ANYCHAR_ML_STAR_SB= 95; final int CCLASS_SB = 96; final int CCLASS_NOT_SB = 97; final int WORD_SB = 98; final int NOT_WORD_SB = 99; final int WORD_BOUND_SB = 100; final int NOT_WORD_BOUND_SB = 101; final int WORD_BEGIN_SB = 102; final int WORD_END_SB = 103; final int LOOK_BEHIND_SB = 104; final int EXACT1_IC_SB = 105; /* single byte, N = 1, ignore case */ final int EXACTN_IC_SB = 106; /* single byte, ignore case */ public final String OpCodeNames[] = Config.DEBUG_COMPILE ? new String[] { "finish", /*OP_FINISH*/ "end", /*OP_END*/ "exact1", /*OP_EXACT1*/ "exact2", /*OP_EXACT2*/ "exact3", /*OP_EXACT3*/ "exact4", /*OP_EXACT4*/ "exact5", /*OP_EXACT5*/ "exactn", /*OP_EXACTN*/ "exactmb2-n1", /*OP_EXACTMB2N1*/ "exactmb2-n2", /*OP_EXACTMB2N2*/ "exactmb2-n3", /*OP_EXACTMB2N3*/ "exactmb2-n", /*OP_EXACTMB2N*/ "exactmb3n", /*OP_EXACTMB3N*/ "exactmbn", /*OP_EXACTMBN*/ "exact1-ic", /*OP_EXACT1_IC*/ "exactn-ic", /*OP_EXACTN_IC*/ "cclass", /*OP_CCLASS*/ "cclass-mb", /*OP_CCLASS_MB*/ "cclass-mix", /*OP_CCLASS_MIX*/ "cclass-not", /*OP_CCLASS_NOT*/ "cclass-mb-not", /*OP_CCLASS_MB_NOT*/ "cclass-mix-not", /*OP_CCLASS_MIX_NOT*/ "cclass-node", /*OP_CCLASS_NODE*/ "anychar", /*OP_ANYCHAR*/ "anychar-ml", /*OP_ANYCHAR_ML*/ "anychar*", /*OP_ANYCHAR_STAR*/ "anychar-ml*", /*OP_ANYCHAR_ML_STAR*/ "anychar*-peek-next", /*OP_ANYCHAR_STAR_PEEK_NEXT*/ "anychar-ml*-peek-next", /*OP_ANYCHAR_ML_STAR_PEEK_NEXT*/ "word", /*OP_WORD*/ "not-word", /*OP_NOT_WORD*/ "word-bound", /*OP_WORD_BOUND*/ "not-word-bound", /*OP_NOT_WORD_BOUND*/ "word-begin", /*OP_WORD_BEGIN*/ "word-end", /*OP_WORD_END*/ "begin-buf", /*OP_BEGIN_BUF*/ "end-buf", /*OP_END_BUF*/ "begin-line", /*OP_BEGIN_LINE*/ "end-line", /*OP_END_LINE*/ "semi-end-buf", /*OP_SEMI_END_BUF*/ 
"begin-position", /*OP_BEGIN_POSITION*/ "backref1", /*OP_BACKREF1*/ "backref2", /*OP_BACKREF2*/ "backrefn", /*OP_BACKREFN*/ "backrefn-ic", /*OP_BACKREFN_IC*/ "backref_multi", /*OP_BACKREF_MULTI*/ "backref_multi-ic", /*OP_BACKREF_MULTI_IC*/ "backref_at_level", /*OP_BACKREF_AT_LEVEL*/ "mem-start", /*OP_MEMORY_START*/ "mem-start-push", /*OP_MEMORY_START_PUSH*/ "mem-end-push", /*OP_MEMORY_END_PUSH*/ "mem-end-push-rec", /*OP_MEMORY_END_PUSH_REC*/ "mem-end", /*OP_MEMORY_END*/ "mem-end-rec", /*OP_MEMORY_END_REC*/ "fail", /*OP_FAIL*/ "jump", /*OP_JUMP*/ "push", /*OP_PUSH*/ "pop", /*OP_POP*/ "push-or-jump-e1", /*OP_PUSH_OR_JUMP_EXACT1*/ "push-if-peek-next", /*OP_PUSH_IF_PEEK_NEXT*/ "repeat", /*OP_REPEAT*/ "repeat-ng", /*OP_REPEAT_NG*/ "repeat-inc", /*OP_REPEAT_INC*/ "repeat-inc-ng", /*OP_REPEAT_INC_NG*/ "repeat-inc-sg", /*OP_REPEAT_INC_SG*/ "repeat-inc-ng-sg", /*OP_REPEAT_INC_NG_SG*/ "null-check-start", /*OP_NULL_CHECK_START*/ "null-check-end", /*OP_NULL_CHECK_END*/ "null-check-end-memst", /*OP_NULL_CHECK_END_MEMST*/ "null-check-end-memst-push", /*OP_NULL_CHECK_END_MEMST_PUSH*/ "push-pos", /*OP_PUSH_POS*/ "pop-pos", /*OP_POP_POS*/ "push-pos-not", /*OP_PUSH_POS_NOT*/ "fail-pos", /*OP_FAIL_POS*/ "push-stop-bt", /*OP_PUSH_STOP_BT*/ "pop-stop-bt", /*OP_POP_STOP_BT*/ "look-behind", /*OP_LOOK_BEHIND*/ "push-look-behind-not", /*OP_PUSH_LOOK_BEHIND_NOT*/ "fail-look-behind-not", /*OP_FAIL_LOOK_BEHIND_NOT*/ "call", /*OP_CALL*/ "return", /*OP_RETURN*/ "state-check-push", /*OP_STATE_CHECK_PUSH*/ "state-check-push-or-jump", /*OP_STATE_CHECK_PUSH_OR_JUMP*/ "state-check", /*OP_STATE_CHECK*/ "state-check-anychar*", /*OP_STATE_CHECK_ANYCHAR_STAR*/ "state-check-anychar-ml*", /*OP_STATE_CHECK_ANYCHAR_ML_STAR*/ "set-option-push", /*OP_SET_OPTION_PUSH*/ "set-option", /*OP_SET_OPTION*/ // single byte versions "anychar-sb", /*OP_ANYCHAR*/ "anychar-ml-sb", /*OP_ANYCHAR_ML*/ "anychar*-sb", /*OP_ANYCHAR_STAR*/ "anychar-ml*-sb", /*OP_ANYCHAR_ML_STAR*/ "anychar*-peek-next-sb", 
/*OP_ANYCHAR_STAR_PEEK_NEXT*/ "anychar-ml*-peek-next-sb", /*OP_ANYCHAR_ML_STAR_PEEK_NEXT*/ "state-check-anychar*-sb", /*OP_STATE_CHECK_ANYCHAR_STAR*/ "state-check-anychar-ml*-sb", /*OP_STATE_CHECK_ANYCHAR_ML_STAR*/ "cclass-sb", /*OP_CCLASS*/ "cclass-not-sb", /*OP_CCLASS_NOT*/ "word-sb", /*OP_WORD*/ "not-word-sb", /*OP_NOT_WORD*/ "word-bound-sb", /*OP_WORD_BOUND*/ "not-word-bound-sb", /*OP_NOT_WORD_BOUND*/ "word-begin-sb", /*OP_WORD_BEGIN*/ "word-end-sb", /*OP_WORD_END*/ "look-behind-sb", /*OP_LOOK_BEHIND*/ "exact1-ic-sb", /*OP_EXACT1_IC*/ "exactn-ic-sb", /*OP_EXACTN_IC*/ } : null; public final int OpCodeArgTypes[] = Config.DEBUG_COMPILE ? new int[] { Arguments.NON, /*OP_FINISH*/ Arguments.NON, /*OP_END*/ Arguments.SPECIAL, /*OP_EXACT1*/ Arguments.SPECIAL, /*OP_EXACT2*/ Arguments.SPECIAL, /*OP_EXACT3*/ Arguments.SPECIAL, /*OP_EXACT4*/ Arguments.SPECIAL, /*OP_EXACT5*/ Arguments.SPECIAL, /*OP_EXACTN*/ Arguments.SPECIAL, /*OP_EXACTMB2N1*/ Arguments.SPECIAL, /*OP_EXACTMB2N2*/ Arguments.SPECIAL, /*OP_EXACTMB2N3*/ Arguments.SPECIAL, /*OP_EXACTMB2N*/ Arguments.SPECIAL, /*OP_EXACTMB3N*/ Arguments.SPECIAL, /*OP_EXACTMBN*/ Arguments.SPECIAL, /*OP_EXACT1_IC*/ Arguments.SPECIAL, /*OP_EXACTN_IC*/ Arguments.SPECIAL, /*OP_CCLASS*/ Arguments.SPECIAL, /*OP_CCLASS_MB*/ Arguments.SPECIAL, /*OP_CCLASS_MIX*/ Arguments.SPECIAL, /*OP_CCLASS_NOT*/ Arguments.SPECIAL, /*OP_CCLASS_MB_NOT*/ Arguments.SPECIAL, /*OP_CCLASS_MIX_NOT*/ Arguments.SPECIAL, /*OP_CCLASS_NODE*/ Arguments.NON, /*OP_ANYCHAR*/ Arguments.NON, /*OP_ANYCHAR_ML*/ Arguments.NON, /*OP_ANYCHAR_STAR*/ Arguments.NON, /*OP_ANYCHAR_ML_STAR*/ Arguments.SPECIAL, /*OP_ANYCHAR_STAR_PEEK_NEXT*/ Arguments.SPECIAL, /*OP_ANYCHAR_ML_STAR_PEEK_NEXT*/ Arguments.NON, /*OP_WORD*/ Arguments.NON, /*OP_NOT_WORD*/ Arguments.NON, /*OP_WORD_BOUND*/ Arguments.NON, /*OP_NOT_WORD_BOUND*/ Arguments.NON, /*OP_WORD_BEGIN*/ Arguments.NON, /*OP_WORD_END*/ Arguments.NON, /*OP_BEGIN_BUF*/ Arguments.NON, /*OP_END_BUF*/ Arguments.NON, /*OP_BEGIN_LINE*/ 
Arguments.NON, /*OP_END_LINE*/ Arguments.NON, /*OP_SEMI_END_BUF*/ Arguments.NON, /*OP_BEGIN_POSITION*/ Arguments.NON, /*OP_BACKREF1*/ Arguments.NON, /*OP_BACKREF2*/ Arguments.MEMNUM, /*OP_BACKREFN*/ Arguments.SPECIAL, /*OP_BACKREFN_IC*/ Arguments.SPECIAL, /*OP_BACKREF_MULTI*/ Arguments.SPECIAL, /*OP_BACKREF_MULTI_IC*/ Arguments.SPECIAL, /*OP_BACKREF_AT_LEVEL*/ Arguments.MEMNUM, /*OP_MEMORY_START*/ Arguments.MEMNUM, /*OP_MEMORY_START_PUSH*/ Arguments.MEMNUM, /*OP_MEMORY_END_PUSH*/ Arguments.MEMNUM, /*OP_MEMORY_END_PUSH_REC*/ Arguments.MEMNUM, /*OP_MEMORY_END*/ Arguments.MEMNUM, /*OP_MEMORY_END_REC*/ Arguments.NON, /*OP_FAIL*/ Arguments.RELADDR, /*OP_JUMP*/ Arguments.RELADDR, /*OP_PUSH*/ Arguments.NON, /*OP_POP*/ Arguments.SPECIAL, /*OP_PUSH_OR_JUMP_EXACT1*/ Arguments.SPECIAL, /*OP_PUSH_IF_PEEK_NEXT*/ Arguments.SPECIAL, /*OP_REPEAT*/ Arguments.SPECIAL, /*OP_REPEAT_NG*/ Arguments.MEMNUM, /*OP_REPEAT_INC*/ Arguments.MEMNUM, /*OP_REPEAT_INC_NG*/ Arguments.MEMNUM, /*OP_REPEAT_INC_SG*/ Arguments.MEMNUM, /*OP_REPEAT_INC_NG_SG*/ Arguments.MEMNUM, /*OP_NULL_CHECK_START*/ Arguments.MEMNUM, /*OP_NULL_CHECK_END*/ Arguments.MEMNUM, /*OP_NULL_CHECK_END_MEMST*/ Arguments.MEMNUM, /*OP_NULL_CHECK_END_MEMST_PUSH*/ Arguments.NON, /*OP_PUSH_POS*/ Arguments.NON, /*OP_POP_POS*/ Arguments.RELADDR, /*OP_PUSH_POS_NOT*/ Arguments.NON, /*OP_FAIL_POS*/ Arguments.NON, /*OP_PUSH_STOP_BT*/ Arguments.NON, /*OP_POP_STOP_BT*/ Arguments.SPECIAL, /*OP_LOOK_BEHIND*/ Arguments.SPECIAL, /*OP_PUSH_LOOK_BEHIND_NOT*/ Arguments.NON, /*OP_FAIL_LOOK_BEHIND_NOT*/ Arguments.ABSADDR, /*OP_CALL*/ Arguments.NON, /*OP_RETURN*/ Arguments.SPECIAL, /*OP_STATE_CHECK_PUSH*/ Arguments.SPECIAL, /*OP_STATE_CHECK_PUSH_OR_JUMP*/ Arguments.STATE_CHECK, /*OP_STATE_CHECK*/ Arguments.STATE_CHECK, /*OP_STATE_CHECK_ANYCHAR_STAR*/ Arguments.STATE_CHECK, /*OP_STATE_CHECK_ANYCHAR_ML_STAR*/ Arguments.OPTION, /*OP_SET_OPTION_PUSH*/ Arguments.OPTION, /*OP_SET_OPTION*/ // single byte versions Arguments.NON, /*OP_ANYCHAR*/ Arguments.NON, 
/*OP_ANYCHAR_ML*/ Arguments.NON, /*OP_ANYCHAR_STAR*/ Arguments.NON, /*OP_ANYCHAR_ML_STAR*/ Arguments.SPECIAL, /*OP_ANYCHAR_STAR_PEEK_NEXT*/ Arguments.SPECIAL, /*OP_ANYCHAR_ML_STAR_PEEK_NEXT*/ Arguments.STATE_CHECK, /*OP_STATE_CHECK_ANYCHAR_STAR*/ Arguments.STATE_CHECK, /*OP_STATE_CHECK_ANYCHAR_ML_STAR*/ Arguments.SPECIAL, /*OP_CCLASS*/ Arguments.SPECIAL, /*OP_CCLASS_NOT*/ Arguments.NON, /*OP_WORD*/ Arguments.NON, /*OP_NOT_WORD*/ Arguments.NON, /*OP_WORD_BOUND*/ Arguments.NON, /*OP_NOT_WORD_BOUND*/ Arguments.NON, /*OP_WORD_BEGIN*/ Arguments.NON, /*OP_WORD_END*/ Arguments.SPECIAL, /*OP_LOOK_BEHIND*/ Arguments.SPECIAL, /*OP_EXACT1_IC*/ Arguments.SPECIAL, /*OP_EXACTN_IC*/ } : null; } joni-2.0.0/src/org/joni/constants/OPSize.java000066400000000000000000000075761214326443200210350ustar00rootroot00000000000000/* * Permission is hereby granted, free of charge, to any person obtaining a copy of * this software and associated documentation files (the "Software"), to deal in * the Software without restriction, including without limitation the rights to * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do * so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ package org.joni.constants; public interface OPSize { // this might be helpful for potential byte[] migration final int OPCODE = 1; final int RELADDR = 1; final int ABSADDR = 1; final int LENGTH = 1; final int MEMNUM = 1; final int STATE_CHECK_NUM = 1; final int REPEATNUM = 1; final int OPTION = 1; final int CODE_POINT = 1; final int POINTER = 1; final int INDEX = 1; /* op-code + arg size */ final int ANYCHAR_STAR = OPCODE; final int ANYCHAR_STAR_PEEK_NEXT = (OPCODE + 1); final int JUMP = (OPCODE + RELADDR); final int PUSH = (OPCODE + RELADDR); final int POP = OPCODE; final int PUSH_OR_JUMP_EXACT1 = (OPCODE + RELADDR + 1); final int PUSH_IF_PEEK_NEXT = (OPCODE + RELADDR + 1); final int REPEAT_INC = (OPCODE + MEMNUM); final int REPEAT_INC_NG = (OPCODE + MEMNUM); final int PUSH_POS = OPCODE; final int PUSH_POS_NOT = (OPCODE + RELADDR); final int POP_POS = OPCODE; final int FAIL_POS = OPCODE; final int SET_OPTION = (OPCODE + OPTION); final int SET_OPTION_PUSH = (OPCODE + OPTION); final int FAIL = OPCODE; final int MEMORY_START = (OPCODE + MEMNUM); final int MEMORY_START_PUSH = (OPCODE + MEMNUM); final int MEMORY_END_PUSH = (OPCODE + MEMNUM); final int MEMORY_END_PUSH_REC = (OPCODE + MEMNUM); final int MEMORY_END = (OPCODE + MEMNUM); final int MEMORY_END_REC = (OPCODE + MEMNUM); final int PUSH_STOP_BT = OPCODE; final int POP_STOP_BT = OPCODE; final int NULL_CHECK_START = (OPCODE + MEMNUM); final int NULL_CHECK_END = (OPCODE + MEMNUM); final int LOOK_BEHIND = (OPCODE + LENGTH); final int PUSH_LOOK_BEHIND_NOT = (OPCODE + RELADDR + LENGTH); final int FAIL_LOOK_BEHIND_NOT = OPCODE; final int CALL = (OPCODE + ABSADDR); final int RETURN = OPCODE; // #ifdef USE_COMBINATION_EXPLOSION_CHECK final int STATE_CHECK = (OPCODE + STATE_CHECK_NUM); final int STATE_CHECK_PUSH = (OPCODE + STATE_CHECK_NUM + RELADDR); final int STATE_CHECK_PUSH_OR_JUMP = (OPCODE + STATE_CHECK_NUM + RELADDR); final int STATE_CHECK_ANYCHAR_STAR = (OPCODE + STATE_CHECK_NUM); } 
joni-2.0.0/src/org/joni/constants/Reduce.java000066400000000000000000000046311214326443200210600ustar00rootroot00000000000000/* * Permission is hereby granted, free of charge, to any person obtaining a copy of * this software and associated documentation files (the "Software"), to deal in * the Software without restriction, including without limitation the rights to * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do * so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ package org.joni.constants; import static org.joni.constants.Reduce.ReduceType.A; import static org.joni.constants.Reduce.ReduceType.AQ; import static org.joni.constants.Reduce.ReduceType.ASIS; import static org.joni.constants.Reduce.ReduceType.DEL; import static org.joni.constants.Reduce.ReduceType.PQ_Q; import static org.joni.constants.Reduce.ReduceType.P_QQ; import static org.joni.constants.Reduce.ReduceType.QQ; public interface Reduce { enum ReduceType { ASIS, /* as is */ DEL, /* delete parent */ A, /* to '*' */ AQ, /* to '*?' */ QQ, /* to '??' */ P_QQ, /* to '+)??' */ PQ_Q, /* to '+?)?' */ } final ReduceType[][]REDUCE_TABLE = { {DEL, A, A, QQ, AQ, ASIS}, /* '?' */ {DEL, DEL, DEL, P_QQ, P_QQ, DEL}, /* '*' */ {A, A, DEL, ASIS, P_QQ, DEL}, /* '+' */ {DEL, AQ, AQ, DEL, AQ, AQ}, /* '??' 
*/ {DEL, DEL, DEL, DEL, DEL, DEL}, /* '*?' */ {ASIS, PQ_Q, DEL, AQ, AQ, DEL} /* '+?' */ }; final String PopularQStr[] = new String[] { "?", "*", "+", "??", "*?", "+?" }; String ReduceQStr[]= new String[] { "", "", "*", "*?", "??", "+ and ??", "+? and ?" }; } joni-2.0.0/src/org/joni/constants/RegexState.java000066400000000000000000000024331214326443200217220ustar00rootroot00000000000000/* * Permission is hereby granted, free of charge, to any person obtaining a copy of * this software and associated documentation files (the "Software"), to deal in * the Software without restriction, including without limitation the rights to * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do * so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ package org.joni.constants; // we dont need this ATM public interface RegexState { final int NORMAL = 0; final int SEARCHING = 1; final int COMPILING = -1; final int MODIFY = -2; } joni-2.0.0/src/org/joni/constants/StackPopLevel.java000066400000000000000000000023171214326443200223640ustar00rootroot00000000000000/* * Permission is hereby granted, free of charge, to any person obtaining a copy of * this software and associated documentation files (the "Software"), to deal in * the Software without restriction, including without limitation the rights to * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do * so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/
package org.joni.constants;

/**
 * Levels selecting how much work a stack pop has to do.
 * NOTE(review): the exact meaning of each level is defined by the matcher
 * that consumes these values -- confirm there.
 */
public interface StackPopLevel {
    int FREE = 0;
    int MEM_START = 1;
    int ALL = 2;
}
joni-2.0.0/src/org/joni/constants/StackType.java000066400000000000000000000047211214326443200215600ustar00rootroot00000000000000/*
 * Permission is hereby granted, free of charge, to any person obtaining a copy of
 * this software and associated documentation files (the "Software"), to deal in
 * the Software without restriction, including without limitation the rights to
 * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
 * of the Software, and to permit persons to whom the Software is furnished to do
 * so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
*/
package org.joni.constants;

/**
 * Type tags of matcher stack entries and the masks used to classify them.
 *
 * The tags are laid out so that a single mask test can pick out a group of
 * entry types (see the {@code MASK_*} constants at the end).
 */
public interface StackType {
    /** stack **/
    int INVALID_STACK_INDEX = -1;

    /* stack type */

    /* used by normal-POP */
    int ALT = 0x0001;
    int LOOK_BEHIND_NOT = 0x0002;
    int POS_NOT = 0x0003;

    /* handled by normal-POP */
    int MEM_START = 0x0100;
    int MEM_END = 0x8200;
    int REPEAT_INC = 0x0300;
    int STATE_CHECK_MARK = 0x1000;

    /* avoided by normal-POP */
    int NULL_CHECK_START = 0x3000;
    int NULL_CHECK_END = 0x5000;     /* for recursive call */
    int MEM_END_MARK = 0x8400;
    int POS = 0x0500;                /* used when POP-POS */
    int STOP_BT = 0x0600;            /* mark for "(?>...)" */
    int REPEAT = 0x0700;
    int CALL_FRAME = 0x0800;
    int RETURN = 0x0900;
    int VOID = 0x0a00;               /* for fill a blank */

    /* stack type check mask */
    int MASK_POP_USED = 0x00ff;
    int MASK_TO_VOID_TARGET = 0x10ff;
    int MASK_MEM_END_OR_MARK = 0x8000;   /* MEM_END or MEM_END_MARK */
}
joni-2.0.0/src/org/joni/constants/StringType.java000066400000000000000000000024501214326443200217560ustar00rootroot00000000000000/*
 * Permission is hereby granted, free of charge, to any person obtaining a copy of
 * this software and associated documentation files (the "Software"), to deal in
 * the Software without restriction, including without limitation the rights to
 * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
 * of the Software, and to permit persons to whom the Software is furnished to do
 * so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
package org.joni.constants;

/** Bit flags stored in the {@code flag} field of a string node. */
public interface StringType {
    int NSTR_RAW = 1 << 0;
    int NSTR_AMBIG = 1 << 1;
    int NSTR_DONT_GET_OPT_INFO = 1 << 2;
    int NSTR_SHARED = 1 << 3;
}
joni-2.0.0/src/org/joni/constants/SyntaxProperties.java000066400000000000000000000165061214326443200232200ustar00rootroot00000000000000/*
 * Permission is hereby granted, free of charge, to any person obtaining a copy of
 * this software and associated documentation files (the "Software"), to deal in
 * the Software without restriction, including without limitation the rights to
 * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
 * of the Software, and to permit persons to whom the Software is furnished to do
 * so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
package org.joni.constants;

/**
 * Bit flags describing a regex syntax.
 *
 * There are three independent bit spaces: {@code OP_*} (operators),
 * {@code OP2_*} (more operators) and the unprefixed behavior flags. Values
 * repeat across the spaces, so a flag is only meaningful in the mask of its
 * own group. The last three constants are ready-made combinations.
 */
public interface SyntaxProperties {
    /* syntax (operators) */
    int OP_VARIABLE_META_CHARACTERS = 1 << 0;
    int OP_DOT_ANYCHAR = 1 << 1;                  /* . */
    int OP_ASTERISK_ZERO_INF = 1 << 2;            /* * */
    int OP_ESC_ASTERISK_ZERO_INF = 1 << 3;
    int OP_PLUS_ONE_INF = 1 << 4;                 /* + */
    int OP_ESC_PLUS_ONE_INF = 1 << 5;
    int OP_QMARK_ZERO_ONE = 1 << 6;               /* ? */
    int OP_ESC_QMARK_ZERO_ONE = 1 << 7;
    int OP_BRACE_INTERVAL = 1 << 8;               /* {lower,upper} */
    int OP_ESC_BRACE_INTERVAL = 1 << 9;           /* \{lower,upper\} */
    int OP_VBAR_ALT = 1 << 10;                    /* | */
    int OP_ESC_VBAR_ALT = 1 << 11;                /* \| */
    int OP_LPAREN_SUBEXP = 1 << 12;               /* (...) */
    int OP_ESC_LPAREN_SUBEXP = 1 << 13;           /* \(...\) */
    int OP_ESC_AZ_BUF_ANCHOR = 1 << 14;           /* \A, \Z, \z */
    int OP_ESC_CAPITAL_G_BEGIN_ANCHOR = 1 << 15;  /* \G */
    int OP_DECIMAL_BACKREF = 1 << 16;             /* \num */
    int OP_BRACKET_CC = 1 << 17;                  /* [...] */
    int OP_ESC_W_WORD = 1 << 18;                  /* \w, \W */
    int OP_ESC_LTGT_WORD_BEGIN_END = 1 << 19;     /* \<, \> */
    int OP_ESC_B_WORD_BOUND = 1 << 20;            /* \b, \B */
    int OP_ESC_S_WHITE_SPACE = 1 << 21;           /* \s, \S */
    int OP_ESC_D_DIGIT = 1 << 22;                 /* \d, \D */
    int OP_LINE_ANCHOR = 1 << 23;                 /* ^, $ */
    int OP_POSIX_BRACKET = 1 << 24;               /* [:xxxx:] */
    int OP_QMARK_NON_GREEDY = 1 << 25;            /* ??,*?,+?,{n,m}? */
    int OP_ESC_CONTROL_CHARS = 1 << 26;           /* \n,\r,\t,\a ... */
    int OP_ESC_C_CONTROL = 1 << 27;               /* \cx */
    int OP_ESC_OCTAL3 = 1 << 28;                  /* \OOO */
    int OP_ESC_X_HEX2 = 1 << 29;                  /* \xHH */
    int OP_ESC_X_BRACE_HEX8 = 1 << 30;            /* \x{7HHHHHHH} */

    /* syntax (operators), second bit space */
    int OP2_ESC_CAPITAL_Q_QUOTE = 1 << 0;         /* \Q...\E */
    int OP2_QMARK_GROUP_EFFECT = 1 << 1;          /* (?...) */
    int OP2_OPTION_PERL = 1 << 2;                 /* (?imsx), (?-imsx) */
    int OP2_OPTION_RUBY = 1 << 3;                 /* (?imx), (?-imx) */
    int OP2_PLUS_POSSESSIVE_REPEAT = 1 << 4;      /* ?+,*+,++ */
    int OP2_PLUS_POSSESSIVE_INTERVAL = 1 << 5;    /* {n,m}+ */
    int OP2_CCLASS_SET_OP = 1 << 6;               /* [...&&..[..]..] */
    int OP2_QMARK_LT_NAMED_GROUP = 1 << 7;        /* (?<name>...) */
    int OP2_ESC_K_NAMED_BACKREF = 1 << 8;         /* \k<name> */
    int OP2_ESC_G_SUBEXP_CALL = 1 << 9;           /* \g<name>, \g<n> */
    int OP2_ATMARK_CAPTURE_HISTORY = 1 << 10;     /* (?@..), (?@<x>..) */
    int OP2_ESC_CAPITAL_C_BAR_CONTROL = 1 << 11;  /* \C-x */
    int OP2_ESC_CAPITAL_M_BAR_META = 1 << 12;     /* \M-x */
    int OP2_ESC_V_VTAB = 1 << 13;                 /* \v as VTAB */
    int OP2_ESC_U_HEX4 = 1 << 14;                 /* \\uHHHH */
    int OP2_ESC_GNU_BUF_ANCHOR = 1 << 15;         /* \`, \' */
    int OP2_ESC_P_BRACE_CHAR_PROPERTY = 1 << 16;  /* \p{...}, \P{...} */
    int OP2_ESC_P_BRACE_CIRCUMFLEX_NOT = 1 << 17; /* \p{^..}, \P{^..} */
    /* int OP2_CHAR_PROPERTY_PREFIX_IS = 1 << 18; */
    int OP2_ESC_H_XDIGIT = 1 << 19;               /* \h, \H */
    int OP2_INEFFECTIVE_ESCAPE = 1 << 20;         /* \ */

    /* syntax (behavior) */
    int CONTEXT_INDEP_ANCHORS = 1 << 31;          /* not implemented */
    int CONTEXT_INDEP_REPEAT_OPS = 1 << 0;        /* ?, *, +, {n,m} */
    int CONTEXT_INVALID_REPEAT_OPS = 1 << 1;      /* error or ignore */
    int ALLOW_UNMATCHED_CLOSE_SUBEXP = 1 << 2;    /* ...)... */
    int ALLOW_INVALID_INTERVAL = 1 << 3;          /* {??? */
    int ALLOW_INTERVAL_LOW_ABBREV = 1 << 4;       /* {,n} => {0,n} */
    int STRICT_CHECK_BACKREF = 1 << 5;            /* /(\1)/, /\1()/ .. */
    int DIFFERENT_LEN_ALT_LOOK_BEHIND = 1 << 6;   /* (?<=a|bc) */
    int CAPTURE_ONLY_NAMED_GROUP = 1 << 7;        /* see doc/RE */
    int ALLOW_MULTIPLEX_DEFINITION_NAME = 1 << 8; /* (?<x>)(?<x>) */
    int FIXED_INTERVAL_IS_GREEDY_ONLY = 1 << 9;   /* a{n}? = (?:a{n})? */

    /* syntax (behavior) in char class [...] */
    int NOT_NEWLINE_IN_NEGATIVE_CC = 1 << 20;     /* [^...] */
    int BACKSLASH_ESCAPE_IN_CC = 1 << 21;         /* [..\w..] etc.. */
    int ALLOW_EMPTY_RANGE_IN_CC = 1 << 22;
    int ALLOW_DOUBLE_RANGE_OP_IN_CC = 1 << 23;    /* [0-9-a] = [0-9\-a] */

    /* syntax (behavior) warning */
    int WARN_CC_OP_NOT_ESCAPED = 1 << 24;         /* [,-,] */
    int WARN_REDUNDANT_NESTED_REPEAT = 1 << 25;   /* (?:a*)+ */

    /** Operators shared by the POSIX syntaxes ({@code OP_*} bit space). */
    int POSIX_COMMON_OP =
            OP_DOT_ANYCHAR
            | OP_POSIX_BRACKET
            | OP_DECIMAL_BACKREF
            | OP_BRACKET_CC
            | OP_ASTERISK_ZERO_INF
            | OP_LINE_ANCHOR
            | OP_ESC_CONTROL_CHARS;

    /** Operators of the GNU regex syntax ({@code OP_*} bit space). */
    int GNU_REGEX_OP =
            OP_DOT_ANYCHAR
            | OP_BRACKET_CC
            | OP_POSIX_BRACKET
            | OP_DECIMAL_BACKREF
            | OP_BRACE_INTERVAL
            | OP_LPAREN_SUBEXP
            | OP_VBAR_ALT
            | OP_ASTERISK_ZERO_INF
            | OP_PLUS_ONE_INF
            | OP_QMARK_ZERO_ONE
            | OP_ESC_AZ_BUF_ANCHOR
            | OP_ESC_CAPITAL_G_BEGIN_ANCHOR
            | OP_ESC_W_WORD
            | OP_ESC_B_WORD_BOUND
            | OP_ESC_LTGT_WORD_BEGIN_END
            | OP_ESC_S_WHITE_SPACE
            | OP_ESC_D_DIGIT
            | OP_LINE_ANCHOR;

    /** Behavior flags of the GNU regex syntax (behavior bit space). */
    int GNU_REGEX_BV =
            CONTEXT_INDEP_ANCHORS
            | CONTEXT_INDEP_REPEAT_OPS
            | CONTEXT_INVALID_REPEAT_OPS
            | ALLOW_INVALID_INTERVAL
            | BACKSLASH_ESCAPE_IN_CC
            | ALLOW_DOUBLE_RANGE_OP_IN_CC;
}
joni-2.0.0/src/org/joni/constants/TargetInfo.java000066400000000000000000000023701214326443200217110ustar00rootroot00000000000000/*
 * Permission is hereby granted, free of charge, to any person obtaining a copy of
 * this software and associated documentation files (the "Software"), to deal in
 * the Software without restriction, including without limitation the rights to
 * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
 * of the Software, and to permit persons to whom the Software is furnished to do
 * so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
package org.joni.constants;

/**
 * Four consecutive integer codes (0..3) with {@code EMPTY} in their names.
 * These are plain enumerated values, not bit flags: {@code IS_EMPTY_REC} (3)
 * is a code of its own rather than a combination of the two before it.
 * NOTE(review): the meaning of each state is not visible in this file;
 * confirm against the code that produces these values.
 */
public interface TargetInfo {
    int ISNOT_EMPTY  = 0;
    int IS_EMPTY     = 1;
    int IS_EMPTY_MEM = 2;
    int IS_EMPTY_REC = 3;
}
joni-2.0.0/src/org/joni/constants/TokenType.java000066400000000000000000000031741214326443200215740ustar00rootroot00000000000000/*
 * Permission is hereby granted, free of charge, to any person obtaining a copy of
 * this software and associated documentation files (the "Software"), to deal in
 * the Software without restriction, including without limitation the rights to
 * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
 * of the Software, and to permit persons to whom the Software is furnished to do
 * so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
*/ package org.joni.constants; public enum TokenType { EOT, /* end of token */ RAW_BYTE, CHAR, STRING, CODE_POINT, ANYCHAR, CHAR_TYPE, BACKREF, CALL, ANCHOR, OP_REPEAT, INTERVAL, ANYCHAR_ANYTIME, /* SQL '%' == .* */ ALT, SUBEXP_OPEN, SUBEXP_CLOSE, CC_OPEN, QUOTE_OPEN, CHAR_PROPERTY, /* \p{...}, \P{...} */ /* in cc */ CC_CLOSE, CC_RANGE, POSIX_BRACKET_OPEN, CC_AND, /* && */ CC_CC_OPEN /* [ */ } joni-2.0.0/src/org/joni/constants/Traverse.java000066400000000000000000000024571214326443200214500ustar00rootroot00000000000000/* * Permission is hereby granted, free of charge, to any person obtaining a copy of * this software and associated documentation files (the "Software"), to deal in * the Software without restriction, including without limitation the rights to * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do * so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ package org.joni.constants; public interface Traverse { final int TRAVERSE_CALLBACK_AT_FIRST = 1; final int TRAVERSE_CALLBACK_AT_LAST = 2; final int TRAVERSE_CALLBACK_AT_BOTH = TRAVERSE_CALLBACK_AT_FIRST | TRAVERSE_CALLBACK_AT_LAST; } joni-2.0.0/src/org/joni/exception/000077500000000000000000000000001214326443200167645ustar00rootroot00000000000000joni-2.0.0/src/org/joni/exception/ErrorMessages.java000066400000000000000000000137661214326443200224250ustar00rootroot00000000000000/* * Permission is hereby granted, free of charge, to any person obtaining a copy of * this software and associated documentation files (the "Software"), to deal in * the Software without restriction, including without limitation the rights to * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do * so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ package org.joni.exception; import org.joni.Config; public interface ErrorMessages extends org.jcodings.exception.ErrorMessages { final String MISMATCH = "mismatch"; final String NO_SUPPORT_CONFIG = "no support in this configuration"; /* internal error */ final String ERR_MEMORY = "fail to memory allocation"; final String ERR_MATCH_STACK_LIMIT_OVER = "match-stack limit over"; final String ERR_TYPE_BUG = "undefined type (bug)"; final String ERR_PARSER_BUG = "internal parser error (bug)"; final String ERR_STACK_BUG = "stack error (bug)"; final String ERR_UNDEFINED_BYTECODE = "undefined bytecode (bug)"; final String ERR_UNEXPECTED_BYTECODE = "unexpected bytecode (bug)"; final String ERR_DEFAULT_ENCODING_IS_NOT_SETTED = "default multibyte-encoding is not setted"; final String ERR_SPECIFIED_ENCODING_CANT_CONVERT_TO_WIDE_CHAR = "can't convert to wide-char on specified multibyte-encoding"; /* general error */ final String ERR_INVALID_ARGUMENT = "invalid argument"; /* syntax error */ final String ERR_END_PATTERN_AT_LEFT_BRACE = "end pattern at left brace"; final String ERR_END_PATTERN_AT_LEFT_BRACKET = "end pattern at left bracket"; final String ERR_EMPTY_CHAR_CLASS = "empty char-class"; final String ERR_PREMATURE_END_OF_CHAR_CLASS = "premature end of char-class"; final String ERR_END_PATTERN_AT_ESCAPE = "end pattern at escape"; final String ERR_END_PATTERN_AT_META = "end pattern at meta"; final String ERR_END_PATTERN_AT_CONTROL = "end pattern at control"; final String ERR_META_CODE_SYNTAX = "invalid meta-code syntax"; final String ERR_CONTROL_CODE_SYNTAX = "invalid control-code syntax"; final String ERR_CHAR_CLASS_VALUE_AT_END_OF_RANGE = "char-class value at end of range"; final String ERR_CHAR_CLASS_VALUE_AT_START_OF_RANGE = "char-class value at start of range"; final String ERR_UNMATCHED_RANGE_SPECIFIER_IN_CHAR_CLASS = "unmatched range specifier in char-class"; final String ERR_TARGET_OF_REPEAT_OPERATOR_NOT_SPECIFIED = "target of repeat operator is not specified"; 
final String ERR_TARGET_OF_REPEAT_OPERATOR_INVALID = "target of repeat operator is invalid"; final String ERR_NESTED_REPEAT_OPERATOR = "nested repeat operator"; final String ERR_UNMATCHED_CLOSE_PARENTHESIS = "unmatched close parenthesis"; final String ERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS = "end pattern with unmatched parenthesis"; final String ERR_END_PATTERN_IN_GROUP = "end pattern in group"; final String ERR_UNDEFINED_GROUP_OPTION = "undefined group option"; final String ERR_INVALID_POSIX_BRACKET_TYPE = "invalid POSIX bracket type"; final String ERR_INVALID_LOOK_BEHIND_PATTERN = "invalid pattern in look-behind"; final String ERR_INVALID_REPEAT_RANGE_PATTERN = "invalid repeat range {lower,upper}"; /* values error (syntax error) */ final String ERR_TOO_BIG_NUMBER = "too big number"; final String ERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE = "too big number for repeat range"; final String ERR_UPPER_SMALLER_THAN_LOWER_IN_REPEAT_RANGE = "upper is smaller than lower in repeat range"; final String ERR_EMPTY_RANGE_IN_CHAR_CLASS = "empty range in char class"; final String ERR_MISMATCH_CODE_LENGTH_IN_CLASS_RANGE = "mismatch multibyte code length in char-class range"; final String ERR_TOO_MANY_MULTI_BYTE_RANGES = "too many multibyte code ranges are specified"; final String ERR_TOO_SHORT_MULTI_BYTE_STRING = "too short multibyte code string"; final String ERR_TOO_BIG_BACKREF_NUMBER = "too big backref number"; final String ERR_INVALID_BACKREF = Config.USE_NAMED_GROUP ? "invalid backref number/name" : "invalid backref number"; final String ERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED = "numbered backref/call is not allowed. (use name)"; final String ERR_INVALID_WIDE_CHAR_VALUE = "invalid wide-char value"; final String ERR_EMPTY_GROUP_NAME = "group name is empty"; final String ERR_INVALID_GROUP_NAME = "invalid group name <%n>"; final String ERR_INVALID_CHAR_IN_GROUP_NAME = Config.USE_NAMED_GROUP ? 
"invalid char in group name <%n>" : "invalid char in group number <%n>"; final String ERR_UNDEFINED_NAME_REFERENCE = "undefined name <%n> reference"; final String ERR_UNDEFINED_GROUP_REFERENCE = "undefined group <%n> reference"; final String ERR_MULTIPLEX_DEFINED_NAME = "multiplex defined name <%n>"; final String ERR_MULTIPLEX_DEFINITION_NAME_CALL = "multiplex definition name <%n> call"; final String ERR_NEVER_ENDING_RECURSION = "never ending recursion"; final String ERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY = "group number is too big for capture history"; final String ERR_NOT_SUPPORTED_ENCODING_COMBINATION = "not supported encoding combination"; final String ERR_INVALID_COMBINATION_OF_OPTIONS = "invalid combination of options"; final String ERR_OVER_THREAD_PASS_LIMIT_COUNT = "over thread pass limit count"; final String ERR_TOO_BIG_SB_CHAR_VALUE = "too big singlebyte char value"; } joni-2.0.0/src/org/joni/exception/InternalException.java000066400000000000000000000024421214326443200232640ustar00rootroot00000000000000/* * Permission is hereby granted, free of charge, to any person obtaining a copy of * this software and associated documentation files (the "Software"), to deal in * the Software without restriction, including without limitation the rights to * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do * so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
package org.joni.exception;

/**
 * A {@link JOniException} subtype that carries only a message.
 * NOTE(review): by its name this is meant for the "(bug)" style internal
 * errors; confirm at the throw sites.
 */
public class InternalException extends JOniException {

    private static final long serialVersionUID = -3871816465397927992L;

    /**
     * @param message text returned by {@link #getMessage()}
     */
    public InternalException(String message) {
        super(message);
    }
}
joni-2.0.0/src/org/joni/exception/JOniException.java000066400000000000000000000024351214326443200223510ustar00rootroot00000000000000/*
 * Permission is hereby granted, free of charge, to any person obtaining a copy of
 * this software and associated documentation files (the "Software"), to deal in
 * the Software without restriction, including without limitation the rights to
 * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
 * of the Software, and to permit persons to whom the Software is furnished to do
 * so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
*/ package org.joni.exception; public class JOniException extends RuntimeException{ private static final long serialVersionUID = -6027192180014164667L; public JOniException(String message) { super(message); } } joni-2.0.0/src/org/joni/exception/SyntaxException.java000066400000000000000000000024351214326443200230000ustar00rootroot00000000000000/* * Permission is hereby granted, free of charge, to any person obtaining a copy of * this software and associated documentation files (the "Software"), to deal in * the Software without restriction, including without limitation the rights to * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do * so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ package org.joni.exception; public class SyntaxException extends JOniException{ private static final long serialVersionUID = 7862720128961874288L; public SyntaxException(String message) { super(message); } } joni-2.0.0/src/org/joni/exception/ValueException.java000066400000000000000000000030211214326443200225560ustar00rootroot00000000000000/* * Permission is hereby granted, free of charge, to any person obtaining a copy of * this software and associated documentation files (the "Software"), to deal in * the Software without restriction, including without limitation the rights to * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do * so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ package org.joni.exception; public class ValueException extends SyntaxException{ private static final long serialVersionUID = -196013852479929134L; public ValueException(String message) { super(message); } public ValueException(String message, String str) { super(message.replaceAll("%n", str)); } public ValueException(String message, byte[]bytes, int p, int end) { this(message, new String(bytes, p, end - p)); } } joni-2.0.0/test/000077500000000000000000000000001214326443200134305ustar00rootroot00000000000000joni-2.0.0/test/org/000077500000000000000000000000001214326443200142175ustar00rootroot00000000000000joni-2.0.0/test/org/joni/000077500000000000000000000000001214326443200151565ustar00rootroot00000000000000joni-2.0.0/test/org/joni/test/000077500000000000000000000000001214326443200161355ustar00rootroot00000000000000joni-2.0.0/test/org/joni/test/Test.java000066400000000000000000000260731214326443200177270ustar00rootroot00000000000000/* * Permission is hereby granted, free of charge, to any person obtaining a copy of * this software and associated documentation files (the "Software"), to deal in * the Software without restriction, including without limitation the rights to * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do * so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ package org.joni.test; import java.io.UnsupportedEncodingException; import org.joni.Config; import org.joni.Matcher; import org.joni.Option; import org.joni.Regex; import org.joni.Region; import org.joni.Syntax; import org.jcodings.Encoding; import org.joni.exception.JOniException; public abstract class Test { static final boolean VERBOSE = false; int nsucc; int nerror; int nfail; public abstract int option(); public abstract Encoding encoding(); public abstract String testEncoding(); public abstract Syntax syntax(); protected String repr(byte[]bytes) { return new String(bytes); } protected int length(byte[]bytes) { return bytes.length; } protected void assertTrue(boolean expression, String... failMessage) { if (expression) { nsucc++; } else { Config.err.println(failMessage); nfail++; } } public void xx(byte[]pattern, byte[]str, int from, int to, int mem, boolean not) { xx(pattern, str, from, to, mem, not, option()); } public int xx(byte[]pattern, byte[]str, int from, int to, int mem, boolean not, int option) { Regex reg; try { reg = new Regex(pattern, 0, length(pattern), option, encoding(), syntax()); } catch (JOniException je) { Config.err.println("Pattern: " + repr(pattern) + " Str: " + repr(str)); je.printStackTrace(Config.err); Config.err.println("ERROR: " + je.getMessage()); nerror++; return Matcher.FAILED; } catch (Exception e) { Config.err.println("Pattern: " + repr(pattern) + " Str: " + repr(str)); e.printStackTrace(Config.err); Config.err.println("SEVERE ERROR: " + e.getMessage()); nerror++; return Matcher.FAILED; } Matcher m = reg.matcher(str, 0, length(str)); Region region; int r = 0; try { r = m.search(0, length(str), Option.NONE); region = m.getEagerRegion(); } catch (JOniException je) 
{ Config.err.println("Pattern: " + repr(pattern) + " Str: " + repr(str)); je.printStackTrace(Config.err); Config.err.println("ERROR: " + je.getMessage()); nerror++; return Matcher.FAILED; } catch (Exception e) { Config.err.println("Pattern: " + repr(pattern) + " Str: " + repr(str)); e.printStackTrace(Config.err); Config.err.println("SEVERE ERROR: " + e.getMessage()); nerror++; return Matcher.FAILED; } if (r == -1) { if (not) { if (VERBOSE) Config.log.println("OK(N): /" + repr(pattern) + "/ '" + repr(str) + "'"); nsucc++; } else { Config.log.println("FAIL: /" + repr(pattern) + "/ '" + repr(str) + "'"); nfail++; } } else { if (not) { Config.log.println("FAIL(N): /" + repr(pattern) + "/ '" + repr(str) + "'"); nfail++; } else { if (region.beg[mem] == from && region.end[mem] == to) { if (VERBOSE) Config.log.println("OK: /" + repr(pattern) + "/ '" +repr(str) + "'"); nsucc++; } else { Config.log.println("FAIL: /" + repr(pattern) + "/ '" + repr(str) + "' " + from + "-" + to + " : " + region.beg[mem] + "-" + region.end[mem] ); nfail++; } } } return r; } public void xxi(byte[]pattern, byte[]str, int from, int to, int mem, boolean not, int option) throws InterruptedException { Regex reg; try { reg = new Regex(pattern, 0, length(pattern), option, encoding(), syntax()); } catch (JOniException je) { Config.err.println("Pattern: " + repr(pattern) + " Str: " + repr(str)); je.printStackTrace(Config.err); Config.err.println("ERROR: " + je.getMessage()); nerror++; return; } catch (Exception e) { Config.err.println("Pattern: " + repr(pattern) + " Str: " + repr(str)); e.printStackTrace(Config.err); Config.err.println("SEVERE ERROR: " + e.getMessage()); nerror++; return; } Matcher m = reg.matcher(str, 0, length(str)); Region region; int r = 0; try { r = m.searchInterruptible(0, length(str), Option.NONE); region = m.getEagerRegion(); } catch (JOniException je) { Config.err.println("Pattern: " + repr(pattern) + " Str: " + repr(str)); je.printStackTrace(Config.err); 
Config.err.println("ERROR: " + je.getMessage()); nerror++; return; } catch (InterruptedException e) { throw e; } catch (Exception e) { Config.err.println("Pattern: " + repr(pattern) + " Str: " + repr(str)); e.printStackTrace(Config.err); Config.err.println("SEVERE ERROR: " + e.getMessage()); nerror++; return; } if (r == -1) { if (not) { if (VERBOSE) Config.log.println("OK(N): /" + repr(pattern) + "/ '" + repr(str) + "'"); nsucc++; } else { Config.log.println("FAIL: /" + repr(pattern) + "/ '" + repr(str) + "'"); nfail++; } } else { if (not) { Config.log.println("FAIL(N): /" + repr(pattern) + "/ '" + repr(str) + "'"); nfail++; } else { if (region.beg[mem] == from && region.end[mem] == to) { if (VERBOSE) Config.log.println("OK: /" + repr(pattern) + "/ '" +repr(str) + "'"); nsucc++; } else { Config.log.println("FAIL: /" + repr(pattern) + "/ '" + repr(str) + "' " + from + "-" + to + " : " + region.beg[mem] + "-" + region.end[mem] ); nfail++; } } } } protected void x2(byte[]pattern, byte[]str, int from, int to) { xx(pattern, str, from, to, 0, false); } protected void x2(byte[]pattern, byte[]str, int from, int to, int option) { xx(pattern, str, from, to, 0, false, option); } protected void x3(byte[]pattern, byte[]str, int from, int to, int mem) { xx(pattern, str, from, to, mem, false); } protected void n(byte[]pattern, byte[]str) { xx(pattern, str, 0, 0, 0, true); } protected void n(byte[]pattern, byte[]str, int option) { xx(pattern, str, 0, 0, 0, true, option); } public void xxs(String pattern, String str, int from, int to, int mem, boolean not) { xxs(pattern, str, from, to, mem, not, option()); } public void xxs(String pattern, String str, int from, int to, int mem, boolean not, int option) { try{ xx(pattern.getBytes(testEncoding()), str.getBytes(testEncoding()), from, to, mem, not, option); } catch (UnsupportedEncodingException uee) { uee.printStackTrace(); } } public void xxsi(String pattern, String str, int from, int to, int mem, boolean not) throws 
InterruptedException { xxsi(pattern, str, from, to, mem, not, option()); } public void xxsi(String pattern, String str, int from, int to, int mem, boolean not, int option) throws InterruptedException { try{ xxi(pattern.getBytes(testEncoding()), str.getBytes(testEncoding()), from, to, mem, not, option); } catch (UnsupportedEncodingException uee) { uee.printStackTrace(); } } public int x2s(String pattern, String str, int from, int to) { return x2s(pattern, str, from, to, option()); } public int x2s(String pattern, String str, int from, int to, int option) { try{ return xx(pattern.getBytes(testEncoding()), str.getBytes(testEncoding()), from, to, 0, false, option); } catch (UnsupportedEncodingException uee) { uee.printStackTrace(); return Matcher.FAILED; } } public void x2si(String pattern, String str, int from, int to) throws InterruptedException { x2si(pattern, str, from, to, option()); } public void x2si(String pattern, String str, int from, int to, int option) throws InterruptedException { try{ xxi(pattern.getBytes(testEncoding()), str.getBytes(testEncoding()), from, to, 0, false, option); } catch (UnsupportedEncodingException uee) { uee.printStackTrace(); } } public void x3s(String pattern, String str, int from, int to, int mem) { x3s(pattern, str, from, to, mem, option()); } public void x3s(String pattern, String str, int from, int to, int mem, int option) { try{ xx(pattern.getBytes(testEncoding()), str.getBytes(testEncoding()), from, to, mem, false, option); } catch (UnsupportedEncodingException uee) { uee.printStackTrace(); } } public void ns(String pattern, String str) { ns(pattern, str, option()); } public void ns(String pattern, String str, int option) { try{ xx(pattern.getBytes(testEncoding()), str.getBytes(testEncoding()), 0, 0, 0, true, option); } catch (UnsupportedEncodingException uee) { uee.printStackTrace(); } } public void printResults() { Config.log.println("RESULT SUCC: " + nsucc + ", FAIL: " + nfail + ", ERROR: " + nerror + " Test: " + 
getClass().getSimpleName() + ", Encoding: " + encoding()); } public abstract void test() throws InterruptedException; public final void run() { try { test(); } catch (InterruptedException ie) { } printResults(); } } joni-2.0.0/test/org/joni/test/TestA.java000066400000000000000000000503501214326443200200230ustar00rootroot00000000000000/* * Permission is hereby granted, free of charge, to any person obtaining a copy of * this software and associated documentation files (the "Software"), to deal in * the Software without restriction, including without limitation the rights to * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do * so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ package org.joni.test; import org.joni.Option; import org.joni.Syntax; import org.jcodings.Encoding; import org.jcodings.specific.ASCIIEncoding; public class TestA extends Test { public int option() { return Option.DEFAULT; } public Encoding encoding() { return ASCIIEncoding.INSTANCE; } public String testEncoding() { return "iso-8859-2"; } public Syntax syntax() { return Syntax.DEFAULT; } public void test() throws InterruptedException { x2s("", "", 0, 0); x2s("^", "", 0, 0); x2s("$", "", 0, 0); x2s("\\G", "", 0, 0); x2s("\\A", "", 0, 0); x2s("\\Z", "", 0, 0); x2s("\\z", "", 0, 0); x2s("^$", "", 0, 0); x2s("\\ca", "\001", 0, 1); x2s("\\C-b", "\002", 0, 1); x2s("\\c\\\\", "\034", 0, 1); x2s("q[\\c\\\\]", "q\034", 0, 2); x2s("", "a", 0, 0); x2s("a", "a", 0, 1); x2s("\\x61", "a", 0, 1); x2s("aa", "aa", 0, 2); x2s("aaa", "aaa", 0, 3); x2s("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", 0, 35); x2s("ab", "ab", 0, 2); x2s("b", "ab", 1, 2); x2s("bc", "abc", 1, 3); x2s("(?i:#RET#)", "#INS##RET#", 5, 10); x2s("\\17", "\017", 0, 1); x2s("\\x1f", "\u001f", 0, 1); x2s("\\xED\\xF2", "\u00ed\u0148", 0, 2); x2s("a(?#....\\\\JJJJ)b", "ab", 0, 2); x2s("(?x) G (o O(?-x)oO) g L", "GoOoOgLe", 0, 7); x2s(".", "a", 0, 1); ns(".", ""); x2s("..", "ab", 0, 2); x2s("\\w", "e", 0, 1); ns("\\W", "e"); x2s("\\s", " ", 0, 1); x2s("\\S", "b", 0, 1); x2s("\\d", "4", 0, 1); ns("\\D", "4"); x2s("\\b", "z ", 0, 0); x2s("\\b", " z", 1, 1); x2s("\\B", "zz ", 1, 1); x2s("\\B", "z ", 2, 2); x2s("\\B", " z", 0, 0); x2s("[ab]", "b", 0, 1); ns("[ab]", "c"); x2s("[a-z]", "t", 0, 1); ns("[^a]", "a"); x2s("[^a]", "\n", 0, 1); x2s("[]]", "]", 0, 1); ns("[^]]", "]"); x2s("[\\^]+", "0^^1", 1, 3); x2s("[b-]", "b", 0, 1); x2s("[b-]", "-", 0, 1); x2s("[\\w]", "z", 0, 1); ns("[\\w]", " "); x2s("[\\W]", "b$", 1, 2); x2s("[\\d]", "5", 0, 1); ns("[\\d]", "e"); x2s("[\\D]", "t", 0, 1); ns("[\\D]", "3"); x2s("[\\s]", " ", 0, 1); ns("[\\s]", "a"); x2s("[\\S]", "b", 0, 1); ns("[\\S]", " "); 
x2s("[\\w\\d]", "2", 0, 1); ns("[\\w\\d]", " "); x2s("[[:upper:]]", "B", 0, 1); x2s("[*[:xdigit:]+]", "+", 0, 1); x2s("[*[:xdigit:]+]", "GHIKK-9+*", 6, 7); x2s("[*[:xdigit:]+]", "-@^+", 3, 4); ns("[[:upper]]", "A"); x2s("[[:upper]]", ":", 0, 1); x2s("[\\044-\\047]", "\046", 0, 1); x2s("[\\x5a-\\x5c]", "\u005b", 0, 1); x2s("[\\x6A-\\x6D]", "\u006c", 0, 1); ns("[\\x6A-\\x6D]", "\u006e"); ns("^[0-9A-F]+ 0+ UNDEF ", "75F 00000000 SECT14A notype () External | _rb_apply"); x2s("[\\[]", "[", 0, 1); x2s("[\\]]", "]", 0, 1); x2s("[&]", "&", 0, 1); x2s("[[ab]]", "b", 0, 1); x2s("[[ab]c]", "c", 0, 1); ns("[[^a]]", "a"); ns("[^[a]]", "a"); x2s("[[ab]&&bc]", "b", 0, 1); ns("[[ab]&&bc]", "a"); ns("[[ab]&&bc]", "c"); x2s("[a-z&&b-y&&c-x]", "w", 0, 1); ns("[^a-z&&b-y&&c-x]", "w"); x2s("[[^a&&a]&&a-z]", "b", 0, 1); ns("[[^a&&a]&&a-z]", "a"); x2s("[[^a-z&&bcdef]&&[^c-g]]", "h", 0, 1); ns("[[^a-z&&bcdef]&&[^c-g]]", "c"); x2s("[^[^abc]&&[^cde]]", "c", 0, 1); x2s("[^[^abc]&&[^cde]]", "e", 0, 1); ns("[^[^abc]&&[^cde]]", "f"); x2s("[a-&&-a]", "-", 0, 1); ns("[a\\-&&\\-a]", "&"); ns("\\wabc", " abc"); x2s("a\\Wbc", "a bc", 0, 4); x2s("a.b.c", "aabbc", 0, 5); x2s(".\\wb\\W..c", "abb bcc", 0, 7); x2s("\\s\\wzzz", " zzzz", 0, 5); x2s("aa.b", "aabb", 0, 4); ns(".a", "ab"); x2s(".a", "aa", 0, 2); x2s("^a", "a", 0, 1); x2s("^a$", "a", 0, 1); x2s("^\\w$", "a", 0, 1); ns("^\\w$", " "); x2s("^\\wab$", "zab", 0, 3); x2s("^\\wabcdef$", "zabcdef", 0, 7); x2s("^\\w...def$", "zabcdef", 0, 7); x2s("\\w\\w\\s\\Waaa\\d", "aa aaa4", 0, 8); x2s("\\A\\Z", "", 0, 0); x2s("\\Axyz", "xyz", 0, 3); x2s("xyz\\Z", "xyz", 0, 3); x2s("xyz\\z", "xyz", 0, 3); x2s("a\\Z", "a", 0, 1); x2s("\\Gaz", "az", 0, 2); ns("\\Gz", "bza"); ns("az\\G", "az"); ns("az\\A", "az"); ns("a\\Az", "az"); x2s("\\^\\$", "^$", 0, 2); x2s("^x?y", "xy", 0, 2); x2s("^(x?y)", "xy", 0, 2); x2s("\\w", "_", 0, 1); ns("\\W", "_"); x2s("(?=z)z", "z", 0, 1); ns("(?=z).", "a"); x2s("(?!z)a", "a", 0, 1); ns("(?!z)a", "z"); x2s("(?i:a)", "a", 0, 1); 
x2s("(?i:a)", "A", 0, 1); x2s("(?i:A)", "a", 0, 1); ns("(?i:A)", "b"); x2s("(?i:[A-Z])", "a", 0, 1); x2s("(?i:[f-m])", "H", 0, 1); x2s("(?i:[f-m])", "h", 0, 1); ns("(?i:[f-m])", "e"); x2s("(?i:[A-c])", "D", 0, 1); x2s("(?i:[!-k])", "Z", 0, 1); x2s("(?i:[!-k])", "7", 0, 1); x2s("(?i:[T-}])", "b", 0, 1); x2s("(?i:[T-}])", "{", 0, 1); x2s("(?i:\\?a)", "?A", 0, 2); x2s("(?i:\\*A)", "*a", 0, 2); ns(".", "\n"); x2s("(?m:.)", "\n", 0, 1); x2s("(?m:a.)", "a\n", 0, 2); x2s("(?m:.b)", "a\nb", 1, 3); x2s(".*abc", "dddabdd\nddabc", 8, 13); x2s("(?m:.*abc)", "dddabddabc", 0, 10); ns("(?i)(?-i)a", "A"); ns("(?i)(?-i:a)", "A"); x2s("a?", "", 0, 0); x2s("a?", "b", 0, 0); x2s("a?", "a", 0, 1); x2s("a*", "", 0, 0); x2s("a*", "a", 0, 1); x2s("a*", "aaa", 0, 3); x2s("a*", "baaaa", 0, 0); ns("a+", ""); x2s("a+", "a", 0, 1); x2s("a+", "aaaa", 0, 4); x2s("a+", "aabbb", 0, 2); x2s("a+", "baaaa", 1, 5); x2s(".?", "", 0, 0); x2s(".?", "f", 0, 1); x2s(".?", "\n", 0, 0); x2s(".*", "", 0, 0); x2s(".*", "abcde", 0, 5); x2s(".+", "z", 0, 1); x2s(".+", "zdswer\n", 0, 6); x2s("(.*)a\\1f", "babfbac", 0, 4); x2s("(.*)a\\1f", "bacbabf", 3, 7); x2s("((.*)a\\2f)", "bacbabf", 3, 7); x2s("(.*)a\\1f", "baczzzzzz\nbazz\nzzzzbabf", 19, 23); x2s("a|b", "a", 0, 1); x2s("a|b", "b", 0, 1); x2s("|a", "a", 0, 0); x2s("(|a)", "a", 0, 0); x2s("ab|bc", "ab", 0, 2); x2s("ab|bc", "bc", 0, 2); x2s("z(?:ab|bc)", "zbc", 0, 3); x2s("a(?:ab|bc)c", "aabc", 0, 4); x2s("ab|(?:ac|az)", "az", 0, 2); x2s("a|b|c", "dc", 1, 2); x2s("a|b|cd|efg|h|ijk|lmn|o|pq|rstuvwx|yz", "pqr", 0, 2); ns("a|b|cd|efg|h|ijk|lmn|o|pq|rstuvwx|yz", "mn"); x2s("a|^z", "ba", 1, 2); x2s("a|^z", "za", 0, 1); x2s("a|\\Gz", "bza", 2, 3); x2s("a|\\Gz", "za", 0, 1); x2s("a|\\Az", "bza", 2, 3); x2s("a|\\Az", "za", 0, 1); x2s("a|b\\Z", "ba", 1, 2); x2s("a|b\\Z", "b", 0, 1); x2s("a|b\\z", "ba", 1, 2); x2s("a|b\\z", "b", 0, 1); x2s("\\w|\\s", " ", 0, 1); ns("\\w|\\w", " "); x2s("\\w|%", "%", 0, 1); x2s("\\w|[&$]", "&", 0, 1); x2s("[b-d]|[^e-z]", "a", 0, 1); 
x2s("(?:a|[c-f])|bz", "dz", 0, 1); x2s("(?:a|[c-f])|bz", "bz", 0, 2); x2s("abc|(?=zz)..f", "zzf", 0, 3); x2s("abc|(?!zz)..f", "abf", 0, 3); x2s("(?=za)..a|(?=zz)..a", "zza", 0, 3); ns("(?>a|abd)c", "abdc"); x2s("(?>abd|a)c", "abdc", 0, 4); x2s("a?|b", "a", 0, 1); x2s("a?|b", "b", 0, 0); x2s("a?|b", "", 0, 0); x2s("a*|b", "aa", 0, 2); x2s("a*|b*", "ba", 0, 0); x2s("a*|b*", "ab", 0, 1); x2s("a+|b*", "", 0, 0); x2s("a+|b*", "bbb", 0, 3); x2s("a+|b*", "abbb", 0, 1); ns("a+|b+", ""); x2s("(a|b)?", "b", 0, 1); x2s("(a|b)*", "ba", 0, 2); x2s("(a|b)+", "bab", 0, 3); x2s("(ab|ca)+", "caabbc", 0, 4); x2s("(ab|ca)+", "aabca", 1, 5); x2s("(ab|ca)+", "abzca", 0, 2); x2s("(a|bab)+", "ababa", 0, 5); x2s("(a|bab)+", "ba", 1, 2); x2s("(a|bab)+", "baaaba", 1, 4); x2s("(?:a|b)(?:a|b)", "ab", 0, 2); x2s("(?:a*|b*)(?:a*|b*)", "aaabbb", 0, 3); x2s("(?:a*|b*)(?:a+|b+)", "aaabbb", 0, 6); x2s("(?:a+|b+){2}", "aaabbb", 0, 6); x2s("h{0,}", "hhhh", 0, 4); x2s("(?:a+|b+){1,2}", "aaabbb", 0, 6); ns("ax{2}*a", "0axxxa1"); ns("a.{0,2}a", "0aXXXa0"); ns("a.{0,2}?a", "0aXXXa0"); ns("a.{0,2}?a", "0aXXXXa0"); x2s("^a{2,}?a$", "aaa", 0, 3); x2s("^[a-z]{2,}?$", "aaa", 0, 3); x2s("(?:a+|\\Ab*)cc", "cc", 0, 2); ns("(?:a+|\\Ab*)cc", "abcc"); x2s("(?:^a+|b+)*c", "aabbbabc", 6, 8); x2s("(?:^a+|b+)*c", "aabbbbc", 0, 7); x2s("a|(?i)c", "C", 0, 1); x2s("(?i)c|a", "C", 0, 1); x2s("(?i)c|a", "A", 0, 1); x2s("(?i:c)|a", "C", 0, 1); ns("(?i:c)|a", "A"); x2s("[abc]?", "abc", 0, 1); x2s("[abc]*", "abc", 0, 3); x2s("[^abc]*", "abc", 0, 0); ns("[^abc]+", "abc"); x2s("a??", "aaa", 0, 0); x2s("ba??b", "bab", 0, 3); x2s("a*?", "aaa", 0, 0); x2s("ba*?", "baa", 0, 1); x2s("ba*?b", "baab", 0, 4); x2s("a+?", "aaa", 0, 1); x2s("ba+?", "baa", 0, 2); x2s("ba+?b", "baab", 0, 4); x2s("(?:a?)??", "a", 0, 0); x2s("(?:a??)?", "a", 0, 0); x2s("(?:a?)+?", "aaa", 0, 1); x2s("(?:a+)??", "aaa", 0, 0); x2s("(?:a+)??b", "aaab", 0, 4); x2s("(?:ab)?{2}", "", 0, 0); x2s("(?:ab)?{2}", "ababa", 0, 4); x2s("(?:ab)*{0}", "ababa", 0, 0); 
x2s("(?:ab){3,}", "abababab", 0, 8); ns("(?:ab){3,}", "abab"); x2s("(?:ab){2,4}", "ababab", 0, 6); x2s("(?:ab){2,4}", "ababababab", 0, 8); x2s("(?:ab){2,4}?", "ababababab", 0, 4); x2s("(?:ab){,}", "ab{,}", 0, 5); x2s("(?:abc)+?{2}", "abcabcabc", 0, 6); x2s("(?:X*)(?i:xa)", "XXXa", 0, 4); x2s("(d+)([^abc]z)", "dddz", 0, 4); x2s("([^abc]*)([^abc]z)", "dddz", 0, 4); x2s("(\\w+)(\\wz)", "dddz", 0, 4); x3s("(a)", "a", 0, 1, 1); x3s("(ab)", "ab", 0, 2, 1); x2s("((ab))", "ab", 0, 2); x3s("((ab))", "ab", 0, 2, 1); x3s("((ab))", "ab", 0, 2, 2); x3s("((((((((((((((((((((ab))))))))))))))))))))", "ab", 0, 2, 20); x3s("(ab)(cd)", "abcd", 0, 2, 1); x3s("(ab)(cd)", "abcd", 2, 4, 2); x3s("()(a)bc(def)ghijk", "abcdefghijk", 3, 6, 3); x3s("(()(a)bc(def)ghijk)", "abcdefghijk", 3, 6, 4); x2s("(^a)", "a", 0, 1); x3s("(a)|(a)", "ba", 1, 2, 1); x3s("(^a)|(a)", "ba", 1, 2, 2); x3s("(a?)", "aaa", 0, 1, 1); x3s("(a*)", "aaa", 0, 3, 1); x3s("(a*)", "", 0, 0, 1); x3s("(a+)", "aaaaaaa", 0, 7, 1); x3s("(a+|b*)", "bbbaa", 0, 3, 1); x3s("(a+|b?)", "bbbaa", 0, 1, 1); x3s("(abc)?", "abc", 0, 3, 1); x3s("(abc)*", "abc", 0, 3, 1); x3s("(abc)+", "abc", 0, 3, 1); x3s("(xyz|abc)+", "abc", 0, 3, 1); x3s("([xyz][abc]|abc)+", "abc", 0, 3, 1); x3s("((?i:abc))", "AbC", 0, 3, 1); x2s("(abc)(?i:\\1)", "abcABC", 0, 6); x3s("((?m:a.c))", "a\nc", 0, 3, 1); x3s("((?=az)a)", "azb", 0, 1, 1); x3s("abc|(.abd)", "zabd", 0, 4, 1); x2s("(?:abc)|(ABC)", "abc", 0, 3); x3s("(?i:(abc))|(zzz)", "ABC", 0, 3, 1); x3s("a*(.)", "aaaaz", 4, 5, 1); x3s("a*?(.)", "aaaaz", 0, 1, 1); x3s("a*?(c)", "aaaac", 4, 5, 1); x3s("[bcd]a*(.)", "caaaaz", 5, 6, 1); x3s("(\\Abb)cc", "bbcc", 0, 2, 1); ns("(\\Abb)cc", "zbbcc"); x3s("(^bb)cc", "bbcc", 0, 2, 1); ns("(^bb)cc", "zbbcc"); x3s("cc(bb$)", "ccbb", 2, 4, 1); ns("cc(bb$)", "ccbbb"); ns("(\\1)", ""); ns("\\1(a)", "aa"); ns("(a(b)\\1)\\2+", "ababb"); ns("(?:(?:\\1|z)(a))+$", "zaa"); x2s("(?:(?:\\1|z)(a))+$", "zaaa", 0, 4); x2s("(a)(?=\\1)", "aa", 0, 1); ns("(a)$|\\1", "az"); x2s("(a)\\1", 
"aa", 0, 2); ns("(a)\\1", "ab"); x2s("(a?)\\1", "aa", 0, 2); x2s("(a??)\\1", "aa", 0, 0); x2s("(a*)\\1", "aaaaa", 0, 4); x3s("(a*)\\1", "aaaaa", 0, 2, 1); x2s("a(b*)\\1", "abbbb", 0, 5); x2s("a(b*)\\1", "ab", 0, 1); x2s("(a*)(b*)\\1\\2", "aaabbaaabb", 0, 10); x2s("(a*)(b*)\\2", "aaabbbb", 0, 7); x2s("(((((((a*)b))))))c\\7", "aaabcaaa", 0, 8); x3s("(((((((a*)b))))))c\\7", "aaabcaaa", 0, 3, 7); x2s("(a)(b)(c)\\2\\1\\3", "abcbac", 0, 6); x2s("([a-d])\\1", "cc", 0, 2); x2s("(\\w\\d\\s)\\1", "f5 f5 ", 0, 6); ns("(\\w\\d\\s)\\1", "f5 f5"); x2s("(who|[a-c]{3})\\1", "whowho", 0, 6); x2s("...(who|[a-c]{3})\\1", "abcwhowho", 0, 9); x2s("(who|[a-c]{3})\\1", "cbccbc", 0, 6); x2s("(^a)\\1", "aa", 0, 2); ns("(^a)\\1", "baa"); ns("(a$)\\1", "aa"); ns("(ab\\Z)\\1", "ab"); x2s("(a*\\Z)\\1", "a", 1, 1); x2s(".(a*\\Z)\\1", "ba", 1, 2); x3s("(.(abc)\\2)", "zabcabc", 0, 7, 1); x3s("(.(..\\d.)\\2)", "z12341234", 0, 9, 1); x2s("((?i:az))\\1", "AzAz", 0, 4); ns("((?i:az))\\1", "Azaz"); x2s("(?<=a)b", "ab", 1, 2); ns("(?<=a)b", "bb"); x2s("(?<=a|b)b", "bb", 1, 2); x2s("(?<=a|bc)b", "bcb", 2, 3); x2s("(?<=a|bc)b", "ab", 1, 2); x2s("(?<=a|bc||defghij|klmnopq|r)z", "rz", 1, 2); x2s("(a)\\g<1>", "aa", 0, 2); x2s("(?a)", "a", 0, 1); x2s("(?ab)\\g", "abab", 0, 4); x2s("(?.zv.)\\k", "azvbazvb", 0, 8); x2s("(?<=\\g)|-\\zEND (?XyZ)", "XyZ", 3, 3); x2s("(?|a\\g)+", "", 0, 0); x2s("(?|\\(\\g\\))+$", "()(())", 0, 6); x3s("\\g(?.){0}", "X", 0, 1, 1); x2s("\\g(abc|df(?.YZ){2,8}){0}", "XYZ", 0, 3); x2s("\\A(?(a\\g)|)\\z", "aaaa", 0, 4); x2s("(?|\\g\\g)\\z|\\zEND (?a|(b)\\g)", "bbbbabba", 0, 8); x2s("(?\\w+\\sx)a+\\k", " fg xaaaaaaaafg x", 2, 18); x3s("(z)()()(?<_9>a)\\g<_9>", "zaa", 2, 3, 1); x2s("(.)(((?<_>a)))\\k<_>", "zaa", 0, 3); x2s("((?\\d)|(?\\w))(\\k|\\k)", "ff", 0, 2); x2s("(?:(?)|(?efg))\\k", "", 0, 0); x2s("(?:(?abc)|(?efg))\\k", "abcefgefg", 3, 9); ns("(?:(?abc)|(?efg))\\k", "abcefg"); 
x2s("(?:(?.)|(?..)|(?...)|(?....)|(?.....)|(?......)|(?.......)|(?........)|(?.........)|(?..........)|(?...........)|(?............)|(?.............)|(?..............))\\k$", "a-pyumpyum", 2, 10); x3s("(?:(?.)|(?..)|(?...)|(?....)|(?.....)|(?......)|(?.......)|(?........)|(?.........)|(?..........)|(?...........)|(?............)|(?.............)|(?..............))\\k$", "xxxxabcdefghijklmnabcdefghijklmn", 4, 18, 14); x3s("(?)(?)(?)(?)(?)(?)(?)(?)(?)(?)(?)(?)(?)(?)(?)(?aaa)(?)$", "aaa", 0, 3, 16); x2s("(?a|\\(\\g\\))", "a", 0, 1); x2s("(?a|\\(\\g\\))", "((((((a))))))", 0, 13); x3s("(?a|\\(\\g\\))", "((((((((a))))))))", 0, 17, 1); x2s("\\g|\\zEND(?.*abc$)", "abcxxxabc", 0, 9); x2s("\\g<1>|\\zEND(.a.)", "bac", 0, 3); x3s("\\g<_A>\\g<_A>|\\zEND(.a.)(?<_A>.b.)", "xbxyby", 3, 6, 1); x2s("\\A(?:\\g|\\g|\\zEND (?a|c\\gc)(?b|d\\gd))$", "cdcbcdc", 0, 7); x2s("\\A(?|a\\g)\\z|\\zEND (?\\g)", "aaaa", 0, 4); x2s("(?(a|b\\gc){3,5})", "baaaaca", 1, 5); x2s("(?(a|b\\gc){3,5})", "baaaacaaaaa", 0, 10); x2s("(?\\(([^\\(\\)]++|\\g)*+\\))", "((a))", 0, 5); x2s("()*\\1", "", 0, 0); x2s("(?:()|())*\\1\\2", "", 0, 0); x3s("(?:\\1a|())*", "a", 0, 0, 1); x2s("x((.)*)*x", "0x1x2x3", 1, 6); x2s("x((.)*)*x(?i:\\1)\\Z", "0x1x2x1X2", 1, 9); x2s("(?:()|()|()|()|()|())*\\2\\5", "", 0, 0); x2s("(?:()|()|()|(x)|()|())*\\2b\\5", "b", 0, 1); x3s("\\A(?|.|(?:(?.)\\g\\k))\\z", "reer", 0, 4, 1); x3s("(?-i:\\g)(?i:(?a)){0}", "A", 0, 1, 1); String pat = "(? \\g \\g* \\g ){0}" + "(? < \\g \\s* > ){0}" + "(? [a-zA-Z_:]+ ){0}" + "(? [^<&]+ (\\g | [^<&]+)* ){0}" + "(? 
>){0}" + "\\g"; String str = "fbbbf"; x3s(pat, str, 0, 27, 0, Option.EXTEND); x3s(pat, str, 0, 27, 1, Option.EXTEND); x3s(pat, str, 6, 11, 2, Option.EXTEND); x3s(pat, str, 7, 10, 3, Option.EXTEND); x3s(pat, str, 5, 21, 4, Option.EXTEND); x3s(pat, str, 21, 27, 5, Option.EXTEND); x2s("(a)b\\k<1>", "aba", 0, 3); x2s("^(?>(?=a)(a|))++$", "a", 0, 1); x2s("\\k", "k", 0, 1); x2s("\\kx", "kx", 0, 2); x2s("\\g", "g", 0, 1); x2s("\\gx", "gx", 0, 2); x2s("\\k\\g", "kg", 0, 2); ns("\\00", "00"); ns("\\70", "70"); x2s("\\80", "80", 0, 2); x2s("\\90", "90", 0, 2); ns("(?a|abd)c", "abdc"); x2s("(?>abd|a)c", "abdc", 0, 4); x2s("a?|b", "a", 0, 1); x2s("a?|b", "b", 0, 0); x2s("a?|b", "", 0, 0); x2s("a*|b", "aa", 0, 2); x2s("a*|b*", "ba", 0, 0); x2s("a*|b*", "ab", 0, 1); x2s("a+|b*", "", 0, 0); x2s("a+|b*", "bbb", 0, 3); x2s("a+|b*", "abbb", 0, 1); ns("a+|b+", ""); x2s("(a|b)?", "b", 0, 1); x2s("(a|b)*", "ba", 0, 2); x2s("(a|b)+", "bab", 0, 3); x2s("(ab|ca)+", "caabbc", 0, 4); x2s("(ab|ca)+", "aabca", 1, 5); x2s("(ab|ca)+", "abzca", 0, 2); x2s("(a|bab)+", "ababa", 0, 5); x2s("(a|bab)+", "ba", 1, 2); x2s("(a|bab)+", "baaaba", 1, 4); x2s("(?:a|b)(?:a|b)", "ab", 0, 2); x2s("(?:a*|b*)(?:a*|b*)", "aaabbb", 0, 3); x2s("(?:a*|b*)(?:a+|b+)", "aaabbb", 0, 6); x2s("(?:a+|b+){2}", "aaabbb", 0, 6); x2s("h{0,}", "hhhh", 0, 4); x2s("(?:a+|b+){1,2}", "aaabbb", 0, 6); ns("ax{2}*a", "0axxxa1"); ns("a.{0,2}a", "0aXXXa0"); ns("a.{0,2}?a", "0aXXXa0"); ns("a.{0,2}?a", "0aXXXXa0"); x2s("^a{2,}?a$", "aaa", 0, 3); x2s("^[a-z]{2,}?$", "aaa", 0, 3); x2s("(?:a+|\\Ab*)cc", "cc", 0, 2); ns("(?:a+|\\Ab*)cc", "abcc"); x2s("(?:^a+|b+)*c", "aabbbabc", 6, 8); x2s("(?:^a+|b+)*c", "aabbbbc", 0, 7); x2s("a|(?i)c", "C", 0, 1); x2s("(?i)c|a", "C", 0, 1); x2s("(?i)c|a", "A", 0, 1); x2s("(?i:c)|a", "C", 0, 1); ns("(?i:c)|a", "A"); x2s("[abc]?", "abc", 0, 1); x2s("[abc]*", "abc", 0, 3); x2s("[^abc]*", "abc", 0, 0); ns("[^abc]+", "abc"); x2s("a??", "aaa", 0, 0); x2s("ba??b", "bab", 0, 3); x2s("a*?", "aaa", 0, 0); x2s("ba*?", 
"baa", 0, 1); x2s("ba*?b", "baab", 0, 4); x2s("a+?", "aaa", 0, 1); x2s("ba+?", "baa", 0, 2); x2s("ba+?b", "baab", 0, 4); x2s("(?:a?)??", "a", 0, 0); x2s("(?:a??)?", "a", 0, 0); x2s("(?:a?)+?", "aaa", 0, 1); x2s("(?:a+)??", "aaa", 0, 0); x2s("(?:a+)??b", "aaab", 0, 4); x2s("(?:ab)?{2}", "", 0, 0); x2s("(?:ab)?{2}", "ababa", 0, 4); x2s("(?:ab)*{0}", "ababa", 0, 0); x2s("(?:ab){3,}", "abababab", 0, 8); ns("(?:ab){3,}", "abab"); x2s("(?:ab){2,4}", "ababab", 0, 6); x2s("(?:ab){2,4}", "ababababab", 0, 8); x2s("(?:ab){2,4}?", "ababababab", 0, 4); x2s("(?:ab){,}", "ab{,}", 0, 5); x2s("(?:abc)+?{2}", "abcabcabc", 0, 6); x2s("(?:X*)(?i:xa)", "XXXa", 0, 4); x2s("(d+)([^abc]z)", "dddz", 0, 4); x2s("([^abc]*)([^abc]z)", "dddz", 0, 4); x2s("(\\w+)(\\wz)", "dddz", 0, 4); x3s("(a)", "a", 0, 1, 1); x3s("(ab)", "ab", 0, 2, 1); x2s("((ab))", "ab", 0, 2); x3s("((ab))", "ab", 0, 2, 1); x3s("((ab))", "ab", 0, 2, 2); x3s("((((((((((((((((((((ab))))))))))))))))))))", "ab", 0, 2, 20); x3s("(ab)(cd)", "abcd", 0, 2, 1); x3s("(ab)(cd)", "abcd", 2, 4, 2); x3s("()(a)bc(def)ghijk", "abcdefghijk", 3, 6, 3); x3s("(()(a)bc(def)ghijk)", "abcdefghijk", 3, 6, 4); x2s("(^a)", "a", 0, 1); x3s("(a)|(a)", "ba", 1, 2, 1); x3s("(^a)|(a)", "ba", 1, 2, 2); x3s("(a?)", "aaa", 0, 1, 1); x3s("(a*)", "aaa", 0, 3, 1); x3s("(a*)", "", 0, 0, 1); x3s("(a+)", "aaaaaaa", 0, 7, 1); x3s("(a+|b*)", "bbbaa", 0, 3, 1); x3s("(a+|b?)", "bbbaa", 0, 1, 1); x3s("(abc)?", "abc", 0, 3, 1); x3s("(abc)*", "abc", 0, 3, 1); x3s("(abc)+", "abc", 0, 3, 1); x3s("(xyz|abc)+", "abc", 0, 3, 1); x3s("([xyz][abc]|abc)+", "abc", 0, 3, 1); x3s("((?i:abc))", "AbC", 0, 3, 1); x2s("(abc)(?i:\\1)", "abcABC", 0, 6); x3s("((?m:a.c))", "a\nc", 0, 3, 1); x3s("((?=az)a)", "azb", 0, 1, 1); x3s("abc|(.abd)", "zabd", 0, 4, 1); x2s("(?:abc)|(ABC)", "abc", 0, 3); x3s("(?i:(abc))|(zzz)", "ABC", 0, 3, 1); x3s("a*(.)", "aaaaz", 4, 5, 1); x3s("a*?(.)", "aaaaz", 0, 1, 1); x3s("a*?(c)", "aaaac", 4, 5, 1); x3s("[bcd]a*(.)", "caaaaz", 5, 6, 1); x3s("(\\Abb)cc", 
"bbcc", 0, 2, 1); ns("(\\Abb)cc", "zbbcc"); x3s("(^bb)cc", "bbcc", 0, 2, 1); ns("(^bb)cc", "zbbcc"); x3s("cc(bb$)", "ccbb", 2, 4, 1); ns("cc(bb$)", "ccbbb"); ns("(\\1)", ""); ns("\\1(a)", "aa"); ns("(a(b)\\1)\\2+", "ababb"); ns("(?:(?:\\1|z)(a))+$", "zaa"); x2s("(?:(?:\\1|z)(a))+$", "zaaa", 0, 4); x2s("(a)(?=\\1)", "aa", 0, 1); ns("(a)$|\\1", "az"); x2s("(a)\\1", "aa", 0, 2); ns("(a)\\1", "ab"); x2s("(a?)\\1", "aa", 0, 2); x2s("(a??)\\1", "aa", 0, 0); x2s("(a*)\\1", "aaaaa", 0, 4); x3s("(a*)\\1", "aaaaa", 0, 2, 1); x2s("a(b*)\\1", "abbbb", 0, 5); x2s("a(b*)\\1", "ab", 0, 1); x2s("(a*)(b*)\\1\\2", "aaabbaaabb", 0, 10); x2s("(a*)(b*)\\2", "aaabbbb", 0, 7); x2s("(((((((a*)b))))))c\\7", "aaabcaaa", 0, 8); x3s("(((((((a*)b))))))c\\7", "aaabcaaa", 0, 3, 7); x2s("(a)(b)(c)\\2\\1\\3", "abcbac", 0, 6); x2s("([a-d])\\1", "cc", 0, 2); x2s("(\\w\\d\\s)\\1", "f5 f5 ", 0, 6); ns("(\\w\\d\\s)\\1", "f5 f5"); x2s("(who|[a-c]{3})\\1", "whowho", 0, 6); x2s("...(who|[a-c]{3})\\1", "abcwhowho", 0, 9); x2s("(who|[a-c]{3})\\1", "cbccbc", 0, 6); x2s("(^a)\\1", "aa", 0, 2); ns("(^a)\\1", "baa"); ns("(a$)\\1", "aa"); ns("(ab\\Z)\\1", "ab"); x2s("(a*\\Z)\\1", "a", 1, 1); x2s(".(a*\\Z)\\1", "ba", 1, 2); x3s("(.(abc)\\2)", "zabcabc", 0, 7, 1); x3s("(.(..\\d.)\\2)", "z12341234", 0, 9, 1); x2s("((?i:az))\\1", "AzAz", 0, 4); ns("((?i:az))\\1", "Azaz"); x2s("(?<=a)b", "ab", 1, 2); ns("(?<=a)b", "bb"); x2s("(?<=a|b)b", "bb", 1, 2); x2s("(?<=a|bc)b", "bcb", 2, 3); x2s("(?<=a|bc)b", "ab", 1, 2); x2s("(?<=a|bc||defghij|klmnopq|r)z", "rz", 1, 2); x2s("(a)\\g<1>", "aa", 0, 2); x2s("(?a)", "a", 0, 1); x2s("(?ab)\\g", "abab", 0, 4); x2s("(?.zv.)\\k", "azvbazvb", 0, 8); x2s("(?<=\\g)|-\\zEND (?XyZ)", "XyZ", 3, 3); x2s("(?|a\\g)+", "", 0, 0); x2s("(?|\\(\\g\\))+$", "()(())", 0, 6); x3s("\\g(?.){0}", "X", 0, 1, 1); x2s("\\g(abc|df(?.YZ){2,8}){0}", "XYZ", 0, 3); x2s("\\A(?(a\\g)|)\\z", "aaaa", 0, 4); x2s("(?|\\g\\g)\\z|\\zEND (?a|(b)\\g)", "bbbbabba", 0, 8); x2s("(?\\w+\\sx)a+\\k", " fg xaaaaaaaafg x", 2, 
18); x3s("(z)()()(?<_9>a)\\g<_9>", "zaa", 2, 3, 1); x2s("(.)(((?<_>a)))\\k<_>", "zaa", 0, 3); x2s("((?\\d)|(?\\w))(\\k|\\k)", "ff", 0, 2); x2s("(?:(?)|(?efg))\\k", "", 0, 0); x2s("(?:(?abc)|(?efg))\\k", "abcefgefg", 3, 9); ns("(?:(?abc)|(?efg))\\k", "abcefg"); x2s("(?:(?.)|(?..)|(?...)|(?....)|(?.....)|(?......)|(?.......)|(?........)|(?.........)|(?..........)|(?...........)|(?............)|(?.............)|(?..............))\\k$", "a-pyumpyum", 2, 10); x3s("(?:(?.)|(?..)|(?...)|(?....)|(?.....)|(?......)|(?.......)|(?........)|(?.........)|(?..........)|(?...........)|(?............)|(?.............)|(?..............))\\k$", "xxxxabcdefghijklmnabcdefghijklmn", 4, 18, 14); x3s("(?)(?)(?)(?)(?)(?)(?)(?)(?)(?)(?)(?)(?)(?)(?)(?aaa)(?)$", "aaa", 0, 3, 16); x2s("(?a|\\(\\g\\))", "a", 0, 1); x2s("(?a|\\(\\g\\))", "((((((a))))))", 0, 13); x3s("(?a|\\(\\g\\))", "((((((((a))))))))", 0, 17, 1); x2s("\\g|\\zEND(?.*abc$)", "abcxxxabc", 0, 9); x2s("\\g<1>|\\zEND(.a.)", "bac", 0, 3); x3s("\\g<_A>\\g<_A>|\\zEND(.a.)(?<_A>.b.)", "xbxyby", 3, 6, 1); x2s("\\A(?:\\g|\\g|\\zEND (?a|c\\gc)(?b|d\\gd))$", "cdcbcdc", 0, 7); x2s("\\A(?|a\\g)\\z|\\zEND (?\\g)", "aaaa", 0, 4); x2s("(?(a|b\\gc){3,5})", "baaaaca", 1, 5); x2s("(?(a|b\\gc){3,5})", "baaaacaaaaa", 0, 10); x2s("(?\\(([^\\(\\)]++|\\g)*+\\))", "((a))", 0, 5); x2s("()*\\1", "", 0, 0); x2s("(?:()|())*\\1\\2", "", 0, 0); x3s("(?:\\1a|())*", "a", 0, 0, 1); x2s("x((.)*)*x", "0x1x2x3", 1, 6); x2s("x((.)*)*x(?i:\\1)\\Z", "0x1x2x1X2", 1, 9); x2s("(?:()|()|()|()|()|())*\\2\\5", "", 0, 0); x2s("(?:()|()|()|(x)|()|())*\\2b\\5", "b", 0, 1); if (Config.VANILLA) x2s("\\xED\\xF2", "\u00ed\u0148", 0, 2); x2s("", "\u00a4\u02d8", 0, 0); x2s("\u00a4\u02d8", "\u00a4\u02d8", 0, 2); ns("\u00a4\u00a4", "\u00a4\u02d8"); x2s("\u00a4\u00a6\u00a4\u00a6", "\u00a4\u00a6\u00a4\u00a6", 0, 4); x2s("\u00a4\u02d8\u00a4\u00a4\u00a4\u00a6", "\u00a4\u02d8\u00a4\u00a4\u00a4\u00a6", 0, 6); 
x2s("\u00a4\u0142\u00a4\u0142\u00a4\u0142\u00a4\u0142\u00a4\u0142\u00a4\u0142\u00a4\u0142\u00a4\u0142\u00a4\u0142\u00a4\u0142\u00a4\u0142\u00a4\u0142\u00a4\u0142\u00a4\u0142\u00a4\u0142\u00a4\u0142\u00a4\u0142\u00a4\u0142\u00a4\u0142\u00a4\u0142\u00a4\u0142\u00a4\u0142\u00a4\u0142\u00a4\u0142\u00a4\u0142\u00a4\u0142\u00a4\u0142\u00a4\u0142\u00a4\u0142\u00a4\u0142\u00a4\u0142\u00a4\u0142\u00a4\u0142\u00a4\u0142\u00a4\u0142", "\u00a4\u0142\u00a4\u0142\u00a4\u0142\u00a4\u0142\u00a4\u0142\u00a4\u0142\u00a4\u0142\u00a4\u0142\u00a4\u0142\u00a4\u0142\u00a4\u0142\u00a4\u0142\u00a4\u0142\u00a4\u0142\u00a4\u0142\u00a4\u0142\u00a4\u0142\u00a4\u0142\u00a4\u0142\u00a4\u0142\u00a4\u0142\u00a4\u0142\u00a4\u0142\u00a4\u0142\u00a4\u0142\u00a4\u0142\u00a4\u0142\u00a4\u0142\u00a4\u0142\u00a4\u0142\u00a4\u0142\u00a4\u0142\u00a4\u0142\u00a4\u0142\u00a4\u0142", 0, 70); x2s("\u00a4\u02d8", "\u00a4\u00a4\u00a4\u02d8", 2, 4); x2s("\u00a4\u00a4\u00a4\u00a6", "\u00a4\u02d8\u00a4\u00a4\u00a4\u00a6", 2, 6); if (Config.VANILLA) x2s("\\xca\\xb8", "\u0118\u00b8", 0, 2); x2s(".", "\u00a4\u02d8", 0, 2); x2s("..", "\u00a4\u00ab\u00a4\u00ad", 0, 4); if (!org.joni.Config.NON_UNICODE_SDW) x2s("\\w", "\u00a4\u015e", 0, 2); if (!org.joni.Config.NON_UNICODE_SDW) ns("\\W", "\u00a4\u02d8"); if (!org.joni.Config.NON_UNICODE_SDW) x2s("[\\W]", "\u00a4\u00a6$", 2, 3); x2s("\\S", "\u00a4\u02dd", 0, 2); x2s("\\S", "\u00b4\u00c1", 0, 2); x2s("\\b", "\u00b5\u00a4 ", 0, 0); x2s("\\b", " \u00a4\u0170", 1, 1); x2s("\\B", "\u00a4\u00bb\u00a4\u02dd ", 2, 2); x2s("\\B", "\u00a4\u00a6 ", 3, 3); x2s("\\B", " \u00a4\u00a4", 0, 0); x2s("[\u00a4\u017c\u00a4\u00c1]", "\u00a4\u00c1", 0, 2); ns("[\u00a4\u0118\u00a4\u00cb]", "\u00a4\u011a"); x2s("[\u00a4\u00a6-\u00a4\u015e]", "\u00a4\u00a8", 0, 2); ns("[^\u00a4\u00b1]", "\u00a4\u00b1"); if (!org.joni.Config.NON_UNICODE_SDW) x2s("[\\w]", "\u00a4\u00cd", 0, 2); ns("[\\d]", "\u00a4\u0150"); x2s("[\\D]", "\u00a4\u010e", 0, 2); ns("[\\s]", "\u00a4\u017b"); x2s("[\\S]", "\u00a4\u0158", 
0, 2); if (!org.joni.Config.NON_UNICODE_SDW) x2s("[\\w\\d]", "\u00a4\u010d", 0, 2); if (!org.joni.Config.NON_UNICODE_SDW) x2s("[\\w\\d]", " \u00a4\u010d", 3, 5); ns("\\w\u00b5\u00b4\u013d\u00d6", " \u00b5\u00b4\u013d\u00d6"); x2s("\u00b5\u00b4\\W\u013d\u00d6", "\u00b5\u00b4 \u013d\u00d6", 0, 5); x2s("\u00a4\u02d8.\u00a4\u00a4.\u00a4\u00a6", "\u00a4\u02d8\u00a4\u02d8\u00a4\u00a4\u00a4\u00a4\u00a4\u00a6", 0, 10); if (!org.joni.Config.NON_UNICODE_SDW) x2s(".\\w\u00a4\u00a6\\W..\u00a4\u013e", "\u00a4\u00a8\u00a4\u00a6\u00a4\u00a6 \u00a4\u00a6\u00a4\u013e\u00a4\u013e", 0, 13); if (!org.joni.Config.NON_UNICODE_SDW) x2s("\\s\\w\u00a4\u0142\u00a4\u0142\u00a4\u0142", " \u00a4\u0142\u00a4\u0142\u00a4\u0142\u00a4\u0142", 0, 9); x2s("\u00a4\u02d8\u00a4\u02d8.\u00a4\u00b1", "\u00a4\u02d8\u00a4\u02d8\u00a4\u00b1\u00a4\u00b1", 0, 8); ns(".\u00a4\u00a4", "\u00a4\u00a4\u00a4\u00a8"); x2s(".\u00a4\u015e", "\u00a4\u015e\u00a4\u015e", 0, 4); x2s("^\u00a4\u02d8", "\u00a4\u02d8", 0, 2); x2s("^\u00a4\u0155$", "\u00a4\u0155", 0, 2); if (!org.joni.Config.NON_UNICODE_SDW) x2s("^\\w$", "\u00a4\u00cb", 0, 2); x2s("^\\w\u00a4\u00ab\u00a4\u00ad\u00a4\u017b\u00a4\u00b1\u00a4\u0142$", "z\u00a4\u00ab\u00a4\u00ad\u00a4\u017b\u00a4\u00b1\u00a4\u0142", 0, 11); x2s("^\\w...\u00a4\u00a6\u00a4\u00a8\u00a4\u015e$", "z\u00a4\u02d8\u00a4\u00a4\u00a4\u00a6\u00a4\u00a6\u00a4\u00a8\u00a4\u015e", 0, 13); if (!org.joni.Config.NON_UNICODE_SDW) x2s("\\w\\w\\s\\W\u00a4\u015e\u00a4\u015e\u00a4\u015e\\d", "a\u00a4\u015e \u00a4\u015e\u00a4\u015e\u00a4\u015e4", 0, 12); x2s("\\A\u00a4\u017c\u00a4\u00c1\u00a4\u00c4", "\u00a4\u017c\u00a4\u00c1\u00a4\u00c4", 0, 6); x2s("\u00a4\u0155\u00a4\u00e1\u00a4\u00e2\\Z", "\u00a4\u0155\u00a4\u00e1\u00a4\u00e2", 0, 6); x2s("\u00a4\u00ab\u00a4\u00ad\u00a4\u017b\\z", "\u00a4\u00ab\u00a4\u00ad\u00a4\u017b", 0, 6); x2s("\u00a4\u00ab\u00a4\u00ad\u00a4\u017b\\Z", "\u00a4\u00ab\u00a4\u00ad\u00a4\u017b\n", 0, 6); x2s("\\G\u00a4\u00dd\u00a4\u00d4", "\u00a4\u00dd\u00a4\u00d4", 0, 4); 
ns("\\G\u00a4\u00a8", "\u00a4\u00a6\u00a4\u00a8\u00a4\u015e"); ns("\u00a4\u010c\u00a4\u0106\\G", "\u00a4\u010c\u00a4\u0106"); ns("\u00a4\u0162\u00a4\u00df\\A", "\u00a4\u0162\u00a4\u00df"); ns("\u00a4\u0162\\A\u00a4\u00df", "\u00a4\u0162\u00a4\u00df"); x2s("(?=\u00a4\u00bb)\u00a4\u00bb", "\u00a4\u00bb", 0, 2); ns("(?=\u00a4\u00a6).", "\u00a4\u00a4"); x2s("(?!\u00a4\u00a6)\u00a4\u00ab", "\u00a4\u00ab", 0, 2); ns("(?!\u00a4\u010c)\u00a4\u02d8", "\u00a4\u010c"); x2s("(?i:\u00a4\u02d8)", "\u00a4\u02d8", 0, 2); x2s("(?i:\u00a4\u00d6\u00a4\u016e)", "\u00a4\u00d6\u00a4\u016e", 0, 4); ns("(?i:\u00a4\u00a4)", "\u00a4\u00a6"); x2s("(?m:\u00a4\u010d.)", "\u00a4\u010d\n", 0, 3); x2s("(?m:.\u00a4\u00e1)", "\u00a4\u0162\n\u00a4\u00e1", 2, 5); x2s("\u00a4\u02d8?", "", 0, 0); x2s("\u0118\u0143?", "\u02db\u02dd", 0, 0); x2s("\u0118\u0143?", "\u0118\u0143", 0, 2); x2s("\u00ce\u011a*", "", 0, 0); x2s("\u00ce\u011a*", "\u00ce\u011a", 0, 2); x2s("\u00bb\u0147*", "\u00bb\u0147\u00bb\u0147\u00bb\u0147", 0, 6); x2s("\u00c7\u010e*", "\u013d\u017b\u00c7\u010e\u00c7\u010e\u00c7\u010e\u00c7\u010e", 0, 0); ns("\u00bb\u0142+", ""); x2s("\u02db\u010e+", "\u02db\u010e", 0, 2); x2s("\u00bb\u0163+", "\u00bb\u0163\u00bb\u0163\u00bb\u0163\u00bb\u0163", 0, 8); x2s("\u00a4\u00a8+", "\u00a4\u00a8\u00a4\u00a8\u00a4\u00a6\u00a4\u00a6\u00a4\u00a6", 0, 4); x2s("\u00a4\u00a6+", "\u00a4\u015e\u00a4\u00a6\u00a4\u00a6\u00a4\u00a6\u00a4\u00a6", 2, 10); x2s(".?", "\u00a4\u017c", 0, 2); x2s(".*", "\u00a4\u0143\u00a4\u00d4\u00a4\u00d7\u00a4\u00da", 0, 8); x2s(".+", "\u00a4\u00ed", 0, 2); x2s(".+", "\u00a4\u00a4\u00a4\u00a6\u00a4\u00a8\u00a4\u00ab\n", 0, 8); x2s("\u00a4\u02d8|\u00a4\u00a4", "\u00a4\u02d8", 0, 2); x2s("\u00a4\u02d8|\u00a4\u00a4", "\u00a4\u00a4", 0, 2); x2s("\u00a4\u02d8\u00a4\u00a4|\u00a4\u00a4\u00a4\u00a6", "\u00a4\u02d8\u00a4\u00a4", 0, 4); x2s("\u00a4\u02d8\u00a4\u00a4|\u00a4\u00a4\u00a4\u00a6", "\u00a4\u00a4\u00a4\u00a6", 0, 4); 
x2s("\u00a4\u0148(?:\u00a4\u00ab\u00a4\u00ad|\u00a4\u00ad\u00a4\u017b)", "\u00a4\u0148\u00a4\u00ab\u00a4\u00ad", 0, 6); x2s("\u00a4\u0148(?:\u00a4\u00ab\u00a4\u00ad|\u00a4\u00ad\u00a4\u017b)\u00a4\u00b1", "\u00a4\u0148\u00a4\u00ad\u00a4\u017b\u00a4\u00b1", 0, 8); x2s("\u00a4\u02d8\u00a4\u00a4|(?:\u00a4\u02d8\u00a4\u00a6|\u00a4\u02d8\u00a4\u0148)", "\u00a4\u02d8\u00a4\u0148", 0, 4); x2s("\u00a4\u02d8|\u00a4\u00a4|\u00a4\u00a6", "\u00a4\u00a8\u00a4\u00a6", 2, 4); x2s("\u00a4\u02d8|\u00a4\u00a4|\u00a4\u00a6\u00a4\u00a8|\u00a4\u015e\u00a4\u00ab\u00a4\u00ad|\u00a4\u017b|\u00a4\u00b1\u00a4\u0142\u00a4\u00b5|\u00a4\u00b7\u00a4\u0105\u00a4\u00bb|\u00a4\u02dd|\u00a4\u017c\u00a4\u00c1|\u00a4\u00c4\u00a4\u0106\u00a4\u010c\u00a4\u0118\u00a4\u00cb|\u00a4\u011a\u00a4\u00cd", "\u00a4\u00b7\u00a4\u0105\u00a4\u00bb", 0, 6); ns("\u00a4\u02d8|\u00a4\u00a4|\u00a4\u00a6\u00a4\u00a8|\u00a4\u015e\u00a4\u00ab\u00a4\u00ad|\u00a4\u017b|\u00a4\u00b1\u00a4\u0142\u00a4\u00b5|\u00a4\u00b7\u00a4\u0105\u00a4\u00bb|\u00a4\u02dd|\u00a4\u017c\u00a4\u00c1|\u00a4\u00c4\u00a4\u0106\u00a4\u010c\u00a4\u0118\u00a4\u00cb|\u00a4\u011a\u00a4\u00cd", "\u00a4\u0105\u00a4\u00bb"); x2s("\u00a4\u02d8|^\u00a4\u010f", "\u00a4\u00d6\u00a4\u02d8", 2, 4); x2s("\u00a4\u02d8|^\u00a4\u0148", "\u00a4\u0148\u00a4\u02d8", 0, 2); x2s("\u00b5\u00b4|\\G\u013d\u00d6", "\u00a4\u00b1\u013d\u00d6\u00b5\u00b4", 4, 6); x2s("\u00b5\u00b4|\\G\u013d\u00d6", "\u013d\u00d6\u00b5\u00b4", 0, 2); x2s("\u00b5\u00b4|\\A\u013d\u00d6", "b\u013d\u00d6\u00b5\u00b4", 3, 5); x2s("\u00b5\u00b4|\\A\u013d\u00d6", "\u013d\u00d6", 0, 2); x2s("\u00b5\u00b4|\u013d\u00d6\\Z", "\u013d\u00d6\u00b5\u00b4", 2, 4); x2s("\u00b5\u00b4|\u013d\u00d6\\Z", "\u013d\u00d6", 0, 2); x2s("\u00b5\u00b4|\u013d\u00d6\\Z", "\u013d\u00d6\n", 0, 2); x2s("\u00b5\u00b4|\u013d\u00d6\\z", "\u013d\u00d6\u00b5\u00b4", 2, 4); x2s("\u00b5\u00b4|\u013d\u00d6\\z", "\u013d\u00d6", 0, 2); if (!org.joni.Config.NON_UNICODE_SDW) x2s("\\w|\\s", "\u00a4\u015e", 0, 2); x2s("\\w|%", 
"%\u00a4\u015e", 0, 1); if (!org.joni.Config.NON_UNICODE_SDW) x2s("\\w|[&$]", "\u00a4\u00a6&", 0, 2); x2s("[\u00a4\u00a4-\u00a4\u00b1]", "\u00a4\u00a6", 0, 2); x2s("[\u00a4\u00a4-\u00a4\u00b1]|[^\u00a4\u00ab-\u00a4\u0142]", "\u00a4\u02d8", 0, 2); x2s("[\u00a4\u00a4-\u00a4\u00b1]|[^\u00a4\u00ab-\u00a4\u0142]", "\u00a4\u00ab", 0, 2); x2s("[^\u00a4\u02d8]", "\n", 0, 1); x2s("(?:\u00a4\u02d8|[\u00a4\u00a6-\u00a4\u00ad])|\u00a4\u00a4\u00a4\u0148", "\u00a4\u00a6\u00a4\u0148", 0, 2); x2s("(?:\u00a4\u02d8|[\u00a4\u00a6-\u00a4\u00ad])|\u00a4\u00a4\u00a4\u0148", "\u00a4\u00a4\u00a4\u0148", 0, 4); x2s("\u00a4\u02d8\u00a4\u00a4\u00a4\u00a6|(?=\u00a4\u00b1\u00a4\u00b1)..\u00a4\u0170", "\u00a4\u00b1\u00a4\u00b1\u00a4\u0170", 0, 6); x2s("\u00a4\u02d8\u00a4\u00a4\u00a4\u00a6|(?!\u00a4\u00b1\u00a4\u00b1)..\u00a4\u0170", "\u00a4\u02d8\u00a4\u00a4\u00a4\u0170", 0, 6); if (Config.VANILLA) x2s("(?=\u00a4\u0148\u00a4\u02d8)..\u00a4\u02d8|(?=\u00a4\u0148\u00a4\u0148)..\u00a4\u02d8", "\u00a4\u0148\u00a4\u0148\u00a4\u02d8", 0, 6); x2s("(?<=\u00a4\u02d8|\u00a4\u00a4\u00a4\u00a6)\u00a4\u00a4", "\u00a4\u00a4\u00a4\u00a6\u00a4\u00a4", 4, 6); ns("(?>\u00a4\u02d8|\u00a4\u02d8\u00a4\u00a4\u00a4\u00a8)\u00a4\u00a6", "\u00a4\u02d8\u00a4\u00a4\u00a4\u00a8\u00a4\u00a6"); x2s("(?>\u00a4\u02d8\u00a4\u00a4\u00a4\u00a8|\u00a4\u02d8)\u00a4\u00a6", "\u00a4\u02d8\u00a4\u00a4\u00a4\u00a8\u00a4\u00a6", 0, 8); x2s("\u00a4\u02d8?|\u00a4\u00a4", "\u00a4\u02d8", 0, 2); x2s("\u00a4\u02d8?|\u00a4\u00a4", "\u00a4\u00a4", 0, 0); x2s("\u00a4\u02d8?|\u00a4\u00a4", "", 0, 0); x2s("\u00a4\u02d8*|\u00a4\u00a4", "\u00a4\u02d8\u00a4\u02d8", 0, 4); x2s("\u00a4\u02d8*|\u00a4\u00a4*", "\u00a4\u00a4\u00a4\u02d8", 0, 0); x2s("\u00a4\u02d8*|\u00a4\u00a4*", "\u00a4\u02d8\u00a4\u00a4", 0, 2); x2s("[a\u00a4\u02d8]*|\u00a4\u00a4*", "a\u00a4\u02d8\u00a4\u00a4\u00a4\u00a4\u00a4\u00a4", 0, 3); x2s("\u00a4\u02d8+|\u00a4\u00a4*", "", 0, 0); x2s("\u00a4\u02d8+|\u00a4\u00a4*", "\u00a4\u00a4\u00a4\u00a4\u00a4\u00a4", 0, 6); 
x2s("\u00a4\u02d8+|\u00a4\u00a4*", "\u00a4\u02d8\u00a4\u00a4\u00a4\u00a4\u00a4\u00a4", 0, 2); x2s("\u00a4\u02d8+|\u00a4\u00a4*", "a\u00a4\u02d8\u00a4\u00a4\u00a4\u00a4\u00a4\u00a4", 0, 0); ns("\u00a4\u02d8+|\u00a4\u00a4+", ""); x2s("(\u00a4\u02d8|\u00a4\u00a4)?", "\u00a4\u00a4", 0, 2); x2s("(\u00a4\u02d8|\u00a4\u00a4)*", "\u00a4\u00a4\u00a4\u02d8", 0, 4); x2s("(\u00a4\u02d8|\u00a4\u00a4)+", "\u00a4\u00a4\u00a4\u02d8\u00a4\u00a4", 0, 6); x2s("(\u00a4\u02d8\u00a4\u00a4|\u00a4\u00a6\u00a4\u02d8)+", "\u00a4\u00a6\u00a4\u02d8\u00a4\u02d8\u00a4\u00a4\u00a4\u00a6\u00a4\u00a8", 0, 8); x2s("(\u00a4\u02d8\u00a4\u00a4|\u00a4\u00a6\u00a4\u00a8)+", "\u00a4\u00a6\u00a4\u02d8\u00a4\u02d8\u00a4\u00a4\u00a4\u00a6\u00a4\u00a8", 4, 12); x2s("(\u00a4\u02d8\u00a4\u00a4|\u00a4\u00a6\u00a4\u02d8)+", "\u00a4\u02d8\u00a4\u02d8\u00a4\u00a4\u00a4\u00a6\u00a4\u02d8", 2, 10); x2s("(\u00a4\u02d8\u00a4\u00a4|\u00a4\u00a6\u00a4\u02d8)+", "\u00a4\u02d8\u00a4\u00a4\u00a4\u0148\u00a4\u00a6\u00a4\u02d8", 0, 4); x2s("(\u00a4\u02d8\u00a4\u00a4|\u00a4\u00a6\u00a4\u02d8)+", "$$zzzz\u00a4\u02d8\u00a4\u00a4\u00a4\u0148\u00a4\u00a6\u00a4\u02d8", 6, 10); x2s("(\u00a4\u02d8|\u00a4\u00a4\u00a4\u02d8\u00a4\u00a4)+", "\u00a4\u02d8\u00a4\u00a4\u00a4\u02d8\u00a4\u00a4\u00a4\u02d8", 0, 10); x2s("(\u00a4\u02d8|\u00a4\u00a4\u00a4\u02d8\u00a4\u00a4)+", "\u00a4\u00a4\u00a4\u02d8", 2, 4); x2s("(\u00a4\u02d8|\u00a4\u00a4\u00a4\u02d8\u00a4\u00a4)+", "\u00a4\u00a4\u00a4\u02d8\u00a4\u02d8\u00a4\u02d8\u00a4\u00a4\u00a4\u02d8", 2, 8); x2s("(?:\u00a4\u02d8|\u00a4\u00a4)(?:\u00a4\u02d8|\u00a4\u00a4)", "\u00a4\u02d8\u00a4\u00a4", 0, 4); x2s("(?:\u00a4\u02d8*|\u00a4\u00a4*)(?:\u00a4\u02d8*|\u00a4\u00a4*)", "\u00a4\u02d8\u00a4\u02d8\u00a4\u02d8\u00a4\u00a4\u00a4\u00a4\u00a4\u00a4", 0, 6); x2s("(?:\u00a4\u02d8*|\u00a4\u00a4*)(?:\u00a4\u02d8+|\u00a4\u00a4+)", "\u00a4\u02d8\u00a4\u02d8\u00a4\u02d8\u00a4\u00a4\u00a4\u00a4\u00a4\u00a4", 0, 12); x2s("(?:\u00a4\u02d8+|\u00a4\u00a4+){2}", 
"\u00a4\u02d8\u00a4\u02d8\u00a4\u02d8\u00a4\u00a4\u00a4\u00a4\u00a4\u00a4", 0, 12); x2s("(?:\u00a4\u02d8+|\u00a4\u00a4+){1,2}", "\u00a4\u02d8\u00a4\u02d8\u00a4\u02d8\u00a4\u00a4\u00a4\u00a4\u00a4\u00a4", 0, 12); x2s("(?:\u00a4\u02d8+|\\A\u00a4\u00a4*)\u00a4\u00a6\u00a4\u00a6", "\u00a4\u00a6\u00a4\u00a6", 0, 4); ns("(?:\u00a4\u02d8+|\\A\u00a4\u00a4*)\u00a4\u00a6\u00a4\u00a6", "\u00a4\u02d8\u00a4\u00a4\u00a4\u00a6\u00a4\u00a6"); x2s("(?:^\u00a4\u02d8+|\u00a4\u00a4+)*\u00a4\u00a6", "\u00a4\u02d8\u00a4\u02d8\u00a4\u00a4\u00a4\u00a4\u00a4\u00a4\u00a4\u02d8\u00a4\u00a4\u00a4\u00a6", 12, 16); x2s("(?:^\u00a4\u02d8+|\u00a4\u00a4+)*\u00a4\u00a6", "\u00a4\u02d8\u00a4\u02d8\u00a4\u00a4\u00a4\u00a4\u00a4\u00a4\u00a4\u00a4\u00a4\u00a6", 0, 14); x2s("\u00a4\u00a6{0,}", "\u00a4\u00a6\u00a4\u00a6\u00a4\u00a6\u00a4\u00a6", 0, 8); x2s("\u00a4\u02d8|(?i)c", "C", 0, 1); x2s("(?i)c|\u00a4\u02d8", "C", 0, 1); x2s("(?i:\u00a4\u02d8)|a", "a", 0, 1); ns("(?i:\u00a4\u02d8)|a", "A"); x2s("[\u00a4\u02d8\u00a4\u00a4\u00a4\u00a6]?", "\u00a4\u02d8\u00a4\u00a4\u00a4\u00a6", 0, 2); x2s("[\u00a4\u02d8\u00a4\u00a4\u00a4\u00a6]*", "\u00a4\u02d8\u00a4\u00a4\u00a4\u00a6", 0, 6); x2s("[^\u00a4\u02d8\u00a4\u00a4\u00a4\u00a6]*", "\u00a4\u02d8\u00a4\u00a4\u00a4\u00a6", 0, 0); ns("[^\u00a4\u02d8\u00a4\u00a4\u00a4\u00a6]+", "\u00a4\u02d8\u00a4\u00a4\u00a4\u00a6"); x2s("\u00a4\u02d8??", "\u00a4\u02d8\u00a4\u02d8\u00a4\u02d8", 0, 0); x2s("\u00a4\u00a4\u00a4\u02d8??\u00a4\u00a4", "\u00a4\u00a4\u00a4\u02d8\u00a4\u00a4", 0, 6); x2s("\u00a4\u02d8*?", "\u00a4\u02d8\u00a4\u02d8\u00a4\u02d8", 0, 0); x2s("\u00a4\u00a4\u00a4\u02d8*?", "\u00a4\u00a4\u00a4\u02d8\u00a4\u02d8", 0, 2); x2s("\u00a4\u00a4\u00a4\u02d8*?\u00a4\u00a4", "\u00a4\u00a4\u00a4\u02d8\u00a4\u02d8\u00a4\u00a4", 0, 8); x2s("\u00a4\u02d8+?", "\u00a4\u02d8\u00a4\u02d8\u00a4\u02d8", 0, 2); x2s("\u00a4\u00a4\u00a4\u02d8+?", "\u00a4\u00a4\u00a4\u02d8\u00a4\u02d8", 0, 4); x2s("\u00a4\u00a4\u00a4\u02d8+?\u00a4\u00a4", 
"\u00a4\u00a4\u00a4\u02d8\u00a4\u02d8\u00a4\u00a4", 0, 8); x2s("(?:\u0139\u00b7?)??", "\u0139\u00b7", 0, 0); x2s("(?:\u0139\u00b7??)?", "\u0139\u00b7", 0, 0); x2s("(?:\u011a\u00b4?)+?", "\u011a\u00b4\u011a\u00b4\u011a\u00b4", 0, 2); x2s("(?:\u00c9\u00f7+)??", "\u00c9\u00f7\u00c9\u00f7\u00c9\u00f7", 0, 0); x2s("(?:\u0154\u0103+)??\u00c1\u00fa", "\u0154\u0103\u0154\u0103\u0154\u0103\u00c1\u00fa", 0, 8); x2s("(?:\u00a4\u02d8\u00a4\u00a4)?{2}", "", 0, 0); x2s("(?:\u00b5\u00b4\u013d\u00d6)?{2}", "\u00b5\u00b4\u013d\u00d6\u00b5\u00b4\u013d\u00d6\u00b5\u00b4", 0, 8); x2s("(?:\u00b5\u00b4\u013d\u00d6)*{0}", "\u00b5\u00b4\u013d\u00d6\u00b5\u00b4\u013d\u00d6\u00b5\u00b4", 0, 0); x2s("(?:\u00b5\u00b4\u013d\u00d6){3,}", "\u00b5\u00b4\u013d\u00d6\u00b5\u00b4\u013d\u00d6\u00b5\u00b4\u013d\u00d6\u00b5\u00b4\u013d\u00d6", 0, 16); ns("(?:\u00b5\u00b4\u013d\u00d6){3,}", "\u00b5\u00b4\u013d\u00d6\u00b5\u00b4\u013d\u00d6"); x2s("(?:\u00b5\u00b4\u013d\u00d6){2,4}", "\u00b5\u00b4\u013d\u00d6\u00b5\u00b4\u013d\u00d6\u00b5\u00b4\u013d\u00d6", 0, 12); x2s("(?:\u00b5\u00b4\u013d\u00d6){2,4}", "\u00b5\u00b4\u013d\u00d6\u00b5\u00b4\u013d\u00d6\u00b5\u00b4\u013d\u00d6\u00b5\u00b4\u013d\u00d6\u00b5\u00b4\u013d\u00d6", 0, 16); x2s("(?:\u00b5\u00b4\u013d\u00d6){2,4}?", "\u00b5\u00b4\u013d\u00d6\u00b5\u00b4\u013d\u00d6\u00b5\u00b4\u013d\u00d6\u00b5\u00b4\u013d\u00d6\u00b5\u00b4\u013d\u00d6", 0, 8); x2s("(?:\u00b5\u00b4\u013d\u00d6){,}", "\u00b5\u00b4\u013d\u00d6{,}", 0, 7); x2s("(?:\u00a4\u00ab\u00a4\u00ad\u00a4\u017b)+?{2}", "\u00a4\u00ab\u00a4\u00ad\u00a4\u017b\u00a4\u00ab\u00a4\u00ad\u00a4\u017b\u00a4\u00ab\u00a4\u00ad\u00a4\u017b", 0, 12); x3s("(\u02db\u0110)", "\u02db\u0110", 0, 2, 1); x3s("(\u02db\u0110\u017c\u013a)", "\u02db\u0110\u017c\u013a", 0, 4, 1); x2s("((\u00bb\u0163\u00b4\u00d6))", "\u00bb\u0163\u00b4\u00d6", 0, 4); x3s("((\u00c9\u00f7\u017c\u013a))", "\u00c9\u00f7\u017c\u013a", 0, 4, 1); x3s("((\u015f\u0148\u0106\u00fc))", "\u015f\u0148\u0106\u00fc", 0, 4, 2); 
x3s("((((((((((((((((((((\u00ce\u011a\u00bb\u0147))))))))))))))))))))", "\u00ce\u011a\u00bb\u0147", 0, 4, 20); x3s("(\u00a4\u02d8\u00a4\u00a4)(\u00a4\u00a6\u00a4\u00a8)", "\u00a4\u02d8\u00a4\u00a4\u00a4\u00a6\u00a4\u00a8", 0, 4, 1); x3s("(\u00a4\u02d8\u00a4\u00a4)(\u00a4\u00a6\u00a4\u00a8)", "\u00a4\u02d8\u00a4\u00a4\u00a4\u00a6\u00a4\u00a8", 4, 8, 2); x3s("()(\u00a4\u02d8)\u00a4\u00a4\u00a4\u00a6(\u00a4\u00a8\u00a4\u015e\u00a4\u00ab)\u00a4\u00ad\u00a4\u017b\u00a4\u00b1\u00a4\u0142", "\u00a4\u02d8\u00a4\u00a4\u00a4\u00a6\u00a4\u00a8\u00a4\u015e\u00a4\u00ab\u00a4\u00ad\u00a4\u017b\u00a4\u00b1\u00a4\u0142", 6, 12, 3); x3s("(()(\u00a4\u02d8)\u00a4\u00a4\u00a4\u00a6(\u00a4\u00a8\u00a4\u015e\u00a4\u00ab)\u00a4\u00ad\u00a4\u017b\u00a4\u00b1\u00a4\u0142)", "\u00a4\u02d8\u00a4\u00a4\u00a4\u00a6\u00a4\u00a8\u00a4\u015e\u00a4\u00ab\u00a4\u00ad\u00a4\u017b\u00a4\u00b1\u00a4\u0142", 6, 12, 4); x3s(".*(\u0104\u0150\u0104\u00a9)\u0104\u00f3\u02c7\u00a6\u0104\u0162(\u0104\u00f3()\u0104\u00b7\u0104\u013a\u0104\u017c)\u0104\u00a4\u0104\u00f3", "\u0104\u0150\u0104\u00a9\u0104\u00f3\u02c7\u00a6\u0104\u0162\u0104\u00f3\u0104\u00b7\u0104\u013a\u0104\u017c\u0104\u00a4\u0104\u00f3", 10, 18, 2); x2s("(^\u00a4\u02d8)", "\u00a4\u02d8", 0, 2); x3s("(\u00a4\u02d8)|(\u00a4\u02d8)", "\u00a4\u00a4\u00a4\u02d8", 2, 4, 1); x3s("(^\u00a4\u02d8)|(\u00a4\u02d8)", "\u00a4\u00a4\u00a4\u02d8", 2, 4, 2); x3s("(\u00a4\u02d8?)", "\u00a4\u02d8\u00a4\u02d8\u00a4\u02d8", 0, 2, 1); x3s("(\u00a4\u0162*)", "\u00a4\u0162\u00a4\u0162\u00a4\u0162", 0, 6, 1); x3s("(\u00a4\u010c*)", "", 0, 0, 1); x3s("(\u00a4\u00eb+)", "\u00a4\u00eb\u00a4\u00eb\u00a4\u00eb\u00a4\u00eb\u00a4\u00eb\u00a4\u00eb\u00a4\u00eb", 0, 14, 1); x3s("(\u00a4\u0150+|\u00a4\u0158*)", "\u00a4\u0150\u00a4\u0150\u00a4\u0150\u00a4\u0158\u00a4\u0158", 0, 6, 1); x3s("(\u00a4\u02d8+|\u00a4\u00a4?)", "\u00a4\u00a4\u00a4\u00a4\u00a4\u00a4\u00a4\u02d8\u00a4\u02d8", 0, 2, 1); x3s("(\u00a4\u02d8\u00a4\u00a4\u00a4\u00a6)?", 
"\u00a4\u02d8\u00a4\u00a4\u00a4\u00a6", 0, 6, 1); x3s("(\u00a4\u02d8\u00a4\u00a4\u00a4\u00a6)*", "\u00a4\u02d8\u00a4\u00a4\u00a4\u00a6", 0, 6, 1); x3s("(\u00a4\u02d8\u00a4\u00a4\u00a4\u00a6)+", "\u00a4\u02d8\u00a4\u00a4\u00a4\u00a6", 0, 6, 1); x3s("(\u00a4\u00b5\u00a4\u00b7\u00a4\u0105|\u00a4\u02d8\u00a4\u00a4\u00a4\u00a6)+", "\u00a4\u02d8\u00a4\u00a4\u00a4\u00a6", 0, 6, 1); x3s("([\u00a4\u0118\u00a4\u00cb\u00a4\u011a][\u00a4\u00ab\u00a4\u00ad\u00a4\u017b]|\u00a4\u00ab\u00a4\u00ad\u00a4\u017b)+", "\u00a4\u00ab\u00a4\u00ad\u00a4\u017b", 0, 6, 1); x3s("((?i:\u00a4\u02d8\u00a4\u00a4\u00a4\u00a6))", "\u00a4\u02d8\u00a4\u00a4\u00a4\u00a6", 0, 6, 1); x3s("((?m:\u00a4\u02d8.\u00a4\u00a6))", "\u00a4\u02d8\n\u00a4\u00a6", 0, 5, 1); x3s("((?=\u00a4\u02d8\u00a4\u00f3)\u00a4\u02d8)", "\u00a4\u02d8\u00a4\u00f3\u00a4\u00a4", 0, 2, 1); x3s("\u00a4\u02d8\u00a4\u00a4\u00a4\u00a6|(.\u00a4\u02d8\u00a4\u00a4\u00a4\u00a8)", "\u00a4\u00f3\u00a4\u02d8\u00a4\u00a4\u00a4\u00a8", 0, 8, 1); x3s("\u00a4\u02d8*(.)", "\u00a4\u02d8\u00a4\u02d8\u00a4\u02d8\u00a4\u02d8\u00a4\u00f3", 8, 10, 1); x3s("\u00a4\u02d8*?(.)", "\u00a4\u02d8\u00a4\u02d8\u00a4\u02d8\u00a4\u02d8\u00a4\u00f3", 0, 2, 1); x3s("\u00a4\u02d8*?(\u00a4\u00f3)", "\u00a4\u02d8\u00a4\u02d8\u00a4\u02d8\u00a4\u02d8\u00a4\u00f3", 8, 10, 1); x3s("[\u00a4\u00a4\u00a4\u00a6\u00a4\u00a8]\u00a4\u02d8*(.)", "\u00a4\u00a8\u00a4\u02d8\u00a4\u02d8\u00a4\u02d8\u00a4\u02d8\u00a4\u00f3", 10, 12, 1); x3s("(\\A\u00a4\u00a4\u00a4\u00a4)\u00a4\u00a6\u00a4\u00a6", "\u00a4\u00a4\u00a4\u00a4\u00a4\u00a6\u00a4\u00a6", 0, 4, 1); ns("(\\A\u00a4\u00a4\u00a4\u00a4)\u00a4\u00a6\u00a4\u00a6", "\u00a4\u00f3\u00a4\u00a4\u00a4\u00a4\u00a4\u00a6\u00a4\u00a6"); x3s("(^\u00a4\u00a4\u00a4\u00a4)\u00a4\u00a6\u00a4\u00a6", "\u00a4\u00a4\u00a4\u00a4\u00a4\u00a6\u00a4\u00a6", 0, 4, 1); ns("(^\u00a4\u00a4\u00a4\u00a4)\u00a4\u00a6\u00a4\u00a6", "\u00a4\u00f3\u00a4\u00a4\u00a4\u00a4\u00a4\u00a6\u00a4\u00a6"); x3s("\u00a4\u00ed\u00a4\u00ed(\u00a4\u00eb\u00a4\u00eb$)", 
"\u00a4\u00ed\u00a4\u00ed\u00a4\u00eb\u00a4\u00eb", 4, 8, 1); ns("\u00a4\u00ed\u00a4\u00ed(\u00a4\u00eb\u00a4\u00eb$)", "\u00a4\u00ed\u00a4\u00ed\u00a4\u00eb\u00a4\u00eb\u00a4\u00eb"); x2s("(\u011a\u00b5)\\1", "\u011a\u00b5\u011a\u00b5", 0, 4); ns("(\u011a\u00b5)\\1", "\u011a\u00b5\u00c9\u0111"); x2s("(\u00b6\u0151?)\\1", "\u00b6\u0151\u00b6\u0151", 0, 4); x2s("(\u00b6\u0151??)\\1", "\u00b6\u0151\u00b6\u0151", 0, 0); x2s("(\u00b6\u0151*)\\1", "\u00b6\u0151\u00b6\u0151\u00b6\u0151\u00b6\u0151\u00b6\u0151", 0, 8); x3s("(\u00b6\u0151*)\\1", "\u00b6\u0151\u00b6\u0151\u00b6\u0151\u00b6\u0151\u00b6\u0151", 0, 4, 1); x2s("\u00a4\u02d8(\u00a4\u00a4*)\\1", "\u00a4\u02d8\u00a4\u00a4\u00a4\u00a4\u00a4\u00a4\u00a4\u00a4", 0, 10); x2s("\u00a4\u02d8(\u00a4\u00a4*)\\1", "\u00a4\u02d8\u00a4\u00a4", 0, 2); x2s("(\u00a4\u02d8*)(\u00a4\u00a4*)\\1\\2", "\u00a4\u02d8\u00a4\u02d8\u00a4\u02d8\u00a4\u00a4\u00a4\u00a4\u00a4\u02d8\u00a4\u02d8\u00a4\u02d8\u00a4\u00a4\u00a4\u00a4", 0, 20); x2s("(\u00a4\u02d8*)(\u00a4\u00a4*)\\2", "\u00a4\u02d8\u00a4\u02d8\u00a4\u02d8\u00a4\u00a4\u00a4\u00a4\u00a4\u00a4\u00a4\u00a4", 0, 14); x3s("(\u00a4\u02d8*)(\u00a4\u00a4*)\\2", "\u00a4\u02d8\u00a4\u02d8\u00a4\u02d8\u00a4\u00a4\u00a4\u00a4\u00a4\u00a4\u00a4\u00a4", 6, 10, 2); x2s("(((((((\u00a4\u00dd*)\u00a4\u00da))))))\u00a4\u00d4\\7", "\u00a4\u00dd\u00a4\u00dd\u00a4\u00dd\u00a4\u00da\u00a4\u00d4\u00a4\u00dd\u00a4\u00dd\u00a4\u00dd", 0, 16); x3s("(((((((\u00a4\u00dd*)\u00a4\u00da))))))\u00a4\u00d4\\7", "\u00a4\u00dd\u00a4\u00dd\u00a4\u00dd\u00a4\u00da\u00a4\u00d4\u00a4\u00dd\u00a4\u00dd\u00a4\u00dd", 0, 6, 7); x2s("(\u00a4\u010e)(\u00a4\u0147)(\u00a4\u0150)\\2\\1\\3", "\u00a4\u010e\u00a4\u0147\u00a4\u0150\u00a4\u0147\u00a4\u010e\u00a4\u0150", 0, 12); x2s("([\u00a4\u00ad-\u00a4\u00b1])\\1", "\u00a4\u017b\u00a4\u017b", 0, 4); if (!org.joni.Config.NON_UNICODE_SDW) x2s("(\\w\\d\\s)\\1", "\u00a4\u02d85 \u00a4\u02d85 ", 0, 8); ns("(\\w\\d\\s)\\1", "\u00a4\u02d85 \u00a4\u02d85"); 
x2s("(\u0102\u017b\u02c7\u00a9|[\u00a4\u02d8-\u00a4\u00a6]{3})\\1", "\u0102\u017b\u02c7\u00a9\u0102\u017b\u02c7\u00a9", 0, 8); x2s("...(\u0102\u017b\u02c7\u00a9|[\u00a4\u02d8-\u00a4\u00a6]{3})\\1", "\u00a4\u02d8a\u00a4\u02d8\u0102\u017b\u02c7\u00a9\u0102\u017b\u02c7\u00a9", 0, 13); x2s("(\u0102\u017b\u02c7\u00a9|[\u00a4\u02d8-\u00a4\u00a6]{3})\\1", "\u00a4\u00a6\u00a4\u00a4\u00a4\u00a6\u00a4\u00a6\u00a4\u00a4\u00a4\u00a6", 0, 12); x2s("(^\u00a4\u0142)\\1", "\u00a4\u0142\u00a4\u0142", 0, 4); ns("(^\u00a4\u0155)\\1", "\u00a4\u00e1\u00a4\u0155\u00a4\u0155"); ns("(\u00a4\u02d8$)\\1", "\u00a4\u02d8\u00a4\u02d8"); ns("(\u00a4\u02d8\u00a4\u00a4\\Z)\\1", "\u00a4\u02d8\u00a4\u00a4"); x2s("(\u00a4\u02d8*\\Z)\\1", "\u00a4\u02d8", 2, 2); x2s(".(\u00a4\u02d8*\\Z)\\1", "\u00a4\u00a4\u00a4\u02d8", 2, 4); x3s("(.(\u00a4\u00e4\u00a4\u00a4\u00a4\u0107)\\2)", "z\u00a4\u00e4\u00a4\u00a4\u00a4\u0107\u00a4\u00e4\u00a4\u00a4\u00a4\u0107", 0, 13, 1); x3s("(.(..\\d.)\\2)", "\u00a4\u02d812341234", 0, 10, 1); x2s("((?i:\u00a4\u02d8v\u00a4\u015f))\\1", "\u00a4\u02d8v\u00a4\u015f\u00a4\u02d8v\u00a4\u015f", 0, 10); x2s("(?<\u00b6\u0148\u00a4\u00ab>\u0118\u0143|\\(\\g<\u00b6\u0148\u00a4\u00ab>\\))", "((((((\u0118\u0143))))))", 0, 14); x2s("\\A(?:\\g<\u00b0\u00a4_1>|\\g<\u00b1\u013e_2>|\\z\u02dd\u015e\u00ce\u00bb (?<\u00b0\u00a4_1>\u00b4\u0143|\u013d\u00ab\\g<\u00b1\u013e_2>\u013d\u00ab)(?<\u00b1\u013e_2>\u015f\u00df|\u0118\u00ee\u00bb\u00a7\\g<\u00b0\u00a4_1>\u0118\u00ee\u00bb\u00a7))$", "\u0118\u00ee\u00bb\u00a7\u013d\u00ab\u0118\u00ee\u00bb\u00a7\u013d\u00ab\u015f\u00df\u013d\u00ab\u0118\u00ee\u00bb\u00a7\u013d\u00ab\u0118\u00ee\u00bb\u00a7", 0, 26); x2s("[[\u00a4\u0147\u00a4\u0150]]", "\u00a4\u0150", 0, 2); x2s("[[\u00a4\u00a4\u00a4\u015e\u00a4\u00a6]\u00a4\u00ab]", "\u00a4\u00ab", 0, 2); ns("[[^\u00a4\u02d8]]", "\u00a4\u02d8"); ns("[^[\u00a4\u02d8]]", "\u00a4\u02d8"); x2s("[^[^\u00a4\u02d8]]", "\u00a4\u02d8", 0, 2); x2s("[[\u00a4\u00ab\u00a4\u00ad\u00a4\u017b]&&\u00a4\u00ad\u00a4\u017b]", 
"\u00a4\u017b", 0, 2); ns("[[\u00a4\u00ab\u00a4\u00ad\u00a4\u017b]&&\u00a4\u00ad\u00a4\u017b]", "\u00a4\u00ab"); ns("[[\u00a4\u00ab\u00a4\u00ad\u00a4\u017b]&&\u00a4\u00ad\u00a4\u017b]", "\u00a4\u00b1"); x2s("[\u00a4\u02d8-\u00a4\u00f3&&\u00a4\u00a4-\u00a4\u0148&&\u00a4\u00a6-\u00a4\u0144]", "\u00a4\u0144", 0, 2); ns("[^\u00a4\u02d8-\u00a4\u00f3&&\u00a4\u00a4-\u00a4\u0148&&\u00a4\u00a6-\u00a4\u0144]", "\u00a4\u0144"); x2s("[[^\u00a4\u02d8&&\u00a4\u02d8]&&\u00a4\u02d8-\u00a4\u00f3]", "\u00a4\u00a4", 0, 2); ns("[[^\u00a4\u02d8&&\u00a4\u02d8]&&\u00a4\u02d8-\u00a4\u00f3]", "\u00a4\u02d8"); x2s("[[^\u00a4\u02d8-\u00a4\u00f3&&\u00a4\u00a4\u00a4\u00a6\u00a4\u00a8\u00a4\u015e]&&[^\u00a4\u00a6-\u00a4\u00ab]]", "\u00a4\u00ad", 0, 2); ns("[[^\u00a4\u02d8-\u00a4\u00f3&&\u00a4\u00a4\u00a4\u00a6\u00a4\u00a8\u00a4\u015e]&&[^\u00a4\u00a6-\u00a4\u00ab]]", "\u00a4\u00a4"); x2s("[^[^\u00a4\u02d8\u00a4\u00a4\u00a4\u00a6]&&[^\u00a4\u00a6\u00a4\u00a8\u00a4\u015e]]", "\u00a4\u00a6", 0, 2); x2s("[^[^\u00a4\u02d8\u00a4\u00a4\u00a4\u00a6]&&[^\u00a4\u00a6\u00a4\u00a8\u00a4\u015e]]", "\u00a4\u00a8", 0, 2); ns("[^[^\u00a4\u02d8\u00a4\u00a4\u00a4\u00a6]&&[^\u00a4\u00a6\u00a4\u00a8\u00a4\u015e]]", "\u00a4\u00ab"); x2s("[\u00a4\u02d8-&&-\u00a4\u02d8]", "-", 0, 1); x2s("[^[^a-z\u00a4\u02d8\u00a4\u00a4\u00a4\u00a6]&&[^bcdefg\u00a4\u00a6\u00a4\u00a8\u00a4\u015e]q-w]", "\u00a4\u00a8", 0, 2); x2s("[^[^a-z\u00a4\u02d8\u00a4\u00a4\u00a4\u00a6]&&[^bcdefg\u00a4\u00a6\u00a4\u00a8\u00a4\u015e]g-w]", "f", 0, 1); x2s("[^[^a-z\u00a4\u02d8\u00a4\u00a4\u00a4\u00a6]&&[^bcdefg\u00a4\u00a6\u00a4\u00a8\u00a4\u015e]g-w]", "g", 0, 1); ns("[^[^a-z\u00a4\u02d8\u00a4\u00a4\u00a4\u00a6]&&[^bcdefg\u00a4\u00a6\u00a4\u00a8\u00a4\u015e]g-w]", "2"); x2s("a\u0104\u0110\u02c7\u013d\u0104\u00b8\u0104\u00e7\u0104\u00f3\u00a4\u00ce\u0104\u0154\u0104\u00a6\u0104\u00f3\u0104\u00ed\u02c7\u013d\u0104\u00c9<\\/b>", 
"a\u0104\u0110\u02c7\u013d\u0104\u00b8\u0104\u00e7\u0104\u00f3\u00a4\u00ce\u0104\u0154\u0104\u00a6\u0104\u00f3\u0104\u00ed\u02c7\u013d\u0104\u00c9", 0, 32); x2s(".\u0104\u0110\u02c7\u013d\u0104\u00b8\u0104\u00e7\u0104\u00f3\u00a4\u00ce\u0104\u0154\u0104\u00a6\u0104\u00f3\u0104\u00ed\u02c7\u013d\u0104\u00c9<\\/b>", "a\u0104\u0110\u02c7\u013d\u0104\u00b8\u0104\u00e7\u0104\u00f3\u00a4\u00ce\u0104\u0154\u0104\u00a6\u0104\u00f3\u0104\u00ed\u02c7\u013d\u0104\u00c9", 0, 32); } public static void main(String[] args) throws Throwable{ new TestC().run(); } } joni-2.0.0/test/org/joni/test/TestCornerCases.java000066400000000000000000000042031214326443200220460ustar00rootroot00000000000000/* * Permission is hereby granted, free of charge, to any person obtaining a copy of * this software and associated documentation files (the "Software"), to deal in * the Software without restriction, including without limitation the rights to * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do * so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ package org.joni.test; import org.joni.Config; import org.joni.Option; import org.joni.Regex; import org.joni.Region; import org.joni.Syntax; import org.jcodings.Encoding; import org.jcodings.specific.ASCIIEncoding; public class TestCornerCases extends Test { public int option() { return Option.DEFAULT; } public Encoding encoding() { return ASCIIEncoding.INSTANCE; } public String testEncoding() { return "cp1250"; } public Syntax syntax() { return Syntax.DEFAULT; } public void test() throws InterruptedException { byte[] reg = "l.".getBytes(); byte[] str = "hello,lo".getBytes(); Regex p = new Regex(reg,0,reg.length,Option.DEFAULT,ASCIIEncoding.INSTANCE,Syntax.DEFAULT); int result = p.matcher(str, 0, str.length).search(3, 0, Option.NONE); if(result != 3) { Config.log.println("FAIL: /l./ 'hello,lo' - with reverse, 3,0"); nfail++; } } public static void main(String[] args) throws Throwable{ new TestCornerCases().run(); } } joni-2.0.0/test/org/joni/test/TestCrnl.java000066400000000000000000000062711214326443200205440ustar00rootroot00000000000000/* * Permission is hereby granted, free of charge, to any person obtaining a copy of * this software and associated documentation files (the "Software"), to deal in * the Software without restriction, including without limitation the rights to * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do * so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ package org.joni.test; import org.joni.Config; import org.joni.Option; import org.joni.Syntax; import org.jcodings.Encoding; import org.jcodings.specific.ASCIIEncoding;
/**
 * Match/no-match checks whose subjects contain CR and CR LF sequences, run
 * with ASCIIEncoding, Option.DEFAULT and Syntax.DEFAULT. The hint printed at
 * the end of test() names USE_CRNL_AS_LINE_TERMINATOR, so the expectations
 * presumably only hold when that setting is enabled - TODO confirm.
 */
public class TestCrnl extends Test { public int option() { return Option.DEFAULT; } public Encoding encoding() { return ASCIIEncoding.INSTANCE; } public String testEncoding() { return "ascii"; } public Syntax syntax() { return Syntax.DEFAULT; } public void test() throws InterruptedException { x2s("", "\r\n", 0, 0); x2s(".", "\r\n", 0, 1); ns("..", "\r\n"); x2s("^", "\r\n", 0, 0); x2s("\\n^", "\r\nf", 1, 2); x2s("\\n^a", "\r\na", 1, 3); x2s("$", "\r\n", 0, 0); x2s("T$", "T\r\n", 0, 1); x2s("T$", "T\raT\r\n", 3, 4); x2s("\\z", "\r\n", 2, 2); ns("a\\z", "a\r\n"); x2s("\\Z", "\r\n", 0, 0); x2s("\\Z", "\r\na", 3, 3); x2s("\\Z", "\r\n\r\n\n", 4, 4); x2s("\\Z", "\r\n\r\nX", 5, 5); x2s("a\\Z", "a\r\n", 0, 1); x2s("aaaaaaaaaaaaaaa\\Z", "aaaaaaaaaaaaaaa\r\n", 0, 15); x2s("a|$", "b\r\n", 1, 1); x2s("$|b", "\rb", 1, 2); x2s("a$|ab$", "\r\nab\r\n", 2, 4); x2s("a|\\Z", "b\r\n", 1, 1); x2s("\\Z|b", "\rb", 1, 2); x2s("a\\Z|ab\\Z", "\r\nab\r\n", 2, 4); x2s("(?=a$).", "a\r\n", 0, 1); ns("(?=a$).", "a\r"); x2s("(?!a$)..", "a\r", 0, 2); x2s("(?<=a$).\\n", "a\r\n", 1, 3);
/* NOTE(review): the next call is damaged. Its pattern literal is never closed
   and runs straight into what looks like the tail of a failure-count condition
   guarding the hint below. Source text between a less-than sign and the next
   greater-than sign appears to have been stripped - TODO restore this span
   from the upstream joni sources before relying on this suite. */
ns("(? 0 || nerror > 0) Config.err.println("make sure to enable USE_CRNL_AS_LINE_TERMINATOR"); } public static void main(String[] args) throws Throwable{ new TestCrnl().run(); } } joni-2.0.0/test/org/joni/test/TestInterrupt.java000066400000000000000000000071321214326443200216370ustar00rootroot00000000000000/* * The MIT License * * Copyright 2013 enebo. 
* * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN * THE SOFTWARE. */ package org.joni.test; import java.util.Timer; import java.util.TimerTask; import org.jcodings.Encoding; import org.jcodings.specific.ASCIIEncoding; import org.joni.Matcher; import org.joni.Option; import org.joni.Syntax; /** * These are fairly long-running tests but we want a large time slice to reduce misfires * on slow ci boxes. 
*/
public class TestInterrupt extends Test {

    /** A unit of work that is allowed to end with an {@link InterruptedException}. */
    interface InterruptibleRunnable {
        public void run() throws InterruptedException;
    }

    /**
     * Runs the same slow pattern twice, interrupting the calling thread after
     * one second each time. The first run goes through {@code x2si}; the
     * second goes through {@code x2s}, whose return value must equal
     * {@link Matcher#INTERRUPTED}.
     */
    public void test() throws InterruptedException {
        interruptAfter(new InterruptibleRunnable() {
            public void run() throws InterruptedException {
                x2si("a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?aaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaa", 0, 0);
            }
        }, 1000, 15000);

        // One-element array so the anonymous class can hand the status back.
        final int[] status = new int[1];

        interruptAfter(new InterruptibleRunnable() {
            public void run() throws InterruptedException {
                status[0] = x2s("a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?aaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaa", 0, 0);
            }
        }, 1000, 15000);

        assertTrue(status[0] == Matcher.INTERRUPTED, "Status was not INTERRUPTED: " + status[0]);
    }

    /**
     * Runs {@code block} on the calling thread, interrupts that thread after
     * {@code delayBeforeInterrupt} milliseconds, and asserts that the block
     * returned less than {@code acceptableMaximumTime} milliseconds after the
     * interrupt was sent.
     *
     * @param block                 work expected to stop once interrupted
     * @param delayBeforeInterrupt  delay in milliseconds before the interrupt is sent
     * @param acceptableMaximumTime upper bound in milliseconds between the
     *                              interrupt and the return of {@code block}
     */
    private void interruptAfter(InterruptibleRunnable block, int delayBeforeInterrupt, int acceptableMaximumTime) {
        final long[] start = new long[1];
        final Thread currentThread = Thread.currentThread();
        final Timer timer = new Timer();

        timer.schedule(new TimerTask() {
            @Override
            public void run() {
                start[0] = System.currentTimeMillis();
                System.out.println("INTERRUPTING at " + start[0]);
                currentThread.interrupt();
            }
        }, delayBeforeInterrupt);

        try {
            block.run();
        } catch (InterruptedException expected) {
            // Being interrupted is the purpose of this test. The interrupt is
            // deliberately not re-asserted so it cannot leak into later checks.
        } finally {
            // Stops the timer's non-daemon thread, which would otherwise outlive
            // this call, and drops an interrupt that has not fired yet so it
            // cannot hit unrelated code if the block finished early.
            timer.cancel();
        }

        // If the interrupt never fired, start[0] is still 0 and the check below
        // fails because total is then the full epoch time.
        long total = System.currentTimeMillis() - start[0];
        System.out.println("Time taken: " + total);
        assertTrue(total < acceptableMaximumTime, "Took too long to interrupt: " + total + " > " + acceptableMaximumTime);
    }

    /** @return {@link Option#DEFAULT} */
    public int option() {
        return Option.DEFAULT;
    }

    /** @return {@link ASCIIEncoding#INSTANCE} */
    public Encoding encoding() {
        return ASCIIEncoding.INSTANCE;
    }

    /** @return the charset name {@code "iso-8859-2"} */
    public String testEncoding() {
        return "iso-8859-2";
    }

    /** @return {@link Syntax#DEFAULT} */
    public Syntax syntax() {
        return Syntax.DEFAULT;
    }
}
joni-2.0.0/test/org/joni/test/TestJoni.java000066400000000000000000000043201214326443200205360ustar00rootroot00000000000000/* * Permission is hereby granted, free of charge, to any person obtaining a copy of * this software and associated documentation files (the "Software"), to deal in * the 
Software without restriction, including without limitation the rights to * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do * so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ package org.joni.test; import junit.framework.TestCase; public class TestJoni extends TestCase { private Test testa; private Test testc; private Test testu; private Test testnsu8; private Test testLookBehind; private Test testu8; private Test testInterrupt; protected void setUp() { testa = new TestA(); testc = new TestC(); testu = new TestU(); testnsu8 = new TestNSU8(); testu8 = new TestU8(); testLookBehind = new TestLookBehind(); testInterrupt = new TestInterrupt(); } protected void tearDown() { } private void testJoniTest(Test test) { test.run(); assertEquals(test.nerror, 0); assertEquals(test.nfail, 0); } public void testAscii() { testJoniTest(testa); } public void testEUCJP() { testJoniTest(testc); } public void testUnicode() { testJoniTest(testu); testJoniTest(testnsu8); testJoniTest(testu8); } public void testLookBehind() { testJoniTest(testLookBehind); } public void testInterrupt() { testJoniTest(testInterrupt); } } joni-2.0.0/test/org/joni/test/TestLookBehind.java000066400000000000000000000032401214326443200216550ustar00rootroot00000000000000/* * Permission is hereby granted, free of 
charge, to any person obtaining a copy of * this software and associated documentation files (the "Software"), to deal in * the Software without restriction, including without limitation the rights to * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do * so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ package org.joni.test; import org.jcodings.Encoding; import org.jcodings.specific.ASCIIEncoding; import org.joni.Option; import org.joni.Syntax; public class TestLookBehind extends Test { public int option() { return Option.DEFAULT; } public Encoding encoding() { return ASCIIEncoding.INSTANCE; } public String testEncoding() { return "iso-8859-1"; } public Syntax syntax() { return Syntax.DEFAULT; } @Override public void test() throws InterruptedException { x2s("(?<=a).*b", "aab", 1, 3); } public static void main(String[] args) throws Throwable { new TestLookBehind().run(); } } joni-2.0.0/test/org/joni/test/TestNSU8.java000066400000000000000000000055351214326443200204050ustar00rootroot00000000000000/* * Permission is hereby granted, free of charge, to any person obtaining a copy of * this software and associated documentation files (the "Software"), to deal in * the Software without restriction, including without limitation the rights to * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do * so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ package org.joni.test; import org.joni.Option; import org.joni.Syntax; import org.jcodings.Encoding; import org.jcodings.specific.NonStrictUTF8Encoding; public class TestNSU8 extends Test { public int option() { return Option.DEFAULT; } public Encoding encoding() { return NonStrictUTF8Encoding.INSTANCE; } public String testEncoding() { return "utf-8"; } public Syntax syntax() { return Syntax.DEFAULT; } public void test() throws InterruptedException { xx("([^\\[\\]]+)".getBytes(), new byte[]{(byte)240, (byte)32, (byte)32, (byte)32, (byte)32}, 0, 5, 1, false); xx("([^\\[\\]]+)".getBytes(), new byte[]{(byte)240, (byte)32, (byte)32, (byte)32}, 0, 4, 1, false); xx("([^\\[\\]]+)".getBytes(), new byte[]{(byte)240, (byte)32, (byte)32}, 0, 3, 1, false); xx("([^\\[\\]]+)".getBytes(), new byte[]{(byte)240, (byte)32}, 0, 2, 1, false); xx("([^\\[\\]]+)".getBytes(), new byte[]{(byte)240}, 0, 1, 1, false); xx("([^\\[\\]]+)".getBytes(), new byte[]{(byte)224, (byte)32, (byte)32, (byte)32}, 0, 4, 1, false); xx("([^\\[\\]]+)".getBytes(), new byte[]{(byte)224, (byte)32, (byte)32}, 0, 3, 1, false); xx("([^\\[\\]]+)".getBytes(), new byte[]{(byte)224, (byte)32}, 0, 2, 1, false); xx("([^\\[\\]]+)".getBytes(), new byte[]{(byte)224}, 0, 1, 1, false); xx("([^\\[\\]]+)".getBytes(), new byte[]{(byte)192, (byte)32, (byte)32}, 0, 3, 1, false); xx("([^\\[\\]]+)".getBytes(), new byte[]{(byte)192, (byte)32}, 0, 2, 1, false); xx("([^\\[\\]]+)".getBytes(), new byte[]{(byte)192}, 0, 1, 1, false); } public static void main(String[] args) throws Throwable { new TestNSU8().run(); } } joni-2.0.0/test/org/joni/test/TestU.java000066400000000000000000002245731214326443200200610ustar00rootroot00000000000000/* * Permission is hereby granted, free of charge, to any person obtaining a copy of * this software and associated documentation files (the "Software"), to deal in * the Software without restriction, including without limitation the rights to * use, copy, modify, merge, publish, distribute, sublicense, 
and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do * so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ package org.joni.test; import org.joni.Option; import org.joni.Syntax; import org.jcodings.Encoding; import org.jcodings.specific.UTF16BEEncoding; public class TestU extends Test { public int option() { return Option.DEFAULT; } public Encoding encoding() { return UTF16BEEncoding.INSTANCE; } public String testEncoding() { return "iso-8859-1"; } public Syntax syntax() { return Syntax.DEFAULT; } private int ulen(byte[]bytes) { return encoding().strByteLengthNull(bytes, 0, bytes.length); } private String uconv(byte []bytes, int len) { StringBuilder sb = new StringBuilder(); for (int i = 0; i < len; i += 2) { int c = bytes[i] & 0xff; // sb.append(String.format("\\%03o", c)); if (c == 0) { c = bytes[i+1] & 0xff; if (c < 0x20 || c >= 0x7f || c == 0x5c || c == 0x22) { sb.append(String.format("\\%03o", c)); } else { sb.append(new String(new byte[]{(byte)c})); } } else { sb.append(String.format("\\%03o", c)); c = bytes[i+1] & 0xff; sb.append(String.format("\\%03o", c)); } } return sb.toString(); } protected String repr(byte[]bytes) { return uconv(bytes, ulen(bytes)); } protected int length(byte[]bytes) { return ulen(bytes); } public void test() throws InterruptedException { x2s("\000\000", "\000\000", 0, 0); x2s("\000^\000\000", 
"\000\000", 0, 0); x2s("\000$\000\000", "\000\000", 0, 0); x2s("\000\134\000G\000\000", "\000\000", 0, 0); x2s("\000\134\000A\000\000", "\000\000", 0, 0); x2s("\000\134\000Z\000\000", "\000\000", 0, 0); x2s("\000\134\000z\000\000", "\000\000", 0, 0); x2s("\000^\000$\000\000", "\000\000", 0, 0); x2s("\000\134\000c\000a\000\000", "\000\001\000\000", 0, 2); x2s("\000\134\000C\000-\000b\000\000", "\000\002\000\000", 0, 2); x2s("\000\134\000c\000\134\000\134\000\000", "\000\034\000\000", 0, 2); x2s("\000q\000[\000\134\000c\000\134\000\134\000]\000\000", "\000q\000\034\000\000", 0, 4); x2s("\000\000", "\000a\000\000", 0, 0); x2s("\000a\000\000", "\000a\000\000", 0, 2); x2s("\000\134\000x\0000\0000\000\134\000x\0006\0001\000\000", "\000a\000\000", 0, 2); x2s("\000a\000a\000\000", "\000a\000a\000\000", 0, 4); x2s("\000a\000a\000a\000\000", "\000a\000a\000a\000\000", 0, 6); x2s("\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000\000", "\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000\000", 0, 70); x2s("\000a\000b\000\000", "\000a\000b\000\000", 0, 4); x2s("\000b\000\000", "\000a\000b\000\000", 2, 4); x2s("\000b\000c\000\000", "\000a\000b\000c\000\000", 2, 6); x2s("\000(\000?\000i\000:\000#\000R\000E\000T\000#\000)\000\000", "\000#\000I\000N\000S\000#\000#\000R\000E\000T\000#\000\000", 10, 20); x2s("\000\134\0000\0000\0000\000\134\0001\0007\000\000", "\000\017\000\000", 0, 2); x2s("\000\134\000x\0000\0000\000\134\000x\0001\000f\000\000", "\000\037\000\000", 0, 2); x2s("\000a\000(\000?\000#\000.\000.\000.\000.\000\134\000\134\000J\000J\000J\000J\000)\000b\000\000", "\000a\000b\000\000", 0, 4); x2s("\000(\000?\000x\000)\000 \000 \000G\000 \000(\000o\000 \000O\000(\000?\000-\000x\000)\000o\000O\000)\000 \000g\000 
\000L\000\000", "\000G\000o\000O\000o\000O\000g\000L\000e\000\000", 0, 14); x2s("\000.\000\000", "\000a\000\000", 0, 2); ns("\000.\000\000", "\000\000"); x2s("\000.\000.\000\000", "\000a\000b\000\000", 0, 4); if (!org.joni.Config.NON_UNICODE_SDW) x2s("\000\134\000w\000\000", "\000e\000\000", 0, 2); if (!org.joni.Config.NON_UNICODE_SDW) ns("\000\134\000W\000\000", "\000e\000\000"); if (!org.joni.Config.NON_UNICODE_SDW) x2s("\000\134\000s\000\000", "\000 \000\000", 0, 2); x2s("\000\134\000S\000\000", "\000b\000\000", 0, 2); if (!org.joni.Config.NON_UNICODE_SDW) x2s("\000\134\000d\000\000", "\0004\000\000", 0, 2); if (!org.joni.Config.NON_UNICODE_SDW) ns("\000\134\000D\000\000", "\0004\000\000"); x2s("\000\134\000b\000\000", "\000z\000 \000\000", 0, 0); x2s("\000\134\000b\000\000", "\000 \000z\000\000", 2, 2); x2s("\000\134\000B\000\000", "\000z\000z\000 \000\000", 2, 2); x2s("\000\134\000B\000\000", "\000z\000 \000\000", 4, 4); x2s("\000\134\000B\000\000", "\000 \000z\000\000", 0, 0); x2s("\000[\000a\000b\000]\000\000", "\000b\000\000", 0, 2); ns("\000[\000a\000b\000]\000\000", "\000c\000\000"); x2s("\000[\000a\000-\000z\000]\000\000", "\000t\000\000", 0, 2); ns("\000[\000^\000a\000]\000\000", "\000a\000\000"); x2s("\000[\000^\000a\000]\000\000", "\000\012\000\000", 0, 2); x2s("\000[\000]\000]\000\000", "\000]\000\000", 0, 2); ns("\000[\000^\000]\000]\000\000", "\000]\000\000"); x2s("\000[\000\134\000^\000]\000+\000\000", "\0000\000^\000^\0001\000\000", 2, 6); x2s("\000[\000b\000-\000]\000\000", "\000b\000\000", 0, 2); x2s("\000[\000b\000-\000]\000\000", "\000-\000\000", 0, 2); if (!org.joni.Config.NON_UNICODE_SDW) x2s("\000[\000\134\000w\000]\000\000", "\000z\000\000", 0, 2); ns("\000[\000\134\000w\000]\000\000", "\000 \000\000"); if (!org.joni.Config.NON_UNICODE_SDW) x2s("\000[\000\134\000W\000]\000\000", "\000b\000$\000\000", 2, 4); if (!org.joni.Config.NON_UNICODE_SDW) x2s("\000[\000\134\000d\000]\000\000", "\0005\000\000", 0, 2); 
ns("\000[\000\134\000d\000]\000\000", "\000e\000\000"); x2s("\000[\000\134\000D\000]\000\000", "\000t\000\000", 0, 2); if (!org.joni.Config.NON_UNICODE_SDW) ns("\000[\000\134\000D\000]\000\000", "\0003\000\000"); if (!org.joni.Config.NON_UNICODE_SDW) x2s("\000[\000\134\000s\000]\000\000", "\000 \000\000", 0, 2); ns("\000[\000\134\000s\000]\000\000", "\000a\000\000"); x2s("\000[\000\134\000S\000]\000\000", "\000b\000\000", 0, 2); if (!org.joni.Config.NON_UNICODE_SDW) ns("\000[\000\134\000S\000]\000\000", "\000 \000\000"); if (!org.joni.Config.NON_UNICODE_SDW) x2s("\000[\000\134\000w\000\134\000d\000]\000\000", "\0002\000\000", 0, 2); ns("\000[\000\134\000w\000\134\000d\000]\000\000", "\000 \000\000"); x2s("\000[\000[\000:\000u\000p\000p\000e\000r\000:\000]\000]\000\000", "\000B\000\000", 0, 2); x2s("\000[\000*\000[\000:\000x\000d\000i\000g\000i\000t\000:\000]\000+\000]\000\000", "\000+\000\000", 0, 2); x2s("\000[\000*\000[\000:\000x\000d\000i\000g\000i\000t\000:\000]\000+\000]\000\000", "\000G\000H\000I\000K\000K\000-\0009\000+\000*\000\000", 12, 14); x2s("\000[\000*\000[\000:\000x\000d\000i\000g\000i\000t\000:\000]\000+\000]\000\000", "\000-\000@\000^\000+\000\000", 6, 8); ns("\000[\000[\000:\000u\000p\000p\000e\000r\000]\000]\000\000", "\000A\000\000"); x2s("\000[\000[\000:\000u\000p\000p\000e\000r\000]\000]\000\000", "\000:\000\000", 0, 2); x2s("\000[\000\134\0000\0000\0000\000\134\0000\0004\0004\000-\000\134\0000\0000\0000\000\134\0000\0004\0007\000]\000\000", "\000&\000\000", 0, 2); x2s("\000[\000\134\000x\0000\0000\000\134\000x\0005\000a\000-\000\134\000x\0000\0000\000\134\000x\0005\000c\000]\000\000", "\000[\000\000", 0, 2); x2s("\000[\000\134\000x\0000\0000\000\134\000x\0006\000A\000-\000\134\000x\0000\0000\000\134\000x\0006\000D\000]\000\000", "\000l\000\000", 0, 2); ns("\000[\000\134\000x\0000\0000\000\134\000x\0006\000A\000-\000\134\000x\0000\0000\000\134\000x\0006\000D\000]\000\000", "\000n\000\000"); 
ns("\000^\000[\0000\000-\0009\000A\000-\000F\000]\000+\000 \0000\000+\000 \000U\000N\000D\000E\000F\000 \000\000", "\0007\0005\000F\000 \0000\0000\0000\0000\0000\0000\0000\0000\000 \000S\000E\000C\000T\0001\0004\000A\000 \000n\000o\000t\000y\000p\000e\000 \000(\000)\000 \000 \000 \000 \000E\000x\000t\000e\000r\000n\000a\000l\000 \000 \000 \000 \000|\000 \000_\000r\000b\000_\000a\000p\000p\000l\000y\000\000"); x2s("\000[\000\134\000[\000]\000\000", "\000[\000\000", 0, 2); x2s("\000[\000\134\000]\000]\000\000", "\000]\000\000", 0, 2); x2s("\000[\000&\000]\000\000", "\000&\000\000", 0, 2); x2s("\000[\000[\000a\000b\000]\000]\000\000", "\000b\000\000", 0, 2); x2s("\000[\000[\000a\000b\000]\000c\000]\000\000", "\000c\000\000", 0, 2); ns("\000[\000[\000^\000a\000]\000]\000\000", "\000a\000\000"); ns("\000[\000^\000[\000a\000]\000]\000\000", "\000a\000\000"); x2s("\000[\000[\000a\000b\000]\000&\000&\000b\000c\000]\000\000", "\000b\000\000", 0, 2); ns("\000[\000[\000a\000b\000]\000&\000&\000b\000c\000]\000\000", "\000a\000\000"); ns("\000[\000[\000a\000b\000]\000&\000&\000b\000c\000]\000\000", "\000c\000\000"); x2s("\000[\000a\000-\000z\000&\000&\000b\000-\000y\000&\000&\000c\000-\000x\000]\000\000", "\000w\000\000", 0, 2); ns("\000[\000^\000a\000-\000z\000&\000&\000b\000-\000y\000&\000&\000c\000-\000x\000]\000\000", "\000w\000\000"); x2s("\000[\000[\000^\000a\000&\000&\000a\000]\000&\000&\000a\000-\000z\000]\000\000", "\000b\000\000", 0, 2); ns("\000[\000[\000^\000a\000&\000&\000a\000]\000&\000&\000a\000-\000z\000]\000\000", "\000a\000\000"); x2s("\000[\000[\000^\000a\000-\000z\000&\000&\000b\000c\000d\000e\000f\000]\000&\000&\000[\000^\000c\000-\000g\000]\000]\000\000", "\000h\000\000", 0, 2); ns("\000[\000[\000^\000a\000-\000z\000&\000&\000b\000c\000d\000e\000f\000]\000&\000&\000[\000^\000c\000-\000g\000]\000]\000\000", "\000c\000\000"); x2s("\000[\000^\000[\000^\000a\000b\000c\000]\000&\000&\000[\000^\000c\000d\000e\000]\000]\000\000", "\000c\000\000", 0, 2); 
x2s("\000[\000^\000[\000^\000a\000b\000c\000]\000&\000&\000[\000^\000c\000d\000e\000]\000]\000\000", "\000e\000\000", 0, 2); ns("\000[\000^\000[\000^\000a\000b\000c\000]\000&\000&\000[\000^\000c\000d\000e\000]\000]\000\000", "\000f\000\000"); x2s("\000[\000a\000-\000&\000&\000-\000a\000]\000\000", "\000-\000\000", 0, 2); ns("\000[\000a\000\134\000-\000&\000&\000\134\000-\000a\000]\000\000", "\000&\000\000"); ns("\000\134\000w\000a\000b\000c\000\000", "\000 \000a\000b\000c\000\000"); x2s("\000a\000\134\000W\000b\000c\000\000", "\000a\000 \000b\000c\000\000", 0, 8); x2s("\000a\000.\000b\000.\000c\000\000", "\000a\000a\000b\000b\000c\000\000", 0, 10); if (!org.joni.Config.NON_UNICODE_SDW) x2s("\000.\000\134\000w\000b\000\134\000W\000.\000.\000c\000\000", "\000a\000b\000b\000 \000b\000c\000c\000\000", 0, 14); if (!org.joni.Config.NON_UNICODE_SDW) x2s("\000\134\000s\000\134\000w\000z\000z\000z\000\000", "\000 \000z\000z\000z\000z\000\000", 0, 10); x2s("\000a\000a\000.\000b\000\000", "\000a\000a\000b\000b\000\000", 0, 8); ns("\000.\000a\000\000", "\000a\000b\000\000"); x2s("\000.\000a\000\000", "\000a\000a\000\000", 0, 4); x2s("\000^\000a\000\000", "\000a\000\000", 0, 2); x2s("\000^\000a\000$\000\000", "\000a\000\000", 0, 2); if (!org.joni.Config.NON_UNICODE_SDW) x2s("\000^\000\134\000w\000$\000\000", "\000a\000\000", 0, 2); ns("\000^\000\134\000w\000$\000\000", "\000 \000\000"); if (!org.joni.Config.NON_UNICODE_SDW) x2s("\000^\000\134\000w\000a\000b\000$\000\000", "\000z\000a\000b\000\000", 0, 6); if (!org.joni.Config.NON_UNICODE_SDW) x2s("\000^\000\134\000w\000a\000b\000c\000d\000e\000f\000$\000\000", "\000z\000a\000b\000c\000d\000e\000f\000\000", 0, 14); if (!org.joni.Config.NON_UNICODE_SDW) x2s("\000^\000\134\000w\000.\000.\000.\000d\000e\000f\000$\000\000", "\000z\000a\000b\000c\000d\000e\000f\000\000", 0, 14); if (!org.joni.Config.NON_UNICODE_SDW) x2s("\000\134\000w\000\134\000w\000\134\000s\000\134\000W\000a\000a\000a\000\134\000d\000\000", "\000a\000a\000 \000 
\000a\000a\000a\0004\000\000", 0, 16); x2s("\000\134\000A\000\134\000Z\000\000", "\000\000", 0, 0); x2s("\000\134\000A\000x\000y\000z\000\000", "\000x\000y\000z\000\000", 0, 6); x2s("\000x\000y\000z\000\134\000Z\000\000", "\000x\000y\000z\000\000", 0, 6); x2s("\000x\000y\000z\000\134\000z\000\000", "\000x\000y\000z\000\000", 0, 6); x2s("\000a\000\134\000Z\000\000", "\000a\000\000", 0, 2); x2s("\000\134\000G\000a\000z\000\000", "\000a\000z\000\000", 0, 4); ns("\000\134\000G\000z\000\000", "\000b\000z\000a\000\000"); ns("\000a\000z\000\134\000G\000\000", "\000a\000z\000\000"); ns("\000a\000z\000\134\000A\000\000", "\000a\000z\000\000"); ns("\000a\000\134\000A\000z\000\000", "\000a\000z\000\000"); x2s("\000\134\000^\000\134\000$\000\000", "\000^\000$\000\000", 0, 4); x2s("\000^\000x\000?\000y\000\000", "\000x\000y\000\000", 0, 4); x2s("\000^\000(\000x\000?\000y\000)\000\000", "\000x\000y\000\000", 0, 4); if (!org.joni.Config.NON_UNICODE_SDW) x2s("\000\134\000w\000\000", "\000_\000\000", 0, 2); if (!org.joni.Config.NON_UNICODE_SDW) ns("\000\134\000W\000\000", "\000_\000\000"); x2s("\000(\000?\000=\000z\000)\000z\000\000", "\000z\000\000", 0, 2); ns("\000(\000?\000=\000z\000)\000.\000\000", "\000a\000\000"); x2s("\000(\000?\000!\000z\000)\000a\000\000", "\000a\000\000", 0, 2); ns("\000(\000?\000!\000z\000)\000a\000\000", "\000z\000\000"); x2s("\000(\000?\000i\000:\000a\000)\000\000", "\000a\000\000", 0, 2); x2s("\000(\000?\000i\000:\000a\000)\000\000", "\000A\000\000", 0, 2); x2s("\000(\000?\000i\000:\000A\000)\000\000", "\000a\000\000", 0, 2); ns("\000(\000?\000i\000:\000A\000)\000\000", "\000b\000\000"); x2s("\000(\000?\000i\000:\000[\000A\000-\000Z\000]\000)\000\000", "\000a\000\000", 0, 2); x2s("\000(\000?\000i\000:\000[\000f\000-\000m\000]\000)\000\000", "\000H\000\000", 0, 2); x2s("\000(\000?\000i\000:\000[\000f\000-\000m\000]\000)\000\000", "\000h\000\000", 0, 2); ns("\000(\000?\000i\000:\000[\000f\000-\000m\000]\000)\000\000", "\000e\000\000"); 
x2s("\000(\000?\000i\000:\000[\000A\000-\000c\000]\000)\000\000", "\000D\000\000", 0, 2); ns("\000(\000?\000i\000:\000[\000^\000a\000-\000z\000]\000)\000\000", "\000A\000\000"); ns("\000(\000?\000i\000:\000[\000^\000a\000-\000z\000]\000)\000\000", "\000a\000\000"); x2s("\000(\000?\000i\000:\000[\000!\000-\000k\000]\000)\000\000", "\000Z\000\000", 0, 2); x2s("\000(\000?\000i\000:\000[\000!\000-\000k\000]\000)\000\000", "\0007\000\000", 0, 2); x2s("\000(\000?\000i\000:\000[\000T\000-\000}\000]\000)\000\000", "\000b\000\000", 0, 2); x2s("\000(\000?\000i\000:\000[\000T\000-\000}\000]\000)\000\000", "\000{\000\000", 0, 2); x2s("\000(\000?\000i\000:\000\134\000?\000a\000)\000\000", "\000?\000A\000\000", 0, 4); x2s("\000(\000?\000i\000:\000\134\000*\000A\000)\000\000", "\000*\000a\000\000", 0, 4); ns("\000.\000\000", "\000\012\000\000"); x2s("\000(\000?\000m\000:\000.\000)\000\000", "\000\012\000\000", 0, 2); x2s("\000(\000?\000m\000:\000a\000.\000)\000\000", "\000a\000\012\000\000", 0, 4); x2s("\000(\000?\000m\000:\000.\000b\000)\000\000", "\000a\000\012\000b\000\000", 2, 6); x2s("\000.\000*\000a\000b\000c\000\000", "\000d\000d\000d\000a\000b\000d\000d\000\012\000d\000d\000a\000b\000c\000\000", 16, 26); x2s("\000(\000?\000m\000:\000.\000*\000a\000b\000c\000)\000\000", "\000d\000d\000d\000a\000b\000d\000d\000a\000b\000c\000\000", 0, 20); ns("\000(\000?\000i\000)\000(\000?\000-\000i\000)\000a\000\000", "\000A\000\000"); ns("\000(\000?\000i\000)\000(\000?\000-\000i\000:\000a\000)\000\000", "\000A\000\000"); x2s("\000a\000?\000\000", "\000\000", 0, 0); x2s("\000a\000?\000\000", "\000b\000\000", 0, 0); x2s("\000a\000?\000\000", "\000a\000\000", 0, 2); x2s("\000a\000*\000\000", "\000\000", 0, 0); x2s("\000a\000*\000\000", "\000a\000\000", 0, 2); x2s("\000a\000*\000\000", "\000a\000a\000a\000\000", 0, 6); x2s("\000a\000*\000\000", "\000b\000a\000a\000a\000a\000\000", 0, 0); ns("\000a\000+\000\000", "\000\000"); x2s("\000a\000+\000\000", "\000a\000\000", 0, 2); 
x2s("\000a\000+\000\000", "\000a\000a\000a\000a\000\000", 0, 8); x2s("\000a\000+\000\000", "\000a\000a\000b\000b\000b\000\000", 0, 4); x2s("\000a\000+\000\000", "\000b\000a\000a\000a\000a\000\000", 2, 10); x2s("\000.\000?\000\000", "\000\000", 0, 0); x2s("\000.\000?\000\000", "\000f\000\000", 0, 2); x2s("\000.\000?\000\000", "\000\012\000\000", 0, 0); x2s("\000.\000*\000\000", "\000\000", 0, 0); x2s("\000.\000*\000\000", "\000a\000b\000c\000d\000e\000\000", 0, 10); x2s("\000.\000+\000\000", "\000z\000\000", 0, 2); x2s("\000.\000+\000\000", "\000z\000d\000s\000w\000e\000r\000\012\000\000", 0, 12); x2s("\000(\000.\000*\000)\000a\000\134\0001\000f\000\000", "\000b\000a\000b\000f\000b\000a\000c\000\000", 0, 8); x2s("\000(\000.\000*\000)\000a\000\134\0001\000f\000\000", "\000b\000a\000c\000b\000a\000b\000f\000\000", 6, 14); x2s("\000(\000(\000.\000*\000)\000a\000\134\0002\000f\000)\000\000", "\000b\000a\000c\000b\000a\000b\000f\000\000", 6, 14); x2s("\000(\000.\000*\000)\000a\000\134\0001\000f\000\000", "\000b\000a\000c\000z\000z\000z\000z\000z\000z\000\012\000b\000a\000z\000z\000\012\000z\000z\000z\000z\000b\000a\000b\000f\000\000", 38, 46); x2s("\000a\000|\000b\000\000", "\000a\000\000", 0, 2); x2s("\000a\000|\000b\000\000", "\000b\000\000", 0, 2); x2s("\000|\000a\000\000", "\000a\000\000", 0, 0); x2s("\000(\000|\000a\000)\000\000", "\000a\000\000", 0, 0); x2s("\000a\000b\000|\000b\000c\000\000", "\000a\000b\000\000", 0, 4); x2s("\000a\000b\000|\000b\000c\000\000", "\000b\000c\000\000", 0, 4); x2s("\000z\000(\000?\000:\000a\000b\000|\000b\000c\000)\000\000", "\000z\000b\000c\000\000", 0, 6); x2s("\000a\000(\000?\000:\000a\000b\000|\000b\000c\000)\000c\000\000", "\000a\000a\000b\000c\000\000", 0, 8); x2s("\000a\000b\000|\000(\000?\000:\000a\000c\000|\000a\000z\000)\000\000", "\000a\000z\000\000", 0, 4); x2s("\000a\000|\000b\000|\000c\000\000", "\000d\000c\000\000", 2, 4); 
x2s("\000a\000|\000b\000|\000c\000d\000|\000e\000f\000g\000|\000h\000|\000i\000j\000k\000|\000l\000m\000n\000|\000o\000|\000p\000q\000|\000r\000s\000t\000u\000v\000w\000x\000|\000y\000z\000\000", "\000p\000q\000r\000\000", 0, 4); ns("\000a\000|\000b\000|\000c\000d\000|\000e\000f\000g\000|\000h\000|\000i\000j\000k\000|\000l\000m\000n\000|\000o\000|\000p\000q\000|\000r\000s\000t\000u\000v\000w\000x\000|\000y\000z\000\000", "\000m\000n\000\000"); x2s("\000a\000|\000^\000z\000\000", "\000b\000a\000\000", 2, 4); x2s("\000a\000|\000^\000z\000\000", "\000z\000a\000\000", 0, 2); x2s("\000a\000|\000\134\000G\000z\000\000", "\000b\000z\000a\000\000", 4, 6); x2s("\000a\000|\000\134\000G\000z\000\000", "\000z\000a\000\000", 0, 2); x2s("\000a\000|\000\134\000A\000z\000\000", "\000b\000z\000a\000\000", 4, 6); x2s("\000a\000|\000\134\000A\000z\000\000", "\000z\000a\000\000", 0, 2); x2s("\000a\000|\000b\000\134\000Z\000\000", "\000b\000a\000\000", 2, 4); x2s("\000a\000|\000b\000\134\000Z\000\000", "\000b\000\000", 0, 2); x2s("\000a\000|\000b\000\134\000z\000\000", "\000b\000a\000\000", 2, 4); x2s("\000a\000|\000b\000\134\000z\000\000", "\000b\000\000", 0, 2); if (!org.joni.Config.NON_UNICODE_SDW) x2s("\000\134\000w\000|\000\134\000s\000\000", "\000 \000\000", 0, 2); ns("\000\134\000w\000|\000\134\000w\000\000", "\000 \000\000"); x2s("\000\134\000w\000|\000%\000\000", "\000%\000\000", 0, 2); x2s("\000\134\000w\000|\000[\000&\000$\000]\000\000", "\000&\000\000", 0, 2); x2s("\000[\000b\000-\000d\000]\000|\000[\000^\000e\000-\000z\000]\000\000", "\000a\000\000", 0, 2); x2s("\000(\000?\000:\000a\000|\000[\000c\000-\000f\000]\000)\000|\000b\000z\000\000", "\000d\000z\000\000", 0, 2); x2s("\000(\000?\000:\000a\000|\000[\000c\000-\000f\000]\000)\000|\000b\000z\000\000", "\000b\000z\000\000", 0, 4); x2s("\000a\000b\000c\000|\000(\000?\000=\000z\000z\000)\000.\000.\000f\000\000", "\000z\000z\000f\000\000", 0, 6); 
x2s("\000a\000b\000c\000|\000(\000?\000!\000z\000z\000)\000.\000.\000f\000\000", "\000a\000b\000f\000\000", 0, 6); x2s("\000(\000?\000=\000z\000a\000)\000.\000.\000a\000|\000(\000?\000=\000z\000z\000)\000.\000.\000a\000\000", "\000z\000z\000a\000\000", 0, 6); ns("\000(\000?\000>\000a\000|\000a\000b\000d\000)\000c\000\000", "\000a\000b\000d\000c\000\000"); x2s("\000(\000?\000>\000a\000b\000d\000|\000a\000)\000c\000\000", "\000a\000b\000d\000c\000\000", 0, 8); x2s("\000a\000?\000|\000b\000\000", "\000a\000\000", 0, 2); x2s("\000a\000?\000|\000b\000\000", "\000b\000\000", 0, 0); x2s("\000a\000?\000|\000b\000\000", "\000\000", 0, 0); x2s("\000a\000*\000|\000b\000\000", "\000a\000a\000\000", 0, 4); x2s("\000a\000*\000|\000b\000*\000\000", "\000b\000a\000\000", 0, 0); x2s("\000a\000*\000|\000b\000*\000\000", "\000a\000b\000\000", 0, 2); x2s("\000a\000+\000|\000b\000*\000\000", "\000\000", 0, 0); x2s("\000a\000+\000|\000b\000*\000\000", "\000b\000b\000b\000\000", 0, 6); x2s("\000a\000+\000|\000b\000*\000\000", "\000a\000b\000b\000b\000\000", 0, 2); ns("\000a\000+\000|\000b\000+\000\000", "\000\000"); x2s("\000(\000a\000|\000b\000)\000?\000\000", "\000b\000\000", 0, 2); x2s("\000(\000a\000|\000b\000)\000*\000\000", "\000b\000a\000\000", 0, 4); x2s("\000(\000a\000|\000b\000)\000+\000\000", "\000b\000a\000b\000\000", 0, 6); x2s("\000(\000a\000b\000|\000c\000a\000)\000+\000\000", "\000c\000a\000a\000b\000b\000c\000\000", 0, 8); x2s("\000(\000a\000b\000|\000c\000a\000)\000+\000\000", "\000a\000a\000b\000c\000a\000\000", 2, 10); x2s("\000(\000a\000b\000|\000c\000a\000)\000+\000\000", "\000a\000b\000z\000c\000a\000\000", 0, 4); x2s("\000(\000a\000|\000b\000a\000b\000)\000+\000\000", "\000a\000b\000a\000b\000a\000\000", 0, 10); x2s("\000(\000a\000|\000b\000a\000b\000)\000+\000\000", "\000b\000a\000\000", 2, 4); x2s("\000(\000a\000|\000b\000a\000b\000)\000+\000\000", "\000b\000a\000a\000a\000b\000a\000\000", 2, 8); 
x2s("\000(\000?\000:\000a\000|\000b\000)\000(\000?\000:\000a\000|\000b\000)\000\000", "\000a\000b\000\000", 0, 4); x2s("\000(\000?\000:\000a\000*\000|\000b\000*\000)\000(\000?\000:\000a\000*\000|\000b\000*\000)\000\000", "\000a\000a\000a\000b\000b\000b\000\000", 0, 6); x2s("\000(\000?\000:\000a\000*\000|\000b\000*\000)\000(\000?\000:\000a\000+\000|\000b\000+\000)\000\000", "\000a\000a\000a\000b\000b\000b\000\000", 0, 12); x2s("\000(\000?\000:\000a\000+\000|\000b\000+\000)\000{\0002\000}\000\000", "\000a\000a\000a\000b\000b\000b\000\000", 0, 12); x2s("\000h\000{\0000\000,\000}\000\000", "\000h\000h\000h\000h\000\000", 0, 8); x2s("\000(\000?\000:\000a\000+\000|\000b\000+\000)\000{\0001\000,\0002\000}\000\000", "\000a\000a\000a\000b\000b\000b\000\000", 0, 12); ns("\000a\000x\000{\0002\000}\000*\000a\000\000", "\0000\000a\000x\000x\000x\000a\0001\000\000"); ns("\000a\000.\000{\0000\000,\0002\000}\000a\000\000", "\0000\000a\000X\000X\000X\000a\0000\000\000"); ns("\000a\000.\000{\0000\000,\0002\000}\000?\000a\000\000", "\0000\000a\000X\000X\000X\000a\0000\000\000"); ns("\000a\000.\000{\0000\000,\0002\000}\000?\000a\000\000", "\0000\000a\000X\000X\000X\000X\000a\0000\000\000"); x2s("\000^\000a\000{\0002\000,\000}\000?\000a\000$\000\000", "\000a\000a\000a\000\000", 0, 6); x2s("\000^\000[\000a\000-\000z\000]\000{\0002\000,\000}\000?\000$\000\000", "\000a\000a\000a\000\000", 0, 6); x2s("\000(\000?\000:\000a\000+\000|\000\134\000A\000b\000*\000)\000c\000c\000\000", "\000c\000c\000\000", 0, 4); ns("\000(\000?\000:\000a\000+\000|\000\134\000A\000b\000*\000)\000c\000c\000\000", "\000a\000b\000c\000c\000\000"); x2s("\000(\000?\000:\000^\000a\000+\000|\000b\000+\000)\000*\000c\000\000", "\000a\000a\000b\000b\000b\000a\000b\000c\000\000", 12, 16); x2s("\000(\000?\000:\000^\000a\000+\000|\000b\000+\000)\000*\000c\000\000", "\000a\000a\000b\000b\000b\000b\000c\000\000", 0, 14); x2s("\000a\000|\000(\000?\000i\000)\000c\000\000", "\000C\000\000", 0, 2); 
x2s("\000(\000?\000i\000)\000c\000|\000a\000\000", "\000C\000\000", 0, 2); x2s("\000(\000?\000i\000)\000c\000|\000a\000\000", "\000A\000\000", 0, 2); x2s("\000(\000?\000i\000:\000c\000)\000|\000a\000\000", "\000C\000\000", 0, 2); ns("\000(\000?\000i\000:\000c\000)\000|\000a\000\000", "\000A\000\000"); x2s("\000[\000a\000b\000c\000]\000?\000\000", "\000a\000b\000c\000\000", 0, 2); x2s("\000[\000a\000b\000c\000]\000*\000\000", "\000a\000b\000c\000\000", 0, 6); x2s("\000[\000^\000a\000b\000c\000]\000*\000\000", "\000a\000b\000c\000\000", 0, 0); ns("\000[\000^\000a\000b\000c\000]\000+\000\000", "\000a\000b\000c\000\000"); x2s("\000a\000?\000?\000\000", "\000a\000a\000a\000\000", 0, 0); x2s("\000b\000a\000?\000?\000b\000\000", "\000b\000a\000b\000\000", 0, 6); x2s("\000a\000*\000?\000\000", "\000a\000a\000a\000\000", 0, 0); x2s("\000b\000a\000*\000?\000\000", "\000b\000a\000a\000\000", 0, 2); x2s("\000b\000a\000*\000?\000b\000\000", "\000b\000a\000a\000b\000\000", 0, 8); x2s("\000a\000+\000?\000\000", "\000a\000a\000a\000\000", 0, 2); x2s("\000b\000a\000+\000?\000\000", "\000b\000a\000a\000\000", 0, 4); x2s("\000b\000a\000+\000?\000b\000\000", "\000b\000a\000a\000b\000\000", 0, 8); x2s("\000(\000?\000:\000a\000?\000)\000?\000?\000\000", "\000a\000\000", 0, 0); x2s("\000(\000?\000:\000a\000?\000?\000)\000?\000\000", "\000a\000\000", 0, 0); x2s("\000(\000?\000:\000a\000?\000)\000+\000?\000\000", "\000a\000a\000a\000\000", 0, 2); x2s("\000(\000?\000:\000a\000+\000)\000?\000?\000\000", "\000a\000a\000a\000\000", 0, 0); x2s("\000(\000?\000:\000a\000+\000)\000?\000?\000b\000\000", "\000a\000a\000a\000b\000\000", 0, 8); x2s("\000(\000?\000:\000a\000b\000)\000?\000{\0002\000}\000\000", "\000\000", 0, 0); x2s("\000(\000?\000:\000a\000b\000)\000?\000{\0002\000}\000\000", "\000a\000b\000a\000b\000a\000\000", 0, 8); x2s("\000(\000?\000:\000a\000b\000)\000*\000{\0000\000}\000\000", "\000a\000b\000a\000b\000a\000\000", 0, 0); 
x2s("\000(\000?\000:\000a\000b\000)\000{\0003\000,\000}\000\000", "\000a\000b\000a\000b\000a\000b\000a\000b\000\000", 0, 16); ns("\000(\000?\000:\000a\000b\000)\000{\0003\000,\000}\000\000", "\000a\000b\000a\000b\000\000"); x2s("\000(\000?\000:\000a\000b\000)\000{\0002\000,\0004\000}\000\000", "\000a\000b\000a\000b\000a\000b\000\000", 0, 12); x2s("\000(\000?\000:\000a\000b\000)\000{\0002\000,\0004\000}\000\000", "\000a\000b\000a\000b\000a\000b\000a\000b\000a\000b\000\000", 0, 16); x2s("\000(\000?\000:\000a\000b\000)\000{\0002\000,\0004\000}\000?\000\000", "\000a\000b\000a\000b\000a\000b\000a\000b\000a\000b\000\000", 0, 8); x2s("\000(\000?\000:\000a\000b\000)\000{\000,\000}\000\000", "\000a\000b\000{\000,\000}\000\000", 0, 10); x2s("\000(\000?\000:\000a\000b\000c\000)\000+\000?\000{\0002\000}\000\000", "\000a\000b\000c\000a\000b\000c\000a\000b\000c\000\000", 0, 12); x2s("\000(\000?\000:\000X\000*\000)\000(\000?\000i\000:\000x\000a\000)\000\000", "\000X\000X\000X\000a\000\000", 0, 8); x2s("\000(\000d\000+\000)\000(\000[\000^\000a\000b\000c\000]\000z\000)\000\000", "\000d\000d\000d\000z\000\000", 0, 8); x2s("\000(\000[\000^\000a\000b\000c\000]\000*\000)\000(\000[\000^\000a\000b\000c\000]\000z\000)\000\000", "\000d\000d\000d\000z\000\000", 0, 8); if (!org.joni.Config.NON_UNICODE_SDW) x2s("\000(\000\134\000w\000+\000)\000(\000\134\000w\000z\000)\000\000", "\000d\000d\000d\000z\000\000", 0, 8); x3s("\000(\000a\000)\000\000", "\000a\000\000", 0, 2, 1); x3s("\000(\000a\000b\000)\000\000", "\000a\000b\000\000", 0, 4, 1); x2s("\000(\000(\000a\000b\000)\000)\000\000", "\000a\000b\000\000", 0, 4); x3s("\000(\000(\000a\000b\000)\000)\000\000", "\000a\000b\000\000", 0, 4, 1); x3s("\000(\000(\000a\000b\000)\000)\000\000", "\000a\000b\000\000", 0, 4, 2); x3s("\000(\000(\000(\000(\000(\000(\000(\000(\000(\000(\000(\000(\000(\000(\000(\000(\000(\000(\000(\000(\000a\000b\000)\000)\000)\000)\000)\000)\000)\000)\000)\000)\000)\000)\000)\000)\000)\000)\000)\000)\000)\000)\000\000", 
"\000a\000b\000\000", 0, 4, 20); x3s("\000(\000a\000b\000)\000(\000c\000d\000)\000\000", "\000a\000b\000c\000d\000\000", 0, 4, 1); x3s("\000(\000a\000b\000)\000(\000c\000d\000)\000\000", "\000a\000b\000c\000d\000\000", 4, 8, 2); x3s("\000(\000)\000(\000a\000)\000b\000c\000(\000d\000e\000f\000)\000g\000h\000i\000j\000k\000\000", "\000a\000b\000c\000d\000e\000f\000g\000h\000i\000j\000k\000\000", 6, 12, 3); x3s("\000(\000(\000)\000(\000a\000)\000b\000c\000(\000d\000e\000f\000)\000g\000h\000i\000j\000k\000)\000\000", "\000a\000b\000c\000d\000e\000f\000g\000h\000i\000j\000k\000\000", 6, 12, 4); x2s("\000(\000^\000a\000)\000\000", "\000a\000\000", 0, 2); x3s("\000(\000a\000)\000|\000(\000a\000)\000\000", "\000b\000a\000\000", 2, 4, 1); x3s("\000(\000^\000a\000)\000|\000(\000a\000)\000\000", "\000b\000a\000\000", 2, 4, 2); x3s("\000(\000a\000?\000)\000\000", "\000a\000a\000a\000\000", 0, 2, 1); x3s("\000(\000a\000*\000)\000\000", "\000a\000a\000a\000\000", 0, 6, 1); x3s("\000(\000a\000*\000)\000\000", "\000\000", 0, 0, 1); x3s("\000(\000a\000+\000)\000\000", "\000a\000a\000a\000a\000a\000a\000a\000\000", 0, 14, 1); x3s("\000(\000a\000+\000|\000b\000*\000)\000\000", "\000b\000b\000b\000a\000a\000\000", 0, 6, 1); x3s("\000(\000a\000+\000|\000b\000?\000)\000\000", "\000b\000b\000b\000a\000a\000\000", 0, 2, 1); x3s("\000(\000a\000b\000c\000)\000?\000\000", "\000a\000b\000c\000\000", 0, 6, 1); x3s("\000(\000a\000b\000c\000)\000*\000\000", "\000a\000b\000c\000\000", 0, 6, 1); x3s("\000(\000a\000b\000c\000)\000+\000\000", "\000a\000b\000c\000\000", 0, 6, 1); x3s("\000(\000x\000y\000z\000|\000a\000b\000c\000)\000+\000\000", "\000a\000b\000c\000\000", 0, 6, 1); x3s("\000(\000[\000x\000y\000z\000]\000[\000a\000b\000c\000]\000|\000a\000b\000c\000)\000+\000\000", "\000a\000b\000c\000\000", 0, 6, 1); x3s("\000(\000(\000?\000i\000:\000a\000b\000c\000)\000)\000\000", "\000A\000b\000C\000\000", 0, 6, 1); x2s("\000(\000a\000b\000c\000)\000(\000?\000i\000:\000\134\0001\000)\000\000", 
"\000a\000b\000c\000A\000B\000C\000\000", 0, 12); x3s("\000(\000(\000?\000m\000:\000a\000.\000c\000)\000)\000\000", "\000a\000\012\000c\000\000", 0, 6, 1); x3s("\000(\000(\000?\000=\000a\000z\000)\000a\000)\000\000", "\000a\000z\000b\000\000", 0, 2, 1); x3s("\000a\000b\000c\000|\000(\000.\000a\000b\000d\000)\000\000", "\000z\000a\000b\000d\000\000", 0, 8, 1); x2s("\000(\000?\000:\000a\000b\000c\000)\000|\000(\000A\000B\000C\000)\000\000", "\000a\000b\000c\000\000", 0, 6); x3s("\000(\000?\000i\000:\000(\000a\000b\000c\000)\000)\000|\000(\000z\000z\000z\000)\000\000", "\000A\000B\000C\000\000", 0, 6, 1); x3s("\000a\000*\000(\000.\000)\000\000", "\000a\000a\000a\000a\000z\000\000", 8, 10, 1); x3s("\000a\000*\000?\000(\000.\000)\000\000", "\000a\000a\000a\000a\000z\000\000", 0, 2, 1); x3s("\000a\000*\000?\000(\000c\000)\000\000", "\000a\000a\000a\000a\000c\000\000", 8, 10, 1); x3s("\000[\000b\000c\000d\000]\000a\000*\000(\000.\000)\000\000", "\000c\000a\000a\000a\000a\000z\000\000", 10, 12, 1); x3s("\000(\000\134\000A\000b\000b\000)\000c\000c\000\000", "\000b\000b\000c\000c\000\000", 0, 4, 1); ns("\000(\000\134\000A\000b\000b\000)\000c\000c\000\000", "\000z\000b\000b\000c\000c\000\000"); x3s("\000(\000^\000b\000b\000)\000c\000c\000\000", "\000b\000b\000c\000c\000\000", 0, 4, 1); ns("\000(\000^\000b\000b\000)\000c\000c\000\000", "\000z\000b\000b\000c\000c\000\000"); x3s("\000c\000c\000(\000b\000b\000$\000)\000\000", "\000c\000c\000b\000b\000\000", 4, 8, 1); ns("\000c\000c\000(\000b\000b\000$\000)\000\000", "\000c\000c\000b\000b\000b\000\000"); ns("\000(\000\134\0001\000)\000\000", "\000\000"); ns("\000\134\0001\000(\000a\000)\000\000", "\000a\000a\000\000"); ns("\000(\000a\000(\000b\000)\000\134\0001\000)\000\134\0002\000+\000\000", "\000a\000b\000a\000b\000b\000\000"); ns("\000(\000?\000:\000(\000?\000:\000\134\0001\000|\000z\000)\000(\000a\000)\000)\000+\000$\000\000", "\000z\000a\000a\000\000"); 
x2s("\000(\000?\000:\000(\000?\000:\000\134\0001\000|\000z\000)\000(\000a\000)\000)\000+\000$\000\000", "\000z\000a\000a\000a\000\000", 0, 8); x2s("\000(\000a\000)\000(\000?\000=\000\134\0001\000)\000\000", "\000a\000a\000\000", 0, 2); ns("\000(\000a\000)\000$\000|\000\134\0001\000\000", "\000a\000z\000\000"); x2s("\000(\000a\000)\000\134\0001\000\000", "\000a\000a\000\000", 0, 4); ns("\000(\000a\000)\000\134\0001\000\000", "\000a\000b\000\000"); x2s("\000(\000a\000?\000)\000\134\0001\000\000", "\000a\000a\000\000", 0, 4); x2s("\000(\000a\000?\000?\000)\000\134\0001\000\000", "\000a\000a\000\000", 0, 0); x2s("\000(\000a\000*\000)\000\134\0001\000\000", "\000a\000a\000a\000a\000a\000\000", 0, 8); x3s("\000(\000a\000*\000)\000\134\0001\000\000", "\000a\000a\000a\000a\000a\000\000", 0, 4, 1); x2s("\000a\000(\000b\000*\000)\000\134\0001\000\000", "\000a\000b\000b\000b\000b\000\000", 0, 10); x2s("\000a\000(\000b\000*\000)\000\134\0001\000\000", "\000a\000b\000\000", 0, 2); x2s("\000(\000a\000*\000)\000(\000b\000*\000)\000\134\0001\000\134\0002\000\000", "\000a\000a\000a\000b\000b\000a\000a\000a\000b\000b\000\000", 0, 20); x2s("\000(\000a\000*\000)\000(\000b\000*\000)\000\134\0002\000\000", "\000a\000a\000a\000b\000b\000b\000b\000\000", 0, 14); x2s("\000(\000(\000(\000(\000(\000(\000(\000a\000*\000)\000b\000)\000)\000)\000)\000)\000)\000c\000\134\0007\000\000", "\000a\000a\000a\000b\000c\000a\000a\000a\000\000", 0, 16); x3s("\000(\000(\000(\000(\000(\000(\000(\000a\000*\000)\000b\000)\000)\000)\000)\000)\000)\000c\000\134\0007\000\000", "\000a\000a\000a\000b\000c\000a\000a\000a\000\000", 0, 6, 7); x2s("\000(\000a\000)\000(\000b\000)\000(\000c\000)\000\134\0002\000\134\0001\000\134\0003\000\000", "\000a\000b\000c\000b\000a\000c\000\000", 0, 12); x2s("\000(\000[\000a\000-\000d\000]\000)\000\134\0001\000\000", "\000c\000c\000\000", 0, 4); if (!org.joni.Config.NON_UNICODE_SDW) x2s("\000(\000\134\000w\000\134\000d\000\134\000s\000)\000\134\0001\000\000", "\000f\0005\000 
\000f\0005\000 \000\000", 0, 12); ns("\000(\000\134\000w\000\134\000d\000\134\000s\000)\000\134\0001\000\000", "\000f\0005\000 \000f\0005\000\000"); x2s("\000(\000w\000h\000o\000|\000[\000a\000-\000c\000]\000{\0003\000}\000)\000\134\0001\000\000", "\000w\000h\000o\000w\000h\000o\000\000", 0, 12); x2s("\000.\000.\000.\000(\000w\000h\000o\000|\000[\000a\000-\000c\000]\000{\0003\000}\000)\000\134\0001\000\000", "\000a\000b\000c\000w\000h\000o\000w\000h\000o\000\000", 0, 18); x2s("\000(\000w\000h\000o\000|\000[\000a\000-\000c\000]\000{\0003\000}\000)\000\134\0001\000\000", "\000c\000b\000c\000c\000b\000c\000\000", 0, 12); x2s("\000(\000^\000a\000)\000\134\0001\000\000", "\000a\000a\000\000", 0, 4); ns("\000(\000^\000a\000)\000\134\0001\000\000", "\000b\000a\000a\000\000"); ns("\000(\000a\000$\000)\000\134\0001\000\000", "\000a\000a\000\000"); ns("\000(\000a\000b\000\134\000Z\000)\000\134\0001\000\000", "\000a\000b\000\000"); x2s("\000(\000a\000*\000\134\000Z\000)\000\134\0001\000\000", "\000a\000\000", 2, 2); x2s("\000.\000(\000a\000*\000\134\000Z\000)\000\134\0001\000\000", "\000b\000a\000\000", 2, 4); x3s("\000(\000.\000(\000a\000b\000c\000)\000\134\0002\000)\000\000", "\000z\000a\000b\000c\000a\000b\000c\000\000", 0, 14, 1); if (!org.joni.Config.NON_UNICODE_SDW) x3s("\000(\000.\000(\000.\000.\000\134\000d\000.\000)\000\134\0002\000)\000\000", "\000z\0001\0002\0003\0004\0001\0002\0003\0004\000\000", 0, 18, 1); x2s("\000(\000(\000?\000i\000:\000a\000z\000)\000)\000\134\0001\000\000", "\000A\000z\000A\000z\000\000", 0, 8); ns("\000(\000(\000?\000i\000:\000a\000z\000)\000)\000\134\0001\000\000", "\000A\000z\000a\000z\000\000"); x2s("\000(\000?\000<\000=\000a\000)\000b\000\000", "\000a\000b\000\000", 2, 4); ns("\000(\000?\000<\000=\000a\000)\000b\000\000", "\000b\000b\000\000"); x2s("\000(\000?\000<\000=\000a\000|\000b\000)\000b\000\000", "\000b\000b\000\000", 2, 4); x2s("\000(\000?\000<\000=\000a\000|\000b\000c\000)\000b\000\000", "\000b\000c\000b\000\000", 4, 6); 
x2s("\000(\000?\000<\000=\000a\000|\000b\000c\000)\000b\000\000", "\000a\000b\000\000", 2, 4); x2s("\000(\000?\000<\000=\000a\000|\000b\000c\000|\000|\000d\000e\000f\000g\000h\000i\000j\000|\000k\000l\000m\000n\000o\000p\000q\000|\000r\000)\000z\000\000", "\000r\000z\000\000", 2, 4); x2s("\000(\000a\000)\000\134\000g\000<\0001\000>\000\000", "\000a\000a\000\000", 0, 4); x2s("\000(\000?\000<\000!\000a\000)\000b\000\000", "\000c\000b\000\000", 2, 4); ns("\000(\000?\000<\000!\000a\000)\000b\000\000", "\000a\000b\000\000"); x2s("\000(\000?\000<\000!\000a\000|\000b\000c\000)\000b\000\000", "\000b\000b\000b\000\000", 0, 2); ns("\000(\000?\000<\000!\000a\000|\000b\000c\000)\000z\000\000", "\000b\000c\000z\000\000"); x2s("\000(\000?\000<\000n\000a\000m\000e\0001\000>\000a\000)\000\000", "\000a\000\000", 0, 2); x2s("\000(\000?\000<\000n\000a\000m\000e\000_\0002\000>\000a\000b\000)\000\134\000g\000<\000n\000a\000m\000e\000_\0002\000>\000\000", "\000a\000b\000a\000b\000\000", 0, 8); x2s("\000(\000?\000<\000n\000a\000m\000e\000_\0003\000>\000.\000z\000v\000.\000)\000\134\000k\000<\000n\000a\000m\000e\000_\0003\000>\000\000", "\000a\000z\000v\000b\000a\000z\000v\000b\000\000", 0, 16); x2s("\000(\000?\000<\000=\000\134\000g\000<\000a\000b\000>\000)\000|\000-\000\134\000z\000E\000N\000D\000 \000(\000?\000<\000a\000b\000>\000X\000y\000Z\000)\000\000", "\000X\000y\000Z\000\000", 6, 6); x2s("\000(\000?\000<\000n\000>\000|\000a\000\134\000g\000<\000n\000>\000)\000+\000\000", "\000\000", 0, 0); x2s("\000(\000?\000<\000n\000>\000|\000\134\000(\000\134\000g\000<\000n\000>\000\134\000)\000)\000+\000$\000\000", "\000(\000)\000(\000(\000)\000)\000\000", 0, 12); x3s("\000\134\000g\000<\000n\000>\000(\000?\000<\000n\000>\000.\000)\000{\0000\000}\000\000", "\000X\000\000", 0, 2, 1); x2s("\000\134\000g\000<\000n\000>\000(\000a\000b\000c\000|\000d\000f\000(\000?\000<\000n\000>\000.\000Y\000Z\000)\000{\0002\000,\0008\000}\000)\000{\0000\000}\000\000", "\000X\000Y\000Z\000\000", 0, 6); 
x2s("\000\134\000A\000(\000?\000<\000n\000>\000(\000a\000\134\000g\000<\000n\000>\000)\000|\000)\000\134\000z\000\000", "\000a\000a\000a\000a\000\000", 0, 8); x2s("\000(\000?\000<\000n\000>\000|\000\134\000g\000<\000m\000>\000\134\000g\000<\000n\000>\000)\000\134\000z\000|\000\134\000z\000E\000N\000D\000 \000(\000?\000<\000m\000>\000a\000|\000(\000b\000)\000\134\000g\000<\000m\000>\000)\000\000", "\000b\000b\000b\000b\000a\000b\000b\000a\000\000", 0, 16); if (!org.joni.Config.NON_UNICODE_SDW) x2s("\000(\000?\000<\000n\000a\000m\000e\0001\0002\0004\0000\000>\000\134\000w\000+\000\134\000s\000x\000)\000a\000+\000\134\000k\000<\000n\000a\000m\000e\0001\0002\0004\0000\000>\000\000", "\000 \000 \000f\000g\000 \000x\000a\000a\000a\000a\000a\000a\000a\000a\000f\000g\000 \000x\000\000", 4, 36); x3s("\000(\000z\000)\000(\000)\000(\000)\000(\000?\000<\000_\0009\000>\000a\000)\000\134\000g\000<\000_\0009\000>\000\000", "\000z\000a\000a\000\000", 4, 6, 1); x2s("\000(\000.\000)\000(\000(\000(\000?\000<\000_\000>\000a\000)\000)\000)\000\134\000k\000<\000_\000>\000\000", "\000z\000a\000a\000\000", 0, 6); if (!org.joni.Config.NON_UNICODE_SDW) x2s("\000(\000(\000?\000<\000n\000a\000m\000e\0001\000>\000\134\000d\000)\000|\000(\000?\000<\000n\000a\000m\000e\0002\000>\000\134\000w\000)\000)\000(\000\134\000k\000<\000n\000a\000m\000e\0001\000>\000|\000\134\000k\000<\000n\000a\000m\000e\0002\000>\000)\000\000", "\000f\000f\000\000", 0, 4); x2s("\000(\000?\000:\000(\000?\000<\000x\000>\000)\000|\000(\000?\000<\000x\000>\000e\000f\000g\000)\000)\000\134\000k\000<\000x\000>\000\000", "\000\000", 0, 0); x2s("\000(\000?\000:\000(\000?\000<\000x\000>\000a\000b\000c\000)\000|\000(\000?\000<\000x\000>\000e\000f\000g\000)\000)\000\134\000k\000<\000x\000>\000\000", "\000a\000b\000c\000e\000f\000g\000e\000f\000g\000\000", 6, 18); ns("\000(\000?\000:\000(\000?\000<\000x\000>\000a\000b\000c\000)\000|\000(\000?\000<\000x\000>\000e\000f\000g\000)\000)\000\134\000k\000<\000x\000>\000\000", 
"\000a\000b\000c\000e\000f\000g\000\000"); x2s("\000(\000?\000:\000(\000?\000<\000n\0001\000>\000.\000)\000|\000(\000?\000<\000n\0001\000>\000.\000.\000)\000|\000(\000?\000<\000n\0001\000>\000.\000.\000.\000)\000|\000(\000?\000<\000n\0001\000>\000.\000.\000.\000.\000)\000|\000(\000?\000<\000n\0001\000>\000.\000.\000.\000.\000.\000)\000|\000(\000?\000<\000n\0001\000>\000.\000.\000.\000.\000.\000.\000)\000|\000(\000?\000<\000n\0001\000>\000.\000.\000.\000.\000.\000.\000.\000)\000|\000(\000?\000<\000n\0001\000>\000.\000.\000.\000.\000.\000.\000.\000.\000)\000|\000(\000?\000<\000n\0001\000>\000.\000.\000.\000.\000.\000.\000.\000.\000.\000)\000|\000(\000?\000<\000n\0001\000>\000.\000.\000.\000.\000.\000.\000.\000.\000.\000.\000)\000|\000(\000?\000<\000n\0001\000>\000.\000.\000.\000.\000.\000.\000.\000.\000.\000.\000.\000)\000|\000(\000?\000<\000n\0001\000>\000.\000.\000.\000.\000.\000.\000.\000.\000.\000.\000.\000.\000)\000|\000(\000?\000<\000n\0001\000>\000.\000.\000.\000.\000.\000.\000.\000.\000.\000.\000.\000.\000.\000)\000|\000(\000?\000<\000n\0001\000>\000.\000.\000.\000.\000.\000.\000.\000.\000.\000.\000.\000.\000.\000.\000)\000)\000\134\000k\000<\000n\0001\000>\000$\000\000", "\000a\000-\000p\000y\000u\000m\000p\000y\000u\000m\000\000", 4, 20); 
x3s("\000(\000?\000:\000(\000?\000<\000n\0001\000>\000.\000)\000|\000(\000?\000<\000n\0001\000>\000.\000.\000)\000|\000(\000?\000<\000n\0001\000>\000.\000.\000.\000)\000|\000(\000?\000<\000n\0001\000>\000.\000.\000.\000.\000)\000|\000(\000?\000<\000n\0001\000>\000.\000.\000.\000.\000.\000)\000|\000(\000?\000<\000n\0001\000>\000.\000.\000.\000.\000.\000.\000)\000|\000(\000?\000<\000n\0001\000>\000.\000.\000.\000.\000.\000.\000.\000)\000|\000(\000?\000<\000n\0001\000>\000.\000.\000.\000.\000.\000.\000.\000.\000)\000|\000(\000?\000<\000n\0001\000>\000.\000.\000.\000.\000.\000.\000.\000.\000.\000)\000|\000(\000?\000<\000n\0001\000>\000.\000.\000.\000.\000.\000.\000.\000.\000.\000.\000)\000|\000(\000?\000<\000n\0001\000>\000.\000.\000.\000.\000.\000.\000.\000.\000.\000.\000.\000)\000|\000(\000?\000<\000n\0001\000>\000.\000.\000.\000.\000.\000.\000.\000.\000.\000.\000.\000.\000)\000|\000(\000?\000<\000n\0001\000>\000.\000.\000.\000.\000.\000.\000.\000.\000.\000.\000.\000.\000.\000)\000|\000(\000?\000<\000n\0001\000>\000.\000.\000.\000.\000.\000.\000.\000.\000.\000.\000.\000.\000.\000.\000)\000)\000\134\000k\000<\000n\0001\000>\000$\000\000", "\000x\000x\000x\000x\000a\000b\000c\000d\000e\000f\000g\000h\000i\000j\000k\000l\000m\000n\000a\000b\000c\000d\000e\000f\000g\000h\000i\000j\000k\000l\000m\000n\000\000", 8, 36, 14); 
x3s("\000(\000?\000<\000n\000a\000m\000e\0001\000>\000)\000(\000?\000<\000n\000a\000m\000e\0002\000>\000)\000(\000?\000<\000n\000a\000m\000e\0003\000>\000)\000(\000?\000<\000n\000a\000m\000e\0004\000>\000)\000(\000?\000<\000n\000a\000m\000e\0005\000>\000)\000(\000?\000<\000n\000a\000m\000e\0006\000>\000)\000(\000?\000<\000n\000a\000m\000e\0007\000>\000)\000(\000?\000<\000n\000a\000m\000e\0008\000>\000)\000(\000?\000<\000n\000a\000m\000e\0009\000>\000)\000(\000?\000<\000n\000a\000m\000e\0001\0000\000>\000)\000(\000?\000<\000n\000a\000m\000e\0001\0001\000>\000)\000(\000?\000<\000n\000a\000m\000e\0001\0002\000>\000)\000(\000?\000<\000n\000a\000m\000e\0001\0003\000>\000)\000(\000?\000<\000n\000a\000m\000e\0001\0004\000>\000)\000(\000?\000<\000n\000a\000m\000e\0001\0005\000>\000)\000(\000?\000<\000n\000a\000m\000e\0001\0006\000>\000a\000a\000a\000)\000(\000?\000<\000n\000a\000m\000e\0001\0007\000>\000)\000$\000\000", "\000a\000a\000a\000\000", 0, 6, 16); x2s("\000(\000?\000<\000f\000o\000o\000>\000a\000|\000\134\000(\000\134\000g\000<\000f\000o\000o\000>\000\134\000)\000)\000\000", "\000a\000\000", 0, 2); x2s("\000(\000?\000<\000f\000o\000o\000>\000a\000|\000\134\000(\000\134\000g\000<\000f\000o\000o\000>\000\134\000)\000)\000\000", "\000(\000(\000(\000(\000(\000(\000a\000)\000)\000)\000)\000)\000)\000\000", 0, 26); x3s("\000(\000?\000<\000f\000o\000o\000>\000a\000|\000\134\000(\000\134\000g\000<\000f\000o\000o\000>\000\134\000)\000)\000\000", "\000(\000(\000(\000(\000(\000(\000(\000(\000a\000)\000)\000)\000)\000)\000)\000)\000)\000\000", 0, 34, 1); x2s("\000\134\000g\000<\000b\000a\000r\000>\000|\000\134\000z\000E\000N\000D\000(\000?\000<\000b\000a\000r\000>\000.\000*\000a\000b\000c\000$\000)\000\000", "\000a\000b\000c\000x\000x\000x\000a\000b\000c\000\000", 0, 18); x2s("\000\134\000g\000<\0001\000>\000|\000\134\000z\000E\000N\000D\000(\000.\000a\000.\000)\000\000", "\000b\000a\000c\000\000", 0, 6); 
x3s("\000\134\000g\000<\000_\000A\000>\000\134\000g\000<\000_\000A\000>\000|\000\134\000z\000E\000N\000D\000(\000.\000a\000.\000)\000(\000?\000<\000_\000A\000>\000.\000b\000.\000)\000\000", "\000x\000b\000x\000y\000b\000y\000\000", 6, 12, 1); x2s("\000\134\000A\000(\000?\000:\000\134\000g\000<\000p\000o\000n\000>\000|\000\134\000g\000<\000p\000a\000n\000>\000|\000\134\000z\000E\000N\000D\000 \000 \000(\000?\000<\000p\000a\000n\000>\000a\000|\000c\000\134\000g\000<\000p\000o\000n\000>\000c\000)\000(\000?\000<\000p\000o\000n\000>\000b\000|\000d\000\134\000g\000<\000p\000a\000n\000>\000d\000)\000)\000$\000\000", "\000c\000d\000c\000b\000c\000d\000c\000\000", 0, 14); x2s("\000\134\000A\000(\000?\000<\000n\000>\000|\000a\000\134\000g\000<\000m\000>\000)\000\134\000z\000|\000\134\000z\000E\000N\000D\000 \000(\000?\000<\000m\000>\000\134\000g\000<\000n\000>\000)\000\000", "\000a\000a\000a\000a\000\000", 0, 8); x2s("\000(\000?\000<\000n\000>\000(\000a\000|\000b\000\134\000g\000<\000n\000>\000c\000)\000{\0003\000,\0005\000}\000)\000\000", "\000b\000a\000a\000a\000a\000c\000a\000\000", 2, 10); x2s("\000(\000?\000<\000n\000>\000(\000a\000|\000b\000\134\000g\000<\000n\000>\000c\000)\000{\0003\000,\0005\000}\000)\000\000", "\000b\000a\000a\000a\000a\000c\000a\000a\000a\000a\000a\000\000", 0, 20); x2s("\000(\000?\000<\000p\000a\000r\000e\000>\000\134\000(\000(\000[\000^\000\134\000(\000\134\000)\000]\000+\000+\000|\000\134\000g\000<\000p\000a\000r\000e\000>\000)\000*\000+\000\134\000)\000)\000\000", "\000(\000(\000a\000)\000)\000\000", 0, 10); x2s("\000(\000)\000*\000\134\0001\000\000", "\000\000", 0, 0); x2s("\000(\000?\000:\000(\000)\000|\000(\000)\000)\000*\000\134\0001\000\134\0002\000\000", "\000\000", 0, 0); x3s("\000(\000?\000:\000\134\0001\000a\000|\000(\000)\000)\000*\000\000", "\000a\000\000", 0, 0, 1); x2s("\000x\000(\000(\000.\000)\000*\000)\000*\000x\000\000", "\0000\000x\0001\000x\0002\000x\0003\000\000", 2, 12); 
x2s("\000x\000(\000(\000.\000)\000*\000)\000*\000x\000(\000?\000i\000:\000\134\0001\000)\000\134\000Z\000\000", "\0000\000x\0001\000x\0002\000x\0001\000X\0002\000\000", 2, 18); x2s("\000(\000?\000:\000(\000)\000|\000(\000)\000|\000(\000)\000|\000(\000)\000|\000(\000)\000|\000(\000)\000)\000*\000\134\0002\000\134\0005\000\000", "\000\000", 0, 0); x2s("\000(\000?\000:\000(\000)\000|\000(\000)\000|\000(\000)\000|\000(\000x\000)\000|\000(\000)\000|\000(\000)\000)\000*\000\134\0002\000b\000\134\0005\000\000", "\000b\000\000", 0, 2); x2s("\217\372\000\000", "\217\372\000\000", 0, 2); x2s("\000\000", "0B\000\000", 0, 0); x2s("0B\000\000", "0B\000\000", 0, 2); ns("0D\000\000", "0B\000\000"); x2s("0F0F\000\000", "0F0F\000\000", 0, 4); x2s("0B0D0F\000\000", "0B0D0F\000\000", 0, 6); x2s("0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S\000\000", "0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S\000\000", 0, 70); x2s("0B\000\000", "0D0B\000\000", 2, 4); x2s("0D0F\000\000", "0B0D0F\000\000", 2, 6); x2s("e\207\000\000", "e\207\000\000", 0, 2); x2s("\000.\000\000", "0B\000\000", 0, 2); x2s("\000.\000.\000\000", "0K0M\000\000", 0, 4); x2s("\000\134\000w\000\000", "0J\000\000", 0, 2); ns("\000\134\000W\000\000", "0B\000\000"); if (!org.joni.Config.NON_UNICODE_SDW) x2s("\000[\000\134\000W\000]\000\000", "0F\000$\000\000", 2, 4); x2s("\000\134\000S\000\000", "0]\000\000", 0, 2); x2s("\000\134\000S\000\000", "o\042\000\000", 0, 2); x2s("\000\134\000b\000\000", "l\027\000 \000\000", 0, 0); x2s("\000\134\000b\000\000", "\000 0{\000\000", 2, 2); x2s("\000\134\000B\000\000", "0[0]\000 \000\000", 2, 2); x2s("\000\134\000B\000\000", "0F\000 \000\000", 4, 4); x2s("\000\134\000B\000\000", "\000 0D\000\000", 0, 0); x2s("\000[0_0a\000]\000\000", "0a\000\000", 0, 2); ns("\000[0j0k\000]\000\000", "0l\000\000"); x2s("\000[0F\000-0J\000]\000\000", "0H\000\000", 0, 2); ns("\000[\000^0Q\000]\000\000", "0Q\000\000"); x2s("\000[\000\134\000w\000]\000\000", 
"0m\000\000", 0, 2); if (!org.joni.Config.NON_UNICODE_SDW) ns("\000[\000\134\000d\000]\000\000", "0u\000\000"); x2s("\000[\000\134\000D\000]\000\000", "0o\000\000", 0, 2); ns("\000[\000\134\000s\000]\000\000", "0O\000\000"); x2s("\000[\000\134\000S\000]\000\000", "0x\000\000", 0, 2); x2s("\000[\000\134\000w\000\134\000d\000]\000\000", "0\210\000\000", 0, 2); x2s("\000[\000\134\000w\000\134\000d\000]\000\000", "\000 \000 \000 0\210\000\000", 6, 8); ns("\000\134\000w\233<\216\312\000\000", "\000 \233<\216\312\000\000"); x2s("\233<\000\134\000W\216\312\000\000", "\233<\000 \216\312\000\000", 0, 6); x2s("0B\000.0D\000.0F\000\000", "0B0B0D0D0F\000\000", 0, 10); x2s("\000.\000\134\000w0F\000\134\000W\000.\000.0^\000\000", "0H0F0F\000 0F0^0^\000\000", 0, 14); if (!org.joni.Config.NON_UNICODE_SDW) x2s("\000\134\000s\000\134\000w0S0S0S\000\000", "\000 0S0S0S0S\000\000", 0, 10); x2s("0B0B\000.0Q\000\000", "0B0B0Q0Q\000\000", 0, 8); ns("\000.0D\000\000", "0D0H\000\000"); x2s("\000.0J\000\000", "0J0J\000\000", 0, 4); x2s("\000^0B\000\000", "0B\000\000", 0, 2); x2s("\000^0\200\000$\000\000", "0\200\000\000", 0, 2); x2s("\000^\000\134\000w\000$\000\000", "0k\000\000", 0, 2); if (!org.joni.Config.NON_UNICODE_SDW) x2s("\000^\000\134\000w0K0M0O0Q0S\000$\000\000", "\000z0K0M0O0Q0S\000\000", 0, 12); if (!org.joni.Config.NON_UNICODE_SDW) x2s("\000^\000\134\000w\000.\000.\000.0F0H0J\000$\000\000", "\000z0B0D0F0F0H0J\000\000", 0, 14); if (!org.joni.Config.NON_UNICODE_SDW) x2s("\000\134\000w\000\134\000w\000\134\000s\000\134\000W0J0J0J\000\134\000d\000\000", "\000a0J\000 \000 0J0J0J\0004\000\000", 0, 16); x2s("\000\134\000A0_0a0d\000\000", "0_0a0d\000\000", 0, 6); x2s("0\2000\2010\202\000\134\000Z\000\000", "0\2000\2010\202\000\000", 0, 6); x2s("0K0M0O\000\134\000z\000\000", "0K0M0O\000\000", 0, 6); x2s("0K0M0O\000\134\000Z\000\000", "0K0M0O\000\012\000\000", 0, 6); x2s("\000\134\000G0}0t\000\000", "0}0t\000\000", 0, 4); ns("\000\134\000G0H\000\000", "0F0H0J\000\000"); 
ns("0h0f\000\134\000G\000\000", "0h0f\000\000"); ns("0~0\177\000\134\000A\000\000", "0~0\177\000\000"); ns("0~\000\134\000A0\177\000\000", "0~0\177\000\000"); x2s("\000(\000?\000=0[\000)0[\000\000", "0[\000\000", 0, 2); ns("\000(\000?\000=0F\000)\000.\000\000", "0D\000\000"); x2s("\000(\000?\000!0F\000)0K\000\000", "0K\000\000", 0, 2); ns("\000(\000?\000!0h\000)0B\000\000", "0h\000\000"); x2s("\000(\000?\000i\000:0B\000)\000\000", "0B\000\000", 0, 2); x2s("\000(\000?\000i\000:0v0y\000)\000\000", "0v0y\000\000", 0, 4); ns("\000(\000?\000i\000:0D\000)\000\000", "0F\000\000"); x2s("\000(\000?\000m\000:0\210\000.\000)\000\000", "0\210\000\012\000\000", 0, 4); x2s("\000(\000?\000m\000:\000.0\201\000)\000\000", "0~\000\0120\201\000\000", 2, 6); x2s("0B\000?\000\000", "\000\000", 0, 0); x2s("Y\011\000?\000\000", "S\026\000\000", 0, 0); x2s("Y\011\000?\000\000", "Y\011\000\000", 0, 2); x2s("\221\317\000*\000\000", "\000\000", 0, 0); x2s("\221\317\000*\000\000", "\221\317\000\000", 0, 2); x2s("[P\000*\000\000", "[P[P[P\000\000", 0, 6); x2s("\231\254\000*\000\000", "\236\177\231\254\231\254\231\254\231\254\000\000", 0, 0); ns("\134q\000+\000\000", "\000\000"); x2s("l\263\000+\000\000", "l\263\000\000", 0, 2); x2s("fB\000+\000\000", "fBfBfBfB\000\000", 0, 8); x2s("0H\000+\000\000", "0H0H0F0F0F\000\000", 0, 4); x2s("0F\000+\000\000", "0J0F0F0F0F\000\000", 2, 10); x2s("\000.\000?\000\000", "0_\000\000", 0, 2); x2s("\000.\000*\000\000", "0q0t0w0z\000\000", 0, 8); x2s("\000.\000+\000\000", "0\215\000\000", 0, 2); x2s("\000.\000+\000\000", "0D0F0H0K\000\012\000\000", 0, 8); x2s("0B\000|0D\000\000", "0B\000\000", 0, 2); x2s("0B\000|0D\000\000", "0D\000\000", 0, 2); x2s("0B0D\000|0D0F\000\000", "0B0D\000\000", 0, 4); x2s("0B0D\000|0D0F\000\000", "0D0F\000\000", 0, 4); x2s("0\222\000(\000?\000:0K0M\000|0M0O\000)\000\000", "0\2220K0M\000\000", 0, 6); x2s("0\222\000(\000?\000:0K0M\000|0M0O\000)0Q\000\000", "0\2220M0O0Q\000\000", 0, 8); 
x2s("0B0D\000|\000(\000?\000:0B0F\000|0B0\222\000)\000\000", "0B0\222\000\000", 0, 4); x2s("0B\000|0D\000|0F\000\000", "0H0F\000\000", 2, 4); x2s("0B\000|0D\000|0F0H\000|0J0K0M\000|0O\000|0Q0S0U\000|0W0Y0[\000|0]\000|0_0a\000|0d0f0h0j0k\000|0l0m\000\000", "0W0Y0[\000\000", 0, 6); ns("0B\000|0D\000|0F0H\000|0J0K0M\000|0O\000|0Q0S0U\000|0W0Y0[\000|0]\000|0_0a\000|0d0f0h0j0k\000|0l0m\000\000", "0Y0[\000\000"); x2s("0B\000|\000^0\217\000\000", "0v0B\000\000", 2, 4); x2s("0B\000|\000^0\222\000\000", "0\2220B\000\000", 0, 2); x2s("\233<\000|\000\134\000G\216\312\000\000", "0Q\216\312\233<\000\000", 4, 6); x2s("\233<\000|\000\134\000G\216\312\000\000", "\216\312\233<\000\000", 0, 2); x2s("\233<\000|\000\134\000A\216\312\000\000", "\000b\216\312\233<\000\000", 4, 6); x2s("\233<\000|\000\134\000A\216\312\000\000", "\216\312\000\000", 0, 2); x2s("\233<\000|\216\312\000\134\000Z\000\000", "\216\312\233<\000\000", 2, 4); x2s("\233<\000|\216\312\000\134\000Z\000\000", "\216\312\000\000", 0, 2); x2s("\233<\000|\216\312\000\134\000Z\000\000", "\216\312\000\012\000\000", 0, 2); x2s("\233<\000|\216\312\000\134\000z\000\000", "\216\312\233<\000\000", 2, 4); x2s("\233<\000|\216\312\000\134\000z\000\000", "\216\312\000\000", 0, 2); x2s("\000\134\000w\000|\000\134\000s\000\000", "0J\000\000", 0, 2); x2s("\000\134\000w\000|\000%\000\000", "\000%0J\000\000", 0, 2); x2s("\000\134\000w\000|\000[\000&\000$\000]\000\000", "0F\000&\000\000", 0, 2); x2s("\000[0D\000-0Q\000]\000\000", "0F\000\000", 0, 2); x2s("\000[0D\000-0Q\000]\000|\000[\000^0K\000-0S\000]\000\000", "0B\000\000", 0, 2); x2s("\000[0D\000-0Q\000]\000|\000[\000^0K\000-0S\000]\000\000", "0K\000\000", 0, 2); x2s("\000[\000^0B\000]\000\000", "\000\012\000\000", 0, 2); x2s("\000(\000?\000:0B\000|\000[0F\000-0M\000]\000)\000|0D0\222\000\000", "0F0\222\000\000", 0, 2); x2s("\000(\000?\000:0B\000|\000[0F\000-0M\000]\000)\000|0D0\222\000\000", "0D0\222\000\000", 0, 4); x2s("0B0D0F\000|\000(\000?\000=0Q0Q\000)\000.\000.0{\000\000", 
"0Q0Q0{\000\000", 0, 6); x2s("0B0D0F\000|\000(\000?\000!0Q0Q\000)\000.\000.0{\000\000", "0B0D0{\000\000", 0, 6); x2s("\000(\000?\000=0\2220B\000)\000.\000.0B\000|\000(\000?\000=0\2220\222\000)\000.\000.0B\000\000", "0\2220\2220B\000\000", 0, 6); x2s("\000(\000?\000<\000=0B\000|0D0F\000)0D\000\000", "0D0F0D\000\000", 4, 6); ns("\000(\000?\000>0B\000|0B0D0H\000)0F\000\000", "0B0D0H0F\000\000"); x2s("\000(\000?\000>0B0D0H\000|0B\000)0F\000\000", "0B0D0H0F\000\000", 0, 8); x2s("0B\000?\000|0D\000\000", "0B\000\000", 0, 2); x2s("0B\000?\000|0D\000\000", "0D\000\000", 0, 0); x2s("0B\000?\000|0D\000\000", "\000\000", 0, 0); x2s("0B\000*\000|0D\000\000", "0B0B\000\000", 0, 4); x2s("0B\000*\000|0D\000*\000\000", "0D0B\000\000", 0, 0); x2s("0B\000*\000|0D\000*\000\000", "0B0D\000\000", 0, 2); x2s("\000[\000a0B\000]\000*\000|0D\000*\000\000", "\000a0B0D0D0D\000\000", 0, 4); x2s("0B\000+\000|0D\000*\000\000", "\000\000", 0, 0); x2s("0B\000+\000|0D\000*\000\000", "0D0D0D\000\000", 0, 6); x2s("0B\000+\000|0D\000*\000\000", "0B0D0D0D\000\000", 0, 2); x2s("0B\000+\000|0D\000*\000\000", "\000a0B0D0D0D\000\000", 0, 0); ns("0B\000+\000|0D\000+\000\000", "\000\000"); x2s("\000(0B\000|0D\000)\000?\000\000", "0D\000\000", 0, 2); x2s("\000(0B\000|0D\000)\000*\000\000", "0D0B\000\000", 0, 4); x2s("\000(0B\000|0D\000)\000+\000\000", "0D0B0D\000\000", 0, 6); x2s("\000(0B0D\000|0F0B\000)\000+\000\000", "0F0B0B0D0F0H\000\000", 0, 8); x2s("\000(0B0D\000|0F0H\000)\000+\000\000", "0F0B0B0D0F0H\000\000", 4, 12); x2s("\000(0B0D\000|0F0B\000)\000+\000\000", "0B0B0D0F0B\000\000", 2, 10); x2s("\000(0B0D\000|0F0B\000)\000+\000\000", "0B0D0\2220F0B\000\000", 0, 4); x2s("\000(0B0D\000|0F0B\000)\000+\000\000", "\000$\000$\000z\000z\000z\000z0B0D0\2220F0B\000\000", 12, 16); x2s("\000(0B\000|0D0B0D\000)\000+\000\000", "0B0D0B0D0B\000\000", 0, 10); x2s("\000(0B\000|0D0B0D\000)\000+\000\000", "0D0B\000\000", 2, 4); x2s("\000(0B\000|0D0B0D\000)\000+\000\000", "0D0B0B0B0D0B\000\000", 2, 8); 
x2s("\000(\000?\000:0B\000|0D\000)\000(\000?\000:0B\000|0D\000)\000\000", "0B0D\000\000", 0, 4); x2s("\000(\000?\000:0B\000*\000|0D\000*\000)\000(\000?\000:0B\000*\000|0D\000*\000)\000\000", "0B0B0B0D0D0D\000\000", 0, 6); x2s("\000(\000?\000:0B\000*\000|0D\000*\000)\000(\000?\000:0B\000+\000|0D\000+\000)\000\000", "0B0B0B0D0D0D\000\000", 0, 12); x2s("\000(\000?\000:0B\000+\000|0D\000+\000)\000{\0002\000}\000\000", "0B0B0B0D0D0D\000\000", 0, 12); x2s("\000(\000?\000:0B\000+\000|0D\000+\000)\000{\0001\000,\0002\000}\000\000", "0B0B0B0D0D0D\000\000", 0, 12); x2s("\000(\000?\000:0B\000+\000|\000\134\000A0D\000*\000)0F0F\000\000", "0F0F\000\000", 0, 4); ns("\000(\000?\000:0B\000+\000|\000\134\000A0D\000*\000)0F0F\000\000", "0B0D0F0F\000\000"); x2s("\000(\000?\000:\000^0B\000+\000|0D\000+\000)\000*0F\000\000", "0B0B0D0D0D0B0D0F\000\000", 12, 16); x2s("\000(\000?\000:\000^0B\000+\000|0D\000+\000)\000*0F\000\000", "0B0B0D0D0D0D0F\000\000", 0, 14); x2s("0F\000{\0000\000,\000}\000\000", "0F0F0F0F\000\000", 0, 8); x2s("0B\000|\000(\000?\000i\000)\000c\000\000", "\000C\000\000", 0, 2); x2s("\000(\000?\000i\000)\000c\000|0B\000\000", "\000C\000\000", 0, 2); x2s("\000(\000?\000i\000:0B\000)\000|\000a\000\000", "\000a\000\000", 0, 2); ns("\000(\000?\000i\000:0B\000)\000|\000a\000\000", "\000A\000\000"); x2s("\000[0B0D0F\000]\000?\000\000", "0B0D0F\000\000", 0, 2); x2s("\000[0B0D0F\000]\000*\000\000", "0B0D0F\000\000", 0, 6); x2s("\000[\000^0B0D0F\000]\000*\000\000", "0B0D0F\000\000", 0, 0); ns("\000[\000^0B0D0F\000]\000+\000\000", "0B0D0F\000\000"); x2s("0B\000?\000?\000\000", "0B0B0B\000\000", 0, 0); x2s("0D0B\000?\000?0D\000\000", "0D0B0D\000\000", 0, 6); x2s("0B\000*\000?\000\000", "0B0B0B\000\000", 0, 0); x2s("0D0B\000*\000?\000\000", "0D0B0B\000\000", 0, 2); x2s("0D0B\000*\000?0D\000\000", "0D0B0B0D\000\000", 0, 8); x2s("0B\000+\000?\000\000", "0B0B0B\000\000", 0, 2); x2s("0D0B\000+\000?\000\000", "0D0B0B\000\000", 0, 4); x2s("0D0B\000+\000?0D\000\000", "0D0B0B0D\000\000", 
0, 8); x2s("\000(\000?\000:Y)\000?\000)\000?\000?\000\000", "Y)\000\000", 0, 0); x2s("\000(\000?\000:Y)\000?\000?\000)\000?\000\000", "Y)\000\000", 0, 0); x2s("\000(\000?\000:Y\042\000?\000)\000+\000?\000\000", "Y\042Y\042Y\042\000\000", 0, 2); x2s("\000(\000?\000:\230\250\000+\000)\000?\000?\000\000", "\230\250\230\250\230\250\000\000", 0, 0); x2s("\000(\000?\000:\226\352\000+\000)\000?\000?\227\034\000\000", "\226\352\226\352\226\352\227\034\000\000", 0, 8); x2s("\000(\000?\000:0B0D\000)\000?\000{\0002\000}\000\000", "\000\000", 0, 0); x2s("\000(\000?\000:\233<\216\312\000)\000?\000{\0002\000}\000\000", "\233<\216\312\233<\216\312\233<\000\000", 0, 8); x2s("\000(\000?\000:\233<\216\312\000)\000*\000{\0000\000}\000\000", "\233<\216\312\233<\216\312\233<\000\000", 0, 0); x2s("\000(\000?\000:\233<\216\312\000)\000{\0003\000,\000}\000\000", "\233<\216\312\233<\216\312\233<\216\312\233<\216\312\000\000", 0, 16); ns("\000(\000?\000:\233<\216\312\000)\000{\0003\000,\000}\000\000", "\233<\216\312\233<\216\312\000\000"); x2s("\000(\000?\000:\233<\216\312\000)\000{\0002\000,\0004\000}\000\000", "\233<\216\312\233<\216\312\233<\216\312\000\000", 0, 12); x2s("\000(\000?\000:\233<\216\312\000)\000{\0002\000,\0004\000}\000\000", "\233<\216\312\233<\216\312\233<\216\312\233<\216\312\233<\216\312\000\000", 0, 16); x2s("\000(\000?\000:\233<\216\312\000)\000{\0002\000,\0004\000}\000?\000\000", "\233<\216\312\233<\216\312\233<\216\312\233<\216\312\233<\216\312\000\000", 0, 8); x2s("\000(\000?\000:\233<\216\312\000)\000{\000,\000}\000\000", "\233<\216\312\000{\000,\000}\000\000", 0, 10); x2s("\000(\000?\000:0K0M0O\000)\000+\000?\000{\0002\000}\000\000", "0K0M0O0K0M0O0K0M0O\000\000", 0, 12); x3s("\000(pk\000)\000\000", "pk\000\000", 0, 2, 1); x3s("\000(pkl4\000)\000\000", "pkl4\000\000", 0, 4, 1); x2s("\000(\000(fB\225\223\000)\000)\000\000", "fB\225\223\000\000", 0, 4); x3s("\000(\000(\230\250l4\000)\000)\000\000", "\230\250l4\000\000", 0, 4, 1); 
x3s("\000(\000(f(e\345\000)\000)\000\000", "f(e\345\000\000", 0, 4, 2); x3s("\000(\000(\000(\000(\000(\000(\000(\000(\000(\000(\000(\000(\000(\000(\000(\000(\000(\000(\000(\000(\221\317[P\000)\000)\000)\000)\000)\000)\000)\000)\000)\000)\000)\000)\000)\000)\000)\000)\000)\000)\000)\000)\000\000", "\221\317[P\000\000", 0, 4, 20); x3s("\000(0B0D\000)\000(0F0H\000)\000\000", "0B0D0F0H\000\000", 0, 4, 1); x3s("\000(0B0D\000)\000(0F0H\000)\000\000", "0B0D0F0H\000\000", 4, 8, 2); x3s("\000(\000)\000(0B\000)0D0F\000(0H0J0K\000)0M0O0Q0S\000\000", "0B0D0F0H0J0K0M0O0Q0S\000\000", 6, 12, 3); x3s("\000(\000(\000)\000(0B\000)0D0F\000(0H0J0K\000)0M0O0Q0S\000)\000\000", "0B0D0F0H0J0K0M0O0Q0S\000\000", 6, 12, 4); x3s("\000.\000*\000(0\3250\251\000)0\3630\3730\336\000(0\363\000(\000)0\2670\3450\277\000)0\2440\363\000\000", "0\3250\2510\3630\3730\3360\3630\2670\3450\2770\2440\363\000\000", 10, 18, 2); x2s("\000(\000^0B\000)\000\000", "0B\000\000", 0, 2); x3s("\000(0B\000)\000|\000(0B\000)\000\000", "0D0B\000\000", 2, 4, 1); x3s("\000(\000^0B\000)\000|\000(0B\000)\000\000", "0D0B\000\000", 2, 4, 2); x3s("\000(0B\000?\000)\000\000", "0B0B0B\000\000", 0, 2, 1); x3s("\000(0~\000*\000)\000\000", "0~0~0~\000\000", 0, 6, 1); x3s("\000(0h\000*\000)\000\000", "\000\000", 0, 0, 1); x3s("\000(0\213\000+\000)\000\000", "0\2130\2130\2130\2130\2130\2130\213\000\000", 0, 14, 1); x3s("\000(0u\000+\000|0x\000*\000)\000\000", "0u0u0u0x0x\000\000", 0, 6, 1); x3s("\000(0B\000+\000|0D\000?\000)\000\000", "0D0D0D0B0B\000\000", 0, 2, 1); x3s("\000(0B0D0F\000)\000?\000\000", "0B0D0F\000\000", 0, 6, 1); x3s("\000(0B0D0F\000)\000*\000\000", "0B0D0F\000\000", 0, 6, 1); x3s("\000(0B0D0F\000)\000+\000\000", "0B0D0F\000\000", 0, 6, 1); x3s("\000(0U0W0Y\000|0B0D0F\000)\000+\000\000", "0B0D0F\000\000", 0, 6, 1); x3s("\000(\000[0j0k0l\000]\000[0K0M0O\000]\000|0K0M0O\000)\000+\000\000", "0K0M0O\000\000", 0, 6, 1); x3s("\000(\000(\000?\000i\000:0B0D0F\000)\000)\000\000", "0B0D0F\000\000", 0, 6, 1); 
x3s("\000(\000(\000?\000m\000:0B\000.0F\000)\000)\000\000", "0B\000\0120F\000\000", 0, 6, 1); x3s("\000(\000(\000?\000=0B0\223\000)0B\000)\000\000", "0B0\2230D\000\000", 0, 2, 1); x3s("0B0D0F\000|\000(\000.0B0D0H\000)\000\000", "0\2230B0D0H\000\000", 0, 8, 1); x3s("0B\000*\000(\000.\000)\000\000", "0B0B0B0B0\223\000\000", 8, 10, 1); x3s("0B\000*\000?\000(\000.\000)\000\000", "0B0B0B0B0\223\000\000", 0, 2, 1); x3s("0B\000*\000?\000(0\223\000)\000\000", "0B0B0B0B0\223\000\000", 8, 10, 1); x3s("\000[0D0F0H\000]0B\000*\000(\000.\000)\000\000", "0H0B0B0B0B0\223\000\000", 10, 12, 1); x3s("\000(\000\134\000A0D0D\000)0F0F\000\000", "0D0D0F0F\000\000", 0, 4, 1); ns("\000(\000\134\000A0D0D\000)0F0F\000\000", "0\2230D0D0F0F\000\000"); x3s("\000(\000^0D0D\000)0F0F\000\000", "0D0D0F0F\000\000", 0, 4, 1); ns("\000(\000^0D0D\000)0F0F\000\000", "0\2230D0D0F0F\000\000"); x3s("0\2150\215\000(0\2130\213\000$\000)\000\000", "0\2150\2150\2130\213\000\000", 4, 8, 1); ns("0\2150\215\000(0\2130\213\000$\000)\000\000", "0\2150\2150\2130\2130\213\000\000"); x2s("\000(q!\000)\000\134\0001\000\000", "q!q!\000\000", 0, 4); ns("\000(q!\000)\000\134\0001\000\000", "q!kf\000\000"); x2s("\000(zz\000?\000)\000\134\0001\000\000", "zzzz\000\000", 0, 4); x2s("\000(zz\000?\000?\000)\000\134\0001\000\000", "zzzz\000\000", 0, 0); x2s("\000(zz\000*\000)\000\134\0001\000\000", "zzzzzzzzzz\000\000", 0, 8); x3s("\000(zz\000*\000)\000\134\0001\000\000", "zzzzzzzzzz\000\000", 0, 4, 1); x2s("0B\000(0D\000*\000)\000\134\0001\000\000", "0B0D0D0D0D\000\000", 0, 10); x2s("0B\000(0D\000*\000)\000\134\0001\000\000", "0B0D\000\000", 0, 2); x2s("\000(0B\000*\000)\000(0D\000*\000)\000\134\0001\000\134\0002\000\000", "0B0B0B0D0D0B0B0B0D0D\000\000", 0, 20); x2s("\000(0B\000*\000)\000(0D\000*\000)\000\134\0002\000\000", "0B0B0B0D0D0D0D\000\000", 0, 14); x3s("\000(0B\000*\000)\000(0D\000*\000)\000\134\0002\000\000", "0B0B0B0D0D0D0D\000\000", 6, 10, 2); 
x2s("\000(\000(\000(\000(\000(\000(\000(0}\000*\000)0z\000)\000)\000)\000)\000)\000)0t\000\134\0007\000\000", "0}0}0}0z0t0}0}0}\000\000", 0, 16); x3s("\000(\000(\000(\000(\000(\000(\000(0}\000*\000)0z\000)\000)\000)\000)\000)\000)0t\000\134\0007\000\000", "0}0}0}0z0t0}0}0}\000\000", 0, 6, 7); x2s("\000(0o\000)\000(0r\000)\000(0u\000)\000\134\0002\000\134\0001\000\134\0003\000\000", "0o0r0u0r0o0u\000\000", 0, 12); x2s("\000(\000[0M\000-0Q\000]\000)\000\134\0001\000\000", "0O0O\000\000", 0, 4); if (!org.joni.Config.NON_UNICODE_SDW) x2s("\000(\000\134\000w\000\134\000d\000\134\000s\000)\000\134\0001\000\000", "0B\0005\000 0B\0005\000 \000\000", 0, 12); ns("\000(\000\134\000w\000\134\000d\000\134\000s\000)\000\134\0001\000\000", "0B\0005\000 0B\0005\000\000"); x2s("\000(\212\260\377\037\000|\000[0B\000-0F\000]\000{\0003\000}\000)\000\134\0001\000\000", "\212\260\377\037\212\260\377\037\000\000", 0, 8); x2s("\000.\000.\000.\000(\212\260\377\037\000|\000[0B\000-0F\000]\000{\0003\000}\000)\000\134\0001\000\000", "0B\000a0B\212\260\377\037\212\260\377\037\000\000", 0, 14); x2s("\000(\212\260\377\037\000|\000[0B\000-0F\000]\000{\0003\000}\000)\000\134\0001\000\000", "0F0D0F0F0D0F\000\000", 0, 12); x2s("\000(\000^0S\000)\000\134\0001\000\000", "0S0S\000\000", 0, 4); ns("\000(\000^0\200\000)\000\134\0001\000\000", "0\2010\2000\200\000\000"); ns("\000(0B\000$\000)\000\134\0001\000\000", "0B0B\000\000"); ns("\000(0B0D\000\134\000Z\000)\000\134\0001\000\000", "0B0D\000\000"); x2s("\000(0B\000*\000\134\000Z\000)\000\134\0001\000\000", "0B\000\000", 2, 2); x2s("\000.\000(0B\000*\000\134\000Z\000)\000\134\0001\000\000", "0D0B\000\000", 2, 4); x3s("\000(\000.\000(0\2040D0\206\000)\000\134\0002\000)\000\000", "\000z0\2040D0\2060\2040D0\206\000\000", 0, 14, 1); if (!org.joni.Config.NON_UNICODE_SDW) x3s("\000(\000.\000(\000.\000.\000\134\000d\000.\000)\000\134\0002\000)\000\000", "0B\0001\0002\0003\0004\0001\0002\0003\0004\000\000", 0, 18, 1); 
x2s("\000(\000(\000?\000i\000:0B\000v0Z\000)\000)\000\134\0001\000\000", "0B\000v0Z0B\000v0Z\000\000", 0, 12); x2s("\000(\000?\000Y\011\000|\000\134\000(\000\134\000g\000\000\134\000)\000)\000\000", "\000(\000(\000(\000(\000(\000(Y\011\000)\000)\000)\000)\000)\000)\000\000", 0, 26); x2s("\000\134\000A\000(\000?\000:\000\134\000g\000<\226?\000_\0001\000>\000|\000\134\000g\000\000|\000\134\000z}BN\206\000 \000 \000(\000?\000<\226?\000_\0001\000>\211\263\000|\201\352\000\134\000g\000\201\352\000)\000(\000?\000W(\000|\203\351\205\251\000\134\000g\000<\226?\000_\0001\000>\203\351\205\251\000)\000)\000$\000\000", "\203\351\205\251\201\352\203\351\205\251\201\352W(\201\352\203\351\205\251\201\352\203\351\205\251\000\000", 0, 26); x2s("\000[\000[0r0u\000]\000]\000\000", "0u\000\000", 0, 2); x2s("\000[\000[0D0J0F\000]0K\000]\000\000", "0K\000\000", 0, 2); ns("\000[\000[\000^0B\000]\000]\000\000", "0B\000\000"); ns("\000[\000^\000[0B\000]\000]\000\000", "0B\000\000"); x2s("\000[\000^\000[\000^0B\000]\000]\000\000", "0B\000\000", 0, 2); x2s("\000[\000[0K0M0O\000]\000&\000&0M0O\000]\000\000", "0O\000\000", 0, 2); ns("\000[\000[0K0M0O\000]\000&\000&0M0O\000]\000\000", "0K\000\000"); ns("\000[\000[0K0M0O\000]\000&\000&0M0O\000]\000\000", "0Q\000\000"); x2s("\000[0B\000-0\223\000&\000&0D\000-0\222\000&\000&0F\000-0\221\000]\000\000", "0\221\000\000", 0, 2); ns("\000[\000^0B\000-0\223\000&\000&0D\000-0\222\000&\000&0F\000-0\221\000]\000\000", "0\221\000\000"); x2s("\000[\000[\000^0B\000&\000&0B\000]\000&\000&0B\000-0\223\000]\000\000", "0D\000\000", 0, 2); ns("\000[\000[\000^0B\000&\000&0B\000]\000&\000&0B\000-0\223\000]\000\000", "0B\000\000"); x2s("\000[\000[\000^0B\000-0\223\000&\000&0D0F0H0J\000]\000&\000&\000[\000^0F\000-0K\000]\000]\000\000", "0M\000\000", 0, 2); ns("\000[\000[\000^0B\000-0\223\000&\000&0D0F0H0J\000]\000&\000&\000[\000^0F\000-0K\000]\000]\000\000", "0D\000\000"); x2s("\000[\000^\000[\000^0B0D0F\000]\000&\000&\000[\000^0F0H0J\000]\000]\000\000", "0F\000\000", 
0, 2); x2s("\000[\000^\000[\000^0B0D0F\000]\000&\000&\000[\000^0F0H0J\000]\000]\000\000", "0H\000\000", 0, 2); ns("\000[\000^\000[\000^0B0D0F\000]\000&\000&\000[\000^0F0H0J\000]\000]\000\000", "0K\000\000"); x2s("\000[0B\000-\000&\000&\000-0B\000]\000\000", "\000-\000\000", 0, 2); x2s("\000[\000^\000[\000^\000a\000-\000z0B0D0F\000]\000&\000&\000[\000^\000b\000c\000d\000e\000f\000g0F0H0J\000]\000q\000-\000w\000]\000\000", "0H\000\000", 0, 2); x2s("\000[\000^\000[\000^\000a\000-\000z0B0D0F\000]\000&\000&\000[\000^\000b\000c\000d\000e\000f\000g0F0H0J\000]\000g\000-\000w\000]\000\000", "\000f\000\000", 0, 2); x2s("\000[\000^\000[\000^\000a\000-\000z0B0D0F\000]\000&\000&\000[\000^\000b\000c\000d\000e\000f\000g0F0H0J\000]\000g\000-\000w\000]\000\000", "\000g\000\000", 0, 2); ns("\000[\000^\000[\000^\000a\000-\000z0B0D0F\000]\000&\000&\000[\000^\000b\000c\000d\000e\000f\000g0F0H0J\000]\000g\000-\000w\000]\000\000", "\0002\000\000"); x2s("\000a\000<\000b\000>0\3200\3740\2700\3470\3630n0\3000\2460\3630\3550\3740\311\000<\000\134\000/\000b\000>\000\000", "\000a\000<\000b\000>0\3200\3740\2700\3470\3630n0\3000\2460\3630\3550\3740\311\000<\000/\000b\000>\000\000", 0, 40); x2s("\000.\000<\000b\000>0\3200\3740\2700\3470\3630n0\3000\2460\3630\3550\3740\311\000<\000\134\000/\000b\000>\000\000", "\000a\000<\000b\000>0\3200\3740\2700\3470\3630n0\3000\2460\3630\3550\3740\311\000<\000/\000b\000>\000\000", 0, 40); } public static void main(String[] args) throws Throwable { new TestU().run(); } } joni-2.0.0/test/org/joni/test/TestU8.java000066400000000000000000000067131214326443200201430ustar00rootroot00000000000000/* * Permission is hereby granted, free of charge, to any person obtaining a copy of * this software and associated documentation files (the "Software"), to deal in * the Software without restriction, including without limitation the rights to * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the 
Software is furnished to do * so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */
package org.joni.test;

import org.jcodings.Encoding;
import org.jcodings.specific.UTF8Encoding;
import org.joni.Option;
import org.joni.Syntax;

/**
 * Match checks that are configured with {@link UTF8Encoding} and exercised through the
 * helper methods inherited from {@link Test}.
 *
 * <p>Several fixtures are written as raw UTF-8 byte sequences; the comments next to them
 * name the code points those bytes encode.
 */
public class TestU8 extends Test {

    /** @return {@link Option#DEFAULT} */
    public int option() {
        return Option.DEFAULT;
    }

    /** @return the shared {@link UTF8Encoding} instance */
    public Encoding encoding() {
        return UTF8Encoding.INSTANCE;
    }

    /**
     * @return the charset name {@code "iso-8859-1"}
     */
    // NOTE(review): presumably the String-based helpers in Test use this name to turn
    // test strings into bytes -- confirm against Test.
    public String testEncoding() {
        return "iso-8859-1";
    }

    /** @return {@link Syntax#DEFAULT} */
    public Syntax syntax() {
        return Syntax.DEFAULT;
    }

    /**
     * Runs every check of this class in a fixed order.
     *
     * @throws InterruptedException declared because the inherited helpers are invoked here
     */
    public void test() throws InterruptedException {
        // ASCII-only pattern against the UTF-8 bytes of U+20AC (euro sign) followed by "0-".
        // NOTE(review): getBytes() relies on the platform default charset; the pattern is
        // pure ASCII, so this only matters on a non-ASCII-compatible default.
        // NOTE(review): the trailing 'true' presumably marks an expected non-match --
        // confirm against Test.xx.
        final byte[] threeDigitsThenDash = "^\\d\\d\\d-".getBytes();
        final byte[] euroZeroDash = {(byte) 0xE2, (byte) 0x82, (byte) 0xAC, '0', '-'};
        xx(threeDigitsThenDash, euroZeroDash, 0, 0, 0, true);

        // Counted repetition of a single ASCII letter under IGNORECASE.
        x2s("x{2}", "xx", 0, 2, Option.IGNORECASE);
        x2s("x{2}", "XX", 0, 2, Option.IGNORECASE);
        x2s("x{3}", "XxX", 0, 3, Option.IGNORECASE);
        ns("x{2}", "x", Option.IGNORECASE);
        ns("x{2}", "X", Option.IGNORECASE);

        // Pattern: U+30E0 followed by "(.)".
        final byte[] katakanaPattern = {
            (byte) 0xE3, (byte) 0x83, (byte) 0xA0, '(', '.', ')'
        };
        // Subject: six three-byte katakana (U+30B9 U+30D1 U+30E0 U+30CF U+30F3 U+30AF);
        // U+30E0 occupies bytes 6..8 and the following character bytes 9..11.
        final byte[] katakanaSubject = {
            (byte) 0xE3, (byte) 0x82, (byte) 0xB9,
            (byte) 0xE3, (byte) 0x83, (byte) 0x91,
            (byte) 0xE3, (byte) 0x83, (byte) 0xA0,
            (byte) 0xE3, (byte) 0x83, (byte) 0x8F,
            (byte) 0xE3, (byte) 0x83, (byte) 0xB3,
            (byte) 0xE3, (byte) 0x82, (byte) 0xAF
        };
        x2(katakanaPattern, katakanaSubject, 6, 12);

        // A 35-letter literal against lower-case, upper-case and mixed-case subjects.
        final String longLowerPattern = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa";
        x2s(longLowerPattern, "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", 0, 35, Option.IGNORECASE);
        x2s(longLowerPattern, "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA", 0, 35, Option.IGNORECASE);
        x2s(longLowerPattern, "aaaaaaaaaaaaaaaaaaAAAAAAAAAAAAAAAAA", 0, 35, Option.IGNORECASE);

        // Pattern "^(X)\1$" where X is U+FF21 (fullwidth A); subject is U+FF21 then ASCII 'A'.
        final byte[] fullwidthBackrefPattern = {
            '^', '(', (byte) 0xEF, (byte) 0xBC, (byte) 0xA1, ')', '\\', '1', '$'
        };
        final byte[] fullwidthThenAscii = {(byte) 0xEF, (byte) 0xBC, (byte) 0xA1, 'A'};
        n(fullwidthBackrefPattern, fullwidthThenAscii, Option.IGNORECASE);

        // Pattern "^X{2}$" where X is U+00DF (sharp s); subject is U+00DF followed by "ss".
        final byte[] sharpSTwicePattern = {
            '^', (byte) 0xC3, (byte) 0x9F, '{', '2', '}', '$'
        };
        final byte[] sharpSThenSs = {(byte) 0xC3, (byte) 0x9F, 's', 's'};
        x2(sharpSTwicePattern, sharpSThenSs, 0, 4, Option.IGNORECASE);

        // UTF-8 bytes of U+00C5 U+00C5 and of "^X{2}$" with X = U+00C5. Only the two
        // disabled checks below refer to these strings.
        // NOTE(review): new String(byte[]) decodes with the platform default charset.
        String str2 = new String(new byte[] {(byte) 0xC3, (byte) 0x85, (byte) 0xC3, (byte) 0x85});
        String pat2 = new String(
            new byte[] {'^', (byte) 0xC3, (byte) 0x85, '{', '2', '}', '$'});
        // x2s(pat2, str2, 4, 4);
        // x2s(pat2, str2, 4, 4, Option.IGNORECASE);

        // Inline option group "(?i-mx:...)" followed by a literal.
        ns("(?i-mx:ak)a", "ema");

        // Case-insensitive groups containing escaped punctuation.
        x2s("(?i:!\\[CDAT)", "![CDAT", 0, 6);
        x2s("(?i:\\!\\[CDAa)", "\\![CDAa", 1, 7);
        x2s("(?i:\\!\\[CDAb)", "\\![CDAb", 1, 7);
    }

    /**
     * Command-line entry point: creates a {@code TestU8} and calls its inherited
     * {@code run()} method.
     *
     * @param args ignored
     * @throws Throwable anything propagated out of {@code run()}
     */
    public static void main(String[] args) throws Throwable {
        new TestU8().run();
    }
}