--- /dev/null
+/* -*- Mode: java; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-\r
+ *\r
+ * The contents of this file are subject to the Netscape Public\r
+ * License Version 1.1 (the "License"); you may not use this file\r
+ * except in compliance with the License. You may obtain a copy of\r
+ * the License at http://www.mozilla.org/NPL/\r
+ *\r
+ * Software distributed under the License is distributed on an "AS\r
+ * IS" basis, WITHOUT WARRANTY OF ANY KIND, either express oqr\r
+ * implied. See the License for the specific language governing\r
+ * rights and limitations under the License.\r
+ *\r
+ * The Original Code is Rhino code, released\r
+ * May 6, 1998.\r
+ *\r
+ * The Initial Developer of the Original Code is Netscape\r
+ * Communications Corporation. Portions created by Netscape are\r
+ * Copyright (C) 1997-1999 Netscape Communications Corporation. All\r
+ * Rights Reserved.\r
+ *\r
+ * Contributor(s): \r
+ * Norris Boyd\r
+ * Brendan Eich\r
+ * Matthias Radestock\r
+ *\r
+ * Alternatively, the contents of this file may be used under the\r
+ * terms of the GNU Public License (the "GPL"), in which case the\r
+ * provisions of the GPL are applicable instead of those above.\r
+ * If you wish to allow use of your version of this file only\r
+ * under the terms of the GPL and not to allow others to use your\r
+ * version of this file under the NPL, indicate your decision by\r
+ * deleting the provisions above and replace them with the notice\r
+ * and other provisions required by the GPL. If you do not delete\r
+ * the provisions above, a recipient may use your version of this\r
+ * file under either the NPL or the GPL.\r
+ */\r
+\r
+package org.mozilla.javascript.regexp;\r
+\r
+import org.mozilla.javascript.*;\r
+import java.lang.reflect.Method;\r
+\r
+/**\r
+ * This class implements the RegExp native object.\r
+ *\r
+ * Revision History:\r
+ * Implementation in C by Brendan Eich\r
+ * Initial port to Java by Norris Boyd from jsregexp.c version 1.36\r
+ * Merged up to version 1.38, which included Unicode support.\r
+ * Merged bug fixes in version 1.39.\r
+ * Merged JSFUN13_BRANCH changes up to 1.32.2.13\r
+ *\r
+ * @author Brendan Eich\r
+ * @author Norris Boyd\r
+ */\r
+public class NativeRegExp extends IdScriptable implements Function {\r
+\r
+ public static final int GLOB = 0x1; // 'g' flag: global\r
+ public static final int FOLD = 0x2; // 'i' flag: fold\r
+ public static final int MULTILINE = 0x4; // 'm' flag: multiline\r
+\r
+ //type of match to perform\r
+ public static final int TEST = 0;\r
+ public static final int MATCH = 1;\r
+ public static final int PREFIX = 2;\r
+\r
+ private static final boolean debug = false;\r
+\r
+ public static void init(Context cx, Scriptable scope, boolean sealed) {\r
+ \r
+ NativeRegExp proto = new NativeRegExp();\r
+ proto.prototypeFlag = true;\r
+ proto.activateIdMap(MAX_PROTOTYPE_ID);\r
+ proto.setSealFunctionsFlag(sealed);\r
+ proto.setFunctionParametrs(cx);\r
+ proto.setParentScope(scope);\r
+ proto.setPrototype(getObjectPrototype(scope));\r
+\r
+\r
+ NativeRegExpCtor ctor = new NativeRegExpCtor();\r
+\r
+ ctor.setPrototype(getClassPrototype(scope, "Function"));\r
+ ctor.setParentScope(scope);\r
+\r
+ ctor.setImmunePrototypeProperty(proto);\r
+ \r
+ if (sealed) {\r
+ proto.sealObject();\r
+ ctor.sealObject();\r
+ }\r
+\r
+ defineProperty(scope, "RegExp", ctor, ScriptableObject.DONTENUM);\r
+ }\r
+\r
+ public NativeRegExp(Context cx, Scriptable scope, String source, \r
+ String global, boolean flat) {\r
+ init(cx, scope, source, global, flat);\r
+ }\r
+\r
+ public void init(Context cx, Scriptable scope, String source, \r
+ String global, boolean flat) {\r
+ this.source = source;\r
+ flags = 0;\r
+ if (global != null) {\r
+ for (int i=0; i < global.length(); i++) {\r
+ char c = global.charAt(i);\r
+ if (c == 'g') {\r
+ flags |= GLOB;\r
+ } else if (c == 'i') {\r
+ flags |= FOLD;\r
+ } else if (c == 'm') {\r
+ flags |= MULTILINE;\r
+ } else {\r
+ Object[] errArgs = { new Character(c) };\r
+ throw NativeGlobal.constructError(cx, "SyntaxError",\r
+ ScriptRuntime.getMessage(\r
+ "msg.invalid.re.flag", errArgs),\r
+ scope);\r
+ }\r
+ }\r
+ }\r
+\r
+ CompilerState state = new CompilerState(source, flags, cx, scope);\r
+ if (flat) {\r
+ ren = null;\r
+ int sourceLen = source.length();\r
+ int index = 0;\r
+ while (sourceLen > 0) {\r
+ int len = sourceLen;\r
+ if (len > REOP_FLATLEN_MAX) {\r
+ len = REOP_FLATLEN_MAX;\r
+ }\r
+ RENode ren2 = new RENode(state, len == 1 ? REOP_FLAT1 : REOP_FLAT,\r
+ new Integer(index)); \r
+ ren2.flags = RENode.NONEMPTY;\r
+ if (len > 1) {\r
+ ren2.kid2 = index + len;\r
+ } else {\r
+ ren2.flags |= RENode.SINGLE;\r
+ ren2.chr = state.source[index]; \r
+ }\r
+ index += len;\r
+ sourceLen -= len;\r
+ if (ren == null)\r
+ ren = ren2;\r
+ else\r
+ setNext(state, ren, ren2);\r
+ }\r
+ }\r
+ else\r
+ this.ren = parseRegExp(state);\r
+ if (ren == null) return;\r
+ RENode end = new RENode(state, REOP_END, null);\r
+ setNext(state, ren, end);\r
+ if (debug)\r
+ dumpRegExp(state, ren);\r
+ this.lastIndex = 0;\r
+ this.parenCount = state.parenCount;\r
+ this.flags = flags;\r
+\r
+ scope = org.xwt.util.JSObject.defaultObjects;\r
+ setPrototype(getClassPrototype(scope, "RegExp"));\r
+ setParentScope(scope);\r
+ }\r
+\r
+ public String getClassName() {\r
+ return "RegExp";\r
+ }\r
+\r
+ public Object call(Context cx, Scriptable scope, Scriptable thisObj,\r
+ Object[] args) {\r
+ return execSub(cx, scope, args, MATCH);\r
+ }\r
+\r
+ public Scriptable construct(Context cx, Scriptable scope, Object[] args) {\r
+ return (Scriptable) call(cx, scope, null, args);\r
+ }\r
+\r
+ Scriptable compile(Context cx, Scriptable scope, Object[] args) {\r
+ if (args.length > 0 && args[0] instanceof NativeRegExp) {\r
+ if (args.length > 1 && args[1] != Undefined.instance) {\r
+ // report error\r
+ throw NativeGlobal.constructError(\r
+ cx, "TypeError",\r
+ "only one argument may be specified " +\r
+ "if the first argument is a RegExp object",\r
+ scope);\r
+ }\r
+ NativeRegExp thatObj = (NativeRegExp) args[0];\r
+ source = thatObj.source; \r
+ lastIndex = thatObj.lastIndex;\r
+ parenCount = thatObj.parenCount;\r
+ flags = thatObj.flags;\r
+ program = thatObj.program;\r
+ ren = thatObj.ren;\r
+ return this;\r
+ }\r
+ String s = args.length == 0 ? "" : ScriptRuntime.toString(args[0]);\r
+ String global = args.length > 1 && args[1] != Undefined.instance\r
+ ? ScriptRuntime.toString(args[1])\r
+ : null;\r
+ init(cx, scope, s, global, false);\r
+ return this;\r
+ }\r
+\r
+ public String toString() {\r
+ StringBuffer buf = new StringBuffer();\r
+ buf.append('/');\r
+ buf.append(source);\r
+ buf.append('/');\r
+ if ((flags & GLOB) != 0)\r
+ buf.append('g');\r
+ if ((flags & FOLD) != 0)\r
+ buf.append('i');\r
+ if ((flags & MULTILINE) != 0)\r
+ buf.append('m');\r
+ return buf.toString();\r
+ }\r
+\r
+ public NativeRegExp() {\r
+ }\r
+ \r
+ private static RegExpImpl getImpl(Context cx) {\r
+ return (RegExpImpl) ScriptRuntime.getRegExpProxy(cx);\r
+ }\r
+\r
+ private Object execSub(Context cx, Scriptable scopeObj,\r
+ Object[] args, int matchType)\r
+ {\r
+ RegExpImpl reImpl = getImpl(cx);\r
+ String str;\r
+ if (args.length == 0) {\r
+ str = reImpl.input;\r
+ if (str == null) {\r
+ Object[] errArgs = { toString() };\r
+ throw NativeGlobal.constructError(\r
+ cx, "SyntaxError",\r
+ ScriptRuntime.getMessage\r
+ ("msg.no.re.input.for", errArgs),\r
+ scopeObj);\r
+ }\r
+ } else {\r
+ str = ScriptRuntime.toString(args[0]);\r
+ }\r
+ int i = ((flags & GLOB) != 0) ? lastIndex : 0;\r
+ int indexp[] = { i };\r
+ Object rval = executeRegExp(cx, scopeObj, \r
+ reImpl, str, indexp, matchType);\r
+ if ((flags & GLOB) != 0) {\r
+ lastIndex = (rval == null || rval == Undefined.instance) \r
+ ? 0 : indexp[0];\r
+ }\r
+ return rval;\r
+ }\r
+\r
+ private Object exec(Context cx, Scriptable scopeObj, Object[] args) {\r
+ return execSub(cx, scopeObj, args, MATCH);\r
+ }\r
+\r
+ private Object test(Context cx, Scriptable scopeObj, Object[] args) {\r
+ Object rval = execSub(cx, scopeObj, args, TEST);\r
+ if (rval == null || !rval.equals(Boolean.TRUE))\r
+ rval = Boolean.FALSE;\r
+ return rval;\r
+ }\r
+\r
+ private Object prefix(Context cx, Scriptable scopeObj, Object[] args) {\r
+ return execSub(cx, scopeObj, args, PREFIX);\r
+ }\r
+\r
+ static final int JS_BITS_PER_BYTE = 8;\r
+\r
+ private static final byte REOP_EMPTY = 0; /* match rest of input against rest of r.e. */\r
+ private static final byte REOP_ALT = 1; /* alternative subexpressions in kid and next */\r
+ private static final byte REOP_BOL = 2; /* beginning of input (or line if multiline) */\r
+ private static final byte REOP_EOL = 3; /* end of input (or line if multiline) */\r
+ private static final byte REOP_WBDRY = 4; /* match "" at word boundary */\r
+ private static final byte REOP_WNONBDRY = 5; /* match "" at word non-boundary */\r
+ private static final byte REOP_QUANT = 6; /* quantified atom: atom{1,2} */\r
+ private static final byte REOP_STAR = 7; /* zero or more occurrences of kid */\r
+ private static final byte REOP_PLUS = 8; /* one or more occurrences of kid */\r
+ private static final byte REOP_OPT = 9; /* optional subexpression in kid */\r
+ private static final byte REOP_LPAREN = 10; /* left paren bytecode: kid is u.num'th sub-regexp */\r
+ private static final byte REOP_RPAREN = 11; /* right paren bytecode */\r
+ private static final byte REOP_DOT = 12; /* stands for any character */\r
+ private static final byte REOP_CCLASS = 13; /* character class: [a-f] */\r
+ private static final byte REOP_DIGIT = 14; /* match a digit char: [0-9] */\r
+ private static final byte REOP_NONDIGIT = 15; /* match a non-digit char: [^0-9] */\r
+ private static final byte REOP_ALNUM = 16; /* match an alphanumeric char: [0-9a-z_A-Z] */\r
+ private static final byte REOP_NONALNUM = 17; /* match a non-alphanumeric char: [^0-9a-z_A-Z] */\r
+ private static final byte REOP_SPACE = 18; /* match a whitespace char */\r
+ private static final byte REOP_NONSPACE = 19; /* match a non-whitespace char */\r
+ private static final byte REOP_BACKREF = 20; /* back-reference (e.g., \1) to a parenthetical */\r
+ private static final byte REOP_FLAT = 21; /* match a flat string */\r
+ private static final byte REOP_FLAT1 = 22; /* match a single char */\r
+ private static final byte REOP_JUMP = 23; /* for deoptimized closure loops */\r
+ private static final byte REOP_DOTSTAR = 24; /* optimize .* to use a single opcode */\r
+ private static final byte REOP_ANCHOR = 25; /* like .* but skips left context to unanchored r.e. */\r
+ private static final byte REOP_EOLONLY = 26; /* $ not preceded by any pattern */\r
+ private static final byte REOP_UCFLAT = 27; /* flat Unicode string; len immediate counts chars */\r
+ private static final byte REOP_UCFLAT1 = 28; /* single Unicode char */\r
+ private static final byte REOP_UCCLASS = 29; /* Unicode character class, vector of chars to match */\r
+ private static final byte REOP_NUCCLASS = 30; /* negated Unicode character class */\r
+ private static final byte REOP_BACKREFi = 31; /* case-independent REOP_BACKREF */\r
+ private static final byte REOP_FLATi = 32; /* case-independent REOP_FLAT */\r
+ private static final byte REOP_FLAT1i = 33; /* case-independent REOP_FLAT1 */\r
+ private static final byte REOP_UCFLATi = 34; /* case-independent REOP_UCFLAT */\r
+ private static final byte REOP_UCFLAT1i = 35; /* case-independent REOP_UCFLAT1 */\r
+ private static final byte REOP_ANCHOR1 = 36; /* first-char discriminating REOP_ANCHOR */\r
+ private static final byte REOP_NCCLASS = 37; /* negated 8-bit character class */\r
+ private static final byte REOP_DOTSTARMIN = 38; /* ungreedy version of REOP_DOTSTAR */\r
+ private static final byte REOP_LPARENNON = 39; /* non-capturing version of REOP_LPAREN */\r
+ private static final byte REOP_RPARENNON = 40; /* non-capturing version of REOP_RPAREN */\r
+ private static final byte REOP_ASSERT = 41; /* zero width positive lookahead assertion */\r
+ private static final byte REOP_ASSERT_NOT = 42; /* zero width negative lookahead assertion */\r
+ private static final byte REOP_END = 43;\r
+\r
+ /* maximum length of FLAT string */\r
+ private static final int REOP_FLATLEN_MAX = 255;\r
+\r
+ /* not thread safe, used only for debugging */\r
+ private static int level;\r
+\r
+ private static String[] reopname = null;\r
+ static {\r
+ if (debug) {\r
+ String a[] = {\r
+ "empty",\r
+ "alt",\r
+ "bol",\r
+ "eol",\r
+ "wbdry",\r
+ "wnonbdry",\r
+ "quant",\r
+ "star",\r
+ "plus",\r
+ "opt",\r
+ "lparen",\r
+ "rparen",\r
+ "dot",\r
+ "cclass",\r
+ "digit",\r
+ "nondigit",\r
+ "alnum",\r
+ "nonalnum",\r
+ "space",\r
+ "nonspace",\r
+ "backref",\r
+ "flat",\r
+ "flat1",\r
+ "jump",\r
+ "dotstar",\r
+ "anchor",\r
+ "eolonly",\r
+ "ucflat",\r
+ "ucflat1",\r
+ "ucclass",\r
+ "nucclass",\r
+ "backrefi",\r
+ "flati",\r
+ "flat1i",\r
+ "ucflati",\r
+ "ucflat1i",\r
+ "anchor1",\r
+ "ncclass",\r
+ "dotstar_min",\r
+ "lparen_non",\r
+ "rparen_non",\r
+ "end"\r
+ };\r
+ reopname = a;\r
+ }\r
+ }\r
+\r
+ private String getPrintableString(String str) {\r
+ if (debug) {\r
+ StringBuffer buf = new StringBuffer(str.length());\r
+ for (int i = 0; i < str.length(); i++) {\r
+ int c = str.charAt(i);\r
+ if ((c < 0x20) || (c > 0x7F)) {\r
+ if (c == '\n')\r
+ buf.append("\\n");\r
+ else\r
+ buf.append("\\u" + Integer.toHexString(c));\r
+ }\r
+ else\r
+ buf.append((char)c);\r
+ }\r
+ return buf.toString();\r
+ } else {\r
+ return "";\r
+ }\r
+ }\r
+\r
+ private void dumpRegExp(CompilerState state, RENode ren) {\r
+ if (debug) {\r
+ if (level == 0)\r
+ System.out.print("level offset flags description\n");\r
+ level++;\r
+ do {\r
+ char[] source = ren.s != null ? ren.s : state.source;\r
+ System.out.print(level);\r
+ System.out.print(" ");\r
+ System.out.print(ren.offset);\r
+ System.out.print(" " +\r
+ ((ren.flags & RENode.ANCHORED) != 0 ? "A" : "-") +\r
+ ((ren.flags & RENode.SINGLE) != 0 ? "S" : "-") +\r
+ ((ren.flags & RENode.NONEMPTY) != 0 ? "F" : "-") + // F for full\r
+ ((ren.flags & RENode.ISNEXT) != 0 ? "N" : "-") + // N for next\r
+ ((ren.flags & RENode.GOODNEXT) != 0 ? "G" : "-") +\r
+ ((ren.flags & RENode.ISJOIN) != 0 ? "J" : "-") +\r
+ ((ren.flags & RENode.MINIMAL) != 0 ? "M" : "-") +\r
+ " " +\r
+ reopname[ren.op]);\r
+\r
+ switch (ren.op) {\r
+ case REOP_ALT:\r
+ System.out.print(" ");\r
+ System.out.println(ren.next.offset);\r
+ dumpRegExp(state, (RENode) ren.kid);\r
+ break;\r
+\r
+ case REOP_STAR:\r
+ case REOP_PLUS:\r
+ case REOP_OPT:\r
+ case REOP_ANCHOR1:\r
+ case REOP_ASSERT:\r
+ case REOP_ASSERT_NOT:\r
+ System.out.println();\r
+ dumpRegExp(state, (RENode) ren.kid);\r
+ break;\r
+\r
+ case REOP_QUANT:\r
+ System.out.print(" next ");\r
+ System.out.print(ren.next.offset);\r
+ System.out.print(" min ");\r
+ System.out.print(ren.min);\r
+ System.out.print(" max ");\r
+ System.out.println(ren.max);\r
+ dumpRegExp(state, (RENode) ren.kid);\r
+ break;\r
+\r
+ case REOP_LPAREN:\r
+ System.out.print(" num ");\r
+ System.out.println(ren.num);\r
+ dumpRegExp(state, (RENode) ren.kid);\r
+ break;\r
+\r
+ case REOP_LPARENNON:\r
+ System.out.println();\r
+ dumpRegExp(state, (RENode) ren.kid);\r
+ break;\r
+\r
+ case REOP_BACKREF:\r
+ case REOP_RPAREN:\r
+ System.out.print(" num ");\r
+ System.out.println(ren.num);\r
+ break;\r
+\r
+ case REOP_CCLASS: {\r
+ int index = ((Integer) ren.kid).intValue();\r
+ int index2 = ren.kid2;\r
+ int len = index2 - index;\r
+ System.out.print(" [");\r
+ System.out.print(getPrintableString(new String(source, index, len)));\r
+ System.out.println("]");\r
+ break;\r
+ }\r
+\r
+ case REOP_FLAT: {\r
+ int index = ((Integer) ren.kid).intValue();\r
+ int index2 = ren.kid2;\r
+ int len = index2 - index;\r
+ System.out.print(" ");\r
+ System.out.print(getPrintableString(new String(source, index, len)));\r
+ System.out.print(" (");\r
+ System.out.print(len);\r
+ System.out.println(")");\r
+ break;\r
+ }\r
+\r
+ case REOP_FLAT1:\r
+ System.out.print(" ");\r
+ System.out.print(ren.chr);\r
+ System.out.print(" ('\\");\r
+ System.out.print(Integer.toString(ren.chr, 8));\r
+ System.out.println("')");\r
+ break;\r
+\r
+ case REOP_JUMP:\r
+ System.out.print(" ");\r
+ System.out.println(ren.next.offset);\r
+ break;\r
+\r
+ case REOP_UCFLAT: {\r
+ int index = ((Integer) ren.kid).intValue();\r
+ int len = ren.kid2 - index;\r
+ for (int i = 0; i < len; i++)\r
+ System.out.print("\\u" + \r
+ Integer.toHexString(source[index+i]));\r
+ System.out.println();\r
+ break;\r
+ }\r
+\r
+ case REOP_UCFLAT1:\r
+ System.out.print("\\u" + \r
+ Integer.toHexString(ren.chr));\r
+ System.out.println();\r
+ break;\r
+\r
+ case REOP_UCCLASS: {\r
+ int index = ((Integer) ren.kid).intValue();\r
+ int len = ren.kid2 - index;\r
+ System.out.print(" [");\r
+ for (int i = 0; i < len; i++)\r
+ System.out.print("\\u" + \r
+ Integer.toHexString(source[index+i]));\r
+ System.out.println("]");\r
+ break;\r
+ }\r
+\r
+ default:\r
+ System.out.println();\r
+ break;\r
+ }\r
+\r
+ if ((ren.flags & RENode.GOODNEXT) == 0)\r
+ break;\r
+ } while ((ren = ren.next) != null);\r
+ level--;\r
+ }\r
+ }\r
+\r
+ private void fixNext(CompilerState state, RENode ren1, RENode ren2,\r
+ RENode oldnext) {\r
+ boolean goodnext;\r
+ RENode next, kid, ren;\r
+\r
+ goodnext = ren2 != null && (ren2.flags & RENode.ISNEXT) == 0;\r
+\r
+ /*\r
+ * Find the final node in a list of alternatives, or concatenations, or\r
+ * even a concatenation of alternatives followed by non-alternatives (e.g.\r
+ * ((x|y)z)w where ((x|y)z) is ren1 and w is ren2).\r
+ */\r
+ for (; (next = ren1.next) != null && next != oldnext; ren1 = next) {\r
+ if (ren1.op == REOP_ALT) {\r
+ /* Find the end of this alternative's operand list. */\r
+ kid = (RENode) ren1.kid;\r
+ if (kid.op == REOP_JUMP)\r
+ continue;\r
+ for (ren = kid; ren.next != null; ren = ren.next) {\r
+ if (ren.op == REOP_ALT)\r
+ throw new RuntimeException("REOP_ALT not expected");\r
+ }\r
+\r
+ /* Append a jump node to all but the last alternative. */\r
+ ren.next = new RENode(state, REOP_JUMP, null);\r
+ ren.next.flags |= RENode.ISNEXT;\r
+ ren.flags |= RENode.GOODNEXT;\r
+\r
+ /* Recur to fix all descendent nested alternatives. */\r
+ fixNext(state, kid, ren2, oldnext);\r
+ }\r
+ }\r
+\r
+ /*\r
+ * Now ren1 points to the last alternative, or to the final node on a\r
+ * concatenation list. Set its next link to ren2, flagging a join point\r
+ * if appropriate.\r
+ */\r
+ if (ren2 != null) {\r
+ if ((ren2.flags & RENode.ISNEXT) == 0)\r
+ ren2.flags |= RENode.ISNEXT;\r
+ else\r
+ ren2.flags |= RENode.ISJOIN;\r
+ }\r
+ ren1.next = ren2;\r
+ if (goodnext)\r
+ ren1.flags |= RENode.GOODNEXT;\r
+\r
+ /*\r
+ * The following ops have a kid subtree through which to recur. Here is\r
+ * where we fix the next links under the final ALT node's kid.\r
+ */\r
+ switch (ren1.op) {\r
+ case REOP_ALT:\r
+ case REOP_QUANT:\r
+ case REOP_STAR:\r
+ case REOP_PLUS:\r
+ case REOP_OPT:\r
+ case REOP_LPAREN:\r
+ case REOP_LPARENNON:\r
+ case REOP_ASSERT:\r
+ case REOP_ASSERT_NOT:\r
+ fixNext(state, (RENode) ren1.kid, ren2, oldnext);\r
+ break;\r
+ default:;\r
+ }\r
+ }\r
+\r
+ private void setNext(CompilerState state, RENode ren1, RENode ren2) {\r
+ fixNext(state, ren1, ren2, null);\r
+ }\r
+\r
+ /*\r
+ * Top-down regular expression grammar, based closely on Perl 4.\r
+ *\r
+ * regexp: altern A regular expression is one or more\r
+ * altern '|' regexp alternatives separated by vertical bar.\r
+ */\r
+ private RENode parseRegExp(CompilerState state) {\r
+ RENode ren = parseAltern(state);\r
+ if (ren == null)\r
+ return null;\r
+ char[] source = state.source;\r
+ int index = state.index;\r
+ if (index < source.length && source[index] == '|') {\r
+ RENode kid = ren;\r
+ ren = new RENode(state, REOP_ALT, kid);\r
+ if (ren == null)\r
+ return null;\r
+ ren.flags = (byte) (kid.flags & (RENode.ANCHORED | RENode.NONEMPTY));\r
+ RENode ren1 = ren;\r
+ do {\r
+ /* (balance: */\r
+ state.index = ++index;\r
+ if (index < source.length && (source[index] == '|' ||\r
+ source[index] == ')'))\r
+ {\r
+ kid = new RENode(state, REOP_EMPTY, null);\r
+ } else {\r
+ kid = parseAltern(state);\r
+ index = state.index;\r
+ }\r
+ if (kid == null)\r
+ return null;\r
+ RENode ren2 = new RENode(state, REOP_ALT, kid);\r
+ if (ren2 == null)\r
+ return null;\r
+ ren1.next = ren2;\r
+ ren1.flags |= RENode.GOODNEXT;\r
+ ren2.flags = (byte) ((kid.flags & (RENode.ANCHORED |\r
+ RENode.NONEMPTY))\r
+ | RENode.ISNEXT);\r
+ ren1 = ren2;\r
+ } while (index < source.length && source[index] == '|');\r
+ }\r
+ return ren;\r
+ }\r
+\r
+ /*\r
+ * altern: item An alternative is one or more items,\r
+ * item altern concatenated together.\r
+ */\r
+ private RENode parseAltern(CompilerState state) {\r
+ RENode ren = parseItem(state);\r
+ if (ren == null)\r
+ return null;\r
+ RENode ren1 = ren;\r
+ int flags = 0;\r
+ char[] source = state.source;\r
+ int index = state.index;\r
+ char c;\r
+ /* (balance: */\r
+ while (index != source.length && (c = source[index]) != '|' &&\r
+ c != ')')\r
+ {\r
+ RENode ren2 = parseItem(state);\r
+ if (ren2 == null)\r
+ return null;\r
+ setNext(state, ren1, ren2);\r
+ flags |= ren2.flags;\r
+ ren1 = ren2;\r
+ index = state.index;\r
+ }\r
+\r
+ /*\r
+ * Propagate NONEMPTY to the front of a concatenation list, so that the\r
+ * first alternative in (^a|b) is considered non-empty. The first node\r
+ * in a list may match the empty string (as ^ does), but if the list is\r
+ * non-empty, then the first node's NONEMPTY flag must be set.\r
+ */\r
+ ren.flags |= flags & RENode.NONEMPTY;\r
+ return ren;\r
+ }\r
+\r
+ /*\r
+ * item: assertion An item is either an assertion or\r
+ * quantatom a quantified atom.\r
+ *\r
+ * assertion: '^' Assertions match beginning of string\r
+ * (or line if the class static property\r
+ * RegExp.multiline is true).\r
+ * '$' End of string (or line if the class\r
+ * static property RegExp.multiline is\r
+ * true).\r
+ * '\b' Word boundary (between \w and \W).\r
+ * '\B' Word non-boundary.\r
+ */\r
+ RENode parseItem(CompilerState state) {\r
+ RENode ren;\r
+ byte op;\r
+\r
+ char[] source = state.source;\r
+ int index = state.index;\r
+ switch (index < source.length ? source[index] : '\0') {\r
+ case '^':\r
+ state.index = index + 1;\r
+ ren = new RENode(state, REOP_BOL, null);\r
+ ren.flags |= RENode.ANCHORED;\r
+ return ren;\r
+\r
+ case '$':\r
+ state.index = index + 1;\r
+ return new RENode(state,\r
+ (index == state.indexBegin ||\r
+ ((source[index-1] == '(' ||\r
+ source[index-1] == '|') && /*balance)*/\r
+ (index - 1 == state.indexBegin ||\r
+ source[index-2] != '\\')))\r
+ ? REOP_EOLONLY\r
+ : REOP_EOL,\r
+ null);\r
+\r
+ case '\\':\r
+ switch (++index < source.length ? source[index] : '\0') {\r
+ case 'b':\r
+ op = REOP_WBDRY;\r
+ break;\r
+ case 'B':\r
+ op = REOP_WNONBDRY;\r
+ break;\r
+ default:\r
+ return parseQuantAtom(state);\r
+ }\r
+\r
+ /*\r
+ * Word boundaries and non-boundaries are flagged as non-empty\r
+ * so they will be prefixed by an anchoring node.\r
+ */\r
+ state.index = index + 1;\r
+ ren = new RENode(state, op, null);\r
+ ren.flags |= RENode.NONEMPTY;\r
+ return ren;\r
+\r
+ default:;\r
+ }\r
+ return parseQuantAtom(state);\r
+ }\r
+\r
+ /*\r
+ * quantatom: atom An unquantified atom.\r
+ * quantatom '{' n ',' m '}'\r
+ * Atom must occur between n and m times.\r
+ * quantatom '{' n ',' '}' Atom must occur at least n times.\r
+ * quantatom '{' n '}' Atom must occur exactly n times.\r
+ * quantatom '*' Zero or more times (same as {0,}).\r
+ * quantatom '+' One or more times (same as {1,}).\r
+ * quantatom '?' Zero or one time (same as {0,1}).\r
+ *\r
+ * any of which can be optionally followed by '?' for ungreedy\r
+ */\r
+ RENode parseQuantAtom(CompilerState state) {\r
+ RENode ren = parseAtom(state);\r
+ if (ren == null)\r
+ return null;\r
+\r
+ int up;\r
+ char c;\r
+ RENode ren2;\r
+ int min, max;\r
+ char[] source = state.source;\r
+ int index = state.index;\r
+ loop:\r
+ while (index < source.length) {\r
+ switch (source[index]) {\r
+ case '{':\r
+ if (++index == source.length || !isDigit(c = source[index])) {\r
+ reportError("msg.bad.quant",\r
+ String.valueOf(source[state.index]), state);\r
+ return null;\r
+ }\r
+ min = unDigit(c);\r
+ while (++index < source.length && isDigit(c = source[index])) {\r
+ min = 10 * min + unDigit(c);\r
+ if ((min >> 16) != 0) {\r
+ reportError("msg.overlarge.max", tail(source, index),\r
+ state);\r
+ return null;\r
+ }\r
+ }\r
+ if (source[index] == ',') {\r
+ up = ++index;\r
+ if (isDigit(source[index])) {\r
+ max = unDigit(source[index]);\r
+ while (isDigit(c = source[++index])) {\r
+ max = 10 * max + unDigit(c);\r
+ if ((max >> 16) != 0) {\r
+ reportError("msg.overlarge.max",\r
+ String.valueOf(source[up]), state);\r
+ return null;\r
+ }\r
+ }\r
+ if (max == 0) {\r
+ reportError("msg.zero.quant",\r
+ tail(source, state.index), state);\r
+ return null;\r
+ }\r
+ if (min > max) {\r
+ reportError("msg.max.lt.min", tail(source, up), state);\r
+ return null;\r
+ }\r
+ } else {\r
+ /* 0 means no upper bound. */\r
+ max = 0;\r
+ }\r
+ } else {\r
+ /* Exactly n times. */\r
+ if (min == 0) {\r
+ reportError("msg.zero.quant",\r
+ tail(source, state.index), state);\r
+ return null;\r
+ }\r
+ max = min;\r
+ }\r
+ if (source[index] != '}') {\r
+ reportError("msg.unterm.quant",\r
+ String.valueOf(source[state.index]), state);\r
+ return null;\r
+ }\r
+ index++;\r
+\r
+ ren2 = new RENode(state, REOP_QUANT, ren);\r
+ if (min > 0 && (ren.flags & RENode.NONEMPTY) != 0)\r
+ ren2.flags |= RENode.NONEMPTY;\r
+ ren2.min = (short) min;\r
+ ren2.max = (short) max;\r
+ ren = ren2;\r
+ break;\r
+\r
+ case '*':\r
+ index++;\r
+ ren = new RENode(state, REOP_STAR, ren);\r
+ break;\r
+\r
+ case '+':\r
+ index++;\r
+ ren2 = new RENode(state, REOP_PLUS, ren);\r
+ if ((ren.flags & RENode.NONEMPTY) != 0)\r
+ ren2.flags |= RENode.NONEMPTY;\r
+ ren = ren2;\r
+ break;\r
+\r
+ case '?':\r
+ index++;\r
+ ren = new RENode(state, REOP_OPT, ren);\r
+ break;\r
+\r
+ default:\r
+ break loop;\r
+ }\r
+ if ((index < source.length) && (source[index] == '?')) {\r
+ ren.flags |= RENode.MINIMAL;\r
+ index++;\r
+ }\r
+ }\r
+\r
+ state.index = index;\r
+ return ren;\r
+ }\r
+\r
+ /*\r
+ * atom: '(' regexp ')' A parenthesized regexp (what matched\r
+ * can be addressed using a backreference,\r
+ * see '\' n below).\r
+ * '.' Matches any char except '\n'.\r
+ * '[' classlist ']' A character class.\r
+ * '[' '^' classlist ']' A negated character class.\r
+ * '\f' Form Feed.\r
+ * '\n' Newline (Line Feed).\r
+ * '\r' Carriage Return.\r
+ * '\t' Horizontal Tab.\r
+ * '\v' Vertical Tab.\r
+ * '\d' A digit (same as [0-9]).\r
+ * '\D' A non-digit.\r
+ * '\w' A word character, [0-9a-z_A-Z].\r
+ * '\W' A non-word character.\r
+ * '\s' A whitespace character, [ \b\f\n\r\t\v].\r
+ * '\S' A non-whitespace character.\r
+ * '\' n A backreference to the nth (n decimal\r
+ * and positive) parenthesized expression.\r
+ * '\' octal An octal escape sequence (octal must be\r
+ * two or three digits long, unless it is\r
+ * 0 for the null character).\r
+ * '\x' hex A hex escape (hex must be two digits).\r
+ * '\c' ctrl A control character, ctrl is a letter.\r
+ * '\' literalatomchar Any character except one of the above\r
+ * that follow '\' in an atom.\r
+ * otheratomchar Any character not first among the other\r
+ * atom right-hand sides.\r
+ */\r
+ static final String metachars = "|^${*+?().[\\";\r
+ static final String closurechars = "{*+?";\r
+\r
+ RENode parseAtom(CompilerState state) {\r
+ int num = 0, len;\r
+ RENode ren = null;\r
+ RENode ren2;\r
+ char c;\r
+ byte op;\r
+\r
+ boolean skipCommon = false;\r
+ boolean doFlat = false;\r
+\r
+ char[] source = state.source;\r
+ int index = state.index;\r
+ int ocp = index;\r
+ if (index == source.length) {\r
+ state.index = index;\r
+ return new RENode(state, REOP_EMPTY, null);\r
+ }\r
+ switch (source[index]) {\r
+ /* handle /|a/ by returning an empty node for the leftside */\r
+ case '|':\r
+ return new RENode(state, REOP_EMPTY, null);\r
+\r
+ case '(':\r
+ op = REOP_END;\r
+ if (source[index + 1] == '?') {\r
+ switch (source[index + 2]) {\r
+ case ':' :\r
+ op = REOP_LPARENNON;\r
+ break;\r
+ case '=' :\r
+ op = REOP_ASSERT;\r
+ break;\r
+ case '!' :\r
+ op = REOP_ASSERT_NOT;\r
+ break;\r
+ }\r
+ }\r
+ if (op == REOP_END) {\r
+ op = REOP_LPAREN;\r
+ num = state.parenCount++; /* \1 is numbered 0, etc. */\r
+ index++;\r
+ }\r
+ else\r
+ index += 3;\r
+ state.index = index;\r
+ /* Handle empty paren */\r
+ if (source[index] == ')') {\r
+ ren2 = new RENode(state, REOP_EMPTY, null);\r
+ }\r
+ else {\r
+ ren2 = parseRegExp(state);\r
+ if (ren2 == null)\r
+ return null;\r
+ index = state.index;\r
+ if (index >= source.length || source[index] != ')') {\r
+ reportError("msg.unterm.paren", tail(source, ocp), state);\r
+ return null;\r
+ }\r
+ }\r
+ index++;\r
+ ren = new RENode(state, op, ren2);\r
+ ren.flags = (byte) (ren2.flags & (RENode.ANCHORED |\r
+ RENode.NONEMPTY));\r
+ ren.num = num;\r
+ if ((op == REOP_LPAREN) || (op == REOP_LPARENNON)) {\r
+ /* Assume RPAREN ops immediately succeed LPAREN ops */\r
+ ren2 = new RENode(state, (byte)(op + 1), null);\r
+ setNext(state, ren, ren2);\r
+ ren2.num = num;\r
+ }\r
+ break;\r
+\r
+ case '.':\r
+ ++index;\r
+ op = REOP_DOT;\r
+ if ((index < source.length) && (source[index] == '*')) {\r
+ index++;\r
+ op = REOP_DOTSTAR;\r
+ if ((index < source.length) && (source[index] == '?')) {\r
+ index++;\r
+ op = REOP_DOTSTARMIN;\r
+ }\r
+ }\r
+ ren = new RENode(state, op, null);\r
+ if (ren.op == REOP_DOT)\r
+ ren.flags = RENode.SINGLE | RENode.NONEMPTY;\r
+ break;\r
+\r
+ case '[':\r
+ /* A char class must have at least one char in it. */\r
+ if (++index == source.length) {\r
+ reportError("msg.unterm.class", tail(source, ocp), state);\r
+ return null;\r
+ }\r
+ c = source[index];\r
+ ren = new RENode(state, REOP_CCLASS, new Integer(index));\r
+\r
+ /* A negated class must have at least one char in it after the ^. */\r
+ if (c == '^' && ++index == source.length) {\r
+ reportError("msg.unterm.class", tail(source, ocp), state);\r
+ return null;\r
+ }\r
+\r
+ for (;;) {\r
+ if (++index == source.length) {\r
+ reportError("msg.unterm.paren", tail(source, ocp), state);\r
+ return null;\r
+ }\r
+ c = source[index];\r
+ if (c == ']')\r
+ break;\r
+ if (c == '\\' && index+1 != source.length)\r
+ index++;\r
+ }\r
+ ren.kid2 = index++;\r
+\r
+ /* Since we rule out [] and [^], we can set the non-empty flag. */\r
+ ren.flags = RENode.SINGLE | RENode.NONEMPTY;\r
+ break;\r
+\r
+ case '\\':\r
+ if (++index == source.length) {\r
+ Context.reportError(ScriptRuntime.getMessage("msg.trail.backslash",\r
+ null));\r
+ return null;\r
+ }\r
+ c = source[index];\r
+ switch (c) {\r
+ case 'f':\r
+ case 'n':\r
+ case 'r':\r
+ case 't':\r
+ case 'v':\r
+ c = getEscape(c);\r
+ ren = new RENode(state, REOP_FLAT1, null);\r
+ break;\r
+ case 'd':\r
+ ren = new RENode(state, REOP_DIGIT, null);\r
+ break;\r
+ case 'D':\r
+ ren = new RENode(state, REOP_NONDIGIT, null);\r
+ break;\r
+ case 'w':\r
+ ren = new RENode(state, REOP_ALNUM, null);\r
+ break;\r
+ case 'W':\r
+ ren = new RENode(state, REOP_NONALNUM, null);\r
+ break;\r
+ case 's':\r
+ ren = new RENode(state, REOP_SPACE, null);\r
+ break;\r
+ case 'S':\r
+ ren = new RENode(state, REOP_NONSPACE, null);\r
+ break;\r
+\r
+ case '0':\r
+ case '1':\r
+ case '2':\r
+ case '3':\r
+ case '4':\r
+ case '5':\r
+ case '6':\r
+ case '7':\r
+ case '8':\r
+ case '9':\r
+ /*\r
+ Yuk. Keeping the old style \n interpretation for 1.2\r
+ compatibility.\r
+ */\r
+ if ((state.cx.getLanguageVersion() != Context.VERSION_DEFAULT)\r
+ && (state.cx.getLanguageVersion() <= Context.VERSION_1_4)) {\r
+ switch (c) { \r
+ case '0':\r
+ state.index = index;\r
+ num = doOctal(state);\r
+ index = state.index;\r
+ ren = new RENode(state, REOP_FLAT1, null);\r
+ c = (char) num;\r
+ break;\r
+\r
+ case '1':\r
+ case '2':\r
+ case '3':\r
+ case '4':\r
+ case '5':\r
+ case '6':\r
+ case '7':\r
+ case '8':\r
+ case '9':\r
+ num = unDigit(c);\r
+ len = 1;\r
+ while (++index < source.length\r
+ && isDigit(c = source[index])) {\r
+ num = 10 * num + unDigit(c);\r
+ len++;\r
+ }\r
+ /* n in [8-9] and > count of parenetheses, then revert to\r
+ '8' or '9', ignoring the '\' */\r
+ if (((num == 8) || (num == 9)) && (num > state.parenCount)) {\r
+ ocp = --index; /* skip beyond the '\' */\r
+ doFlat = true;\r
+ skipCommon = true;\r
+ break; \r
+ }\r
+ /* more than 1 digit, or a number greater than\r
+ the count of parentheses => it's an octal */\r
+ if ((len > 1) || (num > state.parenCount)) {\r
+ state.index = ocp;\r
+ num = doOctal(state);\r
+ index = state.index;\r
+ ren = new RENode(state, REOP_FLAT1, null);\r
+ c = (char) num;\r
+ break;\r
+ }\r
+ index--;\r
+ ren = new RENode(state, REOP_BACKREF, null);\r
+ ren.num = num - 1; /* \1 is numbered 0, etc. */\r
+\r
+ /* Avoid common chr- and flags-setting \r
+ code after switch. */\r
+ ren.flags = RENode.NONEMPTY;\r
+ skipCommon = true;\r
+ break;\r
+ }\r
+ }\r
+ else {\r
+ if (c == '0') {\r
+ ren = new RENode(state, REOP_FLAT1, null);\r
+ c = 0;\r
+ }\r
+ else {\r
+ num = unDigit(c);\r
+ len = 1;\r
+ while (++index < source.length\r
+ && isDigit(c = source[index])) {\r
+ num = 10 * num + unDigit(c);\r
+ len++;\r
+ }\r
+ index--;\r
+ ren = new RENode(state, REOP_BACKREF, null);\r
+ ren.num = num - 1; /* \1 is numbered 0, etc. */\r
+ /* Avoid common chr- and flags-setting \r
+ code after switch. */\r
+ ren.flags = RENode.NONEMPTY;\r
+ skipCommon = true;\r
+ }\r
+ }\r
+ break;\r
+ \r
+ case 'x':\r
+ ocp = index;\r
+ if (++index < source.length && isHex(c = source[index])) {\r
+ num = unHex(c);\r
+ if (++index < source.length && isHex(c = source[index])) {\r
+ num <<= 4;\r
+ num += unHex(c);\r
+ } else {\r
+ if ((state.cx.getLanguageVersion()\r
+ != Context.VERSION_DEFAULT)\r
+ && (state.cx.getLanguageVersion() \r
+ <= Context.VERSION_1_4)) \r
+ index--; /* back up so index points to last hex char */\r
+ else { /* ecma 2 requires pairs of hex digits. */\r
+ index = ocp;\r
+ num = 'x';\r
+ }\r
+ }\r
+ } else {\r
+ index = ocp; /* \xZZ is xZZ (Perl does \0ZZ!) */\r
+ num = 'x';\r
+ }\r
+ ren = new RENode(state, REOP_FLAT1, null);\r
+ c = (char)num;\r
+ break;\r
+\r
+ case 'c':\r
+ c = source[++index];\r
+ if (!('A' <= c && c <= 'Z') && !('a' <= c && c <= 'z')) {\r
+ index -= 2;\r
+ ocp = index;\r
+ doFlat = true;\r
+ skipCommon = true;\r
+ break;\r
+ }\r
+ c = Character.toUpperCase(c);\r
+ c = (char) (c ^ 64); // JS_TOCTRL\r
+ ren = new RENode(state, REOP_FLAT1, null);\r
+ break;\r
+\r
+ case 'u':\r
+ if (index+4 < source.length &&\r
+ isHex(source[index+1]) && isHex(source[index+2]) &&\r
+ isHex(source[index+3]) && isHex(source[index+4]))\r
+ {\r
+ num = (((((unHex(source[index+1]) << 4) +\r
+ unHex(source[index+2])) << 4) +\r
+ unHex(source[index+3])) << 4) +\r
+ unHex(source[index+4]);\r
+ c = (char) num;\r
+ index += 4;\r
+ ren = new RENode(state, REOP_FLAT1, null);\r
+ break;\r
+ }\r
+\r
+ /* Unlike Perl \\xZZ, we take \\uZZZ to be literal-u then ZZZ. */\r
+ ocp = index;\r
+ doFlat = true;\r
+ skipCommon = true;\r
+ break;\r
+\r
+ default:\r
+ ocp = index;\r
+ doFlat = true;\r
+ skipCommon = true;\r
+ break;\r
+ }\r
+\r
+ /* Common chr- and flags-setting code for escape opcodes. */\r
+ if (ren != null && !skipCommon) {\r
+ ren.chr = c;\r
+ ren.flags = RENode.SINGLE | RENode.NONEMPTY;\r
+ }\r
+ skipCommon = false;\r
+\r
+ if (!doFlat) {\r
+ /* Skip to next unparsed char. */\r
+ index++;\r
+ break;\r
+ }\r
+\r
+ /* fall through since doFlat was true */\r
+ doFlat = false;\r
+\r
+ default:\r
+ while (++index != source.length &&\r
+ metachars.indexOf(source[index]) == -1)\r
+ ;\r
+ len = (int)(index - ocp);\r
+ if (index != source.length && len > 1 &&\r
+ closurechars.indexOf(source[index]) != -1)\r
+ {\r
+ index--;\r
+ len--;\r
+ }\r
+ if (len > REOP_FLATLEN_MAX) {\r
+ len = REOP_FLATLEN_MAX;\r
+ index = ocp + len;\r
+ }\r
+ ren = new RENode(state, len == 1 ? REOP_FLAT1 : REOP_FLAT,\r
+ new Integer(ocp));\r
+ ren.flags = RENode.NONEMPTY;\r
+ if (len > 1) {\r
+ ren.kid2 = index;\r
+ } else {\r
+ ren.flags |= RENode.SINGLE;\r
+ ren.chr = source[ocp];\r
+ }\r
+ break;\r
+ }\r
+\r
+ state.index = index;\r
+ return ren;\r
+ }\r
+\r
+ private int doOctal(CompilerState state) {\r
+ char[] source = state.source;\r
+ int index = state.index;\r
+ int tmp, num = 0;\r
+ char c;\r
+ while (++index < source.length && '0' <= (c = source[index]) &&\r
+ c <= '7')\r
+ {\r
+ tmp = 8 * num + (int)(c - '0');\r
+ if (tmp > 0377) {\r
+ break;\r
+ }\r
+ num = tmp;\r
+ }\r
+ index--;\r
+ state.index = index;\r
+ return num;\r
+ }\r
+\r
+ static char getEscape(char c) {\r
+ switch (c) {\r
+ case 'b':\r
+ return '\b';\r
+ case 'f':\r
+ return '\f';\r
+ case 'n':\r
+ return '\n';\r
+ case 'r':\r
+ return '\r';\r
+ case 't':\r
+ return '\t';\r
+ case 'v':\r
+ return (char) 11; // '\v' is not vtab in Java\r
+ }\r
+ throw new RuntimeException();\r
+ }\r
+\r
+ static public boolean isDigit(char c) {\r
+ return '0' <= c && c <= '9';\r
+ }\r
+\r
+ static int unDigit(char c) {\r
+ return c - '0';\r
+ }\r
+\r
+ static boolean isHex(char c) {\r
+ return ('0' <= c && c <= '9') || ('a' <= c && c <= 'f') ||\r
+ ('A' <= c && c <= 'F');\r
+ }\r
+\r
+ static int unHex(char c) {\r
+ if ('0' <= c && c <= '9')\r
+ return c - '0';\r
+ return 10 + Character.toLowerCase(c) - 'a';\r
+ }\r
+\r
+ static boolean isWord(char c) {\r
+ return Character.isLetter(c) || isDigit(c) || c == '_';\r
+ }\r
+\r
+ private String tail(char[] a, int i) {\r
+ return new String(a, i, a.length - i);\r
+ }\r
+\r
+ private static boolean matchChar(int flags, char c, char c2) {\r
+ if (c == c2)\r
+ return true;\r
+ else\r
+ if ((flags & FOLD) != 0) {\r
+ c = Character.toUpperCase(c);\r
+ c2 = Character.toUpperCase(c2);\r
+ return c == c2 ||\r
+ Character.toLowerCase(c) == Character.toLowerCase(c2);\r
+ }\r
+ else\r
+ return false;\r
+ }\r
+ \r
+ \r
+ int greedyRecurse(GreedyState grState, int index, int previousKid) {\r
+ int kidMatch;\r
+ int match;\r
+ int num;\r
+\r
+ /*\r
+ * when the kid match fails, we reset the parencount and run any \r
+ * previously succesful kid in order to restablish it's paren\r
+ * contents.\r
+ */\r
+\r
+ num = grState.state.parenCount;\r
+ kidMatch = matchRENodes(grState.state, grState.kid, grState.next, index);\r
+ if (kidMatch == -1) {\r
+ match = matchRENodes(grState.state, grState.next, grState.stop, index);\r
+ if (match != -1) {\r
+ grState.state.parenCount = num;\r
+ if (previousKid != -1)\r
+ matchRENodes(grState.state, grState.kid, grState.next, previousKid);\r
+ return index;\r
+ }\r
+ else\r
+ return -1;\r
+ }\r
+ else {\r
+ if (kidMatch == index) {\r
+ if (previousKid != -1)\r
+ matchRENodes(grState.state, grState.kid, grState.next, previousKid);\r
+ return kidMatch; /* no point pursuing an empty match forever */\r
+ }\r
+ if ((grState.maxKid == 0) || (++grState.kidCount < grState.maxKid)) {\r
+ match = greedyRecurse(grState, kidMatch, index);\r
+ if (match != -1) return match;\r
+ if (grState.maxKid != 0) --grState.kidCount;\r
+ }\r
+ grState.state.parenCount = num;\r
+ match = matchRENodes(grState.state, grState.next, grState.stop, kidMatch);\r
+ if (match != -1) {\r
+ matchRENodes(grState.state, grState.kid, grState.next, index);\r
+ return kidMatch;\r
+ }\r
+ else\r
+ return -1;\r
+ }\r
+ }\r
+\r
+ int matchGreedyKid(MatchState state, RENode ren, RENode stop,\r
+ int kidCount, int index, int previousKid) {\r
+ GreedyState grState = new GreedyState();\r
+ grState.state = state;\r
+ grState.kid = (RENode)ren.kid;\r
+ grState.next = ren.next;\r
+ grState.maxKid = (ren.op == REOP_QUANT) ? ren.max : 0;\r
+ /*\r
+ * We try to match the sub-tree to completion first, and if that\r
+ * doesn't work, match only up to the given end of the sub-tree.\r
+ */\r
+ grState.stop = null;\r
+ grState.kidCount = kidCount;\r
+ int match = greedyRecurse(grState, index, previousKid);\r
+ if (match != -1 || stop == null) return match;\r
+ grState.kidCount = kidCount;\r
+ grState.stop = stop;\r
+ return greedyRecurse(grState, index, previousKid);\r
+ }\r
+\r
+ int matchNonGreedyKid(MatchState state, RENode ren,\r
+ int kidCount, int maxKid,\r
+ int index) {\r
+ int kidMatch;\r
+ int match;\r
+\r
+ match = matchRENodes(state, ren.next, null, index);\r
+ if (match != -1) return index;\r
+ kidMatch = matchRENodes(state, (RENode)ren.kid, ren.next, index);\r
+ if (kidMatch == -1) return -1;\r
+ if (kidMatch == index) return kidMatch; /* no point pursuing an empty match forever */\r
+ return matchNonGreedyKid(state, ren, kidCount, maxKid, kidMatch);\r
+ }\r
+\r
+ int matchRENodes(MatchState state, RENode ren, RENode stop, int index) {\r
+ int num;\r
+ char[] input = state.input;\r
+ while ((ren != stop) && (ren != null)) {\r
+ switch (ren.op) {\r
+ case REOP_EMPTY:\r
+ break;\r
+ case REOP_ALT: {\r
+ if (ren.next.op != REOP_ALT) {\r
+ ren = (RENode)ren.kid;\r
+ continue;\r
+ }\r
+ else {\r
+ num = state.parenCount;\r
+ int kidMatch = matchRENodes(state, (RENode)ren.kid,\r
+ stop, index);\r
+ if (kidMatch != -1) return kidMatch;\r
+ for (int i = num; i < state.parenCount; i++)\r
+ state.parens[i] = null;\r
+ state.parenCount = num;\r
+ }\r
+ }\r
+ break;\r
+ case REOP_QUANT: {\r
+ int lastKid = -1;\r
+ for (num = 0; num < ren.min; num++) {\r
+ int kidMatch = matchRENodes(state, (RENode)ren.kid,\r
+ ren.next, index);\r
+ if (kidMatch == -1)\r
+ return -1;\r
+ else {\r
+ lastKid = index;\r
+ index = kidMatch;\r
+ }\r
+ }\r
+ if (num == ren.max)\r
+ // Have matched the exact count required, \r
+ // need to match the rest of the regexp.\r
+ break;\r
+ if ((ren.flags & RENode.MINIMAL) == 0) {\r
+ int kidMatch = matchGreedyKid(state, ren, stop, num,\r
+ index, lastKid);\r
+ if (kidMatch == -1)\r
+ index = matchRENodes(state, (RENode)ren.kid,\r
+ ren.next, index);\r
+ else \r
+ index = kidMatch;\r
+ } \r
+ else {\r
+ index = matchNonGreedyKid(state, ren, num,\r
+ ren.max, index);\r
+ } \r
+ if (index == -1) return -1;\r
+ }\r
+ break;\r
+ case REOP_PLUS: {\r
+ int kidMatch = matchRENodes(state, (RENode)ren.kid, \r
+ ren.next, index);\r
+ if (kidMatch == -1)\r
+ return -1;\r
+ if ((ren.flags & RENode.MINIMAL) == 0) {\r
+ kidMatch = matchGreedyKid(state, ren, stop, 1, \r
+ kidMatch, index);\r
+ if (kidMatch == -1)\r
+ index = matchRENodes(state,(RENode)ren.kid,\r
+ ren.next, index);\r
+ else\r
+ index = kidMatch;\r
+ }\r
+ else\r
+ index = matchNonGreedyKid(state, ren, 1, 0, kidMatch);\r
+ if (index == -1) return -1;\r
+ }\r
+ break;\r
+ case REOP_STAR: \r
+ if ((ren.flags & RENode.MINIMAL) == 0) {\r
+ int kidMatch = matchGreedyKid(state, ren, stop, 0, index, -1);\r
+ if (kidMatch != -1)\r
+ index = kidMatch;\r
+ }\r
+ else {\r
+ index = matchNonGreedyKid(state, ren, 0, 0, index);\r
+ if (index == -1) return -1;\r
+ }\r
+ break;\r
+ case REOP_OPT: {\r
+ int saveNum = state.parenCount;\r
+ if (((ren.flags & RENode.MINIMAL) != 0)) {\r
+ int restMatch = matchRENodes(state, ren.next,\r
+ stop, index);\r
+ if (restMatch != -1) return restMatch;\r
+ }\r
+ int kidMatch = matchRENodes(state, (RENode)ren.kid,\r
+ ren.next, index);\r
+ if (kidMatch == -1) {\r
+ state.parenCount = saveNum;\r
+ break;\r
+ }\r
+ else {\r
+ int restMatch = matchRENodes(state, ren.next,\r
+ stop, kidMatch);\r
+ if (restMatch == -1) {\r
+ // need to undo the result of running the kid\r
+ state.parenCount = saveNum;\r
+ break;\r
+ }\r
+ else\r
+ return restMatch;\r
+ }\r
+ }\r
+ case REOP_LPARENNON:\r
+ ren = (RENode)ren.kid;\r
+ continue;\r
+ case REOP_RPARENNON:\r
+ break;\r
+ case REOP_LPAREN: {\r
+ num = ren.num;\r
+ ren = (RENode)ren.kid;\r
+ SubString parsub = state.parens[num];\r
+ if (parsub == null) {\r
+ parsub = state.parens[num] = new SubString();\r
+ parsub.charArray = input;\r
+ }\r
+ parsub.index = index;\r
+ parsub.length = 0;\r
+ if (num >= state.parenCount)\r
+ state.parenCount = num + 1;\r
+ continue;\r
+ }\r
+ case REOP_RPAREN: {\r
+ num = ren.num;\r
+ SubString parsub = state.parens[num];\r
+ if (parsub == null)\r
+ throw new RuntimeException("Paren problem");\r
+ parsub.length = index - parsub.index;\r
+ break;\r
+ }\r
+ case REOP_ASSERT: {\r
+ int kidMatch = matchRENodes(state, (RENode)ren.kid,\r
+ ren.next, index);\r
+ if (kidMatch == -1) return -1;\r
+ break;\r
+ }\r
+ case REOP_ASSERT_NOT: {\r
+ int kidMatch = matchRENodes(state, (RENode)ren.kid,\r
+ ren.next, index);\r
+ if (kidMatch != -1) return -1;\r
+ break;\r
+ }\r
+ case REOP_BACKREF: {\r
+ num = ren.num;\r
+ if (num >= state.parens.length) {\r
+ Context.reportError(\r
+ ScriptRuntime.getMessage(\r
+ "msg.bad.backref", null));\r
+ return -1;\r
+ }\r
+ SubString parsub = state.parens[num];\r
+ if (parsub == null)\r
+ parsub = state.parens[num] = new SubString();\r
+ int length = parsub.length;\r
+ for (int i = 0; i < length; i++, index++) {\r
+ if (index >= input.length) {\r
+ return state.noMoreInput();\r
+ }\r
+ if (!matchChar(state.flags, input[index],\r
+ parsub.charArray[parsub.index + i]))\r
+ return -1;\r
+ }\r
+ }\r
+ break;\r
+ case REOP_CCLASS:\r
+ if (index >= input.length) {\r
+ return state.noMoreInput();\r
+ }\r
+ if (ren.bitmap == null) {\r
+ char[] source = (ren.s != null) \r
+ ? ren.s \r
+ : this.source.toCharArray();\r
+ ren.buildBitmap(state, source, ((state.flags & FOLD) != 0));\r
+ }\r
+ char c = input[index];\r
+ int b = (c >>> 3);\r
+ if (b >= ren.bmsize) {\r
+ if (ren.kid2 == -1) // a ^ class\r
+ index++;\r
+ else\r
+ return -1;\r
+ } else {\r
+ int bit = c & 7;\r
+ bit = 1 << bit;\r
+ if ((ren.bitmap[b] & bit) != 0)\r
+ index++;\r
+ else\r
+ return -1;\r
+ }\r
+ break;\r
+ case REOP_DOT:\r
+ if (index >= input.length) {\r
+ return state.noMoreInput();\r
+ }\r
+ if (input[index] != '\n')\r
+ index++;\r
+ else\r
+ return -1;\r
+ break;\r
+ case REOP_DOTSTARMIN: {\r
+ int cp2;\r
+ for (cp2 = index; cp2 < input.length; cp2++) {\r
+ int cp3 = matchRENodes(state, ren.next, stop, cp2);\r
+ if (cp3 != -1) return cp3;\r
+ if (input[cp2] == '\n')\r
+ return -1;\r
+ }\r
+ return state.noMoreInput();\r
+ }\r
+ case REOP_DOTSTAR: {\r
+ int cp2;\r
+ for (cp2 = index; cp2 < input.length; cp2++)\r
+ if (input[cp2] == '\n')\r
+ break;\r
+ while (cp2 >= index) {\r
+ int cp3 = matchRENodes(state, ren.next, stop, cp2);\r
+ if (cp3 != -1) {\r
+ index = cp2;\r
+ break;\r
+ }\r
+ cp2--;\r
+ }\r
+ break;\r
+ }\r
+ case REOP_WBDRY:\r
+ if (index == 0 || !isWord(input[index-1])) {\r
+ if (index >= input.length)\r
+ return state.noMoreInput();\r
+ if (!isWord(input[index]))\r
+ return -1;\r
+ }\r
+ else {\r
+ if (index < input.length && isWord(input[index]))\r
+ return -1;\r
+ }\r
+ break;\r
+ case REOP_WNONBDRY:\r
+ if (index == 0 || !isWord(input[index-1])) {\r
+ if (index < input.length && isWord(input[index]))\r
+ return -1;\r
+ }\r
+ else {\r
+ if (index >= input.length)\r
+ return state.noMoreInput();\r
+ if (!isWord(input[index]))\r
+ return -1;\r
+ }\r
+ break;\r
+ case REOP_EOLONLY:\r
+ case REOP_EOL: {\r
+ if (index == input.length)\r
+ ; // leave index;\r
+ else {\r
+ Context cx = Context.getCurrentContext();\r
+ RegExpImpl reImpl = getImpl(cx);\r
+ if ((reImpl.multiline)\r
+ || ((state.flags & MULTILINE) != 0))\r
+ if (input[index] == '\n')\r
+ ;// leave index\r
+ else\r
+ return -1;\r
+ else\r
+ return -1;\r
+ }\r
+ }\r
+ break;\r
+ case REOP_BOL: {\r
+ Context cx = Context.getCurrentContext();\r
+ RegExpImpl reImpl = getImpl(cx);\r
+ if (index != 0) {\r
+ if (reImpl.multiline ||\r
+ ((state.flags & MULTILINE) != 0)) {\r
+ if (index >= input.length) {\r
+ return state.noMoreInput();\r
+ }\r
+ if (input[index - 1] == '\n') {\r
+ break;\r
+ }\r
+ }\r
+ return -1;\r
+ }\r
+ // leave index\r
+ }\r
+ break;\r
+ case REOP_DIGIT:\r
+ if (index >= input.length) {\r
+ return state.noMoreInput();\r
+ }\r
+ if (!isDigit(input[index])) return -1;\r
+ index++;\r
+ break;\r
+ case REOP_NONDIGIT:\r
+ if (index >= input.length) {\r
+ return state.noMoreInput();\r
+ }\r
+ if (isDigit(input[index])) return -1;\r
+ index++;\r
+ break;\r
+ case REOP_ALNUM:\r
+ if (index >= input.length) {\r
+ return state.noMoreInput();\r
+ }\r
+ if (!isWord(input[index])) return -1;\r
+ index++;\r
+ break;\r
+ case REOP_NONALNUM:\r
+ if (index >= input.length) {\r
+ return state.noMoreInput();\r
+ }\r
+ if (isWord(input[index])) return -1;\r
+ index++;\r
+ break;\r
+ case REOP_SPACE:\r
+ if (index >= input.length) {\r
+ return state.noMoreInput();\r
+ }\r
+ if (!(TokenStream.isJSSpace(input[index]) ||\r
+ TokenStream.isJSLineTerminator(input[index])))\r
+ return -1;\r
+ index++;\r
+ break;\r
+ case REOP_NONSPACE:\r
+ if (index >= input.length) {\r
+ return state.noMoreInput();\r
+ }\r
+ if (TokenStream.isJSSpace(input[index]) ||\r
+ TokenStream.isJSLineTerminator(input[index]))\r
+ return -1;\r
+ index++;\r
+ break;\r
+ case REOP_FLAT1:\r
+ if (index >= input.length) {\r
+ return state.noMoreInput();\r
+ }\r
+ if (!matchChar(state.flags, ren.chr, input[index]))\r
+ return -1;\r
+ index++;\r
+ break;\r
+ case REOP_FLAT: {\r
+ char[] source = (ren.s != null)\r
+ ? ren.s\r
+ : this.source.toCharArray();\r
+ int start = ((Integer)ren.kid).intValue();\r
+ int length = ren.kid2 - start;\r
+ for (int i = 0; i < length; i++, index++) {\r
+ if (index >= input.length) {\r
+ return state.noMoreInput();\r
+ }\r
+ if (!matchChar(state.flags, input[index],\r
+ source[start + i]))\r
+ return -1;\r
+ }\r
+ }\r
+ break;\r
+ case REOP_JUMP:\r
+ break;\r
+ case REOP_END:\r
+ break;\r
+ default :\r
+ throw new RuntimeException("Unsupported by node matcher");\r
+ }\r
+ ren = ren.next;\r
+ }\r
+ return index;\r
+ }\r
+\r
+ int matchRegExp(MatchState state, RENode ren, int index) {\r
+ // have to include the position beyond the last character\r
+ // in order to detect end-of-input/line condition\r
+ for (int i = index; i <= state.input.length; i++) { \r
+ state.skipped = i - index;\r
+ state.parenCount = 0;\r
+ int result = matchRENodes(state, ren, null, i);\r
+ if (result != -1)\r
+ return result;\r
+ }\r
+ return -1;\r
+ }\r
+\r
+ /*\r
+ * indexp is assumed to be an array of length 1\r
+ */\r
+ Object executeRegExp(Context cx, Scriptable scopeObj, RegExpImpl res,\r
+ String str, int indexp[], int matchType) \r
+ {\r
+ NativeRegExp re = this;\r
+ /*\r
+ * Initialize a CompilerState to minimize recursive argument traffic.\r
+ */\r
+ MatchState state = new MatchState();\r
+ state.inputExhausted = false;\r
+ state.anchoring = false;\r
+ state.flags = re.flags;\r
+ state.scope = scopeObj;\r
+\r
+ char[] charArray = str.toCharArray();\r
+ int start = indexp[0];\r
+ if (start > charArray.length)\r
+ start = charArray.length;\r
+ int index = start;\r
+ state.cpbegin = 0;\r
+ state.cpend = charArray.length;\r
+ state.start = start;\r
+ state.skipped = 0;\r
+ state.input = charArray;\r
+\r
+ state.parenCount = 0;\r
+ state.maybeParens = new SubString[re.parenCount];\r
+ state.parens = new SubString[re.parenCount];\r
+ // We allocate the elements of "parens" and "maybeParens" lazily in\r
+ // the Java port since we don't have arenas.\r
+\r
+ /*\r
+ * Call the recursive matcher to do the real work. Return null on mismatch\r
+ * whether testing or not. On match, return an extended Array object.\r
+ */\r
+ index = matchRegExp(state, ren, index);\r
+ if (index == -1) {\r
+ if (matchType != PREFIX || !state.inputExhausted) return null;\r
+ return Undefined.instance;\r
+ }\r
+ int i = index - state.cpbegin;\r
+ indexp[0] = i;\r
+ int matchlen = i - (start + state.skipped);\r
+ int ep = index; \r
+ index -= matchlen;\r
+ Object result;\r
+ Scriptable obj;\r
+\r
+ if (matchType == TEST) {\r
+ /*\r
+ * Testing for a match and updating cx.regExpImpl: don't allocate\r
+ * an array object, do return true.\r
+ */\r
+ result = Boolean.TRUE;\r
+ obj = null;\r
+ }\r
+ else {\r
+ /*\r
+ * The array returned on match has element 0 bound to the matched\r
+ * string, elements 1 through state.parenCount bound to the paren\r
+ * matches, an index property telling the length of the left context,\r
+ * and an input property referring to the input string.\r
+ */\r
+ Scriptable scope = getTopLevelScope(scopeObj);\r
+ result = ScriptRuntime.newObject(cx, scope, "Array", null);\r
+ obj = (Scriptable) result;\r
+\r
+ String matchstr = new String(charArray, index, matchlen);\r
+ obj.put(0, obj, matchstr);\r
+ }\r
+\r
+ if (state.parenCount > re.parenCount)\r
+ throw new RuntimeException();\r
+ if (state.parenCount == 0) {\r
+ res.parens.setSize(0);\r
+ res.lastParen = SubString.emptySubString;\r
+ } else {\r
+ SubString parsub = null;\r
+ int num;\r
+ res.parens.setSize(state.parenCount);\r
+ for (num = 0; num < state.parenCount; num++) {\r
+ parsub = state.parens[num];\r
+ res.parens.setElementAt(parsub, num);\r
+ if (matchType == TEST) continue;\r
+ String parstr = parsub == null ? "": parsub.toString();\r
+ obj.put(num+1, obj, parstr);\r
+ }\r
+ res.lastParen = parsub;\r
+ }\r
+\r
+ if (! (matchType == TEST)) {\r
+ /*\r
+ * Define the index and input properties last for better for/in loop\r
+ * order (so they come after the elements).\r
+ */\r
+ obj.put("index", obj, new Integer(start + state.skipped));\r
+ obj.put("input", obj, str);\r
+ }\r
+\r
+ if (res.lastMatch == null) {\r
+ res.lastMatch = new SubString();\r
+ res.leftContext = new SubString();\r
+ res.rightContext = new SubString();\r
+ }\r
+ res.lastMatch.charArray = charArray;\r
+ res.lastMatch.index = index;\r
+ res.lastMatch.length = matchlen;\r
+\r
+ res.leftContext.charArray = charArray;\r
+ if (cx.getLanguageVersion() == Context.VERSION_1_2) {\r
+ /*\r
+ * JS1.2 emulated Perl4.0.1.8 (patch level 36) for global regexps used\r
+ * in scalar contexts, and unintentionally for the string.match "list"\r
+ * psuedo-context. On "hi there bye", the following would result:\r
+ *\r
+ * Language while(/ /g){print("$`");} s/ /$`/g\r
+ * perl4.036 "hi", "there" "hihitherehi therebye"\r
+ * perl5 "hi", "hi there" "hihitherehi therebye"\r
+ * js1.2 "hi", "there" "hihitheretherebye"\r
+ *\r
+ * Insofar as JS1.2 always defined $` as "left context from the last\r
+ * match" for global regexps, it was more consistent than perl4.\r
+ */\r
+ res.leftContext.index = start;\r
+ res.leftContext.length = state.skipped;\r
+ } else {\r
+ /*\r
+ * For JS1.3 and ECMAv2, emulate Perl5 exactly:\r
+ *\r
+ * js1.3 "hi", "hi there" "hihitherehi therebye"\r
+ */\r
+ res.leftContext.index = 0;\r
+ res.leftContext.length = start + state.skipped;\r
+ }\r
+\r
+ res.rightContext.charArray = charArray;\r
+ res.rightContext.index = ep;\r
+ res.rightContext.length = state.cpend - ep;\r
+\r
+ return result;\r
+ }\r
+\r
+ public byte getFlags() {\r
+ return flags;\r
+ }\r
+\r
+ private void reportError(String msg, String arg, CompilerState state) {\r
+ Object[] args = { arg };\r
+ throw NativeGlobal.constructError(\r
+ state.cx, "SyntaxError",\r
+ ScriptRuntime.getMessage(msg, args),\r
+ state.scope);\r
+ }\r
+\r
+ protected int getIdDefaultAttributes(int id) {\r
+ switch (id) {\r
+ case Id_lastIndex:\r
+ return ScriptableObject.PERMANENT;\r
+ case Id_source: \r
+ case Id_global: \r
+ case Id_ignoreCase: \r
+ case Id_multiline: \r
+ return ScriptableObject.PERMANENT | ScriptableObject.READONLY;\r
+ }\r
+ return super.getIdDefaultAttributes(id);\r
+ }\r
+ \r
+ protected Object getIdValue(int id) {\r
+ switch (id) {\r
+ case Id_lastIndex: return wrap_long(0xffffffffL & lastIndex);\r
+ case Id_source: return source;\r
+ case Id_global: return wrap_boolean((flags & GLOB) != 0);\r
+ case Id_ignoreCase: return wrap_boolean((flags & FOLD) != 0);\r
+ case Id_multiline: return wrap_boolean((flags & MULTILINE) != 0);\r
+ }\r
+ return super.getIdValue(id);\r
+ }\r
+ \r
+ protected void setIdValue(int id, Object value) {\r
+ if (id == Id_lastIndex) {\r
+ setLastIndex(ScriptRuntime.toInt32(value));\r
+ return;\r
+ }\r
+ super.setIdValue(id, value);\r
+ }\r
+\r
+ void setLastIndex(int value) {\r
+ lastIndex = value;\r
+ }\r
+ \r
+ public int methodArity(int methodId) {\r
+ if (prototypeFlag) {\r
+ switch (methodId) {\r
+ case Id_compile: return 1;\r
+ case Id_toString: return 0;\r
+ case Id_exec: return 1;\r
+ case Id_test: return 1;\r
+ case Id_prefix: return 1;\r
+ }\r
+ }\r
+ return super.methodArity(methodId);\r
+ }\r
+\r
+ public Object execMethod(int methodId, IdFunction f, Context cx,\r
+ Scriptable scope, Scriptable thisObj, \r
+ Object[] args)\r
+ throws JavaScriptException\r
+ {\r
+ if (prototypeFlag) {\r
+ switch (methodId) {\r
+ case Id_compile:\r
+ return realThis(thisObj, f, false).compile(cx, scope, args);\r
+ \r
+ case Id_toString:\r
+ return realThis(thisObj, f, true).toString();\r
+ \r
+ case Id_exec:\r
+ return realThis(thisObj, f, false).exec(cx, scope, args);\r
+ \r
+ case Id_test:\r
+ return realThis(thisObj, f, false).test(cx, scope, args);\r
+ \r
+ case Id_prefix:\r
+ return realThis(thisObj, f, false).prefix(cx, scope, args);\r
+ }\r
+ }\r
+ return super.execMethod(methodId, f, cx, scope, thisObj, args);\r
+ }\r
+\r
+ private NativeRegExp realThis(Scriptable thisObj, IdFunction f, \r
+ boolean readOnly)\r
+ {\r
+ while (!(thisObj instanceof NativeRegExp)) {\r
+ thisObj = nextInstanceCheck(thisObj, f, readOnly);\r
+ }\r
+ return (NativeRegExp)thisObj;\r
+ }\r
+\r
+ protected String getIdName(int id) {\r
+ switch (id) {\r
+ case Id_lastIndex: return "lastIndex";\r
+ case Id_source: return "source";\r
+ case Id_global: return "global";\r
+ case Id_ignoreCase: return "ignoreCase";\r
+ case Id_multiline: return "multiline";\r
+ }\r
+ \r
+ if (prototypeFlag) {\r
+ switch (id) {\r
+ case Id_compile: return "compile";\r
+ case Id_toString: return "toString";\r
+ case Id_exec: return "exec";\r
+ case Id_test: return "test";\r
+ case Id_prefix: return "prefix";\r
+ }\r
+ }\r
+ return null;\r
+ }\r
+\r
+ protected int maxInstanceId() { return MAX_INSTANCE_ID; }\r
+\r
+// #string_id_map#\r
+\r
+ private static final int\r
+ Id_lastIndex = 1,\r
+ Id_source = 2,\r
+ Id_global = 3,\r
+ Id_ignoreCase = 4,\r
+ Id_multiline = 5,\r
+ \r
+ MAX_INSTANCE_ID = 5;\r
+\r
+ protected int mapNameToId(String s) {\r
+ int id;\r
+// #generated# Last update: 2001-05-24 12:01:22 GMT+02:00\r
+ L0: { id = 0; String X = null; int c;\r
+ int s_length = s.length();\r
+ if (s_length==6) {\r
+ c=s.charAt(0);\r
+ if (c=='g') { X="global";id=Id_global; }\r
+ else if (c=='s') { X="source";id=Id_source; }\r
+ }\r
+ else if (s_length==9) {\r
+ c=s.charAt(0);\r
+ if (c=='l') { X="lastIndex";id=Id_lastIndex; }\r
+ else if (c=='m') { X="multiline";id=Id_multiline; }\r
+ }\r
+ else if (s_length==10) { X="ignoreCase";id=Id_ignoreCase; }\r
+ if (X!=null && X!=s && !X.equals(s)) id = 0;\r
+ }\r
+// #/generated#\r
+// #/string_id_map#\r
+\r
+ if (id != 0 || !prototypeFlag) { return id; }\r
+\r
+// #string_id_map#\r
+// #generated# Last update: 2001-05-24 12:01:22 GMT+02:00\r
+ L0: { id = 0; String X = null; int c;\r
+ L: switch (s.length()) {\r
+ case 4: c=s.charAt(0);\r
+ if (c=='e') { X="exec";id=Id_exec; }\r
+ else if (c=='t') { X="test";id=Id_test; }\r
+ break L;\r
+ case 6: X="prefix";id=Id_prefix; break L;\r
+ case 7: X="compile";id=Id_compile; break L;\r
+ case 8: X="toString";id=Id_toString; break L;\r
+ }\r
+ if (X!=null && X!=s && !X.equals(s)) id = 0;\r
+ }\r
+// #/generated#\r
+ return id;\r
+ }\r
+\r
+ private static final int\r
+ Id_compile = MAX_INSTANCE_ID + 1,\r
+ Id_toString = MAX_INSTANCE_ID + 2,\r
+ Id_exec = MAX_INSTANCE_ID + 3,\r
+ Id_test = MAX_INSTANCE_ID + 4,\r
+ Id_prefix = MAX_INSTANCE_ID + 5,\r
+ \r
+ MAX_PROTOTYPE_ID = MAX_INSTANCE_ID + 5;\r
+\r
+// #/string_id_map#\r
+ private boolean prototypeFlag;\r
+\r
+ private String source; /* locked source string, sans // */\r
+ private int lastIndex; /* index after last match, for //g iterator */\r
+ private int parenCount; /* number of parenthesized submatches */\r
+ private byte flags; /* flags */\r
+ private byte[] program; /* regular expression bytecode */\r
+\r
+ RENode ren;\r
+}\r
+\r
+class CompilerState {\r
+ CompilerState(String source, int flags, Context cx, Scriptable scope) {\r
+ this.source = source.toCharArray();\r
+ this.scope = scope;\r
+ this.flags = flags;\r
+ this.cx = cx;\r
+ }\r
+ Context cx;\r
+ Scriptable scope;\r
+ char[] source;\r
+ int indexBegin;\r
+ int index;\r
+ int flags;\r
+ int parenCount;\r
+ int progLength;\r
+ byte[] prog;\r
+}\r
+\r
+\r
+class RENode {\r
+ public static final int ANCHORED = 0x01; /* anchored at the front */\r
+ public static final int SINGLE = 0x02; /* matches a single char */\r
+ public static final int NONEMPTY = 0x04; /* does not match empty string */\r
+ public static final int ISNEXT = 0x08; /* ren is next after at least one node */\r
+ public static final int GOODNEXT = 0x10; /* ren.next is a tree-like edge in the graph */\r
+ public static final int ISJOIN = 0x20; /* ren is a join point in the graph */\r
+ public static final int REALLOK = 0x40; /* REOP_FLAT owns tempPool space to realloc */\r
+ public static final int MINIMAL = 0x80; /* un-greedy matching for ? * + {} */\r
+\r
+ RENode(CompilerState state, byte op, Object kid) {\r
+ this.op = op;\r
+ this.kid = kid;\r
+ }\r
+ \r
+ private void calcBMSize(char[] s, int index, int cp2, boolean fold) {\r
+ char maxc = 0;\r
+ while (index < cp2) {\r
+ char c = s[index++];\r
+ if (c == '\\') {\r
+ if (index + 5 <= cp2 && s[index] == 'u'\r
+ && NativeRegExp.isHex(s[index+1]) \r
+ && NativeRegExp.isHex(s[index+2])\r
+ && NativeRegExp.isHex(s[index+3]) \r
+ && NativeRegExp.isHex(s[index+4]))\r
+ {\r
+ int x = (((((NativeRegExp.unHex(s[index+0]) << 4) +\r
+ NativeRegExp.unHex(s[index+1])) << 4) +\r
+ NativeRegExp.unHex(s[index+2])) << 4) +\r
+ NativeRegExp.unHex(s[index+3]);\r
+ c = (char) x;\r
+ index += 5;\r
+ } else {\r
+ /*\r
+ * Octal and hex escapes can't be > 255. Skip this\r
+ * backslash and let the loop pass over the remaining\r
+ * escape sequence as if it were text to match.\r
+ */\r
+ if (maxc < 255) maxc = 255;\r
+ continue;\r
+ }\r
+ }\r
+ if (fold) {\r
+ /*\r
+ * Don't assume that lowercase are above uppercase, or\r
+ * that c is either even when c has upper and lowercase\r
+ * versions.\r
+ */\r
+ char c2;\r
+ if ((c2 = Character.toUpperCase(c)) > maxc)\r
+ maxc = c2;\r
+ if ((c2 = Character.toLowerCase(c2)) > maxc)\r
+ maxc = c2;\r
+ }\r
+ if (c > maxc)\r
+ maxc = c;\r
+ }\r
+ bmsize = (short)((maxc + NativeRegExp.JS_BITS_PER_BYTE) \r
+ / NativeRegExp.JS_BITS_PER_BYTE);\r
+ }\r
+ \r
+ private void matchBit(char c, int fill) {\r
+ int i = (c) >> 3;\r
+ byte b = (byte) (c & 7);\r
+ b = (byte) (1 << b);\r
+ if (fill != 0)\r
+ bitmap[i] &= ~b;\r
+ else\r
+ bitmap[i] |= b;\r
+ }\r
+\r
+ private void checkRange(char lastc, int fill) {\r
+ matchBit(lastc, fill);\r
+ matchBit('-', fill);\r
+ }\r
+\r
+ void buildBitmap(MatchState state, char[] s, boolean fold) {\r
+ int index = ((Integer) kid).intValue();\r
+ int end = kid2;\r
+ byte fill = 0;\r
+ int i,n,ocp;\r
+ \r
+ boolean not = false;\r
+ kid2 = 0;\r
+ if (s[index] == '^') {\r
+ not = true;\r
+ kid2 = -1;\r
+ index++;\r
+ }\r
+ \r
+ calcBMSize(s, index, end, fold);\r
+ bitmap = new byte[bmsize];\r
+ if (not) {\r
+ fill = (byte)0xff;\r
+ for (i = 0; i < bmsize; i++)\r
+ bitmap[i] = (byte)0xff;\r
+ bitmap[0] = (byte)0xfe;\r
+ }\r
+ int nchars = bmsize * NativeRegExp.JS_BITS_PER_BYTE;\r
+ char lastc = (char)nchars;\r
+ boolean inrange = false;\r
+ \r
+ while (index < end) {\r
+ char c = s[index++];\r
+ if (c == '\\') {\r
+ c = s[index++];\r
+ switch (c) {\r
+ case 'b':\r
+ case 'f':\r
+ case 'n':\r
+ case 'r':\r
+ case 't':\r
+ case 'v':\r
+ c = NativeRegExp.getEscape(c);\r
+ break;\r
+\r
+ case 'd':\r
+ if (inrange)\r
+ checkRange(lastc, fill);\r
+ lastc = (char) nchars;\r
+ for (c = '0'; c <= '9'; c++)\r
+ matchBit(c, fill);\r
+ continue;\r
+\r
+ case 'D':\r
+ if (inrange)\r
+ checkRange(lastc, fill);\r
+ lastc = (char) nchars;\r
+ for (c = 0; c < '0'; c++)\r
+ matchBit(c, fill);\r
+ for (c = '9' + 1; c < nchars; c++)\r
+ matchBit(c, fill);\r
+ continue;\r
+\r
+ case 'w':\r
+ if (inrange)\r
+ checkRange(lastc, fill);\r
+ lastc = (char) nchars;\r
+ for (c = 0; c < nchars; c++)\r
+ if (NativeRegExp.isWord(c))\r
+ matchBit(c, fill);\r
+ continue;\r
+\r
+ case 'W':\r
+ if (inrange)\r
+ checkRange(lastc, fill);\r
+ lastc = (char) nchars;\r
+ for (c = 0; c < nchars; c++)\r
+ if (!NativeRegExp.isWord(c))\r
+ matchBit(c, fill);\r
+ continue;\r
+\r
+ case 's':\r
+ if (inrange)\r
+ checkRange(lastc, fill);\r
+ lastc = (char) nchars;\r
+ for (c = 0; c < nchars; c++)\r
+ if (Character.isWhitespace(c))\r
+ matchBit(c, fill);\r
+ continue;\r
+\r
+ case 'S':\r
+ if (inrange)\r
+ checkRange(lastc, fill);\r
+ lastc = (char) nchars;\r
+ for (c = 0; c < nchars; c++)\r
+ if (!Character.isWhitespace(c))\r
+ matchBit(c, fill);\r
+ continue;\r
+\r
+ case '0':\r
+ case '1':\r
+ case '2':\r
+ case '3':\r
+ case '4':\r
+ case '5':\r
+ case '6':\r
+ case '7':\r
+ n = NativeRegExp.unDigit(c);\r
+ ocp = index - 2;\r
+ c = s[index];\r
+ if ('0' <= c && c <= '7') {\r
+ index++;\r
+ n = 8 * n + NativeRegExp.unDigit(c);\r
+\r
+ c = s[index];\r
+ if ('0' <= c && c <= '7') {\r
+ index++;\r
+ i = 8 * n + NativeRegExp.unDigit(c);\r
+ if (i <= 0377)\r
+ n = i;\r
+ else\r
+ index--;\r
+ }\r
+ }\r
+ c = (char) n;\r
+ break;\r
+\r
+ case 'x':\r
+ ocp = index;\r
+ if (index < s.length &&\r
+ NativeRegExp.isHex(c = s[index++]))\r
+ {\r
+ n = NativeRegExp.unHex(c);\r
+ if (index < s.length &&\r
+ NativeRegExp.isHex(c = s[index++]))\r
+ {\r
+ n <<= 4;\r
+ n += NativeRegExp.unHex(c);\r
+ }\r
+ } else {\r
+ index = ocp; /* \xZZ is xZZ (Perl does \0ZZ!) */\r
+ n = 'x';\r
+ }\r
+ c = (char) n;\r
+ break;\r
+\r
+ case 'u':\r
+ if (s.length > index+3\r
+ && NativeRegExp.isHex(s[index+0])\r
+ && NativeRegExp.isHex(s[index+1]) \r
+ && NativeRegExp.isHex(s[index+2])\r
+ && NativeRegExp.isHex(s[index+3])) {\r
+ n = (((((NativeRegExp.unHex(s[index+0]) << 4) +\r
+ NativeRegExp.unHex(s[index+1])) << 4) +\r
+ NativeRegExp.unHex(s[index+2])) << 4) +\r
+ NativeRegExp.unHex(s[index+3]);\r
+ c = (char) n;\r
+ index += 4;\r
+ }\r
+ break;\r
+\r
+ case 'c':\r
+ c = s[index++];\r
+ c = Character.toUpperCase(c);\r
+ c = (char) (c ^ 64); // JS_TOCTRL\r
+ break;\r
+ }\r
+ }\r
+\r
+ if (inrange) {\r
+ if (lastc > c) {\r
+ throw NativeGlobal.constructError(\r
+ Context.getCurrentContext(), "RangeError",\r
+ ScriptRuntime.getMessage(\r
+ "msg.bad.range", null),\r
+ state.scope);\r
+ }\r
+ inrange = false;\r
+ } else {\r
+ // Set lastc so we match just c's bit in the for loop.\r
+ lastc = c;\r
+\r
+ // [balance:\r
+ if (index + 1 < end && s[index] == '-' &&\r
+ s[index+1] != ']')\r
+ {\r
+ index++;\r
+ inrange = true;\r
+ continue;\r
+ }\r
+ }\r
+\r
+ // Match characters in the range [lastc, c].\r
+ for (; lastc <= c; lastc++) {\r
+ matchBit(lastc, fill);\r
+ if (fold) {\r
+ /*\r
+ * Must do both upper and lower for Turkish dotless i,\r
+ * Georgian, etc.\r
+ */\r
+ char foldc = Character.toUpperCase(lastc);\r
+ matchBit(foldc, fill);\r
+ foldc = Character.toLowerCase(foldc);\r
+ matchBit(foldc, fill);\r
+ }\r
+ }\r
+ lastc = c;\r
+ }\r
+ }\r
+ byte op; /* packed r.e. op bytecode */\r
+ byte flags; /* flags, see below */\r
+ short offset; /* bytecode offset */\r
+ RENode next; /* next in concatenation order */\r
+ Object kid; /* first operand */\r
+ int kid2; /* second operand */\r
+ int num; /* could be a number */\r
+ char chr; /* or a char */\r
+ short min,max; /* or a range */\r
+ short kidlen; /* length of string at kid, in chars */\r
+ short bmsize; /* bitmap size, based on max char code */\r
+ char[] s; /* if null, use state.source */\r
+ byte[] bitmap; /* cclass bitmap */\r
+}\r
+\r
+\r
+class MatchState {\r
+ boolean inputExhausted; /* did we run out of input chars ? */\r
+ boolean anchoring; /* true if multiline anchoring ^/$ */\r
+ int pcend; /* pc limit (fencepost) */\r
+ int cpbegin, cpend; /* cp base address and limit */\r
+ int start; /* offset from cpbegin to start at */\r
+ int skipped; /* chars skipped anchoring this r.e. */\r
+ byte flags; /* pennants */\r
+ int parenCount; /* number of paren substring matches */\r
+ SubString[] maybeParens; /* possible paren substring pointers */\r
+ SubString[] parens; /* certain paren substring matches */\r
+ Scriptable scope;\r
+ char[] input;\r
+\r
+ public int noMoreInput() {\r
+ inputExhausted = true;\r
+ /*\r
+ try {\r
+ throw new Exception();\r
+ }\r
+ catch (Exception e) {\r
+ e.printStackTrace();\r
+ }\r
+ */\r
+ return -1;\r
+ }\r
+}\r
+\r
+class GreedyState {\r
+ MatchState state;\r
+ RENode kid;\r
+ RENode next;\r
+ RENode stop;\r
+ int kidCount;\r
+ int maxKid;\r
+}\r
+\r