+++ /dev/null
-/* -*- Mode: java; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-\r
- *\r
- * The contents of this file are subject to the Netscape Public\r
- * License Version 1.1 (the "License"); you may not use this file\r
- * except in compliance with the License. You may obtain a copy of\r
- * the License at http://www.mozilla.org/NPL/\r
- *\r
- * Software distributed under the License is distributed on an "AS\r
- * IS" basis, WITHOUT WARRANTY OF ANY KIND, either express oqr\r
- * implied. See the License for the specific language governing\r
- * rights and limitations under the License.\r
- *\r
- * The Original Code is Rhino code, released\r
- * May 6, 1998.\r
- *\r
- * The Initial Developer of the Original Code is Netscape\r
- * Communications Corporation. Portions created by Netscape are\r
- * Copyright (C) 1997-1999 Netscape Communications Corporation. All\r
- * Rights Reserved.\r
- *\r
- * Contributor(s): \r
- * Norris Boyd\r
- * Brendan Eich\r
- * Matthias Radestock\r
- *\r
- * Alternatively, the contents of this file may be used under the\r
- * terms of the GNU Public License (the "GPL"), in which case the\r
- * provisions of the GPL are applicable instead of those above.\r
- * If you wish to allow use of your version of this file only\r
- * under the terms of the GPL and not to allow others to use your\r
- * version of this file under the NPL, indicate your decision by\r
- * deleting the provisions above and replace them with the notice\r
- * and other provisions required by the GPL. If you do not delete\r
- * the provisions above, a recipient may use your version of this\r
- * file under either the NPL or the GPL.\r
- */\r
-\r
-package org.mozilla.javascript.regexp;\r
-\r
-import org.mozilla.javascript.*;\r
-import java.lang.reflect.Method;\r
-\r
-/**\r
- * This class implements the RegExp native object.\r
- *\r
- * Revision History:\r
- * Implementation in C by Brendan Eich\r
- * Initial port to Java by Norris Boyd from jsregexp.c version 1.36\r
- * Merged up to version 1.38, which included Unicode support.\r
- * Merged bug fixes in version 1.39.\r
- * Merged JSFUN13_BRANCH changes up to 1.32.2.13\r
- *\r
- * @author Brendan Eich\r
- * @author Norris Boyd\r
- */\r
-public class NativeRegExp extends IdScriptable implements Function {\r
-\r
- public static final int GLOB = 0x1; // 'g' flag: global\r
- public static final int FOLD = 0x2; // 'i' flag: fold\r
- public static final int MULTILINE = 0x4; // 'm' flag: multiline\r
-\r
- //type of match to perform\r
- public static final int TEST = 0;\r
- public static final int MATCH = 1;\r
- public static final int PREFIX = 2;\r
-\r
- private static final boolean debug = false;\r
-\r
- public static void init(Context cx, Scriptable scope, boolean sealed) {\r
- \r
- NativeRegExp proto = new NativeRegExp();\r
- proto.prototypeFlag = true;\r
- proto.activateIdMap(MAX_PROTOTYPE_ID);\r
- proto.setSealFunctionsFlag(sealed);\r
- proto.setFunctionParametrs(cx);\r
- proto.setParentScope(scope);\r
- proto.setPrototype(getObjectPrototype(scope));\r
-\r
-\r
- NativeRegExpCtor ctor = new NativeRegExpCtor();\r
-\r
- ctor.setPrototype(getClassPrototype(scope, "Function"));\r
- ctor.setParentScope(scope);\r
-\r
- ctor.setImmunePrototypeProperty(proto);\r
- \r
- if (sealed) {\r
- proto.sealObject();\r
- ctor.sealObject();\r
- }\r
-\r
- defineProperty(scope, "RegExp", ctor, ScriptableObject.DONTENUM);\r
- }\r
-\r
- public NativeRegExp(Context cx, Scriptable scope, String source, \r
- String global, boolean flat) {\r
- init(cx, scope, source, global, flat);\r
- }\r
-\r
- public void init(Context cx, Scriptable scope, String source, \r
- String global, boolean flat) {\r
- this.source = source;\r
- flags = 0;\r
- if (global != null) {\r
- for (int i=0; i < global.length(); i++) {\r
- char c = global.charAt(i);\r
- if (c == 'g') {\r
- flags |= GLOB;\r
- } else if (c == 'i') {\r
- flags |= FOLD;\r
- } else if (c == 'm') {\r
- flags |= MULTILINE;\r
- } else {\r
- Object[] errArgs = { new Character(c) };\r
- throw NativeGlobal.constructError(cx, "SyntaxError",\r
- ScriptRuntime.getMessage(\r
- "msg.invalid.re.flag", errArgs),\r
- scope);\r
- }\r
- }\r
- }\r
-\r
- CompilerState state = new CompilerState(source, flags, cx, scope);\r
- if (flat) {\r
- ren = null;\r
- int sourceLen = source.length();\r
- int index = 0;\r
- while (sourceLen > 0) {\r
- int len = sourceLen;\r
- if (len > REOP_FLATLEN_MAX) {\r
- len = REOP_FLATLEN_MAX;\r
- }\r
- RENode ren2 = new RENode(state, len == 1 ? REOP_FLAT1 : REOP_FLAT,\r
- new Integer(index)); \r
- ren2.flags = RENode.NONEMPTY;\r
- if (len > 1) {\r
- ren2.kid2 = index + len;\r
- } else {\r
- ren2.flags |= RENode.SINGLE;\r
- ren2.chr = state.source[index]; \r
- }\r
- index += len;\r
- sourceLen -= len;\r
- if (ren == null)\r
- ren = ren2;\r
- else\r
- setNext(state, ren, ren2);\r
- }\r
- }\r
- else\r
- this.ren = parseRegExp(state);\r
- if (ren == null) return;\r
- RENode end = new RENode(state, REOP_END, null);\r
- setNext(state, ren, end);\r
- if (debug)\r
- dumpRegExp(state, ren);\r
- this.lastIndex = 0;\r
- this.parenCount = state.parenCount;\r
- this.flags = flags;\r
-\r
- scope = org.xwt.util.JSObject.defaultObjects;\r
- setPrototype(getClassPrototype(scope, "RegExp"));\r
- setParentScope(scope);\r
- }\r
-\r
- public String getClassName() {\r
- return "RegExp";\r
- }\r
-\r
- public Object call(Context cx, Scriptable scope, Scriptable thisObj,\r
- Object[] args) {\r
- return execSub(cx, scope, args, MATCH);\r
- }\r
-\r
- public Scriptable construct(Context cx, Scriptable scope, Object[] args) {\r
- return (Scriptable) call(cx, scope, null, args);\r
- }\r
-\r
- Scriptable compile(Context cx, Scriptable scope, Object[] args) {\r
- if (args.length > 0 && args[0] instanceof NativeRegExp) {\r
- if (args.length > 1 && args[1] != Undefined.instance) {\r
- // report error\r
- throw NativeGlobal.constructError(\r
- cx, "TypeError",\r
- "only one argument may be specified " +\r
- "if the first argument is a RegExp object",\r
- scope);\r
- }\r
- NativeRegExp thatObj = (NativeRegExp) args[0];\r
- source = thatObj.source; \r
- lastIndex = thatObj.lastIndex;\r
- parenCount = thatObj.parenCount;\r
- flags = thatObj.flags;\r
- program = thatObj.program;\r
- ren = thatObj.ren;\r
- return this;\r
- }\r
- String s = args.length == 0 ? "" : ScriptRuntime.toString(args[0]);\r
- String global = args.length > 1 && args[1] != Undefined.instance\r
- ? ScriptRuntime.toString(args[1])\r
- : null;\r
- init(cx, scope, s, global, false);\r
- return this;\r
- }\r
-\r
- public String toString() {\r
- StringBuffer buf = new StringBuffer();\r
- buf.append('/');\r
- buf.append(source);\r
- buf.append('/');\r
- if ((flags & GLOB) != 0)\r
- buf.append('g');\r
- if ((flags & FOLD) != 0)\r
- buf.append('i');\r
- if ((flags & MULTILINE) != 0)\r
- buf.append('m');\r
- return buf.toString();\r
- }\r
-\r
- public NativeRegExp() {\r
- }\r
- \r
- private static RegExpImpl getImpl(Context cx) {\r
- return (RegExpImpl) ScriptRuntime.getRegExpProxy(cx);\r
- }\r
-\r
- private Object execSub(Context cx, Scriptable scopeObj,\r
- Object[] args, int matchType)\r
- {\r
- RegExpImpl reImpl = getImpl(cx);\r
- String str;\r
- if (args.length == 0) {\r
- str = reImpl.input;\r
- if (str == null) {\r
- Object[] errArgs = { toString() };\r
- throw NativeGlobal.constructError(\r
- cx, "SyntaxError",\r
- ScriptRuntime.getMessage\r
- ("msg.no.re.input.for", errArgs),\r
- scopeObj);\r
- }\r
- } else {\r
- str = ScriptRuntime.toString(args[0]);\r
- }\r
- int i = ((flags & GLOB) != 0) ? lastIndex : 0;\r
- int indexp[] = { i };\r
- Object rval = executeRegExp(cx, scopeObj, \r
- reImpl, str, indexp, matchType);\r
- if ((flags & GLOB) != 0) {\r
- lastIndex = (rval == null || rval == Undefined.instance) \r
- ? 0 : indexp[0];\r
- }\r
- return rval;\r
- }\r
-\r
- private Object exec(Context cx, Scriptable scopeObj, Object[] args) {\r
- return execSub(cx, scopeObj, args, MATCH);\r
- }\r
-\r
- private Object test(Context cx, Scriptable scopeObj, Object[] args) {\r
- Object rval = execSub(cx, scopeObj, args, TEST);\r
- if (rval == null || !rval.equals(Boolean.TRUE))\r
- rval = Boolean.FALSE;\r
- return rval;\r
- }\r
-\r
- private Object prefix(Context cx, Scriptable scopeObj, Object[] args) {\r
- return execSub(cx, scopeObj, args, PREFIX);\r
- }\r
-\r
- static final int JS_BITS_PER_BYTE = 8;\r
-\r
- private static final byte REOP_EMPTY = 0; /* match rest of input against rest of r.e. */\r
- private static final byte REOP_ALT = 1; /* alternative subexpressions in kid and next */\r
- private static final byte REOP_BOL = 2; /* beginning of input (or line if multiline) */\r
- private static final byte REOP_EOL = 3; /* end of input (or line if multiline) */\r
- private static final byte REOP_WBDRY = 4; /* match "" at word boundary */\r
- private static final byte REOP_WNONBDRY = 5; /* match "" at word non-boundary */\r
- private static final byte REOP_QUANT = 6; /* quantified atom: atom{1,2} */\r
- private static final byte REOP_STAR = 7; /* zero or more occurrences of kid */\r
- private static final byte REOP_PLUS = 8; /* one or more occurrences of kid */\r
- private static final byte REOP_OPT = 9; /* optional subexpression in kid */\r
- private static final byte REOP_LPAREN = 10; /* left paren bytecode: kid is u.num'th sub-regexp */\r
- private static final byte REOP_RPAREN = 11; /* right paren bytecode */\r
- private static final byte REOP_DOT = 12; /* stands for any character */\r
- private static final byte REOP_CCLASS = 13; /* character class: [a-f] */\r
- private static final byte REOP_DIGIT = 14; /* match a digit char: [0-9] */\r
- private static final byte REOP_NONDIGIT = 15; /* match a non-digit char: [^0-9] */\r
- private static final byte REOP_ALNUM = 16; /* match an alphanumeric char: [0-9a-z_A-Z] */\r
- private static final byte REOP_NONALNUM = 17; /* match a non-alphanumeric char: [^0-9a-z_A-Z] */\r
- private static final byte REOP_SPACE = 18; /* match a whitespace char */\r
- private static final byte REOP_NONSPACE = 19; /* match a non-whitespace char */\r
- private static final byte REOP_BACKREF = 20; /* back-reference (e.g., \1) to a parenthetical */\r
- private static final byte REOP_FLAT = 21; /* match a flat string */\r
- private static final byte REOP_FLAT1 = 22; /* match a single char */\r
- private static final byte REOP_JUMP = 23; /* for deoptimized closure loops */\r
- private static final byte REOP_DOTSTAR = 24; /* optimize .* to use a single opcode */\r
- private static final byte REOP_ANCHOR = 25; /* like .* but skips left context to unanchored r.e. */\r
- private static final byte REOP_EOLONLY = 26; /* $ not preceded by any pattern */\r
- private static final byte REOP_UCFLAT = 27; /* flat Unicode string; len immediate counts chars */\r
- private static final byte REOP_UCFLAT1 = 28; /* single Unicode char */\r
- private static final byte REOP_UCCLASS = 29; /* Unicode character class, vector of chars to match */\r
- private static final byte REOP_NUCCLASS = 30; /* negated Unicode character class */\r
- private static final byte REOP_BACKREFi = 31; /* case-independent REOP_BACKREF */\r
- private static final byte REOP_FLATi = 32; /* case-independent REOP_FLAT */\r
- private static final byte REOP_FLAT1i = 33; /* case-independent REOP_FLAT1 */\r
- private static final byte REOP_UCFLATi = 34; /* case-independent REOP_UCFLAT */\r
- private static final byte REOP_UCFLAT1i = 35; /* case-independent REOP_UCFLAT1 */\r
- private static final byte REOP_ANCHOR1 = 36; /* first-char discriminating REOP_ANCHOR */\r
- private static final byte REOP_NCCLASS = 37; /* negated 8-bit character class */\r
- private static final byte REOP_DOTSTARMIN = 38; /* ungreedy version of REOP_DOTSTAR */\r
- private static final byte REOP_LPARENNON = 39; /* non-capturing version of REOP_LPAREN */\r
- private static final byte REOP_RPARENNON = 40; /* non-capturing version of REOP_RPAREN */\r
- private static final byte REOP_ASSERT = 41; /* zero width positive lookahead assertion */\r
- private static final byte REOP_ASSERT_NOT = 42; /* zero width negative lookahead assertion */\r
- private static final byte REOP_END = 43;\r
-\r
- /* maximum length of FLAT string */\r
- private static final int REOP_FLATLEN_MAX = 255;\r
-\r
- /* not thread safe, used only for debugging */\r
- private static int level;\r
-\r
- private static String[] reopname = null;\r
- static {\r
- if (debug) {\r
- String a[] = {\r
- "empty",\r
- "alt",\r
- "bol",\r
- "eol",\r
- "wbdry",\r
- "wnonbdry",\r
- "quant",\r
- "star",\r
- "plus",\r
- "opt",\r
- "lparen",\r
- "rparen",\r
- "dot",\r
- "cclass",\r
- "digit",\r
- "nondigit",\r
- "alnum",\r
- "nonalnum",\r
- "space",\r
- "nonspace",\r
- "backref",\r
- "flat",\r
- "flat1",\r
- "jump",\r
- "dotstar",\r
- "anchor",\r
- "eolonly",\r
- "ucflat",\r
- "ucflat1",\r
- "ucclass",\r
- "nucclass",\r
- "backrefi",\r
- "flati",\r
- "flat1i",\r
- "ucflati",\r
- "ucflat1i",\r
- "anchor1",\r
- "ncclass",\r
- "dotstar_min",\r
- "lparen_non",\r
- "rparen_non",\r
- "end"\r
- };\r
- reopname = a;\r
- }\r
- }\r
-\r
- private String getPrintableString(String str) {\r
- if (debug) {\r
- StringBuffer buf = new StringBuffer(str.length());\r
- for (int i = 0; i < str.length(); i++) {\r
- int c = str.charAt(i);\r
- if ((c < 0x20) || (c > 0x7F)) {\r
- if (c == '\n')\r
- buf.append("\\n");\r
- else\r
- buf.append("\\u" + Integer.toHexString(c));\r
- }\r
- else\r
- buf.append((char)c);\r
- }\r
- return buf.toString();\r
- } else {\r
- return "";\r
- }\r
- }\r
-\r
- private void dumpRegExp(CompilerState state, RENode ren) {\r
- if (debug) {\r
- if (level == 0)\r
- System.out.print("level offset flags description\n");\r
- level++;\r
- do {\r
- char[] source = ren.s != null ? ren.s : state.source;\r
- System.out.print(level);\r
- System.out.print(" ");\r
- System.out.print(ren.offset);\r
- System.out.print(" " +\r
- ((ren.flags & RENode.ANCHORED) != 0 ? "A" : "-") +\r
- ((ren.flags & RENode.SINGLE) != 0 ? "S" : "-") +\r
- ((ren.flags & RENode.NONEMPTY) != 0 ? "F" : "-") + // F for full\r
- ((ren.flags & RENode.ISNEXT) != 0 ? "N" : "-") + // N for next\r
- ((ren.flags & RENode.GOODNEXT) != 0 ? "G" : "-") +\r
- ((ren.flags & RENode.ISJOIN) != 0 ? "J" : "-") +\r
- ((ren.flags & RENode.MINIMAL) != 0 ? "M" : "-") +\r
- " " +\r
- reopname[ren.op]);\r
-\r
- switch (ren.op) {\r
- case REOP_ALT:\r
- System.out.print(" ");\r
- System.out.println(ren.next.offset);\r
- dumpRegExp(state, (RENode) ren.kid);\r
- break;\r
-\r
- case REOP_STAR:\r
- case REOP_PLUS:\r
- case REOP_OPT:\r
- case REOP_ANCHOR1:\r
- case REOP_ASSERT:\r
- case REOP_ASSERT_NOT:\r
- System.out.println();\r
- dumpRegExp(state, (RENode) ren.kid);\r
- break;\r
-\r
- case REOP_QUANT:\r
- System.out.print(" next ");\r
- System.out.print(ren.next.offset);\r
- System.out.print(" min ");\r
- System.out.print(ren.min);\r
- System.out.print(" max ");\r
- System.out.println(ren.max);\r
- dumpRegExp(state, (RENode) ren.kid);\r
- break;\r
-\r
- case REOP_LPAREN:\r
- System.out.print(" num ");\r
- System.out.println(ren.num);\r
- dumpRegExp(state, (RENode) ren.kid);\r
- break;\r
-\r
- case REOP_LPARENNON:\r
- System.out.println();\r
- dumpRegExp(state, (RENode) ren.kid);\r
- break;\r
-\r
- case REOP_BACKREF:\r
- case REOP_RPAREN:\r
- System.out.print(" num ");\r
- System.out.println(ren.num);\r
- break;\r
-\r
- case REOP_CCLASS: {\r
- int index = ((Integer) ren.kid).intValue();\r
- int index2 = ren.kid2;\r
- int len = index2 - index;\r
- System.out.print(" [");\r
- System.out.print(getPrintableString(new String(source, index, len)));\r
- System.out.println("]");\r
- break;\r
- }\r
-\r
- case REOP_FLAT: {\r
- int index = ((Integer) ren.kid).intValue();\r
- int index2 = ren.kid2;\r
- int len = index2 - index;\r
- System.out.print(" ");\r
- System.out.print(getPrintableString(new String(source, index, len)));\r
- System.out.print(" (");\r
- System.out.print(len);\r
- System.out.println(")");\r
- break;\r
- }\r
-\r
- case REOP_FLAT1:\r
- System.out.print(" ");\r
- System.out.print(ren.chr);\r
- System.out.print(" ('\\");\r
- System.out.print(Integer.toString(ren.chr, 8));\r
- System.out.println("')");\r
- break;\r
-\r
- case REOP_JUMP:\r
- System.out.print(" ");\r
- System.out.println(ren.next.offset);\r
- break;\r
-\r
- case REOP_UCFLAT: {\r
- int index = ((Integer) ren.kid).intValue();\r
- int len = ren.kid2 - index;\r
- for (int i = 0; i < len; i++)\r
- System.out.print("\\u" + \r
- Integer.toHexString(source[index+i]));\r
- System.out.println();\r
- break;\r
- }\r
-\r
- case REOP_UCFLAT1:\r
- System.out.print("\\u" + \r
- Integer.toHexString(ren.chr));\r
- System.out.println();\r
- break;\r
-\r
- case REOP_UCCLASS: {\r
- int index = ((Integer) ren.kid).intValue();\r
- int len = ren.kid2 - index;\r
- System.out.print(" [");\r
- for (int i = 0; i < len; i++)\r
- System.out.print("\\u" + \r
- Integer.toHexString(source[index+i]));\r
- System.out.println("]");\r
- break;\r
- }\r
-\r
- default:\r
- System.out.println();\r
- break;\r
- }\r
-\r
- if ((ren.flags & RENode.GOODNEXT) == 0)\r
- break;\r
- } while ((ren = ren.next) != null);\r
- level--;\r
- }\r
- }\r
-\r
- private void fixNext(CompilerState state, RENode ren1, RENode ren2,\r
- RENode oldnext) {\r
- boolean goodnext;\r
- RENode next, kid, ren;\r
-\r
- goodnext = ren2 != null && (ren2.flags & RENode.ISNEXT) == 0;\r
-\r
- /*\r
- * Find the final node in a list of alternatives, or concatenations, or\r
- * even a concatenation of alternatives followed by non-alternatives (e.g.\r
- * ((x|y)z)w where ((x|y)z) is ren1 and w is ren2).\r
- */\r
- for (; (next = ren1.next) != null && next != oldnext; ren1 = next) {\r
- if (ren1.op == REOP_ALT) {\r
- /* Find the end of this alternative's operand list. */\r
- kid = (RENode) ren1.kid;\r
- if (kid.op == REOP_JUMP)\r
- continue;\r
- for (ren = kid; ren.next != null; ren = ren.next) {\r
- if (ren.op == REOP_ALT)\r
- throw new RuntimeException("REOP_ALT not expected");\r
- }\r
-\r
- /* Append a jump node to all but the last alternative. */\r
- ren.next = new RENode(state, REOP_JUMP, null);\r
- ren.next.flags |= RENode.ISNEXT;\r
- ren.flags |= RENode.GOODNEXT;\r
-\r
- /* Recur to fix all descendent nested alternatives. */\r
- fixNext(state, kid, ren2, oldnext);\r
- }\r
- }\r
-\r
- /*\r
- * Now ren1 points to the last alternative, or to the final node on a\r
- * concatenation list. Set its next link to ren2, flagging a join point\r
- * if appropriate.\r
- */\r
- if (ren2 != null) {\r
- if ((ren2.flags & RENode.ISNEXT) == 0)\r
- ren2.flags |= RENode.ISNEXT;\r
- else\r
- ren2.flags |= RENode.ISJOIN;\r
- }\r
- ren1.next = ren2;\r
- if (goodnext)\r
- ren1.flags |= RENode.GOODNEXT;\r
-\r
- /*\r
- * The following ops have a kid subtree through which to recur. Here is\r
- * where we fix the next links under the final ALT node's kid.\r
- */\r
- switch (ren1.op) {\r
- case REOP_ALT:\r
- case REOP_QUANT:\r
- case REOP_STAR:\r
- case REOP_PLUS:\r
- case REOP_OPT:\r
- case REOP_LPAREN:\r
- case REOP_LPARENNON:\r
- case REOP_ASSERT:\r
- case REOP_ASSERT_NOT:\r
- fixNext(state, (RENode) ren1.kid, ren2, oldnext);\r
- break;\r
- default:;\r
- }\r
- }\r
-\r
- private void setNext(CompilerState state, RENode ren1, RENode ren2) {\r
- fixNext(state, ren1, ren2, null);\r
- }\r
-\r
- /*\r
- * Top-down regular expression grammar, based closely on Perl 4.\r
- *\r
- * regexp: altern A regular expression is one or more\r
- * altern '|' regexp alternatives separated by vertical bar.\r
- */\r
- private RENode parseRegExp(CompilerState state) {\r
- RENode ren = parseAltern(state);\r
- if (ren == null)\r
- return null;\r
- char[] source = state.source;\r
- int index = state.index;\r
- if (index < source.length && source[index] == '|') {\r
- RENode kid = ren;\r
- ren = new RENode(state, REOP_ALT, kid);\r
- if (ren == null)\r
- return null;\r
- ren.flags = (byte) (kid.flags & (RENode.ANCHORED | RENode.NONEMPTY));\r
- RENode ren1 = ren;\r
- do {\r
- /* (balance: */\r
- state.index = ++index;\r
- if (index < source.length && (source[index] == '|' ||\r
- source[index] == ')'))\r
- {\r
- kid = new RENode(state, REOP_EMPTY, null);\r
- } else {\r
- kid = parseAltern(state);\r
- index = state.index;\r
- }\r
- if (kid == null)\r
- return null;\r
- RENode ren2 = new RENode(state, REOP_ALT, kid);\r
- if (ren2 == null)\r
- return null;\r
- ren1.next = ren2;\r
- ren1.flags |= RENode.GOODNEXT;\r
- ren2.flags = (byte) ((kid.flags & (RENode.ANCHORED |\r
- RENode.NONEMPTY))\r
- | RENode.ISNEXT);\r
- ren1 = ren2;\r
- } while (index < source.length && source[index] == '|');\r
- }\r
- return ren;\r
- }\r
-\r
- /*\r
- * altern: item An alternative is one or more items,\r
- * item altern concatenated together.\r
- */\r
- private RENode parseAltern(CompilerState state) {\r
- RENode ren = parseItem(state);\r
- if (ren == null)\r
- return null;\r
- RENode ren1 = ren;\r
- int flags = 0;\r
- char[] source = state.source;\r
- int index = state.index;\r
- char c;\r
- /* (balance: */\r
- while (index != source.length && (c = source[index]) != '|' &&\r
- c != ')')\r
- {\r
- RENode ren2 = parseItem(state);\r
- if (ren2 == null)\r
- return null;\r
- setNext(state, ren1, ren2);\r
- flags |= ren2.flags;\r
- ren1 = ren2;\r
- index = state.index;\r
- }\r
-\r
- /*\r
- * Propagate NONEMPTY to the front of a concatenation list, so that the\r
- * first alternative in (^a|b) is considered non-empty. The first node\r
- * in a list may match the empty string (as ^ does), but if the list is\r
- * non-empty, then the first node's NONEMPTY flag must be set.\r
- */\r
- ren.flags |= flags & RENode.NONEMPTY;\r
- return ren;\r
- }\r
-\r
- /*\r
- * item: assertion An item is either an assertion or\r
- * quantatom a quantified atom.\r
- *\r
- * assertion: '^' Assertions match beginning of string\r
- * (or line if the class static property\r
- * RegExp.multiline is true).\r
- * '$' End of string (or line if the class\r
- * static property RegExp.multiline is\r
- * true).\r
- * '\b' Word boundary (between \w and \W).\r
- * '\B' Word non-boundary.\r
- */\r
- RENode parseItem(CompilerState state) {\r
- RENode ren;\r
- byte op;\r
-\r
- char[] source = state.source;\r
- int index = state.index;\r
- switch (index < source.length ? source[index] : '\0') {\r
- case '^':\r
- state.index = index + 1;\r
- ren = new RENode(state, REOP_BOL, null);\r
- ren.flags |= RENode.ANCHORED;\r
- return ren;\r
-\r
- case '$':\r
- state.index = index + 1;\r
- return new RENode(state,\r
- (index == state.indexBegin ||\r
- ((source[index-1] == '(' ||\r
- source[index-1] == '|') && /*balance)*/\r
- (index - 1 == state.indexBegin ||\r
- source[index-2] != '\\')))\r
- ? REOP_EOLONLY\r
- : REOP_EOL,\r
- null);\r
-\r
- case '\\':\r
- switch (++index < source.length ? source[index] : '\0') {\r
- case 'b':\r
- op = REOP_WBDRY;\r
- break;\r
- case 'B':\r
- op = REOP_WNONBDRY;\r
- break;\r
- default:\r
- return parseQuantAtom(state);\r
- }\r
-\r
- /*\r
- * Word boundaries and non-boundaries are flagged as non-empty\r
- * so they will be prefixed by an anchoring node.\r
- */\r
- state.index = index + 1;\r
- ren = new RENode(state, op, null);\r
- ren.flags |= RENode.NONEMPTY;\r
- return ren;\r
-\r
- default:;\r
- }\r
- return parseQuantAtom(state);\r
- }\r
-\r
- /*\r
- * quantatom: atom An unquantified atom.\r
- * quantatom '{' n ',' m '}'\r
- * Atom must occur between n and m times.\r
- * quantatom '{' n ',' '}' Atom must occur at least n times.\r
- * quantatom '{' n '}' Atom must occur exactly n times.\r
- * quantatom '*' Zero or more times (same as {0,}).\r
- * quantatom '+' One or more times (same as {1,}).\r
- * quantatom '?' Zero or one time (same as {0,1}).\r
- *\r
- * any of which can be optionally followed by '?' for ungreedy\r
- */\r
- RENode parseQuantAtom(CompilerState state) {\r
- RENode ren = parseAtom(state);\r
- if (ren == null)\r
- return null;\r
-\r
- int up;\r
- char c;\r
- RENode ren2;\r
- int min, max;\r
- char[] source = state.source;\r
- int index = state.index;\r
- loop:\r
- while (index < source.length) {\r
- switch (source[index]) {\r
- case '{':\r
- if (++index == source.length || !isDigit(c = source[index])) {\r
- reportError("msg.bad.quant",\r
- String.valueOf(source[state.index]), state);\r
- return null;\r
- }\r
- min = unDigit(c);\r
- while (++index < source.length && isDigit(c = source[index])) {\r
- min = 10 * min + unDigit(c);\r
- if ((min >> 16) != 0) {\r
- reportError("msg.overlarge.max", tail(source, index),\r
- state);\r
- return null;\r
- }\r
- }\r
- if (source[index] == ',') {\r
- up = ++index;\r
- if (isDigit(source[index])) {\r
- max = unDigit(source[index]);\r
- while (isDigit(c = source[++index])) {\r
- max = 10 * max + unDigit(c);\r
- if ((max >> 16) != 0) {\r
- reportError("msg.overlarge.max",\r
- String.valueOf(source[up]), state);\r
- return null;\r
- }\r
- }\r
- if (max == 0) {\r
- reportError("msg.zero.quant",\r
- tail(source, state.index), state);\r
- return null;\r
- }\r
- if (min > max) {\r
- reportError("msg.max.lt.min", tail(source, up), state);\r
- return null;\r
- }\r
- } else {\r
- /* 0 means no upper bound. */\r
- max = 0;\r
- }\r
- } else {\r
- /* Exactly n times. */\r
- if (min == 0) {\r
- reportError("msg.zero.quant",\r
- tail(source, state.index), state);\r
- return null;\r
- }\r
- max = min;\r
- }\r
- if (source[index] != '}') {\r
- reportError("msg.unterm.quant",\r
- String.valueOf(source[state.index]), state);\r
- return null;\r
- }\r
- index++;\r
-\r
- ren2 = new RENode(state, REOP_QUANT, ren);\r
- if (min > 0 && (ren.flags & RENode.NONEMPTY) != 0)\r
- ren2.flags |= RENode.NONEMPTY;\r
- ren2.min = (short) min;\r
- ren2.max = (short) max;\r
- ren = ren2;\r
- break;\r
-\r
- case '*':\r
- index++;\r
- ren = new RENode(state, REOP_STAR, ren);\r
- break;\r
-\r
- case '+':\r
- index++;\r
- ren2 = new RENode(state, REOP_PLUS, ren);\r
- if ((ren.flags & RENode.NONEMPTY) != 0)\r
- ren2.flags |= RENode.NONEMPTY;\r
- ren = ren2;\r
- break;\r
-\r
- case '?':\r
- index++;\r
- ren = new RENode(state, REOP_OPT, ren);\r
- break;\r
-\r
- default:\r
- break loop;\r
- }\r
- if ((index < source.length) && (source[index] == '?')) {\r
- ren.flags |= RENode.MINIMAL;\r
- index++;\r
- }\r
- }\r
-\r
- state.index = index;\r
- return ren;\r
- }\r
-\r
- /*\r
- * atom: '(' regexp ')' A parenthesized regexp (what matched\r
- * can be addressed using a backreference,\r
- * see '\' n below).\r
- * '.' Matches any char except '\n'.\r
- * '[' classlist ']' A character class.\r
- * '[' '^' classlist ']' A negated character class.\r
- * '\f' Form Feed.\r
- * '\n' Newline (Line Feed).\r
- * '\r' Carriage Return.\r
- * '\t' Horizontal Tab.\r
- * '\v' Vertical Tab.\r
- * '\d' A digit (same as [0-9]).\r
- * '\D' A non-digit.\r
- * '\w' A word character, [0-9a-z_A-Z].\r
- * '\W' A non-word character.\r
- * '\s' A whitespace character, [ \b\f\n\r\t\v].\r
- * '\S' A non-whitespace character.\r
- * '\' n A backreference to the nth (n decimal\r
- * and positive) parenthesized expression.\r
- * '\' octal An octal escape sequence (octal must be\r
- * two or three digits long, unless it is\r
- * 0 for the null character).\r
- * '\x' hex A hex escape (hex must be two digits).\r
- * '\c' ctrl A control character, ctrl is a letter.\r
- * '\' literalatomchar Any character except one of the above\r
- * that follow '\' in an atom.\r
- * otheratomchar Any character not first among the other\r
- * atom right-hand sides.\r
- */\r
- static final String metachars = "|^${*+?().[\\";\r
- static final String closurechars = "{*+?";\r
-\r
- RENode parseAtom(CompilerState state) {\r
- int num = 0, len;\r
- RENode ren = null;\r
- RENode ren2;\r
- char c;\r
- byte op;\r
-\r
- boolean skipCommon = false;\r
- boolean doFlat = false;\r
-\r
- char[] source = state.source;\r
- int index = state.index;\r
- int ocp = index;\r
- if (index == source.length) {\r
- state.index = index;\r
- return new RENode(state, REOP_EMPTY, null);\r
- }\r
- switch (source[index]) {\r
- /* handle /|a/ by returning an empty node for the leftside */\r
- case '|':\r
- return new RENode(state, REOP_EMPTY, null);\r
-\r
- case '(':\r
- op = REOP_END;\r
- if (source[index + 1] == '?') {\r
- switch (source[index + 2]) {\r
- case ':' :\r
- op = REOP_LPARENNON;\r
- break;\r
- case '=' :\r
- op = REOP_ASSERT;\r
- break;\r
- case '!' :\r
- op = REOP_ASSERT_NOT;\r
- break;\r
- }\r
- }\r
- if (op == REOP_END) {\r
- op = REOP_LPAREN;\r
- num = state.parenCount++; /* \1 is numbered 0, etc. */\r
- index++;\r
- }\r
- else\r
- index += 3;\r
- state.index = index;\r
- /* Handle empty paren */\r
- if (source[index] == ')') {\r
- ren2 = new RENode(state, REOP_EMPTY, null);\r
- }\r
- else {\r
- ren2 = parseRegExp(state);\r
- if (ren2 == null)\r
- return null;\r
- index = state.index;\r
- if (index >= source.length || source[index] != ')') {\r
- reportError("msg.unterm.paren", tail(source, ocp), state);\r
- return null;\r
- }\r
- }\r
- index++;\r
- ren = new RENode(state, op, ren2);\r
- ren.flags = (byte) (ren2.flags & (RENode.ANCHORED |\r
- RENode.NONEMPTY));\r
- ren.num = num;\r
- if ((op == REOP_LPAREN) || (op == REOP_LPARENNON)) {\r
- /* Assume RPAREN ops immediately succeed LPAREN ops */\r
- ren2 = new RENode(state, (byte)(op + 1), null);\r
- setNext(state, ren, ren2);\r
- ren2.num = num;\r
- }\r
- break;\r
-\r
- case '.':\r
- ++index;\r
- op = REOP_DOT;\r
- if ((index < source.length) && (source[index] == '*')) {\r
- index++;\r
- op = REOP_DOTSTAR;\r
- if ((index < source.length) && (source[index] == '?')) {\r
- index++;\r
- op = REOP_DOTSTARMIN;\r
- }\r
- }\r
- ren = new RENode(state, op, null);\r
- if (ren.op == REOP_DOT)\r
- ren.flags = RENode.SINGLE | RENode.NONEMPTY;\r
- break;\r
-\r
- case '[':\r
- /* A char class must have at least one char in it. */\r
- if (++index == source.length) {\r
- reportError("msg.unterm.class", tail(source, ocp), state);\r
- return null;\r
- }\r
- c = source[index];\r
- ren = new RENode(state, REOP_CCLASS, new Integer(index));\r
-\r
- /* A negated class must have at least one char in it after the ^. */\r
- if (c == '^' && ++index == source.length) {\r
- reportError("msg.unterm.class", tail(source, ocp), state);\r
- return null;\r
- }\r
-\r
- for (;;) {\r
- if (++index == source.length) {\r
- reportError("msg.unterm.paren", tail(source, ocp), state);\r
- return null;\r
- }\r
- c = source[index];\r
- if (c == ']')\r
- break;\r
- if (c == '\\' && index+1 != source.length)\r
- index++;\r
- }\r
- ren.kid2 = index++;\r
-\r
- /* Since we rule out [] and [^], we can set the non-empty flag. */\r
- ren.flags = RENode.SINGLE | RENode.NONEMPTY;\r
- break;\r
-\r
- case '\\':\r
- if (++index == source.length) {\r
- Context.reportError(ScriptRuntime.getMessage("msg.trail.backslash",\r
- null));\r
- return null;\r
- }\r
- c = source[index];\r
- switch (c) {\r
- case 'f':\r
- case 'n':\r
- case 'r':\r
- case 't':\r
- case 'v':\r
- c = getEscape(c);\r
- ren = new RENode(state, REOP_FLAT1, null);\r
- break;\r
- case 'd':\r
- ren = new RENode(state, REOP_DIGIT, null);\r
- break;\r
- case 'D':\r
- ren = new RENode(state, REOP_NONDIGIT, null);\r
- break;\r
- case 'w':\r
- ren = new RENode(state, REOP_ALNUM, null);\r
- break;\r
- case 'W':\r
- ren = new RENode(state, REOP_NONALNUM, null);\r
- break;\r
- case 's':\r
- ren = new RENode(state, REOP_SPACE, null);\r
- break;\r
- case 'S':\r
- ren = new RENode(state, REOP_NONSPACE, null);\r
- break;\r
-\r
- case '0':\r
- case '1':\r
- case '2':\r
- case '3':\r
- case '4':\r
- case '5':\r
- case '6':\r
- case '7':\r
- case '8':\r
- case '9':\r
- /*\r
- Yuk. Keeping the old style \n interpretation for 1.2\r
- compatibility.\r
- */\r
- if ((state.cx.getLanguageVersion() != Context.VERSION_DEFAULT)\r
- && (state.cx.getLanguageVersion() <= Context.VERSION_1_4)) {\r
- switch (c) { \r
- case '0':\r
- state.index = index;\r
- num = doOctal(state);\r
- index = state.index;\r
- ren = new RENode(state, REOP_FLAT1, null);\r
- c = (char) num;\r
- break;\r
-\r
- case '1':\r
- case '2':\r
- case '3':\r
- case '4':\r
- case '5':\r
- case '6':\r
- case '7':\r
- case '8':\r
- case '9':\r
- num = unDigit(c);\r
- len = 1;\r
- while (++index < source.length\r
- && isDigit(c = source[index])) {\r
- num = 10 * num + unDigit(c);\r
- len++;\r
- }\r
- /* n in [8-9] and > count of parenetheses, then revert to\r
- '8' or '9', ignoring the '\' */\r
- if (((num == 8) || (num == 9)) && (num > state.parenCount)) {\r
- ocp = --index; /* skip beyond the '\' */\r
- doFlat = true;\r
- skipCommon = true;\r
- break; \r
- }\r
- /* more than 1 digit, or a number greater than\r
- the count of parentheses => it's an octal */\r
- if ((len > 1) || (num > state.parenCount)) {\r
- state.index = ocp;\r
- num = doOctal(state);\r
- index = state.index;\r
- ren = new RENode(state, REOP_FLAT1, null);\r
- c = (char) num;\r
- break;\r
- }\r
- index--;\r
- ren = new RENode(state, REOP_BACKREF, null);\r
- ren.num = num - 1; /* \1 is numbered 0, etc. */\r
-\r
- /* Avoid common chr- and flags-setting \r
- code after switch. */\r
- ren.flags = RENode.NONEMPTY;\r
- skipCommon = true;\r
- break;\r
- }\r
- }\r
- else {\r
- if (c == '0') {\r
- ren = new RENode(state, REOP_FLAT1, null);\r
- c = 0;\r
- }\r
- else {\r
- num = unDigit(c);\r
- len = 1;\r
- while (++index < source.length\r
- && isDigit(c = source[index])) {\r
- num = 10 * num + unDigit(c);\r
- len++;\r
- }\r
- index--;\r
- ren = new RENode(state, REOP_BACKREF, null);\r
- ren.num = num - 1; /* \1 is numbered 0, etc. */\r
- /* Avoid common chr- and flags-setting \r
- code after switch. */\r
- ren.flags = RENode.NONEMPTY;\r
- skipCommon = true;\r
- }\r
- }\r
- break;\r
- \r
- case 'x':\r
- ocp = index;\r
- if (++index < source.length && isHex(c = source[index])) {\r
- num = unHex(c);\r
- if (++index < source.length && isHex(c = source[index])) {\r
- num <<= 4;\r
- num += unHex(c);\r
- } else {\r
- if ((state.cx.getLanguageVersion()\r
- != Context.VERSION_DEFAULT)\r
- && (state.cx.getLanguageVersion() \r
- <= Context.VERSION_1_4)) \r
- index--; /* back up so index points to last hex char */\r
- else { /* ecma 2 requires pairs of hex digits. */\r
- index = ocp;\r
- num = 'x';\r
- }\r
- }\r
- } else {\r
- index = ocp; /* \xZZ is xZZ (Perl does \0ZZ!) */\r
- num = 'x';\r
- }\r
- ren = new RENode(state, REOP_FLAT1, null);\r
- c = (char)num;\r
- break;\r
-\r
- case 'c':\r
- c = source[++index];\r
- if (!('A' <= c && c <= 'Z') && !('a' <= c && c <= 'z')) {\r
- index -= 2;\r
- ocp = index;\r
- doFlat = true;\r
- skipCommon = true;\r
- break;\r
- }\r
- c = Character.toUpperCase(c);\r
- c = (char) (c ^ 64); // JS_TOCTRL\r
- ren = new RENode(state, REOP_FLAT1, null);\r
- break;\r
-\r
- case 'u':\r
- if (index+4 < source.length &&\r
- isHex(source[index+1]) && isHex(source[index+2]) &&\r
- isHex(source[index+3]) && isHex(source[index+4]))\r
- {\r
- num = (((((unHex(source[index+1]) << 4) +\r
- unHex(source[index+2])) << 4) +\r
- unHex(source[index+3])) << 4) +\r
- unHex(source[index+4]);\r
- c = (char) num;\r
- index += 4;\r
- ren = new RENode(state, REOP_FLAT1, null);\r
- break;\r
- }\r
-\r
- /* Unlike Perl \\xZZ, we take \\uZZZ to be literal-u then ZZZ. */\r
- ocp = index;\r
- doFlat = true;\r
- skipCommon = true;\r
- break;\r
-\r
- default:\r
- ocp = index;\r
- doFlat = true;\r
- skipCommon = true;\r
- break;\r
- }\r
-\r
- /* Common chr- and flags-setting code for escape opcodes. */\r
- if (ren != null && !skipCommon) {\r
- ren.chr = c;\r
- ren.flags = RENode.SINGLE | RENode.NONEMPTY;\r
- }\r
- skipCommon = false;\r
-\r
- if (!doFlat) {\r
- /* Skip to next unparsed char. */\r
- index++;\r
- break;\r
- }\r
-\r
- /* fall through since doFlat was true */\r
- doFlat = false;\r
-\r
- default:\r
- while (++index != source.length &&\r
- metachars.indexOf(source[index]) == -1)\r
- ;\r
- len = (int)(index - ocp);\r
- if (index != source.length && len > 1 &&\r
- closurechars.indexOf(source[index]) != -1)\r
- {\r
- index--;\r
- len--;\r
- }\r
- if (len > REOP_FLATLEN_MAX) {\r
- len = REOP_FLATLEN_MAX;\r
- index = ocp + len;\r
- }\r
- ren = new RENode(state, len == 1 ? REOP_FLAT1 : REOP_FLAT,\r
- new Integer(ocp));\r
- ren.flags = RENode.NONEMPTY;\r
- if (len > 1) {\r
- ren.kid2 = index;\r
- } else {\r
- ren.flags |= RENode.SINGLE;\r
- ren.chr = source[ocp];\r
- }\r
- break;\r
- }\r
-\r
- state.index = index;\r
- return ren;\r
- }\r
-\r
- private int doOctal(CompilerState state) {\r
- char[] source = state.source;\r
- int index = state.index;\r
- int tmp, num = 0;\r
- char c;\r
- while (++index < source.length && '0' <= (c = source[index]) &&\r
- c <= '7')\r
- {\r
- tmp = 8 * num + (int)(c - '0');\r
- if (tmp > 0377) {\r
- break;\r
- }\r
- num = tmp;\r
- }\r
- index--;\r
- state.index = index;\r
- return num;\r
- }\r
-\r
- static char getEscape(char c) {\r
- switch (c) {\r
- case 'b':\r
- return '\b';\r
- case 'f':\r
- return '\f';\r
- case 'n':\r
- return '\n';\r
- case 'r':\r
- return '\r';\r
- case 't':\r
- return '\t';\r
- case 'v':\r
- return (char) 11; // '\v' is not vtab in Java\r
- }\r
- throw new RuntimeException();\r
- }\r
-\r
- static public boolean isDigit(char c) {\r
- return '0' <= c && c <= '9';\r
- }\r
-\r
- static int unDigit(char c) {\r
- return c - '0';\r
- }\r
-\r
- static boolean isHex(char c) {\r
- return ('0' <= c && c <= '9') || ('a' <= c && c <= 'f') ||\r
- ('A' <= c && c <= 'F');\r
- }\r
-\r
- static int unHex(char c) {\r
- if ('0' <= c && c <= '9')\r
- return c - '0';\r
- return 10 + Character.toLowerCase(c) - 'a';\r
- }\r
-\r
- static boolean isWord(char c) {\r
- return Character.isLetter(c) || isDigit(c) || c == '_';\r
- }\r
-\r
- private String tail(char[] a, int i) {\r
- return new String(a, i, a.length - i);\r
- }\r
-\r
- private static boolean matchChar(int flags, char c, char c2) {\r
- if (c == c2)\r
- return true;\r
- else\r
- if ((flags & FOLD) != 0) {\r
- c = Character.toUpperCase(c);\r
- c2 = Character.toUpperCase(c2);\r
- return c == c2 ||\r
- Character.toLowerCase(c) == Character.toLowerCase(c2);\r
- }\r
- else\r
- return false;\r
- }\r
- \r
- \r
- int greedyRecurse(GreedyState grState, int index, int previousKid) {\r
- int kidMatch;\r
- int match;\r
- int num;\r
-\r
- /*\r
- * when the kid match fails, we reset the parencount and run any \r
- * previously succesful kid in order to restablish it's paren\r
- * contents.\r
- */\r
-\r
- num = grState.state.parenCount;\r
- kidMatch = matchRENodes(grState.state, grState.kid, grState.next, index);\r
- if (kidMatch == -1) {\r
- match = matchRENodes(grState.state, grState.next, grState.stop, index);\r
- if (match != -1) {\r
- grState.state.parenCount = num;\r
- if (previousKid != -1)\r
- matchRENodes(grState.state, grState.kid, grState.next, previousKid);\r
- return index;\r
- }\r
- else\r
- return -1;\r
- }\r
- else {\r
- if (kidMatch == index) {\r
- if (previousKid != -1)\r
- matchRENodes(grState.state, grState.kid, grState.next, previousKid);\r
- return kidMatch; /* no point pursuing an empty match forever */\r
- }\r
- if ((grState.maxKid == 0) || (++grState.kidCount < grState.maxKid)) {\r
- match = greedyRecurse(grState, kidMatch, index);\r
- if (match != -1) return match;\r
- if (grState.maxKid != 0) --grState.kidCount;\r
- }\r
- grState.state.parenCount = num;\r
- match = matchRENodes(grState.state, grState.next, grState.stop, kidMatch);\r
- if (match != -1) {\r
- matchRENodes(grState.state, grState.kid, grState.next, index);\r
- return kidMatch;\r
- }\r
- else\r
- return -1;\r
- }\r
- }\r
-\r
- int matchGreedyKid(MatchState state, RENode ren, RENode stop,\r
- int kidCount, int index, int previousKid) {\r
- GreedyState grState = new GreedyState();\r
- grState.state = state;\r
- grState.kid = (RENode)ren.kid;\r
- grState.next = ren.next;\r
- grState.maxKid = (ren.op == REOP_QUANT) ? ren.max : 0;\r
- /*\r
- * We try to match the sub-tree to completion first, and if that\r
- * doesn't work, match only up to the given end of the sub-tree.\r
- */\r
- grState.stop = null;\r
- grState.kidCount = kidCount;\r
- int match = greedyRecurse(grState, index, previousKid);\r
- if (match != -1 || stop == null) return match;\r
- grState.kidCount = kidCount;\r
- grState.stop = stop;\r
- return greedyRecurse(grState, index, previousKid);\r
- }\r
-\r
- int matchNonGreedyKid(MatchState state, RENode ren,\r
- int kidCount, int maxKid,\r
- int index) {\r
- int kidMatch;\r
- int match;\r
-\r
- match = matchRENodes(state, ren.next, null, index);\r
- if (match != -1) return index;\r
- kidMatch = matchRENodes(state, (RENode)ren.kid, ren.next, index);\r
- if (kidMatch == -1) return -1;\r
- if (kidMatch == index) return kidMatch; /* no point pursuing an empty match forever */\r
- return matchNonGreedyKid(state, ren, kidCount, maxKid, kidMatch);\r
- }\r
-\r
- int matchRENodes(MatchState state, RENode ren, RENode stop, int index) {\r
- int num;\r
- char[] input = state.input;\r
- while ((ren != stop) && (ren != null)) {\r
- switch (ren.op) {\r
- case REOP_EMPTY:\r
- break;\r
- case REOP_ALT: {\r
- if (ren.next.op != REOP_ALT) {\r
- ren = (RENode)ren.kid;\r
- continue;\r
- }\r
- else {\r
- num = state.parenCount;\r
- int kidMatch = matchRENodes(state, (RENode)ren.kid,\r
- stop, index);\r
- if (kidMatch != -1) return kidMatch;\r
- for (int i = num; i < state.parenCount; i++)\r
- state.parens[i] = null;\r
- state.parenCount = num;\r
- }\r
- }\r
- break;\r
- case REOP_QUANT: {\r
- int lastKid = -1;\r
- for (num = 0; num < ren.min; num++) {\r
- int kidMatch = matchRENodes(state, (RENode)ren.kid,\r
- ren.next, index);\r
- if (kidMatch == -1)\r
- return -1;\r
- else {\r
- lastKid = index;\r
- index = kidMatch;\r
- }\r
- }\r
- if (num == ren.max)\r
- // Have matched the exact count required, \r
- // need to match the rest of the regexp.\r
- break;\r
- if ((ren.flags & RENode.MINIMAL) == 0) {\r
- int kidMatch = matchGreedyKid(state, ren, stop, num,\r
- index, lastKid);\r
- if (kidMatch == -1)\r
- index = matchRENodes(state, (RENode)ren.kid,\r
- ren.next, index);\r
- else \r
- index = kidMatch;\r
- } \r
- else {\r
- index = matchNonGreedyKid(state, ren, num,\r
- ren.max, index);\r
- } \r
- if (index == -1) return -1;\r
- }\r
- break;\r
- case REOP_PLUS: {\r
- int kidMatch = matchRENodes(state, (RENode)ren.kid, \r
- ren.next, index);\r
- if (kidMatch == -1)\r
- return -1;\r
- if ((ren.flags & RENode.MINIMAL) == 0) {\r
- kidMatch = matchGreedyKid(state, ren, stop, 1, \r
- kidMatch, index);\r
- if (kidMatch == -1)\r
- index = matchRENodes(state,(RENode)ren.kid,\r
- ren.next, index);\r
- else\r
- index = kidMatch;\r
- }\r
- else\r
- index = matchNonGreedyKid(state, ren, 1, 0, kidMatch);\r
- if (index == -1) return -1;\r
- }\r
- break;\r
- case REOP_STAR: \r
- if ((ren.flags & RENode.MINIMAL) == 0) {\r
- int kidMatch = matchGreedyKid(state, ren, stop, 0, index, -1);\r
- if (kidMatch != -1)\r
- index = kidMatch;\r
- }\r
- else {\r
- index = matchNonGreedyKid(state, ren, 0, 0, index);\r
- if (index == -1) return -1;\r
- }\r
- break;\r
- case REOP_OPT: {\r
- int saveNum = state.parenCount;\r
- if (((ren.flags & RENode.MINIMAL) != 0)) {\r
- int restMatch = matchRENodes(state, ren.next,\r
- stop, index);\r
- if (restMatch != -1) return restMatch;\r
- }\r
- int kidMatch = matchRENodes(state, (RENode)ren.kid,\r
- ren.next, index);\r
- if (kidMatch == -1) {\r
- state.parenCount = saveNum;\r
- break;\r
- }\r
- else {\r
- int restMatch = matchRENodes(state, ren.next,\r
- stop, kidMatch);\r
- if (restMatch == -1) {\r
- // need to undo the result of running the kid\r
- state.parenCount = saveNum;\r
- break;\r
- }\r
- else\r
- return restMatch;\r
- }\r
- }\r
- case REOP_LPARENNON:\r
- ren = (RENode)ren.kid;\r
- continue;\r
- case REOP_RPARENNON:\r
- break;\r
- case REOP_LPAREN: {\r
- num = ren.num;\r
- ren = (RENode)ren.kid;\r
- SubString parsub = state.parens[num];\r
- if (parsub == null) {\r
- parsub = state.parens[num] = new SubString();\r
- parsub.charArray = input;\r
- }\r
- parsub.index = index;\r
- parsub.length = 0;\r
- if (num >= state.parenCount)\r
- state.parenCount = num + 1;\r
- continue;\r
- }\r
- case REOP_RPAREN: {\r
- num = ren.num;\r
- SubString parsub = state.parens[num];\r
- if (parsub == null)\r
- throw new RuntimeException("Paren problem");\r
- parsub.length = index - parsub.index;\r
- break;\r
- }\r
- case REOP_ASSERT: {\r
- int kidMatch = matchRENodes(state, (RENode)ren.kid,\r
- ren.next, index);\r
- if (kidMatch == -1) return -1;\r
- break;\r
- }\r
- case REOP_ASSERT_NOT: {\r
- int kidMatch = matchRENodes(state, (RENode)ren.kid,\r
- ren.next, index);\r
- if (kidMatch != -1) return -1;\r
- break;\r
- }\r
- case REOP_BACKREF: {\r
- num = ren.num;\r
- if (num >= state.parens.length) {\r
- Context.reportError(\r
- ScriptRuntime.getMessage(\r
- "msg.bad.backref", null));\r
- return -1;\r
- }\r
- SubString parsub = state.parens[num];\r
- if (parsub == null)\r
- parsub = state.parens[num] = new SubString();\r
- int length = parsub.length;\r
- for (int i = 0; i < length; i++, index++) {\r
- if (index >= input.length) {\r
- return state.noMoreInput();\r
- }\r
- if (!matchChar(state.flags, input[index],\r
- parsub.charArray[parsub.index + i]))\r
- return -1;\r
- }\r
- }\r
- break;\r
- case REOP_CCLASS:\r
- if (index >= input.length) {\r
- return state.noMoreInput();\r
- }\r
- if (ren.bitmap == null) {\r
- char[] source = (ren.s != null) \r
- ? ren.s \r
- : this.source.toCharArray();\r
- ren.buildBitmap(state, source, ((state.flags & FOLD) != 0));\r
- }\r
- char c = input[index];\r
- int b = (c >>> 3);\r
- if (b >= ren.bmsize) {\r
- if (ren.kid2 == -1) // a ^ class\r
- index++;\r
- else\r
- return -1;\r
- } else {\r
- int bit = c & 7;\r
- bit = 1 << bit;\r
- if ((ren.bitmap[b] & bit) != 0)\r
- index++;\r
- else\r
- return -1;\r
- }\r
- break;\r
- case REOP_DOT:\r
- if (index >= input.length) {\r
- return state.noMoreInput();\r
- }\r
- if (input[index] != '\n')\r
- index++;\r
- else\r
- return -1;\r
- break;\r
- case REOP_DOTSTARMIN: {\r
- int cp2;\r
- for (cp2 = index; cp2 < input.length; cp2++) {\r
- int cp3 = matchRENodes(state, ren.next, stop, cp2);\r
- if (cp3 != -1) return cp3;\r
- if (input[cp2] == '\n')\r
- return -1;\r
- }\r
- return state.noMoreInput();\r
- }\r
- case REOP_DOTSTAR: {\r
- int cp2;\r
- for (cp2 = index; cp2 < input.length; cp2++)\r
- if (input[cp2] == '\n')\r
- break;\r
- while (cp2 >= index) {\r
- int cp3 = matchRENodes(state, ren.next, stop, cp2);\r
- if (cp3 != -1) {\r
- index = cp2;\r
- break;\r
- }\r
- cp2--;\r
- }\r
- break;\r
- }\r
- case REOP_WBDRY:\r
- if (index == 0 || !isWord(input[index-1])) {\r
- if (index >= input.length)\r
- return state.noMoreInput();\r
- if (!isWord(input[index]))\r
- return -1;\r
- }\r
- else {\r
- if (index < input.length && isWord(input[index]))\r
- return -1;\r
- }\r
- break;\r
- case REOP_WNONBDRY:\r
- if (index == 0 || !isWord(input[index-1])) {\r
- if (index < input.length && isWord(input[index]))\r
- return -1;\r
- }\r
- else {\r
- if (index >= input.length)\r
- return state.noMoreInput();\r
- if (!isWord(input[index]))\r
- return -1;\r
- }\r
- break;\r
- case REOP_EOLONLY:\r
- case REOP_EOL: {\r
- if (index == input.length)\r
- ; // leave index;\r
- else {\r
- Context cx = Context.getCurrentContext();\r
- RegExpImpl reImpl = getImpl(cx);\r
- if ((reImpl.multiline)\r
- || ((state.flags & MULTILINE) != 0))\r
- if (input[index] == '\n')\r
- ;// leave index\r
- else\r
- return -1;\r
- else\r
- return -1;\r
- }\r
- }\r
- break;\r
- case REOP_BOL: {\r
- Context cx = Context.getCurrentContext();\r
- RegExpImpl reImpl = getImpl(cx);\r
- if (index != 0) {\r
- if (reImpl.multiline ||\r
- ((state.flags & MULTILINE) != 0)) {\r
- if (index >= input.length) {\r
- return state.noMoreInput();\r
- }\r
- if (input[index - 1] == '\n') {\r
- break;\r
- }\r
- }\r
- return -1;\r
- }\r
- // leave index\r
- }\r
- break;\r
- case REOP_DIGIT:\r
- if (index >= input.length) {\r
- return state.noMoreInput();\r
- }\r
- if (!isDigit(input[index])) return -1;\r
- index++;\r
- break;\r
- case REOP_NONDIGIT:\r
- if (index >= input.length) {\r
- return state.noMoreInput();\r
- }\r
- if (isDigit(input[index])) return -1;\r
- index++;\r
- break;\r
- case REOP_ALNUM:\r
- if (index >= input.length) {\r
- return state.noMoreInput();\r
- }\r
- if (!isWord(input[index])) return -1;\r
- index++;\r
- break;\r
- case REOP_NONALNUM:\r
- if (index >= input.length) {\r
- return state.noMoreInput();\r
- }\r
- if (isWord(input[index])) return -1;\r
- index++;\r
- break;\r
- case REOP_SPACE:\r
- if (index >= input.length) {\r
- return state.noMoreInput();\r
- }\r
- if (!(TokenStream.isJSSpace(input[index]) ||\r
- TokenStream.isJSLineTerminator(input[index])))\r
- return -1;\r
- index++;\r
- break;\r
- case REOP_NONSPACE:\r
- if (index >= input.length) {\r
- return state.noMoreInput();\r
- }\r
- if (TokenStream.isJSSpace(input[index]) ||\r
- TokenStream.isJSLineTerminator(input[index]))\r
- return -1;\r
- index++;\r
- break;\r
- case REOP_FLAT1:\r
- if (index >= input.length) {\r
- return state.noMoreInput();\r
- }\r
- if (!matchChar(state.flags, ren.chr, input[index]))\r
- return -1;\r
- index++;\r
- break;\r
- case REOP_FLAT: {\r
- char[] source = (ren.s != null)\r
- ? ren.s\r
- : this.source.toCharArray();\r
- int start = ((Integer)ren.kid).intValue();\r
- int length = ren.kid2 - start;\r
- for (int i = 0; i < length; i++, index++) {\r
- if (index >= input.length) {\r
- return state.noMoreInput();\r
- }\r
- if (!matchChar(state.flags, input[index],\r
- source[start + i]))\r
- return -1;\r
- }\r
- }\r
- break;\r
- case REOP_JUMP:\r
- break;\r
- case REOP_END:\r
- break;\r
- default :\r
- throw new RuntimeException("Unsupported by node matcher");\r
- }\r
- ren = ren.next;\r
- }\r
- return index;\r
- }\r
-\r
- int matchRegExp(MatchState state, RENode ren, int index) {\r
- // have to include the position beyond the last character\r
- // in order to detect end-of-input/line condition\r
- for (int i = index; i <= state.input.length; i++) { \r
- state.skipped = i - index;\r
- state.parenCount = 0;\r
- int result = matchRENodes(state, ren, null, i);\r
- if (result != -1)\r
- return result;\r
- }\r
- return -1;\r
- }\r
-\r
- /*\r
- * indexp is assumed to be an array of length 1\r
- */\r
- Object executeRegExp(Context cx, Scriptable scopeObj, RegExpImpl res,\r
- String str, int indexp[], int matchType) \r
- {\r
- NativeRegExp re = this;\r
- /*\r
- * Initialize a CompilerState to minimize recursive argument traffic.\r
- */\r
- MatchState state = new MatchState();\r
- state.inputExhausted = false;\r
- state.anchoring = false;\r
- state.flags = re.flags;\r
- state.scope = scopeObj;\r
-\r
- char[] charArray = str.toCharArray();\r
- int start = indexp[0];\r
- if (start > charArray.length)\r
- start = charArray.length;\r
- int index = start;\r
- state.cpbegin = 0;\r
- state.cpend = charArray.length;\r
- state.start = start;\r
- state.skipped = 0;\r
- state.input = charArray;\r
-\r
- state.parenCount = 0;\r
- state.maybeParens = new SubString[re.parenCount];\r
- state.parens = new SubString[re.parenCount];\r
- // We allocate the elements of "parens" and "maybeParens" lazily in\r
- // the Java port since we don't have arenas.\r
-\r
- /*\r
- * Call the recursive matcher to do the real work. Return null on mismatch\r
- * whether testing or not. On match, return an extended Array object.\r
- */\r
- index = matchRegExp(state, ren, index);\r
- if (index == -1) {\r
- if (matchType != PREFIX || !state.inputExhausted) return null;\r
- return Undefined.instance;\r
- }\r
- int i = index - state.cpbegin;\r
- indexp[0] = i;\r
- int matchlen = i - (start + state.skipped);\r
- int ep = index; \r
- index -= matchlen;\r
- Object result;\r
- Scriptable obj;\r
-\r
- if (matchType == TEST) {\r
- /*\r
- * Testing for a match and updating cx.regExpImpl: don't allocate\r
- * an array object, do return true.\r
- */\r
- result = Boolean.TRUE;\r
- obj = null;\r
- }\r
- else {\r
- /*\r
- * The array returned on match has element 0 bound to the matched\r
- * string, elements 1 through state.parenCount bound to the paren\r
- * matches, an index property telling the length of the left context,\r
- * and an input property referring to the input string.\r
- */\r
- Scriptable scope = getTopLevelScope(scopeObj);\r
- result = ScriptRuntime.newObject(cx, scope, "Array", null);\r
- obj = (Scriptable) result;\r
-\r
- String matchstr = new String(charArray, index, matchlen);\r
- obj.put(0, obj, matchstr);\r
- }\r
-\r
- if (state.parenCount > re.parenCount)\r
- throw new RuntimeException();\r
- if (state.parenCount == 0) {\r
- res.parens.setSize(0);\r
- res.lastParen = SubString.emptySubString;\r
- } else {\r
- SubString parsub = null;\r
- int num;\r
- res.parens.setSize(state.parenCount);\r
- for (num = 0; num < state.parenCount; num++) {\r
- parsub = state.parens[num];\r
- res.parens.setElementAt(parsub, num);\r
- if (matchType == TEST) continue;\r
- String parstr = parsub == null ? "": parsub.toString();\r
- obj.put(num+1, obj, parstr);\r
- }\r
- res.lastParen = parsub;\r
- }\r
-\r
- if (! (matchType == TEST)) {\r
- /*\r
- * Define the index and input properties last for better for/in loop\r
- * order (so they come after the elements).\r
- */\r
- obj.put("index", obj, new Integer(start + state.skipped));\r
- obj.put("input", obj, str);\r
- }\r
-\r
- if (res.lastMatch == null) {\r
- res.lastMatch = new SubString();\r
- res.leftContext = new SubString();\r
- res.rightContext = new SubString();\r
- }\r
- res.lastMatch.charArray = charArray;\r
- res.lastMatch.index = index;\r
- res.lastMatch.length = matchlen;\r
-\r
- res.leftContext.charArray = charArray;\r
- if (cx.getLanguageVersion() == Context.VERSION_1_2) {\r
- /*\r
- * JS1.2 emulated Perl4.0.1.8 (patch level 36) for global regexps used\r
- * in scalar contexts, and unintentionally for the string.match "list"\r
- * psuedo-context. On "hi there bye", the following would result:\r
- *\r
- * Language while(/ /g){print("$`");} s/ /$`/g\r
- * perl4.036 "hi", "there" "hihitherehi therebye"\r
- * perl5 "hi", "hi there" "hihitherehi therebye"\r
- * js1.2 "hi", "there" "hihitheretherebye"\r
- *\r
- * Insofar as JS1.2 always defined $` as "left context from the last\r
- * match" for global regexps, it was more consistent than perl4.\r
- */\r
- res.leftContext.index = start;\r
- res.leftContext.length = state.skipped;\r
- } else {\r
- /*\r
- * For JS1.3 and ECMAv2, emulate Perl5 exactly:\r
- *\r
- * js1.3 "hi", "hi there" "hihitherehi therebye"\r
- */\r
- res.leftContext.index = 0;\r
- res.leftContext.length = start + state.skipped;\r
- }\r
-\r
- res.rightContext.charArray = charArray;\r
- res.rightContext.index = ep;\r
- res.rightContext.length = state.cpend - ep;\r
-\r
- return result;\r
- }\r
-\r
- public byte getFlags() {\r
- return flags;\r
- }\r
-\r
- private void reportError(String msg, String arg, CompilerState state) {\r
- Object[] args = { arg };\r
- throw NativeGlobal.constructError(\r
- state.cx, "SyntaxError",\r
- ScriptRuntime.getMessage(msg, args),\r
- state.scope);\r
- }\r
-\r
- protected int getIdDefaultAttributes(int id) {\r
- switch (id) {\r
- case Id_lastIndex:\r
- return ScriptableObject.PERMANENT;\r
- case Id_source: \r
- case Id_global: \r
- case Id_ignoreCase: \r
- case Id_multiline: \r
- return ScriptableObject.PERMANENT | ScriptableObject.READONLY;\r
- }\r
- return super.getIdDefaultAttributes(id);\r
- }\r
- \r
- protected Object getIdValue(int id) {\r
- switch (id) {\r
- case Id_lastIndex: return wrap_long(0xffffffffL & lastIndex);\r
- case Id_source: return source;\r
- case Id_global: return wrap_boolean((flags & GLOB) != 0);\r
- case Id_ignoreCase: return wrap_boolean((flags & FOLD) != 0);\r
- case Id_multiline: return wrap_boolean((flags & MULTILINE) != 0);\r
- }\r
- return super.getIdValue(id);\r
- }\r
- \r
- protected void setIdValue(int id, Object value) {\r
- if (id == Id_lastIndex) {\r
- setLastIndex(ScriptRuntime.toInt32(value));\r
- return;\r
- }\r
- super.setIdValue(id, value);\r
- }\r
-\r
- void setLastIndex(int value) {\r
- lastIndex = value;\r
- }\r
- \r
- public int methodArity(int methodId) {\r
- if (prototypeFlag) {\r
- switch (methodId) {\r
- case Id_compile: return 1;\r
- case Id_toString: return 0;\r
- case Id_exec: return 1;\r
- case Id_test: return 1;\r
- case Id_prefix: return 1;\r
- }\r
- }\r
- return super.methodArity(methodId);\r
- }\r
-\r
- public Object execMethod(int methodId, IdFunction f, Context cx,\r
- Scriptable scope, Scriptable thisObj, \r
- Object[] args)\r
- throws JavaScriptException\r
- {\r
- if (prototypeFlag) {\r
- switch (methodId) {\r
- case Id_compile:\r
- return realThis(thisObj, f, false).compile(cx, scope, args);\r
- \r
- case Id_toString:\r
- return realThis(thisObj, f, true).toString();\r
- \r
- case Id_exec:\r
- return realThis(thisObj, f, false).exec(cx, scope, args);\r
- \r
- case Id_test:\r
- return realThis(thisObj, f, false).test(cx, scope, args);\r
- \r
- case Id_prefix:\r
- return realThis(thisObj, f, false).prefix(cx, scope, args);\r
- }\r
- }\r
- return super.execMethod(methodId, f, cx, scope, thisObj, args);\r
- }\r
-\r
- private NativeRegExp realThis(Scriptable thisObj, IdFunction f, \r
- boolean readOnly)\r
- {\r
- while (!(thisObj instanceof NativeRegExp)) {\r
- thisObj = nextInstanceCheck(thisObj, f, readOnly);\r
- }\r
- return (NativeRegExp)thisObj;\r
- }\r
-\r
- protected String getIdName(int id) {\r
- switch (id) {\r
- case Id_lastIndex: return "lastIndex";\r
- case Id_source: return "source";\r
- case Id_global: return "global";\r
- case Id_ignoreCase: return "ignoreCase";\r
- case Id_multiline: return "multiline";\r
- }\r
- \r
- if (prototypeFlag) {\r
- switch (id) {\r
- case Id_compile: return "compile";\r
- case Id_toString: return "toString";\r
- case Id_exec: return "exec";\r
- case Id_test: return "test";\r
- case Id_prefix: return "prefix";\r
- }\r
- }\r
- return null;\r
- }\r
-\r
- protected int maxInstanceId() { return MAX_INSTANCE_ID; }\r
-\r
-// #string_id_map#\r
-\r
- private static final int\r
- Id_lastIndex = 1,\r
- Id_source = 2,\r
- Id_global = 3,\r
- Id_ignoreCase = 4,\r
- Id_multiline = 5,\r
- \r
- MAX_INSTANCE_ID = 5;\r
-\r
- protected int mapNameToId(String s) {\r
- int id;\r
-// #generated# Last update: 2001-05-24 12:01:22 GMT+02:00\r
- L0: { id = 0; String X = null; int c;\r
- int s_length = s.length();\r
- if (s_length==6) {\r
- c=s.charAt(0);\r
- if (c=='g') { X="global";id=Id_global; }\r
- else if (c=='s') { X="source";id=Id_source; }\r
- }\r
- else if (s_length==9) {\r
- c=s.charAt(0);\r
- if (c=='l') { X="lastIndex";id=Id_lastIndex; }\r
- else if (c=='m') { X="multiline";id=Id_multiline; }\r
- }\r
- else if (s_length==10) { X="ignoreCase";id=Id_ignoreCase; }\r
- if (X!=null && X!=s && !X.equals(s)) id = 0;\r
- }\r
-// #/generated#\r
-// #/string_id_map#\r
-\r
- if (id != 0 || !prototypeFlag) { return id; }\r
-\r
-// #string_id_map#\r
-// #generated# Last update: 2001-05-24 12:01:22 GMT+02:00\r
- L0: { id = 0; String X = null; int c;\r
- L: switch (s.length()) {\r
- case 4: c=s.charAt(0);\r
- if (c=='e') { X="exec";id=Id_exec; }\r
- else if (c=='t') { X="test";id=Id_test; }\r
- break L;\r
- case 6: X="prefix";id=Id_prefix; break L;\r
- case 7: X="compile";id=Id_compile; break L;\r
- case 8: X="toString";id=Id_toString; break L;\r
- }\r
- if (X!=null && X!=s && !X.equals(s)) id = 0;\r
- }\r
-// #/generated#\r
- return id;\r
- }\r
-\r
- private static final int\r
- Id_compile = MAX_INSTANCE_ID + 1,\r
- Id_toString = MAX_INSTANCE_ID + 2,\r
- Id_exec = MAX_INSTANCE_ID + 3,\r
- Id_test = MAX_INSTANCE_ID + 4,\r
- Id_prefix = MAX_INSTANCE_ID + 5,\r
- \r
- MAX_PROTOTYPE_ID = MAX_INSTANCE_ID + 5;\r
-\r
-// #/string_id_map#\r
- private boolean prototypeFlag;\r
-\r
- private String source; /* locked source string, sans // */\r
- private int lastIndex; /* index after last match, for //g iterator */\r
- private int parenCount; /* number of parenthesized submatches */\r
- private byte flags; /* flags */\r
- private byte[] program; /* regular expression bytecode */\r
-\r
- RENode ren;\r
-}\r
-\r
-class CompilerState {\r
- CompilerState(String source, int flags, Context cx, Scriptable scope) {\r
- this.source = source.toCharArray();\r
- this.scope = scope;\r
- this.flags = flags;\r
- this.cx = cx;\r
- }\r
- Context cx;\r
- Scriptable scope;\r
- char[] source;\r
- int indexBegin;\r
- int index;\r
- int flags;\r
- int parenCount;\r
- int progLength;\r
- byte[] prog;\r
-}\r
-\r
-\r
-class RENode {\r
- public static final int ANCHORED = 0x01; /* anchored at the front */\r
- public static final int SINGLE = 0x02; /* matches a single char */\r
- public static final int NONEMPTY = 0x04; /* does not match empty string */\r
- public static final int ISNEXT = 0x08; /* ren is next after at least one node */\r
- public static final int GOODNEXT = 0x10; /* ren.next is a tree-like edge in the graph */\r
- public static final int ISJOIN = 0x20; /* ren is a join point in the graph */\r
- public static final int REALLOK = 0x40; /* REOP_FLAT owns tempPool space to realloc */\r
- public static final int MINIMAL = 0x80; /* un-greedy matching for ? * + {} */\r
-\r
- RENode(CompilerState state, byte op, Object kid) {\r
- this.op = op;\r
- this.kid = kid;\r
- }\r
- \r
- private void calcBMSize(char[] s, int index, int cp2, boolean fold) {\r
- char maxc = 0;\r
- while (index < cp2) {\r
- char c = s[index++];\r
- if (c == '\\') {\r
- if (index + 5 <= cp2 && s[index] == 'u'\r
- && NativeRegExp.isHex(s[index+1]) \r
- && NativeRegExp.isHex(s[index+2])\r
- && NativeRegExp.isHex(s[index+3]) \r
- && NativeRegExp.isHex(s[index+4]))\r
- {\r
- int x = (((((NativeRegExp.unHex(s[index+0]) << 4) +\r
- NativeRegExp.unHex(s[index+1])) << 4) +\r
- NativeRegExp.unHex(s[index+2])) << 4) +\r
- NativeRegExp.unHex(s[index+3]);\r
- c = (char) x;\r
- index += 5;\r
- } else {\r
- /*\r
- * Octal and hex escapes can't be > 255. Skip this\r
- * backslash and let the loop pass over the remaining\r
- * escape sequence as if it were text to match.\r
- */\r
- if (maxc < 255) maxc = 255;\r
- continue;\r
- }\r
- }\r
- if (fold) {\r
- /*\r
- * Don't assume that lowercase are above uppercase, or\r
- * that c is either even when c has upper and lowercase\r
- * versions.\r
- */\r
- char c2;\r
- if ((c2 = Character.toUpperCase(c)) > maxc)\r
- maxc = c2;\r
- if ((c2 = Character.toLowerCase(c2)) > maxc)\r
- maxc = c2;\r
- }\r
- if (c > maxc)\r
- maxc = c;\r
- }\r
- bmsize = (short)((maxc + NativeRegExp.JS_BITS_PER_BYTE) \r
- / NativeRegExp.JS_BITS_PER_BYTE);\r
- }\r
- \r
- private void matchBit(char c, int fill) {\r
- int i = (c) >> 3;\r
- byte b = (byte) (c & 7);\r
- b = (byte) (1 << b);\r
- if (fill != 0)\r
- bitmap[i] &= ~b;\r
- else\r
- bitmap[i] |= b;\r
- }\r
-\r
- private void checkRange(char lastc, int fill) {\r
- matchBit(lastc, fill);\r
- matchBit('-', fill);\r
- }\r
-\r
- void buildBitmap(MatchState state, char[] s, boolean fold) {\r
- int index = ((Integer) kid).intValue();\r
- int end = kid2;\r
- byte fill = 0;\r
- int i,n,ocp;\r
- \r
- boolean not = false;\r
- kid2 = 0;\r
- if (s[index] == '^') {\r
- not = true;\r
- kid2 = -1;\r
- index++;\r
- }\r
- \r
- calcBMSize(s, index, end, fold);\r
- bitmap = new byte[bmsize];\r
- if (not) {\r
- fill = (byte)0xff;\r
- for (i = 0; i < bmsize; i++)\r
- bitmap[i] = (byte)0xff;\r
- bitmap[0] = (byte)0xfe;\r
- }\r
- int nchars = bmsize * NativeRegExp.JS_BITS_PER_BYTE;\r
- char lastc = (char)nchars;\r
- boolean inrange = false;\r
- \r
- while (index < end) {\r
- char c = s[index++];\r
- if (c == '\\') {\r
- c = s[index++];\r
- switch (c) {\r
- case 'b':\r
- case 'f':\r
- case 'n':\r
- case 'r':\r
- case 't':\r
- case 'v':\r
- c = NativeRegExp.getEscape(c);\r
- break;\r
-\r
- case 'd':\r
- if (inrange)\r
- checkRange(lastc, fill);\r
- lastc = (char) nchars;\r
- for (c = '0'; c <= '9'; c++)\r
- matchBit(c, fill);\r
- continue;\r
-\r
- case 'D':\r
- if (inrange)\r
- checkRange(lastc, fill);\r
- lastc = (char) nchars;\r
- for (c = 0; c < '0'; c++)\r
- matchBit(c, fill);\r
- for (c = '9' + 1; c < nchars; c++)\r
- matchBit(c, fill);\r
- continue;\r
-\r
- case 'w':\r
- if (inrange)\r
- checkRange(lastc, fill);\r
- lastc = (char) nchars;\r
- for (c = 0; c < nchars; c++)\r
- if (NativeRegExp.isWord(c))\r
- matchBit(c, fill);\r
- continue;\r
-\r
- case 'W':\r
- if (inrange)\r
- checkRange(lastc, fill);\r
- lastc = (char) nchars;\r
- for (c = 0; c < nchars; c++)\r
- if (!NativeRegExp.isWord(c))\r
- matchBit(c, fill);\r
- continue;\r
-\r
- case 's':\r
- if (inrange)\r
- checkRange(lastc, fill);\r
- lastc = (char) nchars;\r
- for (c = 0; c < nchars; c++)\r
- if (Character.isWhitespace(c))\r
- matchBit(c, fill);\r
- continue;\r
-\r
- case 'S':\r
- if (inrange)\r
- checkRange(lastc, fill);\r
- lastc = (char) nchars;\r
- for (c = 0; c < nchars; c++)\r
- if (!Character.isWhitespace(c))\r
- matchBit(c, fill);\r
- continue;\r
-\r
- case '0':\r
- case '1':\r
- case '2':\r
- case '3':\r
- case '4':\r
- case '5':\r
- case '6':\r
- case '7':\r
- n = NativeRegExp.unDigit(c);\r
- ocp = index - 2;\r
- c = s[index];\r
- if ('0' <= c && c <= '7') {\r
- index++;\r
- n = 8 * n + NativeRegExp.unDigit(c);\r
-\r
- c = s[index];\r
- if ('0' <= c && c <= '7') {\r
- index++;\r
- i = 8 * n + NativeRegExp.unDigit(c);\r
- if (i <= 0377)\r
- n = i;\r
- else\r
- index--;\r
- }\r
- }\r
- c = (char) n;\r
- break;\r
-\r
- case 'x':\r
- ocp = index;\r
- if (index < s.length &&\r
- NativeRegExp.isHex(c = s[index++]))\r
- {\r
- n = NativeRegExp.unHex(c);\r
- if (index < s.length &&\r
- NativeRegExp.isHex(c = s[index++]))\r
- {\r
- n <<= 4;\r
- n += NativeRegExp.unHex(c);\r
- }\r
- } else {\r
- index = ocp; /* \xZZ is xZZ (Perl does \0ZZ!) */\r
- n = 'x';\r
- }\r
- c = (char) n;\r
- break;\r
-\r
- case 'u':\r
- if (s.length > index+3\r
- && NativeRegExp.isHex(s[index+0])\r
- && NativeRegExp.isHex(s[index+1]) \r
- && NativeRegExp.isHex(s[index+2])\r
- && NativeRegExp.isHex(s[index+3])) {\r
- n = (((((NativeRegExp.unHex(s[index+0]) << 4) +\r
- NativeRegExp.unHex(s[index+1])) << 4) +\r
- NativeRegExp.unHex(s[index+2])) << 4) +\r
- NativeRegExp.unHex(s[index+3]);\r
- c = (char) n;\r
- index += 4;\r
- }\r
- break;\r
-\r
- case 'c':\r
- c = s[index++];\r
- c = Character.toUpperCase(c);\r
- c = (char) (c ^ 64); // JS_TOCTRL\r
- break;\r
- }\r
- }\r
-\r
- if (inrange) {\r
- if (lastc > c) {\r
- throw NativeGlobal.constructError(\r
- Context.getCurrentContext(), "RangeError",\r
- ScriptRuntime.getMessage(\r
- "msg.bad.range", null),\r
- state.scope);\r
- }\r
- inrange = false;\r
- } else {\r
- // Set lastc so we match just c's bit in the for loop.\r
- lastc = c;\r
-\r
- // [balance:\r
- if (index + 1 < end && s[index] == '-' &&\r
- s[index+1] != ']')\r
- {\r
- index++;\r
- inrange = true;\r
- continue;\r
- }\r
- }\r
-\r
- // Match characters in the range [lastc, c].\r
- for (; lastc <= c; lastc++) {\r
- matchBit(lastc, fill);\r
- if (fold) {\r
- /*\r
- * Must do both upper and lower for Turkish dotless i,\r
- * Georgian, etc.\r
- */\r
- char foldc = Character.toUpperCase(lastc);\r
- matchBit(foldc, fill);\r
- foldc = Character.toLowerCase(foldc);\r
- matchBit(foldc, fill);\r
- }\r
- }\r
- lastc = c;\r
- }\r
- }\r
- byte op; /* packed r.e. op bytecode */\r
- byte flags; /* flags, see below */\r
- short offset; /* bytecode offset */\r
- RENode next; /* next in concatenation order */\r
- Object kid; /* first operand */\r
- int kid2; /* second operand */\r
- int num; /* could be a number */\r
- char chr; /* or a char */\r
- short min,max; /* or a range */\r
- short kidlen; /* length of string at kid, in chars */\r
- short bmsize; /* bitmap size, based on max char code */\r
- char[] s; /* if null, use state.source */\r
- byte[] bitmap; /* cclass bitmap */\r
-}\r
-\r
-\r
-class MatchState {\r
- boolean inputExhausted; /* did we run out of input chars ? */\r
- boolean anchoring; /* true if multiline anchoring ^/$ */\r
- int pcend; /* pc limit (fencepost) */\r
- int cpbegin, cpend; /* cp base address and limit */\r
- int start; /* offset from cpbegin to start at */\r
- int skipped; /* chars skipped anchoring this r.e. */\r
- byte flags; /* pennants */\r
- int parenCount; /* number of paren substring matches */\r
- SubString[] maybeParens; /* possible paren substring pointers */\r
- SubString[] parens; /* certain paren substring matches */\r
- Scriptable scope;\r
- char[] input;\r
-\r
- public int noMoreInput() {\r
- inputExhausted = true;\r
- /*\r
- try {\r
- throw new Exception();\r
- }\r
- catch (Exception e) {\r
- e.printStackTrace();\r
- }\r
- */\r
- return -1;\r
- }\r
-}\r
-\r
-class GreedyState {\r
- MatchState state;\r
- RENode kid;\r
- RENode next;\r
- RENode stop;\r
- int kidCount;\r
- int maxKid;\r
-}\r
-\r