1 /* -*- Mode: java; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
\r
3 * The contents of this file are subject to the Netscape Public
\r
4 * License Version 1.1 (the "License"); you may not use this file
\r
5 * except in compliance with the License. You may obtain a copy of
\r
6 * the License at http://www.mozilla.org/NPL/
\r
8 * Software distributed under the License is distributed on an "AS
\r
9 * IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
\r
10 * implied. See the License for the specific language governing
\r
11 * rights and limitations under the License.
\r
13 * The Original Code is Rhino code, released
\r
16 * The Initial Developer of the Original Code is Netscape
\r
17 * Communications Corporation. Portions created by Netscape are
\r
18 * Copyright (C) 1997-1999 Netscape Communications Corporation. All
\r
23 * Alternatively, the contents of this file may be used under the
\r
24 * terms of the GNU Public License (the "GPL"), in which case the
\r
25 * provisions of the GPL are applicable instead of those above.
\r
26 * If you wish to allow use of your version of this file only
\r
27 * under the terms of the GPL and not to allow others to use your
\r
28 * version of this file under the NPL, indicate your decision by
\r
29 * deleting the provisions above and replace them with the notice
\r
30 * and other provisions required by the GPL. If you do not delete
\r
31 * the provisions above, a recipient may use your version of this
\r
32 * file under either the NPL or the GPL.
\r
35 package org.mozilla.javascript.regexp;
\r
37 import org.mozilla.javascript.*;
\r
38 import java.util.Vector;
\r
43 public class RegExpImpl implements RegExpProxy {
\r
// Constructor: allocates the parens Vector with an initial capacity of 9.
45 public RegExpImpl() {
\r
46 parens = new Vector(9);
\r
// Returns true when obj is an instance of NativeRegExp, false otherwise.
49 public boolean isRegExp(Object obj) {
\r
50 return obj instanceof NativeRegExp;
\r
// Creates a NativeRegExp, forwarding cx, scope, source, global and flat
// unchanged to the NativeRegExp constructor.
53 public Object newRegExp(Context cx, Scriptable scope, String source,
\r
54 String global, boolean flat)
\r
56 return new NativeRegExp(cx, scope, source, global, flat);
\r
// Runs matchOrReplace in GLOB_MATCH mode with a fresh MatchData.
// Returns mdata.arrayobj when it was set during the call (MatchData.doGlobal
// creates it lazily), otherwise the value returned by matchOrReplace.
// NOTE(review): other MatchData fields may be initialised on lines not shown
// in this listing -- confirm against the full file.
59 public Object match(Context cx, Scriptable scope,
\r
60 Scriptable thisObj, Object[] args)
\r
61 throws JavaScriptException
\r
63 MatchData mdata = new MatchData();
\r
65 mdata.mode = GlobData.GLOB_MATCH;
\r
66 mdata.parent = ScriptableObject.getTopLevelScope(scope);
\r
// forceFlat is false here; only replace() passes true.
67 Object rval = matchOrReplace(cx, scope, thisObj, args,
\r
68 this, mdata, false);
\r
69 return mdata.arrayobj == null ? rval : mdata.arrayobj;
\r
// Runs matchOrReplace in GLOB_SEARCH mode with a fresh MatchData and returns
// its result directly. In that mode matchOrReplace produces an Integer:
// reImpl.leftContext.length on a match, and -1 on what is presumably the
// no-match branch (the line selecting that branch is missing from this listing).
72 public Object search(Context cx, Scriptable scope,
\r
73 Scriptable thisObj, Object[] args)
\r
74 throws JavaScriptException
\r
76 MatchData mdata = new MatchData();
\r
78 mdata.mode = GlobData.GLOB_SEARCH;
\r
79 mdata.parent = ScriptableObject.getTopLevelScope(scope);
\r
80 return matchOrReplace(cx, scope, thisObj, args, this, mdata, false);
\r
// Implements the replace operation on top of matchOrReplace.
// args[1] (or Undefined.instance when absent) is the replacement: a Function
// is kept as a callback, anything else is converted with ScriptRuntime.toString.
// The result is returned as a new String built from rdata.charArray followed
// by the right context of the last match.
// NOTE(review): several lines of this method (else branches, closing braces,
// the early return for the no-match case) are absent from this listing, so
// the exact branch structure must be confirmed against the full file.
83 public Object replace(Context cx, Scriptable scope,
\r
84 Scriptable thisObj, Object[] args)
\r
85 throws JavaScriptException
\r
87 Object arg1 = args.length < 2 ? Undefined.instance : args[1];
\r
88 String repstr = null;
\r
89 Function lambda = null;
\r
90 if (arg1 instanceof Function) {
\r
91 lambda = (Function) arg1;
\r
93 repstr = ScriptRuntime.toString(arg1);
\r
96 ReplaceData rdata = new ReplaceData();
\r
98 rdata.mode = GlobData.GLOB_REPLACE;
\r
99 rdata.lambda = lambda;
\r
// repstr stays null when a callback is used; findReplen fills it per match.
100 rdata.repstr = repstr == null ? null : repstr.toCharArray();
\r
// dollar is the index of the first dollar sign in repstr, or -1.
101 rdata.dollar = repstr == null ? -1 : repstr.indexOf('$');
\r
102 rdata.charArray = null;
\r
105 rdata.leftIndex = 0;
\r
// forceFlat is true for replace.
106 Object val = matchOrReplace(cx, scope, thisObj, args,
\r
107 this, rdata, true);
\r
// charArray is still null when ReplaceData.doGlobal never ran.
110 if (rdata.charArray == null) {
\r
111 if (rdata.global || val == null || !val.equals(Boolean.TRUE)) {
\r
112 /* Didn't match even once. */
\r
// Single (non-global) match: result starts as left context plus replacement.
115 int leftlen = this.leftContext.length;
\r
116 int length = leftlen + rdata.findReplen(cx, this);
\r
117 charArray = new char[length];
\r
118 SubString leftContext = this.leftContext;
\r
119 System.arraycopy(leftContext.charArray, leftContext.index,
\r
120 charArray, 0, leftlen);
\r
121 rdata.doReplace(cx, this, charArray, leftlen);
\r
122 rdata.charArray = charArray;
\r
123 rdata.length = length;
\r
// Append the text to the right of the last match to the accumulated result.
126 SubString rc = this.rightContext;
\r
127 int rightlen = rc.length;
\r
128 int length = rdata.length + rightlen;
\r
129 charArray = new char[length];
\r
130 System.arraycopy(rdata.charArray, 0,
\r
131 charArray, 0, rdata.charArray.length);
\r
132 System.arraycopy(rc.charArray, rc.index, charArray,
\r
133 rdata.length, rightlen);
\r
134 return new String(charArray, 0, length);
\r
138 * Analog of C match_or_replace.
\r
// Shared driver for match, search and replace.
// Converts thisObj to a string, obtains a NativeRegExp from args, records
// whether it is global in data.global, then executes it according to
// data.mode and data.global.
// NOTE(review): the parameter line declaring reImpl, the declarations of re
// and opt, and several else/brace/return lines are missing from this
// listing -- confirm details against the full file.
140 private static Object matchOrReplace(Context cx, Scriptable scope,
\r
141 Scriptable thisObj, Object[] args,
\r
143 GlobData data, boolean forceFlat)
\r
144 throws JavaScriptException
\r
148 String str = ScriptRuntime.toString(thisObj);
\r
150 Scriptable topScope = ScriptableObject.getTopLevelScope(scope);
\r
// No arguments: use a regexp built from an empty source and empty flags.
152 if (args.length == 0)
\r
153 re = new NativeRegExp(cx, topScope, "", "", false);
\r
// An existing NativeRegExp argument is used as is.
155 if (args[0] instanceof NativeRegExp) {
\r
156 re = (NativeRegExp) args[0];
\r
// Otherwise args[0] is converted to a string and compiled; flags are taken
// from args[data.optarg] when that index exists.
158 String src = ScriptRuntime.toString(args[0]);
\r
160 if (data.optarg < args.length) {
\r
162 opt = ScriptRuntime.toString(args[data.optarg]);
\r
166 re = new NativeRegExp(cx, topScope, src, opt, forceFlat);
\r
170 data.global = (re.getFlags() & NativeRegExp.GLOB) != 0;
\r
171 int[] indexp = { 0 };
\r
172 Object result = null;
\r
// Search mode: a TRUE result is replaced by the left context length.
173 if (data.mode == GlobData.GLOB_SEARCH) {
\r
174 result = re.executeRegExp(cx, scope, reImpl,
\r
175 str, indexp, NativeRegExp.TEST);
\r
176 if (result != null && result.equals(Boolean.TRUE))
\r
177 result = new Integer(reImpl.leftContext.length);
\r
// presumably the else branch of the test above (its else line is not shown)
179 result = new Integer(-1);
\r
// Global mode: reset lastIndex and call data.doGlobal once per match.
180 } else if (data.global) {
\r
181 re.setLastIndex(0);
\r
182 for (int count = 0; indexp[0] <= str.length(); count++) {
\r
183 result = re.executeRegExp(cx, scope, reImpl,
\r
184 str, indexp, NativeRegExp.TEST);
\r
185 if (result == null || !result.equals(Boolean.TRUE))
\r
187 data.doGlobal(cx, scope, count, reImpl);
\r
// Empty match handling; the statements inside these tests are not shown.
188 if (reImpl.lastMatch.length == 0) {
\r
189 if (indexp[0] == str.length())
\r
// Non-global match or replace: one execution, TEST for replace, MATCH otherwise.
195 result = re.executeRegExp(cx, scope, reImpl, str, indexp,
\r
196 ((data.mode == GlobData.GLOB_REPLACE)
\r
197 ? NativeRegExp.TEST
\r
198 : NativeRegExp.MATCH));
\r
// Locates the next separator match in target for a split operation.
// ip, matchlen, matched and parensp are arrays used as in/out parameters:
// ip[0] is the current index passed to executeRegExp, matchlen[0] receives the
// length of the matched separator, matched[0] is cleared on a mismatch, and
// parensp[0] receives the paren substrings as strings.
// NOTE(review): the declarations of result and i, the return statement, the
// again label and several branch bodies are missing from this listing.
206 public int find_split(Scriptable scope, String target, String separator,
\r
207 Object reObj, int[] ip, int[] matchlen,
\r
208 boolean[] matched, String[][] parensp)
\r
211 int length = target.length();
\r
213 Context cx = Context.getCurrentContext();
\r
215 int version = cx.getLanguageVersion();
\r
216 NativeRegExp re = (NativeRegExp) reObj;
\r
218 while (true) { // imitating C label
\r
219 /* JS1.2 deviated from Perl by never matching at end of string. */
\r
220 int ipsave = ip[0]; // reuse ip to save object creation
\r
222 Object ret = re.executeRegExp(cx, scope, this, target, ip,
\r
223 NativeRegExp.TEST);
\r
// NOTE(review): identity comparison with Boolean.TRUE here, whereas
// matchOrReplace compares with equals -- confirm this is intended.
224 if (ret != Boolean.TRUE) {
\r
225 // Mismatch: ensure our caller advances i past end of string.
\r
228 matched[0] = false;
\r
235 SubString sep = this.lastMatch;
\r
236 matchlen[0] = sep.length;
\r
237 if (matchlen[0] == 0) {
\r
239 * Empty string match: never split on an empty
\r
240 * match at the start of a find_split cycle. Same
\r
241 * rule as for an empty global match in
\r
242 * match_or_replace.
\r
246 * "Bump-along" to avoid sticking at an empty
\r
247 * match, but don't bump past end of string --
\r
248 * our caller must do that by adding
\r
249 * sep->length to our return value.
\r
252 if (version == Context.VERSION_1_2) {
\r
261 continue again; // imitating C goto
\r
264 // PR_ASSERT((size_t)i >= sep->length);
\r
265 result = i - matchlen[0];
\r
// Export every paren substring of the last match as a String.
268 int size = parens.size();
\r
269 parensp[0] = new String[size];
\r
270 for (int num = 0; num < size; num++) {
\r
271 SubString parsub = getParenSubString(num);
\r
272 parensp[0][num] = parsub.toString();
\r
278 * Analog of REGEXP_PAREN_SUBSTRING in C jsregexp.h.
\r
279 * Assumes zero-based; i.e., for $3, i==2
\r
// Returns the paren substring at zero-based index i from the parens Vector,
// or SubString.emptySubString when i is at or beyond parens.size().
// NOTE(review): a negative i is not checked here -- confirm callers never pass one.
281 SubString getParenSubString(int i) {
\r
282 if (i >= parens.size())
\r
283 return SubString.emptySubString;
\r
284 return (SubString) parens.elementAt(i);
\r
// Match state fields of RegExpImpl, declared without an access modifier.
// lastMatch, leftContext, rightContext and parens are read by replace,
// find_split and ReplaceData elsewhere in this file.
287 String input; /* input string to match (perl $_, GC root) */
\r
288 boolean multiline; /* whether input contains newlines (perl $*) */
\r
289 Vector parens; /* Vector of SubString; last set of parens
\r
290 matched (perl $1, $2) */
\r
291 SubString lastMatch; /* last string matched (perl $&) */
\r
292 SubString lastParen; /* last paren matched (perl $+) */
\r
293 SubString leftContext; /* input to left of last match (perl $`) */
\r
294 SubString rightContext; /* input to right of last match (perl $') */
\r
// Abstract carrier for the inputs and outputs of RegExpImpl.matchOrReplace.
// Subclasses supply doGlobal, which matchOrReplace calls once per successful
// match when the regexp is global.
298 abstract class GlobData {
\r
// Values for the mode field.
299 static final int GLOB_MATCH = 1;
\r
300 static final int GLOB_REPLACE = 2;
\r
301 static final int GLOB_SEARCH = 3;
\r
// count is the zero-based loop counter of the global match loop in
// matchOrReplace; reImpl holds the state of the match just made.
303 abstract void doGlobal(Context cx, Scriptable scope, int count,
\r
304 RegExpImpl reImpl)
\r
305 throws JavaScriptException;
\r
// NOTE(review): mode is a byte while the GLOB_* constants are int; the
// assignments compile because the constants are compile-time values in range.
307 byte mode; /* input: return index, match object, or void */
\r
308 int optarg; /* input: index of optional flags argument */
\r
309 boolean global; /* output: whether regexp was global */
\r
310 String str; /* output: 'this' parameter object as string */
\r
311 NativeRegExp regexp;/* output: regexp parameter object private data */
\r
// GlobData used by RegExpImpl.match and RegExpImpl.search.
// doGlobal collects each global match into a script Array held in arrayobj.
316 class MatchData extends GlobData {
\r
319 * Analog of match_glob() in jsstr.c
\r
321 void doGlobal(Context cx, Scriptable scope, int count, RegExpImpl reImpl)
\r
322 throws JavaScriptException
\r
// Create the result Array in the top-level scope on the first call.
328 if (arrayobj == null) {
\r
329 Scriptable s = ScriptableObject.getTopLevelScope(scope);
\r
330 arrayobj = ScriptRuntime.newObject(cx, s, "Array", null);
\r
// Store the text of the last match at index count.
332 SubString matchsub = reImpl.lastMatch;
\r
333 String matchstr = matchsub.toString();
\r
334 arrayobj.put(count, arrayobj, matchstr);
\r
// Result array; stays null until doGlobal runs once.
337 Scriptable arrayobj;
\r
341 class ReplaceData extends GlobData {
\r
348 * Analog of replace_glob() in jsstr.c
\r
// Called once per global match during a replace.
// Grows this.charArray by the unmatched text between the end of the previous
// match (leftIndex) and the start of the current match, plus the length of the
// replacement reported by findReplen, then copies that unmatched text in and
// calls doReplace to write the replacement.
// NOTE(review): the declaration of charArray, an else line and at least one
// statement between the final arraycopy and doReplace are missing from this
// listing -- confirm the offset passed to doReplace against the full file.
350 void doGlobal(Context cx, Scriptable scope, int count, RegExpImpl reImpl)
\r
351 throws JavaScriptException
\r
353 ReplaceData rdata = this;
\r
355 SubString lc = reImpl.leftContext;
\r
357 char[] leftArray = lc.charArray;
\r
358 int leftIndex = rdata.leftIndex;
\r
// Length of the text between the previous match end and this match start.
360 int leftlen = reImpl.lastMatch.index - leftIndex;
\r
// Remember where this match ends for the next call.
361 rdata.leftIndex = reImpl.lastMatch.index + reImpl.lastMatch.length;
\r
362 int replen = findReplen(cx, reImpl);
\r
363 int growth = leftlen + replen;
\r
// Reallocate, preserving what has been accumulated so far.
365 if (rdata.charArray != null) {
\r
366 charArray = new char[rdata.length + growth];
\r
367 System.arraycopy(rdata.charArray, 0, charArray, 0, rdata.length);
\r
369 charArray = new char[growth];
\r
372 rdata.charArray = charArray;
\r
373 rdata.length += growth;
\r
374 int index = rdata.index;
\r
375 rdata.index += growth;
\r
376 System.arraycopy(leftArray, leftIndex, charArray, index, leftlen);
\r
378 doReplace(cx, reImpl, charArray, index);
\r
// Shared SubString built from a one-character string holding a dollar sign.
// NOTE(review): its use site is not visible in this listing.
381 static SubString dollarStr = new SubString("$");
\r
// Resolves a dollar variable found in the replacement text da at index dp to
// a SubString taken from the match state in res. The visible return values
// are a paren substring (via getParenSubString), lastMatch, lastParen,
// leftContext, rightContext, or null when the paren number is 0.
// Language versions other than VERSION_DEFAULT and not above VERSION_1_4 take
// separate paths for backslash escaping and for multi-digit paren numbers.
// NOTE(review): the switch/case labels selecting each return, the writes to
// skip, and the declarations of dc, num, tmp, ca and cp are missing from this
// listing; in particular ca and cp are used but only da and dp are visible
// parameters -- confirm against the full file.
// presumably skip[0] receives the number of characters consumed -- TODO confirm.
383 static SubString interpretDollar(Context cx, RegExpImpl res,
\r
384 char[] da, int dp, int bp, int[] skip)
\r
391 /* Allow a real backslash (literal "\\") to escape "$1" etc. */
\r
393 throw new RuntimeException();
\r
394 if ((cx.getLanguageVersion() != Context.VERSION_DEFAULT)
\r
395 && (cx.getLanguageVersion() <= Context.VERSION_1_4))
\r
396 if (dp > bp && da[dp-1] == '\\')
\r
399 /* Interpret all Perl match-induced dollar variables. */
\r
401 if (NativeRegExp.isDigit(dc)) {
\r
402 if ((cx.getLanguageVersion() != Context.VERSION_DEFAULT)
\r
403 && (cx.getLanguageVersion() <= Context.VERSION_1_4)) {
\r
406 /* Check for overflow to avoid gobbling arbitrary decimal digits. */
\r
410 while (++cp < ca.length && NativeRegExp.isDigit(dc = ca[cp])) {
\r
411 tmp = 10 * num + NativeRegExp.unDigit(dc);
\r
417 else { /* ECMA 3, 1-9 or 01-99 */
\r
418 num = NativeRegExp.unDigit(dc);
\r
420 if ((dp + 2) < da.length) {
\r
422 if (NativeRegExp.isDigit(dc)) {
\r
423 num = 10 * num + NativeRegExp.unDigit(dc);
\r
427 if (num == 0) return null; /* $0 or $00 is not valid */
\r
429 /* Adjust num from 1 $n-origin to 0 array-index-origin. */
\r
432 return res.getParenSubString(num);
\r
440 return res.lastMatch;
\r
442 return res.lastParen;
\r
444 if (cx.getLanguageVersion() == Context.VERSION_1_2) {
\r
446 * JS1.2 imitated the Perl4 bug where left context at each step
\r
447 * in an iterative use of a global regexp started from last match,
\r
448 * not from the start of the target string. But Perl4 does start
\r
449 * $` at the beginning of the target string when it is used in a
\r
450 * substitution, so we emulate that special case here.
\r
// Mutates the shared leftContext so it spans from index 0 to the match start.
452 res.leftContext.index = 0;
\r
453 res.leftContext.length = res.lastMatch.index;
\r
455 return res.leftContext;
\r
457 return res.rightContext;
\r
463 * Corresponds to find_replen in jsstr.c. rdata is 'this', and
\r
464 * the result parameter sizep is the return value (errors are
\r
465 * propagated with exceptions).
\r
// Computes the length of the replacement text for the current match.
// With a callback (lambda != null): calls it with the last match, every paren
// substring, the left context length and this.str, stores the stringified
// result in this.repstr and returns its length.
// Without a callback: starts from repstr.length and adjusts it for each dollar
// variable by the expansion length minus the characters it occupies.
// NOTE(review): the loop advance, the null check on sub and the final return
// are missing from this listing -- confirm against the full file.
467 int findReplen(Context cx, RegExpImpl reImpl)
\r
468 throws JavaScriptException
\r
470 if (lambda != null) {
\r
471 // invoke lambda function with args lastMatch, $1, $2, ... $n,
\r
472 // leftContext.length, whole string.
\r
473 Vector parens = reImpl.parens;
\r
474 int parenCount = parens.size();
\r
475 Object[] args = new Object[parenCount + 3];
\r
476 args[0] = reImpl.lastMatch.toString();
\r
477 for (int i=0; i < parenCount; i++) {
\r
478 SubString sub = (SubString) parens.elementAt(i);
\r
479 args[i+1] = sub.toString();
\r
481 args[parenCount+1] = new Integer(reImpl.leftContext.length);
\r
482 args[parenCount+2] = str;
\r
// The callback is invoked with its parent scope as both scope and this.
483 Scriptable parent = lambda.getParentScope();
\r
484 Object result = lambda.call(cx, parent, parent, args);
\r
// Overwrites repstr so doReplace copies the callback result.
486 this.repstr = ScriptRuntime.toString(result).toCharArray();
\r
487 return this.repstr.length;
\r
490 int replen = this.repstr.length;
\r
// NOTE(review): dollar may be -1; a guard is presumably on a line not shown.
495 for (int dp = dollar; dp < this.repstr.length ; ) {
\r
496 char c = this.repstr[dp];
\r
501 int[] skip = { 0 };
\r
502 SubString sub = interpretDollar(cx, reImpl, this.repstr, dp,
\r
505 replen += sub.length - skip[0];
\r
515 * Analog of do_replace in jsstr.c
\r
// Writes the replacement text into charArray, copying literal runs of repstr
// and the expansion of each dollar variable returned by interpretDollar.
// NOTE(review): the rest of the signature (arrayIndex is used but its
// declaration is not shown), the declaration of cp, the outer label and most
// of the loop body are missing from this listing -- confirm the copy offsets
// against the full file before relying on this description.
517 void doReplace(Context cx, RegExpImpl regExpImpl, char[] charArray,
\r
521 char[] da = repstr;
\r
522 int dp = this.dollar;
\r
528 System.arraycopy(repstr, cp, charArray, arrayIndex,
\r
532 int[] skip = { 0 };
\r
533 SubString sub = interpretDollar(cx, regExpImpl, da,
\r
538 System.arraycopy(sub.charArray, sub.index, charArray,
\r
547 if (dp >= repstr.length) break;
\r
// Scan forward to the next dollar sign.
548 while (repstr[dp] != '$') {
\r
550 if (dp >= repstr.length) break outer;
\r
// Copy whatever literal text remains after the last dollar variable.
554 if (repstr.length > cp) {
\r
555 System.arraycopy(repstr, cp, charArray, arrayIndex,
\r
556 repstr.length - cp);
\r
// ReplaceData state. lambda, repstr, dollar, charArray and leftIndex are set
// by RegExpImpl.replace before matching; charArray, length, index and
// leftIndex are updated by doGlobal for each global match.
560 Function lambda; /* replacement function object or null */
\r
561 char[] repstr; /* replacement string */
\r
562 int dollar; /* -1 or index of first $ in repstr */
\r
563 char[] charArray; /* result characters, null initially */
\r
564 int length; /* result length, 0 initially */
\r
565 int index; /* index in result of next replacement */
\r
566 int leftIndex; /* leftContext index, always 0 for JS1.2 */
\r