2 * gnu/regexp/RETokenRepeated.java
3 * Copyright (C) 1998-2001 Wes Biggs
5 * This library is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU Lesser General Public License as published
7 * by the Free Software Foundation; either version 2.1 of the License, or
8 * (at your option) any later version.
10 * This library is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU Lesser General Public License for more details.
15 * You should have received a copy of the GNU Lesser General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
21 import java.util.Vector;
23 final class RETokenRepeated extends REToken {
24 private REToken token;
26 private boolean stingy;
28 RETokenRepeated(int subIndex, REToken token, int min, int max) {
35 /** Sets the minimal matching mode to true. */
40 /** Queries if this token has minimal matching enabled. */
46 * The minimum length of a repeated token is the minimum length
47 * of the token multiplied by the minimum number of times it must
50 int getMinimumLength() {
51 return (min * token.getMinimumLength());
54 // We do need to save every possible point, but the number of clone()
55 // invocations here is really a killer for performance on non-stingy
56 // repeat operators. I'm open to suggestions...
58 // Hypothetical question: can you have a RE that matches 1 times,
59 // 3 times, 5 times, but not 2 times or 4 times? Does having
60 // the subexpression back-reference operator allow that?
62 boolean match(CharIndexed input, REMatch mymatch) {
63 // number of times we've matched so far
66 // Possible positions for the next repeat to match at
67 REMatch newMatch = mymatch;
71 // Add the '0-repeats' index
72 // positions.elementAt(z) == position [] in input after <<z>> matches
73 Vector positions = new Vector();
74 positions.addElement(newMatch);
76 // Declare variables used in loop
82 // Check for stingy match for each possibility.
83 if (stingy && (numRepeats >= min)) {
84 REMatch result = matchRest(input, newMatch);
86 mymatch.assignFrom(result);
94 // try next repeat at all possible positions
95 for (current = newMatch; current != null; current = current.next) {
96 recurrent = (REMatch) current.clone();
97 if (token.match(input, recurrent)) {
98 // add all items in current to doables array
99 if (doables == null) {
101 doablesLast = recurrent;
103 // Order these from longest to shortest
104 // Start by assuming longest (more repeats)
105 doablesLast.next = recurrent;
107 // Find new doablesLast
108 while (doablesLast.next != null) {
109 doablesLast = doablesLast.next;
113 // if none of the possibilities worked out, break out of do/while
114 if (doables == null) break;
116 // reassign where the next repeat can match
119 // increment how many repeats we've successfully found
122 positions.addElement(newMatch);
123 } while (numRepeats < max);
125 // If there aren't enough repeats, then fail
126 if (numRepeats < min) return false;
128 // We're greedy, but ease off until a true match is found
129 int posIndex = positions.size();
131 // At this point we've either got too many or just the right amount.
132 // See if this numRepeats works with the rest of the regexp.
133 REMatch allResults = null;
134 REMatch allResultsLast = null;
136 REMatch results = null;
137 while (--posIndex >= min) {
138 newMatch = (REMatch) positions.elementAt(posIndex);
139 results = matchRest(input, newMatch);
140 if (results != null) {
141 if (allResults == null) {
142 allResults = results;
143 allResultsLast = results;
145 // Order these from longest to shortest
146 // Start by assuming longest (more repeats)
147 allResultsLast.next = results;
149 // Find new doablesLast
150 while (allResultsLast.next != null) {
151 allResultsLast = allResultsLast.next;
154 // else did not match rest of the tokens, try again on smaller sample
156 if (allResults != null) {
157 mymatch.assignFrom(allResults); // does this get all?
160 // If we fall out, no matches.
164 private REMatch matchRest(CharIndexed input, final REMatch newMatch) {
165 REMatch current, single;
166 REMatch doneIndex = null;
167 REMatch doneIndexLast = null;
168 // Test all possible matches for this number of repeats
169 for (current = newMatch; current != null; current = current.next) {
170 // clone() separates a single match from the chain
171 single = (REMatch) current.clone();
172 if (next(input, single)) {
173 // chain results to doneIndex
174 if (doneIndex == null) {
176 doneIndexLast = single;
178 doneIndexLast.next = single;
180 // Find new doneIndexLast
181 while (doneIndexLast.next != null) {
182 doneIndexLast = doneIndexLast.next;
189 void dump(StringBuffer os) {
193 if ((max == Integer.MAX_VALUE) && (min <= 1))
194 os.append( (min == 0) ? '*' : '+' );
195 else if ((min == 0) && (max == 1))
198 os.append('{').append(min);
201 if (max != Integer.MAX_VALUE) os.append(max);
205 if (stingy) os.append('?');