1 /*******************************************************************************
2 * Copyright (c) 2000, 2004 IBM Corporation and others.
3 * All rights reserved. This program and the accompanying materials
4 * are made available under the terms of the Common Public License v1.0
5 * which accompanies this distribution, and is available at
6 * http://www.eclipse.org/legal/cpl-v10.html
9 * IBM Corporation - initial API and implementation
10 *******************************************************************************/
11 package org.eclipse.jdt.internal.compiler.parser;
13 import java.util.Iterator;
15 import org.eclipse.jdt.core.compiler.CharOperation;
16 import org.eclipse.jdt.core.compiler.InvalidInputException;
17 import org.eclipse.jdt.internal.compiler.CompilationResult;
18 import org.eclipse.jdt.internal.compiler.ast.StringLiteral;
19 import org.eclipse.jdt.internal.compiler.classfmt.ClassFileConstants;
22 * IMPORTANT NOTE: Internal Scanner implementation. It is mirrored in
23 * org.eclipse.jdt.core.compiler public package where it is API.
24 * The mirror implementation is using the backward compatible ITerminalSymbols constant
25 * definitions (stable with 2.0), whereas the internal implementation uses TerminalTokens
26 * which constant values reflect the latest parser generation state.
28 public class Scanner implements TerminalTokens {
31 - getNextToken() which returns the current type of the token
32 (this value is not memorized by the scanner)
33 - getCurrentTokenSource() which provides with the token "REAL" source
34 (aka all unicode have been transformed into a correct char)
35 - sourceStart gives the position into the stream
36 - currentPosition-1 gives the sourceEnd position into the stream
// Scanner state. Most of these fields are public and are read or written directly
// by the scanning methods of this class.
// Source level received by the constructor and stored unchanged.
38 protected long sourceLevel;
40 public boolean useAssertAsAnIndentifier = false;
41 //flag indicating if processed source contains occurrences of keyword assert
42 public boolean containsAssertKeyword = false;
45 public boolean useEnumAsAnIndentifier = false;
47 public boolean recordLineSeparator = false;
// Last character read from the source; the getNextChar* methods store the decoded
// value here when the character was written as a unicode escape.
48 public char currentCharacter;
// Index in source of the first character of the current token.
49 public int startPosition;
// Index in source of the next character to read; currentPosition - 1 is the end of the current token.
50 public int currentPosition;
51 public int initialPosition, eofPosition;
52 // past eofPosition, EOF is generated instead of real tokens from the source
54 public boolean tokenizeComments = false;
55 public boolean tokenizeWhiteSpace = false;
57 //source should be viewed as a window (aka a part)
58 //of an entire very large stream
// Holds the decoded characters of the current token once a unicode escape has been met.
62 public char[] withoutUnicodeBuffer;
63 public int withoutUnicodePtr; //when == 0 ==> no unicode in the current token
// Set by the getNextChar* methods when the character decoded from a unicode escape is a backslash.
64 public boolean unicodeAsBackSlash = false;
66 public boolean scanningFloatLiteral = false;
68 //support for /** comments
69 public int[] commentStops = new int[10];
70 public int[] commentStarts = new int[10];
71 public int commentPtr = -1; // no comment test with commentPtr value -1
72 protected int lastCommentLinePosition = -1;
// Parallel arrays filled by checkTaskTag; indexes 0 .. foundTaskCount - 1 are in use.
75 public char[][] foundTaskTags = null;
76 public char[][] foundTaskMessages;
77 public char[][] foundTaskPriorities = null;
// Each entry is a { start, end } pair of source positions.
78 public int[][] foundTaskPositions;
79 public int foundTaskCount = 0;
// Tags searched for by checkTaskTag, with the priority found at the same index in taskPriorities.
80 public char[][] taskTags = null;
81 public char[][] taskPriorities = null;
82 public boolean isTaskCaseSensitive = true;
84 //diet parsing support - jump over some method body when requested
85 public boolean diet = false;
87 //support for the poor-line-debuggers ....
88 //remember the position of the cr/lf
// lineEnds is allocated with spare capacity; getLineEnds() exposes only the first linePtr + 1 entries.
89 public int[] lineEnds = new int[250];
90 public int linePtr = -1;
91 public boolean wasAcr = false;
// Symbolic message strings describing scanning failures. getNextToken passes
// INVALID_CHARACTER_CONSTANT to InvalidInputException; the others presumably serve the same
// purpose elsewhere in the scanner - TODO confirm against their call sites.
93 public static final String END_OF_SOURCE = "End_Of_Source"; //$NON-NLS-1$
95 public static final String INVALID_HEXA = "Invalid_Hexa_Literal"; //$NON-NLS-1$
96 public static final String INVALID_OCTAL = "Invalid_Octal_Literal"; //$NON-NLS-1$
97 public static final String INVALID_CHARACTER_CONSTANT = "Invalid_Character_Constant"; //$NON-NLS-1$
98 public static final String INVALID_ESCAPE = "Invalid_Escape"; //$NON-NLS-1$
99 public static final String INVALID_INPUT = "Invalid_Input"; //$NON-NLS-1$
100 public static final String INVALID_UNICODE_ESCAPE = "Invalid_Unicode_Escape"; //$NON-NLS-1$
101 public static final String INVALID_FLOAT = "Invalid_Float_Literal"; //$NON-NLS-1$
103 public static final String NULL_SOURCE_STRING = "Null_Source_String"; //$NON-NLS-1$
104 public static final String UNTERMINATED_STRING = "Unterminated_String"; //$NON-NLS-1$
105 public static final String UNTERMINATED_COMMENT = "Unterminated_Comment"; //$NON-NLS-1$
106 public static final String INVALID_CHAR_IN_STRING = "Invalid_Char_In_String"; //$NON-NLS-1$
107 public static final String INVALID_DIGIT = "Invalid_Digit"; //$NON-NLS-1$
109 //----------------optimized identifier management------------------
// One shared single-character array per lower-case ASCII letter.
110 static final char[] charArray_a = new char[] {'a'},
111 charArray_b = new char[] {'b'},
112 charArray_c = new char[] {'c'},
113 charArray_d = new char[] {'d'},
114 charArray_e = new char[] {'e'},
115 charArray_f = new char[] {'f'},
116 charArray_g = new char[] {'g'},
117 charArray_h = new char[] {'h'},
118 charArray_i = new char[] {'i'},
119 charArray_j = new char[] {'j'},
120 charArray_k = new char[] {'k'},
121 charArray_l = new char[] {'l'},
122 charArray_m = new char[] {'m'},
123 charArray_n = new char[] {'n'},
124 charArray_o = new char[] {'o'},
125 charArray_p = new char[] {'p'},
126 charArray_q = new char[] {'q'},
127 charArray_r = new char[] {'r'},
128 charArray_s = new char[] {'s'},
129 charArray_t = new char[] {'t'},
130 charArray_u = new char[] {'u'},
131 charArray_v = new char[] {'v'},
132 charArray_w = new char[] {'w'},
133 charArray_x = new char[] {'x'},
134 charArray_y = new char[] {'y'},
135 charArray_z = new char[] {'z'};
// Placeholder of six NUL characters stored in every slot of charArray_length at initialization.
137 static final char[] initCharArray =
138 new char[] {'\u0000', '\u0000', '\u0000', '\u0000', '\u0000', '\u0000'};
139 static final int TableSize = 30, InternalTableSize = 6; //30*6 = 180 entries
// Upper bound of the token lengths dispatched by the switch in getCurrentIdentifierSource
// (optimizedCurrentTokenSource1 .. optimizedCurrentTokenSource6).
140 public static final int OptimizedLength = 6;
// Per-instance table (the static modifier is commented out) dimensioned
// [OptimizedLength][TableSize][InternalTableSize]; each slot holds a char[].
141 public /*static*/ final char[][][][] charArray_length =
142 new char[OptimizedLength][TableSize][InternalTableSize][];
143 // support for detecting non-externalized string literals
// Line being analysed for NLS tags; checkNonExternalizedString() hands it to parseTags
// and getNextToken resets it to null after a line break.
144 public NLSLine currentLine= null;
// A tag is written TAG_PREFIX, then some content, then TAG_POSTFIX (see the //$NON-NLS-1$ markers in this file).
145 public static final String TAG_PREFIX= "//$NON-NLS-"; //$NON-NLS-1$
146 public static final int TAG_PREFIX_LENGTH= TAG_PREFIX.length();
147 public static final String TAG_POSTFIX= "$"; //$NON-NLS-1$
148 public static final int TAG_POSTFIX_LENGTH= TAG_POSTFIX.length();
149 public StringLiteral[] nonNLSStrings = null;
// Initialized from the constructor argument of the same name.
150 public boolean checkNonExternalizedStringLiterals = false;
151 public boolean wasNonExternalizedStringLiteral = false;
// When true, getNextToken answers TokenNameGREATER for '>' without looking at the following characters.
154 public boolean returnOnlyGreater = false;
// Stores the shared initCharArray placeholder in every slot of charArray_length.
// The outer bound 6 has the same value as OptimizedLength.
// NOTE(review): the lines that open and close this initialization code are not part of this listing.
157 for (int i = 0; i < 6; i++) {
158 for (int j = 0; j < TableSize; j++) {
159 for (int k = 0; k < InternalTableSize; k++) {
160 this.charArray_length[i][j][k] = initCharArray;
// NOTE(review): first declarator of a multi-variable static int declaration; its continuation is not listed here.
165 static int newEntry2 = 0,
// Identifiers for the three kinds of brackets; BracketKinds is the number of kinds.
171 public static final int RoundBracket = 0;
172 public static final int SquareBracket = 1;
173 public static final int CurlyBracket = 2;
174 public static final int BracketKinds = 3;
// Delegating constructor call with defaults: comments and whitespace are not tokenized, no NLS check,
// source level JDK1_3, no task tags or priorities, case-sensitive task matching.
177 this(false /*comment*/, false /*whitespace*/, false /*nls*/, ClassFileConstants.JDK1_3 /*sourceLevel*/, null/*taskTag*/, null/*taskPriorities*/, true /*taskCaseSensitive*/);
// Parameter list and body of the full constructor: every argument is stored in the field of the
// same name, and eofPosition starts at Integer.MAX_VALUE.
// NOTE(review): the declaration line and two of the parameters (presumably sourceLevel and taskTags,
// which are assigned below) are not part of this listing.
181 boolean tokenizeComments,
182 boolean tokenizeWhiteSpace,
183 boolean checkNonExternalizedStringLiterals,
186 char[][] taskPriorities,
187 boolean isTaskCaseSensitive) {
189 this.eofPosition = Integer.MAX_VALUE;
190 this.tokenizeComments = tokenizeComments;
191 this.tokenizeWhiteSpace = tokenizeWhiteSpace;
192 this.checkNonExternalizedStringLiterals = checkNonExternalizedStringLiterals;
193 this.sourceLevel = sourceLevel;
194 this.taskTags = taskTags;
195 this.taskPriorities = taskPriorities;
196 this.isTaskCaseSensitive = isTaskCaseSensitive;
// Answers true when currentPosition equals the length of the source array.
// The comparison is against source.length, not eofPosition.
199 public final boolean atEnd() {
200 // This code is not relevant if source is
201 // only a part of the real stream input
203 return this.source.length == this.currentPosition;
// Passes the current NLS line, when there is one, to parseTags.
// NOTE(review): the statement guarded by the null test is not part of this listing; presumably an early return.
206 protected void checkNonExternalizedString() {
207 if (this.currentLine == null)
209 parseTags(this.currentLine);
212 // check presence of task: tags
213 // TODO (frederic) see if we need to take unicode characters into account...
// Looks for occurrences of the configured taskTags inside the comment delimited by
// commentStart and commentEnd, and records each hit in the foundTask* arrays.
// First pass: walk the comment text from commentStart + 2, bounded by commentEnd and eofPosition,
// and register tag, priority and position for every match.
// Second pass: for each task registered by this call, compute its message text and end position.
214 public void checkTaskTag(int commentStart, int commentEnd) {
215 char[] src = this.source;
217 // only look for newer task: tags
// i.e. the condition below holds when the last recorded task starts at or after commentStart
218 if (this.foundTaskCount > 0
219 && this.foundTaskPositions[this.foundTaskCount - 1][0] >= commentStart) {
// index of the first task that this call may add; the second pass starts from here
222 int foundTaskIndex = this.foundTaskCount;
223 char previous = src[commentStart+1]; // should be '*' or '/'
225 int i = commentStart + 2; i < commentEnd && i < this.eofPosition; i++) {
227 char[] priority = null;
228 // check for tag occurrence only if not ambiguous with javadoc tag
229 if (previous != '@') {
230 nextTag : for (int itag = 0; itag < this.taskTags.length; itag++) {
231 tag = this.taskTags[itag];
232 int tagLength = tag.length;
233 if (tagLength == 0) continue nextTag;
235 // ensure tag is not preceded by a letter if tag starts with a letter
236 if (Character.isJavaIdentifierStart(tag[0])) {
237 if (Character.isJavaIdentifierPart(previous)) {
// compare the tag with the source text starting at position i, one character at a time
242 for (int t = 0; t < tagLength; t++) {
245 if (x >= this.eofPosition || x >= commentEnd) continue nextTag;
246 if ((sc = src[i + t]) != (tc = tag[t])) { // case sensitive check
247 if (this.isTaskCaseSensitive || (Character.toLowerCase(sc) != Character.toLowerCase(tc))) { // case insensitive check
252 // ensure tag is not followed by a letter if tag finishes with a letter
253 if (i+tagLength < commentEnd && Character.isJavaIdentifierPart(src[i+tagLength-1])) {
254 if (Character.isJavaIdentifierPart(src[i + tagLength]))
// allocate the result arrays on first use (5 entries), then double all four when full
257 if (this.foundTaskTags == null) {
258 this.foundTaskTags = new char[5][];
259 this.foundTaskMessages = new char[5][];
260 this.foundTaskPriorities = new char[5][];
261 this.foundTaskPositions = new int[5][];
262 } else if (this.foundTaskCount == this.foundTaskTags.length) {
263 System.arraycopy(this.foundTaskTags, 0, this.foundTaskTags = new char[this.foundTaskCount * 2][], 0, this.foundTaskCount);
264 System.arraycopy(this.foundTaskMessages, 0, this.foundTaskMessages = new char[this.foundTaskCount * 2][], 0, this.foundTaskCount);
265 System.arraycopy(this.foundTaskPriorities, 0, this.foundTaskPriorities = new char[this.foundTaskCount * 2][], 0, this.foundTaskCount);
266 System.arraycopy(this.foundTaskPositions, 0, this.foundTaskPositions = new int[this.foundTaskCount * 2][], 0, this.foundTaskCount);
// the priority is looked up at the same index as the tag, when taskPriorities has such an entry
269 priority = this.taskPriorities != null && itag < this.taskPriorities.length
270 ? this.taskPriorities[itag]
// record the task; its message starts out empty and its end position is the last character of the tag
273 this.foundTaskTags[this.foundTaskCount] = tag;
274 this.foundTaskPriorities[this.foundTaskCount] = priority;
275 this.foundTaskPositions[this.foundTaskCount] = new int[] { i, i + tagLength - 1 };
276 this.foundTaskMessages[this.foundTaskCount] = CharOperation.NO_CHAR;
277 this.foundTaskCount++;
278 i += tagLength - 1; // will be incremented when looping
// second pass: fill in the message and final end position of each task found above
284 for (int i = foundTaskIndex; i < this.foundTaskCount; i++) {
285 // retrieve message start and end positions
286 int msgStart = this.foundTaskPositions[i][0] + this.foundTaskTags[i].length;
287 int max_value = i + 1 < this.foundTaskCount
288 ? this.foundTaskPositions[i + 1][0] - 1
290 // at most beginning of next task
291 if (max_value < msgStart) {
292 max_value = msgStart; // would only occur if tag is before EOF.
// look for a line break between the message start and max_value
296 for (int j = msgStart; j < max_value; j++) {
297 if ((c = src[j]) == '\n' || c == '\r') {
// scan backwards from max_value for a '*' character
303 for (int j = max_value; j > msgStart; j--) {
304 if ((c = src[j]) == '*') {
// skip whitespace at both ends of the message
// NOTE(review): both loops read src[...] before testing msgStart <= end, so the array is
// indexed before the bound is checked - consider swapping the operands.
315 while (CharOperation.isWhitespace(src[end]) && msgStart <= end)
317 while (CharOperation.isWhitespace(src[msgStart]) && msgStart <= end)
319 // update the end position of the task
320 this.foundTaskPositions[i][1] = end;
321 // get the message source
322 final int messageLength = end - msgStart + 1;
323 char[] message = new char[messageLength];
324 System.arraycopy(src, msgStart, message, 0, messageLength);
325 this.foundTaskMessages[i] = message;
// Answers the characters of the current identifier token.
// When the token contained unicode escapes (withoutUnicodePtr != 0) the characters come from
// withoutUnicodeBuffer; otherwise they come from source[startPosition .. currentPosition - 1].
328 public char[] getCurrentIdentifierSource() {
329 //return the token REAL source (aka unicodes are precomputed)
332 if (this.withoutUnicodePtr != 0) {
333 //0 is used as a fast test flag so the real first char is in position 1
335 this.withoutUnicodeBuffer,
337 result = new char[this.withoutUnicodePtr],
339 this.withoutUnicodePtr);
341 int length = this.currentPosition - this.startPosition;
// a token covering the whole source is answered as the source array itself, not as a copy
342 if (length == this.source.length) return this.source;
// short tokens are dispatched to one optimizedCurrentTokenSourceN() helper per length
343 switch (length) { // see OptimizedLength
345 return optimizedCurrentTokenSource1();
347 return optimizedCurrentTokenSource2();
349 return optimizedCurrentTokenSource3();
351 return optimizedCurrentTokenSource4();
353 return optimizedCurrentTokenSource5();
355 return optimizedCurrentTokenSource6();
// any other length: plain copy of the token characters
358 System.arraycopy(this.source, this.startPosition, result = new char[length], 0, length);
// Answers the position of the last character of the current token, i.e. currentPosition - 1.
362 public int getCurrentTokenEndPosition(){
363 return this.currentPosition - 1;
// Answers a newly allocated array holding the characters of the current token.
// With unicode escapes in the token the result has withoutUnicodePtr characters taken from
// withoutUnicodeBuffer; otherwise it has currentPosition - startPosition characters.
365 public final char[] getCurrentTokenSource() {
366 // Return the token REAL source (aka unicodes are precomputed)
369 if (this.withoutUnicodePtr != 0)
370 // 0 is used as a fast test flag so the real first char is in position 1
372 this.withoutUnicodeBuffer,
374 result = new char[this.withoutUnicodePtr],
376 this.withoutUnicodePtr);
382 result = new char[length = this.currentPosition - this.startPosition],
// Answers the characters of the current string literal token without its two enclosing quotes.
// The result is two characters shorter than the token in both branches.
388 public final char[] getCurrentTokenSourceString() {
389 //return the token REAL source (aka unicodes are precomputed).
390 //REMOVE the two " that are at the beginning and the end.
393 if (this.withoutUnicodePtr != 0)
394 //0 is used as a fast test flag so the real first char is in position 1
395 System.arraycopy(this.withoutUnicodeBuffer, 2,
396 //2 is 1 (real start) + 1 (to jump over the ")
397 result = new char[this.withoutUnicodePtr - 2], 0, this.withoutUnicodePtr - 2);
// no unicode escape in the token: copy from the source, starting after the opening quote
402 this.startPosition + 1,
403 result = new char[length = this.currentPosition - this.startPosition - 2],
// Copies source[startPosition .. currentPosition - 1] into a new array.
// The text is taken from the source as written; withoutUnicodeBuffer is not consulted.
410 public final char[] getRawTokenSource() {
411 int length = this.currentPosition - this.startPosition;
412 char[] tokenSource = new char[length];
413 System.arraycopy(this.source, this.startPosition, tokenSource, 0, length);
// Copies eofPosition - currentPosition - 1 characters of the source, starting at currentPosition,
// into a new array.
// NOTE(review): the constructor initializes eofPosition to Integer.MAX_VALUE, so this relies on
// eofPosition having been set to a real bound beforehand - confirm against callers.
417 public final char[] getRawTokenSourceEnd() {
418 int length = this.eofPosition - this.currentPosition - 1;
419 char[] sourceEnd = new char[length];
420 System.arraycopy(this.source, this.currentPosition, sourceEnd, 0, length);
// Answers the position of the first character of the current token (startPosition).
424 public int getCurrentTokenStartPosition(){
425 return this.startPosition;
428 * Search the source position corresponding to the end of a given line number
430 * Line numbers are 1-based, and relative to the scanner initialPosition.
431 * Character positions are 0-based.
433 * In case the given line number is inconsistent, answers -1.
// Answers the recorded end position of the given 1-based line, or eofPosition for the line
// numbered lineEnds.length + 1.
// NOTE(review): the bounds below use lineEnds.length, which is the allocated capacity (250 initially),
// whereas getLineEnds() treats only linePtr + 1 entries as valid - confirm this is intended.
// NOTE(review): the statements under the first two guards are not part of this listing; presumably they return -1.
435 public final int getLineEnd(int lineNumber) {
437 if (this.lineEnds == null)
439 if (lineNumber > this.lineEnds.length+1)
443 if (lineNumber == this.lineEnds.length + 1)
444 return this.eofPosition;
445 return this.lineEnds[lineNumber-1]; // recorded end position of the requested line (entry lineNumber-1)
// Copies the first linePtr + 1 entries of lineEnds into a new array of exactly that size.
448 public final int[] getLineEnds() {
449 //return a bounded copy of this.lineEnds
452 System.arraycopy(this.lineEnds, 0, copy = new int[this.linePtr + 1], 0, this.linePtr + 1);
457 * Search the source position corresponding to the beginning of a given line number
459 * Line numbers are 1-based, and relative to the scanner initialPosition.
460 * Character positions are 0-based.
462 * e.g. getLineStart(1) --> 0 indicates that the first line starts at character 0.
464 * In case the given line number is inconsistent, answers -1.
466 * @param lineNumber int
// Answers the position of the first character of the given 1-based line: initialPosition in one
// case, otherwise one past the recorded end of the previous line.
// NOTE(review): the condition guarding 'return this.initialPosition' and the statements under the
// first two guards are not part of this listing; the upper bound uses lineEnds.length (capacity), not linePtr.
469 public final int getLineStart(int lineNumber) {
471 if (this.lineEnds == null)
473 if (lineNumber > this.lineEnds.length + 1)
479 return this.initialPosition;
480 return this.lineEnds[lineNumber-2]+1; // the line starts one character after the lineEnd of the previous line
// Reads the next character of the source into currentCharacter, advances currentPosition and
// answers that character. A unicode escape (a backslash, one or more 'u', then four hexadecimal
// digits) is decoded into a single character.
// NOTE(review): the statements of the catch clause and of the invalid-digit branch are not part of this listing.
482 public final int getNextChar() {
484 if (((this.currentCharacter = this.source[this.currentPosition++]) == '\\')
485 && (this.source[this.currentPosition] == 'u')) {
486 //-------------unicode processing ------------
// skip the first 'u', then any additional 'u' characters
489 this.currentPosition++;
490 while (this.source[this.currentPosition] == 'u') {
491 this.currentPosition++;
// read four digits; a numeric value above 15 (or, for c2..c4 as listed, below 0) is not a valid hexadecimal digit
495 if (((c1 = Character.getNumericValue(this.source[this.currentPosition++])) > 15
497 || ((c2 = Character.getNumericValue(this.source[this.currentPosition++])) > 15 || c2 < 0)
498 || ((c3 = Character.getNumericValue(this.source[this.currentPosition++])) > 15 || c3 < 0)
499 || ((c4 = Character.getNumericValue(this.source[this.currentPosition++])) > 15 || c4 < 0)) {
// combine the four hexadecimal digits into one character
503 this.currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
// remember whether the decoded character is itself a backslash
505 this.unicodeAsBackSlash = this.currentCharacter == '\\';
507 //need the unicode buffer
508 if (this.withoutUnicodePtr == 0) {
509 //buffer all the entries that have been left aside....
510 unicodeInitializeBuffer(this.currentPosition - unicodeSize - this.startPosition);
512 //fill the buffer with the char
513 unicodeStoreAt(++this.withoutUnicodePtr);
514 return this.currentCharacter;
516 } //-------------end unicode processing--------------
// plain character: store it in the unicode buffer only when that buffer is already in use for this token
518 this.unicodeAsBackSlash = false;
519 if (this.withoutUnicodePtr != 0) {
520 unicodeStoreAt(++this.withoutUnicodePtr);
522 return this.currentCharacter;
// reached when the reads above run past the end of the source array
524 } catch (IndexOutOfBoundsException e) {
// Tests whether the next character of the source, after unicode escape decoding, is testedChar.
// On every mismatch or failure path listed below, currentPosition is restored to its entry value (temp).
// NOTE(review): currentCharacter is assigned before the comparison and the listed lines restore only
// currentPosition, so "no side effect" below may not cover currentCharacter - confirm.
528 public final boolean getNextChar(char testedChar) {
530 //handle the case of unicode.
531 //when a unicode appears then we must use a buffer that holds char internal values
532 //At the end of this method currentCharacter holds the new visited char
533 //and currentPosition points right next after it
534 //Both previous lines are true if the currentCharacter is == to the testedChar
535 //On false, no side effect has occurred.
537 //ALL getNextChar.... ARE OPTIMIZED COPIES
539 if (this.currentPosition >= this.source.length) { // handle the obvious case upfront
540 this.unicodeAsBackSlash = false;
// remember the entry position so that it can be restored on failure
544 int temp = this.currentPosition;
546 if (((this.currentCharacter = this.source[this.currentPosition++]) == '\\')
547 && (this.source[this.currentPosition] == 'u')) {
548 //-------------unicode processing ------------
// skip the first 'u', then any additional 'u' characters
551 this.currentPosition++;
552 while (this.source[this.currentPosition] == 'u') {
553 this.currentPosition++;
// read four digits; a numeric value above 15 (or, for c2..c4 as listed, below 0) is not a valid hexadecimal digit
557 if (((c1 = Character.getNumericValue(this.source[this.currentPosition++])) > 15
559 || ((c2 = Character.getNumericValue(this.source[this.currentPosition++])) > 15 || c2 < 0)
560 || ((c3 = Character.getNumericValue(this.source[this.currentPosition++])) > 15 || c3 < 0)
561 || ((c4 = Character.getNumericValue(this.source[this.currentPosition++])) > 15 || c4 < 0)) {
562 this.currentPosition = temp;
566 this.currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
567 if (this.currentCharacter != testedChar) {
568 this.currentPosition = temp;
571 this.unicodeAsBackSlash = this.currentCharacter == '\\';
573 //need the unicode buffer
574 if (this.withoutUnicodePtr == 0) {
575 //buffer all the entries that have been left aside....
576 unicodeInitializeBuffer(this.currentPosition - unicodeSize - this.startPosition);
578 //fill the buffer with the char
579 unicodeStoreAt(++this.withoutUnicodePtr);
582 } //-------------end unicode processing--------------
// plain character path
584 if (this.currentCharacter != testedChar) {
585 this.currentPosition = temp;
588 this.unicodeAsBackSlash = false;
589 if (this.withoutUnicodePtr != 0)
590 unicodeStoreAt(++this.withoutUnicodePtr);
// running past the end of the source array is handled like a mismatch
593 } catch (IndexOutOfBoundsException e) {
594 this.unicodeAsBackSlash = false;
595 this.currentPosition = temp;
// Tests the next character of the source, after unicode escape decoding, against two candidates.
// According to the comment below, the result is 0 for testedChar1, 1 for testedChar2 and -1 otherwise.
// On the mismatch and failure paths listed below, currentPosition is restored to its entry value (temp).
599 public final int getNextChar(char testedChar1, char testedChar2) {
600 //INT 0 : testChar1 \\\\///\\\\ 1 : testedChar2 \\\\///\\\\ -1 : others
601 //test can be done with (x==0) for the first and (x>0) for the second
602 //handle the case of unicode.
603 //when a unicode appears then we must use a buffer that holds char internal values
604 //At the end of this method currentCharacter holds the new visited char
605 //and currentPosition points right next after it
606 //Both previous lines are true if the currentCharacter is == to the testedChar1/2
607 //On false, no side effect has occurred.
609 //ALL getNextChar.... ARE OPTIMIZED COPIES
610 if (this.currentPosition >= this.source.length) // handle the obvious case upfront
// remember the entry position so that it can be restored on failure
613 int temp = this.currentPosition;
616 if (((this.currentCharacter = this.source[this.currentPosition++]) == '\\')
617 && (this.source[this.currentPosition] == 'u')) {
618 //-------------unicode processing ------------
// skip the first 'u', then any additional 'u' characters
621 this.currentPosition++;
622 while (this.source[this.currentPosition] == 'u') {
623 this.currentPosition++;
// read four digits; a numeric value above 15 (or, for c2..c4 as listed, below 0) is not a valid hexadecimal digit
627 if (((c1 = Character.getNumericValue(this.source[this.currentPosition++])) > 15
629 || ((c2 = Character.getNumericValue(this.source[this.currentPosition++])) > 15 || c2 < 0)
630 || ((c3 = Character.getNumericValue(this.source[this.currentPosition++])) > 15 || c3 < 0)
631 || ((c4 = Character.getNumericValue(this.source[this.currentPosition++])) > 15 || c4 < 0)) {
632 this.currentPosition = temp;
636 this.currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
637 if (this.currentCharacter == testedChar1)
640 if (this.currentCharacter == testedChar2)
643 this.currentPosition = temp;
647 //need the unicode buffer
648 if (this.withoutUnicodePtr == 0) {
649 //buffer all the entries that have been left aside....
650 unicodeInitializeBuffer(this.currentPosition - unicodeSize - this.startPosition);
652 //fill the buffer with the char
653 unicodeStoreAt(++this.withoutUnicodePtr);
655 } //-------------end unicode processing--------------
// plain character path
657 if (this.currentCharacter == testedChar1)
660 if (this.currentCharacter == testedChar2)
663 this.currentPosition = temp;
667 if (this.withoutUnicodePtr != 0)
668 unicodeStoreAt(++this.withoutUnicodePtr);
// running past the end of the source array is handled like a mismatch
671 } catch (IndexOutOfBoundsException e) {
672 this.currentPosition = temp;
// Tests whether the next character of the source, after unicode escape decoding, is accepted by isDigit(char).
// On the mismatch and failure paths listed below, currentPosition is restored to its entry value (temp).
// Declared to throw InvalidInputException; none of the listed lines throws it directly, so it
// presumably originates from isDigit - TODO confirm.
676 public final boolean getNextCharAsDigit() throws InvalidInputException {
678 //handle the case of unicode.
679 //when a unicode appears then we must use a buffer that holds char internal values
680 //At the end of this method currentCharacter holds the new visited char
681 //and currentPosition points right next after it
682 //Both previous lines are true if the currentCharacter is a digit
683 //On false, no side effect has occurred.
685 //ALL getNextChar.... ARE OPTIMIZED COPIES
686 if (this.currentPosition >= this.source.length) // handle the obvious case upfront
// remember the entry position so that it can be restored on failure
689 int temp = this.currentPosition;
691 if (((this.currentCharacter = this.source[this.currentPosition++]) == '\\')
692 && (this.source[this.currentPosition] == 'u')) {
693 //-------------unicode processing ------------
// skip the first 'u', then any additional 'u' characters
696 this.currentPosition++;
697 while (this.source[this.currentPosition] == 'u') {
698 this.currentPosition++;
// read four digits; a numeric value above 15 (or, for c2..c4 as listed, below 0) is not a valid hexadecimal digit
702 if (((c1 = Character.getNumericValue(this.source[this.currentPosition++])) > 15
704 || ((c2 = Character.getNumericValue(this.source[this.currentPosition++])) > 15 || c2 < 0)
705 || ((c3 = Character.getNumericValue(this.source[this.currentPosition++])) > 15 || c3 < 0)
706 || ((c4 = Character.getNumericValue(this.source[this.currentPosition++])) > 15 || c4 < 0)) {
707 this.currentPosition = temp;
711 this.currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
712 if (!isDigit(this.currentCharacter)) {
713 this.currentPosition = temp;
717 //need the unicode buffer
718 if (this.withoutUnicodePtr == 0) {
719 //buffer all the entries that have been left aside....
720 unicodeInitializeBuffer(this.currentPosition - unicodeSize - this.startPosition);
722 //fill the buffer with the char
723 unicodeStoreAt(++this.withoutUnicodePtr);
725 } //-------------end unicode processing--------------
// plain character path
727 if (!isDigit(this.currentCharacter)) {
728 this.currentPosition = temp;
731 if (this.withoutUnicodePtr != 0)
732 unicodeStoreAt(++this.withoutUnicodePtr);
// running past the end of the source array is handled like a mismatch
735 } catch (IndexOutOfBoundsException e) {
736 this.currentPosition = temp;
// Tests whether the next character of the source, after unicode escape decoding, is a digit in
// the given radix, i.e. Character.digit(currentCharacter, radix) is not -1.
// On the mismatch and failure paths listed below, currentPosition is restored to its entry value (temp).
740 public final boolean getNextCharAsDigit(int radix) {
742 //handle the case of unicode.
743 //when a unicode appears then we must use a buffer that holds char internal values
744 //At the end of this method currentCharacter holds the new visited char
745 //and currentPosition points right next after it
746 //Both previous lines are true if the currentCharacter is a digit based on radix
747 //On false, no side effect has occurred.
749 //ALL getNextChar.... ARE OPTIMIZED COPIES
750 if (this.currentPosition >= this.source.length) // handle the obvious case upfront
// remember the entry position so that it can be restored on failure
753 int temp = this.currentPosition;
755 if (((this.currentCharacter = this.source[this.currentPosition++]) == '\\')
756 && (this.source[this.currentPosition] == 'u')) {
757 //-------------unicode processing ------------
// skip the first 'u', then any additional 'u' characters
760 this.currentPosition++;
761 while (this.source[this.currentPosition] == 'u') {
762 this.currentPosition++;
// read four digits; a numeric value above 15 (or, for c2..c4 as listed, below 0) is not a valid hexadecimal digit
766 if (((c1 = Character.getNumericValue(this.source[this.currentPosition++])) > 15
768 || ((c2 = Character.getNumericValue(this.source[this.currentPosition++])) > 15 || c2 < 0)
769 || ((c3 = Character.getNumericValue(this.source[this.currentPosition++])) > 15 || c3 < 0)
770 || ((c4 = Character.getNumericValue(this.source[this.currentPosition++])) > 15 || c4 < 0)) {
771 this.currentPosition = temp;
775 this.currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
776 if (Character.digit(this.currentCharacter, radix) == -1) {
777 this.currentPosition = temp;
781 //need the unicode buffer
782 if (this.withoutUnicodePtr == 0) {
783 //buffer all the entries that have been left aside....
784 unicodeInitializeBuffer(this.currentPosition - unicodeSize - this.startPosition);
786 //fill the buffer with the char
787 unicodeStoreAt(++this.withoutUnicodePtr);
789 } //-------------end unicode processing--------------
// plain character path
791 if (Character.digit(this.currentCharacter, radix) == -1) {
792 this.currentPosition = temp;
795 if (this.withoutUnicodePtr != 0)
796 unicodeStoreAt(++this.withoutUnicodePtr);
// running past the end of the source array is handled like a mismatch
799 } catch (IndexOutOfBoundsException e) {
800 this.currentPosition = temp;
// Tests whether the next character of the source, after unicode escape decoding, satisfies
// Character.isJavaIdentifierPart.
// On the mismatch and failure paths listed below, currentPosition is restored to its entry value (temp).
804 public boolean getNextCharAsJavaIdentifierPart() {
806 //handle the case of unicode.
807 //when a unicode appears then we must use a buffer that holds char internal values
808 //At the end of this method currentCharacter holds the new visited char
809 //and currentPosition points right next after it
810 //Both previous lines are true if the currentCharacter is a JavaIdentifierPart
811 //On false, no side effect has occurred.
813 //ALL getNextChar.... ARE OPTIMIZED COPIES
814 if (this.currentPosition >= this.source.length) // handle the obvious case upfront
// remember the entry position so that it can be restored on failure
817 int temp = this.currentPosition;
819 if (((this.currentCharacter = this.source[this.currentPosition++]) == '\\')
820 && (this.source[this.currentPosition] == 'u')) {
821 //-------------unicode processing ------------
// skip the first 'u', then any additional 'u' characters
824 this.currentPosition++;
825 while (this.source[this.currentPosition] == 'u') {
826 this.currentPosition++;
// read four digits; a numeric value above 15 (or, for c2..c4 as listed, below 0) is not a valid hexadecimal digit
830 if (((c1 = Character.getNumericValue(this.source[this.currentPosition++])) > 15
832 || ((c2 = Character.getNumericValue(this.source[this.currentPosition++])) > 15 || c2 < 0)
833 || ((c3 = Character.getNumericValue(this.source[this.currentPosition++])) > 15 || c3 < 0)
834 || ((c4 = Character.getNumericValue(this.source[this.currentPosition++])) > 15 || c4 < 0)) {
835 this.currentPosition = temp;
839 this.currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
840 if (!Character.isJavaIdentifierPart(this.currentCharacter)) {
841 this.currentPosition = temp;
845 //need the unicode buffer
846 if (this.withoutUnicodePtr == 0) {
847 //buffer all the entries that have been left aside....
848 unicodeInitializeBuffer(this.currentPosition - unicodeSize - this.startPosition);
850 //fill the buffer with the char
851 unicodeStoreAt(++this.withoutUnicodePtr);
853 } //-------------end unicode processing--------------
// plain character path
855 if (!Character.isJavaIdentifierPart(this.currentCharacter)) {
856 this.currentPosition = temp;
860 if (this.withoutUnicodePtr != 0)
861 unicodeStoreAt(++this.withoutUnicodePtr);
// running past the end of the source array is handled like a mismatch
864 } catch (IndexOutOfBoundsException e) {
865 this.currentPosition = temp;
869 public int getNextToken() throws InvalidInputException {
872 jumpOverMethodBody();
874 return this.currentPosition > this.source.length ? TokenNameEOF : TokenNameRBRACE;
878 while (true) { //loop for jumping over comments
879 this.withoutUnicodePtr = 0;
880 //start with a new token (even comment written with unicode )
882 // ---------Consume white space and handles startPosition---------
883 whiteStart = this.currentPosition;
884 boolean isWhiteSpace, hasWhiteSpaces = false;
887 this.startPosition = this.currentPosition;
888 boolean checkIfUnicode = false;
890 checkIfUnicode = ((this.currentCharacter = this.source[this.currentPosition++]) == '\\')
891 && (this.source[this.currentPosition] == 'u');
892 } catch(IndexOutOfBoundsException e) {
893 if (this.tokenizeWhiteSpace && (whiteStart != this.currentPosition - 1)) {
894 // reposition scanner in case we are interested by spaces as tokens
895 this.currentPosition--;
896 this.startPosition = whiteStart;
897 return TokenNameWHITESPACE;
899 if (this.currentPosition > this.eofPosition)
902 if (checkIfUnicode) {
903 isWhiteSpace = jumpOverUnicodeWhiteSpace();
907 if ((this.currentCharacter == '\r') || (this.currentCharacter == '\n')) {
908 checkNonExternalizedString();
909 if (this.recordLineSeparator) {
912 this.currentLine = null;
916 (this.currentCharacter == ' ') || CharOperation.isWhitespace(this.currentCharacter);
919 hasWhiteSpaces = true;
921 } while (isWhiteSpace);
922 if (this.tokenizeWhiteSpace && hasWhiteSpaces) {
923 // reposition scanner in case we are interested by spaces as tokens
924 this.currentPosition-=offset;
925 this.startPosition = whiteStart;
926 return TokenNameWHITESPACE;
928 //little trick to get out in the middle of a source compuation
929 if (this.currentPosition > this.eofPosition)
932 // ---------Identify the next token-------------
934 switch (this.currentCharacter) {
936 /* if (this.sourceLevel >= ClassFileConstants.JDK1_5) {
939 return TokenNameERROR;
943 return TokenNameLPAREN;
945 return TokenNameRPAREN;
947 return TokenNameLBRACE;
949 return TokenNameRBRACE;
951 return TokenNameLBRACKET;
953 return TokenNameRBRACKET;
955 return TokenNameSEMICOLON;
957 return TokenNameCOMMA;
959 if (getNextCharAsDigit()) {
960 return scanNumber(true);
962 /* if (this.sourceLevel >= ClassFileConstants.JDK1_5) {
963 int temp = this.currentPosition;
964 if (getNextChar('.')) {
965 if (getNextChar('.')) {
966 return TokenNameELLIPSIS;
968 this.currentPosition = temp;
972 this.currentPosition = temp;
978 int temp = this.currentPosition;
979 if (getNextChar('.')) {
980 if (getNextChar('.')) {
981 return TokenNameELLIPSIS;
983 this.currentPosition = temp;
987 this.currentPosition = temp;
993 if ((test = getNextChar('+', '=')) == 0)
994 return TokenNamePLUS_PLUS;
996 return TokenNamePLUS_EQUAL;
997 return TokenNamePLUS;
1002 if ((test = getNextChar('-', '=')) == 0)
1003 return TokenNameMINUS_MINUS;
1005 return TokenNameMINUS_EQUAL;
1006 return TokenNameMINUS;
1009 return TokenNameTWIDDLE;
1011 if (getNextChar('='))
1012 return TokenNameNOT_EQUAL;
1013 return TokenNameNOT;
1015 if (getNextChar('='))
1016 return TokenNameMULTIPLY_EQUAL;
1017 return TokenNameMULTIPLY;
1019 if (getNextChar('='))
1020 return TokenNameREMAINDER_EQUAL;
1021 return TokenNameREMAINDER;
1025 if ((test = getNextChar('=', '<')) == 0)
1026 return TokenNameLESS_EQUAL;
1028 if (getNextChar('='))
1029 return TokenNameLEFT_SHIFT_EQUAL;
1030 return TokenNameLEFT_SHIFT;
1032 return TokenNameLESS;
1037 if (this.returnOnlyGreater) {
1038 return TokenNameGREATER;
1040 if ((test = getNextChar('=', '>')) == 0)
1041 return TokenNameGREATER_EQUAL;
1043 if ((test = getNextChar('=', '>')) == 0)
1044 return TokenNameRIGHT_SHIFT_EQUAL;
1046 if (getNextChar('='))
1047 return TokenNameUNSIGNED_RIGHT_SHIFT_EQUAL;
1048 return TokenNameUNSIGNED_RIGHT_SHIFT;
1050 return TokenNameRIGHT_SHIFT;
1052 return TokenNameGREATER;
1055 if (getNextChar('='))
1056 return TokenNameEQUAL_EQUAL;
1057 return TokenNameEQUAL;
1061 if ((test = getNextChar('&', '=')) == 0)
1062 return TokenNameAND_AND;
1064 return TokenNameAND_EQUAL;
1065 return TokenNameAND;
1070 if ((test = getNextChar('|', '=')) == 0)
1071 return TokenNameOR_OR;
1073 return TokenNameOR_EQUAL;
1077 if (getNextChar('='))
1078 return TokenNameXOR_EQUAL;
1079 return TokenNameXOR;
1081 return TokenNameQUESTION;
1083 return TokenNameCOLON;
1087 if ((test = getNextChar('\n', '\r')) == 0) {
1088 throw new InvalidInputException(INVALID_CHARACTER_CONSTANT);
1091 // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
1092 for (int lookAhead = 0; lookAhead < 3; lookAhead++) {
1093 if (this.currentPosition + lookAhead == this.source.length)
1095 if (this.source[this.currentPosition + lookAhead] == '\n')
1097 if (this.source[this.currentPosition + lookAhead] == '\'') {
1098 this.currentPosition += lookAhead + 1;
1102 throw new InvalidInputException(INVALID_CHARACTER_CONSTANT);
1105 if (getNextChar('\'')) {
1106 // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
1107 for (int lookAhead = 0; lookAhead < 3; lookAhead++) {
1108 if (this.currentPosition + lookAhead == this.source.length)
1110 if (this.source[this.currentPosition + lookAhead] == '\n')
1112 if (this.source[this.currentPosition + lookAhead] == '\'') {
1113 this.currentPosition += lookAhead + 1;
1117 throw new InvalidInputException(INVALID_CHARACTER_CONSTANT);
1119 if (getNextChar('\\'))
1120 scanEscapeCharacter();
1121 else { // consume next character
1122 this.unicodeAsBackSlash = false;
1123 boolean checkIfUnicode = false;
1125 checkIfUnicode = ((this.currentCharacter = this.source[this.currentPosition++]) == '\\')
1126 && (this.source[this.currentPosition] == 'u');
1127 } catch(IndexOutOfBoundsException e) {
1128 this.currentPosition--;
1129 throw new InvalidInputException(INVALID_CHARACTER_CONSTANT);
1131 if (checkIfUnicode) {
1132 getNextUnicodeChar();
1134 if (this.withoutUnicodePtr != 0) {
1135 unicodeStoreAt(++this.withoutUnicodePtr);
1139 if (getNextChar('\''))
1140 return TokenNameCharacterLiteral;
1141 // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
1142 for (int lookAhead = 0; lookAhead < 20; lookAhead++) {
1143 if (this.currentPosition + lookAhead == this.source.length)
1145 if (this.source[this.currentPosition + lookAhead] == '\n')
1147 if (this.source[this.currentPosition + lookAhead] == '\'') {
1148 this.currentPosition += lookAhead + 1;
1152 throw new InvalidInputException(INVALID_CHARACTER_CONSTANT);
1155 // consume next character
1156 this.unicodeAsBackSlash = false;
1157 boolean isUnicode = false;
1158 if (((this.currentCharacter = this.source[this.currentPosition++]) == '\\')
1159 && (this.source[this.currentPosition] == 'u')) {
1160 getNextUnicodeChar();
1163 if (this.withoutUnicodePtr != 0) {
1164 unicodeStoreAt(++this.withoutUnicodePtr);
1168 while (this.currentCharacter != '"') {
1169 /**** \r and \n are not valid in string literals ****/
1170 if ((this.currentCharacter == '\n') || (this.currentCharacter == '\r')) {
1171 // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
1173 int start = this.currentPosition;
1174 for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1175 if (this.currentPosition >= this.eofPosition) {
1176 this.currentPosition = start;
1179 if (((this.currentCharacter = this.source[this.currentPosition++]) == '\\') && (this.source[this.currentPosition] == 'u')) {
1181 getNextUnicodeChar();
1185 if (!isUnicode && this.currentCharacter == '\n') {
1186 this.currentPosition--; // set current position on new line character
1189 if (this.currentCharacter == '\"') {
1190 throw new InvalidInputException(INVALID_CHAR_IN_STRING);
1194 this.currentPosition--; // set current position on new line character
1196 throw new InvalidInputException(INVALID_CHAR_IN_STRING);
1198 if (this.currentCharacter == '\\') {
1199 int escapeSize = this.currentPosition;
1200 boolean backSlashAsUnicodeInString = this.unicodeAsBackSlash;
1201 //scanEscapeCharacter make a side effect on this value and we need the previous value few lines down this one
1202 scanEscapeCharacter();
1203 escapeSize = this.currentPosition - escapeSize;
1204 if (this.withoutUnicodePtr == 0) {
1205 //buffer all the entries that have been left aside....
1206 unicodeInitializeBuffer(this.currentPosition - escapeSize - 1 - this.startPosition);
1207 unicodeStoreAt(++this.withoutUnicodePtr);
1208 } else { //overwrite the / in the buffer
1209 unicodeStoreAt(this.withoutUnicodePtr);
1210 if (backSlashAsUnicodeInString) { //there are TWO \ in the stream where only one is correct
1211 this.withoutUnicodePtr--;
1215 // consume next character
1216 this.unicodeAsBackSlash = false;
1217 if (((this.currentCharacter = this.source[this.currentPosition++]) == '\\')
1218 && (this.source[this.currentPosition] == 'u')) {
1219 getNextUnicodeChar();
1221 if (this.withoutUnicodePtr != 0) {
1222 unicodeStoreAt(++this.withoutUnicodePtr);
1227 } catch (IndexOutOfBoundsException e) {
1228 this.currentPosition--;
1229 throw new InvalidInputException(UNTERMINATED_STRING);
1230 } catch (InvalidInputException e) {
1231 if (e.getMessage().equals(INVALID_ESCAPE)) {
1232 // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
1233 for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1234 if (this.currentPosition + lookAhead == this.source.length)
1236 if (this.source[this.currentPosition + lookAhead] == '\n')
1238 if (this.source[this.currentPosition + lookAhead] == '\"') {
1239 this.currentPosition += lookAhead + 1;
1247 if (this.checkNonExternalizedStringLiterals){ // check for presence of NLS tags //$NON-NLS-?$ where ? is an int.
1248 if (this.currentLine == null) {
1249 this.currentLine = new NLSLine();
1251 this.currentLine.add(
1253 getCurrentTokenSourceString(),
1255 this.currentPosition - 1));
1257 return TokenNameStringLiteral;
1261 if ((test = getNextChar('/', '*')) == 0) { //line comment
1262 this.lastCommentLinePosition = this.currentPosition;
1263 try { //get the next char
1264 if (((this.currentCharacter = this.source[this.currentPosition++]) == '\\')
1265 && (this.source[this.currentPosition] == 'u')) {
1266 //-------------unicode traitement ------------
1267 int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1268 this.currentPosition++;
1269 while (this.source[this.currentPosition] == 'u') {
1270 this.currentPosition++;
1272 if ((c1 = Character.getNumericValue(this.source[this.currentPosition++])) > 15
1274 || (c2 = Character.getNumericValue(this.source[this.currentPosition++])) > 15
1276 || (c3 = Character.getNumericValue(this.source[this.currentPosition++])) > 15
1278 || (c4 = Character.getNumericValue(this.source[this.currentPosition++])) > 15
1280 throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
1282 this.currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1286 //handle the \\u case manually into comment
1287 if (this.currentCharacter == '\\') {
1288 if (this.source[this.currentPosition] == '\\')
1289 this.currentPosition++;
1290 } //jump over the \\
1291 boolean isUnicode = false;
1292 while (this.currentCharacter != '\r' && this.currentCharacter != '\n') {
1293 this.lastCommentLinePosition = this.currentPosition;
1296 if (((this.currentCharacter = this.source[this.currentPosition++]) == '\\')
1297 && (this.source[this.currentPosition] == 'u')) {
1299 //-------------unicode traitement ------------
1300 int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1301 this.currentPosition++;
1302 while (this.source[this.currentPosition] == 'u') {
1303 this.currentPosition++;
1305 if ((c1 = Character.getNumericValue(this.source[this.currentPosition++])) > 15
1307 || (c2 = Character.getNumericValue(this.source[this.currentPosition++])) > 15
1309 || (c3 = Character.getNumericValue(this.source[this.currentPosition++])) > 15
1311 || (c4 = Character.getNumericValue(this.source[this.currentPosition++])) > 15
1313 throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
1315 this.currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1318 //handle the \\u case manually into comment
1319 if (this.currentCharacter == '\\') {
1320 if (this.source[this.currentPosition] == '\\')
1321 this.currentPosition++;
1322 } //jump over the \\
1325 * We need to completely consume the line break
1327 if (this.currentCharacter == '\r'
1328 && this.source.length > this.currentPosition) {
1329 if (this.source[this.currentPosition] == '\n') {
1330 this.currentPosition++;
1331 this.currentCharacter = '\n';
1332 } else if ((this.source[this.currentPosition] == '\\')
1333 && (this.source[this.currentPosition + 1] == 'u')) {
1336 int index = this.currentPosition + 1;
1338 while (this.source[index] == 'u') {
1341 //-------------unicode traitement ------------
1342 int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1343 if ((c1 = Character.getNumericValue(this.source[index++])) > 15
1345 || (c2 = Character.getNumericValue(this.source[index++])) > 15
1347 || (c3 = Character.getNumericValue(this.source[index++])) > 15
1349 || (c4 = Character.getNumericValue(this.source[index++])) > 15
1351 this.currentPosition = index;
1352 throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
1354 unicodeChar = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1356 if (unicodeChar == '\n') {
1357 this.currentPosition = index;
1358 this.currentCharacter = '\n';
1362 recordComment(TokenNameCOMMENT_LINE);
1363 if (this.taskTags != null) checkTaskTag(this.startPosition, this.currentPosition);
1364 if ((this.currentCharacter == '\r') || (this.currentCharacter == '\n')) {
1365 checkNonExternalizedString();
1366 if (this.recordLineSeparator) {
1368 pushUnicodeLineSeparator();
1370 pushLineSeparator();
1373 this.currentLine = null;
1376 if (this.tokenizeComments) {
1377 return TokenNameCOMMENT_LINE;
1379 } catch (IndexOutOfBoundsException e) {
1380 this.currentPosition--;
1381 recordComment(TokenNameCOMMENT_LINE);
1382 if (this.taskTags != null) checkTaskTag(this.startPosition, this.currentPosition);
1383 if (this.tokenizeComments) {
1384 return TokenNameCOMMENT_LINE;
1386 this.currentPosition++;
1391 if (test > 0) { //traditional and javadoc comment
1392 try { //get the next char
1393 boolean isJavadoc = false, star = false;
1394 boolean isUnicode = false;
1395 // consume next character
1396 this.unicodeAsBackSlash = false;
1397 if (((this.currentCharacter = this.source[this.currentPosition++]) == '\\')
1398 && (this.source[this.currentPosition] == 'u')) {
1399 getNextUnicodeChar();
1403 if (this.withoutUnicodePtr != 0) {
1404 unicodeStoreAt(++this.withoutUnicodePtr);
1408 if (this.currentCharacter == '*') {
1412 if ((this.currentCharacter == '\r') || (this.currentCharacter == '\n')) {
1413 checkNonExternalizedString();
1414 if (this.recordLineSeparator) {
1416 pushUnicodeLineSeparator();
1418 pushLineSeparator();
1421 this.currentLine = null;
1425 if (((this.currentCharacter = this.source[this.currentPosition++]) == '\\')
1426 && (this.source[this.currentPosition] == 'u')) {
1427 //-------------unicode traitement ------------
1428 getNextUnicodeChar();
1433 //handle the \\u case manually into comment
1434 if (this.currentCharacter == '\\') {
1435 if (this.source[this.currentPosition] == '\\')
1436 this.currentPosition++; //jump over the \\
1438 // empty comment is not a javadoc /**/
1439 if (this.currentCharacter == '/') {
1442 //loop until end of comment */
1443 while ((this.currentCharacter != '/') || (!star)) {
1444 if ((this.currentCharacter == '\r') || (this.currentCharacter == '\n')) {
1445 checkNonExternalizedString();
1446 if (this.recordLineSeparator) {
1448 pushUnicodeLineSeparator();
1450 pushLineSeparator();
1453 this.currentLine = null;
1456 star = this.currentCharacter == '*';
1458 if (((this.currentCharacter = this.source[this.currentPosition++]) == '\\')
1459 && (this.source[this.currentPosition] == 'u')) {
1460 //-------------unicode traitement ------------
1461 getNextUnicodeChar();
1466 //handle the \\u case manually into comment
1467 if (this.currentCharacter == '\\') {
1468 if (this.source[this.currentPosition] == '\\')
1469 this.currentPosition++;
1470 } //jump over the \\
1472 int token = isJavadoc ? TokenNameCOMMENT_JAVADOC : TokenNameCOMMENT_BLOCK;
1473 recordComment(token);
1474 if (this.taskTags != null) checkTaskTag(this.startPosition, this.currentPosition);
1475 if (this.tokenizeComments) {
1478 return TokenNameCOMMENT_JAVADOC;
1479 return TokenNameCOMMENT_BLOCK;
1483 } catch (IndexOutOfBoundsException e) {
1484 this.currentPosition--;
1485 throw new InvalidInputException(UNTERMINATED_COMMENT);
1489 if (getNextChar('='))
1490 return TokenNameDIVIDE_EQUAL;
1491 return TokenNameDIVIDE;
1495 return TokenNameEOF;
1496 //the atEnd may not be <currentPosition == source.length> if source is only some part of a real (external) stream
1497 throw new InvalidInputException("Ctrl-Z"); //$NON-NLS-1$
1500 if (Character.isJavaIdentifierStart(this.currentCharacter))
1501 return scanIdentifierOrKeyword();
1502 if (isDigit(this.currentCharacter)) {
1503 return scanNumber(false);
1505 return TokenNameERROR;
1508 } //-----------------end switch while try--------------------
1509 catch (IndexOutOfBoundsException e) {
1510 if (this.tokenizeWhiteSpace && (whiteStart != this.currentPosition - 1)) {
1511 // reposition scanner in case we are interested by spaces as tokens
1512 this.currentPosition--;
1513 this.startPosition = whiteStart;
1514 return TokenNameWHITESPACE;
1517 return TokenNameEOF;
// Decodes one unicode escape (backslash, one or more u, four hex digits) from the source.
// Call sites in this file invoke it after consuming the backslash and seeing a u at
// currentPosition. On success currentCharacter holds the decoded char, currentPosition
// points just past the escape, the char has been appended through unicodeStoreAt, and
// unicodeAsBackSlash records whether the decoded char is itself a backslash.
// Throws InvalidInputException(INVALID_UNICODE_ESCAPE) for a malformed escape or when the
// escape runs past the end of the source array.
// NOTE(review): this listing omits some lines (see the gaps in the embedded numbering);
// only the visible statements are described here.
1519 public final void getNextUnicodeChar()
1520 throws InvalidInputException {
1522 //handle the case of unicode.
1523 //when a unicode appears then we must use a buffer that holds char internal values
1524 //At the end of this method currentCharacter holds the new visited char
1525 //and currentPosition points right next after it
1527 //ALL getNextChar.... ARE OPTIMIZED COPIES
// unicodeSize starts at 6, the length of a backslash, a single u and four hex digits
// (presumably bumped for every additional u in a line not shown here - confirm)
1530 int c1 = 0, c2 = 0, c3 = 0, c4 = 0, unicodeSize = 6;
// step over the first u, then over any further u characters
1531 this.currentPosition++;
1532 while (this.source[this.currentPosition] == 'u') {
1533 this.currentPosition++;
// read the four digits; a numeric value above 15 is not a hex digit
1537 if ((c1 = Character.getNumericValue(this.source[this.currentPosition++])) > 15
1539 || (c2 = Character.getNumericValue(this.source[this.currentPosition++])) > 15
1541 || (c3 = Character.getNumericValue(this.source[this.currentPosition++])) > 15
1543 || (c4 = Character.getNumericValue(this.source[this.currentPosition++])) > 15
1545 throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
// combine the four hex digits, most significant first
1547 this.currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1548 //need the unicode buffer
1549 if (this.withoutUnicodePtr == 0) {
1550 //buffer all the entries that have been left aside....
// the argument is the number of chars between startPosition and the start of this escape
1551 unicodeInitializeBuffer(this.currentPosition - unicodeSize - this.startPosition);
1553 //fill the buffer with the char
1554 unicodeStoreAt(++this.withoutUnicodePtr);
// remember whether the escape produced a backslash
1556 this.unicodeAsBackSlash = this.currentCharacter == '\\';
1557 } catch (ArrayIndexOutOfBoundsException e) {
// the escape was cut short by the end of the source: step back and report it as invalid
1558 this.currentPosition--;
1559 throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
// Accessor declared to return a char array.
// NOTE(review): the body is not part of this listing; presumably it returns the source
// buffer this scanner reads from (this.source) - confirm against the full file.
1563 public char[] getSource(){
// Digit test used by the scanner before scanning a number.
// The visible code first asks Character.isDigit(c) and, on some path inside that branch or
// after it, throws InvalidInputException(Scanner.INVALID_DIGIT).
// NOTE(review): lines 1568-1580 of the original are missing from this listing, so the exact
// accept / reject conditions and the return values cannot be stated from here - presumably
// plain ASCII digits are accepted and other unicode digits rejected; confirm.
1566 protected boolean isDigit(char c) throws InvalidInputException {
1567 if (Character.isDigit(c)) {
1581 throw new InvalidInputException(Scanner.INVALID_DIGIT);
// Advances the scanner across the content of a method body without returning tokens.
// Visible behaviour: whitespace is skipped (line ends are recorded when recordLineSeparator
// is set), character and string literals, line and block comments, identifiers and numbers
// are consumed, and comments are still reported through recordComment.
// The method declares no checked exception: InvalidInputException and
// IndexOutOfBoundsException are caught inside it.
// NOTE(review): this listing omits lines (see the gaps in the embedded numbering), including
// the case labels of the switch and presumably the brace counting; the comments below are
// hedged wherever they rely on such omitted lines.
1586 /* Tokenize a method body, assuming that curly brackets are properly balanced.
1588 public final void jumpOverMethodBody() {
// reset the flag that pushLineSeparator consults when merging a CR followed by a LF
1590 this.wasAcr = false;
1593 while (true) { //loop for jumping over comments
1594 // ---------Consume white space and handles startPosition---------
1595 boolean isWhiteSpace;
1597 this.startPosition = this.currentPosition;
// a backslash followed by u is a unicode escape and is decoded by the helper
1598 if (((this.currentCharacter = this.source[this.currentPosition++]) == '\\')
1599 && (this.source[this.currentPosition] == 'u')) {
1600 isWhiteSpace = jumpOverUnicodeWhiteSpace();
1602 if (this.recordLineSeparator
1603 && ((this.currentCharacter == '\r') || (this.currentCharacter == '\n')))
1604 pushLineSeparator();
1605 isWhiteSpace = CharOperation.isWhitespace(this.currentCharacter);
1607 } while (isWhiteSpace);
1609 // -------consume token until } is found---------
1610 switch (this.currentCharacter) {
// presumably the character literal case (label not shown): an escape sequence is handled
// by scanEscapeCharacter, anything else is consumed as one, possibly unicode-escaped, char
1622 test = getNextChar('\\');
1625 scanEscapeCharacter();
1626 } catch (InvalidInputException ex) {
1630 try { // consume next character
1631 this.unicodeAsBackSlash = false;
1632 if (((this.currentCharacter = this.source[this.currentPosition++]) == '\\')
1633 && (this.source[this.currentPosition] == 'u')) {
1634 getNextUnicodeChar();
// once the unicode buffer is in use every consumed char has to be copied into it
1636 if (this.withoutUnicodePtr != 0) {
1637 unicodeStoreAt(++this.withoutUnicodePtr);
1640 } catch (InvalidInputException ex) {
// presumably the string literal case (label not shown): read the first char of the string
1649 try { // consume next character
1650 this.unicodeAsBackSlash = false;
1651 if (((this.currentCharacter = this.source[this.currentPosition++]) == '\\')
1652 && (this.source[this.currentPosition] == 'u')) {
1653 getNextUnicodeChar();
1655 if (this.withoutUnicodePtr != 0) {
1656 unicodeStoreAt(++this.withoutUnicodePtr);
1659 } catch (InvalidInputException ex) {
// keep consuming until the closing double quote, giving up at the end of the line
1662 while (this.currentCharacter != '"') {
1663 if (this.currentCharacter == '\r'){
// swallow the LF of a CR LF pair before leaving the loop
1664 if (this.source[this.currentPosition] == '\n') this.currentPosition++;
1665 break; // the string cannot go further that the line
1667 if (this.currentCharacter == '\n'){
1668 break; // the string cannot go further that the line
1670 if (this.currentCharacter == '\\') {
1672 scanEscapeCharacter();
1673 } catch (InvalidInputException ex) {
1677 try { // consume next character
1678 this.unicodeAsBackSlash = false;
1679 if (((this.currentCharacter = this.source[this.currentPosition++]) == '\\')
1680 && (this.source[this.currentPosition] == 'u')) {
1681 getNextUnicodeChar();
1683 if (this.withoutUnicodePtr != 0) {
1684 unicodeStoreAt(++this.withoutUnicodePtr);
1687 } catch (InvalidInputException ex) {
1691 } catch (IndexOutOfBoundsException e) {
// a slash was read: getNextChar returns 0 when the next char is a second slash and a
// positive value when it is a star (see the two tests on the variable test below)
1698 if ((test = getNextChar('/', '*')) == 0) { //line comment
1700 this.lastCommentLinePosition = this.currentPosition;
1702 if (((this.currentCharacter = this.source[this.currentPosition++]) == '\\')
1703 && (this.source[this.currentPosition] == 'u')) {
1704 //-------------unicode processing ------------
1705 int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1706 this.currentPosition++;
1707 while (this.source[this.currentPosition] == 'u') {
1708 this.currentPosition++;
1710 if ((c1 = Character.getNumericValue(this.source[this.currentPosition++])) > 15
1712 || (c2 = Character.getNumericValue(this.source[this.currentPosition++])) > 15
1714 || (c3 = Character.getNumericValue(this.source[this.currentPosition++])) > 15
1716 || (c4 = Character.getNumericValue(this.source[this.currentPosition++])) > 15
1717 || c4 < 0) { //error don't care of the value
// a malformed escape inside a comment is tolerated: a harmless placeholder char is used
1718 this.currentCharacter = 'A';
1719 } //something different from \n and \r
1721 this.currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1724 //handle the \\u case manually into comment
1725 if (this.currentCharacter == '\\') {
1726 if (this.source[this.currentPosition] == '\\')
1727 this.currentPosition++;
1728 } //jump over the \\
1729 boolean isUnicode = false;
// consume the rest of the line comment, one (possibly unicode-escaped) char at a time
1730 while (this.currentCharacter != '\r' && this.currentCharacter != '\n') {
1731 this.lastCommentLinePosition = this.currentPosition;
1734 if (((this.currentCharacter = this.source[this.currentPosition++]) == '\\')
1735 && (this.source[this.currentPosition] == 'u')) {
1737 //-------------unicode processing ------------
1738 int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1739 this.currentPosition++;
1740 while (this.source[this.currentPosition] == 'u') {
1741 this.currentPosition++;
1743 if ((c1 = Character.getNumericValue(this.source[this.currentPosition++])) > 15
1745 || (c2 = Character.getNumericValue(this.source[this.currentPosition++])) > 15
1747 || (c3 = Character.getNumericValue(this.source[this.currentPosition++])) > 15
1749 || (c4 = Character.getNumericValue(this.source[this.currentPosition++])) > 15
1750 || c4 < 0) { //error don't care of the value
1751 this.currentCharacter = 'A';
1752 } //something different from \n and \r
1754 this.currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1757 //handle the \\u case manually into comment
1758 if (this.currentCharacter == '\\') {
1759 if (this.source[this.currentPosition] == '\\')
1760 this.currentPosition++;
1761 } //jump over the \\
1764 * We need to completely consume the line break
// after a CR, also swallow a following LF, whether written literally or as a unicode escape
1766 if (this.currentCharacter == '\r'
1767 && this.source.length > this.currentPosition) {
1768 if (this.source[this.currentPosition] == '\n') {
1769 this.currentPosition++;
1770 this.currentCharacter = '\n';
1771 } else if ((this.source[this.currentPosition] == '\\')
1772 && (this.source[this.currentPosition + 1] == 'u')) {
// decode the escape with a private index so that currentPosition only moves if it is a LF
1775 int index = this.currentPosition + 1;
1777 while (this.source[index] == 'u') {
1780 //-------------unicode processing ------------
1781 int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1782 if ((c1 = Character.getNumericValue(this.source[index++])) > 15
1784 || (c2 = Character.getNumericValue(this.source[index++])) > 15
1786 || (c3 = Character.getNumericValue(this.source[index++])) > 15
1788 || (c4 = Character.getNumericValue(this.source[index++])) > 15
1789 || c4 < 0) { //error don't care of the value
1792 unicodeChar = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1794 if (unicodeChar == '\n') {
1795 this.currentPosition = index;
1796 this.currentCharacter = '\n';
// the comment is reported even though no token is returned from this method
1800 recordComment(TokenNameCOMMENT_LINE);
1801 if (this.recordLineSeparator
1802 && ((this.currentCharacter == '\r') || (this.currentCharacter == '\n'))) {
1804 pushUnicodeLineSeparator();
1806 pushLineSeparator();
1809 } catch (IndexOutOfBoundsException e) {
1810 //an eof will then be generated
// the line comment ran into the end of the source: undo the last increment and record it
1811 this.currentPosition--;
1812 recordComment(TokenNameCOMMENT_LINE);
1813 if (!this.tokenizeComments) {
1814 this.currentPosition++;
1819 if (test > 0) { //traditional and javadoc comment
1820 boolean isJavadoc = false;
1821 try { //get the next char
// star remembers whether the previously read char was a star, to detect the closing pair
1822 boolean star = false;
1823 boolean isUnicode = false;
1824 // consume next character
1825 this.unicodeAsBackSlash = false;
1826 if (((this.currentCharacter = this.source[this.currentPosition++]) == '\\')
1827 && (this.source[this.currentPosition] == 'u')) {
1828 getNextUnicodeChar();
1832 if (this.withoutUnicodePtr != 0) {
1833 unicodeStoreAt(++this.withoutUnicodePtr);
1837 if (this.currentCharacter == '*') {
1841 if ((this.currentCharacter == '\r') || (this.currentCharacter == '\n')) {
1842 if (this.recordLineSeparator) {
1844 pushUnicodeLineSeparator();
1846 pushLineSeparator();
1849 this.currentLine = null;
1853 if (((this.currentCharacter = this.source[this.currentPosition++]) == '\\')
1854 && (this.source[this.currentPosition] == 'u')) {
1855 //-------------unicode processing ------------
1856 getNextUnicodeChar();
1861 //handle the \\u case manually into comment
1862 if (this.currentCharacter == '\\') {
1863 if (this.source[this.currentPosition] == '\\')
1864 this.currentPosition++; //jump over the \\
1866 // empty comment is not a javadoc /**/
1867 if (this.currentCharacter == '/') {
1870 //loop until end of comment */
1871 while ((this.currentCharacter != '/') || (!star)) {
1872 if ((this.currentCharacter == '\r') || (this.currentCharacter == '\n')) {
1873 if (this.recordLineSeparator) {
1875 pushUnicodeLineSeparator();
1877 pushLineSeparator();
1880 this.currentLine = null;
1883 star = this.currentCharacter == '*';
1885 if (((this.currentCharacter = this.source[this.currentPosition++]) == '\\')
1886 && (this.source[this.currentPosition] == 'u')) {
1887 //-------------unicode processing ------------
1888 getNextUnicodeChar();
1893 //handle the \\u case manually into comment
1894 if (this.currentCharacter == '\\') {
1895 if (this.source[this.currentPosition] == '\\')
1896 this.currentPosition++;
1897 } //jump over the \\
// report the finished comment with the kind decided while scanning its first chars
1899 recordComment(isJavadoc ? TokenNameCOMMENT_JAVADOC : TokenNameCOMMENT_BLOCK);
1900 } catch (IndexOutOfBoundsException e) {
// presumably the default case (label not shown): identifiers, keywords and numbers are
// scanned only to be skipped
1909 if (Character.isJavaIdentifierStart(this.currentCharacter)) {
1910 scanIdentifierOrKeyword();
1913 if (isDigit(this.currentCharacter)) {
1916 } catch (InvalidInputException ex) {
1923 //-----------------end switch while try--------------------
// both exceptions end the skip here; the handler bodies are not part of this listing
1924 } catch (IndexOutOfBoundsException e) {
1926 } catch (InvalidInputException e) {
// Decodes the unicode escape at the current position while skipping whitespace.
// Callers in this file invoke it after consuming a backslash and seeing a u at
// currentPosition. The decoded char is left in currentCharacter; when it is not whitespace
// it is also stored through unicodeStoreAt.
// Throws InvalidInputException(INVALID_UNICODE_ESCAPE) for a malformed escape or when the
// escape runs past the end of the source.
// NOTE(review): the return statements are not part of this listing; from the call site
// (the result is assigned to isWhiteSpace) it presumably returns true exactly when the
// decoded char is whitespace - confirm against the full file.
1931 public final boolean jumpOverUnicodeWhiteSpace() throws InvalidInputException {
1933 //handle the case of unicode. Jump over the next whiteSpace
1934 //making startPosition pointing on the next available char
1935 //On false, the currentCharacter is filled up with a potential
// reset the flag that pushLineSeparator consults when merging a CR followed by a LF
1939 this.wasAcr = false;
// 6 is the length of a backslash, a single u and four hex digits
1941 int unicodeSize = 6;
// step over the first u, then over any further u characters
1942 this.currentPosition++;
1943 while (this.source[this.currentPosition] == 'u') {
1944 this.currentPosition++;
// each of the four digits must have a numeric value in the hex range
1948 if (((c1 = Character.getNumericValue(this.source[this.currentPosition++])) > 15
1950 || ((c2 = Character.getNumericValue(this.source[this.currentPosition++])) > 15 || c2 < 0)
1951 || ((c3 = Character.getNumericValue(this.source[this.currentPosition++])) > 15 || c3 < 0)
1952 || ((c4 = Character.getNumericValue(this.source[this.currentPosition++])) > 15 || c4 < 0)) {
1953 throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
// combine the four hex digits, most significant first
1956 this.currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1957 if (CharOperation.isWhitespace(this.currentCharacter))
1960 //buffer the new char which is not a white space
1961 unicodeStoreAt(++this.withoutUnicodePtr);
1962 //this.withoutUnicodePtr == 1 is true here
1964 } catch (IndexOutOfBoundsException e){
// the escape was cut short by the end of the source: step back and report it as invalid
1965 this.currentPosition--;
1966 throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
// Returns the source of a one-character token starting at startPosition.
// NOTE(review): lines 1975-2028 of the original are missing from this listing; going by
// the comments below they presumably return shared, pre-built arrays for the common
// characters, the visible return being the fallback that allocates a fresh array - confirm.
1970 final char[] optimizedCurrentTokenSource1() {
1971 //return always the same char[] build only once
1973 //optimization at no speed cost of 99.5 % of the singleCharIdentifier
1974 char charOne = this.source[this.startPosition];
// fallback: no shared instance was returned above, so build a new one-element array
2029 return new char[] {charOne};
// Returns the source of a two-character token starting at startPosition, reusing a
// previously built array with the same content when one is found in the cache row
// charArray_length[0][hash].
// NOTE(review): the declarations of hash, i and r, the reduction applied to the hash and
// the return statements are not part of this listing; the comments below describe the
// visible statements only.
2032 final char[] optimizedCurrentTokenSource2() {
2033 //try to return the same char[] build only once
// the hash mixes both chars; they are captured in c0 and c1 while it is computed
2037 (((c0 = this.source[this.startPosition]) << 6) + (c1 = this.source[this.startPosition + 1]))
2039 char[][] table = this.charArray_length[0][hash];
// first pass: look for an equal entry from the slot after i up to the end of the row
2041 while (++i < InternalTableSize) {
2042 char[] charArray = table[i];
2043 if ((c0 == charArray[0]) && (c1 == charArray[1]))
2046 //---------other side---------
// second pass: look through the slots up to and including newEntry2
2048 int max = newEntry2;
2049 while (++i <= max) {
2050 char[] charArray = table[i];
2051 if ((c0 == charArray[0]) && (c1 == charArray[1]))
2054 //--------add the entry-------
// not cached: take the slot after newEntry2, wrapping to 0 at the end of the row
2055 if (++max >= InternalTableSize) max = 0;
2057 table[max] = (r = new char[] {c0, c1});
// Returns the source of a three-character token starting at startPosition, reusing a
// previously built array with the same content when one is found in the cache row
// charArray_length[1][hash].
// NOTE(review): the declarations of hash, i, r and the chars, the reduction applied to the
// hash and the return statements are not part of this listing; the comments below describe
// the visible statements only.
2061 final char[] optimizedCurrentTokenSource3() {
2062 //try to return the same char[] build only once
// the hash mixes the three chars with 6-bit steps; they are captured in c0, c1 and c2
2066 (((c0 = this.source[this.startPosition]) << 12)
2067 + ((c1 = this.source[this.startPosition + 1]) << 6)
2068 + (c2 = this.source[this.startPosition + 2]))
2070 char[][] table = this.charArray_length[1][hash];
// first pass: look for an equal entry from the slot after i up to the end of the row
2072 while (++i < InternalTableSize) {
2073 char[] charArray = table[i];
2074 if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]))
2077 //---------other side---------
// second pass: look through the slots up to and including newEntry3
2079 int max = newEntry3;
2080 while (++i <= max) {
2081 char[] charArray = table[i];
2082 if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]))
2085 //--------add the entry-------
// not cached: take the slot after newEntry3, wrapping to 0 at the end of the row
2086 if (++max >= InternalTableSize) max = 0;
2088 table[max] = (r = new char[] {c0, c1, c2});
// Returns the source of a four-character token starting at startPosition, reusing a
// previously built array with the same content when one is found in the cache row
// charArray_length[2][hash].
// NOTE(review): the declarations of hash, i and r, the reduction applied to the hash and
// the return statements are not part of this listing; the comments below describe the
// visible statements only.
2092 final char[] optimizedCurrentTokenSource4() {
2093 //try to return the same char[] build only once
2095 char c0, c1, c2, c3;
// the hash is built in long arithmetic (first operand is cast to long) and mixes the four
// chars with 6-bit steps
2097 ((((long) (c0 = this.source[this.startPosition])) << 18)
2098 + ((c1 = this.source[this.startPosition + 1]) << 12)
2099 + ((c2 = this.source[this.startPosition + 2]) << 6)
2100 + (c3 = this.source[this.startPosition + 3]))
2102 char[][] table = this.charArray_length[2][(int) hash];
// first pass: look for an equal entry from the slot after i up to the end of the row
2104 while (++i < InternalTableSize) {
2105 char[] charArray = table[i];
2106 if ((c0 == charArray[0])
2107 && (c1 == charArray[1])
2108 && (c2 == charArray[2])
2109 && (c3 == charArray[3]))
2112 //---------other side---------
// second pass: look through the slots up to and including newEntry4
2114 int max = newEntry4;
2115 while (++i <= max) {
2116 char[] charArray = table[i];
2117 if ((c0 == charArray[0])
2118 && (c1 == charArray[1])
2119 && (c2 == charArray[2])
2120 && (c3 == charArray[3]))
2123 //--------add the entry-------
// not cached: take the slot after newEntry4, wrapping to 0 at the end of the row
2124 if (++max >= InternalTableSize) max = 0;
2126 table[max] = (r = new char[] {c0, c1, c2, c3});
// Returns the source of a five-character token starting at startPosition, reusing a
// previously built array with the same content when one is found in the cache row
// charArray_length[3][hash].
// NOTE(review): the declarations of hash, i and r, the reduction applied to the hash and
// the return statements are not part of this listing; the comments below describe the
// visible statements only.
2131 final char[] optimizedCurrentTokenSource5() {
2132 //try to return the same char[] build only once
2134 char c0, c1, c2, c3, c4;
// the hash is built in long arithmetic; the shift amounts are 24, 18, 12, 6 and 0
2136 ((((long) (c0 = this.source[this.startPosition])) << 24)
2137 + (((long) (c1 = this.source[this.startPosition + 1])) << 18)
2138 + ((c2 = this.source[this.startPosition + 2]) << 12)
2139 + ((c3 = this.source[this.startPosition + 3]) << 6)
2140 + (c4 = this.source[this.startPosition + 4]))
2142 char[][] table = this.charArray_length[3][(int) hash];
// first pass: look for an equal entry from the slot after i up to the end of the row
2144 while (++i < InternalTableSize) {
2145 char[] charArray = table[i];
2146 if ((c0 == charArray[0])
2147 && (c1 == charArray[1])
2148 && (c2 == charArray[2])
2149 && (c3 == charArray[3])
2150 && (c4 == charArray[4]))
2153 //---------other side---------
// second pass: look through the slots up to and including newEntry5
2155 int max = newEntry5;
2156 while (++i <= max) {
2157 char[] charArray = table[i];
2158 if ((c0 == charArray[0])
2159 && (c1 == charArray[1])
2160 && (c2 == charArray[2])
2161 && (c3 == charArray[3])
2162 && (c4 == charArray[4]))
2165 //--------add the entry-------
// not cached: take the slot after newEntry5, wrapping to 0 at the end of the row
2166 if (++max >= InternalTableSize) max = 0;
2168 table[max] = (r = new char[] {c0, c1, c2, c3, c4});
// Returns the source of a six-character token starting at startPosition, reusing a
// previously built array with the same content when one is found in the cache row
// charArray_length[4][hash].
// NOTE(review): the declarations of hash, i and r, the reduction applied to the hash and
// the return statements are not part of this listing; the comments below describe the
// visible statements only.
2173 final char[] optimizedCurrentTokenSource6() {
2174 //try to return the same char[] build only once
2176 char c0, c1, c2, c3, c4, c5;
// the hash is built in long arithmetic; the shift amounts are 32, 24, 18, 12, 6 and 0
// (the first step is 8 bits wide, unlike the 6-bit steps that follow)
2178 ((((long) (c0 = this.source[this.startPosition])) << 32)
2179 + (((long) (c1 = this.source[this.startPosition + 1])) << 24)
2180 + (((long) (c2 = this.source[this.startPosition + 2])) << 18)
2181 + ((c3 = this.source[this.startPosition + 3]) << 12)
2182 + ((c4 = this.source[this.startPosition + 4]) << 6)
2183 + (c5 = this.source[this.startPosition + 5]))
2185 char[][] table = this.charArray_length[4][(int) hash];
// first pass: look for an equal entry from the slot after i up to the end of the row
2187 while (++i < InternalTableSize) {
2188 char[] charArray = table[i];
2189 if ((c0 == charArray[0])
2190 && (c1 == charArray[1])
2191 && (c2 == charArray[2])
2192 && (c3 == charArray[3])
2193 && (c4 == charArray[4])
2194 && (c5 == charArray[5]))
2197 //---------other side---------
// second pass: look through the slots up to and including newEntry6
2199 int max = newEntry6;
2200 while (++i <= max) {
2201 char[] charArray = table[i];
2202 if ((c0 == charArray[0])
2203 && (c1 == charArray[1])
2204 && (c2 == charArray[2])
2205 && (c3 == charArray[3])
2206 && (c4 == charArray[4])
2207 && (c5 == charArray[5]))
2210 //--------add the entry-------
// not cached: take the slot after newEntry6, wrapping to 0 at the end of the row
2211 if (++max >= InternalTableSize) max = 0;
2213 table[max] = (r = new char[] {c0, c1, c2, c3, c4, c5});
// Matches the NLS tags found in the current token source against the string literals
// collected for the given line, then publishes the remaining literals in nonNLSStrings.
// The token text is searched for TAG_PREFIX ... TAG_POSTFIX pairs; the text between them
// is parsed as a one-based index into the line. Afterwards every non-null element of the
// line is copied into nonNLSStrings; when none is left the field is set to null, otherwise
// wasNonExternalizedStringLiteral is set and the array is trimmed to its real size.
// currentLine is reset to null on both outcomes.
// NOTE(review): some lines are missing from this listing, among them the loop header, any
// guard on a missing TAG_POSTFIX and the statement executed when line.exists(i) holds
// (presumably it clears that entry of the line) - confirm against the full file.
2217 protected void parseTags(NLSLine line) {
2218 String s = new String(getCurrentTokenSource());
2219 int pos = s.indexOf(TAG_PREFIX);
2220 int lineLength = line.size();
// the tag number starts right after the prefix and ends at the next postfix
2222 int start = pos + TAG_PREFIX_LENGTH;
2223 int end = s.indexOf(TAG_POSTFIX, start);
2225 String index = s.substring(start, end);
2228 i = Integer.parseInt(index) - 1; // Tags are one based not zero based.
2229 } catch (NumberFormatException e) {
2230 i = -1; // we don't want to consider this as a valid NLS tag
2232 if (line.exists(i)) {
// continue the search after the tag that was just examined
2236 pos = s.indexOf(TAG_PREFIX, start);
// collect the literals still present in the line; the array is sized for the worst case
2239 this.nonNLSStrings = new StringLiteral[lineLength];
2240 int nonNLSCounter = 0;
2241 for (Iterator iterator = line.iterator(); iterator.hasNext(); ) {
2242 StringLiteral literal = (StringLiteral) iterator.next();
2243 if (literal != null) {
2244 this.nonNLSStrings[nonNLSCounter++] = literal;
// nothing left to report for this line
2247 if (nonNLSCounter == 0) {
2248 this.nonNLSStrings = null;
2249 this.currentLine = null;
2252 this.wasNonExternalizedStringLiteral = true;
// shrink the array when fewer literals were collected than the line had entries
2253 if (nonNLSCounter != lineLength) {
2254 System.arraycopy(this.nonNLSStrings, 0, (this.nonNLSStrings = new StringLiteral[nonNLSCounter]), 0, nonNLSCounter);
2256 this.currentLine = null;
// Records the line end just consumed (currentCharacter, located at currentPosition - 1)
// in the lineEnds array, growing that array by INCREMENT slots when it is full.
// A CR immediately followed by a LF is recorded as one line end located at the LF:
// either through the look-ahead in the CR branch, or in the LF branch when the previous
// recorded end is the CR right before it and wasAcr is set.
// A position that is not beyond the last recorded line end is ignored.
// When checkNonExternalizedStringLiterals is set, currentLine is reset first.
// NOTE(review): a few lines are missing from this listing (for example the place where
// wasAcr is set to true, and the body of the catch clause); only visible statements are
// described.
2259 public final void pushLineSeparator() {
2260 //see comment on isLineDelimiter(char) for the use of '\n' and '\r'
// number of slots added each time lineEnds has to grow
2261 final int INCREMENT = 250;
2263 if (this.checkNonExternalizedStringLiterals) {
2264 // reinitialize the current line for non externalize strings purpose
2265 this.currentLine = null;
2267 //currentCharacter is at position currentPosition-1
2270 if (this.currentCharacter == '\r') {
2271 int separatorPos = this.currentPosition - 1;
2272 //TODO (olivier) david - why the following line was "if ((this.linePtr > 0) && (this.lineEnds[this.linePtr] >= separatorPos)) return;" ?
// already recorded (or behind the last recorded end): nothing to do
2273 if ((this.linePtr >= 0) && (this.lineEnds[this.linePtr] >= separatorPos)) return;
2274 //System.out.println("CR-" + separatorPos);
2275 int length = this.lineEnds.length;
// grow the array in place: the new array is assigned and filled in a single call
2276 if (++this.linePtr >= length)
2277 System.arraycopy(this.lineEnds, 0, this.lineEnds = new int[length + INCREMENT], 0, length);
2278 this.lineEnds[this.linePtr] = separatorPos;
2279 // look-ahead for merged cr+lf
2281 if (this.source[this.currentPosition] == '\n') {
2282 //System.out.println("look-ahead LF-" + this.currentPosition);
// move the recorded end onto the LF and consume it
2283 this.lineEnds[this.linePtr] = this.currentPosition;
2284 this.currentPosition++;
2285 this.wasAcr = false;
// the CR was the last char of the source, so there is nothing to look ahead at
2289 } catch(IndexOutOfBoundsException e) {
2294 if (this.currentCharacter == '\n') { //must merge eventual cr followed by lf
// the previous recorded end is the CR right before this LF: overwrite it with the LF
2295 if (this.wasAcr && (this.lineEnds[this.linePtr] == (this.currentPosition - 2))) {
2296 //System.out.println("merge LF-" + (this.currentPosition - 1));
2297 this.lineEnds[this.linePtr] = this.currentPosition - 1;
2299 int separatorPos = this.currentPosition - 1;
2300 //TODO (olivier) david - why the following line was "if ((this.linePtr > 0) && (this.lineEnds[this.linePtr] >= separatorPos)) return;" ?
// already recorded (or behind the last recorded end): nothing to do
2301 if ((this.linePtr >= 0) && (this.lineEnds[this.linePtr] >= separatorPos)) return;
2302 // System.out.println("LF-" + separatorPos);
2303 int length = this.lineEnds.length;
2304 if (++this.linePtr >= length)
2305 System.arraycopy(this.lineEnds, 0, this.lineEnds = new int[length + INCREMENT], 0, length);
2306 this.lineEnds[this.linePtr] = separatorPos;
2308 this.wasAcr = false;
/**
 * Line separator handling that resets this.currentLine (when non externalized
 * string literals are checked) and maintains this.wasAcr; none of the statements
 * visible here stores a position in this.lineEnds.
 * NOTE(review): presumably used for separators written as unicode escapes - confirm with callers.
 */
2312 public final void pushUnicodeLineSeparator() {
2313 if (this.checkNonExternalizedStringLiterals) {
2314 // reinitialize the current line for non externalize strings purpose
2315 this.currentLine = null;
2319 if (this.currentCharacter == '\r') {
// a '\n' directly follows the '\r' in the source
2320 if (this.source[this.currentPosition] == '\n') {
2321 this.wasAcr = false;
2327 if (this.currentCharacter == '\n') { //must merge eventual cr followed by lf
2328 this.wasAcr = false;
/**
 * Pushes a comment onto the parallel this.commentStarts / this.commentStops
 * stacks, using this.startPosition as its start.
 * <p>
 * The stop position defaults to this.currentPosition; for TokenNameCOMMENT_LINE
 * and TokenNameCOMMENT_BLOCK a negated position is stored instead.
 * NOTE(review): presumably the sign lets readers of commentStops tell these kinds
 * apart from the others - confirm against the consumers of that array.
 *
 * @param token the token kind of the comment being recorded
 */
2332 public void recordComment(int token) {
2334 int stopPosition = this.currentPosition;
2336 case TokenNameCOMMENT_LINE:
// a line comment stops at the position remembered in lastCommentLinePosition
2337 stopPosition = -this.lastCommentLinePosition;
2339 case TokenNameCOMMENT_BLOCK:
2340 stopPosition = -this.currentPosition;
2344 // a new comment is recorded
2345 int length = this.commentStops.length;
// both stacks are grown together, 30 entries at a time, so that they keep the same length
2346 if (++this.commentPtr >= length) {
2347 System.arraycopy(this.commentStops, 0, this.commentStops = new int[length + 30], 0, length);
2348 //grows the positions buffers too
2349 System.arraycopy(this.commentStarts, 0, this.commentStarts = new int[length + 30], 0, length);
2351 this.commentStops[this.commentPtr] = stopPosition;
2352 this.commentStarts[this.commentPtr] = this.startPosition;
2356 * Reposition the scanner on some portion of the original source. The given endPosition is the last valid position.
2357 * Beyond this position, the scanner will answer EOF tokens (<code>TerminalTokens.TokenNameEOF</code>).
2359 * @param begin the given start position
2360 * @param end the given end position
2362 public void resetTo(int begin, int end) {
2363 //reset the scanner to a given position where it may rescan again
// the initial, token start and current positions all restart at begin
2366 this.initialPosition = this.startPosition = this.currentPosition = begin;
// end is the last valid position, so eofPosition is the position right after it;
// Integer.MAX_VALUE is kept as is because end + 1 would overflow
2367 this.eofPosition = end < Integer.MAX_VALUE ? end + 1 : end;
2368 this.commentPtr = -1; // reset comment stack
// task tags found so far are discarded as well
2369 this.foundTaskCount = 0;
2371 // // if resetTo is used with being > than end.
2372 // if (begin > this.eofPosition) {
2373 // begin = this.eofPosition;
/**
 * Decodes the escape sequence that follows a backslash and leaves the resulting
 * character in this.currentCharacter.
 * <p>
 * The visible assignments produce '\b', '\t', '\n', '\f', '\r', '\"', '\'' and
 * '\\'; anything else is tried as an octal escape of one to three digits, where
 * a third digit is only accepted when the first one is in the range 0..3.
 *
 * @throws InvalidInputException with INVALID_ESCAPE when the sequence is not a valid escape
 */
2377 public final void scanEscapeCharacter() throws InvalidInputException {
2378 // the string with "\\u" is a legal string of two chars \ and u
2379 //thus we use a direct access to the source (for regular cases).
2381 if (this.unicodeAsBackSlash) {
2382 // consume next character
2383 this.unicodeAsBackSlash = false;
// the character after the backslash may itself be written as a unicode escape
2384 if (((this.currentCharacter = this.source[this.currentPosition++]) == '\\') && (this.source[this.currentPosition] == 'u')) {
2385 getNextUnicodeChar();
2387 if (this.withoutUnicodePtr != 0) {
2388 unicodeStoreAt(++this.withoutUnicodePtr);
2392 this.currentCharacter = this.source[this.currentPosition++];
2393 switch (this.currentCharacter) {
2395 this.currentCharacter = '\b';
2398 this.currentCharacter = '\t';
2401 this.currentCharacter = '\n';
2404 this.currentCharacter = '\f';
2407 this.currentCharacter = '\r';
2410 this.currentCharacter = '\"';
2413 this.currentCharacter = '\'';
2416 this.currentCharacter = '\\';
2419 // -----------octal escape--------------
2421 // OctalDigit OctalDigit
2422 // ZeroToThree OctalDigit OctalDigit
// NOTE(review): Character.getNumericValue also answers 0..7 for digits outside ASCII
// (e.g. fullwidth digits), so such a character would pass the range test below - confirm this is intended
2424 int number = Character.getNumericValue(this.currentCharacter);
2425 if (number >= 0 && number <= 7) {
// true when the first digit is 4..7, in which case at most two digits form the escape
2426 boolean zeroToThreeNot = number > 3;
2427 if (isDigit(this.currentCharacter = this.source[this.currentPosition++])) {
2428 int digit = Character.getNumericValue(this.currentCharacter);
2429 if (digit >= 0 && digit <= 7) {
2430 number = (number * 8) + digit;
2431 if (isDigit(this.currentCharacter = this.source[this.currentPosition++])) {
2432 if (zeroToThreeNot) {// has read \NotZeroToThree OctalDigit Digit --> ignore last character
2433 this.currentPosition--;
2435 digit = Character.getNumericValue(this.currentCharacter);
2436 if (digit >= 0 && digit <= 7){ // has read \ZeroToThree OctalDigit OctalDigit
2437 number = (number * 8) + digit;
2438 } else {// has read \ZeroToThree OctalDigit NonOctalDigit --> ignore last character
2439 this.currentPosition--;
2442 } else { // has read \OctalDigit NonDigit--> ignore last character
2443 this.currentPosition--;
2445 } else { // has read \OctalDigit NonOctalDigit--> ignore last character
2446 this.currentPosition--;
2448 } else { // has read \OctalDigit --> ignore last character
2449 this.currentPosition--;
2452 throw new InvalidInputException(INVALID_ESCAPE);
// the accumulated octal value becomes the escaped character
2453 this.currentCharacter = (char) number;
2455 throw new InvalidInputException(INVALID_ESCAPE);
/**
 * Consumes the remaining identifier characters of the current token and answers
 * whether the scanned text is a keyword or a plain identifier.
 * <p>
 * When unicode escapes were met (this.withoutUnicodePtr != 0) the characters are
 * read from this.withoutUnicodeBuffer; otherwise they are read starting at
 * this.startPosition. A text of length 1 is always an identifier.
 * The candidate keyword is selected from the first character and the comparison
 * is then done one character at a time.
 * <p>
 * Special cases visible below:
 * "assert" is a keyword only when this.sourceLevel >= JDK1_4 (this.containsAssertKeyword
 * is then set), otherwise it is an identifier and this.useAssertAsAnIndentifier is set;
 * "enum" is a keyword only when this.sourceLevel >= JDK1_5, otherwise it is an
 * identifier and this.useEnumAsAnIndentifier is set;
 * "const" and "goto" answer TokenNameERROR.
 *
 * @return the keyword token, TokenNameIdentifier, or TokenNameERROR for "const" and "goto"
 */
2458 public int scanIdentifierOrKeyword() {
2461 //first dispatch on the first char.
2462 //then the length. If there are several
2463 //keywords with the same length AND the same first char, then do another
2464 //dispatch on the second char
2465 this.useAssertAsAnIndentifier = false;
2466 this.useEnumAsAnIndentifier = false;
2467 while (getNextCharAsJavaIdentifierPart()){/*empty*/}
2472 if (this.withoutUnicodePtr == 0)
2474 //quick test on length == 1 but not on length > 12 while most identifier
2475 //have a length which is <= 12...but there are lots of identifier with
2479 if ((length = this.currentPosition - this.startPosition) == 1)
2480 return TokenNameIdentifier;
2482 index = this.startPosition;
2484 if ((length = this.withoutUnicodePtr) == 1)
2485 return TokenNameIdentifier;
2486 data = this.withoutUnicodeBuffer;
2490 firstLetter = data[index];
2491 switch (firstLetter) {
2496 if ((data[++index] == 'b')
2497 && (data[++index] == 's')
2498 && (data[++index] == 't')
2499 && (data[++index] == 'r')
2500 && (data[++index] == 'a')
2501 && (data[++index] == 'c')
2502 && (data[++index] == 't')) {
2503 return TokenNameabstract;
2505 return TokenNameIdentifier;
2508 if ((data[++index] == 's')
2509 && (data[++index] == 's')
2510 && (data[++index] == 'e')
2511 && (data[++index] == 'r')
2512 && (data[++index] == 't')) {
// assert is only a keyword from source level 1.4 on
2513 if (this.sourceLevel >= ClassFileConstants.JDK1_4) {
2514 this.containsAssertKeyword = true;
2515 return TokenNameassert;
2517 this.useAssertAsAnIndentifier = true;
2518 return TokenNameIdentifier;
2521 return TokenNameIdentifier;
2524 return TokenNameIdentifier;
2526 case 'b' : //boolean break byte
2529 if ((data[++index] == 'y') && (data[++index] == 't') && (data[++index] == 'e'))
2530 return TokenNamebyte;
2532 return TokenNameIdentifier;
2534 if ((data[++index] == 'r')
2535 && (data[++index] == 'e')
2536 && (data[++index] == 'a')
2537 && (data[++index] == 'k'))
2538 return TokenNamebreak;
2540 return TokenNameIdentifier;
2542 if ((data[++index] == 'o')
2543 && (data[++index] == 'o')
2544 && (data[++index] == 'l')
2545 && (data[++index] == 'e')
2546 && (data[++index] == 'a')
2547 && (data[++index] == 'n'))
2548 return TokenNameboolean;
2550 return TokenNameIdentifier;
2552 return TokenNameIdentifier;
2555 case 'c' : //case char catch const class continue
2558 if (data[++index] == 'a')
2559 if ((data[++index] == 's') && (data[++index] == 'e'))
2560 return TokenNamecase;
2562 return TokenNameIdentifier;
2564 if ((data[index] == 'h') && (data[++index] == 'a') && (data[++index] == 'r'))
2565 return TokenNamechar;
2567 return TokenNameIdentifier;
2569 if (data[++index] == 'a')
2570 if ((data[++index] == 't') && (data[++index] == 'c') && (data[++index] == 'h'))
2571 return TokenNamecatch;
2573 return TokenNameIdentifier;
2575 if (data[index] == 'l')
2576 if ((data[++index] == 'a')
2577 && (data[++index] == 's')
2578 && (data[++index] == 's'))
2579 return TokenNameclass;
2581 return TokenNameIdentifier;
2582 else if ((data[index] == 'o')
2583 && (data[++index] == 'n')
2584 && (data[++index] == 's')
2585 && (data[++index] == 't'))
2586 return TokenNameERROR; //const is not used in java ???????
2588 return TokenNameIdentifier;
2590 if ((data[++index] == 'o')
2591 && (data[++index] == 'n')
2592 && (data[++index] == 't')
2593 && (data[++index] == 'i')
2594 && (data[++index] == 'n')
2595 && (data[++index] == 'u')
2596 && (data[++index] == 'e'))
2597 return TokenNamecontinue;
2599 return TokenNameIdentifier;
2601 return TokenNameIdentifier;
2604 case 'd' : //default do double
2607 if ((data[++index] == 'o'))
2610 return TokenNameIdentifier;
2612 if ((data[++index] == 'o')
2613 && (data[++index] == 'u')
2614 && (data[++index] == 'b')
2615 && (data[++index] == 'l')
2616 && (data[++index] == 'e'))
2617 return TokenNamedouble;
2619 return TokenNameIdentifier;
2621 if ((data[++index] == 'e')
2622 && (data[++index] == 'f')
2623 && (data[++index] == 'a')
2624 && (data[++index] == 'u')
2625 && (data[++index] == 'l')
2626 && (data[++index] == 't'))
2627 return TokenNamedefault;
2629 return TokenNameIdentifier;
2631 return TokenNameIdentifier;
2633 case 'e' : //else enum extends
2636 if ((data[++index] == 'l') && (data[++index] == 's') && (data[++index] == 'e'))
2637 return TokenNameelse;
2638 else if ((data[index] == 'n')
2639 && (data[++index] == 'u')
2640 && (data[++index] == 'm')) {
// enum is only a keyword from source level 1.5 on
2641 if (this.sourceLevel >= ClassFileConstants.JDK1_5) {
2642 return TokenNameenum;
2644 this.useEnumAsAnIndentifier = true;
2645 return TokenNameIdentifier;
2648 return TokenNameIdentifier;
2651 if ((data[++index] == 'x')
2652 && (data[++index] == 't')
2653 && (data[++index] == 'e')
2654 && (data[++index] == 'n')
2655 && (data[++index] == 'd')
2656 && (data[++index] == 's'))
2657 return TokenNameextends;
2659 return TokenNameIdentifier;
2661 return TokenNameIdentifier;
2664 case 'f' : //final finally float for false
2667 if ((data[++index] == 'o') && (data[++index] == 'r'))
2668 return TokenNamefor;
2670 return TokenNameIdentifier;
2672 if (data[++index] == 'i')
2673 if ((data[++index] == 'n')
2674 && (data[++index] == 'a')
2675 && (data[++index] == 'l')) {
2676 return TokenNamefinal;
2678 return TokenNameIdentifier;
2680 if (data[index] == 'l')
2681 if ((data[++index] == 'o')
2682 && (data[++index] == 'a')
2683 && (data[++index] == 't'))
2684 return TokenNamefloat;
2686 return TokenNameIdentifier;
2688 if ((data[index] == 'a')
2689 && (data[++index] == 'l')
2690 && (data[++index] == 's')
2691 && (data[++index] == 'e'))
2692 return TokenNamefalse;
2694 return TokenNameIdentifier;
2696 if ((data[++index] == 'i')
2697 && (data[++index] == 'n')
2698 && (data[++index] == 'a')
2699 && (data[++index] == 'l')
2700 && (data[++index] == 'l')
2701 && (data[++index] == 'y'))
2702 return TokenNamefinally;
2704 return TokenNameIdentifier;
2707 return TokenNameIdentifier;
2711 if ((data[++index] == 'o')
2712 && (data[++index] == 't')
2713 && (data[++index] == 'o')) {
2714 return TokenNameERROR;
2716 } //no goto in java are allowed, so why java removes this keyword ???
2717 return TokenNameIdentifier;
2719 case 'i' : //if implements import instanceof int interface
2722 if (data[++index] == 'f')
2725 return TokenNameIdentifier;
2727 if ((data[++index] == 'n') && (data[++index] == 't'))
2728 return TokenNameint;
2730 return TokenNameIdentifier;
2732 if ((data[++index] == 'm')
2733 && (data[++index] == 'p')
2734 && (data[++index] == 'o')
2735 && (data[++index] == 'r')
2736 && (data[++index] == 't'))
2737 return TokenNameimport;
2739 return TokenNameIdentifier;
2741 if ((data[++index] == 'n')
2742 && (data[++index] == 't')
2743 && (data[++index] == 'e')
2744 && (data[++index] == 'r')
2745 && (data[++index] == 'f')
2746 && (data[++index] == 'a')
2747 && (data[++index] == 'c')
2748 && (data[++index] == 'e'))
2749 return TokenNameinterface;
2751 return TokenNameIdentifier;
2753 if (data[++index] == 'm')
2754 if ((data[++index] == 'p')
2755 && (data[++index] == 'l')
2756 && (data[++index] == 'e')
2757 && (data[++index] == 'm')
2758 && (data[++index] == 'e')
2759 && (data[++index] == 'n')
2760 && (data[++index] == 't')
2761 && (data[++index] == 's'))
2762 return TokenNameimplements;
2764 return TokenNameIdentifier;
2766 if ((data[index] == 'n')
2767 && (data[++index] == 's')
2768 && (data[++index] == 't')
2769 && (data[++index] == 'a')
2770 && (data[++index] == 'n')
2771 && (data[++index] == 'c')
2772 && (data[++index] == 'e')
2773 && (data[++index] == 'o')
2774 && (data[++index] == 'f'))
2775 return TokenNameinstanceof;
2777 return TokenNameIdentifier;
2780 return TokenNameIdentifier;
2785 if ((data[++index] == 'o')
2786 && (data[++index] == 'n')
2787 && (data[++index] == 'g')) {
2788 return TokenNamelong;
2791 return TokenNameIdentifier;
2793 case 'n' : //native new null
2796 if ((data[++index] == 'e') && (data[++index] == 'w'))
2797 return TokenNamenew;
2799 return TokenNameIdentifier;
2801 if ((data[++index] == 'u') && (data[++index] == 'l') && (data[++index] == 'l'))
2802 return TokenNamenull;
2804 return TokenNameIdentifier;
2806 if ((data[++index] == 'a')
2807 && (data[++index] == 't')
2808 && (data[++index] == 'i')
2809 && (data[++index] == 'v')
2810 && (data[++index] == 'e')) {
2811 return TokenNamenative;
2813 return TokenNameIdentifier;
2815 return TokenNameIdentifier;
2818 case 'p' : //package private protected public
2821 if ((data[++index] == 'u')
2822 && (data[++index] == 'b')
2823 && (data[++index] == 'l')
2824 && (data[++index] == 'i')
2825 && (data[++index] == 'c')) {
2826 return TokenNamepublic;
2828 return TokenNameIdentifier;
2830 if (data[++index] == 'a')
2831 if ((data[++index] == 'c')
2832 && (data[++index] == 'k')
2833 && (data[++index] == 'a')
2834 && (data[++index] == 'g')
2835 && (data[++index] == 'e'))
2836 return TokenNamepackage;
2838 return TokenNameIdentifier;
2840 if ((data[index] == 'r')
2841 && (data[++index] == 'i')
2842 && (data[++index] == 'v')
2843 && (data[++index] == 'a')
2844 && (data[++index] == 't')
2845 && (data[++index] == 'e')) {
2846 return TokenNameprivate;
2848 return TokenNameIdentifier;
2850 if ((data[++index] == 'r')
2851 && (data[++index] == 'o')
2852 && (data[++index] == 't')
2853 && (data[++index] == 'e')
2854 && (data[++index] == 'c')
2855 && (data[++index] == 't')
2856 && (data[++index] == 'e')
2857 && (data[++index] == 'd')) {
2858 return TokenNameprotected;
2860 return TokenNameIdentifier;
2863 return TokenNameIdentifier;
2868 if ((data[++index] == 'e')
2869 && (data[++index] == 't')
2870 && (data[++index] == 'u')
2871 && (data[++index] == 'r')
2872 && (data[++index] == 'n')) {
2873 return TokenNamereturn;
2876 return TokenNameIdentifier;
2878 case 's' : //short static super switch synchronized strictfp
2881 if (data[++index] == 'h')
2882 if ((data[++index] == 'o') && (data[++index] == 'r') && (data[++index] == 't'))
2883 return TokenNameshort;
2885 return TokenNameIdentifier;
2887 if ((data[index] == 'u')
2888 && (data[++index] == 'p')
2889 && (data[++index] == 'e')
2890 && (data[++index] == 'r'))
2891 return TokenNamesuper;
2893 return TokenNameIdentifier;
2896 if (data[++index] == 't')
2897 if ((data[++index] == 'a')
2898 && (data[++index] == 't')
2899 && (data[++index] == 'i')
2900 && (data[++index] == 'c')) {
2901 return TokenNamestatic;
2903 return TokenNameIdentifier;
2905 if ((data[index] == 'w')
2906 && (data[++index] == 'i')
2907 && (data[++index] == 't')
2908 && (data[++index] == 'c')
2909 && (data[++index] == 'h'))
2910 return TokenNameswitch;
2912 return TokenNameIdentifier;
2914 if ((data[++index] == 't')
2915 && (data[++index] == 'r')
2916 && (data[++index] == 'i')
2917 && (data[++index] == 'c')
2918 && (data[++index] == 't')
2919 && (data[++index] == 'f')
2920 && (data[++index] == 'p'))
2921 return TokenNamestrictfp;
2923 return TokenNameIdentifier;
2925 if ((data[++index] == 'y')
2926 && (data[++index] == 'n')
2927 && (data[++index] == 'c')
2928 && (data[++index] == 'h')
2929 && (data[++index] == 'r')
2930 && (data[++index] == 'o')
2931 && (data[++index] == 'n')
2932 && (data[++index] == 'i')
2933 && (data[++index] == 'z')
2934 && (data[++index] == 'e')
2935 && (data[++index] == 'd')) {
2936 return TokenNamesynchronized;
2938 return TokenNameIdentifier;
2940 return TokenNameIdentifier;
2943 case 't' : //try throw throws transient this true
2946 if ((data[++index] == 'r') && (data[++index] == 'y'))
2947 return TokenNametry;
2949 return TokenNameIdentifier;
2951 if (data[++index] == 'h')
2952 if ((data[++index] == 'i') && (data[++index] == 's'))
2953 return TokenNamethis;
2955 return TokenNameIdentifier;
2957 if ((data[index] == 'r') && (data[++index] == 'u') && (data[++index] == 'e'))
2958 return TokenNametrue;
2960 return TokenNameIdentifier;
2962 if ((data[++index] == 'h')
2963 && (data[++index] == 'r')
2964 && (data[++index] == 'o')
2965 && (data[++index] == 'w'))
2966 return TokenNamethrow;
2968 return TokenNameIdentifier;
2970 if ((data[++index] == 'h')
2971 && (data[++index] == 'r')
2972 && (data[++index] == 'o')
2973 && (data[++index] == 'w')
2974 && (data[++index] == 's'))
2975 return TokenNamethrows;
2977 return TokenNameIdentifier;
2979 if ((data[++index] == 'r')
2980 && (data[++index] == 'a')
2981 && (data[++index] == 'n')
2982 && (data[++index] == 's')
2983 && (data[++index] == 'i')
2984 && (data[++index] == 'e')
2985 && (data[++index] == 'n')
2986 && (data[++index] == 't')) {
2987 return TokenNametransient;
2989 return TokenNameIdentifier;
2992 return TokenNameIdentifier;
2995 case 'v' : //void volatile
2998 if ((data[++index] == 'o') && (data[++index] == 'i') && (data[++index] == 'd'))
2999 return TokenNamevoid;
3001 return TokenNameIdentifier;
3003 if ((data[++index] == 'o')
3004 && (data[++index] == 'l')
3005 && (data[++index] == 'a')
3006 && (data[++index] == 't')
3007 && (data[++index] == 'i')
3008 && (data[++index] == 'l')
3009 && (data[++index] == 'e')) {
3010 return TokenNamevolatile;
3012 return TokenNameIdentifier;
3015 return TokenNameIdentifier;
3018 case 'w' : //while widefp
3021 if ((data[++index] == 'h')
3022 && (data[++index] == 'i')
3023 && (data[++index] == 'l')
3024 && (data[++index] == 'e'))
3025 return TokenNamewhile;
3027 return TokenNameIdentifier;
3028 //case 6:if ( (data[++index] =='i') && (data[++index]=='d') && (data[++index]=='e') && (data[++index]=='f')&& (data[++index]=='p'))
3029 //return TokenNamewidefp ;
3031 //return TokenNameIdentifier;
3033 return TokenNameIdentifier;
3037 return TokenNameIdentifier;
/**
 * Scans a numeric literal whose first character is already in this.currentCharacter.
 * <p>
 * A leading "0x"/"0X" starts a hexadecimal literal; with a '.' or a 'p'/'P'
 * exponent it is a hexadecimal floating point literal, which is only scanned as
 * such when this.sourceLevel >= JDK1_5 (below that level the scanner rewinds to
 * the end of the hexadecimal digits and answers an integer literal).
 * A leading '0' followed by digits is a potential octal literal, which may still
 * turn out to be a floating point literal. Anything else is a decimal literal.
 *
 * @param dotPrefix true when the number starts with a '.', the literal is then a floating point one
 * @return TokenNameIntegerLiteral, TokenNameLongLiteral, TokenNameFloatingPointLiteral or TokenNameDoubleLiteral
 * @throws InvalidInputException with INVALID_HEXA or INVALID_FLOAT when the literal is malformed
 */
3040 public int scanNumber(boolean dotPrefix) throws InvalidInputException {
3042 //when entering this method the currentCharacter is the first
3043 //digit of the number. It may be preceded by a '.' when
3046 boolean floating = dotPrefix;
3047 if ((!dotPrefix) && (this.currentCharacter == '0')) {
3048 if (getNextChar('x', 'X') >= 0) { //----------hexa-----------------
// start and end delimit the run of hexadecimal digits that follows the prefix
3049 int start = this.currentPosition;
3050 while (getNextCharAsDigit(16)){/*empty*/}
3051 int end = this.currentPosition;
3052 if (getNextChar('l', 'L') >= 0) {
3054 throw new InvalidInputException(INVALID_HEXA);
3056 return TokenNameLongLiteral;
3057 } else if (getNextChar('.')) {
3058 if (this.sourceLevel < ClassFileConstants.JDK1_5) {
3059 // if we are in source level < 1.5, we report an integer literal
3060 this.currentPosition = end;
3061 return TokenNameIntegerLiteral;
3063 // hexadecimal floating point literal
3064 // read decimal part
3065 boolean hasNoDigitsBeforeDot = end == start;
3066 start = this.currentPosition;
3067 while (getNextCharAsDigit(16)){/*empty*/}
3068 end = this.currentPosition;
// digits are required on at least one side of the '.'
3069 if (hasNoDigitsBeforeDot && end == start) {
3070 throw new InvalidInputException(INVALID_HEXA);
3073 if (getNextChar('p', 'P') >= 0) { // consume next character
3074 this.unicodeAsBackSlash = false;
3075 if (((this.currentCharacter = this.source[this.currentPosition++]) == '\\')
3076 && (this.source[this.currentPosition] == 'u')) {
3077 getNextUnicodeChar();
3079 if (this.withoutUnicodePtr != 0) {
3080 unicodeStoreAt(++this.withoutUnicodePtr);
3084 if ((this.currentCharacter == '-')
3085 || (this.currentCharacter == '+')) { // consume next character
3086 this.unicodeAsBackSlash = false;
3087 if (((this.currentCharacter = this.source[this.currentPosition++]) == '\\')
3088 && (this.source[this.currentPosition] == 'u')) {
3089 getNextUnicodeChar();
3091 if (this.withoutUnicodePtr != 0) {
3092 unicodeStoreAt(++this.withoutUnicodePtr);
// the exponent needs at least one digit
3096 if (!isDigit(this.currentCharacter)) {
3097 throw new InvalidInputException(INVALID_HEXA);
3099 while (getNextCharAsDigit()){/*empty*/}
3100 if (getNextChar('f', 'F') >= 0) {
3101 return TokenNameFloatingPointLiteral;
3103 if (getNextChar('d', 'D') >= 0) {
3104 return TokenNameDoubleLiteral;
// a long suffix is not allowed on a hexadecimal floating point literal
3106 if (getNextChar('l', 'L') >= 0) {
3107 throw new InvalidInputException(INVALID_HEXA);
3109 return TokenNameDoubleLiteral;
3111 throw new InvalidInputException(INVALID_HEXA);
3113 } else if (getNextChar('p', 'P') >= 0) { // consume next character
3114 if (this.sourceLevel < ClassFileConstants.JDK1_5) {
3115 // if we are in source level < 1.5 we report an integer literal
3116 this.currentPosition = end;
3117 return TokenNameIntegerLiteral;
3119 this.unicodeAsBackSlash = false;
3120 if (((this.currentCharacter = this.source[this.currentPosition++]) == '\\')
3121 && (this.source[this.currentPosition] == 'u')) {
3122 getNextUnicodeChar();
3124 if (this.withoutUnicodePtr != 0) {
3125 unicodeStoreAt(++this.withoutUnicodePtr);
3129 if ((this.currentCharacter == '-')
3130 || (this.currentCharacter == '+')) { // consume next character
3131 this.unicodeAsBackSlash = false;
3132 if (((this.currentCharacter = this.source[this.currentPosition++]) == '\\')
3133 && (this.source[this.currentPosition] == 'u')) {
3134 getNextUnicodeChar();
3136 if (this.withoutUnicodePtr != 0) {
3137 unicodeStoreAt(++this.withoutUnicodePtr);
// NOTE(review): the same check in the '.' branch above reports INVALID_HEXA, this one reports INVALID_FLOAT - confirm which is intended
3141 if (!isDigit(this.currentCharacter))
3142 throw new InvalidInputException(INVALID_FLOAT);
3143 while (getNextCharAsDigit()){/*empty*/}
3144 if (getNextChar('f', 'F') >= 0)
3145 return TokenNameFloatingPointLiteral;
3146 if (getNextChar('d', 'D') >= 0)
3147 return TokenNameDoubleLiteral;
3148 if (getNextChar('l', 'L') >= 0) {
3149 throw new InvalidInputException(INVALID_HEXA);
3151 return TokenNameDoubleLiteral;
3154 throw new InvalidInputException(INVALID_HEXA);
3155 return TokenNameIntegerLiteral;
3159 //there is no x or X in the number (the hexadecimal case was handled above)
3160 //potential octal ! ... some one may write 000099.0 ! thus 00100 < 00078.0 is true !!!!! crazy language
3161 if (getNextCharAsDigit()) { //-------------potential octal-----------------
3162 while (getNextCharAsDigit()){/*empty*/}
3164 if (getNextChar('l', 'L') >= 0) {
3165 return TokenNameLongLiteral;
3168 if (getNextChar('f', 'F') >= 0) {
3169 return TokenNameFloatingPointLiteral;
3172 if (getNextChar('d', 'D') >= 0) {
3173 return TokenNameDoubleLiteral;
3174 } else { //make the distinction between octal and float ....
3175 boolean isInteger = true;
3176 if (getNextChar('.')) {
3178 while (getNextCharAsDigit()){/*empty*/}
3180 if (getNextChar('e', 'E') >= 0) { // consume next character
3182 this.unicodeAsBackSlash = false;
3183 if (((this.currentCharacter = this.source[this.currentPosition++]) == '\\')
3184 && (this.source[this.currentPosition] == 'u')) {
3185 getNextUnicodeChar();
3187 if (this.withoutUnicodePtr != 0) {
3188 unicodeStoreAt(++this.withoutUnicodePtr);
3192 if ((this.currentCharacter == '-')
3193 || (this.currentCharacter == '+')) { // consume next character
3194 this.unicodeAsBackSlash = false;
3195 if (((this.currentCharacter = this.source[this.currentPosition++]) == '\\')
3196 && (this.source[this.currentPosition] == 'u')) {
3197 getNextUnicodeChar();
3199 if (this.withoutUnicodePtr != 0) {
3200 unicodeStoreAt(++this.withoutUnicodePtr);
3204 if (!isDigit(this.currentCharacter))
3205 throw new InvalidInputException(INVALID_FLOAT);
3206 while (getNextCharAsDigit()){/*empty*/}
3208 if (getNextChar('f', 'F') >= 0)
3209 return TokenNameFloatingPointLiteral;
3210 if (getNextChar('d', 'D') >= 0 || !isInteger)
3211 return TokenNameDoubleLiteral;
3212 return TokenNameIntegerLiteral;
3219 while (getNextCharAsDigit()){/*empty*/}
// a long suffix is only looked for when the number does not start with a '.'
3221 if ((!dotPrefix) && (getNextChar('l', 'L') >= 0))
3222 return TokenNameLongLiteral;
3224 if ((!dotPrefix) && (getNextChar('.'))) { //decimal part that can be empty
3225 while (getNextCharAsDigit()){/*empty*/}
3229 //if floating is true both exponent and suffix may be optional
3231 if (getNextChar('e', 'E') >= 0) {
3233 // consume next character
3234 this.unicodeAsBackSlash = false;
3235 if (((this.currentCharacter = this.source[this.currentPosition++]) == '\\')
3236 && (this.source[this.currentPosition] == 'u')) {
3237 getNextUnicodeChar();
3239 if (this.withoutUnicodePtr != 0) {
3240 unicodeStoreAt(++this.withoutUnicodePtr);
3244 if ((this.currentCharacter == '-')
3245 || (this.currentCharacter == '+')) { // consume next character
3246 this.unicodeAsBackSlash = false;
3247 if (((this.currentCharacter = this.source[this.currentPosition++]) == '\\')
3248 && (this.source[this.currentPosition] == 'u')) {
3249 getNextUnicodeChar();
3251 if (this.withoutUnicodePtr != 0) {
3252 unicodeStoreAt(++this.withoutUnicodePtr);
3256 if (!isDigit(this.currentCharacter))
3257 throw new InvalidInputException(INVALID_FLOAT);
3258 while (getNextCharAsDigit()){/*empty*/}
3261 if (getNextChar('d', 'D') >= 0)
3262 return TokenNameDoubleLiteral;
3263 if (getNextChar('f', 'F') >= 0)
3264 return TokenNameFloatingPointLiteral;
3266 //the long flag has been tested before
3268 return floating ? TokenNameDoubleLiteral : TokenNameIntegerLiteral;
3271 * Search the line number corresponding to a specific position
3272 * @param position int
3275 public final int getLineNumber(int position) {
// the case where no line ends are available is handled first
3277 if (this.lineEnds == null)
// number of entries of lineEnds that are in use (indexes 0..linePtr)
3279 int length = this.linePtr+1;
// NOTE(review): g and d look like the low and high bounds of a search over
// lineEnds[0..linePtr], with m as the probed index; the statements that move
// them are not visible in this listing - confirm before relying on this
3282 int g = 0, d = length - 1;
3286 if (position < this.lineEnds[m]) {
3288 } else if (position > this.lineEnds[m]) {
3294 if (position < this.lineEnds[m]) {
/**
 * Installs the characters to scan and rewinds the scanner to their beginning.
 * <p>
 * A null argument is replaced by CharOperation.NO_CHAR. The end of file position
 * is set to the source length, the initial and current positions to 0, the token
 * start position to -1, and this.containsAssertKeyword is cleared.
 *
 * @param sourceString the characters to scan, may be null
 */
3299 public final void setSource(char[] sourceString){
3300 //the source-buffer is set to sourceString
3303 if (sourceString == null) {
3304 this.source = CharOperation.NO_CHAR;
3307 this.source = sourceString;
3308 sourceLength = sourceString.length;
// no token has been started yet
3310 this.startPosition = -1;
3311 this.eofPosition = sourceLength;
3312 this.initialPosition = this.currentPosition = 0;
3313 this.containsAssertKeyword = false;
3318 * Should be used if a parse (usually a diet parse) has already been performed on the unit,
3319 * so as to get the already computed line end positions.
3321 public final void setSource(CompilationResult compilationResult) {
// the characters come from the compilation unit attached to the result
3322 char[] contents = compilationResult.compilationUnit.getContents();
3323 setSource(contents);
// reuse the line ends stored in the result when there are some: the array is
// shared (not copied) and linePtr designates its last entry
3324 int[] lineSeparatorPositions = compilationResult.lineSeparatorPositions;
3325 if (lineSeparatorPositions != null) {
3326 this.lineEnds = lineSeparatorPositions;
3327 this.linePtr = lineSeparatorPositions.length - 1;
/**
 * Answers a debug rendering of the scanner state: the source text in which the
 * current token (from this.startPosition up to this.currentPosition - 1) is
 * framed by "Starts here -->" and "<-- Ends here" markers.
 * When the token start is at the end of the source, or the current position is
 * beyond it, the whole source is answered after an "EOF" / "behind the EOF" header.
 *
 * @return the textual description of the scanner state
 */
3331 public String toString() {
3332 if (this.startPosition == this.source.length)
3333 return "EOF\n\n" + new String(this.source); //$NON-NLS-1$
3334 if (this.currentPosition > this.source.length)
3335 return "behind the EOF\n\n" + new String(this.source); //$NON-NLS-1$
// front holds everything that precedes the current token
3337 char front[] = new char[this.startPosition];
3338 System.arraycopy(this.source, 0, front, 0, this.startPosition);
// simplifies to currentPosition - startPosition
3340 int middleLength = (this.currentPosition - 1) - this.startPosition + 1;
3342 if (middleLength > -1) {
3343 middle = new char[middleLength];
3351 middle = CharOperation.NO_CHAR;
// NOTE(review): this allocates source.length - currentPosition + 1 chars while the count
// copied below is source.length - currentPosition, which appears to leave a trailing
// '\0' at the end of the rendering - confirm
3354 char end[] = new char[this.source.length - (this.currentPosition - 1)];
3357 (this.currentPosition - 1) + 1,
3360 this.source.length - (this.currentPosition - 1) - 1);
3362 return new String(front)
3363 + "\n===============================\nStarts here -->" //$NON-NLS-1$
3364 + new String(middle)
3365 + "<-- Ends here\n===============================\n" //$NON-NLS-1$
3368 public String toStringAction(int act) {
3370 case TokenNameIdentifier :
3371 return "Identifier(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3372 case TokenNameabstract :
3373 return "abstract"; //$NON-NLS-1$
3374 case TokenNameboolean :
3375 return "boolean"; //$NON-NLS-1$
3376 case TokenNamebreak :
3377 return "break"; //$NON-NLS-1$
3378 case TokenNamebyte :
3379 return "byte"; //$NON-NLS-1$
3380 case TokenNamecase :
3381 return "case"; //$NON-NLS-1$
3382 case TokenNamecatch :
3383 return "catch"; //$NON-NLS-1$
3384 case TokenNamechar :
3385 return "char"; //$NON-NLS-1$
3386 case TokenNameclass :
3387 return "class"; //$NON-NLS-1$
3388 case TokenNamecontinue :
3389 return "continue"; //$NON-NLS-1$
3390 case TokenNamedefault :
3391 return "default"; //$NON-NLS-1$
3393 return "do"; //$NON-NLS-1$
3394 case TokenNamedouble :
3395 return "double"; //$NON-NLS-1$
3396 case TokenNameelse :
3397 return "else"; //$NON-NLS-1$
3398 case TokenNameextends :
3399 return "extends"; //$NON-NLS-1$
3400 case TokenNamefalse :
3401 return "false"; //$NON-NLS-1$
3402 case TokenNamefinal :
3403 return "final"; //$NON-NLS-1$
3404 case TokenNamefinally :
3405 return "finally"; //$NON-NLS-1$
3406 case TokenNamefloat :
3407 return "float"; //$NON-NLS-1$
3409 return "for"; //$NON-NLS-1$
3411 return "if"; //$NON-NLS-1$
3412 case TokenNameimplements :
3413 return "implements"; //$NON-NLS-1$
3414 case TokenNameimport :
3415 return "import"; //$NON-NLS-1$
3416 case TokenNameinstanceof :
3417 return "instanceof"; //$NON-NLS-1$
3419 return "int"; //$NON-NLS-1$
3420 case TokenNameinterface :
3421 return "interface"; //$NON-NLS-1$
3422 case TokenNamelong :
3423 return "long"; //$NON-NLS-1$
3424 case TokenNamenative :
3425 return "native"; //$NON-NLS-1$
3427 return "new"; //$NON-NLS-1$
3428 case TokenNamenull :
3429 return "null"; //$NON-NLS-1$
3430 case TokenNamepackage :
3431 return "package"; //$NON-NLS-1$
3432 case TokenNameprivate :
3433 return "private"; //$NON-NLS-1$
3434 case TokenNameprotected :
3435 return "protected"; //$NON-NLS-1$
3436 case TokenNamepublic :
3437 return "public"; //$NON-NLS-1$
3438 case TokenNamereturn :
3439 return "return"; //$NON-NLS-1$
3440 case TokenNameshort :
3441 return "short"; //$NON-NLS-1$
3442 case TokenNamestatic :
3443 return "static"; //$NON-NLS-1$
3444 case TokenNamesuper :
3445 return "super"; //$NON-NLS-1$
3446 case TokenNameswitch :
3447 return "switch"; //$NON-NLS-1$
3448 case TokenNamesynchronized :
3449 return "synchronized"; //$NON-NLS-1$
3450 case TokenNamethis :
3451 return "this"; //$NON-NLS-1$
3452 case TokenNamethrow :
3453 return "throw"; //$NON-NLS-1$
3454 case TokenNamethrows :
3455 return "throws"; //$NON-NLS-1$
3456 case TokenNametransient :
3457 return "transient"; //$NON-NLS-1$
3458 case TokenNametrue :
3459 return "true"; //$NON-NLS-1$
3461 return "try"; //$NON-NLS-1$
3462 case TokenNamevoid :
3463 return "void"; //$NON-NLS-1$
3464 case TokenNamevolatile :
3465 return "volatile"; //$NON-NLS-1$
3466 case TokenNamewhile :
3467 return "while"; //$NON-NLS-1$
3469 case TokenNameIntegerLiteral :
3470 return "Integer(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3471 case TokenNameLongLiteral :
3472 return "Long(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3473 case TokenNameFloatingPointLiteral :
3474 return "Float(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3475 case TokenNameDoubleLiteral :
3476 return "Double(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3477 case TokenNameCharacterLiteral :
3478 return "Char(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3479 case TokenNameStringLiteral :
3480 return "String(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3482 case TokenNamePLUS_PLUS :
3483 return "++"; //$NON-NLS-1$
3484 case TokenNameMINUS_MINUS :
3485 return "--"; //$NON-NLS-1$
3486 case TokenNameEQUAL_EQUAL :
3487 return "=="; //$NON-NLS-1$
3488 case TokenNameLESS_EQUAL :
3489 return "<="; //$NON-NLS-1$
3490 case TokenNameGREATER_EQUAL :
3491 return ">="; //$NON-NLS-1$
3492 case TokenNameNOT_EQUAL :
3493 return "!="; //$NON-NLS-1$
3494 case TokenNameLEFT_SHIFT :
3495 return "<<"; //$NON-NLS-1$
3496 case TokenNameRIGHT_SHIFT :
3497 return ">>"; //$NON-NLS-1$
3498 case TokenNameUNSIGNED_RIGHT_SHIFT :
3499 return ">>>"; //$NON-NLS-1$
3500 case TokenNamePLUS_EQUAL :
3501 return "+="; //$NON-NLS-1$
3502 case TokenNameMINUS_EQUAL :
3503 return "-="; //$NON-NLS-1$
3504 case TokenNameMULTIPLY_EQUAL :
3505 return "*="; //$NON-NLS-1$
3506 case TokenNameDIVIDE_EQUAL :
3507 return "/="; //$NON-NLS-1$
3508 case TokenNameAND_EQUAL :
3509 return "&="; //$NON-NLS-1$
3510 case TokenNameOR_EQUAL :
3511 return "|="; //$NON-NLS-1$
3512 case TokenNameXOR_EQUAL :
3513 return "^="; //$NON-NLS-1$
3514 case TokenNameREMAINDER_EQUAL :
3515 return "%="; //$NON-NLS-1$
3516 case TokenNameLEFT_SHIFT_EQUAL :
3517 return "<<="; //$NON-NLS-1$
3518 case TokenNameRIGHT_SHIFT_EQUAL :
3519 return ">>="; //$NON-NLS-1$
3520 case TokenNameUNSIGNED_RIGHT_SHIFT_EQUAL :
3521 return ">>>="; //$NON-NLS-1$
3522 case TokenNameOR_OR :
3523 return "||"; //$NON-NLS-1$
3524 case TokenNameAND_AND :
3525 return "&&"; //$NON-NLS-1$
3526 case TokenNamePLUS :
3527 return "+"; //$NON-NLS-1$
3528 case TokenNameMINUS :
3529 return "-"; //$NON-NLS-1$
3531 return "!"; //$NON-NLS-1$
3532 case TokenNameREMAINDER :
3533 return "%"; //$NON-NLS-1$
3535 return "^"; //$NON-NLS-1$
3537 return "&"; //$NON-NLS-1$
3538 case TokenNameMULTIPLY :
3539 return "*"; //$NON-NLS-1$
3541 return "|"; //$NON-NLS-1$
3542 case TokenNameTWIDDLE :
3543 return "~"; //$NON-NLS-1$
3544 case TokenNameDIVIDE :
3545 return "/"; //$NON-NLS-1$
3546 case TokenNameGREATER :
3547 return ">"; //$NON-NLS-1$
3548 case TokenNameLESS :
3549 return "<"; //$NON-NLS-1$
3550 case TokenNameLPAREN :
3551 return "("; //$NON-NLS-1$
3552 case TokenNameRPAREN :
3553 return ")"; //$NON-NLS-1$
3554 case TokenNameLBRACE :
3555 return "{"; //$NON-NLS-1$
3556 case TokenNameRBRACE :
3557 return "}"; //$NON-NLS-1$
3558 case TokenNameLBRACKET :
3559 return "["; //$NON-NLS-1$
3560 case TokenNameRBRACKET :
3561 return "]"; //$NON-NLS-1$
3562 case TokenNameSEMICOLON :
3563 return ";"; //$NON-NLS-1$
3564 case TokenNameQUESTION :
3565 return "?"; //$NON-NLS-1$
3566 case TokenNameCOLON :
3567 return ":"; //$NON-NLS-1$
3568 case TokenNameCOMMA :
3569 return ","; //$NON-NLS-1$
3571 return "."; //$NON-NLS-1$
3572 case TokenNameEQUAL :
3573 return "="; //$NON-NLS-1$
3575 return "EOF"; //$NON-NLS-1$
3577 return "not-a-token"; //$NON-NLS-1$
3580 public void unicodeInitializeBuffer(int length) {
3581 this.withoutUnicodePtr = length;
3582 if (this.withoutUnicodeBuffer == null) this.withoutUnicodeBuffer = new char[length+(1+10)];
3583 int bLength = this.withoutUnicodeBuffer.length;
3584 if (1+length >= bLength) {
3585 System.arraycopy(this.withoutUnicodeBuffer, 0, this.withoutUnicodeBuffer = new char[length + (1+10)], 0, bLength);
3587 System.arraycopy(this.source, this.startPosition, this.withoutUnicodeBuffer, 1, length);
3589 public void unicodeStoreAt(int pos) {
3590 if (this.withoutUnicodeBuffer == null) this.withoutUnicodeBuffer = new char[10];
3591 int length = this.withoutUnicodeBuffer.length;
3592 if (pos == length) {
3593 System.arraycopy(this.withoutUnicodeBuffer, 0, this.withoutUnicodeBuffer = new char[length * 2], 0, length);
3595 this.withoutUnicodeBuffer[pos] = this.currentCharacter;