src/gnu/regexp/RETokenWordBoundary.java

   1 /*
   2  *  gnu/regexp/RETokenWordBoundary.java
   3  *  Copyright (C) 2001 Wes Biggs
   4  *
   5  *  This library is free software; you can redistribute it and/or modify
   6  *  it under the terms of the GNU Lesser General Public License as published
   7  *  by the Free Software Foundation; either version 2.1 of the License, or
   8  *  (at your option) any later version.
   9  *
  10  *  This library is distributed in the hope that it will be useful,
  11  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
  12  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  13  *  GNU Lesser General Public License for more details.
  14  *
  15  *  You should have received a copy of the GNU Lesser General Public License
  16  *  along with this program; if not, write to the Free Software
  17  *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  18  */
  19
  20 package gnu.regexp;
  21
  22 /**
  23  * Represents a combination lookahead/lookbehind for POSIX [:alnum:].
  24  */
  25 final class RETokenWordBoundary extends REToken {
  26     private boolean negated;
  27     private int where;
  28     static final int BEGIN = 1;
  29     static final int END = 2;
  30
  31     RETokenWordBoundary(int subIndex, int where, boolean negated) {
  32         super(subIndex);
  33         this.where = where;
  34         this.negated = negated;
  35     }
  36
  37     boolean match(CharIndexed input, REMatch mymatch) {
  38         // Word boundary means input[index-1] was a word character
  39         // and input[index] is not, or input[index] is a word character
  40         // and input[index-1] was not
  41         //  In the string "one two three", these positions match:
  42         //  |o|n|e| |t|w|o| |t|h|r|e|e|
  43         //  ^     ^ ^     ^ ^         ^
  44         boolean after = false;  // is current character a letter or digit?
  45         boolean before = false; // is previous character a letter or digit?
  46         char ch;
  47
  48         // TODO: Also check REG_ANCHORINDEX vs. anchor
  49         if (((mymatch.eflags & RE.REG_ANCHORINDEX) != RE.REG_ANCHORINDEX)
  50             || (mymatch.offset + mymatch.index > mymatch.anchor)) {
  51             if ((ch = input.charAt(mymatch.index - 1)) != CharIndexed.OUT_OF_BOUNDS) {
  52                 before = Character.isLetterOrDigit(ch) || (ch == '_');
  53             }
  54         }
  55
  56         if ((ch = input.charAt(mymatch.index)) != CharIndexed.OUT_OF_BOUNDS) {
  57             after = Character.isLetterOrDigit(ch) || (ch == '_');
  58         }
  59
  60         // if (before) and (!after), we're at end (\>)
  61         // if (after) and (!before), we're at beginning (\<)
  62         boolean doNext = false;
  63
  64         if ((where & BEGIN) == BEGIN) {
  65             doNext = after && !before;
  66         }
  67         if ((where & END) == END) {
  68             doNext ^= before && !after;
  69         }
  70
  71         if (negated) doNext = !doNext;
  72
  73         return (doNext ? next(input, mymatch) : false);
  74     }
  75
  76     void dump(StringBuffer os) {
  77         if (where == (BEGIN | END)) {
  78             os.append( negated ? "\\B" : "\\b" );
  79         } else if (where == BEGIN) {
  80             os.append("\\<");
  81         } else {
  82             os.append("\\>");
  83         }
  84     }
  85 }