got new tib tokenizer going
[sbp.git] / src / edu / berkeley / sbp / tib / Tib.java
index 915158b..69ac824 100644 (file)
@@ -21,9 +21,7 @@ import java.io.*;
  *
  *   This was written as an ad-hoc parser to facilitate
  *   experimentation with the TIB spec.  Once the spec is finalized it
- *   should probably be rewritten using a parser-generator, if
- *   possible (it is unclear whether or not the associated grammar is
- *   context-free).
+ *   should probably be rewritten.
  */
 public class Tib implements Token.Stream<CharToken> {
 
@@ -31,39 +29,130 @@ public class Tib implements Token.Stream<CharToken> {
     public Tib(Reader r) throws IOException, Invalid { this(new BufferedReader(r)); }
     public Tib(InputStream is) throws IOException, Invalid { this(new BufferedReader(new InputStreamReader(is))); }
     public Tib(BufferedReader br) throws IOException, Invalid { // after this change the constructor only stores state; input is consumed later, one character at a time
-        cur = parse(br);
-        System.out.println("\rparsing: \"" + cur.toString(0, -1) + "\"");
+        this.br = br; // kept so that nextc() can call br.read()
+        istack.add(-1); // bottom entry of the indentation stack; -1 is below any real indentation count
+        //cur = parse(br);
+        //System.out.println("\rparsing: \"" + cur.toString(0, -1) + "\"");
     }
 
     private Block cur;
-    private String s = null;
+    private String s = "";
     int pos = 0;
     int spos = 0;
 
-    int _row = 0;
+    int _row = 1;
     int _col = 0;
-    public CharToken next() throws IOException {
+    public Token.Location getLocation() { return new CharToken.CartesianLocation(_row, _col); } // current position as tracked by the _row/_col counters
+    private BufferedReader br;
+
+    boolean waiting = false;
+    char waitingChar = ' ';
+    boolean indenting = true;
+    int indentation = 0;
+    private ArrayList<Integer> istack = new ArrayList<Integer>();
+    public CharToken next(int numstates) throws IOException { // returns nextc(numstates) unchanged, echoing each non-null token to System.out as a trace
+        CharToken ret = nextc(numstates);
+        if      (ret==CharToken.left)  System.out.print("\033[31m{\033[0m"); // identity comparison with the shared left token; printed as "{" between ANSI SGR 31 (red) and SGR 0 (reset)
+        else if (ret==CharToken.right) System.out.print("\033[31m}\033[0m"); // same for the shared right token, printed as "}"
+        else if (ret==null) return null; // end of stream: nothing is printed
+        else System.out.print(ret.c); // any other token: echo its character
+        return ret;
+    }
+
+    CharToken waitingBrace = null;
+    public CharToken nextc(int numstates) throws IOException { // next token read from br; indentation changes become CharToken.left/right (numstates is only forwarded to the recursive call)
+        char c;
+        if (waitingBrace != null) { // a brace token queued by the same-indentation branch below is delivered first
+            CharToken ret = waitingBrace;
+            waitingBrace = null;
+            return ret;
+        }
+        if (waiting) { // re-process the character that was pushed back instead of reading a new one
+            waiting = false;
+            c = waitingChar;
+        } else {
+            int i = br.read();
+            if (i==-1) { // end of input
+                if (istack.size() > 1) { // each call pops one level and returns a right token until a single entry is left
+                    istack.remove(istack.size()-1);
+                    return CharToken.right;
+                }
+                return null; // nothing left to close: end of stream
+            }
+            c = (char)i;
+        }
+        if (c=='\n') { _row++; _col=0; } // NOTE(review): this also runs for pushed-back characters, so they appear to be counted a second time -- confirm
+        else         _col++;
+        if (indenting) { // still in the leading whitespace of a line
+            if (c==' ') { indentation++; return done(c); } // leading space: counted, and still returned as a token
+            if (c=='\n') { indentation = 0; if (blank) return nextc(numstates); blank = true; waiting = true; waitingChar='\n'; return new CharToken('\n'); } // newline before any other character: skipped via recursion if blank is already set; otherwise blank is set, '\n' is pushed back and a '\n' token is returned
+            int last = istack.size()==0 ? -1 : istack.get(istack.size()-1); // indentation on top of the stack, or -1 if the stack is empty
+            if (indentation==last) { // same level as the top of the stack
+                if (blank) { // blank is set: return right now, queue left for the next call, then re-process c
+                    indenting = false;
+                    waitingChar = c;
+                    waiting = true;
+                    waitingBrace = CharToken.left;
+                    return CharToken.right;
+                    //return nextc(numstates);
+                }
+                blank = false;
+                indenting = false;
+                return done(c); // blank not set: c is delivered directly
+            }
+            blank = false;
+            waitingChar = c; // in both remaining cases c is pushed back and a brace token is returned first
+            waiting = true;
+            if (indentation > last) { // deeper: record the new level and return left
+                indenting = false;
+                istack.add(indentation);
+                System.out.print("\033[31m+"+indentation+"+\033[0m"); // trace output
+                return CharToken.left;
+            } else /*if (indentation < last)*/ { // shallower: pop one level and return right; indenting stays true, so c is compared with the new top on the next call
+                istack.remove(istack.size()-1); // int argument, so this removes by index (the last entry)
+                System.out.print("\033[31m-"+last+"-\033[0m"); // trace output
+                blank = true; // with blank set, reaching the same-indentation branch on a later call returns right followed by left
+                return CharToken.right;
+            }
+        } else { // past the leading whitespace of the current line
+            blank = false;
+            if (c=='\n') { indenting=true; indentation = 0; } // newline: the following characters start a new indentation count
+            return done(c);
+        }
+    }
+    public CharToken done(char c) { // converts a raw character into the token that is handed to the caller
+        switch(c) {
+            case '{': return CharToken.left; // a literal '{' yields the same shared token that nextc() returns when indentation increases
+            case '}': return CharToken.right; // a literal '}' yields the same shared token that nextc() returns when indentation decreases
+            default: return new CharToken(c); // every other character gets a fresh CharToken
+        }
+    }
+    boolean blank = false;
+    /*
+    public CharToken next(int numstates) throws IOException {
         if (cur==null) return null;
         if (s != null) {
             if (spos < s.length()) {
                 char c = s.charAt(spos++);
                 if (c=='\n') { _row++; _col = 0; }
                 else _col++;
-                return new CharToken(c, _row, _col);
+                return new CharToken(c);
             }
             s = null;
         }
         if (pos >= cur.size()) {
             pos = cur.iip+1;
+            _row = cur.endrow;
+            _col = cur.endcol;
             cur = cur.parent;
             if (cur==null) return null;
-            return CharToken.right(_row, _col);
+            return CharToken.right;
         }
         Object o = cur.child(pos++);
         if (o instanceof String) {
             spos = 0;
             s = (String)o;
-            return next();
+            return next(numstates);
         }
         if (o instanceof Block) {
             Block b = (Block)o;
@@ -73,20 +162,21 @@ public class Tib implements Token.Stream<CharToken> {
         if (((Block)o).isLiteral()) {
             spos = 0;
             s = ((Block.Literal)o).text();
-            return next();
+            return next(numstates);
         }
         cur = (Block)o;
         pos = 0;
-        return CharToken.left(_row, _col);
+        return CharToken.left;
     }
-
+    */
     public static Block parse(BufferedReader br) throws Invalid, IOException {
         int row=0, col=0;
         try {
             boolean blankLine = false;
             Block top = new Block.Root();
             for(String s = br.readLine(); s != null; s = br.readLine()) {
-                col = 0;
+                row++;
+                col=0;
                 while (s.length() > 0 &&
                        s.charAt(0) == ' ' &&
                        (!(top instanceof Block.Literal) || col < top.col)) { col++; s = s.substring(1); }
@@ -95,6 +185,8 @@ public class Tib implements Token.Stream<CharToken> {
                 while (col < top.col) {
                     if (s.startsWith("{}") && top instanceof Block.Literal && ((Block.Literal)top).braceCol == col) break;
                     blankLine = false;
+                    top.endrow = row;
+                    top.endcol = col;
                     top = top.closeIndent();
                 }
                 if (s.startsWith("{}")) {
@@ -108,16 +200,16 @@ public class Tib implements Token.Stream<CharToken> {
                 }
                 while (s.length() > 0 && s.charAt(s.length()-1)==' ') { s = s.substring(0, s.length()-1); }
                 if (col > top.col) top = new Block.Indent(top, row, col);
-                else if (blankLine) { top = top.closeIndent(); top = new Block.Indent(top, row, col); }
+                else if (blankLine) { top.endrow=row; top.endcol=col; top = top.closeIndent(); top = new Block.Indent(top, row, col); }
                 blankLine = false;
                 for(int i=0; i<s.length(); i++) {
                     top.add(s.charAt(i));
                     switch(s.charAt(i)) {
                         case '{':  top = new Block.Brace(top, row, col);   break;
-                        case '}':  top = top.closeBrace();                 break;
+                        case '}':  top.endrow=row; top.endcol=col; top = top.closeBrace();                 break;
                     }
                 }
-                top.add(' ');
+                top.add('\n');
                 top.finishWord();
             }
             // FIXME
@@ -129,15 +221,16 @@ public class Tib implements Token.Stream<CharToken> {
         }
     }
 
-    public static class Block /*implements Token*/ {
+    public static class Block {
                       Block  parent;
         public  final int    row;
         public  final int    col;
+        public   int    endrow;
+        public   int    endcol;
         public final int iip;
         private final Vector children = new Vector();
         private       String pending  = "";
 
-        //public Location getLocation() { return /*new Location.Cartesian(row, col)*/null; }
         public int    size() { return children.size(); }
         public Object child(int i) { return children.elementAt(i); }
         public boolean isLiteral() {  return false; }
@@ -264,7 +357,8 @@ public class Tib implements Token.Stream<CharToken> {
 
     // Testing //////////////////////////////////////////////////////////////////////////////
 
-    public static void main(String[] s) throws Exception { System.out.println(parse(new BufferedReader(new InputStreamReader(System.in))).toString(-1)); }
+    public static void main(String[] s) throws Exception { // test entry point: still exercises the static parse() method, not the nextc() tokenizer
+        System.out.println(parse(new BufferedReader(new InputStreamReader(System.in))).toString(-1)); } // parses standard input and prints the resulting Block via toString(-1)
     
     // Utilities //////////////////////////////////////////////////////////////////////////////
 
@@ -288,5 +382,37 @@ public class Tib implements Token.Stream<CharToken> {
         return ret.toString();
     }
 
+    // Grammar //////////////////////////////////////////////////////////////////////////////
+
+    public static class Grammar extends MetaGrammar { // MetaGrammar subclass that overrides walk() to give "{" nodes special treatment
+        private int anon = 0; // counter used to generate unique names for braced nonterminals
+        private final Element ws = Repeat.maximal0(nonTerminal("w")); // built from the nonterminal "w" -- presumably zero-or-more whitespace; confirm against Repeat.maximal0
+        public Grammar() { dropAll.add(ws); } // registers ws in dropAll (declared outside this view; presumably marks elements dropped from the result -- confirm)
+        public Object walk(Tree<String> tree) { // handles trees headed by "{" that have children; everything else is delegated to super.walk
+            String head = tree.head();
+            if (tree.numChildren()==0) return super.walk(tree); // childless trees are never treated as braces
+            if ("{".equals(head)) {
+                String s = "braced"+(anon++); // braced0, braced1, ...
+                Union u = nonTerminal(s); // the nonterminal being defined, obtained up front so it can be referenced from its own body
+                Union u2 = ((PreSequence)walk(tree, 0)).sparse(ws).buildUnion(); // body built from child 0; presumably sparse(ws) allows ws between its elements -- confirm
+                u2.add(Sequence.singleton(new Element[] { u }, 0, null, null)); // adds a one-element sequence containing u, presumably as a further alternative so braced blocks can nest -- confirm
+                return nonTerminal(s, // defines the nonterminal with the single sequence: leftBrace ws u2 ws rightBrace
+                                   new PreSequence[][] {
+                                       new PreSequence[] {
+                                           new PreSequence(new Element[] { CharToken.leftBrace, // NOTE(review): the tokenizer returns CharToken.left/right; verify that leftBrace/rightBrace correspond to them
+                                                                           ws,
+                                                                           u2,
+                                                                           ws,
+                                                                           CharToken.rightBrace
+                                           })
+                                       }
+                                   },
+                                   false, // meaning of the two boolean arguments is not visible here
+                                   false);
+            }
+            return super.walk(tree); // any other head
+        }
+    }
+
 }