added decent error reporting
[sbp.git] / src / edu / berkeley / sbp / tib / Tib.java
1 // Copyright 2005 the Contributors, as shown in the revision logs.
2 // Licensed under the Apache Public Source License 2.0 ("the License").
3 // You may not use this file except in compliance with the License.
4
5 package edu.berkeley.sbp.tib;
6 import edu.berkeley.sbp.*;
7 import edu.berkeley.sbp.misc.*;
8 import java.util.*;
9 import java.io.*;
10
11 // TODO: multiple {{ }} for superquotation
12 // TODO: strings
13 // TODO: comments
14
15 /**
16  *   A slow, ugly, inefficient, inelegant, ad-hoc parser for TIB files.
17  *
18  *   Despite being inelegant, this parser keeps all internal state on
19  *   the stack (the only heap objects created are those which are
20  *   returned).
21  *
22  *   This was written as an ad-hoc parser to facilitate
23  *   experimentation with the TIB spec.  Once the spec is finalized it
24  *   should probably be rewritten.
25  */
26 public class Tib implements Token.Stream<CharToken> {
27
28     public Tib(String s) throws IOException, Invalid { this(new StringReader(s)); }
29     public Tib(Reader r) throws IOException, Invalid { this(new BufferedReader(r)); }
30     public Tib(InputStream is) throws IOException, Invalid { this(new BufferedReader(new InputStreamReader(is))); }
31     public Tib(BufferedReader br) throws IOException, Invalid {
32         this.br = br;
33         istack.add(-1);
34         //cur = parse(br);
35         //System.out.println("\rparsing: \"" + cur.toString(0, -1) + "\"");
36     }
37
38     private Block cur;
39     private String s = "";
40     int pos = 0;
41     int spos = 0;
42
43     int _row = 1;
44     int _col = 0;
45     public Token.Location getLocation() { return new CharToken.CartesianLocation(_row, _col); }
46     private BufferedReader br;
47
48     boolean waiting = false;
49     char waitingChar = ' ';
50     boolean indenting = true;
51     int indentation = 0;
52     private ArrayList<Integer> istack = new ArrayList<Integer>();
53     public CharToken next(int numstates) throws IOException {
54         CharToken ret = nextc(numstates);
55         if      (ret==CharToken.left)  System.out.print("\033[31m{\033[0m");
56         else if (ret==CharToken.right) System.out.print("\033[31m}\033[0m");
57         else if (ret==null) return null;
58         else System.out.print(ret.c);
59         return ret;
60     }
61
62     CharToken waitingBrace = null;
63     public CharToken nextc(int numstates) throws IOException {
64         char c;
65         if (waitingBrace != null) {
66             CharToken ret = waitingBrace;
67             waitingBrace = null;
68             return ret;
69         }
70         if (waiting) {
71             waiting = false;
72             c = waitingChar;
73         } else {
74             int i = br.read();
75             if (i==-1) {
76                 if (istack.size() > 1) {
77                     istack.remove(istack.size()-1);
78                     return CharToken.right;
79                 }
80                 return null;
81             }
82             c = (char)i;
83         }
84         if (c=='\n') { _row++; _col=0; }
85         else         _col++;
86         if (indenting) {
87             if (c==' ') { indentation++; return done(c); }
88             if (c=='\n') { indentation = 0; if (blank) return nextc(numstates); blank = true; waiting = true; waitingChar='\n'; return new CharToken('\n'); }
89             int last = istack.size()==0 ? -1 : istack.get(istack.size()-1);
90             if (indentation==last) {
91                 if (blank) {
92                     indenting = false;
93                     waitingChar = c;
94                     waiting = true;
95                     waitingBrace = CharToken.left;
96                     return CharToken.right;
97                     //return nextc(numstates);
98                 }
99                 blank = false;
100                 indenting = false;
101                 return done(c);
102             }
103             blank = false;
104             waitingChar = c;
105             waiting = true;
106             if (indentation > last) {
107                 indenting = false;
108                 istack.add(indentation);
109                 //System.out.print("\033[31m+"+indentation+"+\033[0m");
110                 return CharToken.left;
111             } else /*if (indentation < last)*/ {
112                 istack.remove(istack.size()-1);
113                 //System.out.print("\033[31m-"+last+"-\033[0m");
114                 blank = true;
115                 return CharToken.right;
116             }
117         } else {
118             blank = false;
119             if (c=='\n') { indenting=true; indentation = 0; }
120             return done(c);
121         }
122     }
123
124     public CharToken done(char c) {
125         switch(c) {
126             case '{': return CharToken.left;
127             case '}': return CharToken.right;
128             default: return new CharToken(c);
129         }
130     }
131     boolean blank = false;
132     /*
133     public CharToken next(int numstates) throws IOException {
134         if (cur==null) return null;
135         if (s != null) {
136             if (spos < s.length()) {
137                 char c = s.charAt(spos++);
138                 if (c=='\n') { _row++; _col = 0; }
139                 else _col++;
140                 return new CharToken(c);
141             }
142             s = null;
143         }
144         if (pos >= cur.size()) {
145             pos = cur.iip+1;
146             _row = cur.endrow;
147             _col = cur.endcol;
148             cur = cur.parent;
149             if (cur==null) return null;
150             return CharToken.right;
151         }
152         Object o = cur.child(pos++);
153         if (o instanceof String) {
154             spos = 0;
155             s = (String)o;
156             return next(numstates);
157         }
158         if (o instanceof Block) {
159             Block b = (Block)o;
160             _row = b.row;
161             _col = b.col;
162         }
163         if (((Block)o).isLiteral()) {
164             spos = 0;
165             s = ((Block.Literal)o).text();
166             return next(numstates);
167         }
168         cur = (Block)o;
169         pos = 0;
170         return CharToken.left;
171     }
172     */
173     public static Block parse(BufferedReader br) throws Invalid, IOException {
174         int row=0, col=0;
175         try {
176             boolean blankLine = false;
177             Block top = new Block.Root();
178             for(String s = br.readLine(); s != null; s = br.readLine()) {
179                 row++;
180                 col=0;
181                 while (s.length() > 0 &&
182                        s.charAt(0) == ' ' &&
183                        (!(top instanceof Block.Literal) || col < top.col)) { col++; s = s.substring(1); }
184                 if ((top instanceof Block.Literal) && col >= top.col) { top.add(s); continue; }
185                 if (s.length()==0)                                    { blankLine = true; continue; }
186                 while (col < top.col) {
187                     if (s.startsWith("{}") && top instanceof Block.Literal && ((Block.Literal)top).braceCol == col) break;
188                     blankLine = false;
189                     top.endrow = row;
190                     top.endcol = col;
191                     top = top.closeIndent();
192                 }
193                 if (s.startsWith("{}")) {
194                     int bc = col;
195                     boolean append = top instanceof Block.Literal && ((Block.Literal)top).braceCol == bc;
196                     s = s.substring(2);
197                     col += 2;
198                     while (s.length() > 0 && s.charAt(0) == ' ' && !(append && col >= top.col) ) { col++; s = s.substring(1); }
199                     if (append) top.add(s); else (top = new Block.Literal(top, row, col, bc)).add(s);
200                     continue;
201                 }
202                 while (s.length() > 0 && s.charAt(s.length()-1)==' ') { s = s.substring(0, s.length()-1); }
203                 if (col > top.col) top = new Block.Indent(top, row, col);
204                 else if (blankLine) { top.endrow=row; top.endcol=col; top = top.closeIndent(); top = new Block.Indent(top, row, col); }
205                 blankLine = false;
206                 for(int i=0; i<s.length(); i++) {
207                     top.add(s.charAt(i));
208                     switch(s.charAt(i)) {
209                         case '{':  top = new Block.Brace(top, row, col);   break;
210                         case '}':  top.endrow=row; top.endcol=col; top = top.closeBrace();                 break;
211                     }
212                 }
213                 top.add('\n');
214                 top.finishWord();
215             }
216             // FIXME
217             Block ret = top;
218             while (ret.parent != null) ret = ret.parent;
219             return ret;
220         } catch (InternalException ie) {
221             throw new Invalid(ie, row, col);
222         }
223     }
224
225     public static class Block {
226                       Block  parent;
227         public  final int    row;
228         public  final int    col;
229         public   int    endrow;
230         public   int    endcol;
231         public final int iip;
232         private final Vector children = new Vector();
233         private       String pending  = "";
234
235         public int    size() { return children.size(); }
236         public Object child(int i) { return children.elementAt(i); }
237         public boolean isLiteral() {  return false; }
238
239         protected Block(int row, int col)                {
240             this.row=row;
241             this.col=col;
242             this.iip = -1;
243             parent = null;
244         }
245         public    Block(Block parent, int row, int col)  {
246             this.row=row;
247             this.col=col;
248             this.iip = parent.size();
249             (this.parent = parent).add(this);
250         }
251
252         public void    add(String s)        { children.addElement(s); }
253         public void    add(char c)          {
254             if (c == '{' || c == '}') { finishWord(); return; }
255             if (c != ' ') { pending += c; return; }
256             if (pending.length() > 0) { finishWord(); add(" "); return; }
257             if (size()==0) return;
258             if (child(size()-1).equals(" ")) return;
259             add(" ");
260             return;
261         }
262
263         public void    add(Block  b)        { children.addElement(b); }
264         public Block   promote()            { parent.parent.replaceLast(this); return close(); }
265         public Object  lastChild()          { return children.lastElement(); }
266         public Block   lastChildAsBlock()   { return (Block)lastChild(); }
267         public void    replaceLast(Block b) { children.setElementAt(b, children.size()-1); b.parent = this; }
268
269         public void    finishWord()         { if (pending.length() > 0) { add(pending); pending = ""; } }
270
271         public Block   closeBrace()         { throw new InternalException("attempt to closeBrace() a "+getClass().getName()); }
272         public Block   closeIndent()        { throw new InternalException("attempt to closeIndent() a "+getClass().getName()); }
273         public Block   close() {
274             while(size() > 0 && child(size()-1).equals(" ")) children.setSize(children.size()-1);
275             if (size()==0) throw new InternalException("PARSER BUG: attempt to close an empty block (should never happen)");
276             if (size() > 1 || !(lastChild() instanceof Block)) return parent;
277             return lastChildAsBlock().promote();
278         }
279         public String toString() { return toString(80); }
280         public String toString(int justificationLimit) { return toString(0, 80); }
281         protected String toString(int indent, int justificationLimit) {
282             StringBuffer ret = new StringBuffer();
283             StringBuffer line = new StringBuffer();
284             for(int i=0; i<children.size(); i++) {
285                 Object o = children.elementAt(i);
286                 if (i>0 && children.elementAt(i-1) instanceof Block && justificationLimit!=-1) ret.append("\n");
287                 if (o instanceof Block) {
288                     ret.append(justify(line.toString(), indent, justificationLimit));
289                     line.setLength(0);
290                     if (justificationLimit==-1) {
291                         ret.append("{");
292                         ret.append(((Block)o).toString(indent+2, justificationLimit));
293                         ret.append("}");
294                     } else {
295                         ret.append(((Block)o).toString(indent+2, justificationLimit));
296                     }
297                 } else {
298                     line.append(o.toString());
299                 }
300             }
301             ret.append(justify(line.toString(), indent, justificationLimit));
302             return ret.toString();
303         }
304
305         private static class Root extends Block {
306             public Root() { super(0, Integer.MIN_VALUE); }
307             public Block close() { throw new InternalException("attempted to close top block"); }
308             public String toString(int justificationLimit) { return toString(-2, justificationLimit); }
309         }
310
311         private static class Brace  extends Block {
312             public Brace(Block parent, int row, int col) { super(parent, row, col); }
313             public Block closeBrace() { return super.close(); }
314         }
315     
316         private static class Indent extends Block {
317             public Indent(Block parent, int row, int col) { super(parent, row, col); }
318             public Block closeIndent() { return super.close(); }
319         }
320
321         private static class Literal extends Block {
322             private StringBuffer content = new StringBuffer();
323             public final int braceCol;
324             public    Literal(Block parent, int row, int col, int braceCol) { super(parent,row,col); this.braceCol = braceCol; }
325             public    boolean isLiteral() {  return true; }
326             public    int size() { return 1; }
327             public    Object child(int i) { return i==0 ? content.toString() : null; }
328             public    Block  close() { return parent; }
329             public    Block  closeIndent() { return close(); }
330             public    void   add(String s) { if (content.length()>0) content.append('\n'); content.append(s); }
331             public    String text() { return content.toString(); }
332             protected String toString(int indent, int justificationLimit) {
333                 StringBuffer ret = new StringBuffer();
334                 String s = content.toString();
335                 while(s.length() > 0) {
336                     int nl = s.indexOf('\n');
337                     if (nl==-1) nl = s.length();
338                     ret.append(spaces(indent));
339                     ret.append("{}  ");
340                     ret.append(s.substring(0, nl));
341                     s = s.substring(Math.min(s.length(),nl+1));
342                     ret.append('\n');
343                 }
344                 return ret.toString();
345             }
346         }
347     }
348
349
350     // Exceptions //////////////////////////////////////////////////////////////////////////////
351
352     private static class InternalException extends RuntimeException { public InternalException(String s) { super(s); } }
353     public static class Invalid extends /*IOException*/RuntimeException {
354         public Invalid(InternalException ie, int row, int col) {
355             super(ie.getMessage() + " at " + row + ":" + col);
356         }
357     }
358
359     // Testing //////////////////////////////////////////////////////////////////////////////
360
361     public static void main(String[] s) throws Exception {
362         System.out.println(parse(new BufferedReader(new InputStreamReader(System.in))).toString(-1)); }
363     
364     // Utilities //////////////////////////////////////////////////////////////////////////////
365
366     public static String spaces(int i) { if (i<=0) return ""; return " " + spaces(i-1); }
367
368     private static String justify(String s, int indent, int justificationLimit) {
369         if (s.length() == 0) return "";
370         if (justificationLimit==-1) return s;
371         StringBuffer ret = new StringBuffer();
372         while(s.length() > 0) {
373             if (s.charAt(0) == ' ') { s = s.substring(1); continue; }
374             ret.append(spaces(indent));
375             int i = s.indexOf(' ');
376             if (i==-1) i = s.length();
377             while(s.indexOf(' ', i+1) != -1 && s.indexOf(' ', i+1) < justificationLimit-indent) i = s.indexOf(' ', i+1);
378             if (s.length() + indent < justificationLimit) i = s.length();
379             ret.append(s.substring(0, i));
380             s = s.substring(i);
381             ret.append('\n');
382         }
383         return ret.toString();
384     }
385
386     // Grammar //////////////////////////////////////////////////////////////////////////////
387
388     public static class Grammar extends MetaGrammar {
389         private int anon = 0;
390         private final Element ws = Repeat.maximal0(nonTerminal("w"));
391         public Grammar() { dropAll.add(ws); }
392         public Object walk(Tree<String> tree) {
393             String head = tree.head();
394             if (tree.numChildren()==0) return super.walk(tree);
395             if ("{".equals(head)) {
396                 String s = "braced"+(anon++);
397                 Union u = nonTerminal(s);
398                 Union u2 = ((PreSequence)walk(tree, 0)).sparse(ws).buildUnion();
399                 u2.add(Sequence.singleton(new Element[] { u }, 0, null, null));
400                 return nonTerminal(s,
401                                    new PreSequence[][] {
402                                        new PreSequence[] {
403                                            new PreSequence(new Element[] { CharToken.leftBrace,
404                                                                            ws,
405                                                                            u2,
406                                                                            ws,
407                                                                            CharToken.rightBrace
408                                            })
409                                        }
410                                    },
411                                    false,
412                                    false);
413             }
414             return super.walk(tree);
415         }
416     }
417
418 }
419