From: adam Date: Fri, 6 Jan 2006 08:32:19 +0000 (-0500) Subject: got new tib tokenizer going X-Git-Tag: tag_for_25-Mar~424 X-Git-Url: http://git.megacz.com/?p=sbp.git;a=commitdiff_plain;h=08aa6cdd07a882fdedf7a6d5c7bd0d754460965b got new tib tokenizer going darcs-hash:20060106083219-5007d-c4ad8cc9e003df55e8db41c0870347f428856992.gz --- diff --git a/src/edu/berkeley/sbp/GSS.java b/src/edu/berkeley/sbp/GSS.java index 65c12e7..311fd29 100644 --- a/src/edu/berkeley/sbp/GSS.java +++ b/src/edu/berkeley/sbp/GSS.java @@ -27,7 +27,7 @@ class GSS { public Forest.Ref finalResult = null; /** all nodes, keyed by the value returned by code() */ - private HashMap hash = new HashMap(); /* ALLOC */ + /*private*/ HashMap hash = new HashMap(); /* ALLOC */ /** the number of nodes in this phase */ private int numNodes = 0; @@ -45,6 +45,8 @@ class GSS { private String error = "generic syntax error"; public void checkFailure() throws Parser.Failed { + if (token==null && finalResult==null) + throw new Parser.Failed(error, getLocation()); if (numNodes <= 0) throw new Parser.Failed(error, getLocation()); } @@ -66,8 +68,8 @@ class GSS { private void newNode2(Node p, Node parent, Forest pending, Parser.Table.State state, boolean fromEmptyReduction) { p.holder.merge(pending); if (p.parents().contains(parent)) return; - if (p.fe && p.phase() != parent.phase()) throw new Error("yep yep"); - if (!p.fe && p.phase() == parent.phase()) throw new Error("yep yep2"); + //if (p.fe && p.phase() != parent.phase()) throw new Error("yep yep"); + //if (!p.fe && p.phase() == parent.phase()) throw new Error("yep yep2"); p.parents().add(parent, true); if (p!=parent && !fromEmptyReduction) p.queueReductions(parent); } diff --git a/src/edu/berkeley/sbp/Parser.java b/src/edu/berkeley/sbp/Parser.java index fac14b0..1a0d307 100644 --- a/src/edu/berkeley/sbp/Parser.java +++ b/src/edu/berkeley/sbp/Parser.java @@ -36,16 +36,18 @@ public abstract class Parser { public Forest parse(Token.Stream input) throws IOException, Failed { GSS gss = new GSS(); Token.Location loc = input.getLocation(); - GSS.Phase current = gss.new Phase(null, input.next(), loc); + GSS.Phase current = gss.new Phase(null, input.next(1), loc); current.newNode(null, null, pt.start, true); + int count = 1; for(;;) { loc = input.getLocation(); - GSS.Phase next = gss.new Phase(current, input.next(), loc); + GSS.Phase next = gss.new Phase(current, input.next(count), loc); current.reduce(); Forest forest = current.token==null ? null : shiftedToken((T)current.token, loc); current.shift(next, forest); - if (current.isDone()) return (Forest)current.finalResult; + count = next.hash.size(); current.checkFailure(); + if (current.isDone()) return (Forest)current.finalResult; current = next; } } diff --git a/src/edu/berkeley/sbp/Token.java b/src/edu/berkeley/sbp/Token.java index 2be8f3e..551ee6d 100644 --- a/src/edu/berkeley/sbp/Token.java +++ b/src/edu/berkeley/sbp/Token.java @@ -15,7 +15,7 @@ public interface Token { /** a sequence of input tokens; returns null when EOF is reached */ public static interface Stream { - public T next() throws IOException; + public T next(int numstates) throws IOException; public abstract Location getLocation(); } diff --git a/src/edu/berkeley/sbp/misc/CharToken.java b/src/edu/berkeley/sbp/misc/CharToken.java index 8804fee..9d10889 100644 --- a/src/edu/berkeley/sbp/misc/CharToken.java +++ b/src/edu/berkeley/sbp/misc/CharToken.java @@ -154,7 +154,7 @@ public class CharToken implements Token, IntegerTopology.IntegerMappable { long then = 0; private Token.Location location = new LocWrap(1, 1); public Token.Location getLocation() { return location; } - public Token next() throws IOException { + public Token next(int numstates) throws IOException { int i = r.read(); if (i==-1) return null; char c = (char)i; @@ -163,10 +163,12 @@ public class CharToken implements Token, IntegerTopology.IntegerMappable { String s = line + ""; while(s.length() < 4) s = " " + s; s = "line "+s+", col " + col; + while(s.length() < 20) s += " "; + s += "[ambiguity level: " + (numstates-1) + "]"; long now = System.currentTimeMillis(); if (now-then > 10) { then = now; - System.out.print(" "+(message==null?"":message)+" " + s + " \r"); + System.out.print(" "+(message==null?"":message)+" " + s + " \r"); } if (c=='\n') { currentLine = new Line(); diff --git a/src/edu/berkeley/sbp/tib/Tib.java b/src/edu/berkeley/sbp/tib/Tib.java index 29181a0..69ac824 100644 --- a/src/edu/berkeley/sbp/tib/Tib.java +++ b/src/edu/berkeley/sbp/tib/Tib.java @@ -29,19 +29,107 @@ public class Tib implements Token.Stream { public Tib(Reader r) throws IOException, Invalid { this(new BufferedReader(r)); } public Tib(InputStream is) throws IOException, Invalid { this(new BufferedReader(new InputStreamReader(is))); } public Tib(BufferedReader br) throws IOException, Invalid { - cur = parse(br); - System.out.println("\rparsing: \"" + cur.toString(0, -1) + "\""); + this.br = br; + istack.add(-1); + //cur = parse(br); + //System.out.println("\rparsing: \"" + cur.toString(0, -1) + "\""); } private Block cur; - private String s = null; + private String s = ""; int pos = 0; int spos = 0; - int _row = 0; + int _row = 1; int _col = 0; public Token.Location getLocation() { return new CharToken.CartesianLocation(_row, _col); } - public CharToken next() throws IOException { + private BufferedReader br; + + boolean waiting = false; + char waitingChar = ' '; + boolean indenting = true; + int indentation = 0; + private ArrayList istack = new ArrayList(); + public CharToken next(int numstates) throws IOException { + CharToken ret = nextc(numstates); + if (ret==CharToken.left) System.out.print("\033[31m{\033[0m"); + else if (ret==CharToken.right) System.out.print("\033[31m}\033[0m"); + else if (ret==null) return null; + else System.out.print(ret.c); + return ret; + } + + CharToken waitingBrace = null; + public CharToken nextc(int numstates) throws IOException { + char c; + if (waitingBrace != null) { + CharToken ret = waitingBrace; + waitingBrace = null; + return ret; + } + if (waiting) { + waiting = false; + c = waitingChar; + } else { + int i = br.read(); + if (i==-1) { + if (istack.size() > 1) { + istack.remove(istack.size()-1); + return CharToken.right; + } + return null; + } + c = (char)i; + } + if (c=='\n') { _row++; _col=0; } + else _col++; + if (indenting) { + if (c==' ') { indentation++; return done(c); } + if (c=='\n') { indentation = 0; if (blank) return nextc(numstates); blank = true; waiting = true; waitingChar='\n'; return new CharToken('\n'); } + int last = istack.size()==0 ? -1 : istack.get(istack.size()-1); + if (indentation==last) { + if (blank) { + indenting = false; + waitingChar = c; + waiting = true; + waitingBrace = CharToken.left; + return CharToken.right; + //return nextc(numstates); + } + blank = false; + indenting = false; + return done(c); + } + blank = false; + waitingChar = c; + waiting = true; + if (indentation > last) { + indenting = false; + istack.add(indentation); + System.out.print("\033[31m+"+indentation+"+\033[0m"); + return CharToken.left; + } else /*if (indentation < last)*/ { + istack.remove(istack.size()-1); + System.out.print("\033[31m-"+last+"-\033[0m"); + blank = true; + return CharToken.right; + } + } else { + blank = false; + if (c=='\n') { indenting=true; indentation = 0; } + return done(c); + } + } + public CharToken done(char c) { + switch(c) { + case '{': return CharToken.left; + case '}': return CharToken.right; + default: return new CharToken(c); + } + } + boolean blank = false; + /* + public CharToken next(int numstates) throws IOException { if (cur==null) return null; if (s != null) { if (spos < s.length()) { @@ -64,7 +152,7 @@ public class Tib implements Token.Stream { if (o instanceof String) { spos = 0; s = (String)o; - return next(); + return next(numstates); } if (o instanceof Block) { Block b = (Block)o; @@ -74,13 +162,13 @@ public class Tib implements Token.Stream { if (((Block)o).isLiteral()) { spos = 0; s = ((Block.Literal)o).text(); - return next(); + return next(numstates); } cur = (Block)o; pos = 0; return CharToken.left; } - + */ public static Block parse(BufferedReader br) throws Invalid, IOException { int row=0, col=0; try { diff --git a/tests/input.tibdoc b/tests/input.tibdoc index f026034..de38bdc 100644 --- a/tests/input.tibdoc +++ b/tests/input.tibdoc @@ -16,11 +16,9 @@ header "" this is a test of \sc{paragraph of fun} the blockquote mechanism they,,{yer mom} +== Conclusion == + Furthermore, we can try things like * this - * this - - - - + * this \ No newline at end of file diff --git a/tests/tibdoc.g b/tests/tibdoc.g index 2d026e8..adc6e5e 100644 --- a/tests/tibdoc.g +++ b/tests/tibdoc.g @@ -20,25 +20,28 @@ nw ::= ~[\r\n\ ] ////////////////////////////////////////////////////////////////////////////// -s ::= {Doc} => top +s ::= Doc => top -Doc ::= Header Body /ws => doc -Header ::= "header" { kv */ ws } /ws => header +Doc ::= {Header} Body /ws => doc +Header ::= "header" { kv */ ws } /ws => header Body ::= Section*/ws => body -Section ::= SectionHeader Paragraph*/ws /ws => section +Section ::= { SectionHeader Paragraph* /ws => section } SectionHeader ::= "==" SectionHeaderBody "==" SectionHeaderBody ::= "=" SectionHeaderBody "=" > ws text ws +sp !::= " "** +blank ::= sp "\n" sp "\n" ws + kv ::= word "=" text /ws => kv1 num !::= [0-9]++ -Paragraph ::= { "\"\"" ws text } => "blockquote" - > { "*" " " ws text } => "ul" - > { "#" " " ws text } => "ol" - > { num " " ws text } => "ol" - > { "---" "-"* } => "hr" - > { text } => "p" +Paragraph ::= { "\"\"" ws text } => "blockquote" + > { "*" " " ws text } => "ul" + > { "#" " " ws text } => "ol" + > { num " " ws text } => "ol" + > { "---" "-"* } => "hr" + > { text } => "p" text ::= item itemx ::= ws item | ()