X-Git-Url: http://git.megacz.com/?p=sbp.git;a=blobdiff_plain;f=src%2Fedu%2Fberkeley%2Fsbp%2FParser.java;h=861c32328961aeff7c4da6658881b33bcf1034b6;hp=de96760406ee1a4f7b2452a2f63c092879855c5e;hb=4692c8e9ba0c4b44ac5222f5bf5168703c478cbd;hpb=eef891a53c43901acccac0dead16a79dbdb34c77 diff --git a/src/edu/berkeley/sbp/Parser.java b/src/edu/berkeley/sbp/Parser.java index de96760..861c323 100644 --- a/src/edu/berkeley/sbp/Parser.java +++ b/src/edu/berkeley/sbp/Parser.java @@ -3,13 +3,10 @@ package edu.berkeley.sbp; import edu.berkeley.sbp.util.*; import edu.berkeley.sbp.Sequence.Position; +import edu.berkeley.sbp.Sequence.Pos; import java.io.*; import java.util.*; -// FEATURE: try harder to "fuse" states together along two dimensions: -// - identical (equivalent) states, or states that subsume each other -// - unnecessary intermediate states ("short cut" GLR) - /** a parser which translates an Input<Token> into a Forest<NodeType> */ public abstract class Parser { @@ -30,33 +27,21 @@ public abstract class Parser { public Forest parse(Input input) throws IOException, ParseFailed { verbose = System.getProperty("sbp.verbose", null) != null; spinpos = 0; + GSS gss = new GSS(input, this); try { - GSS gss = new GSS(input, this); for(GSS.Phase current = gss.new Phase(pt.start); ;) { - - if (verbose) { - // FIXME: clean this up - String s; - s = " " + spin[spinpos++ % (spin.length)]+" parsing "; - s += input.getName(); - s += " "+input.getLocation(); - while(s.indexOf(':') != -1 && s.indexOf(':') < 8) s = " " + s; - String y = "@"+gss.viewPos+" "; - while(y.length() < 9) y = " " + y; - s += y; - s += " nodes="+gss.numOldNodes; - while(s.length() < 50) s = s + " "; - s += " shifted="+gss.numNewNodes; - while(s.length() < 60) s = s + " "; - s += " reductions="+gss.numReductions; - System.err.print("\r"+s+ANSI.clreol()+"\r"); - } - + if (verbose) debug(current.token, gss, input); if (current.isDone()) return (Forest)current.finalResult; - Forest forest = shiftToken((Token)current.token, current.getRegion()); + Input.Region region = current.getLocation().createRegion(current.getNextLocation()); + Forest forest = shiftToken((Token)current.token, region); current = gss.new Phase(current, forest); } - } finally { if (verbose) System.err.print("\r"+ANSI.clreol()); } + } finally { + if (verbose) { + System.err.print("\r"+ANSI.clreol()); + debug(null, gss, input); + } + } } // Spinner ////////////////////////////////////////////////////////////////////////////// @@ -73,10 +58,56 @@ public abstract class Parser { System.err.print("\r " + spin[spinpos++ % (spin.length)]+"\r"); } + private int _last = -1; + private String buf = ""; + private void debug(Object t, GSS gss, Input input) { + //FIXME + int c = t==null ? -1 : ((t+"").charAt(0)); + int last = _last; + _last = c; + switch(c) { + case edu.berkeley.sbp.chr.CharAtom.left: + buf += "\033[31m>\033[0m"; + break; + case edu.berkeley.sbp.chr.CharAtom.right: + buf += "\033[31m<\033[0m"; + break; + case -1: // FIXME + case '\n': + if (verbose) { + if (last==' ') buf += ANSI.blue("\\n"); + System.err.println("\r"+ANSI.clreol()+"\r"+buf); + buf = ""; + } + break; + default: + buf += ANSI.cyan(""+((char)c)); + break; + } + if (t==null) return; + + // FIXME: clean this up + String s; + s = " " + spin[spinpos++ % (spin.length)]+" parsing "; + s += input.getName(); + s += " "+input.getLocation(); + while(s.indexOf(':') != -1 && s.indexOf(':') < 8) s = " " + s; + String y = "@"+gss.viewPos+" "; + while(y.length() < 9) y = " " + y; + s += y; + s += " nodes="+gss.numOldNodes; + while(s.length() < 50) s = s + " "; + s += " shifted="+gss.numNewNodes; + while(s.length() < 60) s = s + " "; + s += " reductions="+gss.numReductions; + while(s.length() < 78) s = s + " "; + System.err.print("\r"+ANSI.invert(s+ANSI.clreol())+"\r"); + } + // Table ////////////////////////////////////////////////////////////////////////////// /** an SLR(1) parse table which may contain conflicts */ - class Table extends Grammar { + class Table extends Grammar implements Serializable { /** the start state */ final State start; @@ -88,13 +119,13 @@ public abstract class Parser { private int master_state_idx = 0; /** all the states for this table */ - HashSet> all_states = new HashSet>(); + private transient HashSet> all_states = new HashSet>(); /** all the doomed states in this table */ - HashMap,State> doomed_states = new HashMap,State>(); + private transient HashMap,State> doomed_states = new HashMap,State>(); /** all the non-doomed states in this table */ - HashMap,State> normal_states = new HashMap,State>(); + private transient HashMap,State> normal_states = new HashMap,State>(); Topology emptyTopology() { return Parser.this.emptyTopology(); } @@ -162,7 +193,7 @@ public abstract class Parser { // al will be sorted in DECREASING order (al[0] >= al[1]) ArrayList al = new ArrayList(); for(State s : all_states) { - for(Object po : s) { + for(Object po : s.positions()) { Sequence.Position p = (Sequence.Position)po; if (al.contains(p)) continue; int i=0; @@ -233,38 +264,41 @@ public abstract class Parser { * space+time complexity in otherwise simple grammars. There * is an example of this in the regression suite. */ - class State implements IntegerMappable, Iterable { + class State implements IntegerMappable, Serializable { public final int idx = master_state_idx++; - private final HashSet hs; + private final transient HashSet hs; public HashSet> conjunctStates = new HashSet>(); - HashMap> gotoSetNonTerminals = new HashMap>(); + HashMap> gotoSetNonTerminals = new HashMap>(); private transient TopologicalBag> gotoSetTerminals = new TopologicalBag>(); - private TopologicalBag reductions = new TopologicalBag(); - private HashSet eofReductions = new HashSet(); + private TopologicalBag reductions = new TopologicalBag(); + private HashSet eofReductions = new HashSet(); private TopologicalBag> shifts = new TopologicalBag>(); private boolean accept = false; private VisitableMap> oshifts = null; - private VisitableMap oreductions = null; + private VisitableMap oreductions = null; public final boolean doomed; // Interface Methods ////////////////////////////////////////////////////////////////////////////// + public boolean doomed() { return doomed; } boolean isAccepting() { return accept; } - public Iterator iterator() { return hs.iterator(); } + + Iterable positions() { return hs; } + boolean canShift(Token t) { return oshifts!=null && oshifts.contains(t); } void invokeShifts(Token t, GSS.Phase phase, Result r) { oshifts.invoke(t, phase, r); } boolean canReduce(Token t) { return oreductions != null && (t==null ? eofReductions.size()>0 : oreductions.contains(t)); } void invokeEpsilonReductions(Token t, Node node) { - if (t==null) for(Position r : eofReductions) node.invoke(r, null); + if (t==null) for(Pos r : eofReductions) node.invoke(r, null); else oreductions.invoke(t, node, null); } void invokeReductions(Token t, Node node, Result b) { - if (t==null) for(Position r : eofReductions) node.invoke(r, b); + if (t==null) for(Pos r : eofReductions) node.invoke(r, b); else oreductions.invoke(t, node, b); } @@ -368,10 +402,12 @@ public abstract class Parser { if (seq.needs.contains(y) || seq.hates.contains(y)) { // FIXME: assumption that no sequence is ever both usefully (non-lamely) matched // and also directly lamely matched - ((HashMap)gotoSetNonTerminals).put(y, dead_state); + for(Position pp = y.firstp(); pp != null; pp = pp.next()) + ((HashMap)gotoSetNonTerminals).put(pp, dead_state); continue OUTER; } - gotoSetNonTerminals.put(y, s); + for(Position pp = y.firstp(); pp != null; pp = pp.next()) + gotoSetNonTerminals.put(pp, s); } } @@ -382,6 +418,12 @@ public abstract class Parser { } public int toInt() { return idx; } + public String toString() { + StringBuffer ret = new StringBuffer(); + for(Position p : hs) + ret.append(p+"\n"); + return ret.toString(); + } } }