X-Git-Url: http://git.megacz.com/?p=sbp.git;a=blobdiff_plain;f=src%2Fedu%2Fberkeley%2Fsbp%2FParser.java;h=c2e7023c42a5ff31a22a4453d1fef9c2a4ed3a83;hp=adf24c46174a2d5408c17998d00c8a52aae191a5;hb=9d727bd14c659cdc6c34153b988e8d3fdb8067f5;hpb=a4cd33f348d3802ed746e517c23205958a4ceeb4 diff --git a/src/edu/berkeley/sbp/Parser.java b/src/edu/berkeley/sbp/Parser.java index adf24c4..c2e7023 100644 --- a/src/edu/berkeley/sbp/Parser.java +++ b/src/edu/berkeley/sbp/Parser.java @@ -9,96 +9,73 @@ import java.util.*; /** a parser which translates an Input<Token> into a Forest<NodeType> */ public abstract class Parser { + protected final Table pt; /** create a parser to parse the grammar with start symbol u */ public Parser(Union u, Topology top) { this.pt = new Table(u, top); } - Parser(Table pt) { this.pt = pt; } /** implement this method to create the output forest corresponding to a lone shifted input token */ public abstract Forest shiftToken(Token t, Input.Location newloc); - boolean helpgc = true; - public String toString() { return pt.toString(); } - /** parse input, and return the shared packed parse forest (or throw an exception) */ - public Forest parse(Input input) throws IOException, ParseFailed { - GSS gss = new GSS(input); - Input.Location loc = input.getLocation(); - Token tok = input.next(); - GSS.Phase current = gss.new Phase(null, null, tok, loc, input.getLocation(), null); - current.newNode(new Result(Forest.create(loc.createRegion(loc), null, null, false), null, null), pt.start, true); - int count = 1; - for(int idx=0;;idx++) { - Input.Location oldloc = loc; - current.reduce(); - Forest forest = current.token==null ? null : shiftToken((Token)current.token, loc); - loc = input.getLocation(); - Token nextToken = input.next(); - GSS.Phase next = gss.new Phase(current, current, nextToken, loc, input.getLocation(), forest); - - /* - FileOutputStream fos = new FileOutputStream("out-"+idx+".dot"); - PrintWriter p = new PrintWriter(new OutputStreamWriter(fos)); - GraphViz gv = new GraphViz(); - for(Object n : current) - ((Node)n).toGraphViz(gv); - gv.dump(p); - p.flush(); - p.close(); - */ - - count = next.size(); - if (current.isDone()) return (Forest)gss.finalResult; - current = next; + private boolean verbose = false;; + private static final char[] spin = new char[] { '-', '\\', '|', '/' }; + private int spinpos = 0; + private long last = 0; + void spin() { + if (verbose) { + long now = System.currentTimeMillis(); + if (now-last < 100) return; + last = now; + System.err.print("\r " + spin[spinpos++ % (spin.length)]+ANSI.clreol()+"\r"); } } - // Table ////////////////////////////////////////////////////////////////////////////// - - /** an SLR(1) parse table which may contain conflicts */ - static class Table extends Walk.Cache { - - public String toString() { - StringBuffer sb = new StringBuffer(); - sb.append("parse table"); - for(State state : all_states) { - sb.append(" " + state + "\n"); - for(Topology t : state.shifts) { - sb.append(" shift \""+ - new edu.berkeley.sbp.chr.CharTopology((IntegerTopology)t)+"\" => "); - for(State st : state.shifts.getAll(t)) - sb.append(st.idx+" "); - sb.append("\n"); + /** parse input, and return the shared packed parse forest (or throw an exception) */ + public Forest parse(Input input) throws IOException, ParseFailed { + verbose = System.getProperty("sbp.verbose", null) != null; + spinpos = 0; + try { + GSS gss = new GSS(input, this); + for(GSS.Phase current = gss.new Phase(pt.start); ;) { + + if (verbose) { + String s; + s = " " + spin[spinpos++ % 
(spin.length)]+" parsing "; + s += input.getName(); + s += " "+input.getLocation(); + while(s.indexOf(':') != -1 && s.indexOf(':') < 8) s = " " + s; + String y = "@"+gss.viewPos+" "; + while(y.length() < 9) y = " " + y; + s += y; + //s += " doom="+Node.doomedNodes; + //while(s.length() < 40) s = s + " "; + s += " nodes="+gss.numOldNodes; + while(s.length() < 50) s = s + " "; + s += " shifted="+gss.numNewNodes; + while(s.length() < 60) s = s + " "; + s += " reductions="+gss.numReductions; + System.err.print("\r"+s+ANSI.clreol()+"\r"); } - for(Topology t : state.reductions) - sb.append(" reduce \""+ - new edu.berkeley.sbp.chr.CharTopology((IntegerTopology)t)+"\" => " + - state.reductions.getAll(t) + "\n"); - for(Sequence s : state.gotoSetNonTerminals.keySet()) - sb.append(" goto "+state.gotoSetNonTerminals.get(s)+" from " + s + "\n"); + + // FIXME: make sure all the locations line up properly in here + if (current.isDone()) return (Forest)current.finalResult; + Forest forest = shiftToken((Token)current.token, input.getLocation()); + current = gss.new Phase(current, forest); } - return sb.toString(); + } finally { + if (verbose) + System.err.print("\r \r"); } + } - public final Walk.Cache cache = this; - private void walk(Element e, HashSet hs) { - if (e==null) return; - if (hs.contains(e)) return; - hs.add(e); - if (e instanceof Atom) return; - for(Sequence s : (Union)e) - walk(s, hs); - } - private void walk(Sequence s, HashSet hs) { - hs.add(s); - for(Position p = s.firstp(); p != null; p = p.next()) - walk(p.element(), hs); - for(Sequence ss : s.needs()) walk(ss, hs); - for(Sequence ss : s.hates()) walk(ss, hs); - } + // Table ////////////////////////////////////////////////////////////////////////////// + + /** an SLR(1) parse table which may contain conflicts */ + static class Table extends Cache { /** the start state */ public final State start; @@ -111,24 +88,18 @@ public abstract class Parser { HashSet> all_states = new HashSet>(); HashMap,State> doomed_states = new HashMap,State>(); HashMap,State> normal_states = new HashMap,State>(); - HashSet all_elements = new HashSet(); /** construct a parse table for the given grammar */ public Table(Topology top) { this("s", top); } public Table(String startSymbol, Topology top) { this(new Union(startSymbol), top); } public Table(Union ux, Topology top) { + super(ux, top); Union start0 = new Union("0"); - start0.add(new Sequence.Singleton(ux)); - - for(Sequence s : start0) cache.eof.put(s, true); - cache.eof.put(start0, true); + Sequence seq0 = new Sequence.Singleton(ux); + start0.add(seq0); + buildFollowSet(seq0, top, true); // construct the set of states - walk(start0, all_elements); - for(SequenceOrElement e : all_elements) - cache.ys.addAll(e, new Walk.YieldSet(e, cache).walk()); - for(SequenceOrElement e : all_elements) - cache.ys2.addAll(e, new Walk.YieldSet2(e, cache).walk()); HashSet hp = new HashSet(); reachable(start0, hp); @@ -144,17 +115,19 @@ public abstract class Parser { state.accept = true; if (isRightNullable(p)) { - Walk.Follow wf = new Walk.Follow(top.empty(), p.owner(), all_elements, cache); - Topology follow = wf.walk(p.owner()); + Topology follow = (Topology)follow(p.owner()); for(Position p2 = p; p2 != null && p2.element() != null; p2 = p2.next()) { - Atom set = new Walk.EpsilonFollowSet(new edu.berkeley.sbp.chr.CharAtom(top.empty().complement()), - new edu.berkeley.sbp.chr.CharAtom(top.empty()), - cache).walk(p2.element()); - follow = follow.intersect(new Walk.Follow(top.empty(), p2.element(), all_elements, 
cache).walk(p2.element())); + if (!(p2.element() instanceof Union)) throw new Error("impossible"); + Union u = (Union)p2.element(); + Atom set = new edu.berkeley.sbp.chr.CharAtom(new edu.berkeley.sbp.chr.CharTopology((Topology)epsilonFollowSet(u))); + Element p2e = p2.element(); + if (p2e instanceof Union) + for(Sequence p2es : ((Union)p2e)) + follow = follow.intersect(follow(p2es)); if (set != null) follow = follow.intersect(set.getTokenTopology()); } state.reductions.put(follow, p); - if (wf.includesEof()) state.eofReductions.add(p); + if (followEof.contains(p.owner())) state.eofReductions.add(p); } // if the element following this position is an atom, copy the corresponding @@ -170,6 +143,7 @@ public abstract class Parser { } // crude algorithm to assing an ordinal ordering to every position + // al will be sorted in DECREASING order (al[0] >= al[1]) ArrayList al = new ArrayList(); for(State s : all_states) { for(Object po : s) { @@ -177,55 +151,83 @@ public abstract class Parser { if (al.contains(p)) continue; int i=0; for(; i 0) + if (comparePositions(p, al.get(i)) < 0) break; } al.add(i, p); } } - for(int i=0; i 0) { + Sequence.Position p = al.remove(j); + al.add(i, p); + continue OUTER; + } + break; + } + + int j = 1; + int pk = 0; + for(int i=0; i 0) + { inc = true; break; } + } + inc = true; + if (inc) { + j++; + pk = i; + } + al.get(i).ord = j; + } - private boolean isRightNullable(Position p) { - if (p.isLast()) return true; - if (!possiblyEpsilon(p.element())) return false; - return isRightNullable(p.next()); + /* + for(int i=0; i implements IntegerMappable, Iterable { public final int idx = master_state_idx++; private final HashSet hs; public HashSet> also = new HashSet>(); - public transient HashMap> gotoSetNonTerminals = new HashMap>(); + public transient HashMap> gotoSetNonTerminals = new HashMap>(); private transient TopologicalBag> gotoSetTerminals = new TopologicalBag>(); - private TopologicalBag reductions = new TopologicalBag(); - private HashSet eofReductions = new HashSet(); - private TopologicalBag> shifts = new TopologicalBag>(); - private boolean accept = false; + private TopologicalBag reductions = new TopologicalBag(); + private HashSet eofReductions = new HashSet(); + private TopologicalBag> shifts = new TopologicalBag>(); + private boolean accept = false; - private VisitableMap> oshifts = null; - private VisitableMap oreductions = null; + private VisitableMap> oshifts = null; + private VisitableMap oreductions = null; // Interface Methods ////////////////////////////////////////////////////////////////////////////// - boolean isAccepting() { return accept; } - public Iterator iterator() { return hs.iterator(); } - - boolean canShift(Token t) { return oshifts!=null && oshifts.contains(t); } - void invokeShifts(Token t, Invokable,B,C> irbc, B b, C c) { - oshifts.invoke(t, irbc, b, c); + boolean isAccepting() { return accept; } + public Iterator iterator() { return hs.iterator(); } + boolean canShift(Token t) { return oshifts!=null && oshifts.contains(t); } + void invokeShifts(Token t, GSS.Phase phase, Result r) { oshifts.invoke(t, phase, r); } + boolean canReduce(Token t) { + return oreductions != null && (t==null ? eofReductions.size()>0 : oreductions.contains(t)); } + void invokeEpsilonReductions(Token t, Node node) { + if (t==null) for(Position r : eofReductions) node.invoke(r, null); + else oreductions.invoke(t, node, null); } - - boolean canReduce(Token t) { return oreductions != null && (t==null ? 
eofReductions.size()>0 : oreductions.contains(t)); } - void invokeReductions(Token t, Invokable irbc, B b, C c) { - if (t==null) for(Position r : eofReductions) irbc.invoke(r, b, c); - else oreductions.invoke(t, irbc, b, c); + void invokeReductions(Token t, Node node, Result b) { + //System.err.println("\rinvokage: " + this); + if (t==null) for(Position r : eofReductions) node.invoke(r, b); + else oreductions.invoke(t, node, b); } // Constructor ////////////////////////////////////////////////////////////////////////////// @@ -233,7 +235,7 @@ public abstract class Parser { /** * create a new state consisting of all the Positions in hs * @param hs the set of Positions comprising this State - * @param all_elements the set of all elements (Atom instances need not be included) + * @param all the set of all elements (Atom instances need not be included) * * In principle these two steps could be merged, but they * are written separately to highlight these two facts: @@ -267,7 +269,7 @@ public abstract class Parser { for(Sequence s : p.owner().needs()) { if (hs.contains(s.firstp())) continue; HashSet h2 = new HashSet(); - reachable(s.firstp(), h2); + reachable(s, h2); also.add(mkstate(h2, true)); } for(Sequence s : p.owner().hates()) { @@ -302,43 +304,35 @@ public abstract class Parser { ((TopologicalBag)gotoSetTerminals).put(r, mkstate(h, doomed)); } - // Step 2: for every non-Atom element (ie every Element which has a corresponding reduction), - // compute the closure over every position in this set which is followed by a symbol - // which could yield the Element in question. + // Step 2: for every Sequence, compute the closure over every position in this set which + // is followed by a symbol which could yield the Sequence. // // "yields" [in one or more step] is used instead of "produces" [in exactly one step] // to avoid having to iteratively construct our set of States as shown in most // expositions of the algorithm (ie "keep doing XYZ until things stop changing"). - HashMapBag move = new HashMapBag(); - for(Position p : hs) { - Element e = p.element(); - if (e==null) continue; - for(SequenceOrElement y : cache.ys2.getAll(e)) { - //System.out.println(e + " yields " + y); - HashSet hp = new HashSet(); - reachable(p.next(), hp); - move.addAll(y, hp); - } - } - OUTER: for(SequenceOrElement y : move) { + HashMapBag move = new HashMapBag(); + for(Position p : hs) + if (!p.isLast() && p.element() instanceof Union) + for(Sequence s : ((Union)p.element())) { + HashSet hp = new HashSet(); + reachable(p.next(), hp); + move.addAll(s, hp); + } + OUTER: for(Sequence y : move) { + // if a reduction is "lame", it should wind up in the dead_state after reducing HashSet h = move.getAll(y); State s = mkstate(h, doomed); - // if a reduction is "lame", it should wind up in the dead_state after reducing - if (y instanceof Sequence) { - for(Position p : hs) { - if (p.element() != null && (p.element() instanceof Union)) { - Union u = (Union)p.element(); - for(Sequence seq : u) - if (seq.needs.contains((Sequence)y) || seq.hates.contains((Sequence)y)) { - // FIXME: what if there are two "routes" to get to the sequence? 
- ((HashMap)gotoSetNonTerminals).put((Sequence)y, dead_state); - continue OUTER; - } - } - } - gotoSetNonTerminals.put((Sequence)y, s); - } + for(Position p : hs) + if (p.element() != null && (p.element() instanceof Union)) + for(Sequence seq : ((Union)p.element())) + if (seq.needs.contains(y) || seq.hates.contains(y)) { + // FIXME: assumption that no sequence is ever both usefully (non-lamely) matched + // and also directly lamely matched + ((HashMap)gotoSetNonTerminals).put(y, dead_state); + continue OUTER; + } + gotoSetNonTerminals.put(y, s); } } @@ -355,6 +349,7 @@ public abstract class Parser { } return st.toString(); } + public String toString() { StringBuffer ret = new StringBuffer(); ret.append("state["+idx+"]: "); @@ -362,9 +357,30 @@ public abstract class Parser { return ret.toString(); } - public Walk.Cache cache() { return cache; } public int toInt() { return idx; } } + + public String toString() { + StringBuffer sb = new StringBuffer(); + sb.append("parse table"); + for(State state : all_states) { + sb.append(" " + state + "\n"); + for(Topology t : state.shifts) { + sb.append(" shift \""+ + new edu.berkeley.sbp.chr.CharTopology((IntegerTopology)t)+"\" => "); + for(State st : state.shifts.getAll(t)) + sb.append(st.idx+" "); + sb.append("\n"); + } + for(Topology t : state.reductions) + sb.append(" reduce \""+ + new edu.berkeley.sbp.chr.CharTopology((IntegerTopology)t)+"\" => " + + state.reductions.getAll(t) + "\n"); + for(Sequence s : state.gotoSetNonTerminals.keySet()) + sb.append(" goto "+state.gotoSetNonTerminals.get(s)+" from " + s + "\n"); + } + return sb.toString(); + } } // Helpers ////////////////////////////////////////////////////////////////////////////// @@ -384,5 +400,5 @@ public abstract class Parser { h.add(p); if (p.element() != null) reachable(p.element(), h); } - + //public static Cache mastercache = null; }
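Usage note (not part of the patch above): the public surface that survives this change is small.
A subclass supplies shiftToken() to turn one shifted token into a leaf of the shared packed parse
forest, and parse(Input) drives the GSS phase by phase until the current Phase isDone(). The sketch
below shows one way a character-level front end might plug into that API. CharTreeParser and
parseAll are hypothetical names, the generic parameters are approximations (the rendered patch does
not show them), and the Forest.create() argument shape is copied from the call in the removed
parse() loop, so treat it as an illustration of the API rather than code from this repository.

import java.io.IOException;
import edu.berkeley.sbp.*;

/** hypothetical character-level front end for the abstract Parser above */
public class CharTreeParser extends Parser<Character, String> {

    /** mirrors the Parser(Union, Topology) constructor: the Topology describes the token space */
    public CharTreeParser(Union grammar, Topology<Character> charTopology) {
        super(grammar, charTopology);
    }

    /** every shifted character becomes one leaf of the shared packed parse forest */
    public Forest<String> shiftToken(Character c, Input.Location newloc) {
        // argument shape follows Forest.create(loc.createRegion(loc), null, null, false)
        // from the removed parse() loop; here the leaf's value is the character itself
        return Forest.create(newloc.createRegion(newloc), c.toString(), null, false);
    }

    /** parse() either returns the shared packed parse forest or throws ParseFailed */
    public static Forest<String> parseAll(CharTreeParser p, Input<Character> in)
            throws IOException, ParseFailed {
        return p.parse(in);
    }
}

The design point the patch keeps intact is that the table and GSS machinery are token-agnostic:
a character-level front end and a token-stream front end differ only in how shiftToken() builds
its leaf forests.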