X-Git-Url: http://git.megacz.com/?p=sbp.git;a=blobdiff_plain;f=src%2Fedu%2Fberkeley%2Fsbp%2FParser.java;h=64ad67ac9abc46b3790a890dfd9398c4694f9121;hp=38b7e3a58b96e90dcfd15f03f70c12d094563c88;hb=dc9bb3a45ed306e2e35549076842b3e74efecb48;hpb=bf51becde116f08ec503a4a83e709277aaa98e36 diff --git a/src/edu/berkeley/sbp/Parser.java b/src/edu/berkeley/sbp/Parser.java index 38b7e3a..64ad67a 100644 --- a/src/edu/berkeley/sbp/Parser.java +++ b/src/edu/berkeley/sbp/Parser.java @@ -1,37 +1,30 @@ // Copyright 2006 all rights reserved; see LICENSE file for BSD-style license package edu.berkeley.sbp; -import edu.berkeley.sbp.*; import edu.berkeley.sbp.util.*; import edu.berkeley.sbp.Sequence.Position; import java.io.*; import java.util.*; +// FEATURE: try harder to "fuse" states together along two dimensions: +// - identical (equivalent) states, or states that subsume each other +// - unnecessary intermediate states ("short cut" GLR) + /** a parser which translates an Input<Token> into a Forest<NodeType> */ public abstract class Parser { - protected final Table pt; + final Table pt; /** create a parser to parse the grammar with start symbol u */ - public Parser(Union u, Topology top) { this.pt = new Table(u, top); } + public Parser(Union u) { this.pt = new Table(u); } /** implement this method to create the output forest corresponding to a lone shifted input token */ - public abstract Forest shiftToken(Token t, Input.Location newloc); + public abstract Forest shiftToken(Token t, Input.Region region); - public String toString() { return pt.toString(); } + public abstract Topology emptyTopology(); - private boolean verbose = false;; - private static final char[] spin = new char[] { '-', '\\', '|', '/' }; - private int spinpos = 0; - private long last = 0; - void spin() { - if (verbose) { - long now = System.currentTimeMillis(); - if (now-last < 70) return; - last = now; - System.err.print("\r " + spin[spinpos++ % (spin.length)]+"\r"); - } - } + public String toString() { return pt.toString(); } + Cache cache() { return pt; } /** parse input, and return the shared packed parse forest (or throw an exception) */ public Forest parse(Input input) throws IOException, ParseFailed { @@ -42,6 +35,7 @@ public abstract class Parser { for(GSS.Phase current = gss.new Phase(pt.start); ;) { if (verbose) { + // FIXME: clean this up String s; s = " " + spin[spinpos++ % (spin.length)]+" parsing "; s += input.getName(); @@ -50,8 +44,6 @@ public abstract class Parser { String y = "@"+gss.viewPos+" "; while(y.length() < 9) y = " " + y; s += y; - //s += " doom="+Node.doomedNodes; - //while(s.length() < 40) s = s + " "; s += " nodes="+gss.numOldNodes; while(s.length() < 50) s = s + " "; s += " shifted="+gss.numNewNodes; @@ -60,88 +52,112 @@ public abstract class Parser { System.err.print("\r"+s+ANSI.clreol()+"\r"); } - // FIXME: make sure all the locations line up properly in here if (current.isDone()) return (Forest)current.finalResult; - Forest forest = shiftToken((Token)current.token, input.getLocation()); + Forest forest = shiftToken((Token)current.token, current.getRegion()); current = gss.new Phase(current, forest); } - } finally { - if (verbose) - System.err.print("\r \r"); - } + } finally { if (verbose) System.err.print("\r"+ANSI.clreol()); } } + // Spinner ////////////////////////////////////////////////////////////////////////////// + + private boolean verbose = false; + private static final char[] spin = new char[] { '-', '\\', '|', '/' }; + private int spinpos = 0; + private long last = 0; + void spin() { + if (!verbose) return; + long now = System.currentTimeMillis(); + if (now-last < 70) return; + last = now; + System.err.print("\r " + spin[spinpos++ % (spin.length)]+"\r"); + } // Table ////////////////////////////////////////////////////////////////////////////// /** an SLR(1) parse table which may contain conflicts */ - static class Table extends Cache { + class Table extends Cache { /** the start state */ - public final State start; + final State start; - /** the state from which no reductions can be done */ + /** a dummy state from which no reductions can be performed */ private final State dead_state; /** used to generate unique values for State.idx */ private int master_state_idx = 0; - HashSet> all_states = new HashSet>(); + + /** all the states for this table */ + HashSet> all_states = new HashSet>(); + + /** all the doomed states in this table */ HashMap,State> doomed_states = new HashMap,State>(); + + /** all the non-doomed states in this table */ HashMap,State> normal_states = new HashMap,State>(); + Topology emptyTopology() { return Parser.this.emptyTopology(); } + /** construct a parse table for the given grammar */ - public Table(Topology top) { this("s", top); } - public Table(String startSymbol, Topology top) { this(new Union(startSymbol), top); } - public Table(Union ux, Topology top) { - super(ux, top); - Union start0 = new Union("0"); - Sequence seq0 = new Sequence.Singleton(ux); - start0.add(seq0); - buildFollowSet(seq0, top, true); - - // construct the set of states - HashSet hp = new HashSet(); - reachable(start0, hp); + Table(Union ux) { + super(new Union("0", Sequence.create(ux), true)); + // create the "dead state" this.dead_state = new State(new HashSet(), true); - this.start = new State(hp); + // construct the start state; this will recursively create *all* the states + this.start = new State(reachable(rootUnion), false); + + buildReductions(); + sortReductions(); + } + + /** fill in the reductions table */ + private void buildReductions() { // for each state, fill in the corresponding "row" of the parse table for(State state : all_states) for(Position p : state.hs) { - // the Grammar's designated "last position" is the only accepting state - if (start0.contains(p.owner()) && p.next()==null && !state.doomed) - state.accept = true; - - if (isRightNullable(p)) { - Topology follow = (Topology)follow(p.owner()); - for(Position p2 = p; p2 != null && p2.element() != null; p2 = p2.next()) { - if (!(p2.element() instanceof Union)) throw new Error("impossible"); - Union u = (Union)p2.element(); - Atom set = new edu.berkeley.sbp.chr.CharAtom(new edu.berkeley.sbp.chr.CharTopology((Topology)epsilonFollowSet(u))); - Element p2e = p2.element(); - if (p2e instanceof Union) - for(Sequence p2es : ((Union)p2e)) - follow = follow.intersect(follow(p2es)); - if (set != null) follow = follow.intersect(set.getTokenTopology()); - } - state.reductions.put(follow, p); - if (followEof.contains(p.owner())) state.eofReductions.add(p); - } - // if the element following this position is an atom, copy the corresponding // set of rows out of the "master" goto table and into this state's shift table if (p.element() != null && p.element() instanceof Atom) state.shifts.addAll(state.gotoSetTerminals.subset(((Atom)p.element()).getTokenTopology())); + + // RNGLR: we can potentially reduce from any "right-nullable" position -- that is, + // any position for which all Elements after it in the Sequence are capable of + // matching the empty string. + if (!isRightNullable(p)) continue; + Topology follow = follow(p.owner()); + for(Position p2 = p; p2 != null && p2.element() != null; p2 = p2.next()) { + if (!(p2.element() instanceof Union)) + throw new Error("impossible -- only Unions can be nullable"); + + // interesting RNGLR-followRestriction interaction: we must intersect + // not just the follow-set of the last non-nullable element, but the + // follow-sets of the nulled elements as well. + for(Sequence s : ((Union)p2.element())) + follow = follow.intersect(follow(s)); + Topology set = epsilonFollowSet((Union)p2.element()); + if (set != null) follow = follow.intersect(set); + } + + // indicate that when the next token is in the set "follow", nodes in this + // state should reduce according to Position "p" + state.reductions.put(follow, p); + if (followEof.contains(p.owner())) state.eofReductions.add(p); } - if (top instanceof IntegerTopology) + // optimize the reductions table + if (emptyTopology() instanceof IntegerTopology) for(State state : all_states) { - state.oreductions = state.reductions.optimize(((IntegerTopology)top).functor()); - state.oshifts = state.shifts.optimize(((IntegerTopology)top).functor()); + // FIXME: this is pretty ugly + state.oreductions = state.reductions.optimize(((IntegerTopology)emptyTopology()).functor()); + state.oshifts = state.shifts.optimize(((IntegerTopology)emptyTopology()).functor()); } + } + // FIXME: this method needs to be cleaned up and documented + private void sortReductions() { // crude algorithm to assing an ordinal ordering to every position // al will be sorted in DECREASING order (al[0] >= al[1]) ArrayList al = new ArrayList(); @@ -185,24 +201,46 @@ public abstract class Parser { } al.get(i).ord = j; } - - /* - for(int i=0; i implements IntegerMappable, Iterable { public final int idx = master_state_idx++; private final HashSet hs; - public HashSet> also = new HashSet>(); + public HashSet> conjunctStates = new HashSet>(); - public transient HashMap> gotoSetNonTerminals = new HashMap>(); - private transient TopologicalBag> gotoSetTerminals = new TopologicalBag>(); + HashMap> gotoSetNonTerminals = new HashMap>(); + private transient TopologicalBag> gotoSetTerminals = new TopologicalBag>(); private TopologicalBag reductions = new TopologicalBag(); private HashSet eofReductions = new HashSet(); @@ -211,6 +249,7 @@ public abstract class Parser { private VisitableMap> oshifts = null; private VisitableMap oreductions = null; + public final boolean doomed; // Interface Methods ////////////////////////////////////////////////////////////////////////////// @@ -252,34 +291,35 @@ public abstract class Parser { * for non-Atom Elements. * */ - public State(HashSet hs) { this(hs, false); } - public boolean doomed; public State(HashSet hs, boolean doomed) { this.hs = hs; this.doomed = doomed; - // register ourselves in the all_states hash so that no - // two states are ever created with an identical position set + // register ourselves so that no two states are ever + // created with an identical position set (termination depends on this) ((HashMap)(doomed ? doomed_states : normal_states)).put(hs, this); ((HashSet)all_states).add(this); - + for(Position p : hs) { + // Step 1a: take note if we are an accepting state + // (last position of the root Union's sequence) + if (p.next()==null && !doomed && rootUnion.contains(p.owner())) + accept = true; + + // Step 1b: If any Position in the set is the first position of its sequence, then this + // state is responsible for spawning the "doomed" states for each of the + // Sequence's conjuncts. This obligation is recorded by adding the to-be-spawned + // states to conjunctStates. if (!p.isFirst()) continue; - for(Sequence s : p.owner().needs()) { - if (hs.contains(s.firstp())) continue; - HashSet h2 = new HashSet(); - reachable(s, h2); - also.add(mkstate(h2, true)); - } - for(Sequence s : p.owner().hates()) { - if (hs.contains(s.firstp())) continue; - HashSet h2 = new HashSet(); - reachable(s, h2); - also.add(mkstate(h2, true)); - } + for(Sequence s : p.owner().needs()) + if (!hs.contains(s.firstp())) + conjunctStates.add(mkstate(reachable(s.firstp()), true)); + for(Sequence s : p.owner().hates()) + if (!hs.contains(s.firstp())) + conjunctStates.add(mkstate(reachable(s.firstp()), true)); } - // Step 1a: examine all Position's in this state and compute the mappings from + // Step 2a: examine all Position's in this state and compute the mappings from // sets of follow tokens (tokens which could follow this position) to sets // of _new_ positions (positions after shifting). These mappings are // collectively known as the _closure_ @@ -293,7 +333,7 @@ public abstract class Parser { bag0.addAll(a.getTokenTopology(), hp); } - // Step 1b: for each _minimal, contiguous_ set of characters having an identical next-position + // Step 2b: for each _minimal, contiguous_ set of characters having an identical next-position // set, add that character set to the goto table (with the State corresponding to the // computed next-position set). @@ -303,7 +343,7 @@ public abstract class Parser { ((TopologicalBag)gotoSetTerminals).put(r, mkstate(h, doomed)); } - // Step 2: for every Sequence, compute the closure over every position in this set which + // Step 3: for every Sequence, compute the closure over every position in this set which // is followed by a symbol which could yield the Sequence. // // "yields" [in one or more step] is used instead of "produces" [in exactly one step] @@ -336,68 +376,37 @@ public abstract class Parser { } private State mkstate(HashSet h, boolean b) { - if (b) return doomed_states.get(h) == null ? (State)new State(h,b) : (State)doomed_states.get(h); - else return normal_states.get(h) == null ? (State)new State(h,b) : (State)normal_states.get(h); - } - - public String toStringx() { - StringBuffer st = new StringBuffer(); - for(Position p : this) { - if (st.length() > 0) st.append("\n"); - st.append(p); - } - return st.toString(); - } - - public String toString() { - StringBuffer ret = new StringBuffer(); - ret.append("state["+idx+"]: "); - for(Position p : this) ret.append("{"+p+"} "); - return ret.toString(); + State ret = (b?doomed_states:normal_states).get(h); + if (ret==null) ret = new State(h,b); + return ret; } public int toInt() { return idx; } } - public String toString() { - StringBuffer sb = new StringBuffer(); - sb.append("parse table"); - for(State state : all_states) { - sb.append(" " + state + "\n"); - for(Topology t : state.shifts) { - sb.append(" shift \""+ - new edu.berkeley.sbp.chr.CharTopology((IntegerTopology)t)+"\" => "); - for(State st : state.shifts.getAll(t)) - sb.append(st.idx+" "); - sb.append("\n"); - } - for(Topology t : state.reductions) - sb.append(" reduce \""+ - new edu.berkeley.sbp.chr.CharTopology((IntegerTopology)t)+"\" => " + - state.reductions.getAll(t) + "\n"); - for(Sequence s : state.gotoSetNonTerminals.keySet()) - sb.append(" goto "+state.gotoSetNonTerminals.get(s)+" from " + s + "\n"); - } - return sb.toString(); - } } // Helpers ////////////////////////////////////////////////////////////////////////////// - private static void reachable(Sequence s, HashSet h) { - reachable(s.firstp(), h); - //for(Sequence ss : s.needs()) reachable(ss, h); - //for(Sequence ss : s.hates()) reachable(ss, h); + private static HashSet reachable(Element e) { + HashSet h = new HashSet(); + reachable(e, h); + return h; } private static void reachable(Element e, HashSet h) { if (e instanceof Atom) return; for(Sequence s : ((Union)e)) - reachable(s, h); + reachable(s.firstp(), h); } private static void reachable(Position p, HashSet h) { if (h.contains(p)) return; h.add(p); if (p.element() != null) reachable(p.element(), h); } - //public static Cache mastercache = null; + private static HashSet reachable(Position p) { + HashSet ret = new HashSet(); + reachable(p, ret); + return ret; + } + }