X-Git-Url: http://git.megacz.com/?p=sbp.git;a=blobdiff_plain;f=src%2Fedu%2Fberkeley%2Fsbp%2FParser.java;h=2344e15b96c2cb9f451c27f248edc3e04c15978f;hp=24f8410d0d5f521a19cc47b2091dc65b69d447b1;hb=ae0cef03f2e46f6ae6438f9a3e60ca36ff1a4643;hpb=37d28a2fa4ecc9534ca03827c23fdb493af79645 diff --git a/src/edu/berkeley/sbp/Parser.java b/src/edu/berkeley/sbp/Parser.java index 24f8410..2344e15 100644 --- a/src/edu/berkeley/sbp/Parser.java +++ b/src/edu/berkeley/sbp/Parser.java @@ -6,34 +6,46 @@ import java.io.*; import java.util.*; /** a parser which translates streams of Tokens of type T into a Forest */ -public abstract class Parser { +public abstract class Parser { - private final Table pt; + protected final Table pt; /** create a parser to parse the grammar with start symbol u */ - protected Parser(Union u) { this.pt = new Table(u, top()); } - protected Parser(Table pt) { this.pt = pt; } + protected Parser(Union u, Topology top) { this.pt = new Table(u, top); } + protected Parser(Table pt) { this.pt = pt; } /** implement this method to create the output forest corresponding to a lone shifted input token */ - public abstract Forest shiftedToken(T t, Token.Location loc); + protected abstract Forest shiftToken(Tok t, Input.Location newloc); - /** this method must return an empty topology of the input token type */ - public abstract Topology top(); + boolean helpgc = true; + + public String toString() { return pt.toString(); } /** parse input, using the table pt to drive the parser */ - public Forest parse(Token.Stream input) throws IOException, ParseFailed { + public Forest parse(Input input) throws IOException, ParseFailed { GSS gss = new GSS(); - Token.Location loc = input.getLocation(); - GSS.Phase current = gss.new Phase(null, this, null, input.next(1, 0, 0), loc, null); - current.newNode(null, Forest.leaf(null, null), pt.start, true); + Input.Location loc = input.getLocation(); + GSS.Phase current = gss.new Phase(null, this, null, input.next(), loc, null); + current.newNode(null, Forest.create(null, null, null, false), pt.start, true); int count = 1; - for(;;) { + for(int idx=0;;idx++) { + Input.Location oldloc = loc; loc = input.getLocation(); current.reduce(); - Forest forest = current.token==null ? null : shiftedToken((T)current.token, loc); - GSS.Phase next = gss.new Phase(current, this, current, input.next(count, gss.resets, gss.waits), loc, forest); - count = next.hash.size(); - if (current.isDone()) return (Forest)current.finalResult; + Forest forest = current.token==null ? null : shiftToken((Tok)current.token, loc); + GSS.Phase next = gss.new Phase(current, this, current, input.next(), loc, forest); + if (!helpgc) { + FileOutputStream fos = new FileOutputStream("out-"+idx+".dot"); + PrintWriter p = new PrintWriter(new OutputStreamWriter(fos)); + GraphViz gv = new GraphViz(); + for(Object n : next) + ((GSS.Phase.Node)n).toGraphViz(gv); + gv.dump(p); + p.flush(); + p.close(); + } + count = next.size(); + if (current.isDone()) return (Forest)gss.finalResult; current = next; } } @@ -41,7 +53,27 @@ public abstract class Parser { // Table ////////////////////////////////////////////////////////////////////////////// /** an SLR(1) parse table which may contain conflicts */ - static class Table extends Walk.Cache { + static class Table extends Walk.Cache { + + public String toString() { + StringBuffer sb = new StringBuffer(); + sb.append("parse table"); + for(State state : all_states.values()) { + sb.append(" " + state + "\n"); + for(Topology t : state.shifts) { + sb.append(" shift \""+ + new edu.berkeley.sbp.chr.CharTopology((IntegerTopology)t)+"\" => "); + for(State st : state.shifts.getAll(t)) + sb.append(st.idx+" "); + sb.append("\n"); + } + for(Topology t : state.reductions) + sb.append(" reduce \""+ + new edu.berkeley.sbp.chr.CharTopology((IntegerTopology)t)+"\" => " + + state.reductions.getAll(t) + "\n"); + } + return sb.toString(); + } public final Walk.Cache cache = this; @@ -50,41 +82,50 @@ public abstract class Parser { if (hs.contains(e)) return; hs.add(e); if (e instanceof Atom) return; - for(Sequence s : (Union)e) { - hs.add(s); - for(Position p = s.firstp(); p != null; p = p.next()) - walk(p.element(), hs); - } + for(Sequence s : (Union)e) + walk(s, hs); + } + private void walk(Sequence s, HashSet hs) { + hs.add(s); + for(Position p = s.firstp(); p != null; p = p.next()) + walk(p.element(), hs); + for(Sequence ss : s.needs()) walk(ss, hs); + for(Sequence ss : s.hates()) walk(ss, hs); } /** the start state */ - public final State start; + public final State start; + + /** the state from which no reductions can be done */ + private final State dead_state; /** used to generate unique values for State.idx */ private int master_state_idx = 0; + HashMap,State> all_states = new HashMap,State>(); /** construct a parse table for the given grammar */ public Table(Topology top) { this("s", top); } public Table(String startSymbol, Topology top) { this(new Union(startSymbol), top); } public Table(Union ux, Topology top) { Union start0 = new Union("0"); - start0.add(new Sequence.Singleton(ux, null, null)); + start0.add(new Sequence.Singleton(ux)); for(Sequence s : start0) cache.eof.put(s, true); cache.eof.put(start0, true); // construct the set of states - HashMap,State> all_states = new HashMap,State>(); - HashSet all_elements = new HashSet(); + HashSet all_elements = new HashSet(); walk(start0, all_elements); for(Element e : all_elements) cache.ys.addAll(e, new Walk.YieldSet(e, cache).walk()); HashSet hp = new HashSet(); reachable(start0, hp); - this.start = new State(hp, all_states, all_elements); + + this.dead_state = new State(new HashSet(), all_states, all_elements); + this.start = new State(hp, all_states, all_elements); // for each state, fill in the corresponding "row" of the parse table - for(State state : all_states.values()) + for(State state : all_states.values()) for(Position p : state.hs) { // the Grammar's designated "last position" is the only accepting state @@ -93,13 +134,11 @@ public abstract class Parser { if (isRightNullable(p)) { Walk.Follow wf = new Walk.Follow(top.empty(), p.owner(), all_elements, cache); - Reduction red = new Reduction(p); - Topology follow = wf.walk(p.owner()); for(Position p2 = p; p2 != null && p2.element() != null; p2 = p2.next()) follow = follow.intersect(new Walk.Follow(top.empty(), p2.element(), all_elements, cache).walk(p2.element())); - state.reductions.put(follow, red); - if (wf.includesEof()) state.eofReductions.add(red); + state.reductions.put(follow, p); + if (wf.includesEof()) state.eofReductions.add(p); } // if the element following this position is an atom, copy the corresponding @@ -107,50 +146,50 @@ public abstract class Parser { if (p.element() != null && p.element() instanceof Atom) state.shifts.addAll(state.gotoSetTerminals.subset(((Atom)p.element()))); } - for(State state : all_states.values()) { - state.oreductions = state.reductions.optimize(); - state.oshifts = state.shifts.optimize(); - } + if (top instanceof IntegerTopology) + for(State state : all_states.values()) { + state.oreductions = state.reductions.optimize(((IntegerTopology)top).functor()); + state.oshifts = state.shifts.optimize(((IntegerTopology)top).functor()); + } } private boolean isRightNullable(Position p) { if (p.isLast()) return true; - if (!p.element().possiblyEpsilon(this)) return false; + if (!possiblyEpsilon(p.element())) return false; return isRightNullable(p.next()); } /** a single state in the LR table and the transitions possible from it */ - public class State implements Comparable, IntegerMappable, Iterable { + class State implements Comparable>, IntegerMappable, Iterable { public final int idx = master_state_idx++; private final HashSet hs; - private transient HashMap gotoSetNonTerminals = new HashMap(); - private transient TopologicalBag gotoSetTerminals = new TopologicalBag(); + public transient HashMap> gotoSetNonTerminals = new HashMap>(); + private transient TopologicalBag> gotoSetTerminals = new TopologicalBag>(); - private TopologicalBag reductions = new TopologicalBag(); - private HashSet eofReductions = new HashSet(); - private TopologicalBag shifts = new TopologicalBag(); + private TopologicalBag reductions = new TopologicalBag(); + private HashSet eofReductions = new HashSet(); + private TopologicalBag> shifts = new TopologicalBag>(); private boolean accept = false; - private VisitableMap oshifts = null; - private VisitableMap oreductions = null; + private VisitableMap> oshifts = null; + private VisitableMap oreductions = null; // Interface Methods ////////////////////////////////////////////////////////////////////////////// - public boolean isAccepting() { return accept; } - - public boolean canShift(Token t) { return oshifts.contains(t); } - public boolean canReduce(Token t) { return t==null ? eofReductions.size()>0 : oreductions.contains(t); } + boolean isAccepting() { return accept; } + public Iterator iterator() { return hs.iterator(); } - public Iterator iterator() { return hs.iterator(); } - - public void invokeShifts(Token t, Invokable irbc, B b, C c) { + boolean canShift(Tok t) { return oshifts!=null && oshifts.contains(t); } + void invokeShifts(Tok t, Invokable,B,C> irbc, B b, C c) { oshifts.invoke(t, irbc, b, c); } - public void invokeReductions(Token t, Invokable irbc, B b, C c) { - if (t==null) for(Reduction r : eofReductions) irbc.invoke(r, b, c); + + boolean canReduce(Tok t) { return oreductions != null && (t==null ? eofReductions.size()>0 : oreductions.contains(t)); } + void invokeReductions(Tok t, Invokable irbc, B b, C c) { + if (t==null) for(Position r : eofReductions) irbc.invoke(r, b, c); else oreductions.invoke(t, irbc, b, c); } @@ -179,7 +218,7 @@ public abstract class Parser { * */ public State(HashSet hs, - HashMap,State> all_states, + HashMap,State> all_states, HashSet all_elements) { this.hs = hs; @@ -192,7 +231,7 @@ public abstract class Parser { // of _new_ positions (positions after shifting). These mappings are // collectively known as the _closure_ - TopologicalBag bag0 = new TopologicalBag(); + TopologicalBag bag0 = new TopologicalBag(); for(Position position : hs) { if (position.isLast() || !(position.element() instanceof Atom)) continue; Atom a = (Atom)position.element(); @@ -205,10 +244,10 @@ public abstract class Parser { // set, add that character set to the goto table (with the State corresponding to the // computed next-position set). - for(Topology r : bag0) { + for(Topology r : bag0) { HashSet h = new HashSet(); for(Position p : bag0.getAll(r)) h.add(p); - gotoSetTerminals.put(r, all_states.get(h) == null ? new State(h, all_states, all_elements) : all_states.get(h)); + gotoSetTerminals.put(r, all_states.get(h) == null ? new State(h, all_states, all_elements) : all_states.get(h)); } // Step 2: for every non-Atom element (ie every Element which has a corresponding reduction), @@ -218,6 +257,7 @@ public abstract class Parser { // "yields" [in one or more step] is used instead of "produces" [in exactly one step] // to avoid having to iteratively construct our set of States as shown in most // expositions of the algorithm (ie "keep doing XYZ until things stop changing"). + HashMapBag move = new HashMapBag(); for(Position p : hs) { Element e = p.element(); @@ -230,105 +270,46 @@ public abstract class Parser { } for(Element y : move) { HashSet h = move.getAll(y); - State s = all_states.get(h) == null ? new State(h, all_states, all_elements) : all_states.get(h); - gotoSetNonTerminals.put(y, s); + State s = all_states.get(h) == null ? new State(h, all_states, all_elements) : all_states.get(h); + // if a reduction is "lame", it should wind up in the dead_state after reducing + if (y instanceof Sequence && ((Sequence)y).lame) + ((HashMap)gotoSetNonTerminals).put(y, dead_state); + else + gotoSetNonTerminals.put(y, s); } } + public String toStringx() { + StringBuffer st = new StringBuffer(); + for(Position p : this) { + if (st.length() > 0) st.append("\n"); + st.append(p); + } + return st.toString(); + } public String toString() { - //return "state["+idx+"]"; StringBuffer ret = new StringBuffer(); ret.append("state["+idx+"]: "); for(Position p : this) ret.append("{"+p+"} "); return ret.toString(); } - public int compareTo(Table.State s) { return idx==s.idx ? 0 : idx < s.idx ? -1 : 1; } + public int compareTo(State s) { return idx==s.idx ? 0 : idx < s.idx ? -1 : 1; } public int toInt() { return idx; } } - - /** - * the information needed to perform a reduction; copied here to - * avoid keeping references to Element objects in a Table - */ - public class Reduction { - // FIXME: cleanup; almost everything in here could go in either Sequence.Position.getRewrite() or else in GSS.Reduct - public final int numPop; - /*private*/ final Position position; - private final Forest[] holder; // to avoid constant reallocation - public int hashCode() { return position.hashCode(); } - public boolean equals(Object o) { - if (o==null) return false; - if (o==this) return true; - if (!(o instanceof Reduction)) return false; - Reduction r = (Reduction)o; - return r.position == position; - } - public Reduction(Position p) { - this.position = p; - this.numPop = p.pos; - this.holder = new Forest[numPop]; - } - public String toString() { return "[reduce " + position + "]"; } - - private Forest zero = null; - public Forest zero() { - if (zero != null) return zero; - if (numPop > 0) throw new Error(); - return zero = position.rewrite(null); - } - - public void reduce(GSS.Phase.Node parent) { - if (numPop==0) finish(parent, zero(), parent.phase()); - else reduce(parent, numPop-1, parent.phase()); - } - - public void reduce(GSS.Phase.Node parent, GSS.Phase.Node onlychild) { - if (numPop<=0) throw new Error("called wrong form of reduce()"); - int pos = numPop-1; - Forest old = holder[pos]; - holder[pos] = parent.pending(); - if (pos==0) { - System.arraycopy(holder, 0, position.holder, 0, holder.length); - finish(onlychild, position.rewrite(parent.phase().getLocation()), parent.phase()); - } else { - reduce(onlychild, pos-1, parent.phase()); - } - holder[pos] = old; - } - - // FIXME: this could be more elegant and/or cleaner and/or somewhere else - private void reduce(GSS.Phase.Node parent, int pos, GSS.Phase target) { - Forest old = holder[pos]; - holder[pos] = parent.pending(); - if (pos==0) { - System.arraycopy(holder, 0, position.holder, 0, holder.length); - for(int i=0; i h) { + reachable(s.firstp(), h); + for(Sequence ss : s.needs()) reachable(ss, h); + for(Sequence ss : s.hates()) reachable(ss, h); + } private static void reachable(Element e, HashSet h) { if (e instanceof Atom) return; for(Sequence s : ((Union)e)) - reachable(s.firstp(), h); + reachable(s, h); } private static void reachable(Position p, HashSet h) { if (h.contains(p)) return;