X-Git-Url: http://git.megacz.com/?p=sbp.git;a=blobdiff_plain;f=src%2Fedu%2Fberkeley%2Fsbp%2FParser.java;h=cbf9b479f5bfb7e7fd763c5e08a68dc03ac450a4;hp=046e725fcd90da6543f03d7eee099f4b4a97071c;hb=c366dacc334fe2e35835164f5a37d3eebb2ca6d5;hpb=0a0227b9180534d2a431f3d6e08a398bde2244c4 diff --git a/src/edu/berkeley/sbp/Parser.java b/src/edu/berkeley/sbp/Parser.java index 046e725..cbf9b47 100644 --- a/src/edu/berkeley/sbp/Parser.java +++ b/src/edu/berkeley/sbp/Parser.java @@ -9,35 +9,43 @@ import java.util.*; import java.lang.reflect.*; /** a parser which translates streams of Tokens of type T into a Forest */ -public class Parser { +public abstract class Parser { - private final Table pt; + public final Table pt; - //public Parser( Topology top) { this(new Table( top)); } - //public Parser(String s, Topology top) { this(new Table(s, top)); } + /** create a parser to parse the grammar with start symbol u */ + protected Parser(Union u) { this.pt = new Table(u, top()); } + protected Parser(Table pt) { this.pt = pt; } - /** - * create a parser to parse the grammar with start symbol u - * @param top a "sample" Topology that can be cloned (FIXME, demanding this is lame) - */ - public Parser(Union u, Topology top) { this(new Table(u, top)); } + public abstract Forest shiftedToken(T t, Token.Location loc); + public abstract Topology top(); - Parser(Table pt) { this.pt = pt; } /** parse input for a exactly one unique result, throwing Ambiguous if not unique or Failed if none */ - public Tree parse1(Token.Stream input) throws IOException, Failed, Ambiguous { return parse(input).expand1(); } + public Tree parse1(Token.Stream input) throws IOException, Failed, Ambiguous { + Forest ret = parse(input); + try { return ret.expand1(); } + catch (Ambiguous a) { + System.out.println("while expanding:"); + System.out.println(ret); + throw a; + } + } /** parse input, using the table pt to drive the parser */ public Forest parse(Token.Stream input) throws IOException, Failed { GSS gss = new GSS(); - GSS.Phase current = gss.new Phase(null, input.next()); - current.newNode(null, null, pt.start, true, null); + Token.Location loc = input.getLocation(); + GSS.Phase current = gss.new Phase(null, this, null, input.next(1, 0, 0), loc, null); + current.newNode(null, Forest.leaf(null, null), pt.start, true); + int count = 1; for(;;) { - GSS.Phase next = gss.new Phase(current, input.next()); + loc = input.getLocation(); current.reduce(); - current.shift(next); + Forest forest = current.token==null ? null : shiftedToken((T)current.token, loc); + GSS.Phase next = gss.new Phase(current, this, current, input.next(count, gss.resets, gss.waits), loc, forest); + count = next.hash.size(); if (current.isDone()) return (Forest)current.finalResult; - current.checkFailure(); current = next; } } @@ -45,13 +53,13 @@ public class Parser { // Exceptions ////////////////////////////////////////////////////////////////////////////// - public static class Failed extends Exception { + public static class Failed extends RuntimeException { private final Token.Location location; private final String message; public Failed() { this("", null); } public Failed(String message, Token.Location loc) { this.location = loc; this.message = message; } public Token.Location getLocation() { return location; } - public String toString() { return message + (location==null ? "" : (" at " + location + "\n" + location.getContext())); } + public String toString() { return message/* + (location==null ? "" : (" at " + location))*/; } } public static class Ambiguous extends RuntimeException { @@ -70,21 +78,11 @@ public class Parser { // Table ////////////////////////////////////////////////////////////////////////////// /** an SLR(1) parse table which may contain conflicts */ - static class Table { + static class Table extends Walk.Cache { - private final Union start0 = new Top(); - private final Sequence start0seq; - static class Top extends Union { public Top() { super("0"); } } - - public final Walk.Cache cache = new Walk.Cache(); - - public HashSet closure() { - HashSet hp = new HashSet(); - start0.reachable(hp); - return hp; - } - public Position firstPosition() { return start0seq.firstp(); } - public Position lastPosition() { Position ret = start0seq.firstp(); while(!ret.isLast()) ret = ret.next(); return ret; } + public final Walk.Cache cache = this; + + public HashMapBag byPosition = new HashMapBag(); private void walk(Element e, HashSet hs) { if (e==null) return; @@ -97,21 +95,6 @@ public class Parser { walk(p.element(), hs); } } - public HashSet walk() { - HashSet ret = new HashSet(); - walk(start0, ret); - return ret; - } - - /* - public String toString() { - StringBuffer sb = new StringBuffer(); - for(Element e : walk()) - if (e instanceof Union) - ((Union)e).toString(sb); - return sb.toString(); - } - */ /** the start state */ public final State start; @@ -122,64 +105,126 @@ public class Parser { /** construct a parse table for the given grammar */ public Table(Topology top) { this("s", top); } public Table(String startSymbol, Topology top) { this(new Union(startSymbol), top); } - public Table(Union u, Topology top) { - start0seq = new Sequence.Singleton(u, null, null); - start0.add(start0seq); + public Table(Union ux, Topology top) { + Union start0 = new Union("0"); + start0.add(new Sequence.Singleton(ux, null, null)); + + for(Sequence s : start0) cache.eof.put(s, true); + cache.eof.put(start0, true); // construct the set of states HashMap,State> all_states = new HashMap,State>(); - HashSet all_elements = walk(); + HashSet all_elements = new HashSet(); + walk(start0, all_elements); for(Element e : all_elements) cache.ys.put(e, new Walk.YieldSet(e, cache).walk()); - this.start = new State(closure(), all_states, all_elements); + HashSet hp = new HashSet(); + reachable(start0, hp); + this.start = new State(hp, all_states, all_elements); // for each state, fill in the corresponding "row" of the parse table for(State state : all_states.values()) for(Position p : state.hs) { // the Grammar's designated "last position" is the only accepting state - if (p==lastPosition()) + if (start0.contains(p.owner()) && p.next()==null) state.accept = true; - // FIXME: how does right-nullability interact with follow restrictions? - // all right-nullable rules get a reduction [Johnstone 2000] if (p.isRightNullable(cache)) { - Walk.Follow wf = new Walk.Follow(top.fresh(), p.owner(), all_elements, cache); + Walk.Follow wf = new Walk.Follow(top.empty(), p.owner(), all_elements, cache); Reduction red = new Reduction(p); - state.reductions.put(wf.walk(p.owner()), red); - if (wf.includesEof()) state.eofReductions.add(red, true); + + Topology follow = wf.walk(p.owner()); + if (p.owner() instanceof Sequence.RewritingSequence && + (((Sequence.RewritingSequence)p.owner()).tag+"").equals("emailaddr")) { + System.out.println("follow before: " + new edu.berkeley.sbp.misc.CharToken.CharRange(follow)); + } + for(Position p2 = p; p2 != null && p2.element() != null; p2 = p2.next()) + follow = follow.intersect(new Walk.Follow(top.empty(), p2.element(), all_elements, cache).walk(p2.element())); + if (p.owner() instanceof Sequence.RewritingSequence && + (((Sequence.RewritingSequence)p.owner()).tag+"").equals("emailaddr")) { + System.out.println("follow after: " + new edu.berkeley.sbp.misc.CharToken.CharRange(follow)); + } + state.reductions.put(follow, red); + if (wf.includesEof()) state.eofReductions.add(red); } // if the element following this position is an atom, copy the corresponding // set of rows out of the "master" goto table and into this state's shift table if (p.element() != null && p.element() instanceof Atom) - state.shifts.addAll(state.gotoSetTerminals.subset(((Atom)p.element()).top())); + state.shifts.addAll(state.gotoSetTerminals.subset(((Atom)p.element()))); } + for(State state : all_states.values()) { + state.oreductions = state.reductions.optimize(); + state.oshifts = state.shifts.optimize(); + } } /** a single state in the LR table and the transitions possible from it */ - public class State implements Comparable, Iterable { + + public class State implements Comparable, IntegerMappable, Iterable { - public final int idx = master_state_idx++; + public int toInt() { return idx; } + + public boolean lame() { + for(Position p : this) + for(Position p2 = p; p2!=null; p2=p2.next()) + if (p2.isLast() && !p2.owner().lame) + return false; + return true; + } + /* + public boolean isResolvable(Token t) { + boolean found = false; + for(Reduction r : getReductions(t)) { + Position p = r.position; + if (!p.isRightNullable(cache)) continue; + if (p.owner().firstp()==p) continue; + if (found) { + // found two items meeting criteria #1 + return false; + } else { + found = true; + continue; + } + if (p.element()==null) continue; + Topology first = new Walk.First(top(), cache).walk(p.element()); + if (first.contains(t)) + } + } + */ + + public final int idx = master_state_idx++; private final HashSet hs; private transient HashMap gotoSetNonTerminals = new HashMap(); private transient TopologicalBag gotoSetTerminals = new TopologicalBag(); private TopologicalBag reductions = new TopologicalBag(); - private FastSet eofReductions = new FastSet(); + private HashSet eofReductions = new HashSet(); private TopologicalBag shifts = new TopologicalBag(); private boolean accept = false; + private VisitableMap oshifts = null; + private VisitableMap oreductions = null; + // Interface Methods ////////////////////////////////////////////////////////////////////////////// - public boolean canShift(Token t) { return shifts.contains(t); } - public Iterable getShifts(Token t) { return shifts.get(t); } public boolean isAccepting() { return accept; } - public Iterable getReductions(Token t) { return reductions.get(t); } - public Iterable getEofReductions() { return eofReductions; } + + public boolean canShift(Token t) { return oshifts.contains(t); } + public boolean canReduce(Token t) { return t==null ? eofReductions.size()>0 : oreductions.contains(t); } + public Iterator iterator() { return hs.iterator(); } + public void invokeShifts(Token t, Invokable irbc, B b, C c) { + oshifts.invoke(t, irbc, b, c); + } + public void invokeReductions(Token t, Invokable irbc, B b, C c) { + if (t==null) for(Reduction r : eofReductions) irbc.invoke(r, b, c); + else oreductions.invoke(t, irbc, b, c); + } + // Constructor ////////////////////////////////////////////////////////////////////////////// /** @@ -212,6 +257,7 @@ public class Parser { // register ourselves in the all_states hash so that no // two states are ever created with an identical position set all_states.put(hs, this); + for(Position p : hs) byPosition.add(p,this); // Step 1a: examine all Position's in this state and compute the mappings from // sets of follow tokens (tokens which could follow this position) to sets @@ -223,8 +269,8 @@ public class Parser { if (position.isLast() || !(position.element() instanceof Atom)) continue; Atom a = (Atom)position.element(); HashSet hp = new HashSet(); - position.next().reachable(hp); - bag0.addAll(a.top(), /*clo.walk()*/hp); + reachable(position.next(), hp); + bag0.addAll(a, hp); } // Step 1b: for each _minimal, contiguous_ set of characters having an identical next-position @@ -262,7 +308,7 @@ public class Parser { if (ys != null) { for(Element y : ys) { HashSet hp = new HashSet(); - p.next().reachable(hp); + reachable(p.next(), hp); move.addAll(y, hp); } } @@ -274,7 +320,13 @@ public class Parser { } } - public String toString() { return "state["+idx+"]"; } + public String toString() { + //return "state["+idx+"]"; + StringBuffer ret = new StringBuffer(); + ret.append("state["+idx+"]: "); + for(Position p : this) ret.append("{"+p+"} "); + return ret.toString(); + } public int compareTo(Table.State s) { return idx==s.idx ? 0 : idx < s.idx ? -1 : 1; } } @@ -286,7 +338,7 @@ public class Parser { public class Reduction { // FIXME: cleanup; almost everything in here could go in either Sequence.Position.getRewrite() or else in GSS.Reduct public final int numPop; - private final Position position; + /*private*/ final Position position; private final Forest[] holder; // to avoid constant reallocation public int hashCode() { return position.hashCode(); } public boolean equals(Object o) { @@ -302,36 +354,70 @@ public class Parser { this.holder = new Forest[numPop]; } public String toString() { return "[reduce " + position + "]"; } - public Forest reduce(Forest f, GSS.Phase.Node parent, GSS.Phase.Node onlychild, GSS.Phase target, Forest rex) { - holder[numPop-1] = f; - return reduce(parent, numPop-2, rex, onlychild, target); + + private Forest zero = null; + public Forest zero() { + if (zero != null) return zero; + if (numPop > 0) throw new Error(); + return zero = position.rewrite(null); } - public Forest reduce(GSS.Phase.Node parent, GSS.Phase.Node onlychild, GSS.Phase target, Forest rex) { - return reduce(parent, numPop-1, rex, onlychild, target); + + public void reduce(GSS.Phase.Node parent) { + if (numPop==0) finish(parent, zero(), parent.phase()); + else reduce(parent, numPop-1, parent.phase()); } - // FIXME: this could be more elegant and/or cleaner and/or somewhere else - private Forest reduce(GSS.Phase.Node parent, int pos, Forest rex, GSS.Phase.Node onlychild, GSS.Phase target) { - if (pos>=0) holder[pos] = parent.pending(); - if (pos<=0 && rex==null) { + public void reduce(GSS.Phase.Node parent, GSS.Phase.Node onlychild) { + if (numPop<=0) throw new Error("called wrong form of reduce()"); + int pos = numPop-1; + Forest old = holder[pos]; + holder[pos] = parent.pending(); + if (pos==0) { System.arraycopy(holder, 0, position.holder, 0, holder.length); - rex = position.rewrite(target.getLocation()); + finish(onlychild, position.rewrite(parent.phase().getLocation()), parent.phase()); + } else { + reduce(onlychild, pos-1, parent.phase()); } - if (pos >=0) { - if (onlychild != null) - reduce(onlychild, pos-1, rex, null, target); - else - for(GSS.Phase.Node child : parent.parents()) - reduce(child, pos-1, rex, null, target); + holder[pos] = old; + } + + // FIXME: this could be more elegant and/or cleaner and/or somewhere else + private void reduce(GSS.Phase.Node parent, int pos, GSS.Phase target) { + Forest old = holder[pos]; + holder[pos] = parent.pending(); + if (pos==0) { + System.arraycopy(holder, 0, position.holder, 0, holder.length); + for(int i=0; i h) { + if (e instanceof Atom) return; + for(Sequence s : ((Union)e)) + reachable(s.firstp(), h); + } + private static void reachable(Position p, HashSet h) { + if (h.contains(p)) return; + h.add(p); + if (p.element() != null) reachable(p.element(), h); + } + }