X-Git-Url: http://git.megacz.com/?p=sbp.git;a=blobdiff_plain;f=src%2Fedu%2Fberkeley%2Fsbp%2FParser.java;h=0b5f7c1a5cf15766d5e9d35ab6f88de6ed69feff;hp=ead35511fd0c1986b994c4b610c0a2bc4ccff118;hb=7fce42d3d1d4333aa7ed6963057e95dae12daed5;hpb=842f3c9b981b35721bb50d49e85c11085b2040a3 diff --git a/src/edu/berkeley/sbp/Parser.java b/src/edu/berkeley/sbp/Parser.java index ead3551..0b5f7c1 100644 --- a/src/edu/berkeley/sbp/Parser.java +++ b/src/edu/berkeley/sbp/Parser.java @@ -1,77 +1,73 @@ +// Copyright 2006 all rights reserved; see LICENSE file for BSD-style license + package edu.berkeley.sbp; import edu.berkeley.sbp.*; import edu.berkeley.sbp.util.*; -import edu.berkeley.sbp.*; import edu.berkeley.sbp.Sequence.Position; -import edu.berkeley.sbp.*; import java.io.*; import java.util.*; -import java.lang.reflect.*; -/** a parser which translates streams of Tokens of type T into a Forest */ -public abstract class Parser { +/** a parser which translates an Input<Token> into a Forest<NodeType> */ +public abstract class Parser { - public final Table pt; + protected final Table pt; /** create a parser to parse the grammar with start symbol u */ - protected Parser(Union u) { this.pt = new Table(u, top()); } - protected Parser(Table pt) { this.pt = pt; } - - public abstract Forest shiftedToken(T t, Token.Location loc); - public abstract Topology top(); - - - /** parse input for a exactly one unique result, throwing Ambiguous if not unique or Failed if none */ - public Tree parse1(Token.Stream input) throws IOException, Failed, Ambiguous { - Forest ret = parse(input); - try { return ret.expand1(); } - catch (Ambiguous a) { - System.out.println("while expanding:"); - System.out.println(ret); - throw a; + public Parser(Union u, Topology top) { this.pt = new Table(u, top); } + + /** implement this method to create the output forest corresponding to a lone shifted input token */ + public abstract Forest shiftToken(Token t, Input.Location newloc); + + public String toString() { return pt.toString(); } + + private boolean verbose = false;; + private static final char[] spin = new char[] { '-', '\\', '|', '/' }; + private int spinpos = 0; + private long last = 0; + void spin() { + if (verbose) { + long now = System.currentTimeMillis(); + if (now-last < 70) return; + last = now; + System.err.print("\r " + spin[spinpos++ % (spin.length)]+"\r"); } } - /** parse input, using the table pt to drive the parser */ - public Forest parse(Token.Stream input) throws IOException, Failed { - GSS gss = new GSS(); - Token.Location loc = input.getLocation(); - GSS.Phase current = gss.new Phase(null, this, null, input.next(1, 0, 0), loc, null); - current.newNode(null, null, pt.start, true); - int count = 1; - for(;;) { - loc = input.getLocation(); - //current.checkFailure(); - current.reduce(); - Forest forest = current.token==null ? null : shiftedToken((T)current.token, loc); - GSS.Phase next = gss.new Phase(current, this, current, input.next(count, gss.resets, gss.waits), loc, forest); - count = next.hash.size(); - if (current.isDone()) return (Forest)current.finalResult; - current = next; - } - } - - - // Exceptions ////////////////////////////////////////////////////////////////////////////// - - public static class Failed extends RuntimeException { - private final Token.Location location; - private final String message; - public Failed() { this("", null); } - public Failed(String message, Token.Location loc) { this.location = loc; this.message = message; } - public Token.Location getLocation() { return location; } - public String toString() { return message/* + (location==null ? "" : (" at " + location))*/; } - } - - public static class Ambiguous extends RuntimeException { - public final Forest ambiguity; - public Ambiguous(Forest ambiguity) { this.ambiguity = ambiguity; } - public String toString() { - StringBuffer sb = new StringBuffer(); - sb.append("unresolved ambiguity "/*"at " + ambiguity.getLocation() + ":"*/); - for(Object result : ambiguity.expand(false)) - sb.append("\n " + result); - return sb.toString(); + /** parse input, and return the shared packed parse forest (or throw an exception) */ + public Forest parse(Input input) throws IOException, ParseFailed { + verbose = System.getProperty("sbp.verbose", null) != null; + spinpos = 0; + try { + GSS gss = new GSS(input, this); + for(GSS.Phase current = gss.new Phase(pt.start); ;) { + + if (verbose) { + String s; + s = " " + spin[spinpos++ % (spin.length)]+" parsing "; + s += input.getName(); + s += " "+input.getLocation(); + while(s.indexOf(':') != -1 && s.indexOf(':') < 8) s = " " + s; + String y = "@"+gss.viewPos+" "; + while(y.length() < 9) y = " " + y; + s += y; + //s += " doom="+Node.doomedNodes; + //while(s.length() < 40) s = s + " "; + s += " nodes="+gss.numOldNodes; + while(s.length() < 50) s = s + " "; + s += " shifted="+gss.numNewNodes; + while(s.length() < 60) s = s + " "; + s += " reductions="+gss.numReductions; + System.err.print("\r"+s+ANSI.clreol()+"\r"); + } + + // FIXME: make sure all the locations line up properly in here + if (current.isDone()) return (Forest)current.finalResult; + Forest forest = shiftToken((Token)current.token, input.getLocation()); + current = gss.new Phase(current, forest); + } + } finally { + if (verbose) + System.err.print("\r \r"); } } @@ -79,141 +75,159 @@ public abstract class Parser { // Table ////////////////////////////////////////////////////////////////////////////// /** an SLR(1) parse table which may contain conflicts */ - static class Table extends Walk.Cache { - - public final Walk.Cache cache = this; - - public HashMapBag byPosition = new HashMapBag(); - - private void walk(Element e, HashSet hs) { - if (e==null) return; - if (hs.contains(e)) return; - hs.add(e); - if (e instanceof Atom) return; - for(Sequence s : (Union)e) { - hs.add(s); - for(Position p = s.firstp(); p != null; p = p.next()) - walk(p.element(), hs); - } - } + static class Table extends Cache { /** the start state */ - public final State start; + public final State start; + + /** the state from which no reductions can be done */ + private final State dead_state; /** used to generate unique values for State.idx */ private int master_state_idx = 0; + HashSet> all_states = new HashSet>(); + HashMap,State> doomed_states = new HashMap,State>(); + HashMap,State> normal_states = new HashMap,State>(); /** construct a parse table for the given grammar */ public Table(Topology top) { this("s", top); } public Table(String startSymbol, Topology top) { this(new Union(startSymbol), top); } public Table(Union ux, Topology top) { + super(ux, top); Union start0 = new Union("0"); - start0.add(new Sequence.Singleton(ux, null, null)); - - for(Sequence s : start0) cache.eof.put(s, true); - cache.eof.put(start0, true); + Sequence seq0 = new Sequence.Singleton(ux); + start0.add(seq0); + buildFollowSet(seq0, top, true); // construct the set of states - HashMap,State> all_states = new HashMap,State>(); - HashSet all_elements = new HashSet(); - walk(start0, all_elements); - for(Element e : all_elements) - cache.ys.put(e, new Walk.YieldSet(e, cache).walk()); HashSet hp = new HashSet(); reachable(start0, hp); - this.start = new State(hp, all_states, all_elements); + + this.dead_state = new State(new HashSet(), true); + this.start = new State(hp); // for each state, fill in the corresponding "row" of the parse table - for(State state : all_states.values()) + for(State state : all_states) for(Position p : state.hs) { // the Grammar's designated "last position" is the only accepting state - if (start0.contains(p.owner()) && p.next()==null) + if (start0.contains(p.owner()) && p.next()==null && !state.doomed) state.accept = true; - if (p.isRightNullable(cache)) { - Walk.Follow wf = new Walk.Follow(top.empty(), p.owner(), all_elements, cache); - Reduction red = new Reduction(p); - - Topology follow = wf.walk(p.owner()); - if (p.owner() instanceof Sequence.RewritingSequence && - (((Sequence.RewritingSequence)p.owner()).tag+"").equals("emailaddr")) { - System.out.println("follow before: " + new edu.berkeley.sbp.misc.CharToken.CharRange(follow)); + if (isRightNullable(p)) { + Topology follow = (Topology)follow(p.owner()); + for(Position p2 = p; p2 != null && p2.element() != null; p2 = p2.next()) { + if (!(p2.element() instanceof Union)) throw new Error("impossible"); + Union u = (Union)p2.element(); + Atom set = new edu.berkeley.sbp.chr.CharAtom(new edu.berkeley.sbp.chr.CharTopology((Topology)epsilonFollowSet(u))); + Element p2e = p2.element(); + if (p2e instanceof Union) + for(Sequence p2es : ((Union)p2e)) + follow = follow.intersect(follow(p2es)); + if (set != null) follow = follow.intersect(set.getTokenTopology()); } - for(Position p2 = p; p2 != null && p2.element() != null; p2 = p2.next()) - follow = follow.intersect(new Walk.Follow(top.empty(), p2.element(), all_elements, cache).walk(p2.element())); - if (p.owner() instanceof Sequence.RewritingSequence && - (((Sequence.RewritingSequence)p.owner()).tag+"").equals("emailaddr")) { - System.out.println("follow after: " + new edu.berkeley.sbp.misc.CharToken.CharRange(follow)); - } - state.reductions.put(follow, red); - if (wf.includesEof()) state.eofReductions.add(red); + state.reductions.put(follow, p); + if (followEof.contains(p.owner())) state.eofReductions.add(p); } // if the element following this position is an atom, copy the corresponding // set of rows out of the "master" goto table and into this state's shift table if (p.element() != null && p.element() instanceof Atom) - state.shifts.addAll(state.gotoSetTerminals.subset(((Atom)p.element()))); + state.shifts.addAll(state.gotoSetTerminals.subset(((Atom)p.element()).getTokenTopology())); } - for(State state : all_states.values()) { - state.oreductions = state.reductions.optimize(); - state.oshifts = state.shifts.optimize(); - } - } - /** a single state in the LR table and the transitions possible from it */ - public class State implements Comparable, Iterable { - - /* - public boolean isResolvable(Token t) { - boolean found = false; - for(Reduction r : getReductions(t)) { - Position p = r.position; - if (!p.isRightNullable(cache)) continue; - if (p.owner().firstp()==p) continue; - if (found) { - // found two items meeting criteria #1 - return false; - } else { - found = true; - continue; + if (top instanceof IntegerTopology) + for(State state : all_states) { + state.oreductions = state.reductions.optimize(((IntegerTopology)top).functor()); + state.oshifts = state.shifts.optimize(((IntegerTopology)top).functor()); + } + + // crude algorithm to assing an ordinal ordering to every position + // al will be sorted in DECREASING order (al[0] >= al[1]) + ArrayList al = new ArrayList(); + for(State s : all_states) { + for(Object po : s) { + Sequence.Position p = (Sequence.Position)po; + if (al.contains(p)) continue; + int i=0; + for(; i 0) { + Sequence.Position p = al.remove(j); + al.add(i, p); + continue OUTER; + } + break; + } + + int j = 1; + int pk = 0; + for(int i=0; i 0) + { inc = true; break; } + } + inc = true; + if (inc) { + j++; + pk = i; } + al.get(i).ord = j; } + + /* + for(int i=0; i implements IntegerMappable, Iterable { + public final int idx = master_state_idx++; private final HashSet hs; + public HashSet> also = new HashSet>(); - private transient HashMap gotoSetNonTerminals = new HashMap(); - private transient TopologicalBag gotoSetTerminals = new TopologicalBag(); + public transient HashMap> gotoSetNonTerminals = new HashMap>(); + private transient TopologicalBag> gotoSetTerminals = new TopologicalBag>(); - private TopologicalBag reductions = new TopologicalBag(); - private HashSet eofReductions = new HashSet(); - private TopologicalBag shifts = new TopologicalBag(); - private boolean accept = false; + private TopologicalBag reductions = new TopologicalBag(); + private HashSet eofReductions = new HashSet(); + private TopologicalBag> shifts = new TopologicalBag>(); + private boolean accept = false; - private VisitableMap oshifts = null; - private VisitableMap oreductions = null; + private VisitableMap> oshifts = null; + private VisitableMap oreductions = null; // Interface Methods ////////////////////////////////////////////////////////////////////////////// - public boolean isAccepting() { return accept; } - - public boolean canShift(Token t) { return oshifts.contains(t); } - public boolean canReduce(Token t) { return t==null ? eofReductions.size()>0 : oreductions.contains(t); } - - public Iterator iterator() { return hs.iterator(); } - - public void invokeShifts(Token t, Invokable irbc, B b, C c) { - oshifts.invoke(t, irbc, b, c); + boolean isAccepting() { return accept; } + public Iterator iterator() { return hs.iterator(); } + boolean canShift(Token t) { return oshifts!=null && oshifts.contains(t); } + void invokeShifts(Token t, GSS.Phase phase, Result r) { oshifts.invoke(t, phase, r); } + boolean canReduce(Token t) { + return oreductions != null && (t==null ? eofReductions.size()>0 : oreductions.contains(t)); } + void invokeEpsilonReductions(Token t, Node node) { + if (t==null) for(Position r : eofReductions) node.invoke(r, null); + else oreductions.invoke(t, node, null); } - public void invokeReductions(Token t, Invokable irbc, B b, C c) { - if (t==null) for(Reduction r : eofReductions) irbc.invoke(r, b, c); - else oreductions.invoke(t, irbc, b, c); + void invokeReductions(Token t, Node node, Result b) { + //System.err.println("\rinvokage: " + this); + if (t==null) for(Position r : eofReductions) node.invoke(r, b); + else oreductions.invoke(t, node, b); } // Constructor ////////////////////////////////////////////////////////////////////////////// @@ -221,8 +235,7 @@ public abstract class Parser { /** * create a new state consisting of all the Positions in hs * @param hs the set of Positions comprising this State - * @param all_states the set of states already constructed (to avoid recreating states) - * @param all_elements the set of all elements (Atom instances need not be included) + * @param all the set of all elements (Atom instances need not be included) * * In principle these two steps could be merged, but they * are written separately to highlight these two facts: @@ -240,15 +253,32 @@ public abstract class Parser { * for non-Atom Elements. * */ - public State(HashSet hs, - HashMap,State> all_states, - HashSet all_elements) { + public State(HashSet hs) { this(hs, false); } + public boolean doomed; + public State(HashSet hs, boolean doomed) { this.hs = hs; + this.doomed = doomed; // register ourselves in the all_states hash so that no // two states are ever created with an identical position set - all_states.put(hs, this); - for(Position p : hs) byPosition.add(p,this); + ((HashMap)(doomed ? doomed_states : normal_states)).put(hs, this); + ((HashSet)all_states).add(this); + + for(Position p : hs) { + if (!p.isFirst()) continue; + for(Sequence s : p.owner().needs()) { + if (hs.contains(s.firstp())) continue; + HashSet h2 = new HashSet(); + reachable(s, h2); + also.add(mkstate(h2, true)); + } + for(Sequence s : p.owner().hates()) { + if (hs.contains(s.firstp())) continue; + HashSet h2 = new HashSet(); + reachable(s, h2); + also.add(mkstate(h2, true)); + } + } // Step 1a: examine all Position's in this state and compute the mappings from // sets of follow tokens (tokens which could follow this position) to sets @@ -261,7 +291,7 @@ public abstract class Parser { Atom a = (Atom)position.element(); HashSet hp = new HashSet(); reachable(position.next(), hp); - bag0.addAll(a, hp); + bag0.addAll(a.getTokenTopology(), hp); } // Step 1b: for each _minimal, contiguous_ set of characters having an identical next-position @@ -271,137 +301,104 @@ public abstract class Parser { for(Topology r : bag0) { HashSet h = new HashSet(); for(Position p : bag0.getAll(r)) h.add(p); - gotoSetTerminals.put(r, all_states.get(h) == null ? new State(h, all_states, all_elements) : all_states.get(h)); + ((TopologicalBag)gotoSetTerminals).put(r, mkstate(h, doomed)); } - // Step 2: for every non-Atom element (ie every Element which has a corresponding reduction), - // compute the closure over every position in this set which is followed by a symbol - // which could yield the Element in question. + // Step 2: for every Sequence, compute the closure over every position in this set which + // is followed by a symbol which could yield the Sequence. // // "yields" [in one or more step] is used instead of "produces" [in exactly one step] // to avoid having to iteratively construct our set of States as shown in most // expositions of the algorithm (ie "keep doing XYZ until things stop changing"). - /* - for(Element e : all_elements) { - if (e instanceof Atom) continue; - HashSet h = new Walk.Closure(null, g.cache).closure(e, hs); - State s = all_states.get(h) == null ? new State(h, all_states, all_elements) : all_states.get(h); - if (gotoSetNonTerminals.get(e) != null) - throw new Error("this should not happen"); - gotoSetNonTerminals.put(e, s); - } - */ - HashMapBag move = new HashMapBag(); - for(Position p : hs) { - Element e = p.element(); - if (e==null) continue; - HashSet ys = cache.ys.get(e); - if (ys != null) { - for(Element y : ys) { + + HashMapBag move = new HashMapBag(); + for(Position p : hs) + if (!p.isLast() && p.element() instanceof Union) + for(Sequence s : ((Union)p.element())) { HashSet hp = new HashSet(); reachable(p.next(), hp); - move.addAll(y, hp); + move.addAll(s, hp); } - } - } - for(Element y : move) { + OUTER: for(Sequence y : move) { + // if a reduction is "lame", it should wind up in the dead_state after reducing HashSet h = move.getAll(y); - State s = all_states.get(h) == null ? new State(h, all_states, all_elements) : all_states.get(h); + State s = mkstate(h, doomed); + for(Position p : hs) + if (p.element() != null && (p.element() instanceof Union)) + for(Sequence seq : ((Union)p.element())) + if (seq.needs.contains(y) || seq.hates.contains(y)) { + // FIXME: assumption that no sequence is ever both usefully (non-lamely) matched + // and also directly lamely matched + ((HashMap)gotoSetNonTerminals).put(y, dead_state); + continue OUTER; + } gotoSetNonTerminals.put(y, s); } } - public String toString() { return "state["+idx+"]"; } - - public int compareTo(Table.State s) { return idx==s.idx ? 0 : idx < s.idx ? -1 : 1; } - } - - /** - * the information needed to perform a reduction; copied here to - * avoid keeping references to Element objects in a Table - */ - public class Reduction { - // FIXME: cleanup; almost everything in here could go in either Sequence.Position.getRewrite() or else in GSS.Reduct - public final int numPop; - /*private*/ final Position position; - private final Forest[] holder; // to avoid constant reallocation - public int hashCode() { return position.hashCode(); } - public boolean equals(Object o) { - if (o==null) return false; - if (o==this) return true; - if (!(o instanceof Reduction)) return false; - Reduction r = (Reduction)o; - return r.position == position; - } - public Reduction(Position p) { - this.position = p; - this.numPop = p.pos; - this.holder = new Forest[numPop]; + private State mkstate(HashSet h, boolean b) { + if (b) return doomed_states.get(h) == null ? (State)new State(h,b) : (State)doomed_states.get(h); + else return normal_states.get(h) == null ? (State)new State(h,b) : (State)normal_states.get(h); } - public String toString() { return "[reduce " + position + "]"; } - private Forest zero = null; - public Forest zero() { - if (zero != null) return zero; - if (numPop > 0) throw new Error(); - return zero = position.rewrite(null); + public String toStringx() { + StringBuffer st = new StringBuffer(); + for(Position p : this) { + if (st.length() > 0) st.append("\n"); + st.append(p); + } + return st.toString(); } - public void reduce(GSS.Phase.Node parent) { - if (numPop==0) finish(parent, zero(), parent.phase()); - else reduce(parent, numPop-1, parent.phase()); + public String toString() { + StringBuffer ret = new StringBuffer(); + ret.append("state["+idx+"]: "); + for(Position p : this) ret.append("{"+p+"} "); + return ret.toString(); } - public void reduce(GSS.Phase.Node parent, GSS.Phase.Node onlychild) { - if (numPop<=0) throw new Error("called wrong form of reduce()"); - int pos = numPop-1; - Forest old = holder[pos]; - holder[pos] = parent.pending(); - if (pos==0) { - System.arraycopy(holder, 0, position.holder, 0, holder.length); - finish(onlychild, position.rewrite(parent.phase().getLocation()), parent.phase()); - } else { - reduce(onlychild, pos-1, parent.phase()); - } - holder[pos] = old; - } + public int toInt() { return idx; } + } - // FIXME: this could be more elegant and/or cleaner and/or somewhere else - private void reduce(GSS.Phase.Node parent, int pos, GSS.Phase target) { - Forest old = holder[pos]; - holder[pos] = parent.pending(); - if (pos==0) { - System.arraycopy(holder, 0, position.holder, 0, holder.length); - for(int i=0; i state : all_states) { + sb.append(" " + state + "\n"); + for(Topology t : state.shifts) { + sb.append(" shift \""+ + new edu.berkeley.sbp.chr.CharTopology((IntegerTopology)t)+"\" => "); + for(State st : state.shifts.getAll(t)) + sb.append(st.idx+" "); + sb.append("\n"); } - holder[pos] = old; - } - private void finish(GSS.Phase.Node parent, Forest result, GSS.Phase target) { - State state = parent.state.gotoSetNonTerminals.get(position.owner()); - if (state!=null) - target.newNode(parent, result, state, numPop<=0, this); + for(Topology t : state.reductions) + sb.append(" reduce \""+ + new edu.berkeley.sbp.chr.CharTopology((IntegerTopology)t)+"\" => " + + state.reductions.getAll(t) + "\n"); + for(Sequence s : state.gotoSetNonTerminals.keySet()) + sb.append(" goto "+state.gotoSetNonTerminals.get(s)+" from " + s + "\n"); } + return sb.toString(); } } - private static final Forest[] emptyForestArray = new Forest[0]; - - // Helpers ////////////////////////////////////////////////////////////////////////////// - + + private static void reachable(Sequence s, HashSet h) { + reachable(s.firstp(), h); + //for(Sequence ss : s.needs()) reachable(ss, h); + //for(Sequence ss : s.hates()) reachable(ss, h); + } private static void reachable(Element e, HashSet h) { if (e instanceof Atom) return; for(Sequence s : ((Union)e)) - reachable(s.firstp(), h); + reachable(s, h); } private static void reachable(Position p, HashSet h) { if (h.contains(p)) return; h.add(p); if (p.element() != null) reachable(p.element(), h); } - + //public static Cache mastercache = null; }