X-Git-Url: http://git.megacz.com/?p=sbp.git;a=blobdiff_plain;f=src%2Fedu%2Fberkeley%2Fsbp%2FGSS.java;h=2d2e550cce16e613bec3951d8d8a348a0ba949d9;hp=6d5f8abaa6d238ec72fc4f1f1ae082df727fdbbe;hb=76ce41540f06ac1fbcb44332dd62f53e88c27cf1;hpb=4989eef7b4c4ff06e54f89ffd78b1483da088d6e diff --git a/src/edu/berkeley/sbp/GSS.java b/src/edu/berkeley/sbp/GSS.java index 6d5f8ab..2d2e550 100644 --- a/src/edu/berkeley/sbp/GSS.java +++ b/src/edu/berkeley/sbp/GSS.java @@ -1,81 +1,186 @@ +// Copyright 2006-2007 all rights reserved; see LICENSE file for BSD-style license + package edu.berkeley.sbp; import edu.berkeley.sbp.*; -import edu.berkeley.sbp.*; -import edu.berkeley.sbp.*; import edu.berkeley.sbp.util.*; +import edu.berkeley.sbp.Parser.Table.*; +import edu.berkeley.sbp.Sequence.Pos; +import edu.berkeley.sbp.Sequence.Pos; import java.io.*; import java.util.*; import java.lang.reflect.*; -////////////////////////////////////////////////////////////////////////////// -// TODO: -// -// - fix public/package/private status -// - -////////////////////////////////////////////////////////////////////////////// -// Optimizations to add -// -// ** NOTE: not all of these are appropriate for this class -- it is -// simply a list of optimizations not implemented. This -// class is meant to remain simple and easy to understand; -// optimizations which obscure that do not belong here (they -// should go into the compiled version instead) -// -// - most of our time is now spent creating and storing Reduct instances -// - we should be able to perform Reduct's immediately after creating them... -// - /** implements Tomita's Graph Structured Stack */ class GSS { - public GSS() { } + Input input; + private Parser parser; + public GSS(Input input, Parser parser) { this.input = input; this.parser = parser;} + public Input getInput() { return input; } + + /* + HashSet finishedReductions = new HashSet(); + */ + int numNewNodes = 0; + int numOldNodes = 0; + int viewPos = 0; + int numReductions = 0; /** corresponds to a positions between tokens the input stream; same as Tomita's U_i's */ - public class Phase { + class Phase implements Invokable, IntegerMappable, GraphViz.ToGraphViz, Iterable { - /** the token immediately after this phase */ - public final Token token; + // FIXME: right now, these are the performance bottleneck + private HashMapBag performed = new HashMapBag(); - /** currently this is necessary only for the code() hack -- it doesn't actually correspond to the input */ - private final int pos; + public Forest.Many finalResult; + private PriorityQueue reductionQueue = new PriorityQueue(); - /** FIXME */ - public Forest.Ref finalResult = null; + Parser parser() { return parser; } + public void addReduction(Reduction r) { + //System.out.println("+ " + r); + parser.spin(); + reductionQueue.add(r); + } + + public void invoke(State st, StateNode pred, Forest f) { + parser.spin(); + good |= next.newNode(f, null, pred, st, false); + } - /** all reductions (pending and completed) */ - private HashSet reductions = new HashSet(); /* ALLOC */ + /** the token immediately after this phase */ + final Tok token; + final int pos; + public IntPairMap hash = new IntPairMap(); /* ALLOC */ + private boolean good = false; + private Phase next = null; + private Phase prev; + private Input.Location location; + private Input.Location nextLocation; - /** all nodes, keyed by the value returned by code() */ - private HashMap hash = new HashMap(); /* ALLOC */ + private Forest forest; - /** the number of pending reductions */ - private int pendingReductions = 0; - private int totalReductions = 0; - //private HashSet pendingReduct = new HashSet(); - private LinkedList pendingReduct = new LinkedList(); + public Phase(State startState) throws ParseFailed, IOException { + this(null, null); + newNode(null, null, null, startState, true); + } + public Phase(Phase prev, Forest forest) throws ParseFailed, IOException { + this.location = input.getLocation(); + this.token = (Tok)input.next(); + this.nextLocation = input.getLocation(); + this.prev = prev; + this.forest = forest; + this.pos = prev==null ? 0 : prev.pos+1; + if (prev != null) prev.shift(this, forest); + numReductions = 0; + /* + finishedReductions.clear(); + */ + + int minPhasePos = Integer.MAX_VALUE; + Reduction best = null; + //System.out.println("=============================================================================="); + while(!reductionQueue.isEmpty()) { + Reduction r = reductionQueue.poll(); + //System.out.println("- " + r); + if (r.predPhase() != null) + if (r.predPhase().pos > minPhasePos) + throw new Error(); + r.perform(); + if (r.predPhase() != null) { + if (r.predPhase().pos < minPhasePos) { + minPhasePos = r.predPhase().pos; + best = r; + } else if (r.predPhase().pos == minPhasePos) { + /* + if (best != null && Parser.mastercache.comparePositions(r.reduction(), best.reduction()) < 0) + throw new Error("\n"+r+"\n"+best+"\n"+ + Parser.mastercache.comparePositions(r.reduction(), best.reduction())+"\n"+r.compareTo(best)+ + "\n"+(r.reduction().ord-best.reduction().ord)); + */ + best = r; + } + } + /* + finishedReductions.add(r); + */ + numReductions++; + } + if (token==null) shift(null, null); + } - /** the number of nodes in this phase */ - private int numNodes = 0; + public boolean isDone() throws ParseFailed { + if (token != null) return false; + if (token==null && finalResult==null) + ParseFailed.error("unexpected end of file", this, null, + getLocation().createRegion(getLocation())); + return true; + } - boolean closed = false; + public Input.Location getLocation() { return location; } + public Input.Location getNextLocation() { return nextLocation; } + public boolean isFrontier() { return hash!=null; } - private Token.Location location; - public Phase(Phase previous, Token token, Token.Location location) { - this.pos = previous==null ? 0 : previous.pos+1; - this.token = token; - this.location = location; + /** perform all shift operations, adding promoted nodes to next */ + private void shift(Phase next, Forest f) throws ParseFailed { + this.next = next; + // this massively improves GC performance + if (prev != null) { + IntPairMap h = prev.hash; + prev.hash = null; + prev.performed = null; + for(StateNode n : h) n.check(); + } + numOldNodes = hash.size(); + for(StateNode n : hash.values()) { + if (token == null && n.state().isAccepting()) { + if (finalResult==null) finalResult = new Forest.Many(); + for(ResultNode r : n) + finalResult.merge(r.getForest()); + } + if (token == null) continue; + n.state().invokeShifts(token, this, n, f); + } + numNewNodes = next==null ? 0 : next.hash.size(); + viewPos = this.pos; + + if (!good && token!=null) { + String toks = token+""; + if (toks.length()==1 && toks.charAt(0) == edu.berkeley.sbp.chr.CharAtom.left) { + ParseFailed.error("unexpected increase in indentation", this, + token, getRegionFromThisToNext()); + } else if (toks.length()==1 && toks.charAt(0) == edu.berkeley.sbp.chr.CharAtom.right) { + ParseFailed.error("unexpected decrease in indentation", this, + token, getRegionFromThisToNext()); + } else { + ParseFailed.error("unexpected character '"+ANSI.cyan(StringUtil.escapify(token+"", + "\\\'\r\n"))+"'", + this, token, getRegionFromThisToNext()); + } + } + if (token==null && finalResult==null) + ParseFailed.error("unexpected end of file", this, null, + getLocation().createRegion(getLocation())); + for(StateNode n : hash) n.check(); } - public boolean isDone() { return token == null; } - - private String error = "generic syntax error"; - public void checkFailure() throws Parser.Failed { - if (numNodes <= 0) - throw new Parser.Failed(error, getLocation()); + Input.Region getRegionFromThisToNext() { + return getLocation().createRegion(getNextLocation()); } - public Token.Location getLocation() { return location; } + void newNodeFromReduction(Forest f, Pos reduction, StateNode pred) { + int pos = pred.phase().pos; + for(int s : reduction.hates()) + if (performed.contains(pos, s)) + return; + for(int s : reduction.needs()) + if (!performed.contains(pos, s)) + return; + if (reduction.owner_needed_or_hated() && !performed.contains(pos, reduction.provides())) + performed.add(pos, reduction.provides()); + Parser.Table.State state = (Parser.Table.State)pred.state().gotoSetNonTerminals.get(reduction); + if (state!=null) + newNode(f, reduction, pred, state, reduction.numPops()<=0); + } /** add a new node (merging with existing nodes if possible) * @param parent the parent of the new node @@ -84,252 +189,56 @@ class GSS { * @param fromEmptyReduction true iff this node is being created as a result of a reduction of length zero (see GRMLR paper) * @param start the earliest part of the input contributing to this node (used to make merging decisions) */ - public void newNode(Node parent, Forest pending, Parser.Table.State state, boolean fromEmptyReduction, Phase start) { - Node p = hash.get(code(state, start)); - if (p != null) newNode2(p, parent, pending, state, fromEmptyReduction, start); - else newNode3(parent, pending, state, fromEmptyReduction, start); - } - private void newNode2(Node p, Node parent, Forest pending, Parser.Table.State state, boolean fromEmptyReduction, Phase start) { - p.holder.merge(pending); - if (p.parents.contains(parent)) return; - p.parents.add(parent, true); - if (p!=parent && !fromEmptyReduction) p.queueReductions(parent); - } - private void newNode3(Node parent, Forest pending, Parser.Table.State state, boolean fromEmptyReduction, Phase start) { + private boolean newNode(Forest f, Pos reduction, StateNode pred, State state, boolean fromEmptyReduction) { + StateNode p = pred==null ? null : hash.get(state, pred.phase()); + if (p != null) { + p.addPred(f, reduction, pred); + return !state.doomed(); + } do { if (token != null && state.canShift(token)) break; if (state.isAccepting()) break; if (token==null) break; - int count = 0; - Parser.Table.Reduction r = null; - for(Parser.Table.Reduction red : token==null ? state.getEofReductions() : state.getReductions(token)) { r = red; count++; } - //if (count==0) return; // BEWARE! this optimization is suspected to cause really nasty heisenbugs - //if (count > 1) break; - //if (r.numPop == 0) break; - //r.reduce(pending, parent, null, Phase.this, null); - //return; + if (!state.canReduce(token)) return false; } while(false); - - Node n = new Node(parent, pending, state, start); // ALLOC - n.queueEmptyReductions(); - if (!fromEmptyReduction) n.queueReductions(); - } - - - boolean reducing = false; - /** perform all reduction operations */ - public void reduce() { - reducing = true; - HashSet s = new HashSet(); - s.addAll(hash.values()); - //while(pendingReduct.size()>0) - //pendingReduct.removeFirst().go(); - for(Phase.Node n : s) n.queueEmptyReductions(); - for(Phase.Node n : s) n.queueReductions(); - } - - /** perform all shift operations, adding promoted nodes to next */ - public void shift(Phase next, Forest result) { - closed = true; - Forest res = null; - boolean ok = false; - for(Phase.Node n : hash.values()) { - if (n.holder==null) continue; - n.holder.resolve(); - if (token == null && n.state.isAccepting()) { - ok = true; - if (finalResult==null) finalResult = new Forest.Ref(); - finalResult.merge(n.holder); - } - if (!n.holder.valid()) continue; - if (token == null) continue; - for(Parser.Table.State st : n.state.getShifts(token)) { - if (res == null) res = result; - next.newNode(n, res, st, true, this); - ok = true; - } - } - - if (!ok && token != null) { - StringBuffer error = new StringBuffer(); - error.append("error: unable to shift token \"" + token + "\"\n"); - error.append(" before: " +pendingReductions+ "\n"); - error.append(" before: " +totalReductions+ "\n"); - //for(Phase.Node n : hash.values()) { - //n.queueReductions(); - //n.queueEmptyReductions(); - //} - error.append(" after: " +pendingReductions+ "\n"); - error.append(" candidate states:\n"); - for(Phase.Node n : hash.values()) { - //for(Sequence.Position p : n.state) error.append(" " + p + "\n"); - //error.append(" --\n"); - for(Parser.Table.Reduction r : n.state.getReductions(token)) error.append(" " + r + "\n"); - //error.append(" ==\n"); - } - next.error = error.toString(); - } - - // this massively improves GC performance - reductions = null; - hash = null; + StateNode n = new StateNode(Phase.this, f, reduction, pred, state, fromEmptyReduction); // ALLOC + /** FIXME: this null-result can be used to notice bogus/dead states */ + for(Object s : state.conjunctStates) + newNode(null, null, n, (State)s, fromEmptyReduction); + return !n.state().doomed(); } - - // GSS Nodes ////////////////////////////////////////////////////////////////////////////// - - //private HashMap pcache = new HashMap(); - /** a node in the GSS */ - public final class Node { - - private Forest.Ref holder = null; - - private HashMap cache = null; - - /** the set of nodes to which there is an edge starting at this node */ - public final FastSet parents = new FastSet(); /* ALLOC */ - - /** what state this node is in */ - public final Parser.Table.State state; - /** which Phase this Node belongs to (node that Node is also a non-static inner class of Phase) */ - public final Phase phase = Phase.this; - - public HashMap cache() { - return cache==null ? (cache = new HashMap()) : cache; } - public Forest.Ref holder() { return holder==null ? (holder = new Forest.Ref()) : holder; } - public Forest pending() { return Phase.this.closed ? holder().resolve() : holder; } - public FastSet parents() { return parents; } - - /** FIXME */ - public void queueReductions() { - for(Node n2 : parents) - queueReductions(n2); - } - - private HashSet queued = new HashSet(); - /** FIXME */ - public void queueReductions(Node n2) { - if (queued.contains(n2)) return; - queued.add(n2); - new Reduct(this, n2, null).go(); - } - - - /** FIXME */ - public void queueEmptyReductions() { - if (reducing) - for(Parser.Table.Reduction r : token==null ? state.getEofReductions() : state.getReductions(token)) - if (r.numPop==0) - r.reduce(this, null, this.phase, r.zero()); - } - - private Node(Node parent, Forest pending, Parser.Table.State state, Phase start) { - this.state = state; - if (pending != null) this.holder().merge(pending); - if (parent != null) parents.add(parent, true); - if (Phase.this.hash.get(code(state, start)) != null) throw new Error("severe problem!"); - Phase.this.hash.put(code(state, start), this); - Phase.this.numNodes++; - if (parent==null) holder().valid = true; // hack to make sure that the "base" node is always considered valid - } + public int toInt() { return pos+1; } + public int size() { return hash==null ? 0 : hash.size(); } + public int pos() { return pos; } + public Tok getToken() { return token; } + public Iterator iterator() { return hash.iterator(); } + public GSS getGSS() { return GSS.this; } + + // GraphViz ////////////////////////////////////////////////////////////////////////////// + + public GraphViz.StateNode toGraphViz(GraphViz gv) { + if (gv.hasNode(this)) return gv.createNode(this); + GraphViz.Group g = gv.createGroup(this); + g.label = "Phase " + pos; + g.color = "gray"; + g.cluster = true; + return g; } - - // Forest / Completed Reductions ////////////////////////////////////////////////////////////////////////////// - - /** a pending or completed reduction */ - class Reduct { - - /** the node from which the reduction should begin */ - public Node n = null; - - /** the node on the other end of the edge to be reduced along (either: null, the second node of the reduction, - * or the parent of the result of a length-one reduction) - */ - public Node n2 = null; - - /** true iff the reduction has already been performed */ - private boolean done = false; - - /** the reduction to be applied */ - public Parser.Table.Reduction r; - - public Tree result = null; - - public Reduct(Node n, Node n2, Parser.Table.Reduction r) { - this.n = n; - this.n2 = n2; - this.r = r; - //if (reductions.contains(this)) { done = true; return; } - reductions.add(this); - pendingReduct.addFirst(this); - pendingReductions++; - go(); - } - - /** perform the reduction */ - public void go() { - if (done) return; - done = true; - pendingReduct.remove(this); - pendingReductions--; - - if (r==null) { - for(Parser.Table.Reduction r : token==null ? n.state.getEofReductions() : n.state.getReductions(token)) { - - // UGLY HACK - // The problem here is that a "reduction of length 1" - // performed twice with different values of n2 needs - // to only create a *single* new result, but must add - // multiple parents to the node holding that result. - // The current reducer doesn't differentiate between - // the next node of an n-pop reduction and the - // ultimate parent of the last pop, so we need to - // cache instances here as a way of avoiding - // recreating them. - - // currently we have this weird problem where we - // have to do an individual reduct for each child - // when the reduction length is one (ie the - // children wind up being children of the newly - // created node rather than part of the popped - // sequence - if (r.numPop <= 0) continue; - if (r.numPop == 1) { - Forest ret = n.cache().get(r); - if (ret != null) r.reduce(n, n2, n.phase, ret); - else n.cache().put(r, r.reduce(n, n2, n.phase, null)); - } else { - r.reduce(n, n2, Phase.this, null); - } - } - } else if (r.numPop != 1) { - r.reduce(n, n2, Phase.this, null); - } - } - - // FIXME: this is a PITA - public int hashCode() { return n.hashCode() ^ (r==null ? 0 : r.hashCode()) ^ (n2==null ? 0 : n2.hashCode()); } - public boolean equals(Object o) { - if (o==null) return false; - if (o==this) return true; - if (!(o instanceof Reduct)) return false; - Reduct other = (Reduct)o; - return equal(r, other.r) && equal(n, other.n) && equal(n2, other.n2); - } + public boolean isTransparent() { return false; } + public boolean isHidden() { return false; } + + public void dumpGraphViz(String filename) throws IOException { + FileOutputStream fos = new FileOutputStream(filename); + PrintWriter p = new PrintWriter(new OutputStreamWriter(fos)); + GraphViz gv = new GraphViz(); + for(Object n : this) + ((StateNode)n).toGraphViz(gv); + gv.dump(p); + p.flush(); + p.close(); } } - /** helper method */ - private static boolean equal(Object a, Object b) { - if (a==null && b==null) return true; - if (a==null || b==null) return false; - return a.equals(b); - } - - /** this is something of a hack right now */ - private static long code(Parser.Table.State state, Phase start) { - return (((long)state.idx) << 32) | (start==null ? 0 : start.pos); - } - public boolean yak = false; }