From: adam Date: Mon, 26 Feb 2007 03:00:47 +0000 (-0500) Subject: summary patch for Nov->Jan work X-Git-Url: http://git.megacz.com/?p=sbp.git;a=commitdiff_plain;h=e84029a8b861075d6d0ed5040f919b2e4da4c98f summary patch for Nov->Jan work darcs-hash:20070226030047-5007d-9bc564c0e6b4ad93ac4d21befe0226352114da02.gz --- diff --git a/Makefile b/Makefile index bff5800..43d6717 100644 --- a/Makefile +++ b/Makefile @@ -198,3 +198,15 @@ bin/HaskellDemo: src/SBP.hs \ cd bin; $(ghc) -i../src/ -c ../src/HaskellDemo.hs $(link) -o HaskellDemo.o cd bin; for A in *.hs; do $(ghc) -c $$A $(link); done cd bin; $(ghc) $(linkopts) $(link) -o HaskellDemo *.o + + +ghcroot = /usr/local/brian/ghc +ghc = $(ghcroot)/compiler/ghc-inplace +ghclibs = $(ghcroot)/rts/HSrts.jar:$(ghcroot)/libraries/base/HSbase.jar:$(ghcroot)/libraries/stm/HSstm.jar + +bin/Main.class: src/Main.lhs src/SBP.lhs + cd src; $(ghc) -fglasgow-exts -cpp -odir ../bin -c -java SBP.lhs + cd src; $(ghc) -fglasgow-exts -cpp -odir ../bin -java Main.lhs + +go: bin/Main.class edu.berkeley.sbp.jar + java -cp bin:$(ghclibs):edu.berkeley.sbp.jar Main \ No newline at end of file diff --git a/TODO b/TODO index 0861440..ed981e8 100644 --- a/TODO +++ b/TODO @@ -1,44 +1,33 @@ _____________________________________________________________________________ Immediately - - Check if the only remaining stack is lame - - write a testcase for this + - comparison test is probably chewing up most of the time + - Check if the only remaining stack is lame (hopeful/nothopeful) + - write a testcase for this - circular gramars s = A A = A | "b" - - foo.add(x) foo.add(y.andnot(x)) ==> this is broken - - Annotation Tutorial .................................................. - - evil problems with: (x y? z /ws) - - it gets even more evil than that - - basically, follow restrictions are not honored when the element - matches against the empty string + - serializable parse tables? + - Treewalker code compiler? ______________________________________________________________________________ v1.1 - precedes restrictions ("<-") - - MUST HAVE BETTER ERROR MESSAGES - use for developing java15.g - - java15.g - once this is ready, do big announcement - - - topology no longer needed as an arg to parser? - - broader regression testing (for stuff like error messages, etc) - - More topology untangling [later] - - tib: use the lexer only for indentation increases/decreases - grammar highlighting? - - Forest needs a "manual access" API - the unwrap bit in Forest makes it really hard to expose an API for forests diff --git a/src/Main.lhs b/src/Main.lhs new file mode 100644 index 0000000..d9d862f --- /dev/null +++ b/src/Main.lhs @@ -0,0 +1,12 @@ +\begin{code} +module Main +where +import SBP +main = do t <- parseFile "../fleeterpreter/fleet.g" "../fleeterpreter/demo.fleet" + putStrLn $ "hi" + putStrLn $ show (prettyPrintTree t) +\end{code} + + + + diff --git a/src/SBP.hs b/src/SBP.lhs similarity index 50% rename from src/SBP.hs rename to src/SBP.lhs index 89fd9ca..61a30a5 100644 --- a/src/SBP.hs +++ b/src/SBP.lhs @@ -1,10 +1,93 @@ +\begin{code} -- -- These bindings are highly experimental and subject to change -- without notice. You've been warned. 
-- +module SBP(Tree(Tree),Location(Location),Region(Region),parseFile,prettyPrintTree) + where + +#if defined(java_HOST_OS) + +#define CONCAT(x,y) x/**/y +#define DEFINE_OBJECT(s,name) \ +data CONCAT(name,_); \ +type name = Object CONCAT(name,_); \ +foreign import jvm s CONCAT(_,name) :: JClass; \ +instance JType_ CONCAT(name,_) where jClass_ _ = CONCAT(_,name); -module SBP +import Foreign +import Foreign.Java +import Text.PrettyPrint.HughesPJ + +data Location = Location Int Int +data Region = Region Location Location + +data Tree = Tree String [Tree] Region +instance Show Tree where + show t@(Tree _ _ _) = show $ prettyPrintTree $ t + +coalesceFlatHeadlessNodes t@(Tree s children r) + | s==[], flat t = Tree (concat $ map (\(Tree s _ _) -> s) children) [] r + | otherwise = Tree s (map coalesceFlatHeadlessNodes children) r + where + flat (Tree _ children _) = not (any id $ map notFlatComponent children) + notFlatComponent (Tree _ [] _) = False + notFlatComponent (Tree _ _ _) = True + +prettyPrintTree (Tree "" [] _) = empty +prettyPrintTree (Tree s [] _) = text s +prettyPrintTree t@(Tree s children _) + | s==[] = (text "{") <+> ((prettyPrintTreeList children) <+> (text "}")) + | otherwise = ((text s) <> (text ":")) <+> prettyPrintTreeList children + where + prettyPrintTreeList children = (vcat $ map prettyPrintTree children) + +nullRegion = (Region (Location 0 0) (Location 0 0)) + +foreign import jvm type "edu.berkeley.sbp.Tree" JTree# +data JTree = JTree JTree# + +foreign import jvm safe "edu.berkeley.sbp.misc.RegressionTests.main" regressionTests :: IO () +foreign import jvm safe "edu.berkeley.sbp.misc.HaskellHelper.help" haskellHelper :: JString -> JString -> IO JTree +foreign import jvm safe "edu.berkeley.sbp.misc.HaskellHelper.isNull" isNull :: (Object a) -> IO Bool +foreign import jvm safe "getHead" getHead :: JTree -> IO (Object a) +foreign import jvm safe "child" getChild :: JTree -> Int32 -> IO JTree +foreign import jvm safe "size" size :: JTree -> IO Int32 +foreign import jvm safe "toString" jtoString :: (Object a) -> IO JString + +toString o = do isn <- isNull o + if isn then return "" + else do str <- jtoString o + return (unpackJString str) + + +haskify :: JTree -> IO Tree +haskify t = + do head <- getHead t + str <- toString head + numChildren <- size t + children <- if numChildren == 0 + then do return [] + else do children <- mapM (\i -> getChild t i) + $ take (fromIntegral numChildren) + $ iterate (+1) 0 + h <- mapM haskify children + return h + return $ Tree str children nullRegion + +parseFile :: + String -> -- grammar *.g file + String -> -- file to be parsed + IO Tree + +parseFile g f = do g' <- packJString g + f' <- packJString f + tree <- haskellHelper g' f' + x <- haskify tree + return x + +#else import Header_Java; import Class_edu_berkeley_sbp_misc_HaskellHelper; import Class_java_lang_Object; @@ -86,4 +169,5 @@ module SBP return $ Tree str children nullRegion ) :: JVM Tree) - +#endif +\end{code} diff --git a/src/edu/berkeley/sbp/Forest.java b/src/edu/berkeley/sbp/Forest.java index 04e06be..b2f4a22 100644 --- a/src/edu/berkeley/sbp/Forest.java +++ b/src/edu/berkeley/sbp/Forest.java @@ -129,7 +129,6 @@ public abstract class Forest implements GraphViz.ToGraphViz { /** An "ambiguity node"; this is immutable once it has been "looked at" */ static class Many extends Forest { - HashSet parents = new HashSet(); private FastSet> hp = new FastSet>(); private boolean touched = false; diff --git a/src/edu/berkeley/sbp/GSS.java b/src/edu/berkeley/sbp/GSS.java index 
0358ef6..be4276f 100644 --- a/src/edu/berkeley/sbp/GSS.java +++ b/src/edu/berkeley/sbp/GSS.java @@ -12,107 +12,60 @@ import java.lang.reflect.*; /** implements Tomita's Graph Structured Stack */ class GSS { - public static Queue removals = new LinkedList(); - - static String note = ""; - static int single_newnode = 0; - static int toplevel_reductions = 0; - static int multi_newnode = 0; - static int waiting_newnode = 0; - static int shifts = 0; - - static int count = 0; - static int reductions = 0; - int resets = 0; - int waits = 0; - Input input; - public GSS(Input input) { this.input = input; } - - private Node[] reducing_list = null; + public Input getInput() { return input; } // FIXME: right now, these are the performance bottleneck - HashMapBag waiting = new HashMapBag(); HashMapBag performed = new HashMapBag(); - HashMapBag lastperformed = new HashMapBag(); - HashMapBag expected = new HashMapBag(); - + /** FIXME */ Forest.Many finalResult; /** corresponds to a positions between tokens the input stream; same as Tomita's U_i's */ - class Phase implements Invokable, IntegerMappable, GraphViz.ToGraphViz, Iterable { + class Phase implements Invokable, IntegerMappable, GraphViz.ToGraphViz, Iterable { - public int pos() { return pos; } - public boolean closed() { return closed; } - public Tok token() { return token; } + public ArrayList reductionQueue = new ArrayList(); - public Iterator iterator() { return hash.iterator(); } - public void invoke(State st, Forest result, Node n) { - shifts++; - good |= next.newNode(n, result, st, false); + public void invoke(State st, Result result, Object o) { + //shifts++; + good |= next.newNode(result, st, false); } /** the token immediately after this phase */ final Tok token; + final int pos; - private final int pos; - - boolean reducing; public IntPairMap hash; /* ALLOC */ - private boolean closed; private boolean good; - private Phase next = null; + Phase next = null; private Phase prev; private Input.Location location; private Input.Location nextLocation; private Input.Location prevLocation; - public final Parser parser; - private Forest forest; - public Phase(Phase prev, Parser parser, Phase previous, Tok token, Input.Location location, + public Phase(Phase prev, Phase previous, Tok token, Input.Location location, Input.Location nextLocation, Forest forest) throws ParseFailed { this.prevLocation = prev==null ? location : prev.getLocation(); this.prev = prev; this.forest = forest; - this.parser = parser; this.pos = previous==null ? 
0 : previous.pos+1; this.token = token; this.location = location; this.nextLocation = nextLocation; performed.clear(); - reset(); - } - - public void reset() throws ParseFailed { - waiting.clear(); - expected.clear(); - lastperformed.clear(); - lastperformed.addAll(performed); - performed.clear(); hash = new IntPairMap(); - reset = false; good = false; - closed = false; - reducing = false; finalResult = null; if (prev != null) prev.shift(this, forest); } - public boolean isDone() throws ParseFailed { if (token != null) return false; if (token==null && finalResult==null) - ParseFailed.error("unexpected end of file", - getLocation(), - token, - hash.values(), - getLocation().createRegion(getLocation()), - input, - GSS.this); + ParseFailed.error("unexpected end of file", this); return true; } @@ -121,213 +74,97 @@ class GSS { public Input.Region getRegion() { return getPrevLocation().createRegion(getLocation()); } public Input.Location getNextLocation() { return nextLocation; } - /** add a new node (merging with existing nodes if possible) - * @param parent the parent of the new node - * @param result the SPPF result corresponding to the new node - * @param state the state that the new node is in - * @param fromEmptyReduction true iff this node is being created as a result of a reduction of length zero (see GRMLR paper) - * @param start the earliest part of the input contributing to this node (used to make merging decisions) - */ - public boolean newNode(Node parent, Forest pending, State state, boolean fromEmptyReduction) { - Node p = hash.get(state, parent==null?null:parent.phase()); - if (p != null) return newNode2(p, parent, pending, state, fromEmptyReduction); - else return newNode3(parent, pending, state, fromEmptyReduction); - } - public void newNode(Node parent, Forest pending, State state, boolean fromEmptyReduction, Position reduction) { - int pos = parent==null?0:parent.phase()==null?0:parent.phase().pos; - Sequence owner = reduction==null ? 
null : reduction.owner(); - if (reduction!=null) { - if (owner.hates!=null) { - for (Sequence s : performed.getAll(pos)) - if (owner.hates.contains(s)) - return; - for (Sequence s : lastperformed.getAll(pos)) - if (owner.hates.contains(s)) { - //System.out.println("now expecting ["+pos+"] => " + s); - expected.add(pos, s); - return; - } - } - if (owner.needs != null) - for(Sequence s : owner.needs) - if (!performed.contains(pos, s)) { - waiting.add(s, new Waiting(parent, pending, state, fromEmptyReduction, reduction)); - return; - } - if (!performed.contains(pos, owner)) { - performed.add(pos, owner); - if (owner.hated != null) - for(Sequence seq : owner.hated) - if (performed.contains(pos, seq)) { - performed.remove(pos, seq); - reset = true; - } + /** perform all shift operations, adding promoted nodes to next */ + public void shift(Phase next, Forest result) throws ParseFailed { + // this massively improves GC performance + if (prev!=null) prev.hash = null; + this.next = next; + for(Node n : hash.values()) { + if (token == null && n.state().isAccepting()) { + if (finalResult==null) finalResult = new Forest.Many(); + for(Result r : n) + finalResult.merge(r.getForest()); } + if (token == null) continue; + n.state().invokeShifts(token, this, new Result(result, n, null), null); } - newNode(parent, pending, state, fromEmptyReduction); - if (reduction != null) { - boolean redo = true; - while(redo) { - redo = false; - for(Waiting w : waiting.getAll(owner)) { - if (w.parent==parent || (parent!=null&&w.parent!=null&&w.parent.phase()==parent.phase())) { - waiting.remove(owner, w); - w.perform(); - redo = true; - break; - } + if (!good && token!=null) ParseFailed.error("unexpected character", this); + if (token==null && finalResult==null) ParseFailed.error("unexpected end of file", this); + } + + /** perform all reduction operations */ + public void reduce() throws ParseFailed { + Reduction last = null; + while(!reductionQueue.isEmpty()) { + Reduction r = null; + + // ugly + OUTER: for(int i=0; i 0) + continue OUTER; } + r = reductionQueue.get(i); + reductionQueue.remove(r); + break; + } + + /* + if (last == null) last = r; + else if (r.compareTo(last) > 0) last = r; + else if (r.compareTo(last) < 0) { + if (r.targetPhase() != null) + System.out.println("err " + last.compareTo(r) + " " + last.targetPhase().pos() + + " " + r.targetPhase().pos() + " " + pos); } + */ + + r.perform(); } } - private boolean newNode2(Node p, Node parent, Forest pending, State state, boolean fromEmptyReduction) { - if (p.merge(parent, pending)) return true; - p.addParent(parent, true); - if (p!=parent && !fromEmptyReduction && reducing) p.performReductions(parent); - return true; + public void newNodeFromReduction(Result result, State state, boolean fromEmptyReduction, Position reduction) { + int pos = result.phase().pos; + Sequence owner = reduction.owner(); + for(Sequence s : owner.hates) + if (performed.contains(pos, s)) + return; + for(Sequence s : owner.needs) + if (!performed.contains(pos, s)) + return; + if (owner.needed_or_hated && !performed.contains(pos, owner)) + performed.add(pos, owner); + if (state!=null) + newNode(result, state, fromEmptyReduction); } - private boolean newNode3(Node parent, Forest pending, State state, boolean fromEmptyReduction) { + /** add a new node (merging with existing nodes if possible) + * @param parent the parent of the new node + * @param result the SPPF result corresponding to the new node + * @param state the state that the new node is in + * @param fromEmptyReduction true iff 
this node is being created as a result of a reduction of length zero (see GRMLR paper) + * @param start the earliest part of the input contributing to this node (used to make merging decisions) + */ + public boolean newNode(Result result, State state, boolean fromEmptyReduction) { + Node p = hash.get(state, result.phase()); + if (p != null) { p.merge(result); return true; } do { if (token != null && state.canShift(token)) break; if (state.isAccepting()) break; if (token==null) break; if (!state.canReduce(token)) return false; - //if (count > 1) break; - //if (r.numPop == 0) break; - //r.reduce(pending, parent, null, Phase.this, null); - //return; } while(false); - - Node n = new Node(Phase.this, parent, pending, state); // ALLOC - if (reducing) { - n.performEmptyReductions(); - if (!fromEmptyReduction) n.performReductions(parent); - } + new Node(Phase.this, result, state, fromEmptyReduction); // ALLOC return true; } - public LinkedList reductionQueue = new LinkedList(); - - /** perform all reduction operations */ - public void reduce() throws ParseFailed { - try { - reducing = true; - if (reducing_list==null || reducing_list.length < hash.size()) - reducing_list = new Node[hash.size() * 4]; - hash.toArray(reducing_list); - int num = hash.size(); - for(int i=0; inext */ - public void shift(Phase next, Forest result) throws ParseFailed { - // this massively improves GC performance - if (prev!=null && parser.helpgc) { - //prev.hash = null; - //System.out.println("\r" + /*shifts + " " + */ single_newnode /*+ "/"+multi_newnode + " " + waiting_newnode*/); - //System.out.println("\r" + shifts + " " + note); - //System.out.println("\r" + shifts); - //System.out.println("\r" + toplevel_reductions); - //System.out.println("\r" + multi_newnode); - single_newnode = 0; - note = ""; - multi_newnode = 0; - toplevel_reductions = 0; - waiting_newnode = 0; - shifts = 0; - } - this.next = next; - closed = true; - Forest res = null; - boolean ok = false; - int count = 0; - for(Node n : hash.values()) { - if (token == null && n.state().isAccepting()) { - if (finalResult==null) finalResult = new Forest.Many(); - for(Object f : n.results()) - finalResult.merge((Forest)f); - } - if (token == null) continue; - n.state().invokeShifts(token, this, result, n); - } - //System.out.println(next.hash.size()); - if (!good && token!=null) - ParseFailed.error("unexpected character", - getLocation(), - token, - hash.values(), - getRegion(), - input, - GSS.this); - - if (token==null && finalResult==null) - ParseFailed.error("unexpected end of file", - getLocation(), - token, - hash.values(), - getLocation().createRegion(getLocation()), - input, - GSS.this); - } - - - class Waiting { - Node parent; - Forest pending; - State state; - boolean fromEmptyReduction; - Position reduction; - public Waiting(Node parent, Forest pending, State state, boolean fromEmptyReduction, Position reduction) { - waits++; - this.parent = parent; - this.pending = pending; - this.state = state; - this.fromEmptyReduction = fromEmptyReduction; - this.reduction = reduction; - } - public void perform() { - //System.out.println("performing: " + reduction.position); - waiting_newnode++; - newNode(parent, pending, state, fromEmptyReduction, reduction); - } - } - - public int toInt() { return pos+1; } public int size() { return hash==null ? 
0 : hash.size(); } + public int pos() { return pos; } + public Tok getToken() { return token; } + public Iterator iterator() { return hash.iterator(); } + public GSS getGSS() { return GSS.this; } // GraphViz ////////////////////////////////////////////////////////////////////////////// diff --git a/src/edu/berkeley/sbp/Node.java b/src/edu/berkeley/sbp/Node.java index 53782f7..5c0f023 100644 --- a/src/edu/berkeley/sbp/Node.java +++ b/src/edu/berkeley/sbp/Node.java @@ -10,122 +10,83 @@ import java.util.*; import java.lang.reflect.*; /** a node in the GSS */ -final class Node implements Invokable, IntegerMappable, GraphViz.ToGraphViz { +final class Node + implements Invokable, + IntegerMappable, + GraphViz.ToGraphViz, + Iterable { - public static int node_idx = 0; - - private final GSS.Phase phase; - - public FastSet setx = new FastSet(); - public FastSet childs = new FastSet(); - - private boolean allqueued = false; - - /** what state this node is in */ - public final Parser.Table.State state; - - /** which GSS.Phase this Node belongs to (node that Node is also a non-static inner class of GSS.Phase) */ + /** which GSS.Phase this Node belongs to */ public GSS.Phase phase() { return phase; } + public Iterator iterator() { return results.iterator(); } + public Parser.Table.State state() { return state; } - private HashSet resultMap = new HashSet(); - public Iterable results() { return resultMap; } - public Iterable parents() { return setx; } - public void addParent(Node n, boolean b) { - if (n==null) return; - setx.add(n, b); - n.childs.add(this, true); - //else - //System.out.println("************ evilstate: " + this); - } - public boolean merge(Node parent, Forest result) { - // FIXME: inefficient! - for(Forest.Many f : results()) { - if (f.parents.contains(parent) /* UGLY: */ && f.parents.size()==1) { - f.merge(result); - return true; - } - } - Forest.Many f = new Forest.Many(); - f.parents.add(parent); - f.merge(result); - resultMap.add(f); - addParent(parent, true); - return false; - } + public int toInt() { return idx; } - public void performReductions() { - if (allqueued) return; - allqueued = true; - state.invokeReductions(phase().token(), this, this, null); + public boolean merge(Result r) { + if (results.contains(r)) return true; + results.add(r); + if (fromEmptyReduction) return false; + state.invokeReductions(phase().getToken(), this, false, r); + return false; } - public void performReductions(Node n2) { - if (!allqueued) phase().reductionQueue.add(this);//performReductions(); - else state.invokeReductions(phase().token(), this, this, n2); - } + ////////////////////////////////////////////////////////////////////// - public Parser.Table.State state() { return state; } + private static int node_idx = 0; + private final int idx = node_idx++; - public void performEmptyReductions() { state.invokeReductions(phase().token, this, null, null); } - public final void invoke(Position r, Node n, Node n2) { - //reductions++; - //if (r.pos==1) single_newnode++; - //if (r.pos>1) multi_newnode++; - if (n==null || n2==null || r.pos==0) { - if (r.pos==0) { - if (n==null) n = this; - else return; - } - if (n==null) return; - Forest[] holder = new Forest[r.pos]; - if (r.pos==0) n.finish(r, r.zero(n.phase().getLocation().createRegion(n.phase().getLocation())), n.phase()); - else n.reduce(r, r.pos-1, n.phase(), null); - } else { - if (r.pos<=0) throw new Error("called wrong form of reduce()"); - int pos = r.pos-1; - n.reduce(r, pos, n.phase(), n2); - } + private final GSS.Phase phase; + private final 
Parser.Table.State state; + private final boolean fromEmptyReduction; + //private FastSet results = new FastSet(); + private HashSet results = new HashSet(); + + public final void invoke(Position r, Boolean emptyProductions, Result only) { + if (emptyProductions != (r.pos==0)) return; + if (r.pos==0) finish(r, r.zero(phase().getLocation().createRegion(phase().getLocation())), phase()); + else reduce(r, r.pos-1, phase(), only); } - public void reduce(Position r, int pos, GSS.Phase target, Node only) { + private void reduce(Position r, int pos, GSS.Phase target, Result only) { Forest[] holder = r.holder; Forest old = holder[pos]; - - //toplevel_reductions++; - HashSet rr = new HashSet(); - for(Forest result : results()) { - rr.add(result); - } - //System.out.println(r); - for(Forest result : rr) { - for(Node child : ((Forest.Many)result).parents) { - if (only != null && child!=only) continue; - holder[pos] = result; - if (pos==0) child.finish(r, r.rewrite(child.phase().getLocation().createRegion(target.getLocation())), target); - else child.reduce(r, pos-1, target, null); + for(Result res : results) + if (only == null || res == only) { + Node child = res.parent(); + holder[pos] = res.getForest(); + if (pos>0) child.reduce(r, pos-1, target, null); + else { + Reduction reduction = + new Reduction(child, r, r.rewrite(child.phase().getLocation().createRegion(target.getLocation())), target); + target.reductionQueue.add(reduction); + } } - } + holder[pos] = old; } - public void finish(Position r, Forest result, GSS.Phase target) { + void finish(Position r, Forest forest, GSS.Phase target) { Parser.Table.State state0 = (Parser.Table.State)state.gotoSetNonTerminals.get(r.owner()); - if (result==null) throw new Error(); - if (state0!=null) - target.newNode(this, result, state0, r.pos<=0, r); + Result result = new Result(forest, this, r); + target.newNodeFromReduction(result, state0, r.pos<=0, r); } - Node(GSS.Phase phase, Node parent, Forest pending, State state) { + Node(GSS.Phase phase, Result result, State state, boolean fromEmptyReduction) { this.phase = phase; this.state = state; - this.merge(parent, pending); - GSS.Phase start = parent==null ? null : parent.phase(); - if (parent != null) addParent(parent, true); - if (phase.hash.get(state, start) != null) throw new Error("severe problem!"); - phase.hash.put(state, start, this); + this.fromEmptyReduction = fromEmptyReduction; + results.add(result); + if (phase.hash.get(state, result.phase()) != null) throw new Error("severe problem!"); + phase.hash.put(state, result.phase(), this); + + for(Object s : state.also) + phase.newNode(result, (State)s, fromEmptyReduction); + + state.invokeReductions(phase().token, this, true, null); + if (!fromEmptyReduction) + state.invokeReductions(phase().getToken(), this, false, null); } - public int toInt() { return idx; } - private final int idx = node_idx++; // GraphViz ////////////////////////////////////////////////////////////////////////////// @@ -135,8 +96,8 @@ final class Node implements Invokable, IntegerMappable, Gr n.label = ""+state.toStringx(); n.shape = "rectangle"; boolean hasparents = false; - for(Node parent : parents()) { hasparents = true; n.edge(parent, ""); } - for(Forest result : results()) n.edge(result, ""); + //for(Node parent : parents()) { hasparents = true; n.edge(parent, ""); } + //for(Forest result : resultMap) n.edge(result, ""); n.color = !hasparents ? "blue" : /*state.evil ? 
"red" :*/ "green"; ((GraphViz.Group)phase().toGraphViz(gv)).add(n); return n; diff --git a/src/edu/berkeley/sbp/ParseFailed.java b/src/edu/berkeley/sbp/ParseFailed.java index 31a3a56..9bc2ae8 100644 --- a/src/edu/berkeley/sbp/ParseFailed.java +++ b/src/edu/berkeley/sbp/ParseFailed.java @@ -50,8 +50,11 @@ public class ParseFailed extends Exception { if (count <= 0) { barf(sb, n, indent, skip, loc); } else { + /* + FIXME: removed for(Node nn : (Iterable)n.parents()) barf(sb, nn, indent, skip, count-1, n.phase().getLocation()); + */ } } static void barf(HashMap sb, Node n, int indent, boolean skip, Input.Location loc) { @@ -105,8 +108,13 @@ public class ParseFailed extends Exception { //if (!p.isLast() && !p.next().isLast()) continue; if (((p.isFirst() || p.isLast()) && !force)/* || p.owner().name==null*/ || !important(p)) { + /* + FIXME: removed for(Node n2 : n.parents()) - complain(n2, errors, force /*| p.isFirst()*/, indent); + complain(n2, errors, force + //| p.isFirst() + , indent); + */ } else { String seqname = p.owner()/*.name*/+""; HashSet hs = errors.get(seqname); @@ -130,6 +138,10 @@ public class ParseFailed extends Exception { return ANSI.purple(ret.toString()); } + static void error(String message, GSS.Phase phase) throws ParseFailed { + error(message, phase.getLocation(), phase.getToken(), + phase, phase.getRegion(), phase.getGSS().getInput(), phase.getGSS()); + } static void error(String message, Input.Location loc, Object token, diff --git a/src/edu/berkeley/sbp/Parser.java b/src/edu/berkeley/sbp/Parser.java index fa2f7d1..10768f6 100644 --- a/src/edu/berkeley/sbp/Parser.java +++ b/src/edu/berkeley/sbp/Parser.java @@ -28,8 +28,8 @@ public abstract class Parser { GSS gss = new GSS(input); Input.Location loc = input.getLocation(); Token tok = input.next(); - GSS.Phase current = gss.new Phase(null, this, null, tok, loc, input.getLocation(), null); - current.newNode(null, Forest.create(loc.createRegion(loc), null, null, false), pt.start, true); + GSS.Phase current = gss.new Phase(null, null, tok, loc, input.getLocation(), null); + current.newNode(new Result(Forest.create(loc.createRegion(loc), null, null, false), null, null), pt.start, true); int count = 1; for(int idx=0;;idx++) { Input.Location oldloc = loc; @@ -37,7 +37,7 @@ public abstract class Parser { Forest forest = current.token==null ? 
null : shiftToken((Token)current.token, loc); loc = input.getLocation(); Token nextToken = input.next(); - GSS.Phase next = gss.new Phase(current, this, current, nextToken, loc, input.getLocation(), forest); + GSS.Phase next = gss.new Phase(current, current, nextToken, loc, input.getLocation(), forest); if (!helpgc) { FileOutputStream fos = new FileOutputStream("out-"+idx+".dot"); PrintWriter p = new PrintWriter(new OutputStreamWriter(fos)); @@ -106,6 +106,7 @@ public abstract class Parser { /** used to generate unique values for State.idx */ private int master_state_idx = 0; HashMap,State> all_states = new HashMap,State>(); + HashSet all_elements = new HashSet(); /** construct a parse table for the given grammar */ public Table(Topology top) { this("s", top); } @@ -118,15 +119,14 @@ public abstract class Parser { cache.eof.put(start0, true); // construct the set of states - HashSet all_elements = new HashSet(); walk(start0, all_elements); for(SequenceOrElement e : all_elements) cache.ys.addAll(e, new Walk.YieldSet(e, cache).walk()); HashSet hp = new HashSet(); reachable(start0, hp); - this.dead_state = new State(new HashSet(), all_states, all_elements); - this.start = new State(hp, all_states, all_elements); + this.dead_state = new State(new HashSet()); + this.start = new State(hp); // for each state, fill in the corresponding "row" of the parse table for(State state : all_states.values()) @@ -139,8 +139,13 @@ public abstract class Parser { if (isRightNullable(p)) { Walk.Follow wf = new Walk.Follow(top.empty(), p.owner(), all_elements, cache); Topology follow = wf.walk(p.owner()); - for(Position p2 = p; p2 != null && p2.element() != null; p2 = p2.next()) + for(Position p2 = p; p2 != null && p2.element() != null; p2 = p2.next()) { + Atom set = new Walk.EpsilonFollowSet(new edu.berkeley.sbp.chr.CharAtom(top.empty().complement()), + new edu.berkeley.sbp.chr.CharAtom(top.empty()), + cache).walk(p2.element()); follow = follow.intersect(new Walk.Follow(top.empty(), p2.element(), all_elements, cache).walk(p2.element())); + if (set != null) follow = follow.intersect(set.getTokenTopology()); + } state.reductions.put(follow, p); if (wf.includesEof()) state.eofReductions.add(p); } @@ -165,10 +170,11 @@ public abstract class Parser { /** a single state in the LR table and the transitions possible from it */ - class State implements Comparable>, IntegerMappable, Iterable { + class State implements IntegerMappable, Iterable { public final int idx = master_state_idx++; private final HashSet hs; + public HashSet> also = new HashSet>(); public transient HashMap> gotoSetNonTerminals = new HashMap>(); private transient TopologicalBag> gotoSetTerminals = new TopologicalBag>(); @@ -202,7 +208,6 @@ public abstract class Parser { /** * create a new state consisting of all the Positions in hs * @param hs the set of Positions comprising this State - * @param all_states the set of states already constructed (to avoid recreating states) * @param all_elements the set of all elements (Atom instances need not be included) * * In principle these two steps could be merged, but they @@ -221,14 +226,31 @@ public abstract class Parser { * for non-Atom Elements. 
* */ - public State(HashSet hs, - HashMap,State> all_states, - HashSet all_elements) { + public State(HashSet hs) { this(hs, false); } + public boolean special; + public State(HashSet hs, boolean special) { this.hs = hs; + this.special = special; // register ourselves in the all_states hash so that no // two states are ever created with an identical position set - all_states.put(hs, this); + ((HashMap)all_states).put(hs, this); + + for(Position p : hs) { + if (!p.isFirst()) continue; + for(Sequence s : p.owner().needs()) { + if (hs.contains(s.firstp())) continue; + HashSet h2 = new HashSet(); + reachable(s.firstp(), h2); + also.add((State)(all_states.get(h2) == null ? (State)new State(h2,true) : (State)all_states.get(h2))); + } + for(Sequence s : p.owner().hates()) { + if (hs.contains(s.firstp())) continue; + HashSet h2 = new HashSet(); + reachable(s, h2); + also.add((State)(all_states.get(h2) == null ? (State)new State(h2,true) : (State)all_states.get(h2))); + } + } // Step 1a: examine all Position's in this state and compute the mappings from // sets of follow tokens (tokens which could follow this position) to sets @@ -251,7 +273,8 @@ public abstract class Parser { for(Topology r : bag0) { HashSet h = new HashSet(); for(Position p : bag0.getAll(r)) h.add(p); - gotoSetTerminals.put(r, all_states.get(h) == null ? new State(h, all_states, all_elements) : all_states.get(h)); + ((TopologicalBag)gotoSetTerminals).put(r, all_states.get(h) == null + ? new State(h) : all_states.get(h)); } // Step 2: for every non-Atom element (ie every Element which has a corresponding reduction), @@ -274,7 +297,7 @@ public abstract class Parser { } OUTER: for(SequenceOrElement y : move) { HashSet h = move.getAll(y); - State s = all_states.get(h) == null ? new State(h, all_states, all_elements) : all_states.get(h); + State s = all_states.get(h) == null ? (State)new State(h) : (State)all_states.get(h); // if a reduction is "lame", it should wind up in the dead_state after reducing if (y instanceof Sequence) { for(Position p : hs) { @@ -308,7 +331,7 @@ public abstract class Parser { return ret.toString(); } - public int compareTo(State s) { return idx==s.idx ? 0 : idx < s.idx ? 
-1 : 1; } + public Walk.Cache cache() { return cache; } public int toInt() { return idx; } } } @@ -317,8 +340,8 @@ public abstract class Parser { private static void reachable(Sequence s, HashSet h) { reachable(s.firstp(), h); - for(Sequence ss : s.needs()) reachable(ss, h); - for(Sequence ss : s.hates()) reachable(ss, h); + //for(Sequence ss : s.needs()) reachable(ss, h); + //for(Sequence ss : s.hates()) reachable(ss, h); } private static void reachable(Element e, HashSet h) { if (e instanceof Atom) return; diff --git a/src/edu/berkeley/sbp/Reduction.java b/src/edu/berkeley/sbp/Reduction.java new file mode 100644 index 0000000..e845764 --- /dev/null +++ b/src/edu/berkeley/sbp/Reduction.java @@ -0,0 +1,92 @@ +// Copyright 2006 all rights reserved; see LICENSE file for BSD-style license + +package edu.berkeley.sbp; +import edu.berkeley.sbp.*; +import edu.berkeley.sbp.util.*; +import edu.berkeley.sbp.Parser.Table.*; +import edu.berkeley.sbp.Sequence.Position; +import java.io.*; +import java.util.*; +import java.lang.reflect.*; + +final class Reduction implements Comparable { + + public Position position; + public GSS.Phase phase; + public Forest result; + public Node node; + boolean done = false; + + public Reduction(Node node, Position position, Forest result, GSS.Phase target) { + this.position = position; + this.result = result; + this.phase = target; + this.node = node; + } + + public void perform() { + if (done) return; + done = true; + node.finish(position, result, phase); + } + + public String toString() { return position+""; } + + public int compareTo(Reduction r) { + int ret = compareTo0(r); + if (ret == 0) { + Walk.Cache cache = node.state().cache(); + if (canKill(cache, position, r.position) && canKill(cache, r.position, position)) throw new Error(); + if (canKill(cache, position, r.position)) ret = 1; + else if (canKill(cache, r.position, position)) ret = -1; + if (canNeed(cache, position, r.position)) ret = 1; + else if (canNeed(cache, r.position, position)) ret = -1; + } + return -1 * ret; + } + + private boolean isRightNullable(Walk.Cache c, Position p) { + if (p.isLast()) return true; + if (!c.possiblyEpsilon(p.element())) return false; + return isRightNullable(c, p.next()); + } + + public boolean canKill(Walk.Cache cache, Position mep, Position himp) { + if (!isRightNullable(cache, mep)) return false; + if (!isRightNullable(cache, himp)) return false; + Sequence me = mep.owner(); + Sequence him = himp.owner(); + for(Sequence killer : him.hates()) { + HashSet eq2 = new Walk.EquivalentTo(killer, cache).walk(); + if (eq2.contains(me)) return true; + } + return false; + } + + public static final int LESS_THAN = -1; + public static final int EQUAL_TO = 0; + public static final int GREATER_THAN = 1; + + public int compareTo0(Reduction n) { + if (targetPhase()==null && n.targetPhase()==null) return EQUAL_TO; + if (targetPhase()==null) return LESS_THAN; + if (n.targetPhase()==null) return GREATER_THAN; + if (targetPhase().pos < n.targetPhase().pos) return LESS_THAN; + if (targetPhase().pos > n.targetPhase().pos) return GREATER_THAN; + return 0; + } + public int pos() { return targetPhase()==null ? 
0 : targetPhase().pos; } + public GSS.Phase targetPhase() { return node.phase(); } + + public boolean canNeed(Walk.Cache cache, Position mep, Position himp) { + if (!isRightNullable(cache, mep)) return false; + if (!isRightNullable(cache, himp)) return false; + Sequence me = mep.owner(); + Sequence him = himp.owner(); + for(Sequence needer : him.needs()) { + HashSet eq2 = new Walk.EquivalentTo(needer, cache).walk(); + if (eq2.contains(me)) return true; + } + return false; + } +} diff --git a/src/edu/berkeley/sbp/Result.java b/src/edu/berkeley/sbp/Result.java new file mode 100644 index 0000000..8eccf96 --- /dev/null +++ b/src/edu/berkeley/sbp/Result.java @@ -0,0 +1,31 @@ +// Copyright 2006 all rights reserved; see LICENSE file for BSD-style license + +package edu.berkeley.sbp; +import edu.berkeley.sbp.*; +import edu.berkeley.sbp.util.*; +import edu.berkeley.sbp.Parser.Table.*; +import edu.berkeley.sbp.Sequence.Position; +import java.io.*; +import java.util.*; +import java.lang.reflect.*; + +class Result { + + private Forest f; + private Node parent; + private GSS.Phase phase; + private Position reduction; + + public Position reduction() { return reduction; } + public GSS.Phase phase() { return phase; } + public Forest getForest() { return f; } + public Node parent() { return parent; } + + public Result(Forest f, Node parent, Position reduction) { + this.f = f; + this.reduction = reduction; + this.parent = parent; + if (parent != null) phase = parent.phase(); + } + +} \ No newline at end of file diff --git a/src/edu/berkeley/sbp/Sequence.java b/src/edu/berkeley/sbp/Sequence.java index 9535357..d3d8d8a 100644 --- a/src/edu/berkeley/sbp/Sequence.java +++ b/src/edu/berkeley/sbp/Sequence.java @@ -14,6 +14,8 @@ public abstract class Sequence implements Iterable, SequenceOrElement { protected final Element[] elements; + public boolean needed_or_hated = false; + final HashSet hated = new HashSet(); final HashSet needs = new HashSet(); @@ -65,14 +67,20 @@ public abstract class Sequence implements Iterable, SequenceOrElement { //////////////////////////////////////////////////////////////////////////////// /** return a new sequence identical to this one, but with a positive conjunct s */ - public Sequence and(Sequence s) { Sequence ret = dup(); ret.needs.add(s); return ret; } + public Sequence and(Sequence s) { Sequence ret = dup(); ret.needs.add(s); s.needed_or_hated=true; return ret; } /** return a new sequence identical to this one, but with a negative conjunct s */ - public Sequence andnot(Sequence s) { Sequence ret = dup(); ret.hates.add(s); s.hated.add(ret); return ret; } + public Sequence andnot(Sequence s) { Sequence ret = dup(); ret.hates.add(s); s.hated.add(ret); s.needed_or_hated=true; return ret; } /** return a new sequence identical to this one, but with a follow-set restricted to a */ public Sequence followedBy(Atom a) { Sequence ret = dup(); ret.follow = a; return ret; } + boolean hatesAny(Iterable it) { + if (hates.isEmpty()) return false; + for(Sequence s : it) if (hates.contains(s)) return true; + return false; + } + Iterable needs() { return needs; } Iterable hates() { return hates; } diff --git a/src/edu/berkeley/sbp/Walk.java b/src/edu/berkeley/sbp/Walk.java index a92bcb8..0b19cd4 100644 --- a/src/edu/berkeley/sbp/Walk.java +++ b/src/edu/berkeley/sbp/Walk.java @@ -64,6 +64,34 @@ abstract class Walk { } } + static class EquivalentTo extends Walk> { + private final Sequence s; + private final HashSet eq = new HashSet(); + public final HashSet walk() { return walk(s); } + public 
EquivalentTo(Sequence e, Cache c) { + super(c); this.s = e; + } + public HashSet bottom(SequenceOrElement e) { return eq; } + public HashSet walkSequence(Sequence seq) { + eq.add(seq); + Position p = seq.firstp(); + for(; !p.isLast(); p = p.next()) { + if (!p.isLast() && isRightNullable(p.next())) + walk(p.element()); + if (!c.possiblyEpsilon(p.element())) break; + } + return eq; + } + public HashSet walkAtom(Atom r) { + return eq; + } + private boolean isRightNullable(Position p) { + if (p.isLast()) return true; + if (!c.possiblyEpsilon(p.element())) return false; + return isRightNullable(p.next()); + } + } + // Boolean ////////////////////////////////////////////////////////////////////////////// @@ -82,6 +110,34 @@ abstract class Walk { } } + static class EpsilonFollowSet extends Walk { + Atom all; + Atom empty; + public EpsilonFollowSet(Atom a, Atom empty, Cache c) { + super(c); + this.all = all; + this.empty = empty; + } + public Atom walkAtom(Atom r) { return all; } + public Atom walkSequence(Sequence s) { + if (s.follow==null) return all; + return s.follow; + } + public Atom sequence(Sequence s, Atom a, Atom b) { + throw new RuntimeException(); + } + public Atom union(Union u, Atom a, Atom b) { + /* + if (a==null) return b; + if (b==null) return a; + */ + if (a==null || b==null) return all; + return (Atom)a.union(b); + } + public Atom bottom(SequenceOrElement e) { + return (e instanceof Union) ? empty : all; + } + } // Input-Set ////////////////////////////////////////////////////////////////////////////// diff --git a/src/edu/berkeley/sbp/misc/HaskellHelper.java b/src/edu/berkeley/sbp/misc/HaskellHelper.java index 2a96356..18f5f6a 100644 --- a/src/edu/berkeley/sbp/misc/HaskellHelper.java +++ b/src/edu/berkeley/sbp/misc/HaskellHelper.java @@ -11,9 +11,10 @@ import java.io.*; public class HaskellHelper { public static void main(String[] argv) throws Throwable { - main(argv[0], argv[1]); + help(argv[0], argv[1]); } - public static Tree main(String grammarFile, String targetFile) throws Throwable { + public static boolean isNull(Object o) { return o==null; } + public static Tree help(String grammarFile, String targetFile) throws Throwable { try { Tree res = new CharParser(MetaGrammar.newInstance()).parse(new FileInputStream(grammarFile)).expand1(); Union meta = Grammar.create(res, "s", diff --git a/tests/circular.tc b/tests/circular.tc new file mode 100644 index 0000000..c493fb3 --- /dev/null +++ b/tests/circular.tc @@ -0,0 +1,21 @@ +// circular test cases +//testcase { +// input "x"; +// output "a:{x}"; +// +// s = a1:: a +// a = s1:: s +// a = ^"x" +//} +// +//testcase { +// input "x"; +// output "x"; +// output "s2:{s2:{s0 s0} x}"; +// output "s2:{s0 x}"; +// +// +// s = s2:: s s +// s = ^"x" +// s = s0:: () +//} diff --git a/tests/performance.tc b/tests/performance.tc new file mode 100644 index 0000000..50ef79b --- /dev/null +++ b/tests/performance.tc @@ -0,0 +1,23 @@ +////////////////////////////////////////////////////////////////////////////// +// performance hogs +// +//testcase { +// input "aaaaaXaaaa"; +// output ""; +// s = ManyA &~ EndsWithZ +// EndsWithZ = Anything "Z" +// ManyA = () | "a" ManyA +// Anything = () | ("a" | "X" | "Z") Anything +//} + +//testcase { +// input "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"; +// output 
""; +// s = ManyA +// ManyA = () +// | A ManyA! & ManyAB +// A = "a" +// ManyAB = () +// | "a" ManyAB +// | "b" ManyAB +//} diff --git a/tests/regression.tc b/tests/regression.tc index 3351018..863f8d8 100644 --- a/tests/regression.tc +++ b/tests/regression.tc @@ -1,24 +1,3 @@ -//testcase { -// input "x"; -// output "a:{x}"; -// -// s = a1:: a -// a = s1:: s -// a = ^"x" -//} -// -//testcase { -// input "x"; -// output "x"; -// output "s2:{s2:{s0 s0} x}"; -// output "s2:{s0 x}"; -// -// -// s = s2:: s s -// s = ^"x" -// s = s0:: () -//} - testcase { input "aaaaa"; s = A @@ -26,15 +5,6 @@ testcase { | "a" A &~ "a" s } -//testcase { -// input "a"; -// output "yes:{}"; -// s = A -// A = "a" s &~ "a" A -// | "a" A &~ "a" s -// | () -//} - testcase { input "ab c"; output "1:{{a b} {c}}"; @@ -91,7 +61,7 @@ testcase { x = ~[] s = xbx:: x* b x* b = abab:: [ab][ab] - &~ ( "aa" | "bb" ) + &~ ("aa"|"bb") } testcase { @@ -217,23 +187,6 @@ testcase { idl = [a-d] } -//testcase { -// input "a*b*c"; -// output "times:{stringify:{{a}} times:{stringify:{{b}} stringify:{{c}}}}"; -// w = " " -// l = id -// s = l "=" r => "assign" -// | r -// r = l -// | l "=" r => "assign" -// | r "+" r => "plus" -// | (r) "*" r => "times" -// | "(" r ")" -// | r r => "times" -// id = idl++ => "stringify" -// idl = [a-d] -//} - testcase { input "a+b*c"; output "plus:{stringify:{{a}} times:{stringify:{{b}} stringify:{{c}}}}"; @@ -319,8 +272,6 @@ keyword = "if" | "then" | "else" | "while" } - - testcase { input "abc "; @@ -390,46 +341,6 @@ testcase { x = [123] } - -//testcase { -// input "ab"; -// -// s = a:"a" b:"b" -//} - - -testcase { - input "a c"; - s = first:: A WSA B? WSB C - A = "a" - B = "b" - C = "c" - WSA = WSA:: " "** - WSB = () -> ~" " - | WSB:: " "++ -} - -//testcase { -// input "aaaaaXaaaa"; -// output ""; -// s = ManyA &~ EndsWithZ -// EndsWithZ = Anything "Z" -// ManyA = () | "a" ManyA -// Anything = () | ("a" | "X" | "Z") Anything -//} - -testcase { - input "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"; - output ""; - s = ManyA - ManyA = () - | A ManyA! & ManyAB - A = "a" - ManyAB = () - | "a" ManyAB - | "b" ManyAB -} - testcase { input "aaaaaaaa"; output ""; @@ -438,4 +349,24 @@ testcase { AAs = () | AAs "aa" } +// make sure follow restrictions are honored +// when a string matches the empty string +testcase { + input "xxx"; + s = x:: "x" A "x" C "x" + A = B + B = "y" + | () -> ~"x" + C = D -> ~"x" + D = () +} +testcase { + input "axxxxxc"; + output "q:{a {x x x x x} c}"; + s = q:: A ws B? ws C + ws = [x]** + A = a:: "a" + B = b:: "b" + C = c:: "c" +}