From 61566402d83d5c06d57fb850e60ca0f82c27b9a2 Mon Sep 17 00:00:00 2001 From: adam Date: Sat, 8 Sep 2007 18:30:55 -0400 Subject: [PATCH] no longer need to thread a Grammar through Parser.Table darcs-hash:20070908223055-5007d-a63f4cd7d978447fada19a109eaf14469846aa68.gz --- src/edu/berkeley/sbp/Node.java | 4 +- src/edu/berkeley/sbp/Parser.java | 140 +++++++++++++-------------- src/edu/berkeley/sbp/Sequence.java | 27 +++--- src/edu/berkeley/sbp/SequenceOrElement.java | 1 + src/edu/berkeley/sbp/Union.java | 6 +- 5 files changed, 88 insertions(+), 90 deletions(-) diff --git a/src/edu/berkeley/sbp/Node.java b/src/edu/berkeley/sbp/Node.java index 93548fe..100f89f 100644 --- a/src/edu/berkeley/sbp/Node.java +++ b/src/edu/berkeley/sbp/Node.java @@ -74,7 +74,7 @@ final class Node if (r.numPops()!=0) reduce(r, r.numPops()-1, phase(), only); else { Input.Region region = phase().getLocation().createRegion(phase().getLocation()); - new Result(r.rewrite(region, phase().parser().cache()), this, r, phase()); + new Result(r.rewrite(region), this, r, phase()); } } @@ -89,7 +89,7 @@ final class Node if (pos>0) child.reduce(r, pos-1, target, null); else { Input.Region region = child.phase().getLocation().createRegion(target.getLocation()); - new Reduction(child, r, r.rewrite(region, phase().parser().cache()), target); + new Reduction(child, r, r.rewrite(region), target); } } holder[pos] = old; diff --git a/src/edu/berkeley/sbp/Parser.java b/src/edu/berkeley/sbp/Parser.java index c37d1a8..5f6e252 100644 --- a/src/edu/berkeley/sbp/Parser.java +++ b/src/edu/berkeley/sbp/Parser.java @@ -2,8 +2,8 @@ package edu.berkeley.sbp; import edu.berkeley.sbp.util.*; -import edu.berkeley.sbp.Sequence.Position; -import edu.berkeley.sbp.Sequence.Position; +import edu.berkeley.sbp.Sequence.Pos; +import edu.berkeley.sbp.Sequence.Pos; import java.io.*; import java.util.*; @@ -21,7 +21,6 @@ public abstract class Parser { public abstract Topology emptyTopology(); public String toString() { return pt.toString(); } - Grammar cache() { return pt; } /** parse input, and return the shared packed parse forest (or throw an exception) */ public Forest parse(Input input) throws IOException, ParseFailed { @@ -119,35 +118,36 @@ public abstract class Parser { private int master_state_idx = 0; /** all the states for this table */ - private transient HashSet> all_states = new HashSet>(); + private transient HashSet> all_states = new HashSet>(); /** all the doomed states in this table */ - private transient HashMap,State> doomed_states = new HashMap,State>(); + private transient HashMap,State> doomed_states = new HashMap,State>(); /** all the non-doomed states in this table */ - private transient HashMap,State> normal_states = new HashMap,State>(); + private transient HashMap,State> normal_states = new HashMap,State>(); - Topology emptyTopology() { return Parser.this.emptyTopology(); } - /** construct a parse table for the given grammar */ Table(Union ux) { - super(new Union("0", Sequence.create(ux), true)); + Union rootUnion = new Union("0", Sequence.create(ux), true); + Grammar grammar = new Grammar(rootUnion) { + public Topology emptyTopology() { return Parser.this.emptyTopology(); } + }; // create the "dead state" - this.dead_state = new State(new HashSet(), true); + this.dead_state = new State(new HashSet(), true, grammar); // construct the start state; this will recursively create *all* the states - this.start = new State(reachable(rootUnion), false); + this.start = new State(reachable(rootUnion), false, grammar); - buildReductions(); - sortReductions(); + buildReductions(grammar); + sortReductions(grammar); } /** fill in the reductions table */ - private void buildReductions() { + private void buildReductions(Grammar grammar) { // for each state, fill in the corresponding "row" of the parse table for(State state : all_states) - for(Position p : state.hs) { + for(Pos p : state.hs) { // if the element following this position is an atom, copy the corresponding // set of rows out of the "master" goto table and into this state's shift table @@ -157,9 +157,9 @@ public abstract class Parser { // RNGLR: we can potentially reduce from any "right-nullable" position -- that is, // any position for which all Elements after it in the Sequence are capable of // matching the empty string. - if (!isRightNullable(p)) continue; - Topology follow = follow(p.owner()); - for(Position p2 = p; p2 != null && p2.element() != null; p2 = p2.next()) { + if (!grammar.isRightNullable(p)) continue; + Topology follow = grammar.follow(p.owner()); + for(Pos p2 = p; p2 != null && p2.element() != null; p2 = p2.next()) { if (!(p2.element() instanceof Union)) throw new Error("impossible -- only Unions can be nullable"); @@ -167,15 +167,15 @@ public abstract class Parser { // not just the follow-set of the last non-nullable element, but the // follow-sets of the nulled elements as well. for(Sequence s : ((Union)p2.element())) - follow = follow.intersect(follow(s)); - Topology set = epsilonFollowSet((Union)p2.element()); + follow = follow.intersect(grammar.follow(s)); + Topology set = grammar.epsilonFollowSet((Union)p2.element()); if (set != null) follow = follow.intersect(set); } // indicate that when the next token is in the set "follow", nodes in this - // state should reduce according to Position "p" + // state should reduce according to Pos "p" state.reductions.put(follow, p); - if (followEof.contains(p.owner())) state.eofReductions.add(p); + if (grammar.followEof.contains(p.owner())) state.eofReductions.add(p); } // optimize the reductions table @@ -188,17 +188,17 @@ public abstract class Parser { } // FIXME: this method needs to be cleaned up and documented - private void sortReductions() { + private void sortReductions(Grammar grammar) { // crude algorithm to assing an ordinal ordering to every position // al will be sorted in DECREASING order (al[0] >= al[1]) - ArrayList al = new ArrayList(); + ArrayList al = new ArrayList(); for(State s : all_states) { for(Object po : s.positions()) { - Sequence.Position p = (Sequence.Position)po; + Sequence.Pos p = (Sequence.Pos)po; if (al.contains(p)) continue; int i=0; for(; i { OUTER: while(true) { for(int i=0; i 0) { - Sequence.Position p = al.remove(j); + if (grammar.comparePositions(al.get(i), al.get(j)) > 0) { + Sequence.Pos p = al.remove(j); al.add(i, p); continue OUTER; } @@ -222,7 +222,7 @@ public abstract class Parser { for(int i=0; i 0) + if (grammar.comparePositions(al.get(k), al.get(i)) > 0) { inc = true; break; } } inc = true; @@ -238,13 +238,13 @@ public abstract class Parser { * A single state in the LR table and the transitions * possible from it * - * A state corresponds to a set of Sequence.Position's. Each + * A state corresponds to a set of Sequence.Pos's. Each * Node in the GSS has a State; the Node represents a set of - * possible parses, one for each Position in the State. + * possible parses, one for each Pos in the State. * - * Every state is either "doomed" or "normal". If a Position + * Every state is either "doomed" or "normal". If a Pos * is part of a Sequence which is a conjunct (that is, it was - * passed to Sequence.{and(),andnot()}), then that Position + * passed to Sequence.{and(),andnot()}), then that Pos * will appear only in doomed States. Furthermore, any set * of Positions reachable from a doomed State also forms a * doomed State. Note that in this latter case, a doomed @@ -267,7 +267,7 @@ public abstract class Parser { class State implements IntegerMappable, Serializable { public final int idx = master_state_idx++; - private final transient HashSet hs; + private final transient HashSet hs; public HashSet> conjunctStates = new HashSet>(); HashMap> gotoSetNonTerminals = new HashMap>(); @@ -279,7 +279,7 @@ public abstract class Parser { private boolean accept = false; private VisitableMap> oshifts = null; - private VisitableMap oreductions = null; + private VisitableMap oreductions = null; public final boolean doomed; // Interface Methods ////////////////////////////////////////////////////////////////////////////// @@ -287,26 +287,26 @@ public abstract class Parser { public boolean doomed() { return doomed; } boolean isAccepting() { return accept; } - Iterable positions() { return hs; } + Iterable positions() { return hs; } boolean canShift(Token t) { return oshifts!=null && oshifts.contains(t); } void invokeShifts(Token t, GSS.Phase phase, Result r) { oshifts.invoke(t, phase, r); } boolean canReduce(Token t) { return oreductions != null && (t==null ? eofReductions.size()>0 : oreductions.contains(t)); } void invokeEpsilonReductions(Token t, Node node) { - if (t==null) for(Position r : eofReductions) node.invoke(r, null); + if (t==null) for(Pos r : eofReductions) node.invoke(r, null); else oreductions.invoke(t, node, null); } void invokeReductions(Token t, Node node, Result b) { - if (t==null) for(Position r : eofReductions) node.invoke(r, b); + if (t==null) for(Pos r : eofReductions) node.invoke(r, b); else oreductions.invoke(t, node, b); } // Constructor ////////////////////////////////////////////////////////////////////////////// /** - * create a new state consisting of all the Positions in hs - * @param hs the set of Positions comprising this State + * create a new state consisting of all the Poss in hs + * @param hs the set of Poss comprising this State * @param all the set of all elements (Atom instances need not be included) * * In principle these two steps could be merged, but they @@ -325,7 +325,7 @@ public abstract class Parser { * for non-Atom Elements. * */ - public State(HashSet hs, boolean doomed) { + public State(HashSet hs, boolean doomed, Grammar grammar) { this.hs = hs; this.doomed = doomed; @@ -334,35 +334,35 @@ public abstract class Parser { ((HashMap)(doomed ? doomed_states : normal_states)).put(hs, this); ((HashSet)all_states).add(this); - for(Position p : hs) { + for(Pos p : hs) { // Step 1a: take note if we are an accepting state // (last position of the root Union's sequence) - if (p.next()==null && !doomed && rootUnion.contains(p.owner())) + if (p.next()==null && !doomed && grammar.rootUnion.contains(p.owner())) accept = true; - // Step 1b: If any Position in the set is the first position of its sequence, then this + // Step 1b: If any Pos in the set is the first position of its sequence, then this // state is responsible for spawning the "doomed" states for each of the // Sequence's conjuncts. This obligation is recorded by adding the to-be-spawned // states to conjunctStates. if (!p.isFirst()) continue; for(Sequence s : p.owner().needs()) if (!hs.contains(s.firstp())) - conjunctStates.add(mkstate(reachable(s.firstp()), true)); + conjunctStates.add(mkstate(reachable(s.firstp()), true, grammar)); for(Sequence s : p.owner().hates()) if (!hs.contains(s.firstp())) - conjunctStates.add(mkstate(reachable(s.firstp()), true)); + conjunctStates.add(mkstate(reachable(s.firstp()), true, grammar)); } - // Step 2a: examine all Position's in this state and compute the mappings from + // Step 2a: examine all Pos's in this state and compute the mappings from // sets of follow tokens (tokens which could follow this position) to sets // of _new_ positions (positions after shifting). These mappings are // collectively known as the _closure_ - TopologicalBag bag0 = new TopologicalBag(); - for(Position position : hs) { + TopologicalBag bag0 = new TopologicalBag(); + for(Pos position : hs) { if (position.isLast() || !(position.element() instanceof Atom)) continue; Atom a = (Atom)position.element(); - HashSet hp = new HashSet(); + HashSet hp = new HashSet(); reachable(position.next(), hp); bag0.addAll(a.getTokenTopology(), hp); } @@ -372,9 +372,9 @@ public abstract class Parser { // computed next-position set). for(Topology r : bag0) { - HashSet h = new HashSet(); - for(Position p : bag0.getAll(r)) h.add(p); - ((TopologicalBag)gotoSetTerminals).put(r, mkstate(h, doomed)); + HashSet h = new HashSet(); + for(Pos p : bag0.getAll(r)) h.add(p); + ((TopologicalBag)gotoSetTerminals).put(r, mkstate(h, doomed, grammar)); } // Step 3: for every Sequence, compute the closure over every position in this set which @@ -384,43 +384,43 @@ public abstract class Parser { // to avoid having to iteratively construct our set of States as shown in most // expositions of the algorithm (ie "keep doing XYZ until things stop changing"). - HashMapBag move = new HashMapBag(); - for(Position p : hs) + HashMapBag move = new HashMapBag(); + for(Pos p : hs) if (!p.isLast() && p.element() instanceof Union) for(Sequence s : ((Union)p.element())) { - HashSet hp = new HashSet(); + HashSet hp = new HashSet(); reachable(p.next(), hp); move.addAll(s, hp); } OUTER: for(Sequence y : move) { // if a reduction is "lame", it should wind up in the dead_state after reducing - HashSet h = move.getAll(y); - State s = mkstate(h, doomed); - for(Position p : hs) + HashSet h = move.getAll(y); + State s = mkstate(h, doomed, grammar); + for(Pos p : hs) if (p.element() != null && (p.element() instanceof Union)) for(Sequence seq : ((Union)p.element())) if (seq.needs.contains(y) || seq.hates.contains(y)) { // FIXME: assumption that no sequence is ever both usefully (non-lamely) matched // and also directly lamely matched - for(Position pp = y.firstp(); pp != null; pp = pp.next()) + for(Pos pp = y.firstp(); pp != null; pp = pp.next()) ((HashMap)gotoSetNonTerminals).put(pp, dead_state); continue OUTER; } - for(Position pp = y.firstp(); pp != null; pp = pp.next()) + for(Pos pp = y.firstp(); pp != null; pp = pp.next()) gotoSetNonTerminals.put(pp, s); } } - private State mkstate(HashSet h, boolean b) { + private State mkstate(HashSet h, boolean b, Grammar grammar) { State ret = (b?doomed_states:normal_states).get(h); - if (ret==null) ret = new State(h,b); + if (ret==null) ret = new State(h,b, grammar); return ret; } public int toInt() { return idx; } public String toString() { StringBuffer ret = new StringBuffer(); - for(Position p : hs) + for(Pos p : hs) ret.append(p+"\n"); return ret.toString(); } @@ -430,23 +430,23 @@ public abstract class Parser { // Helpers ////////////////////////////////////////////////////////////////////////////// - private static HashSet reachable(Element e) { - HashSet h = new HashSet(); + private static HashSet reachable(Element e) { + HashSet h = new HashSet(); reachable(e, h); return h; } - private static void reachable(Element e, HashSet h) { + private static void reachable(Element e, HashSet h) { if (e instanceof Atom) return; for(Sequence s : ((Union)e)) reachable(s.firstp(), h); } - private static void reachable(Position p, HashSet h) { + private static void reachable(Pos p, HashSet h) { if (h.contains(p)) return; h.add(p); if (p.element() != null) reachable(p.element(), h); } - private static HashSet reachable(Position p) { - HashSet ret = new HashSet(); + private static HashSet reachable(Pos p) { + HashSet ret = new HashSet(); reachable(p, ret); return ret; } diff --git a/src/edu/berkeley/sbp/Sequence.java b/src/edu/berkeley/sbp/Sequence.java index eff94a4..f63c30a 100644 --- a/src/edu/berkeley/sbp/Sequence.java +++ b/src/edu/berkeley/sbp/Sequence.java @@ -127,9 +127,9 @@ public abstract class Sequence implements Iterable, SequenceOrElement { this.firstp = new Position(this, 0, null); } - abstract Forest epsilonForm(Input.Region loc, Grammar cache); + abstract Forest epsilonForm(Input.Region loc); - protected abstract Forest postReduce(Input.Region loc, Forest[] args, Pos p); + protected abstract Forest postReduce(Input.Region loc, Forest[] args, Position p); // Position ////////////////////////////////////////////////////////////////////////////// @@ -158,7 +158,7 @@ public abstract class Sequence implements Iterable, SequenceOrElement { abstract Element element(); public abstract int numPops(); - public abstract Forest rewrite(Input.Region loc, Grammar cache); + public abstract Forest rewrite(Input.Region loc); } /** the imaginary position before or after an element of a sequence; corresponds to an "LR item" */ @@ -171,12 +171,10 @@ public abstract class Sequence implements Iterable, SequenceOrElement { public int[] hates(); public boolean owner_needed_or_hated(); public int numPops(); - public Forest rewrite(Input.Region loc, Grammar cache) + public Forest rewrite(Input.Region loc) }; } */ - public int ord = -1; - public int ord() { return ord; } public int numPops() { return pos; } final int pos; @@ -220,14 +218,14 @@ public abstract class Sequence implements Iterable, SequenceOrElement { // Position ///////////////////////////////////////////////////////////////////////////////// - public final Forest rewrite(Input.Region loc, Grammar cache) { - if (this==firstp()) epsilonForm(loc, cache); + public final Forest rewrite(Input.Region loc) { + if (isFirst()) owner().epsilonForm(loc); for(int i=0; i, SequenceOrElement { return sb; } - // Specialized Subclasses ////////////////////////////////////////////////////////////////////////////// static class Singleton extends Sequence { @@ -281,8 +278,8 @@ public abstract class Sequence implements Iterable, SequenceOrElement { public Singleton(Element[] e, int idx) { super(e); this.idx = idx; } public Forest postReduce(Input.Region loc, Forest[] args, Position p) { return args[idx]; } Sequence _clone() { return new Singleton(elements,idx); } - Forest epsilonForm(Input.Region loc, Grammar cache) { - return ((Union)elements[idx]).epsilonForm(loc, cache); + Forest epsilonForm(Input.Region loc) { + return ((Union)elements[idx]).epsilonForm(loc); } } @@ -321,7 +318,7 @@ public abstract class Sequence implements Iterable, SequenceOrElement { if (spacing) for(int i=0; i<50-len; i++) sb.append(' '); return sb; } - Forest epsilonForm(Input.Region loc, Grammar cache) { + Forest epsilonForm(Input.Region loc) { return Forest.create(loc, tag, new Forest[0], lifts); } } diff --git a/src/edu/berkeley/sbp/SequenceOrElement.java b/src/edu/berkeley/sbp/SequenceOrElement.java index 5a07134..a13917f 100644 --- a/src/edu/berkeley/sbp/SequenceOrElement.java +++ b/src/edu/berkeley/sbp/SequenceOrElement.java @@ -11,3 +11,4 @@ import java.lang.ref.*; interface SequenceOrElement { } + diff --git a/src/edu/berkeley/sbp/Union.java b/src/edu/berkeley/sbp/Union.java index a0660e3..8f3ea2e 100644 --- a/src/edu/berkeley/sbp/Union.java +++ b/src/edu/berkeley/sbp/Union.java @@ -74,12 +74,12 @@ public class Union extends Element implements Iterable { } /** the Forest which results from matching this Union against the empty string at region region */ - Forest epsilonForm(Input.Region region, Grammar cache) { + Forest epsilonForm(Input.Region region) { viewed = true; Forest.Many epsilonForm = new Forest.Many(); for(Sequence s : this) - if (cache.possiblyEpsilon(s)) - epsilonForm.merge(s.epsilonForm(region, cache)); + if (Element.possiblyEpsilon(s)) + epsilonForm.merge(s.epsilonForm(region)); return epsilonForm; } -- 1.7.10.4