-// Copyright 2006 all rights reserved; see LICENSE file for BSD-style license
+// Copyright 2006-2007 all rights reserved; see LICENSE file for BSD-style license
package edu.berkeley.sbp;
import edu.berkeley.sbp.util.*;
import edu.berkeley.sbp.Sequence.Position;
import java.io.*;
import java.util.*;
-// FEATURE: try harder to "fuse" states together along two dimensions:
-// - identical (equivalent) states, or states that subsume each other
-// - unnecessary intermediate states ("short cut" GLR)
-
/** a parser which translates an Input<Token> into a Forest<NodeType> */
public abstract class Parser<Token, NodeType> {
public abstract Topology<Token> emptyTopology();
public String toString() { return pt.toString(); }
- Cache cache() { return pt; }
+ Grammar cache() { return pt; }
/** parse <tt>input</tt>, and return the shared packed parse forest (or throw an exception) */
public Forest<NodeType> parse(Input<Token> input) throws IOException, ParseFailed {
verbose = System.getProperty("sbp.verbose", null) != null;
spinpos = 0;
+ GSS gss = new GSS(input, this);
try {
- GSS gss = new GSS(input, this);
for(GSS.Phase current = gss.new Phase<Token>(pt.start); ;) {
-
- if (verbose) {
- // FIXME: clean this up
- String s;
- s = " " + spin[spinpos++ % (spin.length)]+" parsing ";
- s += input.getName();
- s += " "+input.getLocation();
- while(s.indexOf(':') != -1 && s.indexOf(':') < 8) s = " " + s;
- String y = "@"+gss.viewPos+" ";
- while(y.length() < 9) y = " " + y;
- s += y;
- s += " nodes="+gss.numOldNodes;
- while(s.length() < 50) s = s + " ";
- s += " shifted="+gss.numNewNodes;
- while(s.length() < 60) s = s + " ";
- s += " reductions="+gss.numReductions;
- System.err.print("\r"+s+ANSI.clreol()+"\r");
- }
-
+ if (verbose) debug(current.token, gss, input);
if (current.isDone()) return (Forest<NodeType>)current.finalResult;
- Forest forest = shiftToken((Token)current.token, current.getRegion());
+ Input.Region region = current.getLocation().createRegion(current.getNextLocation());
+ Forest forest = shiftToken((Token)current.token, region);
current = gss.new Phase<Token>(current, forest);
}
- } finally { if (verbose) System.err.print("\r"+ANSI.clreol()); }
+ } finally {
+ if (verbose) {
+ System.err.print("\r"+ANSI.clreol());
+ debug(null, gss, input);
+ }
+ }
}
// Spinner //////////////////////////////////////////////////////////////////////////////
System.err.print("\r " + spin[spinpos++ % (spin.length)]+"\r");
}
+ // Verbose-mode debug output: echoes each consumed character to stderr and
+ // paints an inverted status line with parser progress counters.
+ // NOTE(review): assumes token's string form is non-empty when t != null — confirm.
+ private int _last = -1;
+ private String buf = "";
+ private void debug(Object t, GSS gss, Input input) {
+ //FIXME
+ // First character of the token's string form; -1 signals end-of-parse flush.
+ int c = t==null ? -1 : ((t+"").charAt(0));
+ int last = _last;
+ _last = c;
+ switch(c) {
+ case edu.berkeley.sbp.chr.CharAtom.left:
+ buf += "\033[31m>\033[0m";
+ break;
+ case edu.berkeley.sbp.chr.CharAtom.right:
+ buf += "\033[31m<\033[0m";
+ break;
+ case -1: // FIXME
+ case '\n':
+ // Newline (or final flush): clear the status line, emit the buffered echo.
+ if (verbose) {
+ if (last==' ') buf += ANSI.blue("\\n");
+ System.err.println("\r"+ANSI.clreol()+"\r"+buf);
+ buf = "";
+ }
+ break;
+ default:
+ buf += ANSI.cyan(""+((char)c));
+ break;
+ }
+ if (t==null) return;
+
+ // FIXME: clean this up
+ // Build the fixed-width status line: spinner, input name/location,
+ // phase position, then node/shift/reduction counters at fixed columns.
+ String s;
+ s = " " + spin[spinpos++ % (spin.length)]+" parsing ";
+ s += input.getName();
+ s += " "+input.getLocation();
+ while(s.indexOf(':') != -1 && s.indexOf(':') < 8) s = " " + s;
+ String y = "@"+gss.viewPos+" ";
+ while(y.length() < 9) y = " " + y;
+ s += y;
+ s += " nodes="+gss.numOldNodes;
+ while(s.length() < 50) s = s + " ";
+ s += " shifted="+gss.numNewNodes;
+ while(s.length() < 60) s = s + " ";
+ s += " reductions="+gss.numReductions;
+ while(s.length() < 78) s = s + " ";
+ System.err.print("\r"+ANSI.invert(s+ANSI.clreol())+"\r");
+ }
+
// Table //////////////////////////////////////////////////////////////////////////////
/** an SLR(1) parse table which may contain conflicts */
- class Table extends Cache<Token> {
+ class Table extends Grammar<Token> implements Serializable {
/** the start state */
final State<Token> start;
private int master_state_idx = 0;
/** all the states for this table */
- HashSet<State<Token>> all_states = new HashSet<State<Token>>();
+ private transient HashSet<State<Token>> all_states = new HashSet<State<Token>>();
/** all the doomed states in this table */
- HashMap<HashSet<Position>,State<Token>> doomed_states = new HashMap<HashSet<Position>,State<Token>>();
+ private transient HashMap<HashSet<Position>,State<Token>> doomed_states = new HashMap<HashSet<Position>,State<Token>>();
/** all the non-doomed states in this table */
- HashMap<HashSet<Position>,State<Token>> normal_states = new HashMap<HashSet<Position>,State<Token>>();
+ private transient HashMap<HashSet<Position>,State<Token>> normal_states = new HashMap<HashSet<Position>,State<Token>>();
Topology<Token> emptyTopology() { return Parser.this.emptyTopology(); }
// al will be sorted in DECREASING order (al[0] >= al[1])
ArrayList<Sequence.Position> al = new ArrayList<Sequence.Position>();
for(State s : all_states) {
- for(Object po : s) {
+ for(Object po : s.positions()) {
Sequence.Position p = (Sequence.Position)po;
if (al.contains(p)) continue;
int i=0;
* space+time complexity in otherwise simple grammars. There
* is an example of this in the regression suite.
*/
- class State<Token> implements IntegerMappable, Iterable<Position> {
+ class State<Token> implements IntegerMappable, Serializable {
public final int idx = master_state_idx++;
- private final HashSet<Position> hs;
+ private final transient HashSet<Position> hs;
public HashSet<State<Token>> conjunctStates = new HashSet<State<Token>>();
- HashMap<Sequence,State<Token>> gotoSetNonTerminals = new HashMap<Sequence,State<Token>>();
+ HashMap<Pos,State<Token>> gotoSetNonTerminals = new HashMap<Pos,State<Token>>();
private transient TopologicalBag<Token,State<Token>> gotoSetTerminals = new TopologicalBag<Token,State<Token>>();
- private TopologicalBag<Token,Position> reductions = new TopologicalBag<Token,Position>();
- private HashSet<Position> eofReductions = new HashSet<Position>();
+ private TopologicalBag<Token,Pos> reductions = new TopologicalBag<Token,Pos>();
+ private HashSet<Pos> eofReductions = new HashSet<Pos>();
private TopologicalBag<Token,State<Token>> shifts = new TopologicalBag<Token,State<Token>>();
private boolean accept = false;
// Interface Methods //////////////////////////////////////////////////////////////////////////////
+ public boolean doomed() { return doomed; }
boolean isAccepting() { return accept; }
- public Iterator<Position> iterator() { return hs.iterator(); }
+
+ Iterable<Position> positions() { return hs; }
+
boolean canShift(Token t) { return oshifts!=null && oshifts.contains(t); }
void invokeShifts(Token t, GSS.Phase phase, Result r) { oshifts.invoke(t, phase, r); }
boolean canReduce(Token t) {
if (seq.needs.contains(y) || seq.hates.contains(y)) {
// FIXME: assumption that no sequence is ever both usefully (non-lamely) matched
// and also directly lamely matched
- ((HashMap)gotoSetNonTerminals).put(y, dead_state);
+ for(Position pp = y.firstp(); pp != null; pp = pp.next())
+ ((HashMap)gotoSetNonTerminals).put(pp, dead_state);
continue OUTER;
}
- gotoSetNonTerminals.put(y, s);
+ for(Position pp = y.firstp(); pp != null; pp = pp.next())
+ gotoSetNonTerminals.put(pp, s);
}
}
}
public int toInt() { return idx; }
+ // Debug rendering of this state: one parse position per line.
+ public String toString() {
+ // StringBuilder (unsynchronized) instead of legacy StringBuffer; chained
+ // append avoids building an intermediate String per position.
+ StringBuilder ret = new StringBuilder();
+ for(Position p : hs)
+ ret.append(p).append('\n');
+ return ret.toString();
+ }
}
}