X-Git-Url: http://git.megacz.com/?p=sbp.git;a=blobdiff_plain;f=src%2Fedu%2Fberkeley%2Fsbp%2FParser.java;h=9db0bf95b61e642415f48595e0ca8f39807433ba;hp=4aa65151106cb01ece84878d9b80a5c7110b92db;hb=3ee451bce342d4bb61ad6235ba57bdf817bfdd1a;hpb=b8a597c8d1a29afc24f9b89f726d5b1a9b9aeec1 diff --git a/src/edu/berkeley/sbp/Parser.java b/src/edu/berkeley/sbp/Parser.java index 4aa6515..9db0bf9 100644 --- a/src/edu/berkeley/sbp/Parser.java +++ b/src/edu/berkeley/sbp/Parser.java @@ -1,3 +1,5 @@ +// Copyright 2006 all rights reserved; see LICENSE file for BSD-style license + package edu.berkeley.sbp; import edu.berkeley.sbp.*; import edu.berkeley.sbp.util.*; @@ -5,35 +7,37 @@ import edu.berkeley.sbp.Sequence.Position; import java.io.*; import java.util.*; -/** a parser which translates streams of Tokens of type T into a Forest */ -public abstract class Parser { +/** a parser which translates an Input<Token> into a Forest<NodeType> */ +public abstract class Parser { - protected final Table pt; + protected final Table pt; /** create a parser to parse the grammar with start symbol u */ - protected Parser(Union u, Topology top) { this.pt = new Table(u, top); } - protected Parser(Table pt) { this.pt = pt; } + public Parser(Union u, Topology top) { this.pt = new Table(u, top); } + Parser(Table pt) { this.pt = pt; } /** implement this method to create the output forest corresponding to a lone shifted input token */ - protected abstract Forest shiftToken(Tok t, Input.Location newloc); + public abstract Forest shiftToken(Token t, Input.Location newloc); boolean helpgc = true; public String toString() { return pt.toString(); } /** parse input, and return the shared packed parse forest (or throw an exception) */ - public Forest parse(Input input) throws IOException, ParseFailed { + public Forest parse(Input input) throws IOException, ParseFailed { GSS gss = new GSS(); Input.Location loc = input.getLocation(); - GSS.Phase current = gss.new Phase(null, this, null, input.next(), loc, null); - current.newNode(null, Forest.create(null, null, null, false), pt.start, true); + Token tok = input.next(); + GSS.Phase current = gss.new Phase(null, this, null, tok, loc, input.getLocation(), null); + current.newNode(null, Forest.create(loc.createRegion(loc), null, null, false), pt.start, true); int count = 1; for(int idx=0;;idx++) { Input.Location oldloc = loc; - loc = input.getLocation(); current.reduce(); - Forest forest = current.token==null ? null : shiftToken((Tok)current.token, loc); - GSS.Phase next = gss.new Phase(current, this, current, input.next(), loc, forest); + Forest forest = current.token==null ? null : shiftToken((Token)current.token, loc); + loc = input.getLocation(); + Token nextToken = input.next(); + GSS.Phase next = gss.new Phase(current, this, current, nextToken, loc, input.getLocation(), forest); if (!helpgc) { FileOutputStream fos = new FileOutputStream("out-"+idx+".dot"); PrintWriter p = new PrintWriter(new OutputStreamWriter(fos)); @@ -45,7 +49,7 @@ public abstract class Parser { p.close(); } count = next.size(); - if (current.isDone()) return (Forest)gss.finalResult; + if (current.isDone()) return (Forest)gss.finalResult; current = next; } } @@ -53,21 +57,21 @@ public abstract class Parser { // Table ////////////////////////////////////////////////////////////////////////////// /** an SLR(1) parse table which may contain conflicts */ - static class Table extends Walk.Cache { + static class Table extends Walk.Cache { public String toString() { StringBuffer sb = new StringBuffer(); sb.append("parse table"); - for(State state : all_states.values()) { + for(State state : all_states.values()) { sb.append(" " + state + "\n"); - for(Topology t : state.shifts) { + for(Topology t : state.shifts) { sb.append(" shift \""+ new edu.berkeley.sbp.chr.CharTopology((IntegerTopology)t)+"\" => "); for(State st : state.shifts.getAll(t)) sb.append(st.idx+" "); sb.append("\n"); } - for(Topology t : state.reductions) + for(Topology t : state.reductions) sb.append(" reduce \""+ new edu.berkeley.sbp.chr.CharTopology((IntegerTopology)t)+"\" => " + state.reductions.getAll(t) + "\n"); @@ -77,7 +81,7 @@ public abstract class Parser { public final Walk.Cache cache = this; - private void walk(Element e, HashSet hs) { + private void walk(Element e, HashSet hs) { if (e==null) return; if (hs.contains(e)) return; hs.add(e); @@ -85,7 +89,7 @@ public abstract class Parser { for(Sequence s : (Union)e) walk(s, hs); } - private void walk(Sequence s, HashSet hs) { + private void walk(Sequence s, HashSet hs) { hs.add(s); for(Position p = s.firstp(); p != null; p = p.next()) walk(p.element(), hs); @@ -94,14 +98,14 @@ public abstract class Parser { } /** the start state */ - public final State start; + public final State start; /** the state from which no reductions can be done */ - private final State dead_state; + private final State dead_state; /** used to generate unique values for State.idx */ private int master_state_idx = 0; - HashMap,State> all_states = new HashMap,State>(); + HashMap,State> all_states = new HashMap,State>(); /** construct a parse table for the given grammar */ public Table(Topology top) { this("s", top); } @@ -114,18 +118,18 @@ public abstract class Parser { cache.eof.put(start0, true); // construct the set of states - HashSet all_elements = new HashSet(); + HashSet all_elements = new HashSet(); walk(start0, all_elements); - for(Element e : all_elements) + for(SequenceOrElement e : all_elements) cache.ys.addAll(e, new Walk.YieldSet(e, cache).walk()); HashSet hp = new HashSet(); reachable(start0, hp); - this.dead_state = new State(new HashSet(), all_states, all_elements); - this.start = new State(hp, all_states, all_elements); + this.dead_state = new State(new HashSet(), all_states, all_elements); + this.start = new State(hp, all_states, all_elements); // for each state, fill in the corresponding "row" of the parse table - for(State state : all_states.values()) + for(State state : all_states.values()) for(Position p : state.hs) { // the Grammar's designated "last position" is the only accepting state @@ -147,7 +151,7 @@ public abstract class Parser { state.shifts.addAll(state.gotoSetTerminals.subset(((Atom)p.element()).getTokenTopology())); } if (top instanceof IntegerTopology) - for(State state : all_states.values()) { + for(State state : all_states.values()) { state.oreductions = state.reductions.optimize(((IntegerTopology)top).functor()); state.oshifts = state.shifts.optimize(((IntegerTopology)top).functor()); } @@ -161,34 +165,34 @@ public abstract class Parser { /** a single state in the LR table and the transitions possible from it */ - class State implements Comparable>, IntegerMappable, Iterable { + class State implements Comparable>, IntegerMappable, Iterable { public final int idx = master_state_idx++; private final HashSet hs; - public transient HashMap> gotoSetNonTerminals = new HashMap>(); - private transient TopologicalBag> gotoSetTerminals = new TopologicalBag>(); + public transient HashMap> gotoSetNonTerminals = new HashMap>(); + private transient TopologicalBag> gotoSetTerminals = new TopologicalBag>(); - private TopologicalBag reductions = new TopologicalBag(); + private TopologicalBag reductions = new TopologicalBag(); private HashSet eofReductions = new HashSet(); - private TopologicalBag> shifts = new TopologicalBag>(); + private TopologicalBag> shifts = new TopologicalBag>(); private boolean accept = false; - private VisitableMap> oshifts = null; - private VisitableMap oreductions = null; + private VisitableMap> oshifts = null; + private VisitableMap oreductions = null; // Interface Methods ////////////////////////////////////////////////////////////////////////////// boolean isAccepting() { return accept; } public Iterator iterator() { return hs.iterator(); } - boolean canShift(Tok t) { return oshifts!=null && oshifts.contains(t); } - void invokeShifts(Tok t, Invokable,B,C> irbc, B b, C c) { + boolean canShift(Token t) { return oshifts!=null && oshifts.contains(t); } + void invokeShifts(Token t, Invokable,B,C> irbc, B b, C c) { oshifts.invoke(t, irbc, b, c); } - boolean canReduce(Tok t) { return oreductions != null && (t==null ? eofReductions.size()>0 : oreductions.contains(t)); } - void invokeReductions(Tok t, Invokable irbc, B b, C c) { + boolean canReduce(Token t) { return oreductions != null && (t==null ? eofReductions.size()>0 : oreductions.contains(t)); } + void invokeReductions(Token t, Invokable irbc, B b, C c) { if (t==null) for(Position r : eofReductions) irbc.invoke(r, b, c); else oreductions.invoke(t, irbc, b, c); } @@ -218,8 +222,8 @@ public abstract class Parser { * */ public State(HashSet hs, - HashMap,State> all_states, - HashSet all_elements) { + HashMap,State> all_states, + HashSet all_elements) { this.hs = hs; // register ourselves in the all_states hash so that no @@ -231,7 +235,7 @@ public abstract class Parser { // of _new_ positions (positions after shifting). These mappings are // collectively known as the _closure_ - TopologicalBag bag0 = new TopologicalBag(); + TopologicalBag bag0 = new TopologicalBag(); for(Position position : hs) { if (position.isLast() || !(position.element() instanceof Atom)) continue; Atom a = (Atom)position.element(); @@ -244,10 +248,10 @@ public abstract class Parser { // set, add that character set to the goto table (with the State corresponding to the // computed next-position set). - for(Topology r : bag0) { + for(Topology r : bag0) { HashSet h = new HashSet(); for(Position p : bag0.getAll(r)) h.add(p); - gotoSetTerminals.put(r, all_states.get(h) == null ? new State(h, all_states, all_elements) : all_states.get(h)); + gotoSetTerminals.put(r, all_states.get(h) == null ? new State(h, all_states, all_elements) : all_states.get(h)); } // Step 2: for every non-Atom element (ie every Element which has a corresponding reduction), @@ -258,19 +262,19 @@ public abstract class Parser { // to avoid having to iteratively construct our set of States as shown in most // expositions of the algorithm (ie "keep doing XYZ until things stop changing"). - HashMapBag move = new HashMapBag(); + HashMapBag move = new HashMapBag(); for(Position p : hs) { Element e = p.element(); if (e==null) continue; - for(Element y : cache.ys.getAll(e)) { + for(SequenceOrElement y : cache.ys.getAll(e)) { HashSet hp = new HashSet(); reachable(p.next(), hp); move.addAll(y, hp); } } - OUTER: for(Element y : move) { + OUTER: for(SequenceOrElement y : move) { HashSet h = move.getAll(y); - State s = all_states.get(h) == null ? new State(h, all_states, all_elements) : all_states.get(h); + State s = all_states.get(h) == null ? new State(h, all_states, all_elements) : all_states.get(h); // if a reduction is "lame", it should wind up in the dead_state after reducing if (y instanceof Sequence) { for(Position p : hs) { @@ -279,13 +283,13 @@ public abstract class Parser { for(Sequence seq : u) if (seq.needs.contains((Sequence)y) || seq.hates.contains((Sequence)y)) { // FIXME: what if there are two "routes" to get to the sequence? - ((HashMap)gotoSetNonTerminals).put(y, dead_state); + ((HashMap)gotoSetNonTerminals).put((Sequence)y, dead_state); continue OUTER; } } } + gotoSetNonTerminals.put((Sequence)y, s); } - gotoSetNonTerminals.put(y, s); } } @@ -304,7 +308,7 @@ public abstract class Parser { return ret.toString(); } - public int compareTo(State s) { return idx==s.idx ? 0 : idx < s.idx ? -1 : 1; } + public int compareTo(State s) { return idx==s.idx ? 0 : idx < s.idx ? -1 : 1; } public int toInt() { return idx; } } }