tests/tibdoc.g \
tests/bitstream.tib
+java15: edu.berkeley.sbp.jar
+ $(java) -cp $< edu.berkeley.sbp.misc.Java15 \
+ tests/java15.g \
+ tests/java15.test
+
demo: edu.berkeley.sbp.jar
$(java) -cp $< edu.berkeley.sbp.misc.Demo \
tests/demo.g \
'(11+2*3)-44'
+demo2: edu.berkeley.sbp.jar
+ $(java) -cp $< edu.berkeley.sbp.misc.Demo2
+
regress:
make boot
rm edu.berkeley.sbp.jar
edu.berkeley.sbp.jar: $(shell find src -name \*.java)
mkdir -p bin
- javac -cp javax.servlet.jar:tests/ArchSimA3.jar:tests/grappa.jar -d bin -sourcepath src $^
+ javac -cp javax.servlet.jar:tests/ArchSimA3.jar:tests/grappa.jar -d bin -sourcepath src $^
cd bin; jar cf ../$@ .
-
+#-Xlint:unchecked
javadoc:
rm -rf doc/api
mkdir -p doc/api
- javadoc -sourcepath src -public -d doc/api `find src -name \*.java`
+ javadoc \
+ -linksource \
+ -windowtitle "SBP: the Scannerless Boolean Parser" \
+ -sourcepath src \
+ -header "<b>SBP </b><br>v1.0" \
+ -public \
+ -notree \
+ -noindex \
+ -nonavbar \
+ -stylesheetfile doc/javadoc.css \
+ -noqualifier all \
+ -d doc/api \
+ edu.berkeley.sbp
clean:
rm -rf doc/api edu.berkeley.sbp.jar bin edu.berkeley.sbp.tar.gz
_____________________________________________________________________________
Immediately
- - Sequence extends Element (?) -> then add Union.add(element)
- - Parameterize Sequence/Union/Atom <Tok,Result>
- - Make sure we never use raw types
- - do Forest/Tree still need a Region?
+ - evil problems with: (x y? z /ws)
+ - it gets even more evil than that
- - More topology untangling
+ - Annotation Tutorial
- - tib: use the lexer only for indentation increases/decreases
+ - MUST HAVE BETTER ERROR MESSAGES
+ - use for developing java15.g
- - grammar highlighting?
+ - java15.g
+
+ - topology no longer needed as an arg to parser
+ - expose parser's protected method?
+ - do Forest/Tree still need a Region?
- copyright notices
- - tutorial
______________________________________________________________________________
v1.1
+ - More topology untangling [later]
+ - tib: use the lexer only for indentation increases/decreases
+ - grammar highlighting?
+
- Forest needs a "manual access" API
- the unwrap bit in Forest makes it really hard to expose an API for forests
- - evil problems with (x y? z /ws)
______________________________________________________________________________
--- /dev/null
+/* Page background color */
+body {
+ background-color: #F0F0E0;
+ font-family: arial, helvetica, sans-serif;
+ font-size: 14px;
+ margin-left: 40px;
+ margin-right: 40px;
+ margin-top: 0px;
+}
+
+p { }
+
+/* Headings */
+h1 { border-top: 2px solid black; font-size: 14px; }
+h2 { font-size: 20px; width: 100%; margin-left: -40px; margin-right: -40; background-color:blue; padding: 5px; padding-left: 40px; padding-right: 40px; color: white; }
+h3 { border-top: 2px solid black; font-size: 14px; }
+pre { }
+hr { width: 0; }
+tt { font-size: 14px; }
+
+div.example {
+ background: black;
+ border: 2px solid gray;
+ color: #ddd;
+ font-family: monospace;
+ padding: 4px;
+ font-size: 14px;
+}
+
+a:link { color: #00f; text-decoration: none; }
+a:active { color: #00f; text-decoration: none; border: 1px blue; }
+a:visited { color: #44f; text-decoration: none; }
+a:hover { color: #00f; text-decoration: none; border-bottom: dotted 1px blue; }
+
+/* Table colors */
+.TableHeadingColor { background: #CCCCFF; border: 0px white; } /* Dark mauve */
+.TableSubHeadingColor { background: #EEEEFF; border: 0px white; } /* Light mauve */
+.TableRowColor { background: #FFFFFF; font-size: 14px; } /* White */
+
+table { border: 1px black solid; }
+td { border: 0px white; border-top: 1px dotted gray; font-size: 14px; }
+th { border: 0px white; }
+
+
+
+
+/* Font used in left-hand frame lists */
+.FrameTitleFont { font-size: 100%; font-family: Helvetica, Arial, sans-serif }
+.FrameHeadingFont { font-size: 90%; font-family: Helvetica, Arial, sans-serif }
+.FrameItemFont { font-size: 90%; font-family: Helvetica, Arial, sans-serif }
+
+/* Navigation bar fonts and colors */
+.NavBarCell1 { background-color:#EEEEFF;} /* Light mauve */
+.NavBarCell1Rev { background-color:#00008B;} /* Dark Blue */
+.NavBarFont1 { font-family: Arial, Helvetica, sans-serif; color:#000000;}
+.NavBarFont1Rev { font-family: Arial, Helvetica, sans-serif; color:#FFFFFF;}
+
+.NavBarCell2 { font-family: Arial, Helvetica, sans-serif; background-color:#FFFFFF;}
+.NavBarCell3 { font-family: Arial, Helvetica, sans-serif; background-color:#FFFFFF;}
* <p>
* This class is a topology over itself (yes, that's sort of Frege'd
* up) so that Atoms can be intersected and unioned with each other
- * to result in other Atom<T>'s (rather than raw Topology<T>'s, which
+ * to result in other Atom<Token>'s (rather than raw Topology<Token>'s, which
* are not Elements). If you want the latter, use the
* getTokenTopology() method.
* </p>
*/
-public abstract class Atom<T> extends Element implements Topology<Atom<T>> {
+public abstract class Atom<Token> extends Element implements Topology<Atom<Token>> {
/** the set (topology) of tokens that can match this element */
- public abstract Topology<T> getTokenTopology();
+ public abstract Topology<Token> getTokenTopology();
StringBuffer toString(StringBuffer sb) { sb.append(this); return sb; }
abstract StringBuffer toString(StringBuffer sb);
/** returns the Forest resulting from matching this element against the empty string */
- Forest epsilonForm() { throw new Error("element " + this + " has no epsilon form"); }
+ Forest<?> epsilonForm() { throw new Error("element " + this + " has no epsilon form"); }
}
* shared packed parse forest).
* </font>
*/
-public abstract class Forest<T> implements GraphViz.ToGraphViz {
+public abstract class Forest<NodeType> implements GraphViz.ToGraphViz {
/** assume that this forest contains exactly one tree and return it; otherwise throw an exception */
- public abstract Tree<T> expand1() throws Ambiguous;
+ public abstract Tree<NodeType> expand1() throws Ambiguous;
/** expand this forest into a set of trees */
- public void expand(HashSet<Tree<T>> ht) { expand(ht, new HashSet<Forest<T>>(), null); }
+ public void expand(HashSet<Tree<NodeType>> ht) { expand(ht, new HashSet<Forest<NodeType>>(), null); }
- static <T> Forest<T> create(Input.Region loc, T head, Forest<T>[] children, boolean lift) {
- return new One<T>(loc, head, children, lift);
+ static <NodeType> Forest<NodeType> create(Input.Region loc, NodeType head, Forest<NodeType>[] children, boolean lift) {
+ return new One<NodeType>(loc, head, children, lift);
}
/** create a new forest */
- public static <T> Forest<T> create(Input.Region loc, T head, Forest<T>[] children) {
+ public static <NodeType> Forest<NodeType> create(Input.Region loc, NodeType head, Forest<NodeType>[] children) {
return Forest.create(loc, head, children, false); }
// Package-Private //////////////////////////////////////////////////////////////////////////////
- abstract void expand(HashSet<Tree<T>> ht, HashSet<Forest<T>> ignore, Tree<T> bogus);
- abstract void gather(HashSet<Forest<T>> ignore);
+ abstract void expand(HashSet<Tree<NodeType>> ht, HashSet<Forest<NodeType>> ignore, Tree<NodeType> bogus);
+ abstract void gather(HashSet<Forest<NodeType>> ignore);
abstract void edges(GraphViz.Node n);
boolean ambiguous() { return false; }
// One //////////////////////////////////////////////////////////////////////////////
/** A "single" forest with a head and child subforests */
- private static class One<T> extends Forest<T> {
+ private static class One<NodeType> extends Forest<NodeType> {
private final Input.Region location;
- private final T head;
- private final Forest<T>[] children;
+ private final NodeType head;
+ private final Forest<NodeType>[] children;
/** if true, the last child's children are considered children of this node */
private final boolean lift;
- private One(Input.Region loc, T head, Forest<T>[] children, boolean lift) {
+ private One(Input.Region loc, NodeType head, Forest<NodeType>[] children, boolean lift) {
this.location = loc;
this.head = head;
this.children = children==null ? emptyForestArray : new Forest[children.length];
this.lift = lift;
}
- public Tree<T> expand1() throws Ambiguous {
- Tree<T>[] ret = new Tree[children.length];
+ public Tree<NodeType> expand1() throws Ambiguous {
+ Tree<NodeType>[] ret = new Tree[children.length];
for(int i=0; i<children.length; i++) ret[i] = children[i].expand1();
- return new Tree<T>(location, head, ret, lift);
+ return new Tree<NodeType>(location, head, ret, lift);
}
- void gather(HashSet<Forest<T>> hf) {
+ void gather(HashSet<Forest<NodeType>> hf) {
hf.add(this);
- for(Forest<T> f : children) f.gather(hf);
+ for(Forest<NodeType> f : children) f.gather(hf);
}
- void expand(HashSet<Tree<T>> ht, HashSet<Forest<T>> ignore, Tree<T> bogus) {
+ void expand(HashSet<Tree<NodeType>> ht, HashSet<Forest<NodeType>> ignore, Tree<NodeType> bogus) {
if (ignore.contains(this)) { ht.add(bogus); return; }
expand(0, new Tree[children.length], ht, ignore, bogus);
}
- private void expand(final int i, Tree<T>[] ta, HashSet<Tree<T>> ht, HashSet<Forest<T>> ignore, Tree<T> bogus) {
+ private void expand(final int i, Tree<NodeType>[] ta, HashSet<Tree<NodeType>> ht, HashSet<Forest<NodeType>> ignore, Tree<NodeType> bogus) {
if (i==children.length) {
- ht.add(new Tree<T>(location, head, ta, lift));
+ ht.add(new Tree<NodeType>(location, head, ta, lift));
} else {
- HashSet<Tree<T>> ht2 = new HashSet<Tree<T>>();
+ HashSet<Tree<NodeType>> ht2 = new HashSet<Tree<NodeType>>();
children[i].expand(ht2, ignore, bogus);
- for(Tree<T> tc : ht2) {
+ for(Tree<NodeType> tc : ht2) {
ta[i] = tc;
expand(i+1, ta, ht, ignore, bogus);
ta[i] = null;
// Many //////////////////////////////////////////////////////////////////////////////
/** An "ambiguity node"; this is immutable once it has been "looked at" */
- static class Many<T> extends Forest<T> {
+ static class Many<NodeType> extends Forest<NodeType> {
HashSet<GSS.Phase.Node> parents = new HashSet<GSS.Phase.Node>();
- private FastSet<Forest<T>> hp = new FastSet<Forest<T>>();
+ private FastSet<Forest<NodeType>> hp = new FastSet<Forest<NodeType>>();
private boolean touched = false;
public Many() { }
- public Tree<T> expand1() throws Ambiguous {
+ public Tree<NodeType> expand1() throws Ambiguous {
touched();
if (hp.size() > 1) {
- HashSet<Forest<T>> hf0 = new HashSet<Forest<T>>();
- Iterator<Forest<T>> ih = hp.iterator();
+ HashSet<Forest<NodeType>> hf0 = new HashSet<Forest<NodeType>>();
+ Iterator<Forest<NodeType>> ih = hp.iterator();
ih.next().gather(hf0);
- for(Forest<T> f : hp) {
- HashSet<Forest<T>> hf1 = new HashSet<Forest<T>>();
+ for(Forest<NodeType> f : hp) {
+ HashSet<Forest<NodeType>> hf1 = new HashSet<Forest<NodeType>>();
f.gather(hf1);
hf0.retainAll(hf1);
}
- HashSet<Tree<T>> ht = new HashSet<Tree<T>>();
+ HashSet<Tree<NodeType>> ht = new HashSet<Tree<NodeType>>();
expand(ht, hf0, new Tree(null, "*"));
throw new Ambiguous((Forest<?>)this,
(HashSet<Tree<?>>)(Object)ht);
return hp.iterator().next().expand1();
}
- void gather(HashSet<Forest<T>> ht) {
+ void gather(HashSet<Forest<NodeType>> ht) {
touched();
ht.add(this);
- for(Forest<T> f : hp) f.gather(ht);
+ for(Forest<NodeType> f : hp) f.gather(ht);
}
private void touched() {
if (touched) return;
touched = true;
/*
- FastSet<Forest<T>> f2 = new FastSet<Forest<T>>();
+ FastSet<Forest<NodeType>> f2 = new FastSet<Forest<NodeType>>();
for(Forest f : hp)
if (f instanceof Forest.One) f2.add(f);
- else for(Forest ff : ((Forest.Many<T>)f))
+ else for(Forest ff : ((Forest.Many<NodeType>)f))
f2.add(ff);
hp = f2;
*/
return true;
}
- void expand(HashSet<Tree<T>> ht, HashSet<Forest<T>> ignore, Tree<T> bogus) {
+ void expand(HashSet<Tree<NodeType>> ht, HashSet<Forest<NodeType>> ignore, Tree<NodeType> bogus) {
touched();
if (ignore.contains(this)) { ht.add(bogus); return; }
- for (Forest<T> f : hp) f.expand(ht, ignore, bogus);
+ for (Forest<NodeType> f : hp) f.expand(ht, ignore, bogus);
}
import java.io.*;
import java.util.*;
-/** a parser which translates streams of Tokens of type T into a Forest<R> */
-public abstract class Parser<Tok, Result> {
+/** a parser which translates an Input<Token> into a Forest<NodeType> */
+public abstract class Parser<Token, NodeType> {
- protected final Table<Tok> pt;
+ protected final Table<Token> pt;
/** create a parser to parse the grammar with start symbol <tt>u</tt> */
- protected Parser(Union u, Topology<Tok> top) { this.pt = new Table<Tok>(u, top); }
- protected Parser(Table<Tok> pt) { this.pt = pt; }
+ protected Parser(Union u, Topology<Token> top) { this.pt = new Table<Token>(u, top); }
+ protected Parser(Table<Token> pt) { this.pt = pt; }
/** implement this method to create the output forest corresponding to a lone shifted input token */
- protected abstract Forest<Result> shiftToken(Tok t, Input.Location newloc);
+ protected abstract Forest<NodeType> shiftToken(Token t, Input.Location newloc);
boolean helpgc = true;
public String toString() { return pt.toString(); }
/** parse <tt>input</tt>, and return the shared packed parse forest (or throw an exception) */
- public Forest<Result> parse(Input<Tok> input) throws IOException, ParseFailed {
+ public Forest<NodeType> parse(Input<Token> input) throws IOException, ParseFailed {
GSS gss = new GSS();
Input.Location loc = input.getLocation();
- GSS.Phase current = gss.new Phase<Tok>(null, this, null, input.next(), loc, null);
+ GSS.Phase current = gss.new Phase<Token>(null, this, null, input.next(), loc, null);
current.newNode(null, Forest.create(null, null, null, false), pt.start, true);
int count = 1;
for(int idx=0;;idx++) {
Input.Location oldloc = loc;
loc = input.getLocation();
current.reduce();
- Forest forest = current.token==null ? null : shiftToken((Tok)current.token, loc);
- GSS.Phase next = gss.new Phase<Tok>(current, this, current, input.next(), loc, forest);
+ Forest forest = current.token==null ? null : shiftToken((Token)current.token, loc);
+ GSS.Phase next = gss.new Phase<Token>(current, this, current, input.next(), loc, forest);
if (!helpgc) {
FileOutputStream fos = new FileOutputStream("out-"+idx+".dot");
PrintWriter p = new PrintWriter(new OutputStreamWriter(fos));
p.close();
}
count = next.size();
- if (current.isDone()) return (Forest<Result>)gss.finalResult;
+ if (current.isDone()) return (Forest<NodeType>)gss.finalResult;
current = next;
}
}
// Table //////////////////////////////////////////////////////////////////////////////
/** an SLR(1) parse table which may contain conflicts */
- static class Table<Tok> extends Walk.Cache {
+ static class Table<Token> extends Walk.Cache {
public String toString() {
StringBuffer sb = new StringBuffer();
sb.append("parse table");
- for(State<Tok> state : all_states.values()) {
+ for(State<Token> state : all_states.values()) {
sb.append(" " + state + "\n");
- for(Topology<Tok> t : state.shifts) {
+ for(Topology<Token> t : state.shifts) {
sb.append(" shift \""+
new edu.berkeley.sbp.chr.CharTopology((IntegerTopology<Character>)t)+"\" => ");
for(State st : state.shifts.getAll(t))
sb.append(st.idx+" ");
sb.append("\n");
}
- for(Topology<Tok> t : state.reductions)
+ for(Topology<Token> t : state.reductions)
sb.append(" reduce \""+
new edu.berkeley.sbp.chr.CharTopology((IntegerTopology<Character>)t)+"\" => " +
state.reductions.getAll(t) + "\n");
}
/** the start state */
- public final State<Tok> start;
+ public final State<Token> start;
/** the state from which no reductions can be done */
- private final State<Tok> dead_state;
+ private final State<Token> dead_state;
/** used to generate unique values for State.idx */
private int master_state_idx = 0;
- HashMap<HashSet<Position>,State<Tok>> all_states = new HashMap<HashSet<Position>,State<Tok>>();
+ HashMap<HashSet<Position>,State<Token>> all_states = new HashMap<HashSet<Position>,State<Token>>();
/** construct a parse table for the given grammar */
public Table(Topology top) { this("s", top); }
HashSet<Position> hp = new HashSet<Position>();
reachable(start0, hp);
- this.dead_state = new State<Tok>(new HashSet<Position>(), all_states, all_elements);
- this.start = new State<Tok>(hp, all_states, all_elements);
+ this.dead_state = new State<Token>(new HashSet<Position>(), all_states, all_elements);
+ this.start = new State<Token>(hp, all_states, all_elements);
// for each state, fill in the corresponding "row" of the parse table
- for(State<Tok> state : all_states.values())
+ for(State<Token> state : all_states.values())
for(Position p : state.hs) {
// the Grammar's designated "last position" is the only accepting state
state.shifts.addAll(state.gotoSetTerminals.subset(((Atom)p.element()).getTokenTopology()));
}
if (top instanceof IntegerTopology)
- for(State<Tok> state : all_states.values()) {
+ for(State<Token> state : all_states.values()) {
state.oreductions = state.reductions.optimize(((IntegerTopology)top).functor());
state.oshifts = state.shifts.optimize(((IntegerTopology)top).functor());
}
/** a single state in the LR table and the transitions possible from it */
- class State<Tok> implements Comparable<State<Tok>>, IntegerMappable, Iterable<Position> {
+ class State<Token> implements Comparable<State<Token>>, IntegerMappable, Iterable<Position> {
public final int idx = master_state_idx++;
private final HashSet<Position> hs;
- public transient HashMap<Sequence,State<Tok>> gotoSetNonTerminals = new HashMap<Sequence,State<Tok>>();
- private transient TopologicalBag<Tok,State<Tok>> gotoSetTerminals = new TopologicalBag<Tok,State<Tok>>();
+ public transient HashMap<Sequence,State<Token>> gotoSetNonTerminals = new HashMap<Sequence,State<Token>>();
+ private transient TopologicalBag<Token,State<Token>> gotoSetTerminals = new TopologicalBag<Token,State<Token>>();
- private TopologicalBag<Tok,Position> reductions = new TopologicalBag<Tok,Position>();
+ private TopologicalBag<Token,Position> reductions = new TopologicalBag<Token,Position>();
private HashSet<Position> eofReductions = new HashSet<Position>();
- private TopologicalBag<Tok,State<Tok>> shifts = new TopologicalBag<Tok,State<Tok>>();
+ private TopologicalBag<Token,State<Token>> shifts = new TopologicalBag<Token,State<Token>>();
private boolean accept = false;
- private VisitableMap<Tok,State<Tok>> oshifts = null;
- private VisitableMap<Tok,Position> oreductions = null;
+ private VisitableMap<Token,State<Token>> oshifts = null;
+ private VisitableMap<Token,Position> oreductions = null;
// Interface Methods //////////////////////////////////////////////////////////////////////////////
boolean isAccepting() { return accept; }
public Iterator<Position> iterator() { return hs.iterator(); }
- boolean canShift(Tok t) { return oshifts!=null && oshifts.contains(t); }
- <B,C> void invokeShifts(Tok t, Invokable<State<Tok>,B,C> irbc, B b, C c) {
+ boolean canShift(Token t) { return oshifts!=null && oshifts.contains(t); }
+ <B,C> void invokeShifts(Token t, Invokable<State<Token>,B,C> irbc, B b, C c) {
oshifts.invoke(t, irbc, b, c);
}
- boolean canReduce(Tok t) { return oreductions != null && (t==null ? eofReductions.size()>0 : oreductions.contains(t)); }
- <B,C> void invokeReductions(Tok t, Invokable<Position,B,C> irbc, B b, C c) {
+ boolean canReduce(Token t) { return oreductions != null && (t==null ? eofReductions.size()>0 : oreductions.contains(t)); }
+ <B,C> void invokeReductions(Token t, Invokable<Position,B,C> irbc, B b, C c) {
if (t==null) for(Position r : eofReductions) irbc.invoke(r, b, c);
else oreductions.invoke(t, irbc, b, c);
}
* </ul>
*/
public State(HashSet<Position> hs,
- HashMap<HashSet<Position>,State<Tok>> all_states,
+ HashMap<HashSet<Position>,State<Token>> all_states,
HashSet<SequenceOrElement> all_elements) {
this.hs = hs;
// of _new_ positions (positions after shifting). These mappings are
// collectively known as the _closure_
- TopologicalBag<Tok,Position> bag0 = new TopologicalBag<Tok,Position>();
+ TopologicalBag<Token,Position> bag0 = new TopologicalBag<Token,Position>();
for(Position position : hs) {
if (position.isLast() || !(position.element() instanceof Atom)) continue;
Atom a = (Atom)position.element();
// set, add that character set to the goto table (with the State corresponding to the
// computed next-position set).
- for(Topology<Tok> r : bag0) {
+ for(Topology<Token> r : bag0) {
HashSet<Position> h = new HashSet<Position>();
for(Position p : bag0.getAll(r)) h.add(p);
- gotoSetTerminals.put(r, all_states.get(h) == null ? new State<Tok>(h, all_states, all_elements) : all_states.get(h));
+ gotoSetTerminals.put(r, all_states.get(h) == null ? new State<Token>(h, all_states, all_elements) : all_states.get(h));
}
// Step 2: for every non-Atom element (ie every Element which has a corresponding reduction),
}
OUTER: for(SequenceOrElement y : move) {
HashSet<Position> h = move.getAll(y);
- State<Tok> s = all_states.get(h) == null ? new State<Tok>(h, all_states, all_elements) : all_states.get(h);
+ State<Token> s = all_states.get(h) == null ? new State<Token>(h, all_states, all_elements) : all_states.get(h);
// if a reduction is "lame", it should wind up in the dead_state after reducing
if (y instanceof Sequence) {
for(Position p : hs) {
return ret.toString();
}
- public int compareTo(State<Tok> s) { return idx==s.idx ? 0 : idx < s.idx ? -1 : 1; }
+ public int compareTo(State<Token> s) { return idx==s.idx ? 0 : idx < s.idx ? -1 : 1; }
public int toInt() { return idx; }
}
}
import java.lang.ref.*;
/** <font color=green>juxtaposition; zero or more adjacent Elements; can specify a rewriting</font> */
-public abstract class Sequence /*extends Element*/ implements Iterable<Element>, SequenceOrElement {
+public abstract class Sequence implements Iterable<Element>, SequenceOrElement {
protected final Element[] elements;
public Sequence and(Sequence s) { Sequence ret = dup(); ret.needs.add(s); return ret; }
/** return a new sequence identical to this one, but with a negative conjunct <tt>s</tt> */
- public Sequence not(Sequence s) { Sequence ret = dup(); ret.hates.add(s); s.hated.add(ret); return ret; }
+ public Sequence andnot(Sequence s) { Sequence ret = dup(); ret.hates.add(s); s.hated.add(ret); return ret; }
/** return a new sequence identical to this one, but with a follow-set restricted to <tt>a</tt> */
public Sequence followedBy(Atom a) { Sequence ret = dup(); ret.follow = a; return ret; }
sb.append("-> ");
sb.append(follow);
}
+ for(Sequence s : needs) {
+ sb.append("& ");
+ sb.append(s);
+ }
+ for(Sequence s : hates) {
+ sb.append("&~ ");
+ sb.append(s);
+ }
return sb;
}
import java.lang.reflect.*;
/** <font color=blue>a tree (or node in a tree); see jargon.txt for details</font> */
-public class Tree<T>
- extends PrintableTree<Tree<T>>
- implements Iterable<Tree<T>> {
+public class Tree<NodeType>
+ extends PrintableTree<Tree<NodeType>>
+ implements Iterable<Tree<NodeType>> {
private final Input.Region location;
- private final T head;
- private final Tree<T>[] children;
+ private final NodeType head;
+ private final Tree<NodeType>[] children;
private final boolean lift;
/** the element at the head of the tree */
- public T head() { return head; }
+ public NodeType head() { return head; }
- private Tree<T> lifted() { return children[children.length-1]; }
+ private Tree<NodeType> lifted() { return children[children.length-1]; }
/** the number of children the tree has */
public int size() {
}
/** the tree's children */
- public Iterable<Tree<T>> children() { return this; }
+ public Iterable<Tree<NodeType>> children() { return this; }
/** the tree's children */
- public Iterator<Tree<T>> iterator() {
+ public Iterator<Tree<NodeType>> iterator() {
return lift
? new ConcatenateIterator(new ArrayIterator(children, 0, children.length-1),
children[children.length-1].iterator())
}
/** get the <tt>i</t>th child */
- public Tree<T> child(int i) {
+ public Tree<NodeType> child(int i) {
return lift && i >= children.length-1
? children[children.length-1].child(i-(children.length-1))
: children[i];
/** get the input region that this tree was parsed from */
public Input.Region getRegion() { return location; }
- public Tree(Input.Region loc, T head) { this(loc, head, null); }
- public Tree(Input.Region loc, T head, Tree<T>[] children) { this(loc, head, children, false); }
+ public Tree(Input.Region loc, NodeType head) { this(loc, head, null); }
+ public Tree(Input.Region loc, NodeType head, Tree<NodeType>[] children) { this(loc, head, children, false); }
/** package-private constructor, allows setting the "lift" bit */
- Tree(Input.Region loc, T head, Tree<T>[] children, boolean lift) {
+ Tree(Input.Region loc, NodeType head, Tree<NodeType>[] children, boolean lift) {
this.location = loc;
this.head = head;
this.lift = lift && children != null && children.length > 0;
/** adds an alternative */
public void add(Sequence s) {
+ /*
+ FIXME
if (viewed)
- throw new RuntimeException("attempt to add a Sequence to a Union that has already been examined");
+ throw new RuntimeException("attempt to add a Sequence to a Union that has already been examined:\n "+this);
+ */
if (alternatives.contains(s)) return;
alternatives.add(s);
}
+ /** adds a one-element sequence */
+ public void add(Element e) {
+ add(Sequence.create(e));
+ }
+
// Epsilon Form //////////////////////////////////////////////////////////////////////////////
public Forest<String> parse(InputStream is) throws IOException, ParseFailed { return super.parse(new CharInput(is)); }
public Forest<String> parse(Reader r) throws IOException, ParseFailed { return super.parse(new CharInput(r)); }
+ public Forest<String> parse(String s) throws IOException, ParseFailed { return parse(new StringReader(s)); }
public CharParser(Union u) { super(u, new CharTopology()); }
}
if (sequences.length==1) break;
Sequence seq = Sequence.create(u2);
- for(Sequence s : bad2) seq = seq.not(s);
+ for(Sequence s : bad2) seq = seq.andnot(s);
u.add(seq);
bad2.add(Sequence.create(u2));
}
}
if (sequences.length==1) break;
Sequence seq = Sequence.create(u2);
- for(Sequence s : bad2) seq = seq.not(s);
+ for(Sequence s : bad2) seq = seq.andnot(s);
u.add(seq);
bad2.add(Sequence.create(u2));
}
public Sequence build(Context cx, Union u, NonTerminalNode cnt) {
Sequence ret = build0(cx, cnt);
for(Seq s : and) { Sequence dork = s.build(cx, u, cnt); ret = ret.and(dork); }
- for(Seq s : not) { Sequence dork = s.build(cx, u, cnt); ret = ret.not(dork); }
+ for(Seq s : not) { Sequence dork = s.build(cx, u, cnt); ret = ret.andnot(dork); }
u.add(ret);
return ret;
}
--- /dev/null
+package edu.berkeley.sbp.misc;
+
+import edu.berkeley.sbp.*;
+
+public class Demo2 {
+
+ private static Atom atom(char c) {
+ return new edu.berkeley.sbp.chr.CharAtom(c); }
+ private static Atom atom(char c1, char c2) {
+ return new edu.berkeley.sbp.chr.CharAtom(c1, c2); }
+
+ public static void main(String[] s) throws Exception {
+
+ Union expr = new Union("Expr");
+
+ Element[] add = new Element[] { expr, atom('+'), expr };
+ Element[] mult = new Element[] { expr, atom('*'), expr };
+ Element[] paren = new Element[] { atom('('), expr, atom(')') };
+
+ Sequence addSequence = Sequence.create("add", add, null, false);
+ Sequence multSequence = Sequence.create("mult", mult, null, false);
+
+ // uncomment this line to disambiguate
+ multSequence = multSequence.andnot(Sequence.create("add", add, null, false));
+
+ expr.add(Sequence.create(paren, 1));
+ expr.add(addSequence);
+ expr.add(multSequence);
+ expr.add(Sequence.create(atom('0', '9')));
+
+ String input = "8+(1+3)*7";
+
+ System.out.println("input: \""+input+"\"");
+
+ StringBuffer sb = new StringBuffer();
+ expr.toString(sb);
+ System.out.println("grammar: \n"+sb);
+
+ Forest f = new edu.berkeley.sbp.chr.CharParser(expr).parse(input);
+ System.out.println("output: "+f.expand1().toPrettyString());
+ }
+
+}
--- /dev/null
+// Copyright 2006 the Contributors, as shown in the revision logs.
+// Licensed under the Apache Public Source License 2.0 ("the License").
+// You may not use this file except in compliance with the License.
+
+package edu.berkeley.sbp.misc;
+import edu.berkeley.sbp.*;
+import edu.berkeley.sbp.misc.*;
+import edu.berkeley.sbp.meta.*;
+import edu.berkeley.sbp.util.*;
+import edu.berkeley.sbp.chr.*;
+import edu.berkeley.sbp.bind.*;
+import java.util.*;
+import java.io.*;
+
+public class Java15 {
+
+ public static void main(String[] s) throws Exception {
+
+ try {
+ Tree<String> res = new CharParser(MetaGrammar.newInstance()).parse(new FileInputStream(s[0])).expand1();
+
+ //AnnotationGrammarBindings resolver = new AnnotationGrammarBindings(Java15.class);
+ Grammar.Bindings resolver = new Grammar.Bindings();
+ Union javaGrammar = Grammar.create(res, "s", resolver);
+
+ System.err.println("parsing " + s[1]);
+ Tree t = new CharParser(javaGrammar).parse(new FileInputStream(s[1])).expand1();
+
+ System.out.println("tree:\n" + t.toPrettyString());
+
+ } catch (Ambiguous a) {
+ FileOutputStream fos = new FileOutputStream("/Users/megacz/Desktop/out.dot");
+ PrintWriter p = new PrintWriter(new OutputStreamWriter(fos));
+ GraphViz gv = new GraphViz();
+ a.getAmbiguity().toGraphViz(gv);
+ gv.dump(p);
+ p.flush();
+ p.close();
+ a.printStackTrace();
+
+ } catch (Exception e) {
+ e.printStackTrace();
+ }
+
+ }
+
+}
<body>
-<p>
-<b>
- <font color=red>IMPORTANT:</font>
- BE SURE TO READ THE FILE
- <tt><font size=big><a href=../../../../jargon.txt>doc/jargon.txt</a></big></tt> FIRST!<br> Also, see the legend at the bottom of this page.
-</b>
+<p style="border: 1px red solid; width: 50%; padding: 10px; background-color: white; margin-left: auto; margin-right: auto">
+
+ The public APIs in this package are <b>stable</b>; package-private
+ APIs and all other packages are subject to change in future
+ releases.<br><br>Be sure to read <a
+ href=../../../../jargon.txt>doc/jargon.txt</a> and the <a
+ href=#package_description>description</a> below.
+
</p>
<p>
<li> Exceptions.
</ul>
</p>
+
+<h2>Theory of Operation</h2>
+
+<p>
+
+The input that you parse is considered to be a stream of
+<tt>Tokens</tt>; this stream is represented by an
+<tt>Input<Token></tt>. In order to create this <tt>Input</tt>,
+you must first decide what kind of tokens you want to parse. Based on
+this decision, you should then implement subclasses of <tt>Input</tt>,
+<tt>Parser</tt>, and <tt>Atom</tt> for that token type. If you are
+parsing characters (which you usually are), these subclasses are
+provided in the <tt>edu.berkeley.sbp.chr.*</tt> package so you don't
+have to write them yourself.
+
+</p><p>
+
+You then create a grammar by instantiating objects belonging to your
+subclass of <tt>Atom</tt> and forming them into sequences using
+<tt>Sequence.create___()</tt> and <tt>new Union()</tt>.
+
+</p><p>
+
+Ultimately you will wind up with an instance of <tt>Union</tt>
+corresponding to the "start nonterminal" of your grammar. You can
+then provide this <tt>Union</tt> to the constructor of your
+<tt>Parser</tt> subclass and invoke the <tt>Parser.parse(Input)</tt>
+method on the <tt>Input</tt> to be parsed.
+
+</p><p>
+
+The result will be a <tt>Forest</tt>, which is an efficient
+representation of a set of one or more trees that may share subtrees.
+
+</p><p>
+
+If the parse was ambiguous, you can use
+<tt>Forest.expand(HashSet)</tt> to expand the Forest into all the
+possible trees (there is not yet a stable API for inspecting the
+<tt>Forest</tt> directly).
+
+</p><p>
+
+If the parse was <i>not</i> ambiguous, you can call
+<tt>Forest.expand1()</tt> to return the single possible parsing as a
+<tt>Tree</tt>. You would then typically use the methods of the
+<tt>Tree</tt> class to examine the parse tree.
+
+</p>
+<h2>Guide to the API</h2>
+
+<h2>Example</h2>
+
+<div class=example>
+<font color=cyan>package</font> <font color=#f0f>edu.berkeley.sbp.misc</font>;<br>
+<br>
+<font color=cyan>import</font> <font color=#f0f>edu.berkeley.sbp.*</font>;<br>
+<br>
+<font color=cyan>public</font> <font color=cyan>class</font> Demo2 {<br>
+<br>
+ <font color=cyan>private</font> <font color=cyan>static</font> <font color=orange>Atom</font> <font color=#00f>atom</font>(<font color=orange>char</font> <font color=yellow>c</font>) {<br>
+ <font color=cyan>return</font> <font color=cyan>new</font> edu.berkeley.sbp.chr.<font color=orange>CharAtom</font>(c); }<br>
+ <font color=cyan>private</font> <font color=cyan>static</font> <font color=orange>Atom</font> <font color=#00f>atom</font>(<font color=orange>char</font> <font color=yellow>c1</font>, <font color=orange>char</font> <font color=yellow>c2</font>) {<br>
+ <font color=cyan>return</font> <font color=cyan>new</font> edu.berkeley.sbp.chr.<font color=orange>CharAtom</font>(c1, c2); }<br>
+<br>
+ <font color=cyan>public</font> <font color=cyan>static</font> <font color=cyan>void</font> <font color=#00f>main</font>(<font color=orange>String[]</font> s) throws Exception {<br>
+<br>
+ <font color=orange>Union</font> <font color=yellow>expr</font> = <font color=cyan>new</font> <font color=orange>Union</font>(<font color=#0f0>"Expr"</font>);<br>
+<br>
+ <font color=orange>Element[]</font> <font color=yellow>add</font> <font color=yellow></font> = <font color=cyan>new</font> <font color=orange>Element</font>[] { expr, atom(<font color=#0f0>'+'</font>), expr };<br>
+ <font color=orange>Element[]</font> <font color=yellow>mult</font> <font color=yellow></font> = <font color=cyan>new</font> <font color=orange>Element</font>[] { expr, atom(<font color=#0f0>'*'</font>), expr };<br>
+ <font color=orange>Element[]</font> <font color=yellow>paren</font> = <font color=cyan>new</font> <font color=orange>Element</font>[] { atom(<font color=#0f0>'('</font>), expr, atom(<font color=#0f0>')'</font>) };<br>
+<br>
+ <font color=orange>Sequence</font> <font color=yellow>addSequence</font> = <font color=orange>Sequence</font>.create(<font color=#0f0>"add"</font>, add, <font color=#f0f>null</font>, <font color=#f0f>false</font>);<br>
+ <font color=orange>Sequence</font> <font color=yellow>multSequence</font> = <font color=orange>Sequence</font>.create(<font color=#0f0>"mult"</font>, mult, <font color=#f0f>null</font>, <font color=#f0f>false</font>);<br>
+<br>
+<font color=red>
+ // uncomment this line to disambiguate<br>
+ //multSequence = multSequence.andnot(Sequence.create("add", add, null, false));<br>
+</font>
+<br>
+ expr.add(<font color=orange>Sequence</font>.create(paren, 1));<br>
+ expr.add(addSequence);<br>
+ expr.add(multSequence);<br>
+ expr.add(<font color=orange>Sequence</font>.create(atom(<font color=#0f0>'0'</font>, <font color=#0f0>'9'</font>)));<br>
+<br>
+ <font color=orange>String</font> <font color=yellow>input</font> = <font color=#0f0>"8+(1+3)*7"</font>;<br>
+<br>
+ System.out.println(<font color=#0f0>"input: \""+input+"\""</font>);<br>
+<br>
+ <font color=orange>StringBuffer</font> <font color=yellow>sb</font> = <font color=cyan>new</font> <font color=orange>StringBuffer</font>();<br>
+ expr.toString(sb);<br>
+ System.out.println(<font color=#0f0>"grammar: \n"</font>+sb);<br>
+<br>
+ <font color=orange>Forest</font> <font color=yellow>f</font> = <font color=cyan>new</font> edu.berkeley.sbp.chr.<font color=orange>CharParser</font>(expr).parse(input);<br>
+ System.out.println(<font color=#0f0>"output: "</font>+f.expand1().toPrettyString());<br>
+ }<br>
+<br>
+}<br>
+</div>
+
+<p>
+Executing this code gives the following:
+</p>
+
+<div class=example>
+java -Xmx900m -cp edu.berkeley.sbp.jar edu.berkeley.sbp.misc.Demo2<br>
+input: "8+(1+3)*7"<br>
+grammar:<br>
+Expr = [(] Expr [)]<br>
+ | "add":: Expr [+] Expr<br>
+ | "mult":: Expr [*] Expr<br>
+ | [0-9]<br>
+<br>
+Exception in thread "main" unresolved ambiguity; shared subtrees are shown as "*"<br>
+ possibility: mult:{add:{* * *} * *}<br>
+ possibility: add:{* * mult:{* * *}}<br>
+</div>
+
+<p>
+If we uncomment the line in the example, the result is:
+</p>
+
+<div class=example>
+java -Xmx900m -cp edu.berkeley.sbp.jar edu.berkeley.sbp.misc.Demo2<br>
+input: "8+(1+3)*7"<br>
+grammar: <br>
+Expr = [(] Expr [)] <br>
+ | "add":: Expr [+] Expr <br>
+ | "mult":: Expr [*] Expr &~ "add":: Expr [+] Expr <br>
+ | [0-9] <br>
+<br>
+output: add:{8 + mult:{add:{1 + 3} * 7}}<br>
+</div>
+
</body>
--- /dev/null
+
+s = ws! CompilationUnit ws!
+
+CompilationUnit =
+ CompilationUnit:: Package? Import* (InterfaceDecl|ClassDecl)* /ws
+
+Package = Annotations?! "package" PackageName ";" /ws
+
+Annotations = ()
+
+Import = "import" TypeName ";" /ws
+ | "import" (TypeName ".*") ";" /ws
+ | "import" "static" TypeName ";" /ws
+ | "import" "static" (TypeName ".*") ";" /ws
+
+TypeName = Identifier +/ "."
+PackageName = Identifier +/ "."
+
+Modifiers = Modifiers:: ("public" | "protected" | "private") (ws! "abstract")?
+ | Modifiers:: "abstract"
+
+ClassDecl = Class:: Modifiers "class" TypeDecl ClassBody /ws
+InterfaceDecl = Interface:: Modifiers "interface" TypeDecl ClassBody /ws
+
+TypeDecl = Identifier
+ | GenericTypeDecl:: Identifier "<" (TypeArg +/ Comma) ">" /ws
+
+TypeArg = Identifier
+ | Extends:: Identifier "extends" Type /ws
+ | Super:: Identifier "super" Type /ws
+ | Exist:: "?"
+ | ExistExtends:: "?" "extends" Type
+ | Intersect:: Identifier "&" Type
+
+ClassBody = "{" (BodyDecl +/ ws) "}" /ws
+ | "{" ws! "}"
+
+BodyDecl = FieldDecl | MethodDecl | ClassDecl | InterfaceDecl
+
+FieldDecl = Field:: Modifiers Type Identifier ";" /ws
+MethodDecl = Method:: MethodHeader (";" | MethodBody) /ws
+
+MethodHeader = MethodHeader:: Modifiers Type Identifier Args /ws
+MethodBody = "{" "}" /ws
+
+Args = "(" (Arg+/Comma) ")" /ws
+ | "(" ws! ")"
+Arg = Arg:: Type Identifier /ws
+
+Type = BareType | GenericType | ArrayType
+BareType = Type:: TypeName | "boolean" | "int" | "double" | "float" | "char" | "short" | "long" | "void"
+GenericType = GenericType:: TypeName "<" (Type+/Comma) ">" /ws
+ArrayType = ArrayOf:: (BareType | GenericType) "[]" /ws
+
+ws = [\r\n ]**
+Comma = ws! "," ws!
+
+JavaLetter = [a-zA-Z_$]
+Identifier = JavaLetter++
+ &~ ([0-9]! ~[]*!)
+ &~ Keyword
+ &~ BooleanLiteral
+ &~ NullLiteral
+BooleanLiteral = "true" | "false"
+NullLiteral = "null"
+
+// http://java.sun.com/docs/books/jls/third_edition/html/lexical.html#29542
+//
+//HexDigit =
+//UnicodeEscape = "\\u" [0-9] [0-9] [0-9] [0-9] // this is valid even inside strings/comments
+//
+//// Ctrl-Z
+//// Unicode escapes (including \\u garbage)
+//
+//WhiteSpace = [\r\n\t ]
+//Comment = "/*" (~[]* &~ (~[]*! "*/" ~[]*!)) "*/"
+// | "//" [^\r\n]* [\r\n]!
+//
+//Token = Identifier | Keyword | Literal | Separator | Operator
+//
+//
+//Literal =
+// | IntegerLiteral
+// | FloatingPointLiteral
+// | BooleanLiteral
+// | CharacterLiteral
+// | StringLiteral
+// | NullLiteral
+//
+//
+//FloatLiteral = (DecimalFloatLiteral > HexFloatLiteral) [dDfF]?
+//DecimalFloatLiteral = [0-9]++ "." [0-9]++ [ep] [+-]? [0-9]++
+//HexFloatLiteral = ("0x"|"0X") [0-9a-fA-F]++ "." [0-9a-fA-F]++ [ep] [+-]? [0-9a-fA-F]++
+//
+//CharLiteral = "'" ([^\\\'] | "\\" ~[]) "'"
+//StringLiteral = "\"" ([^\\\"] | "\\" ~[])* "\""
+//
+//IntegerLiteral = (DecimalLiteral | OctalLiteral | HexLiteral) [lL]?
+//DecimalLiteral = [0-9]++ &~ OctalLiteral
+//OctalLiteral = "0" [0-9]++ &~ "0"
+//HexLiteral = ("0x"|"0X") [0-9a-fA-F]++
+//
+
+Keyword =
+ "abstract" | "continue" | "for" | "new" | "switch"
+ | "assert" | "default" | "if" | "package" | "synchronized"
+ | "boolean" | "do" | "goto" | "private" | "this"
+ | "break" | "double" | "implements" | "protected" | "throw"
+ | "byte" | "else" | "import" | "public" | "throws"
+ | "case" | "enum" | "instanceof" | "return" | "transient"
+ | "catch" | "extends" | "int" | "short" | "try"
+ | "char" | "final" | "interface" | "static" | "void"
+ | "class" | "finally" | "long" | "strictfp" | "volatile"
+ | "const" | "float" | "native" | "super" | "while"
+
+// We don't obey this:
+//
+// "The longest possible translation is used at each step, even if
+// "the result does not ultimately make a correct program while
+// "another lexical translation would. Thus the input characters
+// "a--b are tokenized (-3.5) as a, --, b, which is not part of any
+// "grammatically correct program, even though the tokenization a,
+// "-, -, b could be part of a grammatically correct program.
--- /dev/null
+package foo;
+import foo.bar;
+
+public class Baz < A extends Object , Q super Foo<Bar<Baz>,Bop> > {
+
+ public void foo(int x, char y, Bop<Baz<M>> q) {
+ }
+
+ protected abstract int bar(int c);
+
+}