From: adam Date: Sun, 23 Jul 2006 05:38:21 +0000 (-0400) Subject: checkpoint X-Git-Tag: tag_for_25-Mar~99 X-Git-Url: http://git.megacz.com/?p=sbp.git;a=commitdiff_plain;h=2c1c0293545f3d12c23220fd05c663e6aa3f3de1;ds=sidebyside checkpoint darcs-hash:20060723053821-5007d-c81ea07ba279c73b02819f1ddbe45dece3f6ed85.gz --- diff --git a/Makefile b/Makefile index da56cde..7cfc73d 100644 --- a/Makefile +++ b/Makefile @@ -6,11 +6,19 @@ tibdoc: edu.berkeley.sbp.jar tests/tibdoc.g \ tests/bitstream.tib +java15: edu.berkeley.sbp.jar + $(java) -cp $< edu.berkeley.sbp.misc.Java15 \ + tests/java15.g \ + tests/java15.test + demo: edu.berkeley.sbp.jar $(java) -cp $< edu.berkeley.sbp.misc.Demo \ tests/demo.g \ '(11+2*3)-44' +demo2: edu.berkeley.sbp.jar + $(java) -cp $< edu.berkeley.sbp.misc.Demo2 + regress: make boot rm edu.berkeley.sbp.jar @@ -75,13 +83,25 @@ boot: edu.berkeley.sbp.jar edu.berkeley.sbp.jar: $(shell find src -name \*.java) mkdir -p bin - javac -cp javax.servlet.jar:tests/ArchSimA3.jar:tests/grappa.jar -d bin -sourcepath src $^ + javac -cp javax.servlet.jar:tests/ArchSimA3.jar:tests/grappa.jar -d bin -sourcepath src $^ cd bin; jar cf ../$@ . - +#-Xlint:unchecked javadoc: rm -rf doc/api mkdir -p doc/api - javadoc -sourcepath src -public -d doc/api `find src -name \*.java` + javadoc \ + -linksource \ + -windowtitle "SBP: the Scannerless Boolean Parser" \ + -sourcepath src \ + -header "SBP
v1.0" \ + -public \ + -notree \ + -noindex \ + -nonavbar \ + -stylesheetfile doc/javadoc.css \ + -noqualifier all \ + -d doc/api \ + edu.berkeley.sbp clean: rm -rf doc/api edu.berkeley.sbp.jar bin edu.berkeley.sbp.tar.gz diff --git a/TODO b/TODO index cecd2d4..9362404 100644 --- a/TODO +++ b/TODO @@ -1,28 +1,33 @@ _____________________________________________________________________________ Immediately - - Sequence extends Element (?) -> then add Union.add(element) - - Parameterize Sequence/Union/Atom - - Make sure we never use raw types - - do Forest/Tree still need a Region? + - evil problems with: (x y? z /ws) + - it gets even more evil than that - - More topology untangling + - Annotation Tutorial - - tib: use the lexer only for indentation increases/decreases + - MUST HAVE BETTER ERROR MESSAGES + - use for developing java15.g - - grammar highlighting? + - java15.g + + - topology no longer needed as an arg to parser + - expose parser's protected method? + - do Forest/Tree still need a Region? - copyright notices - - tutorial ______________________________________________________________________________ v1.1 + - More topology untangling [later] + - tib: use the lexer only for indentation increases/decreases + - grammar highlighting? + - Forest needs a "manual access" API - the unwrap bit in Forest makes it really hard to expose an API for forests - - evil problems with (x y? z /ws) ______________________________________________________________________________ diff --git a/doc/javadoc.css b/doc/javadoc.css new file mode 100644 index 0000000..0223654 --- /dev/null +++ b/doc/javadoc.css @@ -0,0 +1,59 @@ +/* Page background color */ +body { + background-color: #F0F0E0; + font-family: arial, helvetica, sans-serif; + font-size: 14px; + margin-left: 40px; + margin-right: 40px; + margin-top: 0px; +} + +p { } + +/* Headings */ +h1 { border-top: 2px solid black; font-size: 14px; } +h2 { font-size: 20px; width: 100%; margin-left: -40px; margin-right: -40; background-color:blue; padding: 5px; padding-left: 40px; padding-right: 40px; color: white; } +h3 { border-top: 2px solid black; font-size: 14px; } +pre { } +hr { width: 0; } +tt { font-size: 14px; } + +div.example { + background: black; + border: 2px solid gray; + color: #ddd; + font-family: monospace; + padding: 4px; + font-size: 14px; +} + +a:link { color: #00f; text-decoration: none; } +a:active { color: #00f; text-decoration: none; border: 1px blue; } +a:visited { color: #44f; text-decoration: none; } +a:hover { color: #00f; text-decoration: none; border-bottom: dotted 1px blue; } + +/* Table colors */ +.TableHeadingColor { background: #CCCCFF; border: 0px white; } /* Dark mauve */ +.TableSubHeadingColor { background: #EEEEFF; border: 0px white; } /* Light mauve */ +.TableRowColor { background: #FFFFFF; font-size: 14px; } /* White */ + +table { border: 1px black solid; } +td { border: 0px white; border-top: 1px dotted gray; font-size: 14px; } +th { border: 0px white; } + + + + +/* Font used in left-hand frame lists */ +.FrameTitleFont { font-size: 100%; font-family: Helvetica, Arial, sans-serif } +.FrameHeadingFont { font-size: 90%; font-family: Helvetica, Arial, sans-serif } +.FrameItemFont { font-size: 90%; font-family: Helvetica, Arial, sans-serif } + +/* Navigation bar fonts and colors */ +.NavBarCell1 { background-color:#EEEEFF;} /* Light mauve */ +.NavBarCell1Rev { background-color:#00008B;} /* Dark Blue */ +.NavBarFont1 { font-family: Arial, Helvetica, sans-serif; color:#000000;} +.NavBarFont1Rev { font-family: Arial, Helvetica, sans-serif; color:#FFFFFF;} + +.NavBarCell2 { font-family: Arial, Helvetica, sans-serif; background-color:#FFFFFF;} +.NavBarCell3 { font-family: Arial, Helvetica, sans-serif; background-color:#FFFFFF;} diff --git a/src/edu/berkeley/sbp/Atom.java b/src/edu/berkeley/sbp/Atom.java index fd91496..daa3d49 100644 --- a/src/edu/berkeley/sbp/Atom.java +++ b/src/edu/berkeley/sbp/Atom.java @@ -13,15 +13,15 @@ import edu.berkeley.sbp.*; *

* This class is a topology over itself (yes, that's sort of Frege'd * up) so that Atoms can be intersected and unioned with each other - * to result in other Atom's (rather than raw Topology's, which + * to result in other Atom's (rather than raw Topology's, which * are not Elements). If you want the latter, use the * getTokenTopology() method. *

*/ -public abstract class Atom extends Element implements Topology> { +public abstract class Atom extends Element implements Topology> { /** the set (topology) of tokens that can match this element */ - public abstract Topology getTokenTopology(); + public abstract Topology getTokenTopology(); StringBuffer toString(StringBuffer sb) { sb.append(this); return sb; } diff --git a/src/edu/berkeley/sbp/Element.java b/src/edu/berkeley/sbp/Element.java index 16ae2f0..7eb6a59 100644 --- a/src/edu/berkeley/sbp/Element.java +++ b/src/edu/berkeley/sbp/Element.java @@ -16,6 +16,6 @@ public abstract class Element implements SequenceOrElement { abstract StringBuffer toString(StringBuffer sb); /** returns the Forest resulting from matching this element against the empty string */ - Forest epsilonForm() { throw new Error("element " + this + " has no epsilon form"); } + Forest epsilonForm() { throw new Error("element " + this + " has no epsilon form"); } } diff --git a/src/edu/berkeley/sbp/Forest.java b/src/edu/berkeley/sbp/Forest.java index 4d8b34f..4f79bb6 100644 --- a/src/edu/berkeley/sbp/Forest.java +++ b/src/edu/berkeley/sbp/Forest.java @@ -12,26 +12,26 @@ import java.lang.reflect.*; * shared packed parse forest). * */ -public abstract class Forest implements GraphViz.ToGraphViz { +public abstract class Forest implements GraphViz.ToGraphViz { /** assume that this forest contains exactly one tree and return it; otherwise throw an exception */ - public abstract Tree expand1() throws Ambiguous; + public abstract Tree expand1() throws Ambiguous; /** expand this forest into a set of trees */ - public void expand(HashSet> ht) { expand(ht, new HashSet>(), null); } + public void expand(HashSet> ht) { expand(ht, new HashSet>(), null); } - static Forest create(Input.Region loc, T head, Forest[] children, boolean lift) { - return new One(loc, head, children, lift); + static Forest create(Input.Region loc, NodeType head, Forest[] children, boolean lift) { + return new One(loc, head, children, lift); } /** create a new forest */ - public static Forest create(Input.Region loc, T head, Forest[] children) { + public static Forest create(Input.Region loc, NodeType head, Forest[] children) { return Forest.create(loc, head, children, false); } // Package-Private ////////////////////////////////////////////////////////////////////////////// - abstract void expand(HashSet> ht, HashSet> ignore, Tree bogus); - abstract void gather(HashSet> ignore); + abstract void expand(HashSet> ht, HashSet> ignore, Tree bogus); + abstract void gather(HashSet> ignore); abstract void edges(GraphViz.Node n); boolean ambiguous() { return false; } @@ -39,16 +39,16 @@ public abstract class Forest implements GraphViz.ToGraphViz { // One ////////////////////////////////////////////////////////////////////////////// /** A "single" forest with a head and child subforests */ - private static class One extends Forest { + private static class One extends Forest { private final Input.Region location; - private final T head; - private final Forest[] children; + private final NodeType head; + private final Forest[] children; /** if true, the last child's children are considered children of this node */ private final boolean lift; - private One(Input.Region loc, T head, Forest[] children, boolean lift) { + private One(Input.Region loc, NodeType head, Forest[] children, boolean lift) { this.location = loc; this.head = head; this.children = children==null ? emptyForestArray : new Forest[children.length]; @@ -57,27 +57,27 @@ public abstract class Forest implements GraphViz.ToGraphViz { this.lift = lift; } - public Tree expand1() throws Ambiguous { - Tree[] ret = new Tree[children.length]; + public Tree expand1() throws Ambiguous { + Tree[] ret = new Tree[children.length]; for(int i=0; i(location, head, ret, lift); + return new Tree(location, head, ret, lift); } - void gather(HashSet> hf) { + void gather(HashSet> hf) { hf.add(this); - for(Forest f : children) f.gather(hf); + for(Forest f : children) f.gather(hf); } - void expand(HashSet> ht, HashSet> ignore, Tree bogus) { + void expand(HashSet> ht, HashSet> ignore, Tree bogus) { if (ignore.contains(this)) { ht.add(bogus); return; } expand(0, new Tree[children.length], ht, ignore, bogus); } - private void expand(final int i, Tree[] ta, HashSet> ht, HashSet> ignore, Tree bogus) { + private void expand(final int i, Tree[] ta, HashSet> ht, HashSet> ignore, Tree bogus) { if (i==children.length) { - ht.add(new Tree(location, head, ta, lift)); + ht.add(new Tree(location, head, ta, lift)); } else { - HashSet> ht2 = new HashSet>(); + HashSet> ht2 = new HashSet>(); children[i].expand(ht2, ignore, bogus); - for(Tree tc : ht2) { + for(Tree tc : ht2) { ta[i] = tc; expand(i+1, ta, ht, ignore, bogus); ta[i] = null; @@ -121,26 +121,26 @@ public abstract class Forest implements GraphViz.ToGraphViz { // Many ////////////////////////////////////////////////////////////////////////////// /** An "ambiguity node"; this is immutable once it has been "looked at" */ - static class Many extends Forest { + static class Many extends Forest { HashSet parents = new HashSet(); - private FastSet> hp = new FastSet>(); + private FastSet> hp = new FastSet>(); private boolean touched = false; public Many() { } - public Tree expand1() throws Ambiguous { + public Tree expand1() throws Ambiguous { touched(); if (hp.size() > 1) { - HashSet> hf0 = new HashSet>(); - Iterator> ih = hp.iterator(); + HashSet> hf0 = new HashSet>(); + Iterator> ih = hp.iterator(); ih.next().gather(hf0); - for(Forest f : hp) { - HashSet> hf1 = new HashSet>(); + for(Forest f : hp) { + HashSet> hf1 = new HashSet>(); f.gather(hf1); hf0.retainAll(hf1); } - HashSet> ht = new HashSet>(); + HashSet> ht = new HashSet>(); expand(ht, hf0, new Tree(null, "*")); throw new Ambiguous((Forest)this, (HashSet>)(Object)ht); @@ -148,20 +148,20 @@ public abstract class Forest implements GraphViz.ToGraphViz { return hp.iterator().next().expand1(); } - void gather(HashSet> ht) { + void gather(HashSet> ht) { touched(); ht.add(this); - for(Forest f : hp) f.gather(ht); + for(Forest f : hp) f.gather(ht); } private void touched() { if (touched) return; touched = true; /* - FastSet> f2 = new FastSet>(); + FastSet> f2 = new FastSet>(); for(Forest f : hp) if (f instanceof Forest.One) f2.add(f); - else for(Forest ff : ((Forest.Many)f)) + else for(Forest ff : ((Forest.Many)f)) f2.add(ff); hp = f2; */ @@ -182,10 +182,10 @@ public abstract class Forest implements GraphViz.ToGraphViz { return true; } - void expand(HashSet> ht, HashSet> ignore, Tree bogus) { + void expand(HashSet> ht, HashSet> ignore, Tree bogus) { touched(); if (ignore.contains(this)) { ht.add(bogus); return; } - for (Forest f : hp) f.expand(ht, ignore, bogus); + for (Forest f : hp) f.expand(ht, ignore, bogus); } diff --git a/src/edu/berkeley/sbp/Parser.java b/src/edu/berkeley/sbp/Parser.java index a74ea5e..e884a4b 100644 --- a/src/edu/berkeley/sbp/Parser.java +++ b/src/edu/berkeley/sbp/Parser.java @@ -5,35 +5,35 @@ import edu.berkeley.sbp.Sequence.Position; import java.io.*; import java.util.*; -/** a parser which translates streams of Tokens of type T into a Forest */ -public abstract class Parser { +/** a parser which translates an Input<Token> into a Forest<NodeType> */ +public abstract class Parser { - protected final Table pt; + protected final Table pt; /** create a parser to parse the grammar with start symbol u */ - protected Parser(Union u, Topology top) { this.pt = new Table(u, top); } - protected Parser(Table pt) { this.pt = pt; } + protected Parser(Union u, Topology top) { this.pt = new Table(u, top); } + protected Parser(Table pt) { this.pt = pt; } /** implement this method to create the output forest corresponding to a lone shifted input token */ - protected abstract Forest shiftToken(Tok t, Input.Location newloc); + protected abstract Forest shiftToken(Token t, Input.Location newloc); boolean helpgc = true; public String toString() { return pt.toString(); } /** parse input, and return the shared packed parse forest (or throw an exception) */ - public Forest parse(Input input) throws IOException, ParseFailed { + public Forest parse(Input input) throws IOException, ParseFailed { GSS gss = new GSS(); Input.Location loc = input.getLocation(); - GSS.Phase current = gss.new Phase(null, this, null, input.next(), loc, null); + GSS.Phase current = gss.new Phase(null, this, null, input.next(), loc, null); current.newNode(null, Forest.create(null, null, null, false), pt.start, true); int count = 1; for(int idx=0;;idx++) { Input.Location oldloc = loc; loc = input.getLocation(); current.reduce(); - Forest forest = current.token==null ? null : shiftToken((Tok)current.token, loc); - GSS.Phase next = gss.new Phase(current, this, current, input.next(), loc, forest); + Forest forest = current.token==null ? null : shiftToken((Token)current.token, loc); + GSS.Phase next = gss.new Phase(current, this, current, input.next(), loc, forest); if (!helpgc) { FileOutputStream fos = new FileOutputStream("out-"+idx+".dot"); PrintWriter p = new PrintWriter(new OutputStreamWriter(fos)); @@ -45,7 +45,7 @@ public abstract class Parser { p.close(); } count = next.size(); - if (current.isDone()) return (Forest)gss.finalResult; + if (current.isDone()) return (Forest)gss.finalResult; current = next; } } @@ -53,21 +53,21 @@ public abstract class Parser { // Table ////////////////////////////////////////////////////////////////////////////// /** an SLR(1) parse table which may contain conflicts */ - static class Table extends Walk.Cache { + static class Table extends Walk.Cache { public String toString() { StringBuffer sb = new StringBuffer(); sb.append("parse table"); - for(State state : all_states.values()) { + for(State state : all_states.values()) { sb.append(" " + state + "\n"); - for(Topology t : state.shifts) { + for(Topology t : state.shifts) { sb.append(" shift \""+ new edu.berkeley.sbp.chr.CharTopology((IntegerTopology)t)+"\" => "); for(State st : state.shifts.getAll(t)) sb.append(st.idx+" "); sb.append("\n"); } - for(Topology t : state.reductions) + for(Topology t : state.reductions) sb.append(" reduce \""+ new edu.berkeley.sbp.chr.CharTopology((IntegerTopology)t)+"\" => " + state.reductions.getAll(t) + "\n"); @@ -94,14 +94,14 @@ public abstract class Parser { } /** the start state */ - public final State start; + public final State start; /** the state from which no reductions can be done */ - private final State dead_state; + private final State dead_state; /** used to generate unique values for State.idx */ private int master_state_idx = 0; - HashMap,State> all_states = new HashMap,State>(); + HashMap,State> all_states = new HashMap,State>(); /** construct a parse table for the given grammar */ public Table(Topology top) { this("s", top); } @@ -121,11 +121,11 @@ public abstract class Parser { HashSet hp = new HashSet(); reachable(start0, hp); - this.dead_state = new State(new HashSet(), all_states, all_elements); - this.start = new State(hp, all_states, all_elements); + this.dead_state = new State(new HashSet(), all_states, all_elements); + this.start = new State(hp, all_states, all_elements); // for each state, fill in the corresponding "row" of the parse table - for(State state : all_states.values()) + for(State state : all_states.values()) for(Position p : state.hs) { // the Grammar's designated "last position" is the only accepting state @@ -147,7 +147,7 @@ public abstract class Parser { state.shifts.addAll(state.gotoSetTerminals.subset(((Atom)p.element()).getTokenTopology())); } if (top instanceof IntegerTopology) - for(State state : all_states.values()) { + for(State state : all_states.values()) { state.oreductions = state.reductions.optimize(((IntegerTopology)top).functor()); state.oshifts = state.shifts.optimize(((IntegerTopology)top).functor()); } @@ -161,34 +161,34 @@ public abstract class Parser { /** a single state in the LR table and the transitions possible from it */ - class State implements Comparable>, IntegerMappable, Iterable { + class State implements Comparable>, IntegerMappable, Iterable { public final int idx = master_state_idx++; private final HashSet hs; - public transient HashMap> gotoSetNonTerminals = new HashMap>(); - private transient TopologicalBag> gotoSetTerminals = new TopologicalBag>(); + public transient HashMap> gotoSetNonTerminals = new HashMap>(); + private transient TopologicalBag> gotoSetTerminals = new TopologicalBag>(); - private TopologicalBag reductions = new TopologicalBag(); + private TopologicalBag reductions = new TopologicalBag(); private HashSet eofReductions = new HashSet(); - private TopologicalBag> shifts = new TopologicalBag>(); + private TopologicalBag> shifts = new TopologicalBag>(); private boolean accept = false; - private VisitableMap> oshifts = null; - private VisitableMap oreductions = null; + private VisitableMap> oshifts = null; + private VisitableMap oreductions = null; // Interface Methods ////////////////////////////////////////////////////////////////////////////// boolean isAccepting() { return accept; } public Iterator iterator() { return hs.iterator(); } - boolean canShift(Tok t) { return oshifts!=null && oshifts.contains(t); } - void invokeShifts(Tok t, Invokable,B,C> irbc, B b, C c) { + boolean canShift(Token t) { return oshifts!=null && oshifts.contains(t); } + void invokeShifts(Token t, Invokable,B,C> irbc, B b, C c) { oshifts.invoke(t, irbc, b, c); } - boolean canReduce(Tok t) { return oreductions != null && (t==null ? eofReductions.size()>0 : oreductions.contains(t)); } - void invokeReductions(Tok t, Invokable irbc, B b, C c) { + boolean canReduce(Token t) { return oreductions != null && (t==null ? eofReductions.size()>0 : oreductions.contains(t)); } + void invokeReductions(Token t, Invokable irbc, B b, C c) { if (t==null) for(Position r : eofReductions) irbc.invoke(r, b, c); else oreductions.invoke(t, irbc, b, c); } @@ -218,7 +218,7 @@ public abstract class Parser { * */ public State(HashSet hs, - HashMap,State> all_states, + HashMap,State> all_states, HashSet all_elements) { this.hs = hs; @@ -231,7 +231,7 @@ public abstract class Parser { // of _new_ positions (positions after shifting). These mappings are // collectively known as the _closure_ - TopologicalBag bag0 = new TopologicalBag(); + TopologicalBag bag0 = new TopologicalBag(); for(Position position : hs) { if (position.isLast() || !(position.element() instanceof Atom)) continue; Atom a = (Atom)position.element(); @@ -244,10 +244,10 @@ public abstract class Parser { // set, add that character set to the goto table (with the State corresponding to the // computed next-position set). - for(Topology r : bag0) { + for(Topology r : bag0) { HashSet h = new HashSet(); for(Position p : bag0.getAll(r)) h.add(p); - gotoSetTerminals.put(r, all_states.get(h) == null ? new State(h, all_states, all_elements) : all_states.get(h)); + gotoSetTerminals.put(r, all_states.get(h) == null ? new State(h, all_states, all_elements) : all_states.get(h)); } // Step 2: for every non-Atom element (ie every Element which has a corresponding reduction), @@ -270,7 +270,7 @@ public abstract class Parser { } OUTER: for(SequenceOrElement y : move) { HashSet h = move.getAll(y); - State s = all_states.get(h) == null ? new State(h, all_states, all_elements) : all_states.get(h); + State s = all_states.get(h) == null ? new State(h, all_states, all_elements) : all_states.get(h); // if a reduction is "lame", it should wind up in the dead_state after reducing if (y instanceof Sequence) { for(Position p : hs) { @@ -304,7 +304,7 @@ public abstract class Parser { return ret.toString(); } - public int compareTo(State s) { return idx==s.idx ? 0 : idx < s.idx ? -1 : 1; } + public int compareTo(State s) { return idx==s.idx ? 0 : idx < s.idx ? -1 : 1; } public int toInt() { return idx; } } } diff --git a/src/edu/berkeley/sbp/Sequence.java b/src/edu/berkeley/sbp/Sequence.java index b67f8a7..5680d6c 100644 --- a/src/edu/berkeley/sbp/Sequence.java +++ b/src/edu/berkeley/sbp/Sequence.java @@ -8,7 +8,7 @@ import java.lang.reflect.*; import java.lang.ref.*; /** juxtaposition; zero or more adjacent Elements; can specify a rewriting */ -public abstract class Sequence /*extends Element*/ implements Iterable, SequenceOrElement { +public abstract class Sequence implements Iterable, SequenceOrElement { protected final Element[] elements; @@ -66,7 +66,7 @@ public abstract class Sequence /*extends Element*/ implements Iterable, public Sequence and(Sequence s) { Sequence ret = dup(); ret.needs.add(s); return ret; } /** return a new sequence identical to this one, but with a negative conjunct s */ - public Sequence not(Sequence s) { Sequence ret = dup(); ret.hates.add(s); s.hated.add(ret); return ret; } + public Sequence andnot(Sequence s) { Sequence ret = dup(); ret.hates.add(s); s.hated.add(ret); return ret; } /** return a new sequence identical to this one, but with a follow-set restricted to a */ public Sequence followedBy(Atom a) { Sequence ret = dup(); ret.follow = a; return ret; } @@ -176,6 +176,14 @@ public abstract class Sequence /*extends Element*/ implements Iterable, sb.append("-> "); sb.append(follow); } + for(Sequence s : needs) { + sb.append("& "); + sb.append(s); + } + for(Sequence s : hates) { + sb.append("&~ "); + sb.append(s); + } return sb; } diff --git a/src/edu/berkeley/sbp/Tree.java b/src/edu/berkeley/sbp/Tree.java index 8368673..f87c5dc 100644 --- a/src/edu/berkeley/sbp/Tree.java +++ b/src/edu/berkeley/sbp/Tree.java @@ -7,19 +7,19 @@ import java.util.*; import java.lang.reflect.*; /** a tree (or node in a tree); see jargon.txt for details */ -public class Tree - extends PrintableTree> - implements Iterable> { +public class Tree + extends PrintableTree> + implements Iterable> { private final Input.Region location; - private final T head; - private final Tree[] children; + private final NodeType head; + private final Tree[] children; private final boolean lift; /** the element at the head of the tree */ - public T head() { return head; } + public NodeType head() { return head; } - private Tree lifted() { return children[children.length-1]; } + private Tree lifted() { return children[children.length-1]; } /** the number of children the tree has */ public int size() { @@ -29,10 +29,10 @@ public class Tree } /** the tree's children */ - public Iterable> children() { return this; } + public Iterable> children() { return this; } /** the tree's children */ - public Iterator> iterator() { + public Iterator> iterator() { return lift ? new ConcatenateIterator(new ArrayIterator(children, 0, children.length-1), children[children.length-1].iterator()) @@ -40,7 +40,7 @@ public class Tree } /** get the ith child */ - public Tree child(int i) { + public Tree child(int i) { return lift && i >= children.length-1 ? children[children.length-1].child(i-(children.length-1)) : children[i]; @@ -49,11 +49,11 @@ public class Tree /** get the input region that this tree was parsed from */ public Input.Region getRegion() { return location; } - public Tree(Input.Region loc, T head) { this(loc, head, null); } - public Tree(Input.Region loc, T head, Tree[] children) { this(loc, head, children, false); } + public Tree(Input.Region loc, NodeType head) { this(loc, head, null); } + public Tree(Input.Region loc, NodeType head, Tree[] children) { this(loc, head, children, false); } /** package-private constructor, allows setting the "lift" bit */ - Tree(Input.Region loc, T head, Tree[] children, boolean lift) { + Tree(Input.Region loc, NodeType head, Tree[] children, boolean lift) { this.location = loc; this.head = head; this.lift = lift && children != null && children.length > 0; diff --git a/src/edu/berkeley/sbp/Union.java b/src/edu/berkeley/sbp/Union.java index 77a1581..d52c6e9 100644 --- a/src/edu/berkeley/sbp/Union.java +++ b/src/edu/berkeley/sbp/Union.java @@ -52,12 +52,20 @@ public class Union extends Element implements Iterable { /** adds an alternative */ public void add(Sequence s) { + /* + FIXME if (viewed) - throw new RuntimeException("attempt to add a Sequence to a Union that has already been examined"); + throw new RuntimeException("attempt to add a Sequence to a Union that has already been examined:\n "+this); + */ if (alternatives.contains(s)) return; alternatives.add(s); } + /** adds a one-element sequence */ + public void add(Element e) { + add(Sequence.create(e)); + } + // Epsilon Form ////////////////////////////////////////////////////////////////////////////// diff --git a/src/edu/berkeley/sbp/chr/CharParser.java b/src/edu/berkeley/sbp/chr/CharParser.java index 9e9a01a..c901388 100644 --- a/src/edu/berkeley/sbp/chr/CharParser.java +++ b/src/edu/berkeley/sbp/chr/CharParser.java @@ -12,6 +12,7 @@ public class CharParser extends Parser { public Forest parse(InputStream is) throws IOException, ParseFailed { return super.parse(new CharInput(is)); } public Forest parse(Reader r) throws IOException, ParseFailed { return super.parse(new CharInput(r)); } + public Forest parse(String s) throws IOException, ParseFailed { return parse(new StringReader(s)); } public CharParser(Union u) { super(u, new CharTopology()); } diff --git a/src/edu/berkeley/sbp/meta/MetaGrammarBindings.java b/src/edu/berkeley/sbp/meta/MetaGrammarBindings.java index 9232d5c..00d4aa4 100644 --- a/src/edu/berkeley/sbp/meta/MetaGrammarBindings.java +++ b/src/edu/berkeley/sbp/meta/MetaGrammarBindings.java @@ -82,7 +82,7 @@ public class MetaGrammarBindings extends AnnotationGrammarBindings { } if (sequences.length==1) break; Sequence seq = Sequence.create(u2); - for(Sequence s : bad2) seq = seq.not(s); + for(Sequence s : bad2) seq = seq.andnot(s); u.add(seq); bad2.add(Sequence.create(u2)); } @@ -149,7 +149,7 @@ public class MetaGrammarBindings extends AnnotationGrammarBindings { } if (sequences.length==1) break; Sequence seq = Sequence.create(u2); - for(Sequence s : bad2) seq = seq.not(s); + for(Sequence s : bad2) seq = seq.andnot(s); u.add(seq); bad2.add(Sequence.create(u2)); } @@ -225,7 +225,7 @@ public class MetaGrammarBindings extends AnnotationGrammarBindings { public Sequence build(Context cx, Union u, NonTerminalNode cnt) { Sequence ret = build0(cx, cnt); for(Seq s : and) { Sequence dork = s.build(cx, u, cnt); ret = ret.and(dork); } - for(Seq s : not) { Sequence dork = s.build(cx, u, cnt); ret = ret.not(dork); } + for(Seq s : not) { Sequence dork = s.build(cx, u, cnt); ret = ret.andnot(dork); } u.add(ret); return ret; } diff --git a/src/edu/berkeley/sbp/misc/Demo2.java b/src/edu/berkeley/sbp/misc/Demo2.java new file mode 100644 index 0000000..fbe0ecf --- /dev/null +++ b/src/edu/berkeley/sbp/misc/Demo2.java @@ -0,0 +1,43 @@ +package edu.berkeley.sbp.misc; + +import edu.berkeley.sbp.*; + +public class Demo2 { + + private static Atom atom(char c) { + return new edu.berkeley.sbp.chr.CharAtom(c); } + private static Atom atom(char c1, char c2) { + return new edu.berkeley.sbp.chr.CharAtom(c1, c2); } + + public static void main(String[] s) throws Exception { + + Union expr = new Union("Expr"); + + Element[] add = new Element[] { expr, atom('+'), expr }; + Element[] mult = new Element[] { expr, atom('*'), expr }; + Element[] paren = new Element[] { atom('('), expr, atom(')') }; + + Sequence addSequence = Sequence.create("add", add, null, false); + Sequence multSequence = Sequence.create("mult", mult, null, false); + + // uncomment this line to disambiguate + multSequence = multSequence.andnot(Sequence.create("add", add, null, false)); + + expr.add(Sequence.create(paren, 1)); + expr.add(addSequence); + expr.add(multSequence); + expr.add(Sequence.create(atom('0', '9'))); + + String input = "8+(1+3)*7"; + + System.out.println("input: \""+input+"\""); + + StringBuffer sb = new StringBuffer(); + expr.toString(sb); + System.out.println("grammar: \n"+sb); + + Forest f = new edu.berkeley.sbp.chr.CharParser(expr).parse(input); + System.out.println("output: "+f.expand1().toPrettyString()); + } + +} diff --git a/src/edu/berkeley/sbp/misc/Java15.java b/src/edu/berkeley/sbp/misc/Java15.java new file mode 100644 index 0000000..085f26e --- /dev/null +++ b/src/edu/berkeley/sbp/misc/Java15.java @@ -0,0 +1,47 @@ +// Copyright 2006 the Contributors, as shown in the revision logs. +// Licensed under the Apache Public Source License 2.0 ("the License"). +// You may not use this file except in compliance with the License. + +package edu.berkeley.sbp.misc; +import edu.berkeley.sbp.*; +import edu.berkeley.sbp.misc.*; +import edu.berkeley.sbp.meta.*; +import edu.berkeley.sbp.util.*; +import edu.berkeley.sbp.chr.*; +import edu.berkeley.sbp.bind.*; +import java.util.*; +import java.io.*; + +public class Java15 { + + public static void main(String[] s) throws Exception { + + try { + Tree res = new CharParser(MetaGrammar.newInstance()).parse(new FileInputStream(s[0])).expand1(); + + //AnnotationGrammarBindings resolver = new AnnotationGrammarBindings(Java15.class); + Grammar.Bindings resolver = new Grammar.Bindings(); + Union javaGrammar = Grammar.create(res, "s", resolver); + + System.err.println("parsing " + s[1]); + Tree t = new CharParser(javaGrammar).parse(new FileInputStream(s[1])).expand1(); + + System.out.println("tree:\n" + t.toPrettyString()); + + } catch (Ambiguous a) { + FileOutputStream fos = new FileOutputStream("/Users/megacz/Desktop/out.dot"); + PrintWriter p = new PrintWriter(new OutputStreamWriter(fos)); + GraphViz gv = new GraphViz(); + a.getAmbiguity().toGraphViz(gv); + gv.dump(p); + p.flush(); + p.close(); + a.printStackTrace(); + + } catch (Exception e) { + e.printStackTrace(); + } + + } + +} diff --git a/src/edu/berkeley/sbp/package.html b/src/edu/berkeley/sbp/package.html index bcba1d8..c55b5cb 100644 --- a/src/edu/berkeley/sbp/package.html +++ b/src/edu/berkeley/sbp/package.html @@ -1,10 +1,12 @@ - - - IMPORTANT: - BE SURE TO READ THE FILE - doc/jargon.txt FIRST! Also, see the legend at the bottom of this page. - + + + The public APIs in this package are stable; package-private + APIs and all other packages are subject to change in future + releases. Be sure to read doc/jargon.txt and the description below. + @@ -27,5 +29,140 @@ five categories: Exceptions. + +Theory of Operation + + + +The input that you parse is considered to be a stream of +Tokens; this stream is represented by an +Input<Token>. In order to create this Input, +you must first decide what kind of tokens you want to parse. Based on +this decision, you should then implement subclasses of Input, +Parser, and Atom for that token type. If you are +parsing characters (which you usually are), these subclasses are +provided in the edu.berkeley.sbp.chr.* package so you don't +have to write them yourself. + + + +You then create a grammar by instantiating objects belonging to your +subclass of Atom and forming them into sequences using +Sequence.create___() and new Union(). + + + +Ultimately you will wind up with an instance of Union +corresponding to the "start nonterminal" of your grammar. You can +then provide this Union to the constructor of your +Parser subclass and invoke the Parser.parse(Input) +method on the Input to be parsed. + + + +The result will be a Forest, which is an efficient +representation of a set of one or more trees that may share subtrees. + + + +If the parse was ambiguous, you can use +Forest.expand(HashSet) to expand the Forest into all the +possible trees (there is not yet a stable API for inspecting the +Forest directly). + + + +If the parse was not ambiguous, you can call +Forest.expand1() to return the single possible parsing as a +Tree. You would then typically use the methods of the +Tree class to examine the parse tree. + + +Guide to the API + +Example + + +package edu.berkeley.sbp.misc; + +import edu.berkeley.sbp.*; + +public class Demo2 { + + private static Atom atom(char c) { + return new edu.berkeley.sbp.chr.CharAtom(c); } + private static Atom atom(char c1, char c2) { + return new edu.berkeley.sbp.chr.CharAtom(c1, c2); } + + public static void main(String[] s) throws Exception { + + Union expr = new Union("Expr"); + + Element[] add = new Element[] { expr, atom('+'), expr }; + Element[] mult = new Element[] { expr, atom('*'), expr }; + Element[] paren = new Element[] { atom('('), expr, atom(')') }; + + Sequence addSequence = Sequence.create("add", add, null, false); + Sequence multSequence = Sequence.create("mult", mult, null, false); + + + // uncomment this line to disambiguate + //multSequence = multSequence.andnot(Sequence.create("add", add, null, false)); + + + expr.add(Sequence.create(paren, 1)); + expr.add(addSequence); + expr.add(multSequence); + expr.add(Sequence.create(atom('0', '9'))); + + String input = "8+(1+3)*7"; + + System.out.println("input: \""+input+"\""); + + StringBuffer sb = new StringBuffer(); + expr.toString(sb); + System.out.println("grammar: \n"+sb); + + Forest f = new edu.berkeley.sbp.chr.CharParser(expr).parse(input); + System.out.println("output: "+f.expand1().toPrettyString()); + } + +} + + + +Executing this code gives the following: + + + +java -Xmx900m -cp edu.berkeley.sbp.jar edu.berkeley.sbp.misc.Demo2 +input: "8+(1+3)*7" +grammar: +Expr = [(] Expr [)] + | "add":: Expr [+] Expr + | "mult":: Expr [*] Expr + | [0-9] + +Exception in thread "main" unresolved ambiguity; shared subtrees are shown as "*" + possibility: mult:{add:{* * *} * *} + possibility: add:{* * mult:{* * *}} + + + +If we uncomment the line in the example, the result is: + + + +java -Xmx900m -cp edu.berkeley.sbp.jar edu.berkeley.sbp.misc.Demo2 +input: "8+(1+3)*7" +grammar: +Expr = [(] Expr [)] + | "add":: Expr [+] Expr + | "mult":: Expr [*] Expr &~ "add":: Expr [+] Expr + | [0-9] + +output: add:{8 + mult:{add:{1 + 3} * 7}} + + diff --git a/tests/java15.g b/tests/java15.g new file mode 100644 index 0000000..f89658e --- /dev/null +++ b/tests/java15.g @@ -0,0 +1,123 @@ + +s = ws! CompilationUnit ws! + +CompilationUnit = + CompilationUnit:: Package? Import* (InterfaceDecl|ClassDecl)* /ws + +Package = Annotations?! "package" PackageName ";" /ws + +Annotations = () + +Import = "import" TypeName ";" /ws + | "import" (TypeName ".*") ";" /ws + | "import" "static" TypeName ";" /ws + | "import" "static" (TypeName ".*") ";" /ws + +TypeName = Identifier +/ "." +PackageName = Identifier +/ "." + +Modifiers = Modifiers:: ("public" | "protected" | "private") (ws! "abstract")? + | Modifiers:: "abstract" + +ClassDecl = Class:: Modifiers "class" TypeDecl ClassBody /ws +InterfaceDecl = Interface:: Modifiers "interface" TypeDecl ClassBody /ws + +TypeDecl = Identifier + | GenericTypeDecl:: Identifier "<" (TypeArg +/ Comma) ">" /ws + +TypeArg = Identifier + | Extends:: Identifier "extends" Type /ws + | Super:: Identifier "super" Type /ws + | Exist:: "?" + | ExistExtends:: "?" "extends" Type + | Intersect:: Identifier "&" Type + +ClassBody = "{" (BodyDecl +/ ws) "}" /ws + | "{" ws! "}" + +BodyDecl = FieldDecl | MethodDecl | ClassDecl | InterfaceDecl + +FieldDecl = Field:: Modifiers Type Identifier ";" /ws +MethodDecl = Method:: MethodHeader (";" | MethodBody) /ws + +MethodHeader = MethodHeader:: Modifiers Type Identifier Args /ws +MethodBody = "{" "}" /ws + +Args = "(" (Arg+/Comma) ")" /ws + | "(" ws! ")" +Arg = Arg:: Type Identifier /ws + +Type = BareType | GenericType | ArrayType +BareType = Type:: TypeName | "boolean" | "int" | "double" | "float" | "char" | "short" | "long" | "void" +GenericType = GenericType:: TypeName "<" (Type+/Comma) ">" /ws +ArrayType = ArrayOf:: (BareType | GenericType) "[]" /ws + +ws = [\r\n ]** +Comma = ws! "," ws! + +JavaLetter = [a-zA-Z_$] +Identifier = JavaLetter++ + &~ ([0-9]! ~[]*!) + &~ Keyword + &~ BooleanLiteral + &~ NullLiteral +BooleanLiteral = "true" | "false" +NullLiteral = "null" + +// http://java.sun.com/docs/books/jls/third_edition/html/lexical.html#29542 +// +//HexDigit = +//UnicodeEscape = "\\u" [0-9] [0-9] [0-9] [0-9] // this is valid even inside strings/comments +// +//// Ctrl-Z +//// Unicode escapes (including \\u garbage) +// +//WhiteSpace = [\r\n\t ] +//Comment = "/*" (~[]* &~ (~[]*! "*/" ~[]*!)) "*/" +// | "//" [^\r\n]* [\r\n]! +// +//Token = Identifier | Keyword | Literal | Separator | Operator +// +// +//Literal = +// | IntegerLiteral +// | FloatingPointLiteral +// | BooleanLiteral +// | CharacterLiteral +// | StringLiteral +// | NullLiteral +// +// +//FloatLiteral = (DecimalFloatLiteral > HexFloatLiteral) [dDfF]? +//DecimalFloatLiteral = [0-9]++ "." [0-9]++ [ep] [+-]? [0-9]++ +//HexFloatLiteral = ("0x"|"0X") [0-9a-fA-F]++ "." [0-9a-fA-F]++ [ep] [+-]? [0-9a-fA-F]++ +// +//CharLiteral = "'" ([^\\\'] | "\\" ~[]) "'" +//StringLiteral = "\"" ([^\\\"] | "\\" ~[])* "\"" +// +//IntegerLiteral = (DecimalLiteral | OctalLiteral | HexLiteral) [lL]? +//DecimalLiteral = [0-9]++ &~ OctalLiteral +//OctalLiteral = "0" [0-9]++ &~ "0" +//HexLiteral = ("0x"|"0X") [0-9a-fA-F]++ +// + +Keyword = + "abstract" | "continue" | "for" | "new" | "switch" + | "assert" | "default" | "if" | "package" | "synchronized" + | "boolean" | "do" | "goto" | "private" | "this" + | "break" | "double" | "implements" | "protected" | "throw" + | "byte" | "else" | "import" | "public" | "throws" + | "case" | "enum" | "instanceof" | "return" | "transient" + | "catch" | "extends" | "int" | "short" | "try" + | "char" | "final" | "interface" | "static" | "void" + | "class" | "finally" | "long" | "strictfp" | "volatile" + | "const" | "float" | "native" | "super" | "while" + +// We don't obey this: +// +// "The longest possible translation is used at each step, even if +// "the result does not ultimately make a correct program while +// "another lexical translation would. Thus the input characters +// "a--b are tokenized (-3.5) as a, --, b, which is not part of any +// "grammatically correct program, even though the tokenization a, +// "-, -, b could be part of a grammatically correct program. diff --git a/tests/java15.test b/tests/java15.test new file mode 100644 index 0000000..968eaf7 --- /dev/null +++ b/tests/java15.test @@ -0,0 +1,11 @@ +package foo; +import foo.bar; + +public class Baz < A extends Object , Q super Foo,Bop> > { + + public void foo(int x, char y, Bop> q) { + } + + protected abstract int bar(int c); + +}