X-Git-Url: http://git.megacz.com/?p=sbp.git;a=blobdiff_plain;f=src%2Fedu%2Fberkeley%2Fsbp%2Fmeta%2FGrammarAST.java;fp=src%2Fedu%2Fberkeley%2Fsbp%2Fmeta%2FGrammarAST.java;h=63c43920805f96df7274c2a18b2668d2d5c9dc52;hp=0000000000000000000000000000000000000000;hb=f17fca21d99030d325624f839efcd17bc93f1548;hpb=8a05c54202f3f5792bbd7146007c6718049fecd9 diff --git a/src/edu/berkeley/sbp/meta/GrammarAST.java b/src/edu/berkeley/sbp/meta/GrammarAST.java new file mode 100644 index 0000000..63c4392 --- /dev/null +++ b/src/edu/berkeley/sbp/meta/GrammarAST.java @@ -0,0 +1,545 @@ +// Copyright 2006 all rights reserved; see LICENSE file for BSD-style license + +package edu.berkeley.sbp.meta; +import edu.berkeley.sbp.util.*; +import edu.berkeley.sbp.*; +import edu.berkeley.sbp.chr.*; +import edu.berkeley.sbp.misc.*; +import java.util.*; +import java.lang.annotation.*; +import java.lang.reflect.*; +import java.io.*; + +// FIXME: deal with question-marks + +// FIXME: put back in associativity: ^")" +// A = A "->" (A) +// means that the FIRST "A" cannot match the SAME sequence in +// which the (A) occurs +// -- and add a test case + +// FIXME: make it so that we can have multi-result nonterminals +// so long as they always appear under double-colons? +// auto-insert the unwrap? + +// FIXME: put associativity back in + +// FIXME: "flat" sequences (no subtrees contain "::"s?) + +// freezing problem is related to comments in grammar files + +/** The java classes typically used to represent a parsed grammar AST; each inner class is a type of AST node. */ +public class GrammarBuilder { + + /** + * Create a grammar from a parse tree and binding resolver + * + * @param t a tree produced by parsing a grammar using the metagrammar + * @param s the name of the "start symbol" + * @param gbr a GrammarBindingResolver that resolves grammatical reductions into tree-node-heads + */ + public static Union buildFromAST(Tree grammarAST, String startingNonterminal, File[] includes) { + return new GrammarAST(includes, "").buildGrammar(grammarAST, startingNonterminal); + } + + public static Object illegalTag = ""; // this is the tag that should never appear in the non-dropped output FIXME + + private final String prefix; + private final File[] includes; + + public GrammarAST(File[] includes, String prefix) { + this.prefix = prefix; + this.includes = includes; + } + + // Methods ////////////////////////////////////////////////////////////////////////////// + + private Union buildGrammar(Tree t, String rootNonTerminal) { + return ((GrammarAST.GrammarNode)walk(t)).build(rootNonTerminal); + } + + public Object[] walkChildren(Tree t) { + Object[] ret = new Object[t.size()]; + for(int i=0; i 0) + head = head.substring(head.indexOf('.')+1); + if (head==null) throw new RuntimeException("head is null: " + t); + if (head.equals("|")) return walkChildren(t); + if (head.equals("RHS")) return walkChildren(t); + if (head.equals("Grammar")) return new GrammarNode(walkChildren(t)); + if (head.equals("(")) return new UnionNode((Seq[][])walkChildren(t.child(0))); + if (head.equals("Word")) return stringifyChildren(t); + if (head.equals("Elements")) return seq2((ElementNode[])Reflection.rebuild(walkChildren(t), ElementNode[].class)); + if (head.equals("NonTerminalReference")) return new ReferenceNode(stringifyChildren(t.child(0))); + //if (head.equals(")")) return new ReferenceNode(stringifyChildren(t.child(0))); + if (head.equals("{")) return new XTree((Seq)walk(t.child(0))); + if (head.equals("::")) return tag((String)walk(t.child(0)), (Seq)walk(t.child(1))); + if (head.equals("++")) return plusmax((ElementNode)walk(t.child(0))); + if (head.equals("...")) return star(new TildeNode(new AtomNode())); + if (head.equals("+")) return plus((ElementNode)walk(t.child(0))); + if (head.equals("++/")) return plusmaxfollow((ElementNode)walk(t.child(0)), (ElementNode)walk(t.child(1))); + if (head.equals("+/")) return plusfollow((ElementNode)walk(t.child(0)), (ElementNode)walk(t.child(1))); + if (head.equals("**")) return starmax((ElementNode)walk(t.child(0))); + if (head.equals("*")) return star((ElementNode)walk(t.child(0))); + if (head.equals("**/")) return starmaxfollow((ElementNode)walk(t.child(0)), (ElementNode)walk(t.child(1))); + if (head.equals("*/")) return starfollow((ElementNode)walk(t.child(0)), (ElementNode)walk(t.child(1))); + if (head.equals("?")) return question((ElementNode)walk(t.child(0))); + if (head.equals("!")) return new DropNode((ElementNode)walk(t.child(0))); + if (head.equals("^")) return new LiteralNode((String)walk(t.child(0)), true); + if (head.equals("`")) { + ElementNode ret = (ElementNode)walk(t.child(0)); + ret.lifted = true; + return ret; + } + if (head.equals("Quoted")) return stringifyChildren(t); + if (head.equals("Literal")) return new LiteralNode((String)walk(t.child(0))); + if (head.equals("->")) return arrow((Seq)walk(t.child(0)), (ElementNode)walk(t.child(1))); + if (head.equals("DropNT")) return new NonTerminalNode((String)walk(t.child(0)), (Seq[][])walkChildren(t.child(1)), false, null, true); + if (head.equals("=") && t.size()==2) return new NonTerminalNode((String)walk(t.child(0)), (Seq[][])walk(t.child(1)), true, null, false); + if (head.equals("=")) return new NonTerminalNode((String)walk(t.child(0)), (Seq[][])walk(t.child(2)), true, (String)walk(t.child(1)), false); + if (head.equals("&")) return and2((Seq)walk(t.child(0)), (Seq)walk(t.child(1))); + if (head.equals("&~")) return andnot2((Seq)walk(t.child(0)), (Seq)walk(t.child(1))); + if (head.equals("/")) return ((Seq)walk(t.child(0))).separate((ElementNode)walk(t.child(1))); + if (head.equals("()")) return epsilon; + if (head.equals("[")) return new AtomNode((AtomNodeRange[])Reflection.rebuild(walkChildren(t), AtomNodeRange[].class)); + if (head.equals("\\{")) return new DropNode(new AtomNode(new AtomNodeRange(CharAtom.left, CharAtom.left))); + if (head.equals("\\}")) return new DropNode(new AtomNode(new AtomNodeRange(CharAtom.right, CharAtom.right))); + if (head.equals("~")) return new TildeNode((ElementNode)walk(t.child(0))); + if (head.equals("~~")) { + Seq seq = new Seq(star(new TildeNode(new AtomNode()))); + return seq.andnot((Seq)walk(t.child(0))); + } + if (head.equals("Range") && t.size()==1) return new AtomNodeRange(unescape(t).charAt(0)); + if (head.equals("Range")) return new AtomNodeRange(unescape(t).charAt(0), unescape(t).charAt(1)); + if (head.equals("\"\"")) return ""; + if (head.equals("\n")) return "\n"; + if (head.equals("\r")) return "\r"; + if (head.equals("grammar.Grammar")) return walkChildren(t); + if (head.equals("SubGrammar")) return GrammarBuilder.buildFromAST(t.child(0), "s", includes); + if (head.equals("NonTerminal")) + return new NonTerminalNode((String)walk(t.child(0)), + (Seq[][])walkChildren(t.child(1)), false, null, false); + if (head.equals("Colons")) { + String tag = (String)walk(t.child(0)); + Seq[][] seqs = (Seq[][])walk(t.child(1)); + for(Seq[] seq : seqs) + for(int i=0; i " + (head.equals("..."))); + } + + // Nodes ////////////////////////////////////////////////////////////////////////////// + + /** Root node of a grammar's AST; a set of named nonterminals */ + private class GrammarNode extends HashMap { + public GrammarNode(NonTerminalNode[] nonterminals) { + for(NonTerminalNode nt : nonterminals) { + if (nt==null) continue; + if (this.get(nt.name)!=null) + throw new RuntimeException("duplicate definition of nonterminal \""+nt.name+"\""); + this.put(nt.name, nt); + } + } + public GrammarNode(Object[] nt) { add(nt); } + private void add(Object o) { + if (o==null) return; + else if (o instanceof Object[]) for(Object o2 : (Object[])o) add(o2); + else if (o instanceof NonTerminalNode) { + NonTerminalNode nt = (NonTerminalNode)o; + if (this.get(nt.name)!=null) + throw new RuntimeException("duplicate definition of nonterminal \""+nt.name+"\""); + this.put(nt.name, nt); + } + else if (o instanceof GrammarNode) + for(NonTerminalNode n : ((GrammarNode)o).values()) + add(n); + } + public String toString() { + String ret = "[ "; + for(NonTerminalNode nt : values()) ret += nt + ", "; + return ret + " ]"; + } + public Union build(String rootNonterminal) { + Context cx = new Context(this); + Union u = null; + for(GrammarBuilder.NonTerminalNode nt : values()) + if (nt.name.equals(rootNonterminal)) + return (Union)cx.get(nt.name); + return null; + } + } + + public class UnionNode extends ElementNode { + public Seq[][] sequences; + public String sep = null; + public boolean rep; + public UnionNode(Seq[][] sequences) { this(sequences, false, null); } + public UnionNode(Seq[][] sequences, boolean rep, String sep) { + this.sequences = sequences; + this.rep = rep; + this.sep = sep; + } + public Atom toAtom(Context cx) { + Atom ret = null; + for(Seq[] ss : sequences) + for(Seq s : ss) + ret = ret==null ? s.toAtom(cx) : (Atom)ret.union(s.toAtom(cx)); + return ret; + } + public Element build(Context cx, NonTerminalNode cnt, boolean dropall) { + return buildIntoPreallocatedUnion(cx, cnt, dropall, new Union(null, false)); } + public Element buildIntoPreallocatedUnion(Context cx, NonTerminalNode cnt, boolean dropall, Union u) { + Union urep = null; + if (rep) { + urep = new Union(null, false); + urep.add(Sequence.create(new Element[0], cnt.name)); + urep.add(sep==null + ? Sequence.create(new Element[] { u }, 0) + : Sequence.create(new Element[] { cx.get(sep), u }, 1)); + } + HashSet bad2 = new HashSet(); + for(int i=0; i and = new HashSet(); + HashSet not = new HashSet(); + ElementNode[] elements; + ElementNode follow; + String tag = null; + public Seq(ElementNode e) { this(new ElementNode[] { e }); } + public Seq(ElementNode[] elements) { + this.elements = elements; + for(int i=0; i, **, ++, or a similar character-class"+ + " operator on a [potentially] multicharacter production"); + return elements[0].toAtom(cx); + } + public Seq tag(String tag) { this.tag = tag; return this; } + public Seq follow(ElementNode follow) { this.follow = follow; return this; } + public Seq and(Seq s) { and.add(s); return this; } + public Seq andnot(Seq s) { not.add(s); return this; } + public Seq dup() { + Seq ret = new Seq(elements); + ret.and.addAll(and); + ret.not.addAll(not); + ret.follow = follow; + ret.tag = tag; + return ret; + } + public Seq separate(ElementNode sep) { + ElementNode[] elements = new ElementNode[this.elements.length * 2 - 1]; + for(int i=0; i 0 && elements[0].lifted) + lift = true; + if (!multiNonDrop) { + if (idx == -1) + ret = tag==null + ? Sequence.create(els, illegalTag) + : Sequence.createLeft(tag, els, drops, lift); + else if (tag==null) ret = Sequence.create(els, idx); + else ret = Sequence.createLeft(tag, els, drops, lift); + } + if (multiNonDrop) + ret = Sequence.createLeft(tag, els, drops, lift); + if (this.follow != null) + ret = ret.followedBy(this.follow.toAtom(cx)); + return ret; + } + } + + public class ReferenceNode extends ElementNode { + public String nonTerminal; + public ReferenceNode() { } + public NonTerminalNode resolve(Context cx) { + NonTerminalNode ret = cx.grammar.get(nonTerminal); + if (ret==null) throw new RuntimeException("undefined nonterminal: " + nonTerminal); + return ret; + } + public ReferenceNode(String nonTerminal) { this.nonTerminal = prefix + nonTerminal; } + public Atom toAtom(Context cx) { return cx.grammar.get(nonTerminal).toAtom(cx); } + public boolean drop(Context cx) { return resolve(cx).drop(cx); } + public Element build(Context cx, NonTerminalNode cnt, boolean dropall) { + if (!this.nonTerminal.startsWith(prefix)) nonTerminal = prefix + nonTerminal; + Element ret = cx.get(nonTerminal); + if (ret == null) throw new RuntimeException("unknown nonterminal \""+nonTerminal+"\""); + return ret; + } + } + + public class LiteralNode extends ElementNode { + private String string; + private final String thePrefix = prefix; + private boolean caret; + public LiteralNode(String string) { this(string, false); } + public LiteralNode(String string, boolean caret) { + this.string = string; + this.caret = caret; + } + public String getOwnerTag() { return caret ? thePrefix+string : super.getOwnerTag(); } + public String toString() { return "\""+string+"\""; } + public boolean drop(Context cx) { return true; } + public Atom toAtom(Context cx) { + if (string.length()!=1) return super.toAtom(cx); + edu.berkeley.sbp.util.Range.Set set = new edu.berkeley.sbp.util.Range.Set(); + set.add(string.charAt(0), string.charAt(0)); + return CharAtom.set(set); + } + public Element build(Context cx, NonTerminalNode cnt, boolean dropall) { return CharAtom.string(string); } + } + + public class AtomNode extends ElementNode { + AtomNodeRange[] ranges; + public AtomNode() { this(new AtomNodeRange[0]); } + public AtomNode(AtomNodeRange[] ranges) { this.ranges = ranges; } + public AtomNode(AtomNodeRange range) { this.ranges = new AtomNodeRange[] { range }; } + public Element build(Context cx, NonTerminalNode cnt, boolean dropall) { return toAtom(cx); } + public Atom toAtom(Context cx) { + edu.berkeley.sbp.util.Range.Set set = new edu.berkeley.sbp.util.Range.Set(); + for(AtomNodeRange r : ranges) set.add(r.first, r.last); + return CharAtom.set(set); + } + } + public class AtomNodeRange { + public char first; + public char last; + public AtomNodeRange(char only) { first = only; last = only; } + public AtomNodeRange(char first, char last) { this.first = first; this.last = last; } + } + + public class RepeatNode extends ElementNode { + public ElementNode e, sep; + public final boolean zero, many, max; + public RepeatNode(ElementNode e, ElementNode sep, boolean zero, boolean many, boolean max) { + this.e = e; this.sep = sep; this.zero = zero; this.many = many; this.max = max; + } + public Atom toAtom(Context cx) { return sep==null ? e.toAtom(cx) : super.toAtom(cx); } + public boolean drop(Context cx) { return e.drop(cx); } + public Element build(Context cx, NonTerminalNode cnt, boolean dropall) { + Element ret = build(cx, cnt, dropall, illegalTag); + String must = "must be tagged unless they appear within a dropped expression or their contents are dropped: "; + if (!dropall && !drop(cx) && !e.drop(cx)) + if (!many) throw new RuntimeException("options (?) " + must + ret); + else if (zero) throw new RuntimeException("zero-or-more repetitions (*) " + must + ret); + else throw new RuntimeException("one-or-more repetitions (+) " + must + ret); + return ret; + } + public Element build(Context cx, NonTerminalNode cnt, boolean dropall, Object repeatTag) { + return (!max) + ? Repeat.repeat(e.build(cx, null, dropall), zero, many, sep==null ? null : sep.build(cx, null, dropall), repeatTag) + : sep==null + ? Repeat.repeatMaximal(e.toAtom(cx), zero, many, repeatTag) + : Repeat.repeatMaximal(e.build(cx, null, dropall), zero, many, sep.toAtom(cx), repeatTag); + } + } + + public abstract class ElementNode { + public boolean lifted = false; + public String getOwnerTag() { return null; } + public boolean drop(Context cx) { return false; } + public Atom toAtom(Context cx) { throw new Error("can't convert a " + this.getClass().getName() + " to an atom: " + this); } + public abstract Element build(Context cx, NonTerminalNode cnt, boolean dropall); + } + + public abstract class ElementNodeWrapper extends ElementNode { + protected ElementNode _e; + public ElementNodeWrapper(ElementNode e) { this._e = e; } + public String getOwnerTag() { return _e.getOwnerTag(); } + public boolean drop(Context cx) { return _e.drop(cx); } + public Atom toAtom(Context cx) { return _e.toAtom(cx); } + public Element build(Context cx, NonTerminalNode cnt, boolean dropall) { return _e.build(cx, cnt, dropall); } + } + + public class TildeNode extends ElementNodeWrapper { + public TildeNode(ElementNode e) { super(e); } + public Atom toAtom(Context cx) { return (Atom)((Topology)_e.toAtom(cx).complement()); } + public Element build(Context cx, NonTerminalNode cnt, boolean dropall) { return toAtom(cx); } + } + + public class DropNode extends ElementNodeWrapper { + public DropNode(ElementNode e) { super(e); } + public boolean drop(Context cx) { return true; } + } + + public Seq and2(Seq s, Seq a) { a.alwaysDrop = true; return s.and(a); } + public Seq andnot2(Seq s, Seq a) { a.alwaysDrop = true; return s.andnot(a); } + public Seq arrow(Seq s, ElementNode e) { return s.follow(e); } + public Seq tag(String tagname, Seq s) { return s.tag(tagname); } + public Seq seq(ElementNode[] elements) { return new Seq(elements); } + public Seq seq2(ElementNode[] elements) { return new Seq(elements); } + public ElementNode plusmax(final ElementNode e) { return new RepeatNode(e, null, false, true, true); } + public ElementNode plus(final ElementNode e) { return new RepeatNode(e, null, false, true, false); } + public ElementNode plusmaxfollow(final ElementNode e, final ElementNode sep) { return new RepeatNode(e, sep, false, true, true); } + public ElementNode plusfollow(final ElementNode e, final ElementNode sep) { return new RepeatNode(e, sep, false, true, false); } + public ElementNode starmax(final ElementNode e) { return new RepeatNode(e, null, true, true, true); } + public ElementNode star(final ElementNode e) { return new RepeatNode(e, null, true, true, false); } + public ElementNode starmaxfollow(final ElementNode e, final ElementNode sep) { return new RepeatNode(e, sep, true, true, true); } + public ElementNode starfollow(final ElementNode e, final ElementNode sep) { return new RepeatNode(e, sep, true, true, false); } + public ElementNode question(final ElementNode e) { return new RepeatNode(e, null, true, false, false); } + + ////////////////////////////////////////////////////////////////////////////// + + public class Context { + public HashMap map = new HashMap(); + public GrammarNode grammar; + public Context(Tree t) { } + public Context(GrammarNode g) { this.grammar = g; } + public Union build() { + Union ret = null; + for(NonTerminalNode nt : grammar.values()) { + Union u = get(nt.name); + if ("s".equals(nt.name)) + ret = u; + } + return ret; + } + public Union peek(String name) { return map.get(name); } + public void put(String name, Union u) { map.put(name, u); } + public Union get(String name) { + Union ret = map.get(name); + if (ret != null) return ret; + NonTerminalNode nt = grammar.get(name); + if (nt==null) { + throw new Error("warning could not find " + name); + } else { + ret = new Union(name, false); + map.put(name, ret); + nt.buildIntoPreallocatedUnion(this, nt, nt.drop(this), ret); + } + return ret; + } + + } + +}