1 // Copyright 2006 all rights reserved; see LICENSE file for BSD-style license
3 package edu.berkeley.sbp.meta;
4 import edu.berkeley.sbp.util.*;
5 import edu.berkeley.sbp.*;
6 import edu.berkeley.sbp.chr.*;
7 import edu.berkeley.sbp.misc.*;
9 import java.lang.annotation.*;
10 import java.lang.reflect.*;
14 * The inner classes of this class represent nodes in the Abstract
15 * Syntax Tree of a grammar.
17 public class GrammarAST {
20 * Create a grammar from a parse tree and binding resolver
22 * @param t a tree produced by parsing a grammar using the metagrammar
23 * @param s the name of the "start symbol"
24 * @param gbr a GrammarBindingResolver that resolves grammatical reductions into tree-node-heads
26 public static Union buildFromAST(Tree grammarAST, String startingNonterminal, File[] includes) {
27 return new GrammarAST(includes, "").buildGrammar(grammarAST, startingNonterminal);
30 public static Object illegalTag = ""; // this is the tag that should never appear in the non-dropped output FIXME
32 private final String prefix;
33 private final File[] includes;
35 public GrammarAST(File[] includes, String prefix) {
37 this.includes = includes;
40 // Methods //////////////////////////////////////////////////////////////////////////////
42 private Union buildGrammar(Tree t, String rootNonTerminal) {
43 return ((GrammarAST.GrammarNode)walk(t)).build(rootNonTerminal);
46 public Object[] walkChildren(Tree t) {
47 Object[] ret = new Object[t.size()];
48 for(int i=0; i<ret.length; i++) {
49 ret[i] = walk(t.child(i));
50 if (ret[i] instanceof Object[])
51 ret[i] = Reflection.lub((Object[])ret[i]);
53 return Reflection.lub(ret);
55 public String stringifyChildren(Tree t) {
56 StringBuffer sb = new StringBuffer();
57 for(int i=0; i<t.size(); i++) {
58 sb.append(t.child(i).head());
59 sb.append(stringifyChildren(t.child(i)));
63 public String unescape(Tree t) {
64 StringBuffer sb = new StringBuffer();
65 for(int i=0; i<t.size(); i++)
66 sb.append(t.child(i).head()+stringifyChildren(t.child(i)));
70 public Object walk(Tree t) {
71 String head = (String)t.head();
72 while(head.indexOf('.') > 0)
73 head = head.substring(head.indexOf('.')+1);
74 if (head==null) throw new RuntimeException("head is null: " + t);
75 if (head.equals("|")) return walkChildren(t);
76 if (head.equals("RHS")) return walkChildren(t);
77 if (head.equals("Grammar")) return new GrammarNode(walkChildren(t));
78 if (head.equals("(")) return new UnionNode((Seq[][])walkChildren(t.child(0)));
79 if (head.equals("Word")) return stringifyChildren(t);
80 if (head.equals("Elements")) return seq2((ElementNode[])Reflection.rebuild(walkChildren(t), ElementNode[].class));
81 if (head.equals("NonTerminalReference")) return new ReferenceNode(stringifyChildren(t.child(0)));
82 if (head.equals(")")) return new ReferenceNode(stringifyChildren(t.child(0)), true);
83 if (head.equals("{")) return new BracedNode(walkSeq(t.child(0)));
84 if (head.equals("::")) return walkSeq(t.child(1)).tag(walkString(t.child(0)));
85 if (head.equals("...")) return new DropNode(new RepeatNode(new TildeNode(new AtomNode()), null, true, true, false));
87 if (head.equals("++")) return new RepeatNode(walkElement(t.child(0)), null, false, true, true);
88 if (head.equals("+")) return new RepeatNode(walkElement(t.child(0)), null, false, true, false);
89 if (head.equals("++/")) return new RepeatNode(walkElement(t.child(0)), walkElement(t.child(1)), false, true, true);
90 if (head.equals("+/")) return new RepeatNode(walkElement(t.child(0)), walkElement(t.child(1)), false, true, false);
91 if (head.equals("**")) return new RepeatNode(walkElement(t.child(0)), null, true, true, true);
92 if (head.equals("*")) return new RepeatNode(walkElement(t.child(0)), null, true, true, false);
93 if (head.equals("**/")) return new RepeatNode(walkElement(t.child(0)), walkElement(t.child(1)), true, true, true);
94 if (head.equals("*/")) return new RepeatNode(walkElement(t.child(0)), walkElement(t.child(1)), true, true, false);
95 if (head.equals("?")) return new RepeatNode(walkElement(t.child(0)), null, true, false, false);
97 if (head.equals("!")) return new DropNode(walkElement(t.child(0)));
98 if (head.equals("^")) return new LiteralNode(walkString(t.child(0)), true);
99 if (head.equals("`")) return walkElement(t.child(0)).lifted();
100 if (head.equals("Quoted")) return stringifyChildren(t);
101 if (head.equals("Literal")) return new LiteralNode(walkString(t.child(0)));
102 if (head.equals("->")) return walkSeq(t.child(0)).follow(walkElement(t.child(1)));
103 if (head.equals("DropNT")) return new NonTerminalNode(walkString(t.child(0)), (Seq[][])walkChildren(t.child(1)), false, null, true);
104 if (head.equals("=")) return new NonTerminalNode(walkString(t.child(0)), (Seq[][])walk(t.child(2)),
105 true, t.size()==2 ? null : walkString(t.child(1)), false);
106 if (head.equals("&")) return and2(walkSeq(t.child(0)), walkSeq(t.child(1)));
107 if (head.equals("&~")) return andnot2(walkSeq(t.child(0)), walkSeq(t.child(1)));
108 if (head.equals("/")) return (walkSeq(t.child(0))).separate(walkElement(t.child(1)));
109 if (head.equals("()")) return new LiteralNode("");
110 if (head.equals("[")) return new AtomNode((char[][])Reflection.rebuild(walkChildren(t), char[][].class));
111 if (head.equals("\\{")) return new DropNode(new AtomNode(new char[] { CharAtom.left, CharAtom.left }));
112 if (head.equals("\\}")) return new DropNode(new AtomNode(new char[] { CharAtom.right, CharAtom.right }));
113 if (head.equals(">>")) return new DropNode(new AtomNode(new char[] { CharAtom.left, CharAtom.left }));
114 if (head.equals("<<")) return new DropNode(new AtomNode(new char[] { CharAtom.right, CharAtom.right }));
115 if (head.equals("~")) return new TildeNode(walkElement(t.child(0)));
116 if (head.equals("~~")) return new Seq(new RepeatNode(new TildeNode(new AtomNode()), null, true, true, false)).andnot(walkSeq(t.child(0)));
117 if (head.equals("Range") && t.size()==2 && ">".equals(t.child(0).head()))
118 return new char[] { CharAtom.left, CharAtom.left };
119 if (head.equals("Range") && t.size()==2 && "<".equals(t.child(0).head()))
120 return new char[] { CharAtom.right, CharAtom.right };
121 if (head.equals("Range") && t.size()==1) return new char[] { unescape(t).charAt(0), unescape(t).charAt(0) };
122 if (head.equals("Range")) return new char[] { unescape(t).charAt(0), unescape(t).charAt(1) };
123 if (head.equals("\"\"")) return "";
124 if (head.equals("\n")) return "\n";
125 if (head.equals("\r")) return "\r";
126 if (head.equals("grammar.Grammar")) return walkChildren(t);
127 if (head.equals("SubGrammar")) return GrammarBuilder.buildFromAST(t.child(0), "s", includes);
128 if (head.equals("NonTerminal"))
129 return new NonTerminalNode((String)walk(t.child(0)),
130 (Seq[][])walkChildren(t.child(1)), false, null, false);
131 if (head.equals("Colons")) {
132 String tag = (String)walk(t.child(0));
133 Seq[][] seqs = (Seq[][])walk(t.child(1));
134 for(Seq[] seq : seqs)
135 for(int i=0; i<seq.length; i++)
136 seq[i] = tag(tag, seq[i]);
137 return new NonTerminalNode(tag, seqs, false, null, false);
139 if (head.equals("TestCase"))
140 return new RegressionTests.TestCase(walkString(t.child(0)),
141 walkString(t.child(1)),
142 (String[])Reflection.coerce(walkChildren(t.child(2)), String[].class),
143 (Union)walk(t.child(3)),
146 if (head.equals("#import")) {
147 String fileName = (String)stringifyChildren(t.child(0));
148 for(File f : includes) {
149 File file = new File(f.getAbsolutePath()+File.separatorChar+fileName);
150 if (!file.exists()) continue;
152 String newPrefix = t.size()<2 ? "" : ((String)walk(t.child(1))+".");
153 FileInputStream fis = new FileInputStream(file);
154 Tree tr = new CharParser(MetaGrammar.newInstance()).parse(fis).expand1();
155 return (GrammarNode)new GrammarBuilder(includes, newPrefix).walk(tr);
156 } catch (Exception e) {
157 throw new RuntimeException("while parsing " + file, e);
160 throw new RuntimeException("unable to find #include file \""+fileName+"\"");
162 throw new RuntimeException("unknown head: \"" + head + "\" => " + (head.equals("...")));
165 // Nodes //////////////////////////////////////////////////////////////////////////////
167 /** Root node of a grammar's AST; a set of named nonterminals */
168 private class GrammarNode extends HashMap<String,NonTerminalNode> {
169 public GrammarNode(NonTerminalNode[] nonterminals) {
170 for(NonTerminalNode nt : nonterminals) {
171 if (nt==null) continue;
172 if (this.get(nt.name)!=null)
173 throw new RuntimeException("duplicate definition of nonterminal \""+nt.name+"\"");
174 this.put(nt.name, nt);
177 public GrammarNode(Object[] nt) { add(nt); }
178 private void add(Object o) {
180 else if (o instanceof Object[]) for(Object o2 : (Object[])o) add(o2);
181 else if (o instanceof NonTerminalNode) {
182 NonTerminalNode nt = (NonTerminalNode)o;
183 if (this.get(nt.name)!=null)
184 throw new RuntimeException("duplicate definition of nonterminal \""+nt.name+"\"");
185 this.put(nt.name, nt);
187 else if (o instanceof GrammarNode)
188 for(NonTerminalNode n : ((GrammarNode)o).values())
191 public String toString() {
193 for(NonTerminalNode nt : values()) ret += nt + ", ";
196 public Union build(String rootNonterminal) {
197 Context cx = new Context(this);
199 for(GrammarAST.NonTerminalNode nt : values())
200 if (nt.name.equals(rootNonterminal))
201 return (Union)cx.get(nt.name);
206 public class UnionNode extends ElementNode {
207 public Seq[][] sequences;
208 public String sep = null;
210 public UnionNode(Seq[][] sequences) { this(sequences, false, null); }
211 public UnionNode(Seq[][] sequences, boolean rep, String sep) {
212 this.sequences = sequences;
216 public Atom toAtom(Context cx) {
218 for(Seq[] ss : sequences)
220 ret = ret==null ? s.toAtom(cx) : (Atom)ret.union(s.toAtom(cx));
223 public Element build(Context cx, NonTerminalNode cnt, boolean dropall) {
224 return buildIntoPreallocatedUnion(cx, cnt, dropall, new Union(null, false)); }
225 public Element buildIntoPreallocatedUnion(Context cx, NonTerminalNode cnt, boolean dropall, Union u) {
228 urep = new Union(null, false);
229 urep.add(Sequence.create(new Element[0], cnt.name));
231 ? Sequence.create(new Element[] { u }, 0)
232 : Sequence.create(new Element[] { cx.get(sep), u }, 1));
234 HashSet<Sequence> bad2 = new HashSet<Sequence>();
235 for(int i=0; i<sequences.length; i++) {
236 Seq[] group = sequences[i];
237 Union u2 = new Union(null, false);
238 if (sequences.length==1) u2 = u;
239 for(int j=0; j<group.length; j++)
241 group[j].build(cx, u2, cnt, dropall);
243 Union u3 = new Union(null, false);
244 group[j].build(cx, u3, cnt, dropall);
245 Sequence s = Sequence.create(cnt.name,
246 new Element[] { u3, urep },
247 new boolean[] { false, false },
251 if (sequences.length==1) break;
252 Sequence seq = Sequence.create(u2);
253 for(Sequence s : bad2) seq = seq.andnot(s);
255 bad2.add(Sequence.create(u2));
261 public class NonTerminalNode extends UnionNode {
262 public boolean alwaysDrop;
263 public String name = null;
264 public boolean drop(Context cx) { return alwaysDrop; }
265 public NonTerminalNode(String name, Seq[][] sequences, boolean rep, String sep, boolean alwaysDrop) {
266 super(sequences, rep, sep==null?null:(prefix + sep));
267 this.name = prefix + name;
268 this.alwaysDrop = alwaysDrop;
270 public Element build(Context cx, NonTerminalNode cnt, boolean dropall) { return cx.get(name); }
274 public boolean alwaysDrop = false;
275 public boolean drop(Context cx) { return alwaysDrop; }
276 HashSet<Seq> and = new HashSet<Seq>();
277 HashSet<Seq> not = new HashSet<Seq>();
278 ElementNode[] elements;
281 public Seq(ElementNode e) { this(new ElementNode[] { e }); }
282 public Seq(ElementNode[] elements) {
283 this.elements = elements;
284 for(int i=0; i<elements.length; i++)
285 if (elements[i]==null)
286 throw new RuntimeException();
288 public Atom toAtom(Context cx) {
289 if (elements.length != 1)
290 throw new Error("you attempted to use ->, **, ++, or a similar character-class"+
291 " operator on a [potentially] multicharacter production");
292 return elements[0].toAtom(cx);
294 public Seq tag(String tag) { this.tag = tag; return this; }
295 public Seq follow(ElementNode follow) { this.follow = follow; return this; }
296 public Seq and(Seq s) { and.add(s); return this; }
297 public Seq andnot(Seq s) { not.add(s); return this; }
299 Seq ret = new Seq(elements);
306 public Seq separate(ElementNode sep) {
307 ElementNode[] elements = new ElementNode[this.elements.length * 2 - 1];
308 for(int i=0; i<this.elements.length; i++) {
309 elements[i*2] = this.elements[i];
310 if (i<this.elements.length-1)
311 elements[i*2+1] = new DropNode(sep);
313 this.elements = elements;
316 public Sequence build(Context cx, Union u, NonTerminalNode cnt, boolean dropall) {
317 Sequence ret = build0(cx, cnt, dropall);
318 for(Seq s : and) ret = ret.and(s.build(cx, null, cnt, true));
319 for(Seq s : not) ret = ret.andnot(s.build(cx, null, cnt, true));
320 if (u!=null) u.add(ret);
323 public Sequence build0(Context cx, NonTerminalNode cnt, boolean dropall) {
324 boolean[] drops = new boolean[elements.length];
325 Element[] els = new Element[elements.length];
327 for(int i=0; i<elements.length; i++) {
328 if (dropall) drops[i] = true;
329 else drops[i] = elements[i].drop(cx);
330 if (elements[i].getOwnerTag() != null)
331 tag = elements[i].getOwnerTag();
335 boolean multiNonDrop = false;
336 for(int i=0; i<drops.length; i++)
338 if (idx==-1) idx = i;
339 else multiNonDrop = true;
340 for(int i=0; i<elements.length; i++) {
341 if (!multiNonDrop && i==idx && tag!=null && elements[i] instanceof RepeatNode) {
342 els[i] = ((RepeatNode)elements[i]).build(cx, cnt, dropall, tag);
345 els[i] = elements[i].build(cx, cnt, dropall);
347 if (tag==null && multiNonDrop)
348 throw new RuntimeException("multiple non-dropped elements in sequence: " + Sequence.create("", els));
349 boolean[] lifts = new boolean[elements.length];
350 for(int i=0; i<elements.length; i++)
351 lifts[i] = elements[i].lifted;
355 ? Sequence.create(illegalTag, els)
356 : Sequence.create(tag, els, drops, lifts);
357 else if (tag==null) ret = Sequence.create(els, idx);
358 else ret = Sequence.create(tag, els, drops, lifts);
361 ret = Sequence.create(tag, els, drops, lifts);
362 if (this.follow != null)
363 ret = ret.followedBy(this.follow.toAtom(cx));
368 public class ReferenceNode extends ElementNode {
369 public String nonTerminal;
370 public ReferenceNode() { }
371 public NonTerminalNode resolve(Context cx) {
372 NonTerminalNode ret = cx.grammar.get(nonTerminal);
373 if (ret==null) throw new RuntimeException("undefined nonterminal: " + nonTerminal);
376 public ReferenceNode(String nonTerminal) { this.nonTerminal = prefix + nonTerminal; }
377 public Atom toAtom(Context cx) { return cx.grammar.get(nonTerminal).toAtom(cx); }
378 public boolean drop(Context cx) { return resolve(cx).drop(cx); }
379 public Element build(Context cx, NonTerminalNode cnt, boolean dropall) {
380 if (!this.nonTerminal.startsWith(prefix)) nonTerminal = prefix + nonTerminal;
381 Element ret = cx.get(nonTerminal);
382 if (ret == null) throw new RuntimeException("unknown nonterminal \""+nonTerminal+"\"");
387 public class LiteralNode extends ElementNode {
388 private String string;
389 private final String thePrefix = prefix;
390 private boolean caret;
391 public LiteralNode(String string) { this(string, false); }
392 public LiteralNode(String string, boolean caret) {
393 this.string = string;
396 public String getOwnerTag() { return caret ? thePrefix+string : super.getOwnerTag(); }
397 public String toString() { return "\""+string+"\""; }
398 public boolean drop(Context cx) { return true; }
399 public Atom toAtom(Context cx) {
400 if (string.length()!=1) return super.toAtom(cx);
401 edu.berkeley.sbp.util.Range.Set set = new edu.berkeley.sbp.util.Range.Set();
402 set.add(string.charAt(0), string.charAt(0));
403 return CharAtom.set(set);
405 public Element build(Context cx, NonTerminalNode cnt, boolean dropall) { return CharAtom.string(string); }
408 public class AtomNode extends ElementNode {
409 AtomNodeRange[] ranges;
410 public AtomNode() { this(new AtomNodeRange[0]); }
411 public AtomNode(AtomNodeRange[] ranges) { this.ranges = ranges; }
412 public AtomNode(AtomNodeRange range) { this.ranges = new AtomNodeRange[] { range }; }
413 public Element build(Context cx, NonTerminalNode cnt, boolean dropall) { return toAtom(cx); }
414 public Atom toAtom(Context cx) {
415 edu.berkeley.sbp.util.Range.Set set = new edu.berkeley.sbp.util.Range.Set();
416 for(AtomNodeRange r : ranges) set.add(r.first, r.last);
417 return CharAtom.set(set);
420 public class AtomNodeRange {
423 public AtomNodeRange(char only) { first = only; last = only; }
424 public AtomNodeRange(char first, char last) { this.first = first; this.last = last; }
427 public class RepeatNode extends ElementNode {
428 public ElementNode e, sep;
429 public final boolean zero, many, max;
430 public RepeatNode(ElementNode e, ElementNode sep, boolean zero, boolean many, boolean max) {
431 this.e = e; this.sep = sep; this.zero = zero; this.many = many; this.max = max;
433 public Atom toAtom(Context cx) { return sep==null ? e.toAtom(cx) : super.toAtom(cx); }
434 public boolean drop(Context cx) { return e.drop(cx); }
435 public Element build(Context cx, NonTerminalNode cnt, boolean dropall) {
436 Element ret = build(cx, cnt, dropall, illegalTag);
437 String must = "must be tagged unless they appear within a dropped expression or their contents are dropped: ";
438 if (!dropall && !drop(cx) && !e.drop(cx))
439 if (!many) throw new RuntimeException("options (?) " + must + ret);
440 else if (zero) throw new RuntimeException("zero-or-more repetitions (*) " + must + ret);
441 else throw new RuntimeException("one-or-more repetitions (+) " + must + ret);
444 public Element build(Context cx, NonTerminalNode cnt, boolean dropall, Object repeatTag) {
446 ? Repeat.repeat(e.build(cx, null, dropall), zero, many, sep==null ? null : sep.build(cx, null, dropall), repeatTag)
448 ? Repeat.repeatMaximal(e.toAtom(cx), zero, many, repeatTag)
449 : Repeat.repeatMaximal(e.build(cx, null, dropall), zero, many, sep.toAtom(cx), repeatTag);
453 public abstract class ElementNode {
454 public boolean lifted = false;
455 public String getOwnerTag() { return null; }
456 public boolean drop(Context cx) { return false; }
457 public Atom toAtom(Context cx) { throw new Error("can't convert a " + this.getClass().getName() + " to an atom: " + this); }
458 public abstract Element build(Context cx, NonTerminalNode cnt, boolean dropall);
461 public abstract class ElementNodeWrapper extends ElementNode {
462 protected ElementNode _e;
463 public ElementNodeWrapper(ElementNode e) { this._e = e; }
464 public String getOwnerTag() { return _e.getOwnerTag(); }
465 public boolean drop(Context cx) { return _e.drop(cx); }
466 public Atom toAtom(Context cx) { return _e.toAtom(cx); }
467 public Element build(Context cx, NonTerminalNode cnt, boolean dropall) { return _e.build(cx, cnt, dropall); }
470 public class TildeNode extends ElementNodeWrapper {
471 public TildeNode(ElementNode e) { super(e); }
472 public Atom toAtom(Context cx) { return (Atom)((Topology<Character>)_e.toAtom(cx).complement()); }
473 public Element build(Context cx, NonTerminalNode cnt, boolean dropall) { return toAtom(cx); }
476 public class DropNode extends ElementNodeWrapper {
477 public DropNode(ElementNode e) { super(e); }
478 public boolean drop(Context cx) { return true; }
481 public Seq and2(Seq s, Seq a) { a.alwaysDrop = true; return s.and(a); }
482 public Seq andnot2(Seq s, Seq a) { a.alwaysDrop = true; return s.andnot(a); }
483 public Seq arrow(Seq s, ElementNode e) { return s.follow(e); }
484 public Seq tag(String tagname, Seq s) { return s.tag(tagname); }
485 public Seq seq(ElementNode[] elements) { return new Seq(elements); }
486 public Seq seq2(ElementNode[] elements) { return new Seq(elements); }
487 public ElementNode plusmax(final ElementNode e) { return new RepeatNode(e, null, false, true, true); }
488 public ElementNode plus(final ElementNode e) { return new RepeatNode(e, null, false, true, false); }
489 public ElementNode plusmaxfollow(final ElementNode e, final ElementNode sep) { return new RepeatNode(e, sep, false, true, true); }
490 public ElementNode plusfollow(final ElementNode e, final ElementNode sep) { return new RepeatNode(e, sep, false, true, false); }
491 public ElementNode starmax(final ElementNode e) { return new RepeatNode(e, null, true, true, true); }
492 public ElementNode star(final ElementNode e) { return new RepeatNode(e, null, true, true, false); }
493 public ElementNode starmaxfollow(final ElementNode e, final ElementNode sep) { return new RepeatNode(e, sep, true, true, true); }
494 public ElementNode starfollow(final ElementNode e, final ElementNode sep) { return new RepeatNode(e, sep, true, true, false); }
495 public ElementNode question(final ElementNode e) { return new RepeatNode(e, null, true, false, false); }
497 //////////////////////////////////////////////////////////////////////////////
499 public class Context {
500 public HashMap<String,Union> map = new HashMap<String,Union>();
501 public GrammarNode grammar;
502 public Context(Tree t) { }
503 public Context(GrammarNode g) { this.grammar = g; }
504 public Union build() {
506 for(NonTerminalNode nt : grammar.values()) {
507 Union u = get(nt.name);
508 if ("s".equals(nt.name))
513 public Union peek(String name) { return map.get(name); }
514 public void put(String name, Union u) { map.put(name, u); }
515 public Union get(String name) {
516 Union ret = map.get(name);
517 if (ret != null) return ret;
518 NonTerminalNode nt = grammar.get(name);
520 throw new Error("warning could not find " + name);
522 ret = new Union(name, false);
524 nt.buildIntoPreallocatedUnion(this, nt, nt.drop(this), ret);