1 // Copyright 2006 all rights reserved; see LICENSE file for BSD-style license
3 package edu.berkeley.sbp.meta;
4 import edu.berkeley.sbp.util.*;
5 import edu.berkeley.sbp.*;
6 import edu.berkeley.sbp.chr.*;
7 import edu.berkeley.sbp.misc.*;
9 import java.lang.annotation.*;
10 import java.lang.reflect.*;
14 * The inner classes of this class represent nodes in the Abstract
15 * Syntax Tree of a grammar.
17 public class GrammarAST {
20 * Create a grammar from a parse tree and a list of include directories
22 * @param grammarAST a tree produced by parsing a grammar using the metagrammar
23 * @param startingNonterminal the name of the "start symbol"
24 * @param includes directories searched, in order, for files named by "#import"
26 public static Union buildFromAST(Tree grammarAST, String startingNonterminal, File[] includes) {
// Builds with an empty name prefix (""); walk() creates further GrammarAST instances
// with a non-empty prefix when it processes a "#import" node that has a second child.
27 return new GrammarAST(includes, "").buildGrammar(grammarAST, startingNonterminal);
// Sentinel tag (the empty String). RepeatNode.build(cx, cnt, dropall) passes it as the
// repeat tag, and Seq.build0 passes it to Sequence.create(illegalTag, els) in one branch.
// NOTE(review): never reassigned in the visible code; could presumably be final — confirm.
30 private static Object illegalTag = ""; // this is the tag that should never appear in the non-dropped output FIXME
32 // Instance //////////////////////////////////////////////////////////////////////////////
// Prepended to nonterminal names (NonTerminalNode, ReferenceNode) and to the tag
// reported by caret literals (LiteralNode.getOwnerTag).
34 private final String prefix;
// Directories in which walk() looks for the file named by a "#import" node.
35 private final File[] includes;
/**
 * @param includes directories consulted when resolving "#import"; stored as given (not copied)
 * @param prefix   string prepended to the names defined by this grammar
 */
37 public GrammarAST(File[] includes, String prefix) {
// NOTE(review): the assignment to the final field `prefix` is not visible in this listing
// (a line appears to be missing just above) — confirm it exists in the real file.
39 this.includes = includes;
42 // Methods //////////////////////////////////////////////////////////////////////////////
/**
 * Walks the tree, expects the result to be a GrammarNode, and builds the Union for
 * the requested root nonterminal.
 *
 * @param t               tree to convert via walk()
 * @param rootNonTerminal name handed to GrammarNode.build
 * @return the result of GrammarNode.build(rootNonTerminal)
 * @throws ClassCastException if walk(t) does not yield a GrammarNode
 */
44 private Union buildGrammar(Tree t, String rootNonTerminal) {
45 return ((GrammarAST.GrammarNode)walk(t)).build(rootNonTerminal);
/**
 * Walks every child of {@code t} and collects the results in child order.
 *
 * @param t node whose children are converted with walk()
 * @return the collected results after being passed through Reflection.lub
 */
48 public Object[] walkChildren(Tree t) {
49 Object[] ret = new Object[t.size()];
50 for(int i=0; i<ret.length; i++) {
51 ret[i] = walk(t.child(i));
// Nested arrays are passed through Reflection.lub as well.
// presumably lub re-types the array to the most specific common element type, which is
// what lets callers cast the result (e.g. to Seq[][]) — Reflection is defined elsewhere; confirm.
52 if (ret[i] instanceof Object[])
53 ret[i] = Reflection.lub((Object[])ret[i]);
55 return Reflection.lub(ret);
/**
 * Recursively concatenates the heads of all descendants of {@code t} in depth-first
 * order: each child's head is appended, followed by that child's own descendants.
 * The head of {@code t} itself is not included.
 * NOTE(review): the return statement is not visible in this listing; presumably the
 * buffer's contents are returned — confirm.
 */
57 private String stringifyChildren(Tree t) {
58 StringBuffer sb = new StringBuffer();
59 for(int i=0; i<t.size(); i++) {
60 sb.append(t.child(i).head());
61 sb.append(stringifyChildren(t.child(i)));
/**
 * Appends, for each child of {@code t}, the child's head followed by
 * stringifyChildren(child).
 * NOTE(review): the visible lines do the same concatenation as stringifyChildren(t) and
 * perform no escape processing; the return statement is not visible here — confirm.
 */
65 private String unescape(Tree t) {
66 StringBuffer sb = new StringBuffer();
67 for(int i=0; i<t.size(); i++)
68 sb.append(t.child(i).head()+stringifyChildren(t.child(i)));
// Typed convenience wrappers around walk(): each walks the tree and casts the result,
// so a node of an unexpected kind surfaces as a ClassCastException.
72 private ElementNode walkElement(Tree t) { return (ElementNode)walk(t); }
73 private String walkString(Tree t) { return (String)walk(t); }
74 private Seq walkSeq(Tree t) { return (Seq)walk(t); }
/**
 * Converts one parse-tree node into the corresponding AST object by dispatching on the
 * node's head string. The runtime type of the result depends on the head: arrays (from
 * walkChildren), String, char[] ranges, Seq, ElementNode subclasses, NonTerminalNode,
 * GrammarNode, a Union (for "SubGrammar") or a RegressionTests.TestCase.
 *
 * @param t the node to convert; its head is cast to String
 * @return the AST object for {@code t}
 * @throws RuntimeException if the head is not recognised, or if a "#import" file cannot
 *         be found or parsed
 */
75 private Object walk(Tree t) {
76 String head = (String)t.head();
// Strip dotted qualifiers: while a '.' occurs at index > 0, keep only what follows it.
77 while(head.indexOf('.') > 0)
78 head = head.substring(head.indexOf('.')+1);
// NOTE(review): head has already been dereferenced by the loop above, so a null head
// raises NullPointerException before this check can run.
79 if (head==null) throw new RuntimeException("head is null: " + t);
80 if (head.equals("|")) return walkChildren(t);
81 if (head.equals("RHS")) return walkChildren(t);
82 if (head.equals("Grammar")) return new GrammarNode(walkChildren(t));
83 if (head.equals("(")) return new UnionNode((Seq[][])walkChildren(t.child(0)));
84 if (head.equals("Word")) return stringifyChildren(t);
85 if (head.equals("Elements")) return new Seq((ElementNode[])Reflection.rebuild(walkChildren(t), ElementNode[].class));
86 if (head.equals("NonTerminalReference")) return new ReferenceNode(stringifyChildren(t.child(0)));
// ")" yields a ReferenceNode flagged as parenthesized (second constructor argument).
87 if (head.equals(")")) return new ReferenceNode(stringifyChildren(t.child(0)), true);
88 if (head.equals("{")) return new BracedNode(walkSeq(t.child(0)));
89 if (head.equals("::")) return walkSeq(t.child(1)).tag(walkString(t.child(0)));
// "...": a dropped, zero-or-more repetition of the complement of an empty AtomNode.
90 if (head.equals("...")) return new DropNode(new RepeatNode(new TildeNode(new AtomNode()), null, true, true, false));
// Repetition operators. RepeatNode arguments are (element, separator, zero, many, max):
// the "/" forms supply child(1) as separator, the doubled forms ("++", "**") set max.
92 if (head.equals("++")) return new RepeatNode(walkElement(t.child(0)), null, false, true, true);
93 if (head.equals("+")) return new RepeatNode(walkElement(t.child(0)), null, false, true, false);
94 if (head.equals("++/")) return new RepeatNode(walkElement(t.child(0)), walkElement(t.child(1)), false, true, true);
95 if (head.equals("+/")) return new RepeatNode(walkElement(t.child(0)), walkElement(t.child(1)), false, true, false);
96 if (head.equals("**")) return new RepeatNode(walkElement(t.child(0)), null, true, true, true);
97 if (head.equals("*")) return new RepeatNode(walkElement(t.child(0)), null, true, true, false);
98 if (head.equals("**/")) return new RepeatNode(walkElement(t.child(0)), walkElement(t.child(1)), true, true, true);
99 if (head.equals("*/")) return new RepeatNode(walkElement(t.child(0)), walkElement(t.child(1)), true, true, false);
100 if (head.equals("?")) return new RepeatNode(walkElement(t.child(0)), null, true, false, false);
102 if (head.equals("!")) return new DropNode(walkElement(t.child(0)));
// "^": a LiteralNode with caret=true; such a literal reports prefix+string as owner tag.
103 if (head.equals("^")) return new LiteralNode(walkString(t.child(0)), true);
104 if (head.equals("`")) return walkElement(t.child(0)).lifted();
105 if (head.equals("Quoted")) return stringifyChildren(t);
106 if (head.equals("Literal")) return new LiteralNode(walkString(t.child(0)));
107 if (head.equals("->")) return walkSeq(t.child(0)).follow(walkElement(t.child(1)));
// "DropNT": like "NonTerminal" below but with alwaysDrop=true (last argument).
108 if (head.equals("DropNT")) return new NonTerminalNode(walkString(t.child(0)), (Seq[][])walkChildren(t.child(1)), false, null, true);
// "=": rep=true; the separator name is child(1), or null when the node has two children.
// NOTE(review): child(2) is read unconditionally although the t.size()==2 case is handled
// in the same expression — confirm which child layout the metagrammar produces.
109 if (head.equals("=")) return new NonTerminalNode(walkString(t.child(0)), (Seq[][])walk(t.child(2)),
110 true, t.size()==2 ? null : walkString(t.child(1)), false);
111 if (head.equals("&")) return and2(walkSeq(t.child(0)), walkSeq(t.child(1)));
112 if (head.equals("&~")) return andnot2(walkSeq(t.child(0)), walkSeq(t.child(1)));
113 if (head.equals("/")) return (walkSeq(t.child(0))).separate(walkElement(t.child(1)));
114 if (head.equals("()")) return new LiteralNode("");
115 if (head.equals("[")) return new AtomNode((char[][])Reflection.rebuild(walkChildren(t), char[][].class));
// "\\{" and ">>" both map to the CharAtom.left range; "\\}" and "<<" to CharAtom.right.
116 if (head.equals("\\{")) return new DropNode(new AtomNode(new char[] { CharAtom.left, CharAtom.left }));
117 if (head.equals("\\}")) return new DropNode(new AtomNode(new char[] { CharAtom.right, CharAtom.right }));
118 if (head.equals(">>")) return new DropNode(new AtomNode(new char[] { CharAtom.left, CharAtom.left }));
119 if (head.equals("<<")) return new DropNode(new AtomNode(new char[] { CharAtom.right, CharAtom.right }));
120 if (head.equals("~")) return new TildeNode(walkElement(t.child(0)));
121 if (head.equals("~~")) return new Seq(new RepeatNode(new TildeNode(new AtomNode()), null, true, true, false)).andnot(walkSeq(t.child(0)));
// "Range" yields a two-element char[] {low, high}: special ">" / "<" forms first, then a
// single character (low == high), otherwise the first two characters of unescape(t).
122 if (head.equals("Range") && t.size()==2 && ">".equals(t.child(0).head()))
123 return new char[] { CharAtom.left, CharAtom.left };
124 if (head.equals("Range") && t.size()==2 && "<".equals(t.child(0).head()))
125 return new char[] { CharAtom.right, CharAtom.right };
126 if (head.equals("Range") && t.size()==1) return new char[] { unescape(t).charAt(0), unescape(t).charAt(0) };
127 if (head.equals("Range")) return new char[] { unescape(t).charAt(0), unescape(t).charAt(1) };
128 if (head.equals("\"\"")) return "";
129 if (head.equals("\n")) return "\n";
130 if (head.equals("\r")) return "\r";
// NOTE(review): this branch looks unreachable — the loop at the top reduces
// "grammar.Grammar" to "Grammar", which is already handled earlier.
131 if (head.equals("grammar.Grammar")) return walkChildren(t);
// A nested grammar is built immediately into a Union with the fixed start symbol "s".
132 if (head.equals("SubGrammar")) return GrammarAST.buildFromAST(t.child(0), "s", includes);
133 if (head.equals("NonTerminal"))
134 return new NonTerminalNode(walkString(t.child(0)),
135 (Seq[][])walkChildren(t.child(1)), false, null, false);
// "Colons": every Seq of the definition is tagged with the nonterminal's own name.
136 if (head.equals("Colons")) {
137 String tag = walkString(t.child(0));
138 Seq[][] seqs = (Seq[][])walk(t.child(1));
139 for(Seq[] seq : seqs)
140 for(int i=0; i<seq.length; i++)
141 seq[i] = seq[i].tag(tag);
142 return new NonTerminalNode(tag, seqs, false, null, false);
// NOTE(review): the remaining TestCase constructor arguments are not visible in this listing.
144 if (head.equals("TestCase"))
145 return new RegressionTests.TestCase(walkString(t.child(0)),
146 walkString(t.child(1)),
147 (String[])Reflection.coerce(walkChildren(t.child(2)), String[].class),
148 (Union)walk(t.child(3)),
// "#import": look for the named file under each include directory in order; the first
// one that exists is parsed with the metagrammar and walked by a new GrammarAST whose
// prefix is "<child(1)>." (or "" when there is no second child).
151 if (head.equals("#import")) {
152 String fileName = (String)stringifyChildren(t.child(0));
153 for(File f : includes) {
154 File file = new File(f.getAbsolutePath()+File.separatorChar+fileName);
155 if (!file.exists()) continue;
157 String newPrefix = t.size()<2 ? "" : (walkString(t.child(1))+".");
// NOTE(review): no close() of this stream is visible in this listing — confirm that the
// stream is released on both the success and the failure path.
158 FileInputStream fis = new FileInputStream(file);
159 Tree tr = new CharParser(MetaGrammar.newInstance()).parse(fis).expand1();
160 return (GrammarNode)new GrammarAST(includes, newPrefix).walk(tr);
161 } catch (Exception e) {
162 throw new RuntimeException("while parsing " + file, e);
// NOTE(review): the message says "#include" although the directive matched is "#import".
165 throw new RuntimeException("unable to find #include file \""+fileName+"\"");
// NOTE(review): the trailing head.equals("...") in the message looks like debugging residue.
167 throw new RuntimeException("unknown head: \"" + head + "\" => " + (head.equals("...")));
171 // Nodes //////////////////////////////////////////////////////////////////////////////
173 /** Root node of a grammar's AST; a set of named nonterminals */
// The map key is NonTerminalNode.name, which already includes the grammar prefix.
174 private class GrammarNode extends HashMap<String,NonTerminalNode> {
// Registers each non-null nonterminal; a repeated name is rejected.
175 public GrammarNode(NonTerminalNode[] nonterminals) {
176 for(NonTerminalNode nt : nonterminals) {
177 if (nt==null) continue;
178 if (this.get(nt.name)!=null)
179 throw new RuntimeException("duplicate definition of nonterminal \""+nt.name+"\"");
180 this.put(nt.name, nt);
// Accepts the loosely-typed output of walkChildren and flattens it via add().
183 public GrammarNode(Object[] nt) { add(nt); }
// Recursively registers o: arrays are flattened element by element, a NonTerminalNode is
// stored under its name (duplicates rejected), and a GrammarNode contributes its values.
// NOTE(review): the first branch of this if/else chain and the body of the final loop
// are not visible in this listing.
184 private void add(Object o) {
186 else if (o instanceof Object[]) for(Object o2 : (Object[])o) add(o2);
187 else if (o instanceof NonTerminalNode) {
188 NonTerminalNode nt = (NonTerminalNode)o;
189 if (this.get(nt.name)!=null)
190 throw new RuntimeException("duplicate definition of nonterminal \""+nt.name+"\"");
191 this.put(nt.name, nt);
193 else if (o instanceof GrammarNode)
194 for(NonTerminalNode n : ((GrammarNode)o).values())
// Concatenates the string form of every nonterminal, each followed by ", ".
197 public String toString() {
199 for(NonTerminalNode nt : values()) ret += nt + ", ";
// Creates a Context over this grammar and returns the Union that the context produces
// for the nonterminal whose name equals rootNonterminal.
// NOTE(review): what happens when no nonterminal matches is not visible in this listing.
202 public Union build(String rootNonterminal) {
203 Context cx = new Context(this);
205 for(GrammarAST.NonTerminalNode nt : values())
206 if (nt.name.equals(rootNonterminal))
207 return (Union)cx.get(nt.name);
// An element made of alternative sequences. `sequences` is an array of groups, each group
// an array of Seq; buildIntoPreallocatedUnion processes the groups in order.
// NOTE(review): several lines of this class (the `rep` field, conditions, local
// declarations, returns) are not visible in this listing; comments cover visible code only.
212 private class UnionNode extends ElementNode {
213 public Seq[][] sequences;
// Name of a separator nonterminal, looked up with cx.get(sep) when building.
214 public String sep = null;
// Convenience constructors: a single Seq becomes one group holding one alternative.
216 public UnionNode(Seq seq) { this(new Seq[][] { new Seq[] { seq } }); }
217 public UnionNode(Seq[][] sequences) { this(sequences, false, null); }
218 public UnionNode(Seq[][] sequences, boolean rep, String sep) {
219 this.sequences = sequences;
// Iterates over the groups; the rest of the decision is not visible in this listing.
223 public boolean drop(Context cx) {
224 for(Seq[] seqs : sequences)
// Folds the atoms of the contained sequences together with Atom.union.
230 public Atom toAtom(Context cx) {
232 for(Seq[] ss : sequences)
234 ret = ret==null ? s.toAtom(cx) : (Atom)ret.union(s.toAtom(cx));
// Builds into a freshly allocated anonymous Union.
237 public Element build(Context cx, NonTerminalNode cnt, boolean dropall) {
238 return buildIntoPreallocatedUnion(cx, cnt, dropall, new Union(null, false)); }
// Builds this node's sequences into the caller-supplied Union u. Context.get uses this so
// that the Union it passes in is the one that receives the nonterminal's sequences.
// NOTE(review): cnt.name is dereferenced below, yet some callers pass a null cnt to
// build(...) (RepeatNode, BracedNode) — confirm those paths cannot reach these lines.
239 public Element buildIntoPreallocatedUnion(Context cx, NonTerminalNode cnt, boolean dropall, Union u) {
// urep: a helper Union seeded with an empty sequence tagged cnt.name, plus a sequence
// that refers back to u, optionally preceded by the separator.
// presumably this is only set up when `rep` is true — the guarding lines are not visible.
242 urep = new Union(null, false);
243 urep.add(Sequence.create(cnt.name, new Element[0]));
245 ? Sequence.create(new Element[] { u }, 0)
246 : Sequence.create(new Element[] { cx.get(sep), u }, 1));
// bad2 accumulates a Sequence for each group already processed.
248 HashSet<Sequence> bad2 = new HashSet<Sequence>();
249 for(int i=0; i<sequences.length; i++) {
250 Seq[] group = sequences[i];
// With a single group the sequences go straight into u; otherwise each group gets its own Union.
251 Union u2 = new Union(null, false);
252 if (sequences.length==1) u2 = u;
253 for(int j=0; j<group.length; j++)
// Two visible variants: build directly into u2, or build into a temporary u3 and wrap it
// in a sequence {u3, urep} tagged cnt.name. The selecting condition is not visible here.
255 group[j].build(cx, u2, cnt, dropall);
257 Union u3 = new Union(null, false);
258 group[j].build(cx, u3, cnt, dropall);
259 Sequence s = Sequence.create(cnt.name,
260 new Element[] { u3, urep },
261 new boolean[] { false, false },
262 new boolean[] { false, true});
265 if (sequences.length==1) break;
// Wrap this group's Union in a Sequence and apply andnot() against every earlier group.
// NOTE(review): what is done with `seq` afterwards is not visible in this listing.
266 Sequence seq = Sequence.create(u2);
267 for(Sequence s : bad2) seq = seq.andnot(s);
269 bad2.add(Sequence.create(u2));
// A named UnionNode: the definition of one nonterminal.
275 private class NonTerminalNode extends UnionNode {
276 public boolean alwaysDrop;
277 public String name = null;
// Whether references to this nonterminal are dropped is decided solely by alwaysDrop.
278 public boolean drop(Context cx) { return alwaysDrop; }
// Both the name and a non-null separator name are qualified with the enclosing grammar's prefix.
279 public NonTerminalNode(String name, Seq[][] sequences, boolean rep, String sep, boolean alwaysDrop) {
280 super(sequences, rep, sep==null?null:(prefix + sep));
281 this.name = prefix + name;
282 this.alwaysDrop = alwaysDrop;
// Building yields the Union registered in the Context under this name instead of
// constructing a new one; cnt and dropall are ignored.
284 public Element build(Context cx, NonTerminalNode cnt, boolean dropall) { return cx.get(name); }
// NOTE(review): the enclosing class header is not visible in this listing; the members
// that follow are presumably those of class Seq (and2/andnot2 set alwaysDrop on a Seq).
// Set to true by and2()/andnot2() on the sequence used as the conjunct.
288 public boolean alwaysDrop = false;
// A sequence is dropped when alwaysDrop is set and is never dropped when it carries a
// tag; otherwise the elements' own drop() results are consulted.
// NOTE(review): the outcome of that element loop is not visible in this listing.
289 public boolean drop(Context cx) {
290 if (alwaysDrop) return true;
291 if (tag!=null) return false;
292 for(int i=0; i<elements.length; i++)
293 if (!elements[i].drop(cx))
// Sequences this one is conjoined with (and) or excluded by (not); applied in build().
297 HashSet<Seq> and = new HashSet<Seq>();
298 HashSet<Seq> not = new HashSet<Seq>();
299 ElementNode[] elements;
302 public Seq(ElementNode e) { this(new ElementNode[] { e }); }
303 public Seq(ElementNode[] elements) { this(elements, true); }
// Copies el, rejects null elements and records this Seq as each element's owner.
// NOTE(review): `check` is not referenced on any visible line; presumably it guards the
// rewriting pass below (the nested call passes false) — confirm.
304 public Seq(ElementNode[] el, boolean check) {
305 this.elements = new ElementNode[el.length];
306 System.arraycopy(el, 0, elements, 0, el.length);
307 for(int i=0; i<elements.length; i++) {
308 if (elements[i]==null)
309 throw new RuntimeException();
310 elements[i].ownerSeq = this;
312 // FIXME: this whole mechanism is sketchy
// For every parenthesized reference rn, each non-parenthesized reference to the same
// nonterminal in this sequence is replaced by one shared UnionNode wrapping
// "rn2 and-not (a Seq over the current elements)". The replacement is created once, on
// the first match, and reused for later matches.
314 for(int i=0; i<elements.length; i++) {
315 if ((elements[i] instanceof ReferenceNode) && ((ReferenceNode)elements[i]).parenthesized) {
316 ReferenceNode rn = (ReferenceNode)elements[i];
317 ElementNode replace = null;
318 for(int j=0; j<elements.length; j++) {
319 if (!(elements[j] instanceof ReferenceNode)) continue;
320 ReferenceNode rn2 = (ReferenceNode)elements[j];
321 if (rn2.nonTerminal.equals(rn.nonTerminal) && !rn2.parenthesized) {
322 if (replace == null) {
323 replace = new UnionNode(new Seq(rn2).andnot(new Seq(elements, false)));
325 elements[j] = replace;
// A sequence converts to an Atom only when it has exactly one element; the conversion is
// then delegated to that element. Any other length raises an Error.
331 public Atom toAtom(Context cx) {
332 if (elements.length != 1)
333 throw new Error("you attempted to use ->, **, ++, or a similar character-class"+
334 " operator on a [potentially] multicharacter production");
335 return elements[0].toAtom(cx);
// Fluent mutators: each changes this sequence in place and returns it for chaining.
337 public Seq tag(String tag) { this.tag = tag; return this; }
338 public Seq follow(ElementNode follow) { this.follow = follow; return this; }
339 public Seq and(Seq s) { and.add(s); return this; }
340 public Seq andnot(Seq s) { not.add(s); return this; }
// Interleaves a dropped copy of `sep` between consecutive elements: n elements become
// 2n-1, with the originals at even indices. The same sep node is wrapped in a new
// DropNode for each gap.
// NOTE(review): with zero elements the array length would be -1, which throws
// NegativeArraySizeException. The return statement is not visible in this listing.
341 public Seq separate(ElementNode sep) {
342 ElementNode[] elements = new ElementNode[this.elements.length * 2 - 1];
343 for(int i=0; i<this.elements.length; i++) {
344 elements[i*2] = this.elements[i];
345 if (i<this.elements.length-1)
346 elements[i*2+1] = new DropNode(sep);
348 this.elements = elements;
// Builds the Sequence via build0, applies every recorded conjunct (and) and exclusion
// (andnot), then adds the result to u when u is non-null. Conjunct and exclusion
// sequences are built with a null Union and dropall=true.
// NOTE(review): the return statement is not visible in this listing.
351 public Sequence build(Context cx, Union u, NonTerminalNode cnt, boolean dropall) {
352 Sequence ret = build0(cx, cnt, dropall);
353 for(Seq s : and) ret = ret.and(s.build(cx, null, cnt, true));
354 for(Seq s : not) ret = ret.andnot(s.build(cx, null, cnt, true));
355 if (u!=null) u.add(ret);
// Core of Seq.build: builds each element, decides which positions are dropped or lifted,
// and creates the Sequence with the applicable tag.
// NOTE(review): several lines (declarations of tag/idx/ret, some conditions, the return)
// are not visible in this listing; comments below cover visible statements only.
358 public Sequence build0(Context cx, NonTerminalNode cnt, boolean dropall) {
359 boolean[] drops = new boolean[elements.length];
360 Element[] els = new Element[elements.length];
// dropall forces every position to be dropped; otherwise each element decides.
// An element with a non-null owner tag (e.g. a caret literal) supplies the tag.
362 for(int i=0; i<elements.length; i++) {
363 if (dropall) drops[i] = true;
364 else drops[i] = elements[i].drop(cx);
365 if (elements[i].getOwnerTag() != null)
366 tag = elements[i].getOwnerTag();
// idx remembers the first position selected by the (not visible) condition;
// multiNonDrop records that a second such position exists.
370 boolean multiNonDrop = false;
371 for(int i=0; i<drops.length; i++)
373 if (idx==-1) idx = i;
374 else multiNonDrop = true;
// When the tagged sequence's only such element is a RepeatNode, the tag is handed to the
// repetition itself; every other element is built normally.
375 for(int i=0; i<elements.length; i++) {
376 if (!multiNonDrop && i==idx && tag!=null && elements[i] instanceof RepeatNode) {
377 els[i] = ((RepeatNode)elements[i]).build(cx, cnt, dropall, tag);
380 els[i] = elements[i].build(cx, cnt, dropall);
// More than one such element is an error unless a tag is present.
382 if (tag==null && multiNonDrop)
383 throw new RuntimeException("multiple non-dropped elements in sequence: " + Sequence.create("", els));
384 boolean[] lifts = new boolean[elements.length];
385 for(int i=0; i<elements.length; i++)
386 lifts[i] = elements[i].lifted;
// Visible creation variants: illegalTag over all elements, a pass-through of the element
// at idx (Sequence.create(els, idx)), or a tagged sequence with drop/lift masks.
390 ? Sequence.create(illegalTag, els)
391 : Sequence.create(tag, els, drops, lifts);
392 else if (tag==null) ret = Sequence.create(els, idx);
393 else ret = Sequence.create(tag, els, drops, lifts);
396 ret = Sequence.create(tag, els, drops, lifts);
// A follow restriction set via follow(...) is applied as an Atom.
397 if (this.follow != null)
398 ret = ret.followedBy(this.follow.toAtom(cx));
// A reference to a nonterminal by name.
403 private class ReferenceNode extends ElementNode {
404 public String nonTerminal;
// True for references created from a ")" node in walk(); consulted by the Seq constructor.
405 public boolean parenthesized;
406 public ReferenceNode() { }
407 public ReferenceNode(String nonTerminal) { this(nonTerminal, false); }
// Names containing no '.' are qualified with this grammar's prefix; dotted names are kept as given.
408 public ReferenceNode(String nonTerminal, boolean parenthesized) {
409 this.nonTerminal = nonTerminal.indexOf('.')==-1 ? (prefix + nonTerminal) : nonTerminal;
410 this.parenthesized = parenthesized;
// Looks the definition up in the context's grammar; an unknown name is an error.
412 public NonTerminalNode resolve(Context cx) {
413 NonTerminalNode ret = cx.grammar.get(nonTerminal);
414 if (ret==null) throw new RuntimeException("undefined nonterminal: " + nonTerminal);
// Delegates atom conversion to the referenced definition.
417 public Atom toAtom(Context cx) {
418 ElementNode ret = cx.grammar.get(nonTerminal);
419 if (ret == null) throw new RuntimeException("unknown nonterminal \""+nonTerminal+"\"");
420 return ret.toAtom(cx);
422 public boolean drop(Context cx) { return resolve(cx).drop(cx); }
// Resolves to the Union the Context holds for the referenced name.
423 public Element build(Context cx, NonTerminalNode cnt, boolean dropall) {
// NOTE(review): the lines directly before and after the next two statements are not
// visible in this listing; confirm whether this prefixing is live code. If it is, it
// mutates nonTerminal on every build where the name does not start with the prefix.
425 if (!this.nonTerminal.startsWith(prefix))
426 nonTerminal = prefix + nonTerminal;
428 Element ret = cx.get(nonTerminal);
429 if (ret == null) throw new RuntimeException("unknown nonterminal \""+nonTerminal+"\"");
// A literal string in a production.
434 private class LiteralNode extends ElementNode {
435 private String string;
// Prefix of the enclosing GrammarAST, captured when the node is created.
436 private final String thePrefix = prefix;
437 private boolean caret;
438 public LiteralNode(String string) { this(string, false); }
// NOTE(review): the assignment of `caret` is not visible in this listing — confirm.
439 public LiteralNode(String string, boolean caret) {
440 this.string = string;
// A caret literal supplies prefix+string as the tag of the sequence that contains it.
443 public String getOwnerTag() { return caret ? thePrefix+string : super.getOwnerTag(); }
444 public String toString() { return "\""+string+"\""; }
// Literals are always dropped.
445 public boolean drop(Context cx) { return true; }
// Only single-character literals convert to an atom; other lengths fall back to
// ElementNode.toAtom, which throws.
446 public Atom toAtom(Context cx) {
447 if (string.length()!=1) return super.toAtom(cx);
448 Range.Set set = new Range.Set();
449 set.add(string.charAt(0), string.charAt(0));
450 return CharAtom.set(set);
452 public Element build(Context cx, NonTerminalNode cnt, boolean dropall) { return CharAtom.string(string); }
// A character class given as a list of {low, high} character pairs.
// NOTE(review): the declaration of the `ranges` field is not visible in this listing.
455 private class AtomNode extends ElementNode {
// With no ranges the resulting set is empty; walk() wraps such a node in TildeNode,
// whose toAtom takes the complement.
457 public AtomNode() { this(new char[0][]); }
458 public AtomNode(char[][] ranges) { this.ranges = ranges; }
459 public AtomNode(char[] range) { this.ranges = new char[][] { range }; }
460 public Element build(Context cx, NonTerminalNode cnt, boolean dropall) { return toAtom(cx); }
// Each pair r contributes the range r[0]..r[1] to the set.
461 public Atom toAtom(Context cx) {
462 Range.Set set = new Range.Set();
463 for(char[] r : ranges) set.add(r[0], r[1]);
464 return CharAtom.set(set);
// A repetition or option of element e, optionally separated by sep. Going by the error
// messages below: !many is an option (?), zero && many is "*", !zero && many is "+".
// walk() sets max for the doubled operators ("++", "**" and their "/" forms).
468 private class RepeatNode extends ElementNode {
469 public ElementNode e, sep;
470 public final boolean zero, many, max;
471 public RepeatNode(ElementNode e, ElementNode sep, boolean zero, boolean many, boolean max) {
472 this.e = e; this.sep = sep; this.zero = zero; this.many = many; this.max = max;
// Convertible to an atom only when there is no separator; otherwise the base class throws.
474 public Atom toAtom(Context cx) { return sep==null ? e.toAtom(cx) : super.toAtom(cx); }
475 public boolean drop(Context cx) { return e.drop(cx); }
// Untagged build: constructs the repetition with illegalTag, then rejects it unless the
// repetition is being dropped.
// NOTE(review): drop(cx) returns e.drop(cx), so the two drop tests below are redundant.
// The statement returning `ret` is not visible in this listing.
476 public Element build(Context cx, NonTerminalNode cnt, boolean dropall) {
477 Element ret = build(cx, cnt, dropall, illegalTag);
478 String must = "must be tagged unless they appear within a dropped expression or their contents are dropped: ";
479 if (!dropall && !drop(cx) && !e.drop(cx))
480 if (!many) throw new RuntimeException("options (?) " + must + ret);
481 else if (zero) throw new RuntimeException("zero-or-more repetitions (*) " + must + ret);
482 else throw new RuntimeException("one-or-more repetitions (+) " + must + ret);
// Tagged build. Three visible variants: Repeat.repeat with an optional built separator,
// Repeat.repeatMaximal over e's atom, and Repeat.repeatMaximal with the separator as an
// atom. Inner elements are built with a null cnt.
// presumably the choice depends on `max` and on whether sep is null — the selecting
// conditions are not visible in this listing.
485 public Element build(Context cx, NonTerminalNode cnt, boolean dropall, Object repeatTag) {
487 ? Repeat.repeat(e.build(cx, null, dropall), zero, many, sep==null ? null : sep.build(cx, null, dropall), repeatTag)
489 ? Repeat.repeatMaximal(e.toAtom(cx), zero, many, repeatTag)
490 : Repeat.repeatMaximal(e.build(cx, null, dropall), zero, many, sep.toAtom(cx), repeatTag);
// Base class of everything that can appear as an element of a Seq.
494 private abstract class ElementNode {
// Copied into the `lifts` array by Seq.build0; set through lifted().
495 public boolean lifted = false;
// Set by the Seq constructor to the sequence that contains this element.
496 public Seq ownerSeq = null;
// Tag this element imposes on its owning sequence, or null for none (the default).
497 public String getOwnerTag() { return null; }
498 public ElementNode lifted() { this.lifted = true; return this; }
// Default: not dropped.
499 public boolean drop(Context cx) { return false; }
// Default: not convertible to an atom; subclasses that can convert override this.
500 public Atom toAtom(Context cx) { throw new Error("can't convert a " + this.getClass().getName() + " to an atom: " + this); }
501 public abstract Element build(Context cx, NonTerminalNode cnt, boolean dropall);
// Decorator base class: forwards getOwnerTag, drop, toAtom and build to the wrapped
// element so that subclasses override only what they change.
504 private abstract class ElementNodeWrapper extends ElementNode {
505 protected ElementNode _e;
506 public ElementNodeWrapper(ElementNode e) { this._e = e; }
507 public String getOwnerTag() { return _e.getOwnerTag(); }
508 public boolean drop(Context cx) { return _e.drop(cx); }
509 public Atom toAtom(Context cx) { return _e.toAtom(cx); }
510 public Element build(Context cx, NonTerminalNode cnt, boolean dropall) { return _e.build(cx, cnt, dropall); }
// Complement of the wrapped element's atom; the wrapped element must support toAtom.
513 private class TildeNode extends ElementNodeWrapper {
514 public TildeNode(ElementNode e) { super(e); }
515 public Atom toAtom(Context cx) { return (Atom)((Topology<Character>)_e.toAtom(cx).complement()); }
// Builds as the complemented atom rather than delegating to the wrapped element's build.
516 public Element build(Context cx, NonTerminalNode cnt, boolean dropall) { return toAtom(cx); }
// Wrapper that reports its element as dropped; everything else is forwarded unchanged.
519 private class DropNode extends ElementNodeWrapper {
520 public DropNode(ElementNode e) { super(e); }
521 public boolean drop(Context cx) { return true; }
524 // FIXME: doesn't this require a tag?
// Element created by walk() for a "{" node; holds the sequence found inside the braces.
// NOTE(review): the declaration of the `body` field and the tail of build() are not
// visible in this listing.
525 private class BracedNode extends ElementNode {
527 public BracedNode(Seq seq) { this.body = seq; }
// Builds the body into a fresh Union u (with a null cnt), then starts assembling a
// second Union u2 from a sequence whose element list is not visible here.
528 public Element build(Context cx, NonTerminalNode cnt, boolean dropall) {
529 Union u = new Union(null, false);
530 Sequence s = body.build(cx, u, null, dropall);
531 Union u2 = new Union(null, false);
532 u2.add(Sequence.create(new Element[] {
// Helpers for the "&" and "&~" operators in walk(): the right-hand sequence `a` is marked
// alwaysDrop and then registered on `s` as a conjunct (and2) or an exclusion (andnot2).
// Both mutate their arguments and return s.
541 public Seq and2(Seq s, Seq a) { a.alwaysDrop = true; return s.and(a); }
542 public Seq andnot2(Seq s, Seq a) { a.alwaysDrop = true; return s.andnot(a); }
544 //////////////////////////////////////////////////////////////////////////////
// Build-time state: the grammar being built plus the Unions created so far, keyed by
// nonterminal name.
546 public class Context {
547 public HashMap<String,Union> map = new HashMap<String,Union>();
548 public GrammarNode grammar;
// NOTE(review): this constructor ignores its argument and leaves `grammar` null.
549 public Context(Tree t) { }
550 public Context(GrammarNode g) { this.grammar = g; }
// Requests the Union of every nonterminal in the grammar; the name "s" is singled out.
// NOTE(review): what is done for "s" and what is returned is not visible in this listing.
551 public Union build() {
553 for(NonTerminalNode nt : grammar.values()) {
554 Union u = get(nt.name);
555 if ("s".equals(nt.name))
// Cache lookup only; never builds.
560 public Union peek(String name) { return map.get(name); }
561 public void put(String name, Union u) { map.put(name, u); }
// Returns the cached Union for name, or allocates a new Union and has the nonterminal's
// definition build itself into it.
// NOTE(review): the condition guarding the Error, any registration of `ret` in `map`,
// and the return statement are not visible in this listing. The Error message starts
// with "warning" although it is thrown.
562 public Union get(String name) {
563 Union ret = map.get(name);
564 if (ret != null) return ret;
565 NonTerminalNode nt = grammar.get(name);
567 throw new Error("warning could not find " + name);
569 ret = new Union(name, false);
571 nt.buildIntoPreallocatedUnion(this, nt, nt.drop(this), ret);