1 // Copyright 2006-2007 all rights reserved; see LICENSE file for BSD-style license
3 package edu.berkeley.sbp.meta;
4 import edu.berkeley.sbp.util.*;
5 import edu.berkeley.sbp.*;
6 import edu.berkeley.sbp.chr.*;
7 import edu.berkeley.sbp.misc.*;
9 import java.lang.annotation.*;
10 import java.lang.reflect.*;
14 * The inner classes of this class represent nodes in the Abstract
15 * Syntax Tree of a grammar.
17 public class GrammarAST {
20 * Create a grammar from a parse tree and binding resolver
22 * @param t a tree produced by parsing a grammar using the metagrammar
23 * @param s the name of the "start symbol"
24 * @param gbr a GrammarBindingResolver that resolves grammatical reductions into tree-node-heads
26 public static Union buildFromAST(Tree grammarAST, String startingNonterminal, File[] includes) {
27 return new GrammarAST(includes, "").buildGrammar(grammarAST, startingNonterminal);
30 public static Union getMetaGrammar() {
31 return MetaGrammar.newInstance();
34 private static Object illegalTag = ""; // this is the tag that should never appear in the non-dropped output FIXME
36 // Instance //////////////////////////////////////////////////////////////////////////////
38 private final String prefix;
39 private final File[] includes;
41 public GrammarAST(File[] includes, String prefix) {
43 this.includes = includes;
46 // Methods //////////////////////////////////////////////////////////////////////////////
48 private Union buildGrammar(Tree t, String rootNonTerminal) {
50 if (o instanceof Union) return (Union)o;
51 return ((GrammarAST.GrammarNode)o).build(rootNonTerminal);
54 private Object[] walkChildren(Tree t) {
55 Object[] ret = new Object[t.size()];
56 for(int i=0; i<ret.length; i++)
57 ret[i] = walk(t.child(i));
58 return Reflection.lub(ret);
60 private static String stringifyChildren(Tree t) {
61 StringBuffer sb = new StringBuffer();
62 for(int i=0; i<t.size(); i++) {
63 sb.append(t.child(i).head());
64 sb.append(stringifyChildren(t.child(i)));
68 private static String unescape(Tree t) {
69 StringBuffer sb = new StringBuffer();
70 for(int i=0; i<t.size(); i++)
71 sb.append(t.child(i).head()+stringifyChildren(t.child(i)));
75 private ElementNode walkElement(Tree t) { return (ElementNode)walk(t); }
76 private String walkString(Tree t) { return (String)walk(t); }
77 private Seq walkSeq(Tree t) { return (Seq)walk(t); }
78 private Object walk(Tree t) {
79 String head = (String)t.head();
80 while(head.indexOf('.') > 0)
81 head = head.substring(head.indexOf('.')+1);
82 if (head==null) throw new RuntimeException("head is null: " + t);
83 if (head.equals("|")) return walkChildren(t);
84 if (head.equals("RHS")) return walkChildren(t);
85 if (head.equals("Grammar")) return new GrammarNode(walkChildren(t));
86 if (head.equals("(")) return new UnionNode((Seq[][])walkChildren(t.child(0)));
87 if (head.equals("Word")) return stringifyChildren(t);
88 if (head.equals("Elements")) return new Seq((ElementNode[])Reflection.rebuild(walkChildren(t), ElementNode[].class));
89 if (head.equals("NonTerminalReference")) return new ReferenceNode(stringifyChildren(t.child(0)));
90 if (head.equals(")")) return new ReferenceNode(stringifyChildren(t.child(0)), true);
91 if (head.equals("{")) return new BracedNode(walkSeq(t.child(0)));
92 if (head.equals("::")) return walkSeq(t.child(1)).tag(walkString(t.child(0)));
93 if (head.equals("...")) return new DropNode(new RepeatNode(new TildeNode(new AtomNode()), null, true, true, false));
95 if (head.equals("++")) return new RepeatNode(walkElement(t.child(0)), null, false, true, true);
96 if (head.equals("+")) return new RepeatNode(walkElement(t.child(0)), null, false, true, false);
97 if (head.equals("++/")) return new RepeatNode(walkElement(t.child(0)), walkElement(t.child(1)), false, true, true);
98 if (head.equals("+/")) return new RepeatNode(walkElement(t.child(0)), walkElement(t.child(1)), false, true, false);
99 if (head.equals("**")) return new RepeatNode(walkElement(t.child(0)), null, true, true, true);
100 if (head.equals("*")) return new RepeatNode(walkElement(t.child(0)), null, true, true, false);
101 if (head.equals("**/")) return new RepeatNode(walkElement(t.child(0)), walkElement(t.child(1)), true, true, true);
102 if (head.equals("*/")) return new RepeatNode(walkElement(t.child(0)), walkElement(t.child(1)), true, true, false);
103 if (head.equals("?")) return new RepeatNode(walkElement(t.child(0)), null, true, false, false);
105 if (head.equals("!")) return new DropNode(walkElement(t.child(0)));
106 if (head.equals("^")) return new LiteralNode(walkString(t.child(0)), true);
107 if (head.equals("`")) return walkElement(t.child(0)).lifted();
108 if (head.equals("Quoted")) return stringifyChildren(t);
109 if (head.equals("Literal")) return new LiteralNode(walkString(t.child(0)));
110 if (head.equals("->")) return walkSeq(t.child(0)).follow(walkElement(t.child(1)));
111 if (head.equals("DropNT")) return new NonTerminalNode(walkString(t.child(0)), (Seq[][])walkChildren(t.child(1)), false, null, true);
112 if (head.equals("=")) return new NonTerminalNode(walkString(t.child(0)), (Seq[][])walk(t.child(2)),
113 true, t.size()==2 ? null : walkString(t.child(1)), false);
114 if (head.equals("&")) return and2(walkSeq(t.child(0)), walkSeq(t.child(1)));
115 if (head.equals("&~")) return andnot2(walkSeq(t.child(0)), walkSeq(t.child(1)));
116 if (head.equals("/")) return (walkSeq(t.child(0))).separate(walkElement(t.child(1)));
117 if (head.equals("()")) return new LiteralNode("");
118 if (head.equals("[")) return new AtomNode((char[][])Reflection.rebuild(walkChildren(t), char[][].class));
119 if (head.equals("\\{")) return new DropNode(new AtomNode(new char[] { CharAtom.left, CharAtom.left }));
120 if (head.equals("\\}")) return new DropNode(new AtomNode(new char[] { CharAtom.right, CharAtom.right }));
121 if (head.equals(">>")) return new DropNode(new AtomNode(new char[] { CharAtom.left, CharAtom.left }));
122 if (head.equals("<<")) return new DropNode(new AtomNode(new char[] { CharAtom.right, CharAtom.right }));
123 if (head.equals("~")) return new TildeNode(walkElement(t.child(0)));
124 if (head.equals("~~")) return new Seq(new RepeatNode(new TildeNode(new AtomNode()), null, true, true, false)).andnot(walkSeq(t.child(0)));
125 if (head.equals("Range")) {
126 if (t.size()==2 && ">".equals(t.child(0).head())) return new char[] { CharAtom.left, CharAtom.left };
127 if (t.size()==2 && "<".equals(t.child(0).head())) return new char[] { CharAtom.right, CharAtom.right };
128 if (t.size()==1) return new char[] { unescape(t).charAt(0), unescape(t).charAt(0) };
129 return new char[] { unescape(t).charAt(0), unescape(t).charAt(1) };
131 if (head.equals("\"\"")) return "";
132 if (head.equals("\n")) return "\n";
133 if (head.equals("\r")) return "\r";
134 if (head.equals("SubGrammar")) return GrammarAST.buildFromAST(t.child(0), "s", includes);
135 if (head.equals("NonTerminal"))
136 return new NonTerminalNode(walkString(t.child(0)),
137 (Seq[][])walkChildren(t.child(1)), false, null, false);
138 if (head.equals("Colons")) {
139 String tag = walkString(t.child(0));
140 Seq[][] seqs = (Seq[][])walk(t.child(1));
141 for(Seq[] seq : seqs)
142 for(int i=0; i<seq.length; i++)
143 seq[i] = seq[i].tag(tag);
144 return new NonTerminalNode(tag, seqs, false, null, false);
146 if (head.equals("#import")) {
147 String fileName = (String)stringifyChildren(t.child(0));
148 for(File f : includes) {
149 File file = new File(f.getAbsolutePath()+File.separatorChar+fileName);
150 if (!file.exists()) continue;
152 String newPrefix = t.size()<2 ? "" : (walkString(t.child(1))+".");
153 FileInputStream fis = new FileInputStream(file);
154 Tree tr = new CharParser(MetaGrammar.newInstance()).parse(fis).expand1();
155 return (GrammarNode)new GrammarAST(includes, newPrefix).walk(tr);
156 } catch (Exception e) {
157 throw new RuntimeException("while parsing " + file, e);
160 throw new RuntimeException("unable to find #include file \""+fileName+"\"");
162 throw new RuntimeException("unknown head: \"" + head + "\" => " + (head.equals("...")));
166 // Nodes //////////////////////////////////////////////////////////////////////////////
168 /** Root node of a grammar's AST; a set of named nonterminals */
169 private class GrammarNode extends HashMap<String,NonTerminalNode> {
170 public GrammarNode(NonTerminalNode[] nonterminals) {
171 for(NonTerminalNode nt : nonterminals) {
172 if (nt==null) continue;
173 if (this.get(nt.name)!=null)
174 throw new RuntimeException("duplicate definition of nonterminal \""+nt.name+"\"");
175 this.put(nt.name, nt);
178 public GrammarNode(Object[] nt) { add(nt); }
179 private void add(Object o) {
181 else if (o instanceof Object[]) for(Object o2 : (Object[])o) add(o2);
182 else if (o instanceof NonTerminalNode) {
183 NonTerminalNode nt = (NonTerminalNode)o;
184 if (this.get(nt.name)!=null)
185 throw new RuntimeException("duplicate definition of nonterminal \""+nt.name+"\"");
186 this.put(nt.name, nt);
188 else if (o instanceof GrammarNode)
189 for(NonTerminalNode n : ((GrammarNode)o).values())
192 public String toString() {
194 for(NonTerminalNode nt : values()) ret += nt + ", ";
197 public Union build(String rootNonterminal) {
198 Context cx = new Context(this);
200 for(GrammarAST.NonTerminalNode nt : values())
201 if (nt.name.equals(rootNonterminal))
202 return (Union)cx.get(nt.name);
207 private class UnionNode extends ElementNode {
208 public Seq[][] sequences;
209 public String sep = null;
211 public UnionNode(Seq seq) { this(new Seq[][] { new Seq[] { seq } }); }
212 public UnionNode(Seq[][] sequences) { this(sequences, false, null); }
213 public UnionNode(Seq[][] sequences, boolean rep, String sep) {
214 this.sequences = sequences;
218 public boolean drop(Context cx) {
219 for(Seq[] seqs : sequences)
225 public Atom toAtom(Context cx) {
227 for(Seq[] ss : sequences)
229 ret = ret==null ? s.toAtom(cx) : (Atom)ret.union(s.toAtom(cx));
232 public Element build(Context cx, NonTerminalNode cnt, boolean dropall) {
233 return buildIntoPreallocatedUnion(cx, cnt, dropall, new Union(null, false)); }
234 public Element buildIntoPreallocatedUnion(Context cx, NonTerminalNode cnt, boolean dropall, Union u) {
237 urep = new Union(null, false);
238 urep.add(Sequence.create(cnt.name, new Element[0]));
240 ? Sequence.create(new Element[] { u }, 0)
241 : Sequence.create(new Element[] { cx.get(sep), u }, 1));
243 HashSet<Sequence> bad2 = new HashSet<Sequence>();
244 for(int i=0; i<sequences.length; i++) {
245 Seq[] group = sequences[i];
246 Union u2 = new Union(null, false);
247 if (sequences.length==1) u2 = u;
248 for(int j=0; j<group.length; j++)
250 group[j].build(cx, u2, cnt, dropall);
252 Union u3 = new Union(null, false);
253 group[j].build(cx, u3, cnt, dropall);
254 Sequence s = Sequence.create(cnt.name,
255 new Element[] { u3, urep },
256 new boolean[] { false, false },
257 new boolean[] { false, true});
260 if (sequences.length==1) break;
261 Sequence seq = Sequence.create(u2);
262 for(Sequence s : bad2) seq = seq.andnot(s);
264 bad2.add(Sequence.create(u2));
270 private class NonTerminalNode extends UnionNode {
271 public boolean alwaysDrop;
272 public String name = null;
273 public boolean drop(Context cx) { return alwaysDrop; }
274 public NonTerminalNode(String name, Seq[][] sequences, boolean rep, String sep, boolean alwaysDrop) {
275 super(sequences, rep, sep==null?null:(prefix + sep));
276 this.name = prefix + name;
277 this.alwaysDrop = alwaysDrop;
279 public Element build(Context cx, NonTerminalNode cnt, boolean dropall) { return cx.get(name); }
283 public boolean alwaysDrop = false;
284 public boolean drop(Context cx) {
285 if (alwaysDrop) return true;
286 if (tag!=null) return false;
287 for(int i=0; i<elements.length; i++)
288 if (!elements[i].drop(cx))
292 HashSet<Seq> and = new HashSet<Seq>();
293 HashSet<Seq> not = new HashSet<Seq>();
294 ElementNode[] elements;
297 public Seq(ElementNode e) { this(new ElementNode[] { e }); }
298 public Seq(ElementNode[] elements) { this(elements, true); }
299 public Seq(ElementNode[] el, boolean check) {
300 this.elements = new ElementNode[el.length];
301 System.arraycopy(el, 0, elements, 0, el.length);
302 for(int i=0; i<elements.length; i++) {
303 if (elements[i]==null)
304 throw new RuntimeException();
305 elements[i].ownerSeq = this;
307 // FIXME: this whole mechanism is sketchy
309 for(int i=0; i<elements.length; i++) {
310 if ((elements[i] instanceof ReferenceNode) && ((ReferenceNode)elements[i]).parenthesized) {
311 ReferenceNode rn = (ReferenceNode)elements[i];
312 ElementNode replace = null;
313 for(int j=0; j<elements.length; j++) {
314 if (!(elements[j] instanceof ReferenceNode)) continue;
315 ReferenceNode rn2 = (ReferenceNode)elements[j];
316 if (rn2.nonTerminal.equals(rn.nonTerminal) && !rn2.parenthesized) {
317 if (replace == null) {
318 replace = new UnionNode(new Seq(rn2).andnot(new Seq(elements, false)));
320 elements[j] = replace;
326 public Atom toAtom(Context cx) {
327 if (elements.length != 1)
328 throw new Error("you attempted to use ->, **, ++, or a similar character-class"+
329 " operator on a [potentially] multicharacter production");
330 return elements[0].toAtom(cx);
332 public Seq tag(String tag) { this.tag = tag; return this; }
333 public Seq follow(ElementNode follow) { this.follow = follow; return this; }
334 public Seq and(Seq s) { and.add(s); return this; }
335 public Seq andnot(Seq s) { not.add(s); return this; }
336 public Seq separate(ElementNode sep) {
337 ElementNode[] elements = new ElementNode[this.elements.length * 2 - 1];
338 for(int i=0; i<this.elements.length; i++) {
339 elements[i*2] = this.elements[i];
340 if (i<this.elements.length-1)
341 elements[i*2+1] = new DropNode(sep);
343 this.elements = elements;
346 public Sequence build(Context cx, Union u, NonTerminalNode cnt, boolean dropall) {
347 Sequence ret = build0(cx, cnt, dropall);
348 for(Seq s : and) ret = ret.and(s.build(cx, null, cnt, true));
349 for(Seq s : not) ret = ret.andnot(s.build(cx, null, cnt, true));
350 if (u!=null) u.add(ret);
353 public Sequence build0(Context cx, NonTerminalNode cnt, boolean dropall) {
354 boolean[] drops = new boolean[elements.length];
355 Element[] els = new Element[elements.length];
357 for(int i=0; i<elements.length; i++) {
358 if (dropall) drops[i] = true;
359 else drops[i] = elements[i].drop(cx);
360 if (elements[i].getOwnerTag() != null)
361 tag = elements[i].getOwnerTag();
365 boolean multiNonDrop = false;
366 for(int i=0; i<drops.length; i++)
368 if (idx==-1) idx = i;
369 else multiNonDrop = true;
370 for(int i=0; i<elements.length; i++) {
371 if (!multiNonDrop && i==idx && tag!=null && elements[i] instanceof RepeatNode) {
372 els[i] = ((RepeatNode)elements[i]).build(cx, cnt, dropall, tag);
375 els[i] = elements[i].build(cx, cnt, dropall);
377 if (tag==null && multiNonDrop)
378 throw new RuntimeException("multiple non-dropped elements in sequence: " + Sequence.create("", els));
379 boolean[] lifts = new boolean[elements.length];
380 for(int i=0; i<elements.length; i++)
381 lifts[i] = elements[i].lifted;
385 ? Sequence.create(illegalTag, els)
386 : Sequence.create(tag, els, drops, lifts);
387 else if (tag==null) ret = Sequence.create(els, idx);
388 else ret = Sequence.create(tag, els, drops, lifts);
391 ret = Sequence.create(tag, els, drops, lifts);
392 if (this.follow != null)
393 ret = ret.followedBy(this.follow.toAtom(cx));
398 private class ReferenceNode extends ElementNode {
399 public String nonTerminal;
400 public boolean parenthesized;
401 public ReferenceNode() { }
402 public ReferenceNode(String nonTerminal) { this(nonTerminal, false); }
403 public ReferenceNode(String nonTerminal, boolean parenthesized) {
404 this.nonTerminal = nonTerminal.indexOf('.')==-1 ? (prefix + nonTerminal) : nonTerminal;
405 this.parenthesized = parenthesized;
407 public NonTerminalNode resolve(Context cx) {
408 NonTerminalNode ret = cx.grammar.get(nonTerminal);
409 if (ret==null) throw new RuntimeException("undefined nonterminal: " + nonTerminal);
412 public Atom toAtom(Context cx) {
413 ElementNode ret = cx.grammar.get(nonTerminal);
414 if (ret == null) throw new RuntimeException("unknown nonterminal \""+nonTerminal+"\"");
415 return ret.toAtom(cx);
417 public boolean drop(Context cx) { return resolve(cx).drop(cx); }
418 public Element build(Context cx, NonTerminalNode cnt, boolean dropall) {
420 if (!this.nonTerminal.startsWith(prefix))
421 nonTerminal = prefix + nonTerminal;
423 Element ret = cx.get(nonTerminal);
424 if (ret == null) throw new RuntimeException("unknown nonterminal \""+nonTerminal+"\"");
429 private class LiteralNode extends ElementNode {
430 private String string;
431 private final String thePrefix = prefix;
432 private boolean caret;
433 public LiteralNode(String string) { this(string, false); }
434 public LiteralNode(String string, boolean caret) {
435 this.string = string;
438 public String getOwnerTag() { return caret ? thePrefix+string : super.getOwnerTag(); }
439 public String toString() { return "\""+string+"\""; }
440 public boolean drop(Context cx) { return true; }
441 public Atom toAtom(Context cx) {
442 if (string.length()!=1) return super.toAtom(cx);
443 Range.Set set = new Range.Set();
444 set.add(string.charAt(0), string.charAt(0));
445 return CharAtom.set(set);
447 public Element build(Context cx, NonTerminalNode cnt, boolean dropall) { return CharAtom.string(string); }
450 private class AtomNode extends ElementNode {
452 public AtomNode() { this(new char[0][]); }
453 public AtomNode(char[][] ranges) { this.ranges = ranges; }
454 public AtomNode(char[] range) { this.ranges = new char[][] { range }; }
455 public Element build(Context cx, NonTerminalNode cnt, boolean dropall) { return toAtom(cx); }
456 public Atom toAtom(Context cx) {
457 Range.Set set = new Range.Set();
458 for(char[] r : ranges) set.add(r[0], r[1]);
459 return CharAtom.set(set);
463 private class RepeatNode extends ElementNode {
464 public ElementNode e, sep;
465 public final boolean zero, many, max;
466 public RepeatNode(ElementNode e, ElementNode sep, boolean zero, boolean many, boolean max) {
467 this.e = e; this.sep = sep; this.zero = zero; this.many = many; this.max = max;
469 public Atom toAtom(Context cx) { return sep==null ? e.toAtom(cx) : super.toAtom(cx); }
470 public boolean drop(Context cx) { return e.drop(cx); }
471 public Element build(Context cx, NonTerminalNode cnt, boolean dropall) {
472 Element ret = build(cx, cnt, dropall, illegalTag);
473 String must = "must be tagged unless they appear within a dropped expression or their contents are dropped: ";
474 if (!dropall && !drop(cx) && !e.drop(cx))
475 if (!many) throw new RuntimeException("options (?) " + must + ret);
476 else if (zero) throw new RuntimeException("zero-or-more repetitions (*) " + must + ret);
477 else throw new RuntimeException("one-or-more repetitions (+) " + must + ret);
480 public Element build(Context cx, NonTerminalNode cnt, boolean dropall, Object repeatTag) {
482 ? Repeat.repeat(e.build(cx, null, dropall), zero, many, sep==null ? null : sep.build(cx, null, dropall), repeatTag)
484 ? Repeat.repeatMaximal(e.toAtom(cx), zero, many, repeatTag)
485 : Repeat.repeatMaximal(e.build(cx, null, dropall), zero, many, sep.toAtom(cx), repeatTag);
489 private abstract class ElementNode {
490 public boolean lifted = false;
491 public Seq ownerSeq = null;
492 public String getOwnerTag() { return null; }
493 public ElementNode lifted() { this.lifted = true; return this; }
494 public boolean drop(Context cx) { return false; }
495 public Atom toAtom(Context cx) { throw new Error("can't convert a " + this.getClass().getName() + " to an atom: " + this); }
496 public abstract Element build(Context cx, NonTerminalNode cnt, boolean dropall);
499 private abstract class ElementNodeWrapper extends ElementNode {
500 protected ElementNode _e;
501 public ElementNodeWrapper(ElementNode e) { this._e = e; }
502 public String getOwnerTag() { return _e.getOwnerTag(); }
503 public boolean drop(Context cx) { return _e.drop(cx); }
504 public Atom toAtom(Context cx) { return _e.toAtom(cx); }
505 public Element build(Context cx, NonTerminalNode cnt, boolean dropall) { return _e.build(cx, cnt, dropall); }
508 private class TildeNode extends ElementNodeWrapper {
509 public TildeNode(ElementNode e) { super(e); }
510 public Atom toAtom(Context cx) { return (Atom)((Topology<Character>)_e.toAtom(cx).complement()); }
511 public Element build(Context cx, NonTerminalNode cnt, boolean dropall) { return toAtom(cx); }
514 private class DropNode extends ElementNodeWrapper {
515 public DropNode(ElementNode e) { super(e); }
516 public boolean drop(Context cx) { return true; }
519 // FIXME: doesn't this require a tag?
520 private class BracedNode extends ElementNode {
522 public BracedNode(Seq seq) { this.body = seq; }
523 public Element build(Context cx, NonTerminalNode cnt, boolean dropall) {
524 Union u = new Union(null, false);
525 Sequence s = body.build(cx, u, null, dropall);
526 Union u2 = new Union(null, false);
527 u2.add(Sequence.create(new Element[] {
536 public Seq and2(Seq s, Seq a) { a.alwaysDrop = true; return s.and(a); }
537 public Seq andnot2(Seq s, Seq a) { a.alwaysDrop = true; return s.andnot(a); }
539 //////////////////////////////////////////////////////////////////////////////
541 public class Context {
542 public HashMap<String,Union> map = new HashMap<String,Union>();
543 public GrammarNode grammar;
544 public Context(Tree t) { }
545 public Context(GrammarNode g) { this.grammar = g; }
546 public Union build() {
548 for(NonTerminalNode nt : grammar.values()) {
549 Union u = get(nt.name);
550 if ("s".equals(nt.name))
555 public Union peek(String name) { return map.get(name); }
556 public void put(String name, Union u) { map.put(name, u); }
557 public Union get(String name) {
558 Union ret = map.get(name);
559 if (ret != null) return ret;
560 NonTerminalNode nt = grammar.get(name);
562 throw new Error("warning could not find " + name);
564 ret = new Union(name, false);
566 nt.buildIntoPreallocatedUnion(this, nt, nt.drop(this), ret);