1 // Copyright 2006 all rights reserved; see LICENSE file for BSD-style license
3 package edu.berkeley.sbp.meta;
4 import edu.berkeley.sbp.util.*;
5 import edu.berkeley.sbp.*;
6 import edu.berkeley.sbp.chr.*;
7 import edu.berkeley.sbp.misc.*;
9 import java.lang.annotation.*;
10 import java.lang.reflect.*;
13 // FIXME: deal with question-marks
15 // FIXME: put back in associativity: ^")"
17 // means that the FIRST "A" cannot match the SAME sequence in
18 // which the (A) occurs
19 // -- and add a test case
21 // FIXME: make it so that we can have multi-result nonterminals
22 // so long as they always appear under double-colons?
23 // auto-insert the unwrap?
25 // FIXME: put associativity back in
27 // FIXME: "flat" sequences (no subtrees contain "::"s?)
29 // freezing problem is related to comments in grammar files
31 /** The java classes typically used to represent a parsed grammar AST; each inner class is a type of AST node. */
32 public class GrammarBuilder {
34 public static Object illegalTag = ""; // this is the tag that should never appear in the non-dropped output FIXME
36 private final String prefix;
37 private final String path;
39 //public GrammarBuilder(String path) { this(path, ""); }
40 public GrammarBuilder(String path, String prefix) {
45 public Union buildGrammar(Tree t, String rootNonTerminal) {
46 return ((GrammarBuilder.GrammarNode)walk(t)).build(rootNonTerminal);
49 private ElementNode epsilon = new LiteralNode("");
51 public Object[] walkChildren(Tree t) {
52 Object[] ret = new Object[t.size()];
53 for(int i=0; i<ret.length; i++) {
54 ret[i] = walk(t.child(i));
55 if (ret[i] instanceof Object[])
56 ret[i] = Reflection.lub((Object[])ret[i]);
58 return Reflection.lub(ret);
60 public String stringifyChildren(Tree t) {
61 StringBuffer sb = new StringBuffer();
62 for(int i=0; i<t.size(); i++) {
63 sb.append(t.child(i).head());
64 sb.append(stringifyChildren(t.child(i)));
68 public String unescape(Tree t) {
69 StringBuffer sb = new StringBuffer();
70 for(int i=0; i<t.size(); i++)
71 sb.append(t.child(i).head()+stringifyChildren(t.child(i)));
75 public Object walk(Tree t) {
76 String head = (String)t.head();
77 while(head.indexOf('.') != -1) head = head.substring(head.indexOf('.')+1);
78 if (head==null) throw new RuntimeException("head is null: " + t);
79 if (head.equals("|")) return walkChildren(t);
80 if (head.equals("RHS")) return walkChildren(t);
81 if (head.equals("Grammar")) return new GrammarNode(walkChildren(t));
82 if (head.equals("(")) return new UnionNode((Seq[][])walkChildren(t.child(0)));
83 if (head.equals("Word")) return stringifyChildren(t);
84 if (head.equals("Elements")) return seq2((ElementNode[])Reflection.rebuild(walkChildren(t), ElementNode[].class));
85 if (head.equals("NonTerminalReference")) return new ReferenceNode(stringifyChildren(t.child(0)));
86 //if (head.equals(")")) return new ReferenceNode(stringifyChildren(t.child(0)));
87 if (head.equals("{")) return new XTree((Seq)walk(t.child(0)));
88 if (head.equals("::")) return tag((String)walk(t.child(0)), (Seq)walk(t.child(1)));
89 if (head.equals("++")) return plusmax((ElementNode)walk(t.child(0)));
90 if (head.equals("+")) return plus((ElementNode)walk(t.child(0)));
91 if (head.equals("++/")) return plusmaxfollow((ElementNode)walk(t.child(0)), (ElementNode)walk(t.child(1)));
92 if (head.equals("+/")) return plusfollow((ElementNode)walk(t.child(0)), (ElementNode)walk(t.child(1)));
93 if (head.equals("**")) return starmax((ElementNode)walk(t.child(0)));
94 if (head.equals("*")) return star((ElementNode)walk(t.child(0)));
95 if (head.equals("**/")) return starmaxfollow((ElementNode)walk(t.child(0)), (ElementNode)walk(t.child(1)));
96 if (head.equals("*/")) return starfollow((ElementNode)walk(t.child(0)), (ElementNode)walk(t.child(1)));
97 if (head.equals("?")) return question((ElementNode)walk(t.child(0)));
98 if (head.equals("!")) return new DropNode((ElementNode)walk(t.child(0)));
99 if (head.equals("^")) return new LiteralNode((String)walk(t.child(0)), true);
100 if (head.equals("Quoted")) return stringifyChildren(t);
101 if (head.equals("Literal")) return new LiteralNode((String)walk(t.child(0)));
102 if (head.equals("->")) return arrow((Seq)walk(t.child(0)), (ElementNode)walk(t.child(1)));
103 if (head.equals("DropNT")) return new NonTerminalNode((String)walk(t.child(0)), (Seq[][])walkChildren(t.child(1)), false, null, true);
104 if (head.equals("=") && t.size()==2) return new NonTerminalNode((String)walk(t.child(0)), (Seq[][])walk(t.child(1)), true, null, false);
105 if (head.equals("=")) return new NonTerminalNode((String)walk(t.child(0)), (Seq[][])walk(t.child(2)), true, (String)walk(t.child(1)), false);
106 if (head.equals("&")) return and2((Seq)walk(t.child(0)), (Seq)walk(t.child(1)));
107 if (head.equals("&~")) return andnot2((Seq)walk(t.child(0)), (Seq)walk(t.child(1)));
108 if (head.equals("/")) return ((Seq)walk(t.child(0))).separate((ElementNode)walk(t.child(1)));
109 if (head.equals("()")) return epsilon;
110 if (head.equals("[")) return new AtomNode((AtomNodeRange[])Reflection.rebuild(walkChildren(t), AtomNodeRange[].class));
111 if (head.equals("\\{")) return new DropNode(new AtomNode(new AtomNodeRange(CharAtom.left, CharAtom.left)));
112 if (head.equals("\\}")) return new DropNode(new AtomNode(new AtomNodeRange(CharAtom.right, CharAtom.right)));
113 if (head.equals("~")) return new TildeNode((ElementNode)walk(t.child(0)));
114 if (head.equals("Range") && t.size()==1) return new AtomNodeRange(unescape(t).charAt(0));
115 if (head.equals("Range")) return new AtomNodeRange(unescape(t).charAt(0), unescape(t).charAt(1));
116 if (head.equals("\"\"")) return "";
117 if (head.equals("\n")) return "\n";
118 if (head.equals("\r")) return "\r";
119 if (head.equals("grammar.Grammar")) return walkChildren(t);
120 if (head.equals("SubGrammar")) return Grammar.create(t.child(0), "s");
121 if (head.equals("NonTerminal"))
122 return new NonTerminalNode((String)walk(t.child(0)),
123 (Seq[][])walkChildren(t.child(1)), false, null, false);
124 if (head.equals("Colons")) {
125 String tag = (String)walk(t.child(0));
126 Seq[][] seqs = (Seq[][])walk(t.child(1));
127 for(Seq[] seq : seqs)
128 for(int i=0; i<seq.length; i++)
129 seq[i] = tag(tag, seq[i]);
130 return new NonTerminalNode(tag, seqs, false, null, false);
132 if (head.equals("TestCase"))
133 return new RegressionTests.TestCase((String)walk(t.child(0)),
134 (String)walk(t.child(1)),
135 (String[])Reflection.coerce(walkChildren(t.child(2)), String[].class),
136 (Union)walk(t.child(3)),
139 if (head.equals("#import")) {
140 String fileName = path+(String)stringifyChildren(t.child(0));
142 String newPrefix = (String)walk(t.child(1));
143 if (newPrefix.length() > 0) newPrefix += ".";
144 FileInputStream fis = new FileInputStream(fileName);
145 Tree tr = new CharParser(MetaGrammar.newInstance()).parse(fis).expand1();
146 return (GrammarNode)new GrammarBuilder(path, newPrefix).walk(tr);
147 } catch (Exception e) {
148 throw new RuntimeException("while parsing " + fileName, e);
151 throw new RuntimeException("unknown head: " + head + "\n"+t);
154 /** A grammar (a set of nonterminals) */
155 public class GrammarNode extends HashMap<String,NonTerminalNode> {
156 public GrammarNode(NonTerminalNode[] nonterminals) {
157 for(NonTerminalNode nt : nonterminals) {
158 if (nt==null) continue;
159 if (this.get(nt.name)!=null)
160 throw new RuntimeException("duplicate definition of nonterminal \""+nt.name+"\"");
161 this.put(nt.name, nt);
164 public GrammarNode(Object[] nt) { add(nt); }
165 private void add(Object o) {
167 else if (o instanceof Object[]) for(Object o2 : (Object[])o) add(o2);
168 else if (o instanceof NonTerminalNode) {
169 NonTerminalNode nt = (NonTerminalNode)o;
170 if (this.get(nt.name)!=null)
171 throw new RuntimeException("duplicate definition of nonterminal \""+nt.name+"\"");
172 this.put(nt.name, nt);
174 else if (o instanceof GrammarNode)
175 for(NonTerminalNode n : ((GrammarNode)o).values())
178 public String toString() {
180 for(NonTerminalNode nt : values()) ret += nt + ", ";
183 public Union build(String rootNonterminal) {
184 Context cx = new Context(this);
186 for(GrammarBuilder.NonTerminalNode nt : values())
187 if (nt.name.equals(rootNonterminal))
188 return (Union)cx.get(nt.name);
193 public class UnionNode extends ElementNode {
194 public Seq[][] sequences;
195 public String sep = null;
197 public UnionNode(Seq[][] sequences) { this(sequences, false, null); }
198 public UnionNode(Seq[][] sequences, boolean rep, String sep) {
199 this.sequences = sequences;
203 public Atom toAtom(Context cx) {
205 for(Seq[] ss : sequences)
207 ret = ret==null ? s.toAtom(cx) : (Atom)ret.union(s.toAtom(cx));
210 public Element build(Context cx, NonTerminalNode cnt, boolean dropall) {
211 return buildIntoPreallocatedUnion(cx, cnt, dropall, new Union(null, false)); }
212 public Element buildIntoPreallocatedUnion(Context cx, NonTerminalNode cnt, boolean dropall, Union u) {
215 urep = new Union(null, false);
216 urep.add(Sequence.create(new Element[0], cnt.name));
218 ? Sequence.create(new Element[] { u }, 0)
219 : Sequence.create(new Element[] { cx.get(sep), u }, 1));
221 HashSet<Sequence> bad2 = new HashSet<Sequence>();
222 for(int i=0; i<sequences.length; i++) {
223 Seq[] group = sequences[i];
224 Union u2 = new Union(null, false);
225 if (sequences.length==1) u2 = u;
226 for(int j=0; j<group.length; j++)
228 group[j].build(cx, u2, cnt, dropall);
230 Union u3 = new Union(null, false);
231 group[j].build(cx, u3, cnt, dropall);
232 Sequence s = Sequence.create(cnt.name,
233 new Element[] { u3, urep },
234 new boolean[] { false, false },
238 if (sequences.length==1) break;
239 Sequence seq = Sequence.create(u2);
240 for(Sequence s : bad2) seq = seq.andnot(s);
242 bad2.add(Sequence.create(u2));
248 public class NonTerminalNode extends UnionNode {
249 public boolean alwaysDrop;
250 public String name = null;
251 public boolean drop() { return alwaysDrop; }
252 public NonTerminalNode(String name, Seq[][] sequences, boolean rep, String sep, boolean alwaysDrop) {
253 super(sequences, rep, sep==null?null:(prefix + sep));
254 this.name = prefix + name;
255 this.alwaysDrop = alwaysDrop;
257 public Element build(Context cx, NonTerminalNode cnt, boolean dropall) { return cx.get(name); }
261 public boolean alwaysDrop = false;
262 public boolean drop() { return alwaysDrop; }
263 HashSet<Seq> and = new HashSet<Seq>();
264 HashSet<Seq> not = new HashSet<Seq>();
265 ElementNode[] elements;
268 public Seq(ElementNode e) { this(new ElementNode[] { e }); }
269 public Seq(ElementNode[] elements) {
270 this.elements = elements;
271 for(int i=0; i<elements.length; i++)
272 if (elements[i]==null)
273 throw new RuntimeException();
275 public Atom toAtom(Context cx) {
276 if (elements.length != 1)
277 throw new Error("you attempted to use ->, **, ++, or a similar character-class"+
278 " operator on a [potentially] multicharacter production");
279 return elements[0].toAtom(cx);
281 public Seq tag(String tag) { this.tag = tag; return this; }
282 public Seq follow(ElementNode follow) { this.follow = follow; return this; }
283 public Seq and(Seq s) { and.add(s); return this; }
284 public Seq andnot(Seq s) { not.add(s); return this; }
286 Seq ret = new Seq(elements);
293 public Seq separate(ElementNode sep) {
294 ElementNode[] elements = new ElementNode[this.elements.length * 2 - 1];
295 for(int i=0; i<this.elements.length; i++) {
296 elements[i*2] = this.elements[i];
297 if (i<this.elements.length-1)
298 elements[i*2+1] = new DropNode(sep);
300 this.elements = elements;
303 public Sequence build(Context cx, Union u, NonTerminalNode cnt, boolean dropall) {
304 Sequence ret = build0(cx, cnt, dropall);
305 for(Seq s : and) ret = ret.and(s.build(cx, null, cnt, true));
306 for(Seq s : not) ret = ret.andnot(s.build(cx, null, cnt, true));
307 if (u!=null) u.add(ret);
310 public Sequence build0(Context cx, NonTerminalNode cnt, boolean dropall) {
311 boolean[] drops = new boolean[elements.length];
312 Element[] els = new Element[elements.length];
314 for(int i=0; i<elements.length; i++) {
315 if (dropall) drops[i] = true;
316 else drops[i] = elements[i].drop();
317 if (elements[i] instanceof ReferenceNode)
318 if (((ReferenceNode)elements[i]).resolve(cx).drop())
320 if (elements[i].getOwnerTag() != null)
321 tag = elements[i].getOwnerTag();
325 boolean multiNonDrop = false;
326 for(int i=0; i<drops.length; i++)
328 if (idx==-1) idx = i;
329 else multiNonDrop = true;
330 for(int i=0; i<elements.length; i++) {
331 if (!multiNonDrop && i==idx && tag!=null && elements[i] instanceof RepeatNode) {
332 els[i] = ((RepeatNode)elements[i]).build(cx, cnt, dropall, tag);
335 els[i] = elements[i].build(cx, cnt, dropall);
337 if (tag==null && multiNonDrop)
338 throw new RuntimeException("multiple non-dropped elements in sequence: " + Sequence.create(els, ""));
342 ? Sequence.create(els, illegalTag)
343 : Sequence.create(tag, els, drops, false);
344 else if (tag==null) ret = Sequence.create(els, idx);
345 else ret = Sequence.create(tag, els, drops, false);
348 ret = Sequence.create(tag, els, drops, false);
349 if (this.follow != null)
350 ret = ret.followedBy(this.follow.toAtom(cx));
355 public class ReferenceNode extends ElementNode {
356 public String nonTerminal;
357 public ReferenceNode() { }
358 public NonTerminalNode resolve(Context cx) { return cx.grammar.get(nonTerminal); }
359 public ReferenceNode(String nonTerminal) { this.nonTerminal = prefix + nonTerminal; }
360 public Atom toAtom(Context cx) { return cx.grammar.get(nonTerminal).toAtom(cx); }
361 public Element build(Context cx, NonTerminalNode cnt, boolean dropall) {
362 if (!this.nonTerminal.startsWith(prefix)) nonTerminal = prefix + nonTerminal;
363 Element ret = cx.get(nonTerminal);
364 if (ret == null) throw new RuntimeException("unknown nonterminal \""+nonTerminal+"\"");
369 public class LiteralNode extends ElementNode {
370 private String string;
371 private final String thePrefix = prefix;
372 private boolean caret;
373 public LiteralNode(String string) { this(string, false); }
374 public LiteralNode(String string, boolean caret) {
375 this.string = string;
378 public String getOwnerTag() { return caret ? thePrefix+string : super.getOwnerTag(); }
379 public String toString() { return "\""+string+"\""; }
380 public boolean drop() { return true; }
381 public Atom toAtom(Context cx) {
382 if (string.length()!=1) return super.toAtom(cx);
383 edu.berkeley.sbp.util.Range.Set set = new edu.berkeley.sbp.util.Range.Set();
384 set.add(string.charAt(0), string.charAt(0));
385 return CharAtom.set(set);
387 public Element build(Context cx, NonTerminalNode cnt, boolean dropall) { return CharAtom.string(string); }
390 public class AtomNode extends ElementNode {
391 AtomNodeRange[] ranges;
392 public AtomNode(AtomNodeRange[] ranges) { this.ranges = ranges; }
393 public AtomNode(AtomNodeRange range) { this.ranges = new AtomNodeRange[] { range }; }
394 public Element build(Context cx, NonTerminalNode cnt, boolean dropall) { return toAtom(cx); }
395 public Atom toAtom(Context cx) {
396 edu.berkeley.sbp.util.Range.Set set = new edu.berkeley.sbp.util.Range.Set();
397 for(AtomNodeRange r : ranges) set.add(r.first, r.last);
398 return CharAtom.set(set);
401 public class AtomNodeRange {
404 public AtomNodeRange(char only) { first = only; last = only; }
405 public AtomNodeRange(char first, char last) { this.first = first; this.last = last; }
408 public class RepeatNode extends ElementNode {
409 public ElementNode e, sep;
410 public boolean zero, many, max;
411 public RepeatNode(ElementNode e, ElementNode sep, boolean zero, boolean many, boolean max) {
412 this.e = e; this.sep = sep; this.zero = zero; this.many = many; this.max = max;}
413 public Atom toAtom(Context cx) { return sep==null ? e.toAtom(cx) : super.toAtom(cx); }
414 public Element build(Context cx, NonTerminalNode cnt, boolean dropall) {
415 if (!dropall && !drop() && !e.drop())
416 throw new Error("you need a tag on this repetition: " + build(cx, cnt, dropall, ""));
417 return build(cx, cnt, dropall, illegalTag);
419 public Element build(Context cx, NonTerminalNode cnt, boolean dropall, Object repeatTag) {
421 ? Repeat.repeat(e.build(cx, null, dropall), zero, many, sep==null ? null : sep.build(cx, null, dropall), repeatTag)
423 ? Repeat.repeatMaximal(e.toAtom(cx), zero, many, repeatTag)
424 : Repeat.repeatMaximal(e.build(cx, null, dropall), zero, many, sep.toAtom(cx), repeatTag);
428 public abstract class ElementNode {
429 public String getOwnerTag() { return null; }
430 public boolean drop() { return false; }
431 public Atom toAtom(Context cx) { throw new Error("can't convert a " + this.getClass().getName() + " to an atom: " + this); }
432 public abstract Element build(Context cx, NonTerminalNode cnt, boolean dropall);
435 public abstract class ElementNodeWrapper extends ElementNode {
436 protected ElementNode _e;
437 public ElementNodeWrapper(ElementNode e) { this._e = e; }
438 public String getOwnerTag() { return _e.getOwnerTag(); }
439 public boolean drop() { return _e.drop(); }
440 public Atom toAtom(Context cx) { return _e.toAtom(cx); }
441 public Element build(Context cx, NonTerminalNode cnt, boolean dropall) { return _e.build(cx, cnt, dropall); }
444 public class TildeNode extends ElementNodeWrapper {
445 public TildeNode(ElementNode e) { super(e); }
446 public Atom toAtom(Context cx) { return (Atom)((Topology<Character>)_e.toAtom(cx).complement()); }
447 public Element build(Context cx, NonTerminalNode cnt, boolean dropall) { return toAtom(cx); }
450 public class DropNode extends ElementNodeWrapper {
451 public DropNode(ElementNode e) { super(e); }
452 public boolean drop() { return true; }
455 public Seq and2(Seq s, Seq a) { a.alwaysDrop = true; return s.and(a); }
456 public Seq andnot2(Seq s, Seq a) { a.alwaysDrop = true; return s.andnot(a); }
457 public Seq arrow(Seq s, ElementNode e) { return s.follow(e); }
458 public Seq tag(String tagname, Seq s) { return s.tag(tagname); }
459 public Seq seq(ElementNode[] elements) { return new Seq(elements); }
460 public Seq seq2(ElementNode[] elements) { return new Seq(elements); }
461 public ElementNode plusmax(final ElementNode e) { return new RepeatNode(e, null, false, true, true); }
462 public ElementNode plus(final ElementNode e) { return new RepeatNode(e, null, false, true, false); }
463 public ElementNode plusmaxfollow(final ElementNode e, final ElementNode sep) { return new RepeatNode(e, sep, false, true, true); }
464 public ElementNode plusfollow(final ElementNode e, final ElementNode sep) { return new RepeatNode(e, sep, false, true, false); }
465 public ElementNode starmax(final ElementNode e) { return new RepeatNode(e, null, true, true, true); }
466 public ElementNode star(final ElementNode e) { return new RepeatNode(e, null, true, true, false); }
467 public ElementNode starmaxfollow(final ElementNode e, final ElementNode sep) { return new RepeatNode(e, sep, true, true, true); }
468 public ElementNode starfollow(final ElementNode e, final ElementNode sep) { return new RepeatNode(e, sep, true, true, false); }
469 public ElementNode question(final ElementNode e) { return new RepeatNode(e, null, true, true, false); }
471 //////////////////////////////////////////////////////////////////////////////
473 public class Context {
474 public HashMap<String,Union> map = new HashMap<String,Union>();
475 public GrammarNode grammar;
476 public Context(Tree t) { }
477 public Context(GrammarNode g) { this.grammar = g; }
478 public Union build() {
480 for(NonTerminalNode nt : grammar.values()) {
481 Union u = get(nt.name);
482 if ("s".equals(nt.name))
487 public Union peek(String name) { return map.get(name); }
488 public void put(String name, Union u) { map.put(name, u); }
489 public Union get(String name) {
490 Union ret = map.get(name);
491 if (ret != null) return ret;
492 NonTerminalNode nt = grammar.get(name);
494 throw new Error("warning could not find " + name);
496 ret = new Union(name, false);
498 nt.buildIntoPreallocatedUnion(this, nt, nt.drop(), ret);
505 public class XTree extends ElementNode {
507 public XTree(Seq seq) { this.body = seq; }
508 public Element build(Context cx, NonTerminalNode cnt, boolean dropall) {
509 Union u = new Union(null, false);
510 Sequence s = body.build(cx, u, null, dropall);
511 Union u2 = new Union(null, false);
512 u2.add(Sequence.create(new Element[] {