From: adam Date: Tue, 13 Dec 2005 08:20:03 +0000 (-0500) Subject: checkpoint X-Git-Tag: tag_for_25-Mar~586 X-Git-Url: http://git.megacz.com/?p=sbp.git;a=commitdiff_plain;h=77d1e5fd29be1ca529e71a4c011f5edea61588e8 checkpoint darcs-hash:20051213082003-5007d-1c2c8cfe176e8c926cd99966054104bf370b792c.gz --- diff --git a/TODO b/TODO index 78baf01..1185209 100644 --- a/TODO +++ b/TODO @@ -1,3 +1,33 @@ +// priorities are all messy and dont get serialized +// 1. Error messages +// 2. Java MetaGrammar (proof of concept) +// 3. Ivan's MetaGrammar +// 4. Documentation format +// - TIB + +// TODO: better API for interfacing with Java +// TODO: error messages +// TODO: integrate with TIB + +// Element +// Walk +// ParseTable / GSS +// MetaGrammar (necessary/relevant?) +// Tree (cleanup?) +// Union.SubUnion +// Repeat + +// FEATURE: serialization of ParseTable's, generation of Java code +// FEATURE: infer reject elements for literals +// FEATURE: prefer whitespace higher up +// FEATURE: full conjunctive and boolean grammars +// FEATURE: "ambiguity modulo dropped fragments"? can this be checked for statically? eliminated statically? +// - drop stuff during the parsing process (drop nodes) + +// LATER: Element -- parameterize over the input token type? Makes a huge mess... +// LATER: Go back to where Sequence is not an Element? +// - The original motivation for making Sequence "first class" was the fact that +// in order to do associativity right you need to have per-Sequence follow sets ______________________________________________________________________________ Immediately diff --git a/src/edu/berkeley/sbp/Repeat.java b/src/edu/berkeley/sbp/Repeat.java index 2365cbd..35f3477 100644 --- a/src/edu/berkeley/sbp/Repeat.java +++ b/src/edu/berkeley/sbp/Repeat.java @@ -30,7 +30,7 @@ public class Repeat extends Union { Repeat(final Element e, boolean zeroOkay, boolean manyOkay) { this(e, zeroOkay, manyOkay, null); } Repeat(final Element e, boolean zeroOkay, boolean manyOkay, Element separator) { - super(e+(!manyOkay ? "?" : (zeroOkay ? "*" : "+")), true); + super(e+(!manyOkay ? "?" : (zeroOkay ? "*" : "+"))+(separator==null?"":("/"+separator.toString())), true); this.e = e; this.zeroOkay = zeroOkay; this.manyOkay = manyOkay; diff --git a/src/edu/berkeley/sbp/Union.java b/src/edu/berkeley/sbp/Union.java index df6c374..7370a68 100644 --- a/src/edu/berkeley/sbp/Union.java +++ b/src/edu/berkeley/sbp/Union.java @@ -50,7 +50,7 @@ public class Union extends Element implements Iterable { public String toString() { return shortForm; } private static String pad(int i,String s) { return s.length() >= i ? s : pad(i-1,s)+" "; } - void toString(StringBuffer sb) { + public void toString(StringBuffer sb) { if (synthetic) return; boolean first = true; if (alternatives.size()==0) { diff --git a/src/edu/berkeley/sbp/misc/MetaGrammar.java b/src/edu/berkeley/sbp/misc/MetaGrammar.java index 892f6c6..e371624 100644 --- a/src/edu/berkeley/sbp/misc/MetaGrammar.java +++ b/src/edu/berkeley/sbp/misc/MetaGrammar.java @@ -9,7 +9,12 @@ public class MetaGrammar extends ReflectiveWalker { public static Union make() throws Exception { return ((MetaGrammar)new MetaGrammar().walk(meta)).done(); } - + public String toString() { + StringBuffer ret = new StringBuffer(); + for(Union u : nt.values()) u.toString(ret); + return ret.toString(); + } + // FIXME private static HashSet dropAll = new HashSet(); @@ -114,6 +119,9 @@ public class MetaGrammar extends ReflectiveWalker { public Range range0(char a) { return new Range(a, a); } public Range range0(char a, char b) { return new Range(a, b); } + public Range range1(char a, char b) { return new Range(a, b); } + public Element rangex(Range[] rr) { return range("~", rr); } + public Element range(Range[] rr) { return range(null, rr); } public Element range(Object o, Range[] rr) { Range.Set ret = !"~".equals(o+"") ? new Range.Set() : new Range.Set(new Range(true, true)); if (rr != null) @@ -373,7 +381,8 @@ public class MetaGrammar extends ReflectiveWalker { public MyDrop(Object o) { this.o = o; } } - private static final Tree meta = + public static final Tree meta = + @@ -411,7 +420,7 @@ new Tree(null, "gram", new Tree[] { new Tree(null, null, new Tree[] { }), new Tree(null, "m", new Tree[] { }), new Tree(null, "a", new Tree[] { }), new Tree(null, "r", new Tree[] { })})}), - new Tree(null, null, new Tree[] { new Tree(null, "alternatives", new Tree[] { new Tree(null, null, new Tree[] { new Tree(null, "=>", new Tree[] { new Tree(null, null, new Tree[] { new Tree(null, "+/", new Tree[] { new Tree(null, "nonTerminalY", new Tree[] { new Tree(null, "sify", new Tree[] { new Tree(null, null, new Tree[] { new Tree(null, "r", new Tree[] { })})})}), + new Tree(null, null, new Tree[] { new Tree(null, "alternatives", new Tree[] { new Tree(null, null, new Tree[] { new Tree(null, "=>", new Tree[] { new Tree(null, null, new Tree[] { new Tree(null, "$$", new Tree[] { new Tree(null, "nonTerminalY", new Tree[] { new Tree(null, "sify", new Tree[] { new Tree(null, null, new Tree[] { new Tree(null, "r", new Tree[] { })})})}), new Tree(null, "nonTerminalY", new Tree[] { new Tree(null, "sify", new Tree[] { new Tree(null, null, new Tree[] { new Tree(null, "w", new Tree[] { }), new Tree(null, "s", new Tree[] { })})})})})}), new Tree(null, "sify", new Tree[] { new Tree(null, null, new Tree[] { new Tree(null, "g", new Tree[] { }), @@ -491,7 +500,7 @@ new Tree(null, "gram", new Tree[] { new Tree(null, null, new Tree[] { }), new Tree(null, "v", new Tree[] { }), new Tree(null, "e", new Tree[] { }), new Tree(null, "s", new Tree[] { })})}), - new Tree(null, null, new Tree[] { new Tree(null, "alternatives", new Tree[] { new Tree(null, null, new Tree[] { new Tree(null, "rewrite", new Tree[] { new Tree(null, null, new Tree[] { new Tree(null, "+/", new Tree[] { new Tree(null, "nonTerminalY", new Tree[] { new Tree(null, "sify", new Tree[] { new Tree(null, null, new Tree[] { new Tree(null, "e", new Tree[] { }), + new Tree(null, null, new Tree[] { new Tree(null, "alternatives", new Tree[] { new Tree(null, null, new Tree[] { new Tree(null, "rewrite", new Tree[] { new Tree(null, null, new Tree[] { new Tree(null, "$$", new Tree[] { new Tree(null, "nonTerminalY", new Tree[] { new Tree(null, "sify", new Tree[] { new Tree(null, null, new Tree[] { new Tree(null, "e", new Tree[] { }), new Tree(null, "q", new Tree[] { }), new Tree(null, "u", new Tree[] { }), new Tree(null, "i", new Tree[] { }), @@ -510,7 +519,7 @@ new Tree(null, "gram", new Tree[] { new Tree(null, null, new Tree[] { }), new Tree(null, "A", new Tree[] { }), new Tree(null, "l", new Tree[] { }), new Tree(null, "t", new Tree[] { })})}), - new Tree(null, null, new Tree[] { new Tree(null, "alternatives", new Tree[] { new Tree(null, null, new Tree[] { new Tree(null, "=>", new Tree[] { new Tree(null, null, new Tree[] { new Tree(null, "+/", new Tree[] { new Tree(null, "nonTerminalY", new Tree[] { new Tree(null, "sify", new Tree[] { new Tree(null, null, new Tree[] { new Tree(null, "c", new Tree[] { }), + new Tree(null, null, new Tree[] { new Tree(null, "alternatives", new Tree[] { new Tree(null, null, new Tree[] { new Tree(null, "=>", new Tree[] { new Tree(null, null, new Tree[] { new Tree(null, "$$", new Tree[] { new Tree(null, "nonTerminalY", new Tree[] { new Tree(null, "sify", new Tree[] { new Tree(null, null, new Tree[] { new Tree(null, "c", new Tree[] { }), new Tree(null, "o", new Tree[] { }), new Tree(null, "n", new Tree[] { }), new Tree(null, "j", new Tree[] { }), @@ -557,7 +566,7 @@ new Tree(null, "gram", new Tree[] { new Tree(null, null, new Tree[] { }), new Tree(null, null, new Tree[] { new Tree(null, "alternatives", new Tree[] { new Tree(null, null, new Tree[] { new Tree(null, "rewrite", new Tree[] { new Tree(null, null, new Tree[] { new Tree(null, "+", new Tree[] { new Tree(null, "nonTerminalY", new Tree[] { new Tree(null, "sify", new Tree[] { new Tree(null, null, new Tree[] { new Tree(null, "e", new Tree[] { })})})})})})})})})})}), new Tree(null, "::=", new Tree[] { new Tree(null, "sify", new Tree[] { new Tree(null, null, new Tree[] { new Tree(null, "e", new Tree[] { }), new Tree(null, "s", new Tree[] { })})}), - new Tree(null, null, new Tree[] { new Tree(null, "alternatives", new Tree[] { new Tree(null, null, new Tree[] { new Tree(null, "rewrite", new Tree[] { new Tree(null, null, new Tree[] { new Tree(null, "+/", new Tree[] { new Tree(null, "nonTerminalY", new Tree[] { new Tree(null, "sify", new Tree[] { new Tree(null, null, new Tree[] { new Tree(null, "e", new Tree[] { })})})}), + new Tree(null, null, new Tree[] { new Tree(null, "alternatives", new Tree[] { new Tree(null, null, new Tree[] { new Tree(null, "rewrite", new Tree[] { new Tree(null, null, new Tree[] { new Tree(null, "$$", new Tree[] { new Tree(null, "nonTerminalY", new Tree[] { new Tree(null, "sify", new Tree[] { new Tree(null, null, new Tree[] { new Tree(null, "e", new Tree[] { })})})}), new Tree(null, "nonTerminalY", new Tree[] { new Tree(null, "sify", new Tree[] { new Tree(null, null, new Tree[] { new Tree(null, "w", new Tree[] { }), new Tree(null, "s", new Tree[] { })})})})})})})})})})}), new Tree(null, "::=", new Tree[] { new Tree(null, "sify", new Tree[] { new Tree(null, null, new Tree[] { new Tree(null, "c", new Tree[] { }), @@ -772,12 +781,6 @@ new Tree(null, "gram", new Tree[] { new Tree(null, null, new Tree[] { }), new Tree(null, "g", new Tree[] { }), new Tree(null, "e", new Tree[] { })})})}), new Tree(null, "rewrite", new Tree[] { new Tree(null, "/", new Tree[] { new Tree(null, null, new Tree[] { new Tree(null, "nonTerminalY", new Tree[] { new Tree(null, "sify", new Tree[] { new Tree(null, null, new Tree[] { new Tree(null, "e", new Tree[] { })})})}), - new Tree(null, "care", new Tree[] { new Tree(null, "sify", new Tree[] { new Tree(null, null, new Tree[] { new Tree(null, "+", new Tree[] { }), - new Tree(null, "/", new Tree[] { })})})}), - new Tree(null, "nonTerminalY", new Tree[] { new Tree(null, "sify", new Tree[] { new Tree(null, null, new Tree[] { new Tree(null, "e", new Tree[] { })})})})}), - new Tree(null, "nonTerminalY", new Tree[] { new Tree(null, "sify", new Tree[] { new Tree(null, null, new Tree[] { new Tree(null, "w", new Tree[] { }), - new Tree(null, "s", new Tree[] { })})})})})}), - new Tree(null, "rewrite", new Tree[] { new Tree(null, "/", new Tree[] { new Tree(null, null, new Tree[] { new Tree(null, "nonTerminalY", new Tree[] { new Tree(null, "sify", new Tree[] { new Tree(null, null, new Tree[] { new Tree(null, "e", new Tree[] { })})})}), new Tree(null, "care", new Tree[] { new Tree(null, "sify", new Tree[] { new Tree(null, null, new Tree[] { new Tree(null, "%", new Tree[] { }), new Tree(null, "%", new Tree[] { })})})}), new Tree(null, "nonTerminalY", new Tree[] { new Tree(null, "sify", new Tree[] { new Tree(null, null, new Tree[] { new Tree(null, "e", new Tree[] { })})})})}), @@ -967,3 +970,4 @@ new Tree(null, "gram", new Tree[] { new Tree(null, null, new Tree[] { }), + diff --git a/src/edu/berkeley/sbp/misc/RegressionTests.java b/src/edu/berkeley/sbp/misc/RegressionTests.java index e61673a..ebe29c4 100644 --- a/src/edu/berkeley/sbp/misc/RegressionTests.java +++ b/src/edu/berkeley/sbp/misc/RegressionTests.java @@ -6,37 +6,6 @@ import edu.berkeley.sbp.*; import edu.berkeley.sbp.misc.*; import edu.berkeley.sbp.*; -// priorities are all messy and dont get serialized -// 1. Error messages -// 2. Java MetaGrammar (proof of concept) -// 3. Ivan's MetaGrammar -// 4. Documentation format -// - TIB - -// TODO: better API for interfacing with Java -// TODO: error messages -// TODO: integrate with TIB - -// Element -// Walk -// ParseTable / GSS -// MetaGrammar (necessary/relevant?) -// Tree (cleanup?) -// Union.SubUnion -// Repeat - -// FEATURE: serialization of ParseTable's, generation of Java code -// FEATURE: infer reject elements for literals -// FEATURE: prefer whitespace higher up -// FEATURE: full conjunctive and boolean grammars -// FEATURE: "ambiguity modulo dropped fragments"? can this be checked for statically? eliminated statically? -// - drop stuff during the parsing process (drop nodes) - -// LATER: Element -- parameterize over the input token type? Makes a huge mess... -// LATER: Go back to where Sequence is not an Element? -// - The original motivation for making Sequence "first class" was the fact that -// in order to do associativity right you need to have per-Sequence follow sets - public class RegressionTests { public static boolean yes = false; @@ -65,8 +34,13 @@ public class RegressionTests { s = s2; } + //MetaGrammar mg0 = new MetaGrammar(); + //mg0.walk(MetaGrammar.meta); + //System.out.println(mg0); Tree res = new Parser(MetaGrammar.make(), CharToken.top()).parse1(new CharToken.Stream(new InputStreamReader(new FileInputStream(s[0])))); - Union meta = ((MetaGrammar)new MetaGrammar().walk(res)).done(); + MetaGrammar mg = (MetaGrammar)new MetaGrammar().walk(res); + //System.out.println(mg); + Union meta = mg.done(); SequenceInputStream sis = new SequenceInputStream(new FileInputStream(s[0]), new FileInputStream(s[1])); res = new Parser(meta, CharToken.top()).parse1(new CharToken.Stream(new InputStreamReader(sis), "parsing " + s[1] + " using " + s[0])); Union testcasegrammar = ((MetaGrammar)new MetaGrammar("ts").walk(res)).done("ts"); diff --git a/tests/meta.g b/tests/meta.g index c89fae1..1a97296 100644 --- a/tests/meta.g +++ b/tests/meta.g @@ -1,18 +1,16 @@ s ::= ws grammar ws => "gram" ws !::= w** -grammar ::= r$$ws => "grammar" +grammar ::= r $$ ws => "grammar" r ::= word ^"::=" alternatives /ws | word ^"!::=" alternatives /ws -ec ::= [~\]\\\-\~] | escaped - alternatives ::= equiAlt $$ (ws ">" ws) equiAlt ::= conjuncts $$ (ws "|" ws) => "alternatives" sequence ::= Es > Es ^"/" e /ws Es ::= e+ -es ::= e$$ws +es ::= e %% ws conjuncts ::= rewrite @@ -24,13 +22,17 @@ rewrite ::= sequence /ws => "rewrite" | sequence ^"=>" quoted /ws | sequence "=>" "()" /ws => "wrap" +ec ::= [~\-\]\\\~] + | escaped + range ::= ec => "range0" - | ec ^"-" ec => "range0" + | ec ^"-" ec => "range1" e ::= word => "nonTerminalY" | [(][)] => "epsilon" | ^"{" alternatives "}" /ws - | "[" [\~]? range* "]" => "range" + | "[" ranges "]" => "range" + | "[~" ranges "]" => "rangex" | e ^"%%" e /ws | e ^"$$" e /ws | e ^"?" /ws @@ -50,6 +52,8 @@ e ::= word => "nonTerminalY" | "(" word ^")" /ws > ^"(" alternatives ")" /ws +ranges ::= range* + w !::= " " | "//" [~\n]* "\n" | "\n"