From 9ded11559a1b6f817e99355b1c9e2c88042e91d4 Mon Sep 17 00:00:00 2001 From: adam Date: Thu, 5 Jan 2006 04:01:18 -0500 Subject: [PATCH] checkpoint darcs-hash:20060105090118-5007d-d6be2943f8cb29370e8f13c5114338a874434b58.gz --- src/edu/berkeley/sbp/Element.java | 2 +- src/edu/berkeley/sbp/GSS.java | 4 +- src/edu/berkeley/sbp/Parser.java | 16 +++- src/edu/berkeley/sbp/Sequence.java | 4 +- src/edu/berkeley/sbp/misc/MetaGrammar.java | 28 ++++++- src/edu/berkeley/sbp/tib/Tib.java | 18 +++-- tests/input.tibdoc | 9 +-- tests/meta.g | 1 + tests/tibdoc.g | 111 ++++++++++++++-------------- 9 files changed, 120 insertions(+), 73 deletions(-) diff --git a/src/edu/berkeley/sbp/Element.java b/src/edu/berkeley/sbp/Element.java index c94adbd..f88ce87 100644 --- a/src/edu/berkeley/sbp/Element.java +++ b/src/edu/berkeley/sbp/Element.java @@ -12,7 +12,7 @@ public abstract class Element { /** if this element always matches exactly one token, return a topology covering exactly those possible tokens, otherwise null */ abstract Topology toAtom(); - + public Topology toAtom0() { return toAtom(); } Forest epsilonForm() { throw new Error("no epsilon form: " + this); } final boolean possiblyEpsilon(Walk.Cache cache) { Boolean ret = cache==null ? null : cache.possiblyEpsilon.get(this); diff --git a/src/edu/berkeley/sbp/GSS.java b/src/edu/berkeley/sbp/GSS.java index 16725f1..74cbb48 100644 --- a/src/edu/berkeley/sbp/GSS.java +++ b/src/edu/berkeley/sbp/GSS.java @@ -81,7 +81,7 @@ class GSS { private void newNode2(Node p, Node parent, Forest pending, Parser.Table.State state, boolean fromEmptyReduction) { p.holder.merge(pending); if (p.parents().contains(parent)) return; - p.parents().add(parent); + p.parents().add(parent, true); if (p!=parent && !fromEmptyReduction) p.queueReductions(parent); } private void newNode3(Node parent, Forest pending, Parser.Table.State state, boolean fromEmptyReduction) { @@ -226,7 +226,7 @@ class GSS { this.state = state; Phase start = parent==null ? null : parent.phase(); if (pending != null) this.holder().merge(pending); - if (parent != null) parents().add(parent); + if (parent != null) parents().add(parent, true); if (Phase.this.hash.get(code(state, start)) != null) throw new Error("severe problem!"); Phase.this.hash.put(code(state, start), this); Phase.this.numNodes++; diff --git a/src/edu/berkeley/sbp/Parser.java b/src/edu/berkeley/sbp/Parser.java index 4244d21..fac14b0 100644 --- a/src/edu/berkeley/sbp/Parser.java +++ b/src/edu/berkeley/sbp/Parser.java @@ -128,12 +128,22 @@ public abstract class Parser { if (start0.contains(p.owner()) && p.next()==null) state.accept = true; - // FIXME: how does right-nullability interact with follow restrictions? - // all right-nullable rules get a reduction [Johnstone 2000] if (p.isRightNullable(cache)) { Walk.Follow wf = new Walk.Follow(top.empty(), p.owner(), all_elements, cache); Reduction red = new Reduction(p); - state.reductions.put(wf.walk(p.owner()), red); + + Topology follow = wf.walk(p.owner()); + if (p.owner() instanceof Sequence.RewritingSequence && + (((Sequence.RewritingSequence)p.owner()).tag+"").equals("emailaddr")) { + System.out.println("follow before: " + new edu.berkeley.sbp.misc.CharToken.CharRange(follow)); + } + for(Position p2 = p; p2 != null && p2.element() != null; p2 = p2.next()) + follow = follow.intersect(new Walk.Follow(top.empty(), p2.element(), all_elements, cache).walk(p2.element())); + if (p.owner() instanceof Sequence.RewritingSequence && + (((Sequence.RewritingSequence)p.owner()).tag+"").equals("emailaddr")) { + System.out.println("follow after: " + new edu.berkeley.sbp.misc.CharToken.CharRange(follow)); + } + state.reductions.put(follow, red); if (wf.includesEof()) state.eofReductions.add(red); } diff --git a/src/edu/berkeley/sbp/Sequence.java b/src/edu/berkeley/sbp/Sequence.java index 83d9b6f..9f2fb59 100644 --- a/src/edu/berkeley/sbp/Sequence.java +++ b/src/edu/berkeley/sbp/Sequence.java @@ -177,7 +177,7 @@ public abstract class Sequence extends Element implements Iterable { public Forest postReduce(Token.Location loc, Forest[] args) { return (Forest)Forest.singleton(loc, args[idx], this); } } - static class Unwrap extends Sequence { + public static class Unwrap extends Sequence { private boolean[] drops; public Unwrap(Element[] e, HashSet and, HashSet not) { super(e, and, not); this.drops = null; } public Unwrap(Element[] e, boolean[] drops, HashSet and, HashSet not) { super(e, and, not); this.drops = drops; } @@ -194,7 +194,7 @@ public abstract class Sequence extends Element implements Iterable { } static class RewritingSequence extends Sequence { - private final Object tag; + /*private*/public final Object tag; private final boolean[] drops; private int count = 0; public RewritingSequence(Object tag, Element[] e, HashSet and, HashSet not) { this(tag, e, null, and, not); } diff --git a/src/edu/berkeley/sbp/misc/MetaGrammar.java b/src/edu/berkeley/sbp/misc/MetaGrammar.java index 73e1732..ac0e55e 100644 --- a/src/edu/berkeley/sbp/misc/MetaGrammar.java +++ b/src/edu/berkeley/sbp/misc/MetaGrammar.java @@ -96,6 +96,7 @@ public class MetaGrammar extends StringWalker { else if ("+".equals(head)) return Repeat.many1((Element)walk(tree.child(0))); else if ("+/".equals(head)) return Repeat.many1((Element)walk(tree.child(0)), (Element)walk(tree.child(1))); else if ("*/".equals(head)) return Repeat.many0((Element)walk(tree.child(0)), (Element)walk(tree.child(1))); + else if ("++/".equals(head)) return Repeat.maximal1((Element)walk(tree.child(0)), (Element)walk(tree.child(1))); else if ("**".equals(head)) return Repeat.maximal0((Element)walk(tree.child(0))); else if ("++".equals(head)) return Repeat.maximal1((Element)walk(tree.child(0))); else if ("?".equals(head)) return Repeat.maybe((Element)walk(tree.child(0))); @@ -115,6 +116,7 @@ public class MetaGrammar extends StringWalker { else if ("range".equals(head)) return new Range(walk(tree, 0).toString().charAt(0), walk(tree,0).toString().charAt(0)); else if ("gram".equals(head)) return walk(tree, 0); else if ("=>".equals(head)) { PreSequence p = (PreSequence)walk(tree, 0); p.tag = string(tree.child(1)); return p; } + else if ("[]".equals(head)) { PreSequence p = (PreSequence)walk(tree, 0); p.unwrap = true; return p; } else if ("psy".equals(head)) return (PreSequence)walk(tree, 0); else if ("psyl".equals(head)) throw new Error("not supported"); else if ("psyr".equals(head)) { PreSequence p = (PreSequence)walk(tree, 0); p.noFollow = (Element)walk(tree, 1); return p; } @@ -214,6 +216,7 @@ public class MetaGrammar extends StringWalker { u.add(buildSequence(u)); return u; } + public boolean unwrap = false; public Sequence buildSequence(Union u) { return buildSequence(u, false, false); } public Sequence buildSequence(Union u, boolean lame, boolean dropAll) { for(Sequence s : and) u.add(s); @@ -237,6 +240,7 @@ public class MetaGrammar extends StringWalker { Element[] expansion = o2; Sequence ret = null; if (dropAll || lame) ret = Sequence.drop(expansion, and, not, lame); + else if (unwrap) ret = new Sequence.Unwrap(expansion, drops, and, not); else if (tag!=null) ret = Sequence.rewritingSequence(tag, expansion, drops, and, not); else { int idx = -1; @@ -248,7 +252,7 @@ public class MetaGrammar extends StringWalker { else ret = Sequence.drop(expansion, and, not, false); } set.add(ret); - if (this.noFollow != null) ret.noFollow = this.noFollow; + if (this.noFollow != null) ret.noFollow = new Atom.Invert(new Atom.Infer(this.noFollow)); return ret; } } @@ -338,6 +342,12 @@ public class MetaGrammar extends StringWalker { + + + + + + // DO NOT EDIT STUFF BELOW: IT IS AUTOMATICALLY GENERATED new edu.berkeley.sbp.Tree(null, "gram", new edu.berkeley.sbp.Tree[] { new edu.berkeley.sbp.Tree(null, "grammar", new edu.berkeley.sbp.Tree[] { new edu.berkeley.sbp.Tree(null, null, new edu.berkeley.sbp.Tree[] { new edu.berkeley.sbp.Tree(null, "::=", new edu.berkeley.sbp.Tree[] { new edu.berkeley.sbp.Tree(null, null, new edu.berkeley.sbp.Tree[] { new edu.berkeley.sbp.Tree(null, "s", new edu.berkeley.sbp.Tree[] { })}), new edu.berkeley.sbp.Tree(null, null, new edu.berkeley.sbp.Tree[] { new edu.berkeley.sbp.Tree(null, null, new edu.berkeley.sbp.Tree[] { new edu.berkeley.sbp.Tree(null, "=>", new edu.berkeley.sbp.Tree[] { new edu.berkeley.sbp.Tree(null, "psy", new edu.berkeley.sbp.Tree[] { new edu.berkeley.sbp.Tree(null, "ps", new edu.berkeley.sbp.Tree[] { new edu.berkeley.sbp.Tree(null, null, new edu.berkeley.sbp.Tree[] { new edu.berkeley.sbp.Tree(null, "nonTerminal", new edu.berkeley.sbp.Tree[] { new edu.berkeley.sbp.Tree(null, null, new edu.berkeley.sbp.Tree[] { new edu.berkeley.sbp.Tree(null, "g", new edu.berkeley.sbp.Tree[] { }), @@ -617,6 +627,16 @@ new edu.berkeley.sbp.Tree(null, "gram", new edu.berkeley.sbp.Tree[] { new edu.be new edu.berkeley.sbp.Tree(null, "x", new edu.berkeley.sbp.Tree[] { })})})})})}), new edu.berkeley.sbp.Tree(null, "psy", new edu.berkeley.sbp.Tree[] { new edu.berkeley.sbp.Tree(null, "/", new edu.berkeley.sbp.Tree[] { new edu.berkeley.sbp.Tree(null, "ps2", new edu.berkeley.sbp.Tree[] { new edu.berkeley.sbp.Tree(null, null, new edu.berkeley.sbp.Tree[] { new edu.berkeley.sbp.Tree(null, "nonTerminal", new edu.berkeley.sbp.Tree[] { new edu.berkeley.sbp.Tree(null, null, new edu.berkeley.sbp.Tree[] { new edu.berkeley.sbp.Tree(null, "p", new edu.berkeley.sbp.Tree[] { }), new edu.berkeley.sbp.Tree(null, "s", new edu.berkeley.sbp.Tree[] { }), + new edu.berkeley.sbp.Tree(null, "x", new edu.berkeley.sbp.Tree[] { })})}), + new edu.berkeley.sbp.Tree(null, "literal", new edu.berkeley.sbp.Tree[] { new edu.berkeley.sbp.Tree(null, null, new edu.berkeley.sbp.Tree[] { new edu.berkeley.sbp.Tree(null, "=", new edu.berkeley.sbp.Tree[] { }), + new edu.berkeley.sbp.Tree(null, ">", new edu.berkeley.sbp.Tree[] { })})})}), + new edu.berkeley.sbp.Tree(null, null, new edu.berkeley.sbp.Tree[] { new edu.berkeley.sbp.Tree(null, "[", new edu.berkeley.sbp.Tree[] { }), + new edu.berkeley.sbp.Tree(null, "]", new edu.berkeley.sbp.Tree[] { })}), + new edu.berkeley.sbp.Tree(null, null, new edu.berkeley.sbp.Tree[] { })}), + new edu.berkeley.sbp.Tree(null, "nonTerminal", new edu.berkeley.sbp.Tree[] { new edu.berkeley.sbp.Tree(null, null, new edu.berkeley.sbp.Tree[] { new edu.berkeley.sbp.Tree(null, "w", new edu.berkeley.sbp.Tree[] { }), + new edu.berkeley.sbp.Tree(null, "s", new edu.berkeley.sbp.Tree[] { })})})})}), + new edu.berkeley.sbp.Tree(null, "psy", new edu.berkeley.sbp.Tree[] { new edu.berkeley.sbp.Tree(null, "/", new edu.berkeley.sbp.Tree[] { new edu.berkeley.sbp.Tree(null, "ps2", new edu.berkeley.sbp.Tree[] { new edu.berkeley.sbp.Tree(null, null, new edu.berkeley.sbp.Tree[] { new edu.berkeley.sbp.Tree(null, "nonTerminal", new edu.berkeley.sbp.Tree[] { new edu.berkeley.sbp.Tree(null, null, new edu.berkeley.sbp.Tree[] { new edu.berkeley.sbp.Tree(null, "p", new edu.berkeley.sbp.Tree[] { }), + new edu.berkeley.sbp.Tree(null, "s", new edu.berkeley.sbp.Tree[] { }), new edu.berkeley.sbp.Tree(null, "x", new edu.berkeley.sbp.Tree[] { })})})}), new edu.berkeley.sbp.Tree(null, null, new edu.berkeley.sbp.Tree[] { new edu.berkeley.sbp.Tree(null, "=", new edu.berkeley.sbp.Tree[] { }), new edu.berkeley.sbp.Tree(null, ">", new edu.berkeley.sbp.Tree[] { })}), @@ -928,3 +948,9 @@ new edu.berkeley.sbp.Tree(null, "gram", new edu.berkeley.sbp.Tree[] { new edu.be + + + + + + diff --git a/src/edu/berkeley/sbp/tib/Tib.java b/src/edu/berkeley/sbp/tib/Tib.java index 8ee70ca..29181a0 100644 --- a/src/edu/berkeley/sbp/tib/Tib.java +++ b/src/edu/berkeley/sbp/tib/Tib.java @@ -54,6 +54,8 @@ public class Tib implements Token.Stream { } if (pos >= cur.size()) { pos = cur.iip+1; + _row = cur.endrow; + _col = cur.endcol; cur = cur.parent; if (cur==null) return null; return CharToken.right; @@ -85,7 +87,8 @@ public class Tib implements Token.Stream { boolean blankLine = false; Block top = new Block.Root(); for(String s = br.readLine(); s != null; s = br.readLine()) { - col = 0; + row++; + col=0; while (s.length() > 0 && s.charAt(0) == ' ' && (!(top instanceof Block.Literal) || col < top.col)) { col++; s = s.substring(1); } @@ -94,6 +97,8 @@ public class Tib implements Token.Stream { while (col < top.col) { if (s.startsWith("{}") && top instanceof Block.Literal && ((Block.Literal)top).braceCol == col) break; blankLine = false; + top.endrow = row; + top.endcol = col; top = top.closeIndent(); } if (s.startsWith("{}")) { @@ -107,16 +112,16 @@ public class Tib implements Token.Stream { } while (s.length() > 0 && s.charAt(s.length()-1)==' ') { s = s.substring(0, s.length()-1); } if (col > top.col) top = new Block.Indent(top, row, col); - else if (blankLine) { top = top.closeIndent(); top = new Block.Indent(top, row, col); } + else if (blankLine) { top.endrow=row; top.endcol=col; top = top.closeIndent(); top = new Block.Indent(top, row, col); } blankLine = false; for(int i=0; i { Block parent; public final int row; public final int col; + public int endrow; + public int endcol; public final int iip; private final Vector children = new Vector(); private String pending = ""; @@ -262,7 +269,8 @@ public class Tib implements Token.Stream { // Testing ////////////////////////////////////////////////////////////////////////////// - public static void main(String[] s) throws Exception { System.out.println(parse(new BufferedReader(new InputStreamReader(System.in))).toString(-1)); } + public static void main(String[] s) throws Exception { + System.out.println(parse(new BufferedReader(new InputStreamReader(System.in))).toString(-1)); } // Utilities ////////////////////////////////////////////////////////////////////////////// diff --git a/tests/input.tibdoc b/tests/input.tibdoc index 7fab26b..a106d00 100644 --- a/tests/input.tibdoc +++ b/tests/input.tibdoc @@ -1,9 +1,8 @@ header author = Adam Megacz - email = adam@megacz.com - comment = my homepage is at + myemail = adam@foo.megacz.com + comment = my homepage is at http://www.megacz.com you should *check* it out + date = published \today, yep! == Introduction == - this is the body adam@megacz.com - - + this is the body adam@megacz.com text diff --git a/tests/meta.g b/tests/meta.g index 98a62a3..7859386 100644 --- a/tests/meta.g +++ b/tests/meta.g @@ -22,6 +22,7 @@ psx ::= psy => "psy" | e "<-" psy "->" e /ws => "psylr" sequence ::= quoted => "qprod" > psx + | psx "=>" ^"[]" /ws | psx ^"=>" (word|quoted) /ws ec ::= ~[\-\]\\] diff --git a/tests/tibdoc.g b/tests/tibdoc.g index 99bab83..38104af 100644 --- a/tests/tibdoc.g +++ b/tests/tibdoc.g @@ -1,4 +1,5 @@ x::="x" +// interactions between !=> and &~ mean that I need to rethink the chartage // indentation styling... // literal blocks [[need to ignore bracing]] double-colon style? // definition -- by prior line indentation, like headings in the original structured text @@ -6,26 +7,10 @@ x::="x" // dropcap // output formats: latex, contex, ps, pdf, html, man, txt, rfc -// URGENT: why does swapping [a-zA-Z0-9] for alphanum in "item" cause severe breakage? -// URGENT: why does enabling "url" cause severe breakage? (probably same cause) -// ... something to do with unit productions - -// output formats: latex, contex, ps, pdf, html, man, txt, rfc -// bullet list -// numbered list -// horizontal rule -// dropcap -// smallcap -// strikethrough -// link +// escapification +// comment // math // image -// today's date, last edit date -// blockquote -// superscript -// subscript -// citations/references -// typewriter-text // figures // FIXME: these have to go at the top so they have their dropAll bit set before PreSequence.build... @@ -35,9 +20,9 @@ nw ::= ~[\r\n\ ] ////////////////////////////////////////////////////////////////////////////// -s ::= { Doc } => "top" +s ::= {Doc} => top -Doc ::= Header Body /ws => doc +Doc ::= Header Body /ws => doc Header ::= "header" { kv */ ws } /ws => header Body ::= Section*/ws => body Section ::= SectionHeader Paragraph*/ws /ws => section @@ -47,38 +32,57 @@ SectionHeaderBody ::= "=" SectionHeaderBody "=" kv ::= word "=" text /ws => kv1 +num !::= [0-9]++ Paragraph ::= { "\"\"" ws text } => "blockquote" > { "*" " " ws text } => "ul" > { "#" " " ws text } => "ol" - > { num " " ws text => "ol" } + > { num " " ws text } => "ol" > { "---" "-"* } => "hr" > { text } => "p" -text ::= item */ ws +text ::= item => "t" +itemx ::= ws item => "w" | () +item ::= blockquote => "b" + > pre itemx => "a" + > structured itemx => "1" + > structuredx itemx => "2" + > styled itemx => "3" + > qtext itemx => "4" + > alphanum++ itemx => "5" + > symbol itemx => "6" +// > sym++ itemx => [] + > Paragraph itemx => "7" + +symbol ::= sym++ + +blockquote ::= "\"\"" text "\"\"" => "blockquote" + | "\"\"" block => "blockquote" + +qtext ::= "\"" text "\"" => "quoted" pre ::= "[verbatim]" { ~[]+ } /ws => "verbatim" // FIXME doesn't work -item ::= pre - > email - > structured - > styled - > "\"" text "\"" => quoted - > alphanum++ => "stringify" - > symbol - -symbol ::= symbolx & sym++ -symbolx ::= "--" => emdash - | "," - | ":" - | ";" - -styled ::= "**" text "**" => bold - | "__" text "__" => ul - | "~~" text "~~" => it // hard to type - | "((" text "))" => footnote - -structured ::= glyph -// | url -glyph ::= "(r)" | "(c)" | "(tm)" // euro symbol? +styled ::= "__" text "__" => ul + | "((" text "))" => footnote + | ( "[[" text "]]" => tt + > "[" word "]" => citation + ) + | "!!" text "!!" => strikethrough + | "^^" (word|block) => superscript + | ",," (word|block) => subscript + | "\\sc" block => smallcap + | "**" text "**" => bold + | "!" (word|block) => keyword + > "*" text "*" => it + +block ::= { text } +structured ::= { text } "->" (url|email) => link + //> alphanum++ "->" (url|email) => link +structuredx ::= glyph + > email + > url + +glyph ::= "(r)" | "(c)" | "(tm)" | "--" // euro symbol? + | "\\today" -> ~[a-z] => today // URLs ////////////////////////////////////////////////////////////////////////////// @@ -88,8 +92,8 @@ glyph ::= "(r)" | "(c)" | "(tm)" // euro symbol? // only gets parsed once urlpath ::= urlchar* -username ::= [a-zA-Z0-9;/?:&=$\-_.+]++ => "stringify" -password ::= [a-zA-Z0-9;/?:&=$\-_.+]++ => "stringify" +username ::= [a-zA-Z0-9;/?:&=$\-_.+]++ +password ::= [a-zA-Z0-9;/?:&=$\-_.+]++ urlchar ::= [a-zA-Z0-9;/?:&=$\-_.+@] | "%" [0-9] [0-9] => "%" url ::= "mailto" ":" email @@ -98,19 +102,19 @@ url_login ::= username (":" password) "@" => "login" method ::= [+\-.a-z0-9]+ port ::= [0-9]+ -domain ::= part +/ "." -part ::= [A-Za-z0-9\-]++ => "stringify" +domain ::= (part +/ ".") -> ~"." +part ::= [a-zA-Z0-9\-]++ // interesting use of boolean grammars // &~ ([\-0-9] ~[]* | ~[]* [\-0-9]) -email ::= username "@" host => email -host ::= domain - | [0-9]+ "." [0-9]+ "." [0-9]+ "." [0-9]+ => "ip" +email ::= username "@" host -> ~[.] => emailaddr +host ::= [0-9]+ "." [0-9]+ "." [0-9]+ "." [0-9]+ => "ip" + | domain // Tokens /////////////////////////////////////////////////////////////////// -word ::= alphanum++ => "stringify" +word ::= alphanum++ | quoted quoted ::= "\"" ((~[\"\\] | escaped)+) "\"" @@ -123,9 +127,8 @@ escaped ::= "\\n" => "\n" // Chars /////////////////////////////////////////////////////////////// alpha ::= [a-zA-Z] -num !::= [0-9]++ => "stringify" //num ::= [0-9] alphanum ::= [a-zA-Z0-9] -sym ::= ~[a-zA-Z0-9\ \r\n] +sym ::= ~[a-zA-Z0-9\ \r\n=\">] -- 1.7.10.4