/**
 * If this element always matches exactly one token, returns a topology covering
 * exactly those possible tokens; otherwise returns {@code null}.
 */
abstract Topology toAtom();
-
+ public Topology toAtom0() { return toAtom(); }
/**
 * Returns the epsilon form of this element.
 *
 * <p>This implementation never returns normally: it always throws an
 * {@link Error} whose message names this element.
 * NOTE(review): presumably overridden by elements that do have an epsilon
 * form -- confirm against the subclasses.
 */
Forest epsilonForm() {
    final String message = "no epsilon form: " + this;
    throw new Error(message);
}
final boolean possiblyEpsilon(Walk.Cache cache) {
Boolean ret = cache==null ? null : cache.possiblyEpsilon.get(this);
private void newNode2(Node p, Node parent, Forest pending, Parser.Table.State state, boolean fromEmptyReduction) {
p.holder.merge(pending);
if (p.parents().contains(parent)) return;
- p.parents().add(parent);
+ p.parents().add(parent, true);
if (p!=parent && !fromEmptyReduction) p.queueReductions(parent);
}
private void newNode3(Node parent, Forest pending, Parser.Table.State state, boolean fromEmptyReduction) {
this.state = state;
Phase start = parent==null ? null : parent.phase();
if (pending != null) this.holder().merge(pending);
- if (parent != null) parents().add(parent);
+ if (parent != null) parents().add(parent, true);
if (Phase.this.hash.get(code(state, start)) != null) throw new Error("severe problem!");
Phase.this.hash.put(code(state, start), this);
Phase.this.numNodes++;
if (start0.contains(p.owner()) && p.next()==null)
state.accept = true;
- // FIXME: how does right-nullability interact with follow restrictions?
- // all right-nullable rules get a reduction [Johnstone 2000]
if (p.isRightNullable(cache)) {
Walk.Follow wf = new Walk.Follow(top.empty(), p.owner(), all_elements, cache);
Reduction red = new Reduction(p);
- state.reductions.put(wf.walk(p.owner()), red);
+
+ Topology follow = wf.walk(p.owner());
+ if (p.owner() instanceof Sequence.RewritingSequence &&
+ (((Sequence.RewritingSequence)p.owner()).tag+"").equals("emailaddr")) {
+ System.out.println("follow before: " + new edu.berkeley.sbp.misc.CharToken.CharRange(follow));
+ }
+ for(Position p2 = p; p2 != null && p2.element() != null; p2 = p2.next())
+ follow = follow.intersect(new Walk.Follow(top.empty(), p2.element(), all_elements, cache).walk(p2.element()));
+ if (p.owner() instanceof Sequence.RewritingSequence &&
+ (((Sequence.RewritingSequence)p.owner()).tag+"").equals("emailaddr")) {
+ System.out.println("follow after: " + new edu.berkeley.sbp.misc.CharToken.CharRange(follow));
+ }
+ state.reductions.put(follow, red);
if (wf.includesEof()) state.eofReductions.add(red);
}
/**
 * Builds the result of a reduction from the child results in {@code args}.
 *
 * <p>Only the entry at position {@code idx} is used; it is passed, together
 * with {@code loc} and this sequence, to {@code Forest.singleton}.
 *
 * @param loc  location handed through to {@code Forest.singleton}
 * @param args child results; {@code args[idx]} must exist
 * @return the forest produced by {@code Forest.singleton}
 */
public <T> Forest<T> postReduce(Token.Location loc, Forest<T>[] args) {
    final Forest<T> selected = args[idx];
    return (Forest<T>)Forest.singleton(loc, selected, this);
}
}
- static class Unwrap extends Sequence {
+ public static class Unwrap extends Sequence {
private boolean[] drops;
public Unwrap(Element[] e, HashSet<Sequence> and, HashSet<Sequence> not) { super(e, and, not); this.drops = null; }
public Unwrap(Element[] e, boolean[] drops, HashSet<Sequence> and, HashSet<Sequence> not) { super(e, and, not); this.drops = drops; }
}
static class RewritingSequence extends Sequence {
- private final Object tag;
+ /*private*/public final Object tag;
private final boolean[] drops;
private int count = 0;
public RewritingSequence(Object tag, Element[] e, HashSet<Sequence> and, HashSet<Sequence> not) { this(tag, e, null, and, not); }
else if ("+".equals(head)) return Repeat.many1((Element)walk(tree.child(0)));
else if ("+/".equals(head)) return Repeat.many1((Element)walk(tree.child(0)), (Element)walk(tree.child(1)));
else if ("*/".equals(head)) return Repeat.many0((Element)walk(tree.child(0)), (Element)walk(tree.child(1)));
+ else if ("++/".equals(head)) return Repeat.maximal1((Element)walk(tree.child(0)), (Element)walk(tree.child(1)));
else if ("**".equals(head)) return Repeat.maximal0((Element)walk(tree.child(0)));
else if ("++".equals(head)) return Repeat.maximal1((Element)walk(tree.child(0)));
else if ("?".equals(head)) return Repeat.maybe((Element)walk(tree.child(0)));
else if ("range".equals(head)) return new Range(walk(tree, 0).toString().charAt(0), walk(tree,0).toString().charAt(0));
else if ("gram".equals(head)) return walk(tree, 0);
else if ("=>".equals(head)) { PreSequence p = (PreSequence)walk(tree, 0); p.tag = string(tree.child(1)); return p; }
+ else if ("[]".equals(head)) { PreSequence p = (PreSequence)walk(tree, 0); p.unwrap = true; return p; }
else if ("psy".equals(head)) return (PreSequence)walk(tree, 0);
else if ("psyl".equals(head)) throw new Error("not supported");
else if ("psyr".equals(head)) { PreSequence p = (PreSequence)walk(tree, 0); p.noFollow = (Element)walk(tree, 1); return p; }
u.add(buildSequence(u));
return u;
}
+ public boolean unwrap = false;
public Sequence buildSequence(Union u) { return buildSequence(u, false, false); }
public Sequence buildSequence(Union u, boolean lame, boolean dropAll) {
for(Sequence s : and) u.add(s);
Element[] expansion = o2;
Sequence ret = null;
if (dropAll || lame) ret = Sequence.drop(expansion, and, not, lame);
+ else if (unwrap) ret = new Sequence.Unwrap(expansion, drops, and, not);
else if (tag!=null) ret = Sequence.rewritingSequence(tag, expansion, drops, and, not);
else {
int idx = -1;
else ret = Sequence.drop(expansion, and, not, false);
}
set.add(ret);
- if (this.noFollow != null) ret.noFollow = this.noFollow;
+ if (this.noFollow != null) ret.noFollow = new Atom.Invert(new Atom.Infer(this.noFollow));
return ret;
}
}
+
+
+
+
+
+
// DO NOT EDIT STUFF BELOW: IT IS AUTOMATICALLY GENERATED
new edu.berkeley.sbp.Tree(null, "gram", new edu.berkeley.sbp.Tree[] { new edu.berkeley.sbp.Tree(null, "grammar", new edu.berkeley.sbp.Tree[] { new edu.berkeley.sbp.Tree(null, null, new edu.berkeley.sbp.Tree[] { new edu.berkeley.sbp.Tree(null, "::=", new edu.berkeley.sbp.Tree[] { new edu.berkeley.sbp.Tree(null, null, new edu.berkeley.sbp.Tree[] { new edu.berkeley.sbp.Tree(null, "s", new edu.berkeley.sbp.Tree[] { })}),
new edu.berkeley.sbp.Tree(null, null, new edu.berkeley.sbp.Tree[] { new edu.berkeley.sbp.Tree(null, null, new edu.berkeley.sbp.Tree[] { new edu.berkeley.sbp.Tree(null, "=>", new edu.berkeley.sbp.Tree[] { new edu.berkeley.sbp.Tree(null, "psy", new edu.berkeley.sbp.Tree[] { new edu.berkeley.sbp.Tree(null, "ps", new edu.berkeley.sbp.Tree[] { new edu.berkeley.sbp.Tree(null, null, new edu.berkeley.sbp.Tree[] { new edu.berkeley.sbp.Tree(null, "nonTerminal", new edu.berkeley.sbp.Tree[] { new edu.berkeley.sbp.Tree(null, null, new edu.berkeley.sbp.Tree[] { new edu.berkeley.sbp.Tree(null, "g", new edu.berkeley.sbp.Tree[] { }),
new edu.berkeley.sbp.Tree(null, "x", new edu.berkeley.sbp.Tree[] { })})})})})}),
new edu.berkeley.sbp.Tree(null, "psy", new edu.berkeley.sbp.Tree[] { new edu.berkeley.sbp.Tree(null, "/", new edu.berkeley.sbp.Tree[] { new edu.berkeley.sbp.Tree(null, "ps2", new edu.berkeley.sbp.Tree[] { new edu.berkeley.sbp.Tree(null, null, new edu.berkeley.sbp.Tree[] { new edu.berkeley.sbp.Tree(null, "nonTerminal", new edu.berkeley.sbp.Tree[] { new edu.berkeley.sbp.Tree(null, null, new edu.berkeley.sbp.Tree[] { new edu.berkeley.sbp.Tree(null, "p", new edu.berkeley.sbp.Tree[] { }),
new edu.berkeley.sbp.Tree(null, "s", new edu.berkeley.sbp.Tree[] { }),
+ new edu.berkeley.sbp.Tree(null, "x", new edu.berkeley.sbp.Tree[] { })})}),
+ new edu.berkeley.sbp.Tree(null, "literal", new edu.berkeley.sbp.Tree[] { new edu.berkeley.sbp.Tree(null, null, new edu.berkeley.sbp.Tree[] { new edu.berkeley.sbp.Tree(null, "=", new edu.berkeley.sbp.Tree[] { }),
+ new edu.berkeley.sbp.Tree(null, ">", new edu.berkeley.sbp.Tree[] { })})})}),
+ new edu.berkeley.sbp.Tree(null, null, new edu.berkeley.sbp.Tree[] { new edu.berkeley.sbp.Tree(null, "[", new edu.berkeley.sbp.Tree[] { }),
+ new edu.berkeley.sbp.Tree(null, "]", new edu.berkeley.sbp.Tree[] { })}),
+ new edu.berkeley.sbp.Tree(null, null, new edu.berkeley.sbp.Tree[] { })}),
+ new edu.berkeley.sbp.Tree(null, "nonTerminal", new edu.berkeley.sbp.Tree[] { new edu.berkeley.sbp.Tree(null, null, new edu.berkeley.sbp.Tree[] { new edu.berkeley.sbp.Tree(null, "w", new edu.berkeley.sbp.Tree[] { }),
+ new edu.berkeley.sbp.Tree(null, "s", new edu.berkeley.sbp.Tree[] { })})})})}),
+ new edu.berkeley.sbp.Tree(null, "psy", new edu.berkeley.sbp.Tree[] { new edu.berkeley.sbp.Tree(null, "/", new edu.berkeley.sbp.Tree[] { new edu.berkeley.sbp.Tree(null, "ps2", new edu.berkeley.sbp.Tree[] { new edu.berkeley.sbp.Tree(null, null, new edu.berkeley.sbp.Tree[] { new edu.berkeley.sbp.Tree(null, "nonTerminal", new edu.berkeley.sbp.Tree[] { new edu.berkeley.sbp.Tree(null, null, new edu.berkeley.sbp.Tree[] { new edu.berkeley.sbp.Tree(null, "p", new edu.berkeley.sbp.Tree[] { }),
+ new edu.berkeley.sbp.Tree(null, "s", new edu.berkeley.sbp.Tree[] { }),
new edu.berkeley.sbp.Tree(null, "x", new edu.berkeley.sbp.Tree[] { })})})}),
new edu.berkeley.sbp.Tree(null, null, new edu.berkeley.sbp.Tree[] { new edu.berkeley.sbp.Tree(null, "=", new edu.berkeley.sbp.Tree[] { }),
new edu.berkeley.sbp.Tree(null, ">", new edu.berkeley.sbp.Tree[] { })}),
+
+
+
+
+
+
}
if (pos >= cur.size()) {
pos = cur.iip+1;
+ _row = cur.endrow;
+ _col = cur.endcol;
cur = cur.parent;
if (cur==null) return null;
return CharToken.right;
boolean blankLine = false;
Block top = new Block.Root();
for(String s = br.readLine(); s != null; s = br.readLine()) {
- col = 0;
+ row++;
+ col=0;
while (s.length() > 0 &&
s.charAt(0) == ' ' &&
(!(top instanceof Block.Literal) || col < top.col)) { col++; s = s.substring(1); }
while (col < top.col) {
if (s.startsWith("{}") && top instanceof Block.Literal && ((Block.Literal)top).braceCol == col) break;
blankLine = false;
+ top.endrow = row;
+ top.endcol = col;
top = top.closeIndent();
}
if (s.startsWith("{}")) {
}
while (s.length() > 0 && s.charAt(s.length()-1)==' ') { s = s.substring(0, s.length()-1); }
if (col > top.col) top = new Block.Indent(top, row, col);
- else if (blankLine) { top = top.closeIndent(); top = new Block.Indent(top, row, col); }
+ else if (blankLine) { top.endrow=row; top.endcol=col; top = top.closeIndent(); top = new Block.Indent(top, row, col); }
blankLine = false;
for(int i=0; i<s.length(); i++) {
top.add(s.charAt(i));
switch(s.charAt(i)) {
case '{': top = new Block.Brace(top, row, col); break;
- case '}': top = top.closeBrace(); break;
+ case '}': top.endrow=row; top.endcol=col; top = top.closeBrace(); break;
}
}
- top.add(' ');
+ top.add('\n');
top.finishWord();
}
// FIXME
Block parent;
public final int row;
public final int col;
+ public int endrow;
+ public int endcol;
public final int iip;
private final Vector children = new Vector();
private String pending = "";
// Testing //////////////////////////////////////////////////////////////////////////////
- public static void main(String[] s) throws Exception { System.out.println(parse(new BufferedReader(new InputStreamReader(System.in))).toString(-1)); }
+ public static void main(String[] s) throws Exception {
+ System.out.println(parse(new BufferedReader(new InputStreamReader(System.in))).toString(-1)); }
// Utilities //////////////////////////////////////////////////////////////////////////////
header
author = Adam Megacz
- email = adam@megacz.com
- comment = my homepage is at
+ myemail = adam@foo.megacz.com
+ comment = my homepage is at http://www.megacz.com you should *check* it out
+ date = published \today, yep!
== Introduction ==
- this is the body adam@megacz.com
-
-
+ this is the body adam@megacz.com text
| e "<-" psy "->" e /ws => "psylr"
sequence ::= quoted => "qprod"
> psx
+ | psx "=>" ^"[]" /ws
| psx ^"=>" (word|quoted) /ws
ec ::= ~[\-\]\\]
x::="x"
+// interactions between !=> and &~ mean that I need to rethink the chartage
// indentation styling...
// literal blocks [[need to ignore bracing]] double-colon style?
// definition -- by prior line indentation, like headings in the original structured text
// dropcap
// output formats: latex, contex, ps, pdf, html, man, txt, rfc
-// URGENT: why does swapping [a-zA-Z0-9] for alphanum in "item" cause severe breakage?
-// URGENT: why does enabling "url" cause severe breakage? (probably same cause)
-// ... something to do with unit productions
-
-// output formats: latex, contex, ps, pdf, html, man, txt, rfc
-// bullet list
-// numbered list
-// horizontal rule
-// dropcap
-// smallcap
-// strikethrough
-// link
+// escapification
+// comment
// math
// image
-// today's date, last edit date
-// blockquote
-// superscript
-// subscript
-// citations/references
-// typewriter-text
// figures
// FIXME: these have to go at the top so they have their dropAll bit set before PreSequence.build...
//////////////////////////////////////////////////////////////////////////////
-s ::= { Doc } => "top"
+s ::= {Doc} => top
-Doc ::= Header Body /ws => doc
+Doc ::= Header Body /ws => doc
Header ::= "header" { kv */ ws } /ws => header
Body ::= Section*/ws => body
Section ::= SectionHeader Paragraph*/ws /ws => section
kv ::= word "=" text /ws => kv1
+num !::= [0-9]++
Paragraph ::= { "\"\"" ws text } => "blockquote"
> { "*" " " ws text } => "ul"
> { "#" " " ws text } => "ol"
- > { num " " ws text => "ol" }
+ > { num " " ws text } => "ol"
> { "---" "-"* } => "hr"
> { text } => "p"
-text ::= item */ ws
+text ::= item => "t"
+itemx ::= ws item => "w" | ()
+item ::= blockquote => "b"
+ > pre itemx => "a"
+ > structured itemx => "1"
+ > structuredx itemx => "2"
+ > styled itemx => "3"
+ > qtext itemx => "4"
+ > alphanum++ itemx => "5"
+ > symbol itemx => "6"
+// > sym++ itemx => []
+ > Paragraph itemx => "7"
+
+symbol ::= sym++
+
+blockquote ::= "\"\"" text "\"\"" => "blockquote"
+ | "\"\"" block => "blockquote"
+
+qtext ::= "\"" text "\"" => "quoted"
pre ::= "[verbatim]" { ~[]+ } /ws => "verbatim" // FIXME doesn't work
-item ::= pre
- > email
- > structured
- > styled
- > "\"" text "\"" => quoted
- > alphanum++ => "stringify"
- > symbol
-
-symbol ::= symbolx & sym++
-symbolx ::= "--" => emdash
- | ","
- | ":"
- | ";"
-
-styled ::= "**" text "**" => bold
- | "__" text "__" => ul
- | "~~" text "~~" => it // hard to type
- | "((" text "))" => footnote
-
-structured ::= glyph
-// | url
-glyph ::= "(r)" | "(c)" | "(tm)" // euro symbol?
+styled ::= "__" text "__" => ul
+ | "((" text "))" => footnote
+ | ( "[[" text "]]" => tt
+ > "[" word "]" => citation
+ )
+ | "!!" text "!!" => strikethrough
+ | "^^" (word|block) => superscript
+ | ",," (word|block) => subscript
+ | "\\sc" block => smallcap
+ | "**" text "**" => bold
+ | "!" (word|block) => keyword
+ > "*" text "*" => it
+
+block ::= { text }
+structured ::= { text } "->" (url|email) => link
+ //> alphanum++ "->" (url|email) => link
+structuredx ::= glyph
+ > email
+ > url
+
+glyph ::= "(r)" | "(c)" | "(tm)" | "--" // euro symbol?
+ | "\\today" -> ~[a-z] => today
// URLs //////////////////////////////////////////////////////////////////////////////
// only gets parsed once
urlpath ::= urlchar*
-username ::= [a-zA-Z0-9;/?:&=$\-_.+]++ => "stringify"
-password ::= [a-zA-Z0-9;/?:&=$\-_.+]++ => "stringify"
+username ::= [a-zA-Z0-9;/?:&=$\-_.+]++
+password ::= [a-zA-Z0-9;/?:&=$\-_.+]++
// a single URL character: either one of the literal characters below, or a
// percent-escape ("%" followed by two hexadecimal digits, per RFC 3986)
urlchar ::= [a-zA-Z0-9;/?:&=$\-_.+@]
| "%" [0-9a-fA-F] [0-9a-fA-F] => "%"
url ::= "mailto" ":" email
method ::= [+\-.a-z0-9]+
port ::= [0-9]+
-domain ::= part +/ "."
-part ::= [A-Za-z0-9\-]++ => "stringify"
+domain ::= (part +/ ".") -> ~"."
+part ::= [a-zA-Z0-9\-]++ // interesting use of boolean grammars
// &~ ([\-0-9] ~[]* | ~[]* [\-0-9])
-email ::= username "@" host => email
-host ::= domain
- | [0-9]+ "." [0-9]+ "." [0-9]+ "." [0-9]+ => "ip"
+email ::= username "@" host -> ~[.] => emailaddr
+host ::= [0-9]+ "." [0-9]+ "." [0-9]+ "." [0-9]+ => "ip"
+ | domain
// Tokens ///////////////////////////////////////////////////////////////////
-word ::= alphanum++ => "stringify"
+word ::= alphanum++
| quoted
quoted ::= "\"" ((~[\"\\] | escaped)+) "\""
// Chars ///////////////////////////////////////////////////////////////
alpha ::= [a-zA-Z]
-num !::= [0-9]++ => "stringify"
//num ::= [0-9]
alphanum ::= [a-zA-Z0-9]
-sym ::= ~[a-zA-Z0-9\ \r\n]
+sym ::= ~[a-zA-Z0-9\ \r\n=\">]