| sequence ^"&" e*/ws /ws
| sequence ^"&~" e*/ws /ws
-ps ::= e*/ws => "ps"
- | (e+/ws ws)? "^" quoted (ws e+/ws)? => "ps2"
-psx ::= ps
- | ps wp ^"/" ws e
-sequence ::= psx
- | psx ^"=>" (word|quoted) /ws
+ps ::= e*/ws => "ps" // "*/" applied to e and ws (presumably: e repeated, separated by ws -- confirm); labelled "ps"
+ | (e+/ws ws)? "^" quoted (ws e+/ws)? => "ps2" // a "^" quoted item with optional runs of e on either side; labelled "ps2"
+psy ::= ps // a plain ps ...
+ | ps wp ^"/" ws e // ... or a ps followed by "/" and one more e
+psx ::= psy ^"?=>" e /ws // psy, the "?=>" operator, then an e
+ | psy ^"!=>" e /ws // psy, the "!=>" operator, then an e (the form used by the word rule)
+ > psy // otherwise a bare psy (">" presumably ranks alternatives by priority -- confirm)
+sequence ::= quoted => "qprod" // a lone quoted string, labelled "qprod"
+ > psx // otherwise a psx ...
+ | psx ^"=>" (word|quoted) /ws // ... or a psx followed by "=>" and a word or quoted label
ec ::= [~\-\]\\\~]
| escaped
| ^"[" range* "]"
| ^"[~" range* "]"
- | (e ws ^"**" > e ws ^"*")
- | e ^"*/" e /ws
- | (e ws ^"++" > e ws ^"+")
- | e ^"+/" e /ws
+ | (e ^"++" /ws > e ^"+" /ws)
+ | (e ^"++/" e /ws > e ^"+/" e /ws)
+ | (e ^"**" /ws > e ^"*" /ws)
+ | (e ^"**/" e /ws > e ^"*/" e /ws)
| e ^"?" /ws
| "(" word ^")" /ws
| "//" ([~\n]*) "\n"
| "\n"
| "\r"
-word ::= [a-zA-Z0-9_]++
+word ::= [a-zA-Z0-9_]+ !=> [a-zA-Z0-9_] // one or more word characters; "!=>" presumably means "not followed by" another one (longest match) -- confirm
quoted ::= "\"" (([~\"\\] | escaped)+) "\""
| "\"\"" => ""
escaped ::= "\\n" => "\n"
--- /dev/null
+
+// URGENT: why does replacing [a-zA-Z0-9] with alphanum in "item" cause severe breakage?
+// URGENT: why does enabling "url" in "structured" cause severe breakage? (probably the same cause)
+// ... probably something to do with unit productions
+
+
+
+// output formats: latex, contex, ps, pdf, html, man, txt, rfc
+// bullet list
+// numbered list
+// horizontal rule
+// dropcap
+// smallcap
+// strikethrough
+// link
+// math
+// image
+// today's date, last edit date
+// blockquote
+// superscript
+// subscript
+// citations/references
+// typewriter-text
+// figures
+
+// FIXME: these have to go at the top so they have their dropAll bit set before PreSequence.build...
+ws !::= w** // run of w built with "**"; declared with "!::=", which per the FIXME above appears tied to the dropAll bit -- confirm
+w !::= [\r\n\ ] // a single whitespace character: CR, LF or space
+nw ::= [~\r\n\ ] // presumably the complement of w (any non-whitespace character); not referenced by any rule visible here
+
+//////////////////////////////////////////////////////////////////////////////
+
+s ::= {Doc} => top // presumably the start symbol (nothing visible references s): a braced Doc, labelled top
+
+Doc ::= Header Body /ws => doc // a document is a Header followed by a Body
+Header ::= "header" { kv */ ws } /ws => header // the literal "header", then a braced group of kv entries
+Body ::= Section*/ws => body // Sections combined with "*/" and ws (presumably zero or more, ws-separated -- confirm)
+Section ::= SectionHeader ({text}*/ws) /ws => section // a heading followed by braced text groups
+SectionHeader ::= "==" SectionHeaderBody "==" // heading delimited by at least two "=" on each side
+SectionHeaderBody ::= "=" SectionHeaderBody "=" // each further nesting level adds one matched pair of "="
+ > ws text ws // innermost case: the heading text with ws on both sides
+
+kv ::= word "=" text /ws => kv1 // one word "=" text entry, labelled kv1; referenced by Header
+
+text ::= item */ ws // items combined with "*/" and ws (presumably ws-separated -- confirm)
+pre ::= "[verbatim]" { [~]+ } /ws => "verbatim" // FIXME doesn't work
+item ::= pre // alternatives are chained with ">" (presumably highest priority first -- confirm)
+ > email // username "@" host
+ > structured // currently just glyph
+ > styled // bold / ul / it / footnote spans
+ > "\"" text "\"" => quoted // text wrapped in double quotes
+ > [a-zA-Z0-9]++ // alphanumeric run; see the URGENT note near the top of this grammar before swapping in alphanum
+ > symbol // punctuation
+
+symbol ::= symbolx & sym++ // "&" conjunction: presumably the input must match both symbolx and a run of sym -- confirm
+symbolx ::= "--" => emdash // double hyphen, labelled emdash
+ | "," // comma
+ | ":" // colon
+ | ";" // semicolon
+
+styled ::= "**" text "**" => bold // text wrapped in "**", labelled bold
+ | "__" text "__" => ul // text wrapped in "__", labelled ul
+ | "~~" text "~~" => it // hard to type
+ | "((" text "))" => footnote // text wrapped in "((" and "))", labelled footnote
+
+structured ::= glyph // currently only glyph; the url alternative on the next line is commented out
+// | url
+
+glyph ::= "(r)" | "(c)" | "(tm)" // one of three literal markers; euro symbol?
+
+
+// URLs //////////////////////////////////////////////////////////////////////////////
+
+// Interesting opportunity to show off boolean grammars here: define other
+// subtypes of url (ftp, etc.) as conjunctions, so that the "master pattern"
+// only gets parsed once.
+
+urlpath ::= urlchar* // zero or more URL characters
+username ::= [a-zA-Z0-9;/?:&=$\-_.+]++ // NOTE(review): class includes ":" and "/", so a "++" run may swallow ":password" -- confirm
+password ::= [a-zA-Z0-9;/?:&=$\-_.+]++ // same character class as username
+urlchar ::= [a-zA-Z0-9;/?:&=$\-_.+@] // a literal URL character ...
+ | "%" [0-9a-fA-F] [0-9a-fA-F] => "%" // ... or a percent-escape: "%" plus two hex digits (e.g. %2F), not just decimal ones
+url ::= "mailto" ":" email // mailto: followed by an email address
+ > method "://" url_login? host (":" port)? ("/" urlpath)? => "url" // method://[login]host[:port][/path]
+url_login ::= username (":" password)? "@" => "login" // user[:password]@ -- the password part is optional
+method ::= [+\-.a-z0-9]+ // scheme name: lower-case letters, digits, "+", "-", "."
+port ::= [0-9]+ // decimal port number
+
+domain ::= part +/ "." // parts combined with "+/" and "." (presumably one or more, dot-separated -- confirm)
+part ::= [a-zA-Z0-9\-]++ // letters, digits, "-"; interesting use of boolean grammars (disabled, next line):
+// &~ ([\-0-9] [~]* | [~]* [\-0-9])
+
+email ::= username "@" host => email // username "@" host, labelled email
+host ::= [0-9]+ "." [0-9]+ "." [0-9]+ "." [0-9]+ => "ip" // four dot-separated digit runs, labelled "ip"; no range check on each run
+ | domain // NOTE(review): an all-digit dotted host also matches domain -- confirm "|" rather than ">" is intended
+
+
+
+// Tokens ///////////////////////////////////////////////////////////////////
+
+word ::= alphanum++ // a run of alphanum characters ...
+ | quoted // ... or a quoted string
+
+quoted ::= "\"" (([~\"\\] | escaped)+) "\"" // double-quoted body: one or more escapes or characters other than double quote and backslash
+ | "\"\"" => "" // two adjacent double quotes give the empty string
+escaped ::= "\\n" => "\n" // backslash-n yields a newline
+ | "\\r" => "\r" // backslash-r yields a carriage return
+ | "\\" [~nr] // backslash followed by any character other than n or r
+
+
+// Chars ///////////////////////////////////////////////////////////////
+
+alpha ::= [a-zA-Z] // ASCII letter; not referenced by any rule visible here
+num ::= [0-9] // ASCII digit; not referenced by any rule visible here
+alphanum ::= [a-zA-Z0-9] // ASCII letter or digit; used by word
+sym ::= [~a-zA-Z0-9\ \r\n] // presumably any character that is not alphanumeric, space, CR or LF; used by symbol
+
+