checkpoint
[sbp.git] / tests / tibdoc.g
1 x  ="x"   // matches the literal "x"; no other rule visible in this file refers to x
2 // interactions between !=> and &~ mean that I need to rethink the chartage
3 // indentation styling...
4 // literal blocks [[need to ignore bracing]] double-colon style?
5 // definition -- by prior line indentation, like headings in the original structured text
6 // tables
7 // dropcap
8 // output formats: latex, contex, ps, pdf, html, man, txt, rfc
9
10 // escapification
11 // comment
12 // math
13 // image
14 // figures
15
16 // FIXME: these have to go at the top so they have their dropAll bit set before PreSequence.build...
17 ws      !  = w**             // a run of w characters; "!" is presumably the drop marker the FIXME above refers to -- confirm
18 w       !  =  [\r\n\ ]       // one whitespace character: CR, LF, or space
19 nw         = ~[\r\n\ ]       // one character outside the w class; not referenced by any rule visible here
20
21 //////////////////////////////////////////////////////////////////////////////
22
23 s                   = Doc => top   // presumably the start symbol (TODO confirm); a Doc tagged top
24
25 Doc                 = {Header} Body                   /ws => doc   // a Header inside { } followed by Body, tagged doc
26 Header              = "header" { kv */ ws }           /ws => header   // the literal "header", then a { } group of kv entries with ws between them
27 Body                = Section*/ws                         => body   // Sections with ws between them, tagged body
28 Section             = { SectionHeader Paragraph* /ws => section }   // one SectionHeader followed by its Paragraphs, all inside { }
29 SectionHeader       = "==" SectionHeaderBody "=="   // heading text wrapped in at least "==" on each side
30 SectionHeaderBody   =  "=" SectionHeaderBody "="   // each extra matched pair of "=" nests one level deeper
31                     >      ws text ws   // innermost case: the heading text itself, padded by ws
32
33 sp    !  = " "**   // a run of space characters
34 blank    = sp "\n" sp "\n" ws   // two newlines, each optionally preceded by spaces, then ws; not referenced by any rule visible here
35
36 kv           = word "=" text /ws => kv1   // key/value entry used by Header: word, "=", text
37 wp !  = w++   // one or more w characters; not referenced by any rule visible here
38 num !  = [0-9]++   // a run of digits; not referenced by any rule visible here (nums is the rule other rules use)
39 Paragraph    = { "\"\"" ws  text }  => "blockquote"   // { } group opening with two double-quote characters
40              > { "---" "-"*      }  => "hr"   // { } group of three or more dashes
41              > { text }             => "p"   // any other { } group of text
42
43 onums     !  = nums (". "|") ")   // numeric list marker: digits followed by ". " or ") "
44 any       !  = ~[]*   // presumably any sequence of characters (~[] read as "any character") -- confirm
45
46 uli          = "* "         (ws text &~ any (oli|uli)) => "li"   // "* " marker, then text; the &~ clause presumably rejects text that ends in another list item -- confirm
47 oli          = ("# "|onums) (ws text &~ any (oli|uli)) => "li"   // same as uli but introduced by "# " or an onums marker
48
49 text         = Item => text   // text is an Item tagged text
50 Itemx        = ws Item   // continuation: ws followed by another Item ...
51              | ()   // ... or nothing
52 Item         = blockquote   // alternatives below are joined by ">" (presumably priority order -- confirm) except the ul/ol pair joined by "|"
53              > { uli+/ws => "ul" }       Itemx  => []   // one or more uli inside { }, tagged "ul"; "=> []" presumably splices the pieces into one list -- confirm
54              | { oli+/ws => "ol" }       Itemx  => []   // one or more oli inside { }, tagged "ol"
55              > pre                       Itemx  => []
56              > structured                Itemx  => []
57              > structuredx               Itemx  => []
58              > styled                    Itemx  => []
59              > qtext                     Itemx  => []
60              > (alphanum++ => stringify) Itemx  => []   // a run of letters/digits turned into a string
61              > symbol                    Itemx  => []   // NOTE(review): no rule named symbol is defined in the visible file -- verify
62              > (sym++ => stringify)      Itemx  => []   // a run of sym characters turned into a string
63              > Paragraph                 Itemx  => []
64
65 blockquote   = "\"\"" text "\"\""        => "blockquote"   // text between a pair of doubled double-quotes
66              | "\"\"" block              => "blockquote"   // or doubled double-quotes followed by a { } block
67              
68 qtext        = "\"" text "\""            => "quoted"   // text between single double-quote characters
69 pre          = "[verbatim]" { ~[]+ } /ws => "verbatim"   // FIXME doesn't work; intended: "[verbatim]" followed by a { } group of arbitrary characters
70
71 styled       = "__" text "__"      => underline   // inline markup: each alternative pairs a delimiter with the tag it produces
72              | "((" text "))"      => footnote
73              | ( "[[" text "]]"    => tt   // grouped so "[[...]]" and "[word]" are ordered against each other with ">"
74                >  "[" word "]"     => citation
75                )
76              | "!!" text "!!"      => strikethrough
77              | "^^" (word|block)   => superscript   // applies to the following word or { } block
78              | ",," (word|block)   => subscript
79              | "\\sc" block        => smallcap   // literal backslash-sc followed by a { } block
80              | "**" text "**"      => bold
81              | "!" (word|block)    => keyword
82              >  "*" text "*"       => it   // joined with ">" rather than "|"; presumably ranks single-star italics below the alternatives above -- confirm
83
84 block   = { text }   // text inside a { } group
85 structured   = { text } "->" (url|email) => link   // a { } group of text, the literal "->", then a url or email; tagged link
86               //> alphanum++ "->" (url|email) => link   // disabled alternative: a bare alphanumeric run as the link text
87 structuredx   = glyph   // glyph, email, or url, joined by ">"
88               > email
89               > url
90
91 glyph        = "(r)" | "(c)" | "(tm)" | "--"  // euro symbol?
92              | "\\today" -> ~[a-z] => today   // literal backslash-today; "-> ~[a-z]" presumably requires that no lowercase letter follows -- confirm
93
94
95 // URLs //////////////////////////////////////////////////////////////////////////////
96
97 // interesting opportunity to show off boolean grammars here: define other
98 // subtypes of url (ftp, etc) as conjunctions, but the "master pattern"
99 // only gets parsed once
100
101 urlpath      = urlchar*   // zero or more urlchar
102 username     = [a-zA-Z0-9;/?&=$\-_.+]++ => stringify   // ":" is excluded: it separates username from password in url_login, and a maximal (++) run would otherwise swallow it
103 password     = [a-zA-Z0-9;/?:&=$\-_.+]++ => stringify   // ends at "@", which is not in this class
104 urlchar      = [a-zA-Z0-9;/?:&=$\-_.+@]   // a literal URL character ...
105              | "%" [0-9a-fA-F] [0-9a-fA-F] => "%"   // ... or a percent-escape, which takes two hexadecimal digits (e.g. %2F), not just decimal ones
106 url          = "mailto" ":"   email   // mailto: followed by an email address
107              > method "://" url_login? host (":" nums)? ("/" urlpath)?     => "url"   // method://[login]host[:port][/path]
108 url_login    = username (":" password)? "@"                                => "login"   // the password part is optional, so "user@host" is accepted as well as "user:pass@host"
109 method       = [+\-.a-z0-9]+ => stringify   // the part before "://", e.g. http
110 domain       = (part +/ ".") -> ~"." => domain   // one or more parts with "." between them; "-> ~" presumably forbids a following "." -- confirm
111 part         = [a-zA-Z0-9\-]++  => stringify   // one dot-free label of a domain
112 // interesting use of boolean grammars
113 //            &~ ([\-0-9] ~[]* | ~[]* [\-0-9])
114
115 email        = username "@" host -> ~[.] => emailaddr   // username, "@", host; "-> ~[.]" presumably requires that no "." follows -- confirm
116 nums         = [0-9]++ => stringify   // a run of digits turned into a string
117 host         = nums "." nums "." nums "." nums => "ip"   // four dot-separated digit runs (no range check on each part), tagged "ip"
118              | domain   // or a domain name
119
120
121
122 // Tokens ///////////////////////////////////////////////////////////////////
123
124 word       = alphanum++ => stringify   // a run of letters/digits turned into a string ...
125            | quoted   // ... or a quoted string
126
127 quoted     = "\"" ((~[\"\\] | escaped)+) "\""   // double-quoted string: one or more characters that are neither quote nor backslash, or escapes
128            | "\"\"" => ""   // the empty quoted string yields ""
129 escaped    = "\\n" => "\n"   // backslash-n yields a newline
130            | "\\r" => "\r"   // backslash-r yields a carriage return
131            | "\\" ~[nr]   // backslash followed by any character other than n or r
132
133
134 // Chars ///////////////////////////////////////////////////////////////
135
136 alpha      = [a-zA-Z]   // one ASCII letter; not referenced by any rule visible here
137 //num        = [0-9]   // disabled; a rule named num is defined earlier in this file as a run of digits
138 alphanum   = [a-zA-Z0-9]   // one ASCII letter or digit
139 sym        = ~[a-zA-Z0-9\ \r\n=\">]   // one character that is not a letter, digit, space, CR, LF, "=", double-quote, or ">"
140
141