checkpoint
[sbp.git] / tests / tibdoc.g
1 x  ="x"   // rule x: the literal "x"; no other rule visible in this file references it
2 // interactions between !=> and &~ mean that I need to rethink the chartage
3 // indentation styling...
4 // literal blocks [[need to ignore bracing]] double-colon style?
5 // definition -- by prior line indentation, like headings in the original structured text
6 // tables
7 // dropcap
8 // output formats: latex, contex, ps, pdf, html, man, txt, rfc
9
10 // escapification
11 // comment
12 // math
13 // image
14 // figures
15
16 // "reference-style" links
17 //
18 //   this[1] is fun
19 //
20 //   [1] http://...
21 //
22
23 // consider ++bold++ and **italic**?
24 // \br
25 // nonbreaking text?
26 // ellipsis detection (...)
27 // degree: 15^o
28 // Arrows: <- -> => <= <->
29
30 // textblocks:
31 //  - attention, caution, danger, error, hint, important, note, tip, warning
32 // definition
33 // sidebar
34 // figure-with-caption
35 // epigraph (end-of-chapter note)
36 // compound paragraph (??)
37 // csv-table?
38 // table of contents
39 // header, footer
40 // #include
41
42 // simple macros (#define) (\define)
43 // today's date
44
45 // table representation
46 //  
47 //  \table
48 //     a bbb c
49 //     ddd   e
50 //
51 //     [a] ...
52 //     [b] ...
53 //     [c] ...
54
55 // FIXME: these have to go at the top so they have their dropAll bit set before PreSequence.build...
56 ws      !  = w**              // run of whitespace chars (w); the `!` is presumably the drop marker the FIXME above refers to -- TODO confirm
57 w       !  =  [\r\n\ ]        // one whitespace char: CR, LF, or space
58 nw         = ~[\r\n\ ]        // one char outside that set; not referenced by any other rule visible in this file
59
60 //////////////////////////////////////////////////////////////////////////////
61
62 s                   = Doc => top                                      // presumably the start symbol -- TODO confirm; a Doc tagged top
63
64 Doc                 = {Header} Body                   /ws => doc      // Header (in braces) followed by Body, tagged doc
65 Header              = "header" { kv */ ws }           /ws => header   // keyword "header", then a braced list of kv entries delimited by ws
66 Body                = Section*/ws                         => body     // Sections delimited by ws
67 Section             = { SectionHeader Paragraph* /ws => section }     // braced: one heading followed by its Paragraphs
68 SectionHeader       = "==" SectionHeaderBody "=="                     // heading wrapped in "==" on both sides
69 SectionHeaderBody   =  "=" SectionHeaderBody "="                      // recursive case: peels one further matching "=" pair
70                     >      ws text ws                                 // base case: heading text with surrounding ws
71
72 sp    !  = " "**                                   // run of space characters only (no newlines)
73 blank    = sp "\n" sp "\n" ws                      // two newlines, each optionally preceded by spaces, then ws; not referenced in the visible grammar
74
75 kv           = word "=" text /ws => kv1            // key "=" value entry (used by Header), tagged kv1
76 wp !  = w++                                        // non-empty run of whitespace chars; not referenced in the visible grammar
77 num !  = [0-9]++                                   // non-empty run of digits; unreferenced here -- onums and the URL rules use nums instead
78 Paragraph    = { "\"\"" ws  text }  => "blockquote"   // braced block opening with two double-quotes
79              > { "---" "-"*      }  => "hr"           // braced block of three or more dashes
80              > { text }             => "p"            // any other braced text block
81
82 onums     !  = nums (". "|") ")                     // ordered-list marker: digits followed by ". " or ") "
83 any       !  = ~[]*                                 // any sequence of characters (negation of the empty class, repeated)
84
85 uli          = "* "         (ws text &~ any (oli|uli)) => "li"   // bullet item; the `&~ any (oli|uli)` conjunct presumably keeps the item text from running into a following list item -- TODO confirm
86 oli          = ("# "|onums) (ws text &~ any (oli|uli)) => "li"   // numbered item introduced by "# " or an onums marker; same exclusion as uli
87
88 text         = Item => text                          // an Item chain tagged text
89 Itemx        = ws Item                               // continuation: whitespace, then another Item
90              | ()                                    // ...or nothing, which ends the chain
91 Item         = blockquote                            // alternatives below are chained with `>` / `|`; each (except this one) is followed by Itemx
92              > { uli+/ws => "ul" }       Itemx  => []   // braced run of bullet items
93              | { oli+/ws => "ol" }       Itemx  => []   // braced run of numbered items
94              > pre                       Itemx  => []   // "[verbatim]" block
95              > structured                Itemx  => []   // braced text linked to a url/email
96              > structuredx               Itemx  => []   // glyph, bare email, or bare url
97              > styled                    Itemx  => []   // inline markup (underline, bold, ...)
98              > qtext                     Itemx  => []   // double-quoted text
99              > (alphanum++ => stringify) Itemx  => []   // plain run of letters/digits
100              > symbol                    Itemx  => []   // NOTE(review): no rule named symbol is defined in the visible grammar -- confirm it exists elsewhere
101              > (sym++ => stringify)      Itemx  => []   // run of sym (punctuation-like) characters
102              > Paragraph                 Itemx  => []   // nested braced paragraph
103
104 blockquote   = "\"\"" text "\"\""        => "blockquote"   // text enclosed by a pair of two-double-quote delimiters
105              | "\"\"" block              => "blockquote"   // or the opening delimiter followed by a braced block
106              
107 qtext        = "\"" text "\""            => "quoted"       // text inside single double-quote characters
108 pre          = "[verbatim]" { ~[]+ } /ws => "verbatim"   // FIXME doesn't work
109
110 styled       = "__" text "__"      => underline       // __text__
111              | "((" text "))"      => footnote        // ((text))
112              | ( "[[" text "]]"    => tt              // [[text]]
113                >  "[" word "]"     => citation        // [word]; listed after the "[[" form with `>`, presumably so "[[" is tried as tt first -- TODO confirm
114                )
115              | "!!" text "!!"      => strikethrough   // !!text!!
116              | "^^" (word|block)   => superscript     // ^^ followed by a word or a braced block
117              | ",," (word|block)   => subscript       // ,, followed by a word or a braced block
118              | "\\sc" block        => smallcap        // \sc followed by a braced block
119              | "**" text "**"      => bold            // **text**
120              | "!" (word|block)    => keyword         // ! followed by a word or a braced block
121              >  "*" text "*"       => it              // *text*; placed after the alternatives above with `>`
122
123 block   = { text }                                  // text wrapped in the grammar's brace construct
124 structured   = { text } "->" (url|email) => link    // braced text, a literal "->", then the link target
125               //> alphanum++ "->" (url|email) => link
126 structuredx   = glyph                               // unbraced inline items: glyph, then email, then url
127               > email
128               > url
129
130 glyph        = "(r)" | "(c)" | "(tm)" | "--"  // euro symbol?
131              | "\\today" -> ~[a-z] => today         // \today, with `-> ~[a-z]` presumably requiring that no lowercase letter follows -- TODO confirm
132
133
134 // URLs //////////////////////////////////////////////////////////////////////////////
135
136 // interesting opportunity to show off boolean grammars here: define other
137 // subtypes of url (ftp, etc) as conjunctions, but the "master pattern"
138 // only gets parsed once
139
140 urlpath      = urlchar*                                // zero or more URL characters (the part after the "/" in url)
141 username     = [a-zA-Z0-9;/?:&=$\-_.+]++ => stringify  // login/mailbox name; same class as urlchar's first alternative minus "@"
142 password     = [a-zA-Z0-9;/?:&=$\-_.+]++ => stringify  // same character class as username
143 urlchar      = [a-zA-Z0-9;/?:&=$\-_.+@]                // a literal character allowed in a URL path
144              | "%" [0-9a-fA-F] [0-9a-fA-F] => "%"      // percent-escape: "%" plus two HEX digits (previously decimal-only, so e.g. %2F or %3a failed to match)
145 url          = "mailto" ":"   email                                                // mailto: form, tried first
146              > method "://" url_login? host (":" nums)? ("/" urlpath)?     => "url"   // method://[login]host[:port][/path]
147 url_login    = username (":" password) "@"                                 => "login" // NOTE(review): the ":" password group is not optional here, so a bare "user@" login does not match -- confirm intended
148 method       = [+\-.a-z0-9]+ => stringify                 // scheme name; lowercase letters, digits, "+", "-", "."
149 domain       = (part +/ ".") -> ~"." => domain            // one or more parts delimited by "."; `-> ~"."` presumably forbids a following "." -- TODO confirm
150 part         = [a-zA-Z0-9\-]++  => stringify              // one domain label: letters, digits, "-"
151 // interesting use of boolean grammars
152 //            &~ ([\-0-9] ~[]* | ~[]* [\-0-9])
153
154 email        = username "@" host -> ~[.] => emailaddr     // mailbox "@" host; `-> ~[.]` presumably forbids a following "." -- TODO confirm
155 nums         = [0-9]++ => stringify                       // non-empty run of decimal digits, as a string
156 host         = nums "." nums "." nums "." nums => "ip"    // four dot-separated digit runs (no range check on each number)
157              | domain                                     // ...or a dotted domain name
158
159
160
161 // Tokens ///////////////////////////////////////////////////////////////////
162
163 word       = alphanum++ => stringify                 // bare word: run of letters/digits, as a string
164            | quoted                                  // ...or a double-quoted string
165
166 quoted     = "\"" ((~[\"\\] | escaped)+) "\""        // one or more chars that are not a quote or backslash, or escape sequences, inside double quotes
167            | "\"\"" => ""                            // the empty string literal
168 escaped    = "\\n" => "\n"                           // backslash-n yields a newline
169            | "\\r" => "\r"                           // backslash-r yields a carriage return
170            | "\\" ~[nr]                              // backslash followed by any char other than n or r
171
172
173 // Chars ///////////////////////////////////////////////////////////////
174
175 alpha      = [a-zA-Z]                      // one ASCII letter; not referenced by any rule visible in this file
176 //num        = [0-9]
177 alphanum   = [a-zA-Z0-9]                   // one ASCII letter or digit
178 sym        = ~[a-zA-Z0-9\ \r\n=\">]        // one char that is not alphanumeric, space, CR, LF, "=", a double quote, or ">"
179
180