tests/tibdoc.g

   1 x  ="x"   // matches the literal "x"; no other rule visible in this file refers to x
   2 // interactions between !=> and &~ mean that I need to rethink the chartage
   3 // indentation styling...
   4 // literal blocks [[need to ignore bracing]] double-colon style?
   5 // definition -- by prior line indentation, like headings in the original structured text
   6 // tables
   7 // dropcap
   8 // output formats: latex, context, ps, pdf, html, man, txt, rfc
   9
  10 // escapification
  11 // comment
  12 // math
  13 // image
  14 // figures
  15
  16 // "reference-style" links
  17 //
  18 //   this[1] is fun
  19 //
  20 //   [1] http://...
  21 //
  22
  23 // consider ++bold++ and **italic**?
  24 // \br
  25 // nonbreaking text?
  26 // ellipsis detection (...)
  27 // degree: 15^o
  28 // Arrows: <- -> => <= <->
  29
  30 // textblocks:
  31 //  - attention, caution, danger, error, hint, important, note, tip, warning
  32 // definition
  33 // sidebar
  34 // figure-with-caption
  35 // epigraph (end-of-chapter note)
  36 // compound paragraph (??)
  37 // csv-table?
  38 // table of contents
  39 // header, footer
  40 // #include
  41
  42 // simple macros (#define) (\define)
  43 // today's date
  44
  45 // table representation
  46 //
  47 //  \table
  48 //     a bbb c
  49 //     ddd   e
  50 //
  51 //     [a] ...
  52 //     [b] ...
  53 //     [c] ...
  54
  55 // FIXME: these have to go at the top so they have their dropAll bit set before PreSequence.build...
  56 ws      !  = w**          // whitespace run built from w (** is presumably the greedy form of * -- confirm)
  57 w       !  =  [\r\n\ ]    // a single whitespace character: CR, LF or space (tab is not in the class)
  58 nw         = ~[\r\n\ ]    // a single character outside w's class; not referenced by the other visible rules
  59
  60 //////////////////////////////////////////////////////////////////////////////
  61
  62 s                   = Doc => top                                      // a Doc, tagged top; presumably the start symbol -- confirm
  63
  64 Doc                 = {Header} Body                   /ws => doc      // a braced Header followed by Body, tagged doc ({..} presumably an indent/brace-delimited block -- confirm)
  65 Header              = "header" { kv */ ws }           /ws => header   // the keyword "header" followed by a braced list of kv entries separated by ws
  66 Body                = Section*/ws                         => body     // Sections separated by ws
  67 Section             = { SectionHeader Paragraph* /ws => section }     // a braced SectionHeader followed by Paragraphs, tagged section
  68 SectionHeader       = "==" SectionHeaderBody "=="                     // heading delimited by at least "==" on each side
  69 SectionHeaderBody   =  "=" SectionHeaderBody "="                      // recursive case: each nesting level adds one matched "=" on both sides
  70                     >      ws text ws                                 // base case: the heading text, surrounded by ws
  71
  72 sp    !  = " "**                 // a run of space characters
  73 blank    = sp "\n" sp "\n" ws    // two newlines, each optionally preceded by spaces, then ws; not referenced by the other visible rules
  74
  75 kv           = word "=" text /ws => kv1   // key/value entry: word, "=", text, tagged kv1 (used by Header)
  76 wp !  = w++                                // non-empty run of w; not referenced by the other visible rules
  77 num !  = [0-9]++                           // run of decimal digits; the visible rules use nums instead
  78 Paragraph    = { "\"\"" ws  text }  => "blockquote"   // braced block opening with two double-quote characters
  79              > { "---" "-"*      }  => "hr"           // braced block of three or more dashes: horizontal rule
  80              > { text }             => "p"            // any other braced text: ordinary paragraph
  81
  82 onums     !  = nums (". "|") ")   // numbered-list marker: nums followed by ". " or ") "
  83 any       !  = ~[]*               // any sequence of characters (negation of the empty class, repeated)
  84
  85 uli          = "* "         (ws text &~ any (oli|uli)) => "li"   // bullet item; the &~ part presumably rejects item text that itself ends in another list item -- confirm
  86 oli          = ("# "|onums) (ws text &~ any (oli|uli)) => "li"   // ordered item introduced by "# " or an onums marker; same body restriction as uli
  87
  88 text         = Item => text                            // text is a single Item chain, tagged text
  89 Itemx        = ws Item                                 // continuation: ws followed by a further Item...
  90              | ()                                      // ...or nothing, which ends the chain
  91 Item         = blockquote                              // note: the only alternative without an Itemx continuation
  92              > { uli+/ws => "ul" }       Itemx  => []  // braced bullet list; => [] presumably splices the results rather than adding a node -- confirm
  93              | { oli+/ws => "ol" }       Itemx  => []  // braced ordered list
  94              > pre                       Itemx  => []  // verbatim block
  95              > structured                Itemx  => []  // link with an explicit target
  96              > structuredx               Itemx  => []  // glyph, bare email or bare url
  97              > styled                    Itemx  => []  // inline styling markup
  98              > qtext                     Itemx  => []  // double-quoted text
  99              > (alphanum++ => stringify) Itemx  => []  // plain run of letters/digits
 100              > symbol                    Itemx  => []  // NOTE(review): symbol has no definition among the visible rules (only sym) -- confirm it is defined elsewhere
 101              > (sym++ => stringify)      Itemx  => []  // run of sym characters
 102              > Paragraph                 Itemx  => []  // nested paragraph block
 103
 104 blockquote   = "\"\"" text "\"\""        => "blockquote"   // text enclosed between two pairs of double-quote characters
 105              | "\"\"" block              => "blockquote"   // or an opening pair followed by a braced block
 106
 107 qtext        = "\"" text "\""            => "quoted"       // text between single double-quote characters
 108 pre          = "[verbatim]" { ~[]+ } /ws => "verbatim"   // FIXME doesn't work (as written: the "[verbatim]" marker, then a braced run of arbitrary characters)
 109
 110 styled       = "__" text "__"      => underline       // __text__
 111              | "((" text "))"      => footnote        // ((text))
 112              | ( "[[" text "]]"    => tt              // [[text]]; grouped with the next line so both bracket forms share one alternative
 113                >  "[" word "]"     => citation        // [word]
 114                )
 115              | "!!" text "!!"      => strikethrough   // !!text!!
 116              | "^^" (word|block)   => superscript     // ^^ followed by a word or a braced block
 117              | ",," (word|block)   => subscript       // ,, followed by a word or a braced block
 118              | "\\sc" block        => smallcap        // \sc followed by a braced block
 119              | "**" text "**"      => bold            // **text**
 120              | "!" (word|block)    => keyword         // ! followed by a word or a braced block
 121              >  "*" text "*"       => it              // *text*; separated from the alternatives above by > rather than |
 122
 123 block   = { text }                                   // text inside a braced block
 124 structured   = { text } "->" (url|email) => link     // braced text, the literal "->", then a url or email target
 125               //> alphanum++ "->" (url|email) => link
 126 structuredx   = glyph                                // bare glyph...
 127               > email                                // ...or bare email address...
 128               > url                                  // ...or bare url
 129
 130 glyph        = "(r)" | "(c)" | "(tm)" | "--"  // euro symbol?
 131              | "\\today" -> ~[a-z] => today          // \today; the "-> ~[a-z]" part presumably requires that no lowercase letter follows -- confirm
 132
 133
 134 // URLs //////////////////////////////////////////////////////////////////////////////
 135
 136 // interesting opportunity to show off boolean grammars here: define other
 137 // subtypes of url (ftp, etc) as conjunctions, but the "master pattern"
 138 // only gets parsed once
 139
 140 urlpath      = urlchar*                                  // path portion of a url: any number of urlchar
 141 username     = [a-zA-Z0-9;/?:&=$\-_.+]++ => stringify    // run of letters, digits and ; / ? : & = $ - _ . +
 142 password     = [a-zA-Z0-9;/?:&=$\-_.+]++ => stringify    // same character class as username
 143 urlchar      = [a-zA-Z0-9;/?:&=$\-_.+@]                      // one literal character allowed in a url path
 144              | "%" [0-9a-fA-F] [0-9a-fA-F]       => "%"      // percent-escape: "%" plus two hex digits, so escapes such as %2F and %3a are accepted
 145 url          = "mailto" ":"   email                                             // mailto: followed by an email address
 146              > method "://" url_login? host (":" nums)? ("/" urlpath)?     => "url"   // method://[login]host[:port][/path]
 147 url_login    = username (":" password) "@"                                 => "login" // NOTE(review): the password group has no "?", so user@host without a password is not accepted; ":" is also inside username's class -- confirm both are intended
 148 method       = [+\-.a-z0-9]+ => stringify       // url scheme; NOTE(review): uses + where the sibling rules use ++ -- confirm intended
 149 domain       = (part +/ ".") -> ~"." => domain   // parts separated by "."; the "-> ~"."" presumably forbids a following "." -- confirm
 150 part         = [a-zA-Z0-9\-]++  => stringify     // one domain label: letters, digits and "-"
 151 // interesting use of boolean grammars
 152 //            &~ ([\-0-9] ~[]* | ~[]* [\-0-9])
 153
 154 email        = username "@" host -> ~[.] => emailaddr   // username@host; the "-> ~[.]" presumably forbids a following "." -- confirm
 155 nums         = [0-9]++ => stringify                     // run of decimal digits
 156 host         = nums "." nums "." nums "." nums => "ip"  // four dot-separated digit groups (the groups are not range-checked)
 157              | domain                                    // or a domain name
 158
 159
 160
 161 // Tokens ///////////////////////////////////////////////////////////////////
 162
 163 word       = alphanum++ => stringify              // a run of letters/digits...
 164            | quoted                                // ...or a double-quoted string
 165
 166 quoted     = "\"" ((~[\"\\] | escaped)+) "\""     // one or more characters other than double-quote/backslash, or escapes, between double quotes
 167            | "\"\"" => ""                          // the empty quoted string, handled as its own alternative
 168 escaped    = "\\n" => "\n"                         // backslash-n yields a newline
 169            | "\\r" => "\r"                         // backslash-r yields a carriage return
 170            | "\\" ~[nr]                            // backslash followed by any character other than n or r
 171
 172
 173 // Chars ///////////////////////////////////////////////////////////////
 174
 175 alpha      = [a-zA-Z]                  // one ASCII letter; not referenced by the other visible rules
 176 //num        = [0-9]
 177 alphanum   = [a-zA-Z0-9]               // one ASCII letter or digit
 178 sym        = ~[a-zA-Z0-9\ \r\n=\">]    // one character that is not a letter, digit, space, CR, LF, '=', double quote or '>'
 179
 180