Scannerless Boolean Parser
Adam Megacz <megacz@cs.berkeley.edu>
-12-Dec-2005
+updated 27-May-2007
______________________________________________________________________________
Requirements
sbp/ -- core API: grammar, trees, and forests
misc -- regression testing and sample code
util -- some nifty data structures not specific to parsing
- tib -- Text with Indentation and Braces (experimental/undocumented)
+ meta -- code relating to the metagrammar (meta.g)
+ chr -- code for parsing character streams (you want this)
doc/
+ jargon.txt -- read this!
api/ -- generated javadocs go here
tests/
meta.g -- the tentative metagrammar, written in itself
would be anything drastic enough to require more than a few minutes
to update a grammar description.
- The TIB stuff is super experimental; I'm not even sure if I'll keep
- it around. Use at your own risk.
+
+______________________________________________________________________________
+Haskell Integration
+
+ If you're using LambdaVM (the Java backend for GHC), you'll be able
+ to use SBP in your Haskell code. See src/SBP.lhs for more info.
+
+++ /dev/null
-\begin{code}
-module Main
-where
-import SBP
-main = do t <- parseFile "../wix/wix.g" "../wix/lasik.wix"
- putStrLn $ toHtml $ ((fromTree $ coalesceFlatHeadlessNodes t) :: Doc)
-
--- url crap
--- ul/ol
--- glyphs
-
-------------------------------------------------------------------------------
-data Doc = Doc Header [Section]
-data Section = Section [Text] [Paragraph]
-data Paragraph = Blockquote [Text]
- | HR
- | OL
- | P [Text]
-data Text = WS
- | Chars String
- | Symbol String
- | Quotes [Text]
- | Block [Text]
- | Command String [Text]
- | Verbatim String
- | Link [Text] URL
- | Underline [Text]
- | Footnote [Text]
- | TT [Text]
- | Citation [Text]
- | Strikethrough [Text]
- | Superscript [Text]
- | Subscript [Text]
- | Smallcap [Text]
- | Bold [Text]
- | Keyword [Text]
- | Italic [Text]
-data Header = Header
-data URL = URL
- deriving Show
-
-{-
-glyph = euro:: "(e)"
- | r:: "(r)"
- | c:: "(c)"
- | tm:: "(tm)"
- | emdash:: "--"
- | ellipses:: "..."
- | cent:: "\\cent"
--}
-
-------------------------------------------------------------------------------
-class FromTree a where
- fromTree :: Tree -> a
-class FromTrees a where
- fromTrees :: [Tree] -> a
-instance FromTree a => FromTree [a] where
- fromTree (Tree _ c _) = map fromTree c
-
-instance FromTree Doc where
- fromTree (Tree "Doc" [a,b] _) = Doc Header $ fromTree b
- fromTree (Tree "Doc" [b] _) = Doc Header $ fromTree b
- fromTree _ = error "top level must be Doc"
-
-instance FromTree Section where
- fromTree (Tree "Section" [(Tree _ c _),(Tree _ paragraphs _)] _) =
- Section (map fromTree c) $ map fromTree paragraphs
-
-instance FromTree Paragraph where
- fromTree (Tree "P" [Tree _ text _] _) = P $ map fromTree text
- fromTree (Tree "HR" _ _) = HR
-
-instance FromTree Text where
- fromTree (Tree "Chars" chars _) = Chars $ fromTrees chars
- fromTree (Tree "WS" _ _) = WS
- fromTree (Tree "Symbol" sym _) = Symbol $ fromTrees sym
- fromTree (Tree "Quotes" x _) = Quotes $ map fromTree x
- fromTree (Tree "Block" x _) = Block $ map fromTree x
- fromTree (Tree "Command" [x,y] _) = Command (fromTree x) (fromTree y)
- fromTree (Tree "Verbatim" x _) = Verbatim $ fromTrees x
- fromTree (Tree "Link" [word,link] _) = Link (fromTree word) (fromTree link)
- fromTree (Tree "Underline" x _) = Underline $ map fromTree x
- fromTree (Tree "Footnote" x _) = Footnote $ map fromTree x
- fromTree (Tree "TT" x _) = TT $ map fromTree x
- fromTree (Tree "Citation" x _) = Citation $ map fromTree x
- fromTree (Tree "Strikethrough" x _) = Strikethrough $ map fromTree x
- fromTree (Tree "Superscript" x _) = Superscript $ map fromTree x
- fromTree (Tree "Subscript" x _) = Subscript $ map fromTree x
- fromTree (Tree "Smallcap" x _) = Smallcap $ map fromTree x
- fromTree (Tree "Bold" x _) = Bold $ map fromTree x
- fromTree (Tree "Keyword" x _) = Keyword $ map fromTree x
- fromTree (Tree "Italic" x _) = Italic $ map fromTree x
- fromTree (Tree x _ _) = Chars $ x
-
-instance FromTree URL where
- fromTree x = URL
-
-instance FromTree String where
- fromTree (Tree h c _) = h++(concatMap fromTree c)
-instance FromTrees String where
- fromTrees ts = concatMap (fromTree :: Tree -> String) ts
-
-------------------------------------------------------------------------------
-class ToHtml a where
- toHtml :: a -> String
-instance ToHtml a => ToHtml [a] where
- toHtml x = concatMap toHtml x
-
-instance ToHtml Doc where
- toHtml (Doc h secs) = "<html><body>" ++ (toHtml secs) ++ "</body></html>"
-instance ToHtml Section where
- toHtml (Section header paragraphs) = "<h1>"++(toHtml header)++"</h1>"++(toHtml paragraphs)
-instance ToHtml Paragraph where
- toHtml (Blockquote t) = "<blockquote>"++(toHtml t)++"</blockquote>"
- toHtml HR = "<hr/>"
- toHtml OL = "<ol/>"
- toHtml (P t) = "<p>"++(toHtml t)++"</p>"
-instance ToHtml Text where
- toHtml WS = " "
- toHtml (Chars s) = toHtml s
- toHtml (Symbol s) = toHtml s
- toHtml (Quotes x) = "\""++(toHtml x)++"\""
- toHtml (Block x) = toHtml x
- toHtml (Verbatim x) = "<pre>\n"++x++"\n</pre>"
- toHtml (Link t ref) = "<a href='"++(show ref)++"'>"++(toHtml t)++"</a>"
- toHtml (Underline x) = "<u>"++(toHtml x)++"</u>"
- toHtml (TT x) = "<tt>"++(toHtml x)++"</tt>"
- toHtml (Citation x) = "<i>"++(toHtml x)++"</i>"
- toHtml (Strikethrough x) = "<strike>"++(toHtml x)++"</strike>"
- toHtml (Superscript x) = "<sup>"++(toHtml x)++"</sup>"
- toHtml (Subscript x) = "<sub>"++(toHtml x)++"</sub>"
- toHtml (Smallcap x) = "<sc>"++(toHtml x)++"</sc>"
- toHtml (Bold x) = "<b>"++(toHtml x)++"</b>"
- toHtml (Keyword x) = "<tt>"++(toHtml x)++"</tt>"
- toHtml (Italic x) = "<i>"++(toHtml x)++"</i>"
- toHtml (Command x y) = error $ "unsupported command "++(show x)
- toHtml (Footnote x) = error $ "footnotes not supported"
-
-instance ToHtml String where
- toHtml s = concatMap htmlEscapeChar s
- where
- htmlEscapeChar '<' = "<"
- htmlEscapeChar '>' = ">"
- htmlEscapeChar '&' = "&"
- htmlEscapeChar '\'' = "'"
- htmlEscapeChar '\"' = """
- htmlEscapeChar c = [c]
-
-
-
-
-
-
-{-
-
-Doc: { { Section: { { Chars: { 19 } Symbol: { - } Chars: { Nov } }
- { P: { { Chars: { Two } WS Chars: { weeks } WS Chars: { ago } WS
- Chars: { I } WS Chars: { had } WS Chars: { Lasik } WS
- Chars: { performed } WS Chars: { at } WS
- Link: { { Chars: { the } WS Chars: { Pacific } WS Chars: { Vision }
- WS Chars: { Institute } }
- URL: { http DNS: { { pacificvision org } } { . } } }
- WS Chars: { The } WS Chars: { short } WS Chars: { story } WS
- Chars: { is } WS Chars: { that } WS Chars: { it } WS
- Chars: { rocks } Symbol: { , } WS Chars: { and } WS Chars: { I } WS
- Chars: { very } WS Chars: { highly } WS
- Chars: { recommend } WS Chars: { Dr } Symbol: { . } WS
- Chars: { Faktorovich }
- Symbol: { , } WS
- Chars: { as } WS
- Chars: { well }
- WS Chars: { as }
- WS
- Chars: { the }
-
--}
-\end{code}
-
-
-
-