From: adam Date: Wed, 23 May 2007 21:55:40 +0000 (-0700) Subject: initial release X-Git-Url: http://git.megacz.com/?p=wix.git;a=commitdiff_plain;h=3c96b1336f651fa3689e975f4793b55c43591d21 initial release darcs-hash:20070523215540-5007d-6c77ce822cbea83178ed5bc31f33146754f03eba.gz --- 3c96b1336f651fa3689e975f4793b55c43591d21 diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..007ac4b --- /dev/null +++ b/Makefile @@ -0,0 +1,56 @@ +ghcroot = /usr/local/brian/ghc +pwd = $(shell pwd) +ghc = $(ghcroot)/compiler/ghc-inplace -fallow-undecidable-instances -fallow-overlapping-instances +ghc += -fglasgow-exts -cpp -hidir $(pwd)/build/hi -i$(pwd)/build/hi -odir $(pwd)/build/class/ +ghclibs = $(ghcroot)/rts/HSrts.jar:$(ghcroot)/libraries/base/HSbase.jar:$(ghcroot)/libraries/stm/HSstm.jar + +java = java -Xmx800m +java += $(profile) -cp $(ghclibs):lib/edu.berkeley.sbp.jar:build/class + +run: build/class/Main.class build/class/Tib.class + $(java) HaskellHelper xt html + +install: build/class/Main.class build/class/Tib.class + $(java) HaskellHelper xt/ ~/www/ + +install2: build/class/Main.class build/class/Tib.class +# $(java) HaskellHelper xt/slipway/ ~/www/slipway/ +# $(java) HaskellHelper xt/research/ ~/www/research/ +# $(java) HaskellHelper xt/research.cs.berkeley.edu/ ~/www/research.cs.berkeley.edu/ +# $(java) HaskellHelper xt/random/ ~/www/random/ +# $(java) HaskellHelper xt/otherpeople/ ~/www/otherpeople/ +# $(java) HaskellHelper xt/sbp ~/www/sbp + $(java) HaskellHelper xt/software/wix ~/www/software/wix +# $(java) HaskellHelper xt/thoughts ~/www/thoughts +# $(java) HaskellHelper xt2 ~/ + +docs: + $(java) HaskellHelper xt2/docs/ ~/docs/ + +debug: build/class/Main.class build/class/Tib.class + $(java) -Dsbp.verbose=true HaskellHelper xt html + +wix.jar: build/class/Main.class build/class/Tib.class + rm -rf tmp + mkdir tmp + cd build/class; for A in $(ghcroot)/rts/HSrts.jar $(ghcroot)/libraries/base/HSbase.jar $(ghcroot)/libraries/stm/HSstm.jar ../../lib/edu.berkeley.sbp.jar; \ + do jar xvf $$A; done + echo 'Main-Class: HaskellHelper' > build/manifest + cd build/class; jar cvmf ../manifest ../../wix.jar . + +build/class/Tib.class: $(shell find src -name \*.java) + javac -d build/class -cp lib/edu.berkeley.sbp.jar $(shell find src -name \*.java) + +build/class/Main.class: $(shell find src -name \*.lhs) + mkdir -p build/class/Text/PrettyPrint + mkdir -p build/hi + cd src; $(ghc) -c -java Util.lhs + cd src; $(ghc) -c -java Text/PrettyPrint/Leijen.hs + cd src; $(ghc) -c -java SBP.lhs + cd src; $(ghc) -c -java FromTree.lhs + cd src; $(ghc) -c -java Doc.lhs + cd src; $(ghc) -c -java Html.lhs + cd src; $(ghc) -c -java Wix.lhs + cd src; $(ghc) -c -java Main.lhs + +clean:; rm -rf build \ No newline at end of file diff --git a/src/Doc.lhs b/src/Doc.lhs new file mode 100644 index 0000000..a36d367 --- /dev/null +++ b/src/Doc.lhs @@ -0,0 +1,242 @@ +\begin{code} +module Doc +where +import Numeric +import Data.Bits +import Data.Char +import SBP +import Util +import FromTree +import qualified Text.PrettyPrint.Leijen as PP + +data Doc = Doc Header [Section] +data Header = Header -- not yet specified + +data Section = Section + Int -- heading level + [Text] -- title + [Paragraph] -- content + +data Paragraph = Blockquote [Paragraph] + | P [Text] + | OL [[Paragraph]] -- list of items; each item is a [Paragraph] + | UL [[Paragraph]] + | HR + +data Style = TT | Underline | Superscript | Subscript + | Strikethrough | Italic | Bold | Highlight + +data Text = WS + | Chars String + | Quotes [Text] + | GlyphText Glyph + | Math String + | Command String [Text] + | Verbatim String + | Link [Text] URL + | Footnote [Text] + | Styled Style [Text] + | Keyword [Text] + | SubPar [Paragraph] + +data Glyph = Euro | CircleR | CircleC | TradeMark | ServiceMark + | Emdash | Ellipsis | Cent | Daggar | DoubleDaggar + | Clover | Flat | Sharp | Natural | CheckMark | XMark + | LeftArrow | DoubleLeftArrow | DoubleRightArrow + | DoubleLeftRightArrow | LeftRightArrow | Degree + +data Login = Login String (Maybe String) +data URL = URLPath String + | Email String Host + | URL { url_method :: String, + url_login :: Maybe Login, + url_host :: Host, + url_port :: Maybe Int, + url_path :: String, + url_ref :: Maybe String } +data Host = IP Int Int Int Int + | DNS [String] + +-- Doc ------------------------------------------------------------------------------ + +instance PP.Pretty Doc where + pretty _ = PP.text $ "" + +instance FromTree Doc where + fromTree (Tree "Doc" [a,b] _) = Doc Header $ fromTree b + fromTree t = error $ "unable to create Doc from " ++ (show t) + +-- Section ------------------------------------------------------------------------------ + +instance FromTree Section where + fromTree (Tree "Section" ((Tree "SectionHeader" [(Tree "=" e _),c] _):p) _) = + Section ((length e)-1) (fromTree c) $ concatMap fromTree p + fromTree t = error $ "couldnt Section.FromTree on " ++ (show t) + +-- Paragraph ------------------------------------------------------------------------------ + +instance FromTrees [Paragraph] where + fromTrees ts = consolidate $ concatMap fromTree ts +instance FromTree [Paragraph] where + fromTree t = consolidate $ fromTree' t + where + fromTree' (Tree "Verbatim" [ident,v] _) = [P [(Verbatim $ unindent ident $ unverbate v)]] + fromTree' (Tree "TextParagraph" [(Tree _ text _)] _) = [P $ concatMap fromTree text] + fromTree' (Tree "Pars" pars _) = concatMap fromTree pars + fromTree' (Tree "HR" _ _) = [HR] + fromTree' (Tree "OL" a _) = [OL $ map (\(Tree "LI" x _) -> fromTrees x) a] + fromTree' (Tree "UL" a _) = [UL $ map (\(Tree "LI" x _) -> fromTrees x) a] + fromTree' (Tree "" _ _) = [] + fromTree' (Tree "Blockquote" pars _) = [Blockquote $ fromTrees pars] + fromTree' t = error $ "unable to create [Paragraph] from " ++ (show t) + +consolidate [] = [] +consolidate [a] = [a] +consolidate ((OL []):x) = consolidate x +consolidate ((UL []):x) = consolidate x +consolidate ((OL a):(OL b):x) = consolidate ((OL $ a++b):x) +consolidate ((UL a):(UL b):x) = consolidate ((UL $ a++b):x) +consolidate (a:b) = a:(consolidate b) + +-- Verbatim ------------------------------------------------------------------------------ + +unverbate (Tree "Verbatim" x _) = concatMap unverbate x +unverbate (Tree "VerbatimBrace" [x,y] _) = (unverbate x)++" "++(unverbate y) +unverbate (Tree t [] _) = t + +unindent (Tree "I" indent _) v = unindent' ((length indent)+1) v + where + unindent' i ('\n':x) = '\n':(unindent' i (drop' i x)) + unindent' i (a:b) = a:(unindent' i b) + unindent' i [] = [] + drop' 0 x = x + drop' n x@('\n':r) = x + drop' n [] = [] + drop' n (a:b) = drop' (n-1) b + +-- Text ------------------------------------------------------------------------------ + +instance FromTree [Text] where + fromTree (Tree "Word" chars _) = [Chars $ concatMap fromTree chars] + fromTree (Tree "Ordinal" x _) = [Command "ordinal" $ [Chars (show x)]] + fromTree (Tree "Fraction" [n,d] _) = [Command "fraction" $ [(Chars (show n)), (Chars (show d))]] + fromTree (Tree "WS" _ _) = [WS] + fromTree (Tree "Quotes" [x] _) = [Quotes $ fromTree x] + fromTree (Tree "Pars" y _) = [SubPar $ fromTrees y] + fromTree (Tree "Command" [x,y] _) = [Command (fromTree x) (fromTree y)] + fromTree (Tree "Command" [x] _) = [Command (fromTree x) []] + fromTree (Tree "Link" [word,link] _) = [Link (fromTree word) (fromTree link)] + fromTree (Tree "Footnote" x _) = [Footnote $ concatMap fromTree x] + fromTree (Tree "Keyword" x _) = [Keyword $ concatMap fromTree x] + fromTree (Tree "Math" x _) = [Math $ fromTrees x] + fromTree (Tree "TT" x _) = [Styled TT $ concatMap fromTree x] + fromTree (Tree "Italic" [x] _) = [Styled Italic $ fromTree x] + fromTree (Tree "Bold" [x] _) = [Styled Bold $ fromTree x] + fromTree (Tree "Highlight" [x] _) = [Styled Highlight $ fromTree x] + fromTree (Tree "Strikethrough" x _) = [Styled Strikethrough $ concatMap fromTree x] + fromTree (Tree "Superscript" x _) = [Styled Superscript $ concatMap fromTree x] + fromTree (Tree "Subscript" x _) = [Styled Subscript $ concatMap fromTree x] + fromTree (Tree "Underline" x _) = [Styled Underline $ concatMap fromTree x] + fromTree (Tree "(e)" _ _) = [GlyphText Euro] + fromTree (Tree "(r)" _ _) = [GlyphText CircleR] + fromTree (Tree "(c)" _ _) = [GlyphText CircleC] + fromTree (Tree "(tm)" _ _) = [GlyphText TradeMark] + fromTree (Tree "--" _ _) = [GlyphText Emdash] + fromTree (Tree "<-" _ _) = [GlyphText LeftArrow] + fromTree (Tree "<=" _ _) = [GlyphText DoubleLeftArrow] + fromTree (Tree "=>" _ _) = [GlyphText DoubleRightArrow] + fromTree (Tree "<=>" _ _) = [GlyphText DoubleLeftRightArrow] + fromTree (Tree "<->" _ _) = [GlyphText LeftRightArrow] + fromTree (Tree "^o" _ _) = [GlyphText Degree] + fromTree (Tree "..." _ _) = [GlyphText Ellipsis] + fromTree (Tree "Text" t _) = concatMap fromTree t + fromTree (Tree "" [] _) = [] + fromTree t = error $ "unable to create [Text] from " ++ (show t) + +-- URLs ------------------------------------------------------------------------------ + +instance Show Login where + show (Login name Nothing) = name + show (Login name (Just pass)) = name++":"++(urlEscape pass) + +instance Show URL where + show (URLPath up) = up + show (Email s h) = "mailto:" ++ s ++ "@" ++ (show h) + show (URL { url_method=m, url_login=l, url_host=h, url_port=port, url_path=path, url_ref=ref }) = + m++"://"++ + (case l of + Nothing -> "" + (Just log) -> (show log)++"@") + ++(show h) + ++"/" + ++(urlEscape path) + ++(case ref of + Nothing -> "" + (Just []) -> "" + (Just j) -> "#"++(urlEscape j)) + +instance FromTree URL where + fromTree (Tree "URL" stuff _) = fromTrees stuff + fromTree (Tree "Email" [(Tree "username" un _),host] _) = Email (fromTrees un) (fromTree host) + fromTree (Tree "Path" stuff _) = URLPath $ map fromUrlChar stuff + where + fromUrlChar (Tree "%" [(Tree a [] _),(Tree b [] _)] _) = chr $ (fst $ head $ readHex (a++b)) + fromUrlChar (Tree [c] [] _) = c + fromUrlChar t = error $ "could not parse as an url char: " ++ (show t) + +fromTreeChildren (Tree _ c _) = fromTrees c +instance FromTrees URL where + fromTrees (method:login:host:port:rest) = + URL { url_method = fromTreeChildren method, + url_host = fromTree host, + url_login = Nothing, + url_port = case port of { (Tree "Port" port _) -> Just $ fromTrees port; _ -> Nothing }, + url_path = case rest of { ((Tree "Path" p _):_) -> fromTrees p; _ -> "" }, + url_ref = case rest of { (_:(Tree "Path" r _):_) -> Just $ fromTrees r; _ -> Nothing } + } + fromTrees x = error $ show x + +instance Show Host where + show (IP a b c d) = (show a)++"."++(show b)++"."++(show c)++"."++(show d) + show (DNS host) = join "." host + +instance FromTree Host where + fromTree (Tree "IP" (a:b:c:d:[]) _) = + IP (fromTreeChildren a) (fromTreeChildren b) (fromTreeChildren c) (fromTreeChildren d) + fromTree (Tree "DNS" parts _) = DNS $ map (\(Tree _ c _) -> fromTrees c) parts + +urlEscape s = concatMap urlEscapeChar s + where + -- non-alphanumerics which may appear unescaped + urlEscapeChar '$' = "$" + urlEscapeChar '-' = "-" + urlEscapeChar '_' = "_" + urlEscapeChar '.' = "." + urlEscapeChar '!' = "!" + urlEscapeChar '*' = "*" + urlEscapeChar '\'' = "\'" + urlEscapeChar '(' = "(" + urlEscapeChar ')' = ")" + urlEscapeChar ',' = "," + + -- technically these aren't allowed by RFC, but we include them anyways + urlEscapeChar '/' = "/" + urlEscapeChar ';' = ";" + urlEscapeChar '&' = "&" + urlEscapeChar '=' = "=" + urlEscapeChar '$' = "$" + + -- FIXME: this will wind up "disencoding" a %-encoded question mark + urlEscapeChar '?' = "?" + + urlEscapeChar c | c >= 'a' && c <= 'z' = [c] + | c >= 'A' && c <= 'Z' = [c] + | c >= '0' && c <= '9' = [c] + + -- encoded + | otherwise = '%':d1:d2:[] + where i = ord c + d1 = head $ showHex ((i .&. 0xff) `shiftR` 4) "" + d2 = head $ showHex ((i .&. 0x0f)) "" + +\end{code} diff --git a/src/FromTree.lhs b/src/FromTree.lhs new file mode 100644 index 0000000..aca7ef6 --- /dev/null +++ b/src/FromTree.lhs @@ -0,0 +1,25 @@ +\begin{code} +module FromTree +where +import SBP + +class FromTree a where + fromTree :: Tree -> a +class FromTrees a where + fromTrees :: [Tree] -> a +instance FromTree a => FromTree [a] where + fromTree (Tree _ c _) = map fromTree c +instance FromTree a => FromTrees [a] where + fromTrees c = map fromTree c + +instance FromTree String where + fromTree (Tree h c _) = h++(concatMap fromTree c) +instance FromTrees String where + fromTrees ts = concatMap (fromTree :: Tree -> String) ts + +instance FromTree Int where + fromTree t = read $ fromTree t +instance FromTrees Int where + fromTrees t = read $ fromTrees t + +\end{code} diff --git a/src/HaskellHelper.java b/src/HaskellHelper.java new file mode 100644 index 0000000..3ca1cae --- /dev/null +++ b/src/HaskellHelper.java @@ -0,0 +1,116 @@ +// Copyright 2006 all rights reserved; see LICENSE file for BSD-style license + +import edu.berkeley.sbp.*; +import edu.berkeley.sbp.misc.*; +import edu.berkeley.sbp.util.*; +import edu.berkeley.sbp.meta.*; +import edu.berkeley.sbp.chr.*; +import java.io.*; + +public class HaskellHelper { + public static boolean isNull(Object o) { return o==null; } + + private static CharParser parser = null; + static { + synchronized(HaskellHelper.class) { + if (parser == null) { + try { + // FIXME: bundle this into the jarfile + File grammarFile = new File("src/wix.g"); + Tree res = new CharParser(MetaGrammar.newInstance()) + .parse(new FileInputStream(grammarFile)).expand1(); + Union grammar = GrammarAST.buildFromAST(res, "s", new File[] { + new File(grammarFile.getParent()) + }); + parser = new CharParser(grammar); + } catch (Exception e) { + throw new RuntimeException(e); + } + } + } + } + + public static Tree help(String targetFile) throws Throwable { + Tree ret = null; + try { + Reader r = new InputStreamReader(new FileInputStream(targetFile)); + Input input = new CharInput(new IndentingReader(r, CharAtom.left, CharAtom.right)); + ret = parser.parse(input).expand1(); + } catch (Throwable e) { + e.printStackTrace(); + throw e; + } + if (ret==null) throw new NullPointerException("CharParser returned null"); + return ret; + } + + public static void main(String[] argv) throws Throwable { + if (argv.length != 2) { + System.out.println("usage: java -jar wix.jar [-v] "); + // FIXME: implement this + System.out.println(" | java -jar wix.jar [-v] .wix"); + System.out.println(""); + // FIXME: implement these + System.out.println(" -v print text as it is parsed (sbp.verbose=true)"); + System.out.println(" -vv like -v, but also dump parse tree"); + System.out.println(" -vvv like -vv, but also dump wix tree"); + System.exit(-1); + return; + } + File indir = new File(argv[0]); + File outdir = new File(argv[1]); + process(indir, "", outdir); + } + + private static void process(File indir, String suffix, File outdir) throws Throwable { + File f = new File(indir.getAbsolutePath()+suffix); + if (!f.exists()) return; + if (f.isDirectory()) { + for (String s : f.list()) + process(indir, suffix + File.separatorChar + s, outdir); + return; + } + if (f.getPath().endsWith(".wix")) { + System.out.println(); + String out = "== " + suffix + " "; + while(out.length() < 75) out+="="; + System.out.println(ANSI.yellow(out)); + Class.forName("Main"). + getMethod("main", new Class[] { String[].class }). + invoke(null, new Object[] { new String[] { f.getAbsolutePath() } }); + String outPath = outdir.getAbsolutePath()+suffix; + outPath = outPath.substring(0, outPath.length()-".wix".length())+".html"; + new File(new File(outPath).getParent()).mkdirs(); + PrintWriter pw = new PrintWriter(new OutputStreamWriter(new FileOutputStream(outPath+"+"))); + pw.println(ret); + pw.flush(); + pw.close(); + File dest = new File(outPath); + if (dest.exists()) { + try { + Process p = Runtime.getRuntime().exec(new String[] { + "diff", + "-Bub", + dest.getAbsolutePath(), + new File(outPath+"+").getAbsolutePath() + }); + BufferedReader br = new BufferedReader(new InputStreamReader(p.getInputStream())); + br.readLine(); + br.readLine(); + for(String s = br.readLine(); s != null; s = br.readLine()) { + if (s.startsWith("+")) System.out.println(ANSI.green(s)); + else if (s.startsWith("-")) System.out.println(ANSI.red(s)); + /*else System.out.println(ANSI.blue(s));*/ + } + p.waitFor(); + } catch (Exception e) { + e.printStackTrace(); + } + } + new File(outPath+"+").renameTo(dest); + } + } + + public static Object ret; + public static void putBack(String o) { ret = o; } +} diff --git a/src/Html.lhs b/src/Html.lhs new file mode 100644 index 0000000..e244bca --- /dev/null +++ b/src/Html.lhs @@ -0,0 +1,240 @@ +\begin{code} +module Html +where +import SBP +import FromTree +import Doc +import List(isSuffixOf,isPrefixOf) + +-- FIXME: use pretty-printing when asked to for better display + +class ToHtml a where + toHtml :: a -> String +instance ToHtml a => ToHtml [a] where + toHtml x = concatMap toHtml x + +style = + "\n\n" + + +instance ToHtml Doc where + toHtml (Doc h secs) = + "\n"++ + "\n\n"++ + "\n"++ + "\n"++ + style++ + --FIXME: title tag + "\n"++ + "\n"++ -- tell jsmath we will escape stuff manually + + -- FIXME: only put this in if math appears on the page + "\n"++ + "\n"++ + "\n"++ + "\n"++ + + "
\n"++ + (toHtml secs) ++ + "

\n"++ + ""++ + "\n"++ + "
\n"++ + "" + +instance ToHtml Section where + toHtml (Section level header paragraphs) = + "\n\n"++ + (toHtml header)++ + "\n\n"++ + (toHtml paragraphs) +stag t body = "\n<"++t++">\n"++body++"\n\n" +tag t body = "<"++t++">"++body++"" + +instance ToHtml Paragraph where + toHtml (Blockquote t) = "\n\n" + ++"\n" + ++"
\n" + ++(toHtml t) + ++"
\n" + toHtml HR = stag "hr" [] + toHtml (OL t) = stag "ol" $ concatMap (\x -> stag "li" $ concatMap toHtml x) t + toHtml (UL t) = stag "ul" $ concatMap (\x -> stag "li" $ concatMap toHtml x) t + toHtml (P t) = stag "p" $ toHtml t + + +link ref body = ""++icon++body++"" + where + icon = if ".pdf" `isSuffixOf` ref then " " + else if "mailto:" `isPrefixOf` ref then " " + else "" + img = "style='vertical-align: text-bottom;' border=0 " +-- margin-bottom: -2px; padding-bottom: 2px; border-bottom: 1px blue solid; + +instance ToHtml Text where + toHtml WS = " " + toHtml (Chars s) = toHtml s + + -- directional quotes: see http://www.dwheeler.com/essays/quotes-in-html.html + toHtml (Quotes x) = "“"++(toHtml x)++"”" + toHtml (Verbatim x) = pre x + toHtml (Link t ref) = link (show ref) (toHtml t) + toHtml (Command "url" y) = ""++(link (toHtml y) (toHtml y))++"" + toHtml (Command "WiX" y) = "WIX" + toHtml (Command "TeX" y) = "TEX" +-- u'1/2' : u'\u00BD', +-- u'1/4' : u'\u00BC', +-- u'3/4' : u'\u00BE', +-- u'1/3' : u'\u2153', +-- u'2/3' : u'\u2154', +-- u'1/5' : u'\u2155', +-- u'2/5' : u'\u2156', +-- u'3/5' : u'\u2157', +-- u'4/5' : u'\u2158', +-- u'1/6' : u'\u2159', +-- u'5/6' : u'\u215A', +-- u'1/8' : u'\u215B', +-- u'3/8' : u'\u215C', +-- u'5/8' : u'\u215D', +-- u'7/8' : u'\u215E', + + toHtml (Styled Underline x) = tag "u" $ toHtml x + toHtml (Styled TT x) = tag "tt" $ toHtml x + toHtml (Styled Italic x) = tag "i" $ toHtml x + toHtml (Styled Strikethrough x) = tag "strike" $ toHtml x + toHtml (Styled Superscript x) = tag "sup" $ toHtml x + toHtml (Styled Subscript x) = tag "sub" $ toHtml x + toHtml (Styled Bold x) = tag "b" $ toHtml x + toHtml (Styled Highlight x) = ""++(toHtml x)++"" + + toHtml (Keyword x) = tag "tt" $ toHtml x + toHtml (SubPar x) = stag "p" $ concatMap toHtml x + + toHtml (Command "red" y) = ""++(toHtml y)++"" + toHtml (Command "orange" y) = ""++(toHtml y)++"" + toHtml (Command "green" y) = ""++(toHtml y)++"" + toHtml (Command "sc" y) = ""++(toHtml y)++"" + toHtml (Command "image" y) = "" + toHtml (Command "image3" y) = "" + toHtml (Command "image4" y) = "
" + toHtml (Command "warn" y) = "\n
\n\n" + ++"\n" + ++"
\n" + ++(toHtml y) + ++"
\n" + toHtml (Command "announce" y) = "\n
\n\n" + ++"\n" + ++"
\n" + ++(toHtml y) + ++"
\n" + toHtml (Command "br" _) = "\n
\n" + toHtml (Command "cent" _) = "½" + toHtml (Command "euro" _) = "€" + toHtml (Command "ordinal" x) = (toHtml x)++""++"th"++"" + -- FIXME: use "unicode vulgar fractions" here + toHtml (Command "fraction" [n,d]) = ""++(toHtml n)++""++"/"++""++(toHtml d)++"" + toHtml (Command "rfc" x) = "RFC"++(toHtml x)++"" + + -- FIXME: add div as well (for display-mode math) + toHtml (Math m) = "" ++ (toHtml m) ++ "" + toHtml (Footnote x) = error $ "footnotes not supported" + + toHtml (GlyphText Euro) = "€" + toHtml (GlyphText CircleR) = "¢" + toHtml (GlyphText CircleC) = "®" + toHtml (GlyphText TradeMark) = "™" + toHtml (GlyphText ServiceMark) = "™" + toHtml (GlyphText Emdash) = "—" + toHtml (GlyphText Ellipsis) = "…" + toHtml (GlyphText Cent) = "½" + toHtml (GlyphText Daggar) = "†" + toHtml (GlyphText DoubleDaggar) = "‡" + toHtml (GlyphText Clover) = "⌘" + toHtml (GlyphText Flat) = "⋖" + toHtml (GlyphText Natural) = "⋗" + toHtml (GlyphText Sharp) = "⋘" + toHtml (GlyphText CheckMark) = "✓" + toHtml (GlyphText XMark) = "✗" + toHtml (GlyphText LeftArrow) = "&#;" -- FIXME + toHtml (GlyphText DoubleLeftArrow) = "&#;" -- FIXME + toHtml (GlyphText DoubleRightArrow) = "&#;" -- FIXME + toHtml (GlyphText DoubleLeftRightArrow) = "&#;" -- FIXME + toHtml (GlyphText LeftRightArrow) = "&#;" -- FIXME + toHtml (GlyphText Degree) = "&#;" -- FIXME + + toHtml (Command ('k':'e':'y':'s':'t':'r':'o':'k':'e':':':k) _) = + "&#x"++(case k of + "command" -> "2318" + "shift" -> "21E7" + "option" -> "2325" + "control" -> "2303" + "capslock" -> "21EA" + "apple" -> "F8FF" + )++";" + toHtml (Command x y) = error $ "unsupported command "++(show x) + +instance ToHtml String where + toHtml s = concatMap htmlEscapeChar s + where + htmlEscapeChar '<' = "<" + htmlEscapeChar '>' = ">" + htmlEscapeChar '&' = "&" + htmlEscapeChar '\'' = "'" + htmlEscapeChar '\"' = """ + htmlEscapeChar c = [c] + +pre x = "\n
"++ (pre' x) ++ "\n
\n" + where + pre' (' ':b) = " "++(pre' b) + pre' ('\n':b) = "
\n"++(pre' b) + pre' (a:b) = a:(pre' b) + pre' [] = [] +\end{code} diff --git a/src/Main.lhs b/src/Main.lhs new file mode 100644 index 0000000..62c78b2 --- /dev/null +++ b/src/Main.lhs @@ -0,0 +1,10 @@ +\begin{code} +module Main +where +import Wix(main') +import System + +main = do x <- getArgs + main' (head x) 0 + +\end{code} diff --git a/src/SBP.lhs b/src/SBP.lhs new file mode 100644 index 0000000..de32f62 --- /dev/null +++ b/src/SBP.lhs @@ -0,0 +1,159 @@ +\begin{code} +-- +-- These bindings are highly experimental and subject to change +-- without notice. You've been warned. +-- +module SBP(Tree(Tree),Location(Location),Region(Region),parseFile,prettyPrintTree,coalesceFlatHeadlessNodes) +where + +#if defined(java_HOST_OS) +import Foreign +import Foreign.Java +#define CONCAT(x,y) x/**/y +#define DEFINE_OBJECT(s,name) \ +data CONCAT(name,_); \ +type name = JObject CONCAT(name,_); \ +foreign import jvm s CONCAT(_,name) :: JClass; \ +instance JType_ CONCAT(name,_) where jClass_ _ = CONCAT(_,name); +#else +import Header_Java; +import Class_HaskellHelper; +import Class_java_lang_Object; +import Class_java_lang_Class; +import Class_java_lang_String; +import Class_edu_berkeley_sbp_Tree; +import Header_edu_berkeley_sbp_Tree; +import JVM_HaskellHelper; +import Header_HaskellHelper; +import TypedString; +import JVMBridge; +import JavaText; +import JavaTypes; +import Data.Int; +import Invocation; +#endif +--import Text.PrettyPrint.HughesPJ +import Text.PrettyPrint.Leijen + +data Location = Location Int Int +data Region = Region Location Location + +data Tree = Tree String [Tree] Region +instance Show Tree + where + show t@(Tree _ _ _) = show $ prettyPrintTree $ t + +coalesceFlatHeadlessNodes t@(Tree s children r) + | s==[], flat t = Tree (concat $ map (\(Tree s _ _) -> s) children) [] r + | otherwise = Tree s (map coalesceFlatHeadlessNodes children) r + where + flat (Tree _ children _) = not (any id $ map notFlatComponent children) + notFlatComponent (Tree _ [] _) = False + notFlatComponent (Tree _ _ _) = True + +fsep = fillSep +prettyPrintTree (Tree "" [] _) = empty +prettyPrintTree (Tree s [] _) = text s +prettyPrintTree (Tree [] children _) = prettyPrintTreeList children +prettyPrintTree (Tree s children _) = (text (s++":")) <$$> (prettyPrintTreeList children) +prettyPrintTreeList [] = text "{}" +prettyPrintTreeList children + | allsingles children = text $ "\"" ++ (concatMap (\(Tree s _ _) -> s) children) ++ "\"" + | otherwise = hang 2 $ + (text "{") + <+> + (group + ((fsep $ map (group . prettyPrintTree) children) + <+> + (text "}"))) +allsingles = all (\(Tree s c _) -> (length s)==1 && (length c)==0) + +nullRegion = (Region (Location 0 0) (Location 0 0)) + + + +------------------------------------------------------------------------------ +#if defined(java_HOST_OS) +foreign import jvm type "edu.berkeley.sbp.Tree" JTree# +data JTree = JTree JTree# +foreign import jvm type "java.lang.Object" Object# +data Object = Object Object# +foreign import jvm safe "edu.berkeley.sbp.misc.RegressionTests.main" regressionTests :: IO () +foreign import jvm safe "HaskellHelper.help" haskellHelper :: JString -> IO JTree +foreign import jvm safe "HaskellHelper.isNull" isNull :: Object -> IO Bool +foreign import jvm safe "getHead" getHead :: JTree -> IO Object +foreign import jvm safe "child" getChild :: JTree -> Int32 -> IO JTree +foreign import jvm safe "size" size :: JTree -> IO Int32 +foreign import jvm safe "toString" jtoString :: Object -> IO JString + +toString o = do isn <- isNull o + if isn then return "" + else do str <- jtoString o + return (unpackJString str) + + +haskify :: JTree -> IO Tree +haskify t = + do head <- getHead t + str <- toString head + numChildren <- size t + children <- if numChildren == 0 + then do return [] + else do children <- mapM (\i -> getChild t i) + $ take (fromIntegral numChildren) + $ iterate (+1) 0 + h <- mapM haskify children + return h + return $ Tree str children nullRegion + +parseFile :: + String -> -- file to be parsed + IO Tree + +parseFile f = do f' <- packJString f + tree <- haskellHelper f' + x <- haskify tree + return x + +------------------------------------------------------------------------------ +#else + -- Why do I need this? + instance SubJavaClassMarker + Header_edu_berkeley_sbp_Tree.Class_Jedu_berkeley_sbp_Tree + Header_HaskellHelper.Class_Jedu_berkeley_sbp_Tree + + parseFile :: + [String] -> -- class path + String -> -- grammar *.g file + String -> -- file to be parsed + IO Tree + + parseFile classPath grammarFile inputFile = + runJVM classPath + ((do class_JHaskellHelper + s1 <- new_JString_ArrayJchar $ toJavaString grammarFile + s2 <- new_JString_ArrayJchar $ toJavaString inputFile + tree <- main_JHaskellHelper_JString_JString (s1, s2) + t <- haskifyTree tree + return t + ) :: JVM Tree) + + haskifyTree t = + ((do class_JHaskellHelper + class_JTree + head <- getHead_JTree t () + isNull <- getIsNothing head + str <- if isNull then (return "") else (toString_JObject ((castTLRef head) :: Jjava_lang_Object) () >>= getStringUTF >>= \x -> return (showUTF8 x)) + numChildren <- size_JTree t() + children <- if numChildren == 0 + then do return [] + else do children <- mapM (\i -> child_JTree_Jint t ((fromIntegral i)::Int32)) + $ take (fromIntegral numChildren) + $ iterate (+1) 0 + h <- mapM (\c -> haskifyTree (castTLRef c)) children + return h + return $ Tree str children nullRegion + ) :: JVM Tree) + +#endif +\end{code} diff --git a/src/Text/PrettyPrint/Leijen.hs b/src/Text/PrettyPrint/Leijen.hs new file mode 100644 index 0000000..af93e81 --- /dev/null +++ b/src/Text/PrettyPrint/Leijen.hs @@ -0,0 +1,959 @@ +----------------------------------------------------------------------------- +-- | +-- Module : Text.PrettyPrint.Leijen +-- Copyright : Daan Leijen (c) 2000, http://www.cs.uu.nl/~daan +-- License : BSD-style (see the file LICENSE) +-- +-- Maintainer : stefanor@cox.net +-- Stability : provisional +-- Portability : portable +-- +-- Pretty print module based on Philip Wadler's \"prettier printer\" +-- +-- @ +-- \"A prettier printer\" +-- Draft paper, April 1997, revised March 1998. +-- +-- @ +-- +-- PPrint is an implementation of the pretty printing combinators +-- described by Philip Wadler (1997). In their bare essence, the +-- combinators of Wadler are not expressive enough to describe some +-- commonly occurring layouts. The PPrint library adds new primitives +-- to describe these layouts and works well in practice. +-- +-- The library is based on a single way to concatenate documents, +-- which is associative and has both a left and right unit. This +-- simple design leads to an efficient and short implementation. The +-- simplicity is reflected in the predictable behaviour of the +-- combinators which make them easy to use in practice. +-- +-- A thorough description of the primitive combinators and their +-- implementation can be found in Philip Wadler's paper +-- (1997). Additions and the main differences with his original paper +-- are: +-- +-- * The nil document is called empty. +-- +-- * The above combinator is called '<$>'. The operator '' is used +-- for soft line breaks. +-- +-- * There are three new primitives: 'align', 'fill' and +-- 'fillBreak'. These are very useful in practice. +-- +-- * Lots of other useful combinators, like 'fillSep' and 'list'. +-- +-- * There are two renderers, 'renderPretty' for pretty printing and +-- 'renderCompact' for compact output. The pretty printing algorithm +-- also uses a ribbon-width now for even prettier output. +-- +-- * There are two displayers, 'displayS' for strings and 'displayIO' for +-- file based output. +-- +-- * There is a 'Pretty' class. +-- +-- * The implementation uses optimised representations and strictness +-- annotations. +-- +-- Full documentation available at . +----------------------------------------------------------- +module Text.PrettyPrint.Leijen ( + -- * Documents + Doc, putDoc, hPutDoc, + + -- * Basic combinators + empty, char, text, (<>), nest, line, linebreak, group, softline, + softbreak, + + -- * Alignment + -- + -- The combinators in this section can not be described by Wadler's + -- original combinators. They align their output relative to the + -- current output position - in contrast to @nest@ which always + -- aligns to the current nesting level. This deprives these + -- combinators from being \`optimal\'. In practice however they + -- prove to be very useful. The combinators in this section should + -- be used with care, since they are more expensive than the other + -- combinators. For example, @align@ shouldn't be used to pretty + -- print all top-level declarations of a language, but using @hang@ + -- for let expressions is fine. + align, hang, indent, encloseSep, list, tupled, semiBraces, + + -- * Operators + (<+>), (<$>), (), (<$$>), (), + + -- * List combinators + hsep, vsep, fillSep, sep, hcat, vcat, fillCat, cat, punctuate, + + -- * Fillers + fill, fillBreak, + + -- * Bracketing combinators + enclose, squotes, dquotes, parens, angles, braces, brackets, + + -- * Character documents + lparen, rparen, langle, rangle, lbrace, rbrace, lbracket, rbracket, + squote, dquote, semi, colon, comma, space, dot, backslash, equals, + + -- * Primitive type documents + string, int, integer, float, double, rational, + + -- * Pretty class + Pretty(..), + + -- * Rendering + SimpleDoc(..), renderPretty, renderCompact, displayS, displayIO + + -- * Undocumented + , bool + + , column, nesting, width + + ) where + +import System.IO (Handle,hPutStr,hPutChar,stdout) + +infixr 5 ,,<$>,<$$> +infixr 6 <>,<+> + + +----------------------------------------------------------- +-- list, tupled and semiBraces pretty print a list of +-- documents either horizontally or vertically aligned. +----------------------------------------------------------- + + +-- | The document @(list xs)@ comma separates the documents @xs@ and +-- encloses them in square brackets. The documents are rendered +-- horizontally if that fits the page. Otherwise they are aligned +-- vertically. All comma separators are put in front of the elements. +list :: [Doc] -> Doc +list = encloseSep lbracket rbracket comma + +-- | The document @(tupled xs)@ comma separates the documents @xs@ and +-- encloses them in parenthesis. The documents are rendered +-- horizontally if that fits the page. Otherwise they are aligned +-- vertically. All comma separators are put in front of the elements. +tupled :: [Doc] -> Doc +tupled = encloseSep lparen rparen comma + + +-- | The document @(semiBraces xs)@ separates the documents @xs@ with +-- semi colons and encloses them in braces. The documents are rendered +-- horizontally if that fits the page. Otherwise they are aligned +-- vertically. All semi colons are put in front of the elements. +semiBraces :: [Doc] -> Doc +semiBraces = encloseSep lbrace rbrace semi + +-- | The document @(encloseSep l r sep xs)@ concatenates the documents +-- @xs@ separated by @sep@ and encloses the resulting document by @l@ +-- and @r@. The documents are rendered horizontally if that fits the +-- page. Otherwise they are aligned vertically. All separators are put +-- in front of the elements. For example, the combinator 'list' can be +-- defined with @encloseSep@: +-- +-- > list xs = encloseSep lbracket rbracket comma xs +-- > test = text "list" <+> (list (map int [10,200,3000])) +-- +-- Which is layed out with a page width of 20 as: +-- +-- @ +-- list [10,200,3000] +-- @ +-- +-- But when the page width is 15, it is layed out as: +-- +-- @ +-- list [10 +-- ,200 +-- ,3000] +-- @ +encloseSep :: Doc -> Doc -> Doc -> [Doc] -> Doc +encloseSep left right sep ds + = case ds of + [] -> left <> right + [d] -> left <> d <> right + _ -> align (cat (zipWith (<>) (left : repeat sep) ds) <> right) + + +----------------------------------------------------------- +-- punctuate p [d1,d2,...,dn] => [d1 <> p,d2 <> p, ... ,dn] +----------------------------------------------------------- + + +-- | @(punctuate p xs)@ concatenates all documents in @xs@ with +-- document @p@ except for the last document. +-- +-- > someText = map text ["words","in","a","tuple"] +-- > test = parens (align (cat (punctuate comma someText))) +-- +-- This is layed out on a page width of 20 as: +-- +-- @ +-- (words,in,a,tuple) +-- @ +-- +-- But when the page width is 15, it is layed out as: +-- +-- @ +-- (words, +-- in, +-- a, +-- tuple) +-- @ +-- +-- (If you want put the commas in front of their elements instead of +-- at the end, you should use 'tupled' or, in general, 'encloseSep'.) +punctuate :: Doc -> [Doc] -> [Doc] +punctuate p [] = [] +punctuate p [d] = [d] +punctuate p (d:ds) = (d <> p) : punctuate p ds + + +----------------------------------------------------------- +-- high-level combinators +----------------------------------------------------------- + + +-- | The document @(sep xs)@ concatenates all documents @xs@ either +-- horizontally with @(\<+\>)@, if it fits the page, or vertically with +-- @(\<$\>)@. +-- +-- > sep xs = group (vsep xs) +sep :: [Doc] -> Doc +sep = group . vsep + +-- | The document @(fillSep xs)@ concatenates documents @xs@ +-- horizontally with @(\<+\>)@ as long as its fits the page, than +-- inserts a @line@ and continues doing that for all documents in +-- @xs@. +-- +-- > fillSep xs = foldr (\<\/\>) empty xs +fillSep :: [Doc] -> Doc +fillSep = fold () + +-- | The document @(hsep xs)@ concatenates all documents @xs@ +-- horizontally with @(\<+\>)@. +hsep :: [Doc] -> Doc +hsep = fold (<+>) + + +-- | The document @(vsep xs)@ concatenates all documents @xs@ +-- vertically with @(\<$\>)@. If a 'group' undoes the line breaks +-- inserted by @vsep@, all documents are separated with a space. +-- +-- > someText = map text (words ("text to lay out")) +-- > +-- > test = text "some" <+> vsep someText +-- +-- This is layed out as: +-- +-- @ +-- some text +-- to +-- lay +-- out +-- @ +-- +-- The 'align' combinator can be used to align the documents under +-- their first element +-- +-- > test = text "some" <+> align (vsep someText) +-- +-- Which is printed as: +-- +-- @ +-- some text +-- to +-- lay +-- out +-- @ +vsep :: [Doc] -> Doc +vsep = fold (<$>) + +-- | The document @(cat xs)@ concatenates all documents @xs@ either +-- horizontally with @(\<\>)@, if it fits the page, or vertically with +-- @(\<$$\>)@. +-- +-- > cat xs = group (vcat xs) +cat :: [Doc] -> Doc +cat = group . vcat + +-- | The document @(fillCat xs)@ concatenates documents @xs@ +-- horizontally with @(\<\>)@ as long as its fits the page, than inserts +-- a @linebreak@ and continues doing that for all documents in @xs@. +-- +-- > fillCat xs = foldr (\<\/\/\>) empty xs +fillCat :: [Doc] -> Doc +fillCat = fold () + +-- | The document @(hcat xs)@ concatenates all documents @xs@ +-- horizontally with @(\<\>)@. +hcat :: [Doc] -> Doc +hcat = fold (<>) + +-- | The document @(vcat xs)@ concatenates all documents @xs@ +-- vertically with @(\<$$\>)@. If a 'group' undoes the line breaks +-- inserted by @vcat@, all documents are directly concatenated. +vcat :: [Doc] -> Doc +vcat = fold (<$$>) + +fold f [] = empty +fold f ds = foldr1 f ds + +-- | The document @(x \<\> y)@ concatenates document @x@ and document +-- @y@. It is an associative operation having 'empty' as a left and +-- right unit. (infixr 6) +(<>) :: Doc -> Doc -> Doc +x <> y = x `beside` y + +-- | The document @(x \<+\> y)@ concatenates document @x@ and @y@ with a +-- @space@ in between. (infixr 6) +(<+>) :: Doc -> Doc -> Doc +x <+> y = x <> space <> y + +-- | The document @(x \<\/\> y)@ concatenates document @x@ and @y@ with a +-- 'softline' in between. This effectively puts @x@ and @y@ either +-- next to each other (with a @space@ in between) or underneath each +-- other. (infixr 5) +() :: Doc -> Doc -> Doc +x y = x <> softline <> y + +-- | The document @(x \<\/\/\> y)@ concatenates document @x@ and @y@ with +-- a 'softbreak' in between. This effectively puts @x@ and @y@ either +-- right next to each other or underneath each other. (infixr 5) +() :: Doc -> Doc -> Doc +x y = x <> softbreak <> y + +-- | The document @(x \<$\> y)@ concatenates document @x@ and @y@ with a +-- 'line' in between. (infixr 5) +(<$>) :: Doc -> Doc -> Doc +x <$> y = x <> line <> y + +-- | The document @(x \<$$\> y)@ concatenates document @x@ and @y@ with +-- a @linebreak@ in between. (infixr 5) +(<$$>) :: Doc -> Doc -> Doc +x <$$> y = x <> linebreak <> y + +-- | The document @softline@ behaves like 'space' if the resulting +-- output fits the page, otherwise it behaves like 'line'. +-- +-- > softline = group line +softline :: Doc +softline = group line + +-- | The document @softbreak@ behaves like 'empty' if the resulting +-- output fits the page, otherwise it behaves like 'line'. +-- +-- > softbreak = group linebreak +softbreak :: Doc +softbreak = group linebreak + +-- | Document @(squotes x)@ encloses document @x@ with single quotes +-- \"'\". +squotes :: Doc -> Doc +squotes = enclose squote squote + +-- | Document @(dquotes x)@ encloses document @x@ with double quotes +-- '\"'. +dquotes :: Doc -> Doc +dquotes = enclose dquote dquote + +-- | Document @(braces x)@ encloses document @x@ in braces, \"{\" and +-- \"}\". +braces :: Doc -> Doc +braces = enclose lbrace rbrace + +-- | Document @(parens x)@ encloses document @x@ in parenthesis, \"(\" +-- and \")\". +parens :: Doc -> Doc +parens = enclose lparen rparen + +-- | Document @(angles x)@ encloses document @x@ in angles, \"\<\" and +-- \"\>\". +angles :: Doc -> Doc +angles = enclose langle rangle + +-- | Document @(brackets x)@ encloses document @x@ in square brackets, +-- \"[\" and \"]\". +brackets :: Doc -> Doc +brackets = enclose lbracket rbracket + +-- | The document @(enclose l r x)@ encloses document @x@ between +-- documents @l@ and @r@ using @(\<\>)@. +-- +-- > enclose l r x = l <> x <> r +enclose :: Doc -> Doc -> Doc -> Doc +enclose l r x = l <> x <> r + +-- | The document @lparen@ contains a left parenthesis, \"(\". +lparen :: Doc +lparen = char '(' +-- | The document @rparen@ contains a right parenthesis, \")\". +rparen :: Doc +rparen = char ')' +-- | The document @langle@ contains a left angle, \"\<\". +langle :: Doc +langle = char '<' +-- | The document @rangle@ contains a right angle, \">\". +rangle :: Doc +rangle = char '>' +-- | The document @lbrace@ contains a left brace, \"{\". +lbrace :: Doc +lbrace = char '{' +-- | The document @rbrace@ contains a right brace, \"}\". +rbrace :: Doc +rbrace = char '}' +-- | The document @lbracket@ contains a left square bracket, \"[\". +lbracket :: Doc +lbracket = char '[' +-- | The document @rbracket@ contains a right square bracket, \"]\". +rbracket :: Doc +rbracket = char ']' + + +-- | The document @squote@ contains a single quote, \"'\". +squote :: Doc +squote = char '\'' +-- | The document @dquote@ contains a double quote, '\"'. +dquote :: Doc +dquote = char '"' +-- | The document @semi@ contains a semi colon, \";\". +semi :: Doc +semi = char ';' +-- | The document @colon@ contains a colon, \":\". +colon :: Doc +colon = char ':' +-- | The document @comma@ contains a comma, \",\". +comma :: Doc +comma = char ',' +-- | The document @space@ contains a single space, \" \". +-- +-- > x <+> y = x <> space <> y +space :: Doc +space = char ' ' +-- | The document @dot@ contains a single dot, \".\". +dot :: Doc +dot = char '.' +-- | The document @backslash@ contains a back slash, \"\\\". +backslash :: Doc +backslash = char '\\' +-- | The document @equals@ contains an equal sign, \"=\". +equals :: Doc +equals = char '=' + + +----------------------------------------------------------- +-- Combinators for prelude types +----------------------------------------------------------- + +-- string is like "text" but replaces '\n' by "line" + +-- | The document @(string s)@ concatenates all characters in @s@ +-- using @line@ for newline characters and @char@ for all other +-- characters. It is used instead of 'text' whenever the text contains +-- newline characters. +string :: String -> Doc +string "" = empty +string ('\n':s) = line <> string s +string s = case (span (/='\n') s) of + (xs,ys) -> text xs <> string ys + +bool :: Bool -> Doc +bool b = text (show b) + +-- | The document @(int i)@ shows the literal integer @i@ using +-- 'text'. +int :: Int -> Doc +int i = text (show i) + +-- | The document @(integer i)@ shows the literal integer @i@ using +-- 'text'. +integer :: Integer -> Doc +integer i = text (show i) + +-- | The document @(float f)@ shows the literal float @f@ using +-- 'text'. +float :: Float -> Doc +float f = text (show f) + +-- | The document @(double d)@ shows the literal double @d@ using +-- 'text'. +double :: Double -> Doc +double d = text (show d) + +-- | The document @(rational r)@ shows the literal rational @r@ using +-- 'text'. +rational :: Rational -> Doc +rational r = text (show r) + + +----------------------------------------------------------- +-- overloading "pretty" +----------------------------------------------------------- + +-- | The member @prettyList@ is only used to define the @instance Pretty +-- a => Pretty [a]@. In normal circumstances only the @pretty@ function +-- is used. +class Pretty a where + pretty :: a -> Doc + prettyList :: [a] -> Doc + prettyList = list . map pretty + +instance Pretty a => Pretty [a] where + pretty = prettyList + +instance Pretty Doc where + pretty = id + +instance Pretty () where + pretty () = text "()" + +instance Pretty Bool where + pretty b = bool b + +instance Pretty Char where + pretty c = char c + prettyList s = string s + +instance Pretty Int where + pretty i = int i + +instance Pretty Integer where + pretty i = integer i + +instance Pretty Float where + pretty f = float f + +instance Pretty Double where + pretty d = double d + + +--instance Pretty Rational where +-- pretty r = rational r + +instance (Pretty a,Pretty b) => Pretty (a,b) where + pretty (x,y) = tupled [pretty x, pretty y] + +instance (Pretty a,Pretty b,Pretty c) => Pretty (a,b,c) where + pretty (x,y,z)= tupled [pretty x, pretty y, pretty z] + +instance Pretty a => Pretty (Maybe a) where + pretty Nothing = empty + pretty (Just x) = pretty x + + + +----------------------------------------------------------- +-- semi primitive: fill and fillBreak +----------------------------------------------------------- + +-- | The document @(fillBreak i x)@ first renders document @x@. It +-- than appends @space@s until the width is equal to @i@. If the +-- width of @x@ is already larger than @i@, the nesting level is +-- increased by @i@ and a @line@ is appended. When we redefine @ptype@ +-- in the previous example to use @fillBreak@, we get a useful +-- variation of the previous output: +-- +-- > ptype (name,tp) +-- > = fillBreak 6 (text name) <+> text "::" <+> text tp +-- +-- The output will now be: +-- +-- @ +-- let empty :: Doc +-- nest :: Int -> Doc -> Doc +-- linebreak +-- :: Doc +-- @ +fillBreak :: Int -> Doc -> Doc +fillBreak f x = width x (\w -> + if (w > f) then nest f linebreak + else text (spaces (f - w))) + + +-- | The document @(fill i x)@ renders document @x@. It than appends +-- @space@s until the width is equal to @i@. If the width of @x@ is +-- already larger, nothing is appended. This combinator is quite +-- useful in practice to output a list of bindings. The following +-- example demonstrates this. +-- +-- > types = [("empty","Doc") +-- > ,("nest","Int -> Doc -> Doc") +-- > ,("linebreak","Doc")] +-- > +-- > ptype (name,tp) +-- > = fill 6 (text name) <+> text "::" <+> text tp +-- > +-- > test = text "let" <+> align (vcat (map ptype types)) +-- +-- Which is layed out as: +-- +-- @ +-- let empty :: Doc +-- nest :: Int -> Doc -> Doc +-- linebreak :: Doc +-- @ +fill :: Int -> Doc -> Doc +fill f d = width d (\w -> + if (w >= f) then empty + else text (spaces (f - w))) + +width :: Doc -> (Int -> Doc) -> Doc +width d f = column (\k1 -> d <> column (\k2 -> f (k2 - k1))) + + +----------------------------------------------------------- +-- semi primitive: Alignment and indentation +----------------------------------------------------------- + +-- | The document @(indent i x)@ indents document @x@ with @i@ spaces. +-- +-- > test = indent 4 (fillSep (map text +-- > (words "the indent combinator indents these words !"))) +-- +-- Which lays out with a page width of 20 as: +-- +-- @ +-- the indent +-- combinator +-- indents these +-- words ! +-- @ +indent :: Int -> Doc -> Doc +indent i d = hang i (text (spaces i) <> d) + +-- | The hang combinator implements hanging indentation. The document +-- @(hang i x)@ renders document @x@ with a nesting level set to the +-- current column plus @i@. The following example uses hanging +-- indentation for some text: +-- +-- > test = hang 4 (fillSep (map text +-- > (words "the hang combinator indents these words !"))) +-- +-- Which lays out on a page with a width of 20 characters as: +-- +-- @ +-- the hang combinator +-- indents these +-- words ! +-- @ +-- +-- The @hang@ combinator is implemented as: +-- +-- > hang i x = align (nest i x) +hang :: Int -> Doc -> Doc +hang i d = align (nest i d) + +-- | The document @(align x)@ renders document @x@ with the nesting +-- level set to the current column. It is used for example to +-- implement 'hang'. +-- +-- As an example, we will put a document right above another one, +-- regardless of the current nesting level: +-- +-- > x $$ y = align (x <$> y) +-- +-- > test = text "hi" <+> (text "nice" $$ text "world") +-- +-- which will be layed out as: +-- +-- @ +-- hi nice +-- world +-- @ +align :: Doc -> Doc +align d = column (\k -> + nesting (\i -> nest (k - i) d)) --nesting might be negative :-) + + + +----------------------------------------------------------- +-- Primitives +----------------------------------------------------------- + +-- | The abstract data type @Doc@ represents pretty documents. +-- +-- @Doc@ is an instance of the 'Show' class. @(show doc)@ pretty +-- prints document @doc@ with a page width of 100 characters and a +-- ribbon width of 40 characters. +-- +-- > show (text "hello" <$> text "world") +-- +-- Which would return the string \"hello\\nworld\", i.e. +-- +-- @ +-- hello +-- world +-- @ +data Doc = Empty + | Char Char -- invariant: char is not '\n' + | Text !Int String -- invariant: text doesn't contain '\n' + | Line !Bool -- True <=> when undone by group, do not insert a space + | Cat Doc Doc + | Nest !Int Doc + | Union Doc Doc -- invariant: first lines of first doc longer than the first lines of the second doc + | Column (Int -> Doc) + | Nesting (Int -> Doc) + + +-- | The data type @SimpleDoc@ represents rendered documents and is +-- used by the display functions. +-- +-- The @Int@ in @SText@ contains the length of the string. The @Int@ +-- in @SLine@ contains the indentation for that line. The library +-- provides two default display functions 'displayS' and +-- 'displayIO'. You can provide your own display function by writing a +-- function from a @SimpleDoc@ to your own output format. +data SimpleDoc = SEmpty + | SChar Char SimpleDoc + | SText !Int String SimpleDoc + | SLine !Int SimpleDoc + + +-- | The empty document is, indeed, empty. Although @empty@ has no +-- content, it does have a \'height\' of 1 and behaves exactly like +-- @(text \"\")@ (and is therefore not a unit of @\<$\>@). +empty :: Doc +empty = Empty + +-- | The document @(char c)@ contains the literal character @c@. The +-- character shouldn't be a newline (@'\n'@), the function 'line' +-- should be used for line breaks. +char :: Char -> Doc +char '\n' = line +char c = Char c + +-- | The document @(text s)@ contains the literal string @s@. The +-- string shouldn't contain any newline (@'\n'@) characters. If the +-- string contains newline characters, the function 'string' should be +-- used. +text :: String -> Doc +text "" = Empty +text s = Text (length s) s + +-- | The @line@ document advances to the next line and indents to the +-- current nesting level. Document @line@ behaves like @(text \" \")@ +-- if the line break is undone by 'group'. +line :: Doc +line = Line False + +-- | The @linebreak@ document advances to the next line and indents to +-- the current nesting level. Document @linebreak@ behaves like +-- 'empty' if the line break is undone by 'group'. +linebreak :: Doc +linebreak = Line True + +beside x y = Cat x y + +-- | The document @(nest i x)@ renders document @x@ with the current +-- indentation level increased by i (See also 'hang', 'align' and +-- 'indent'). +-- +-- > nest 2 (text "hello" <$> text "world") <$> text "!" +-- +-- outputs as: +-- +-- @ +-- hello +-- world +-- ! +-- @ +nest :: Int -> Doc -> Doc +nest i x = Nest i x + +column, nesting :: (Int -> Doc) -> Doc +column f = Column f +nesting f = Nesting f + +-- | The @group@ combinator is used to specify alternative +-- layouts. The document @(group x)@ undoes all line breaks in +-- document @x@. The resulting line is added to the current line if +-- that fits the page. Otherwise, the document @x@ is rendered without +-- any changes. +group :: Doc -> Doc +group x = Union (flatten x) x + +flatten :: Doc -> Doc +flatten (Cat x y) = Cat (flatten x) (flatten y) +flatten (Nest i x) = Nest i (flatten x) +flatten (Line break) = if break then Empty else Text 1 " " +flatten (Union x y) = flatten x +flatten (Column f) = Column (flatten . f) +flatten (Nesting f) = Nesting (flatten . f) +flatten other = other --Empty,Char,Text + + + +----------------------------------------------------------- +-- Renderers +----------------------------------------------------------- + +----------------------------------------------------------- +-- renderPretty: the default pretty printing algorithm +----------------------------------------------------------- + +-- list of indentation/document pairs; saves an indirection over [(Int,Doc)] +data Docs = Nil + | Cons !Int Doc Docs + + +-- | This is the default pretty printer which is used by 'show', +-- 'putDoc' and 'hPutDoc'. @(renderPretty ribbonfrac width x)@ renders +-- document @x@ with a page width of @width@ and a ribbon width of +-- @(ribbonfrac * width)@ characters. The ribbon width is the maximal +-- amount of non-indentation characters on a line. The parameter +-- @ribbonfrac@ should be between @0.0@ and @1.0@. If it is lower or +-- higher, the ribbon width will be 0 or @width@ respectively. +renderPretty :: Float -> Int -> Doc -> SimpleDoc +renderPretty rfrac w x + = best 0 0 (Cons 0 x Nil) + where + -- r :: the ribbon width in characters + r = max 0 (min w (round (fromIntegral w * rfrac))) + + -- best :: n = indentation of current line + -- k = current column + -- (ie. (k >= n) && (k - n == count of inserted characters) + best n k Nil = SEmpty + best n k (Cons i d ds) + = case d of + Empty -> best n k ds + Char c -> let k' = k+1 in seq k' (SChar c (best n k' ds)) + Text l s -> let k' = k+l in seq k' (SText l s (best n k' ds)) + Line _ -> SLine i (best i i ds) + Cat x y -> best n k (Cons i x (Cons i y ds)) + Nest j x -> let i' = i+j in seq i' (best n k (Cons i' x ds)) + Union x y -> nicest n k (best n k (Cons i x ds)) + (best n k (Cons i y ds)) + + Column f -> best n k (Cons i (f k) ds) + Nesting f -> best n k (Cons i (f i) ds) + + --nicest :: r = ribbon width, w = page width, + -- n = indentation of current line, k = current column + -- x and y, the (simple) documents to chose from. + -- precondition: first lines of x are longer than the first lines of y. + nicest n k x y | fits width x = x + | otherwise = y + where + width = min (w - k) (r - k + n) + + +fits w x | w < 0 = False +fits w SEmpty = True +fits w (SChar c x) = fits (w - 1) x +fits w (SText l s x) = fits (w - l) x +fits w (SLine i x) = True + + +----------------------------------------------------------- +-- renderCompact: renders documents without indentation +-- fast and fewer characters output, good for machines +----------------------------------------------------------- + + +-- | @(renderCompact x)@ renders document @x@ without adding any +-- indentation. Since no \'pretty\' printing is involved, this +-- renderer is very fast. The resulting output contains fewer +-- characters than a pretty printed version and can be used for output +-- that is read by other programs. +renderCompact :: Doc -> SimpleDoc +renderCompact x + = scan 0 [x] + where + scan k [] = SEmpty + scan k (d:ds) = case d of + Empty -> scan k ds + Char c -> let k' = k+1 in seq k' (SChar c (scan k' ds)) + Text l s -> let k' = k+l in seq k' (SText l s (scan k' ds)) + Line _ -> SLine 0 (scan 0 ds) + Cat x y -> scan k (x:y:ds) + Nest j x -> scan k (x:ds) + Union x y -> scan k (y:ds) + Column f -> scan k (f k:ds) + Nesting f -> scan k (f 0:ds) + + + +----------------------------------------------------------- +-- Displayers: displayS and displayIO +----------------------------------------------------------- + + +-- | @(displayS simpleDoc)@ takes the output @simpleDoc@ from a +-- rendering function and transforms it to a 'ShowS' type (for use in +-- the 'Show' class). +-- +-- > showWidth :: Int -> Doc -> String +-- > showWidth w x = displayS (renderPretty 0.4 w x) "" +displayS :: SimpleDoc -> ShowS +displayS SEmpty = id +displayS (SChar c x) = showChar c . displayS x +displayS (SText l s x) = showString s . displayS x +displayS (SLine i x) = showString ('\n':indentation i) . displayS x + + +-- | @(displayIO handle simpleDoc)@ writes @simpleDoc@ to the file +-- handle @handle@. This function is used for example by 'hPutDoc': +-- +-- > hPutDoc handle doc = displayIO handle (renderPretty 0.4 100 doc) +displayIO :: Handle -> SimpleDoc -> IO () +displayIO handle simpleDoc + = display simpleDoc + where + display SEmpty = return () + display (SChar c x) = do{ hPutChar handle c; display x} + display (SText l s x) = do{ hPutStr handle s; display x} + display (SLine i x) = do{ hPutStr handle ('\n':indentation i); display x} + + +----------------------------------------------------------- +-- default pretty printers: show, putDoc and hPutDoc +----------------------------------------------------------- +instance Show Doc where + showsPrec d doc = displayS (renderPretty 0.4 80 doc) + +-- | The action @(putDoc doc)@ pretty prints document @doc@ to the +-- standard output, with a page width of 100 characters and a ribbon +-- width of 40 characters. +-- +-- > main :: IO () +-- > main = do{ putDoc (text "hello" <+> text "world") } +-- +-- Which would output +-- +-- @ +-- hello world +-- @ +putDoc :: Doc -> IO () +putDoc doc = hPutDoc stdout doc + +-- | @(hPutDoc handle doc)@ pretty prints document @doc@ to the file +-- handle @handle@ with a page width of 100 characters and a ribbon +-- width of 40 characters. +-- +-- > main = do{ handle <- openFile "MyFile" WriteMode +-- > ; hPutDoc handle (vcat (map text +-- > ["vertical","text"])) +-- > ; hClose handle +-- > } +hPutDoc :: Handle -> Doc -> IO () +hPutDoc handle doc = displayIO handle (renderPretty 0.4 80 doc) + + + +----------------------------------------------------------- +-- insert spaces +-- "indentation" used to insert tabs but tabs seem to cause +-- more trouble than they solve :-) +----------------------------------------------------------- +spaces n | n <= 0 = "" + | otherwise = replicate n ' ' + +indentation n = spaces n + +--indentation n | n >= 8 = '\t' : indentation (n-8) +-- | otherwise = spaces n + +-- LocalWords: PPrint combinators Wadler Wadler's encloseSep diff --git a/src/Util.lhs b/src/Util.lhs new file mode 100644 index 0000000..a161344 --- /dev/null +++ b/src/Util.lhs @@ -0,0 +1,7 @@ +\begin{code} +module Util where + +join x [] = [] +join x [a] = a +join x (a:b:c) = a++x++(join x (b:c)) +\end{code} diff --git a/src/Wix.lhs b/src/Wix.lhs new file mode 100644 index 0000000..e969aab --- /dev/null +++ b/src/Wix.lhs @@ -0,0 +1,45 @@ +\begin{code} +module Wix(main') +where +import SBP +import FromTree +import Doc +import Html +import Foreign.Java +import qualified Text.PrettyPrint.Leijen as PP + +process :: String -> IO String +process file = do t <- parseFile file + return + $ toHtml + $ ((fromTree $ coalesceFlatHeadlessNodes t) :: Doc) + +foreign import jvm safe "HaskellHelper.putBack" putBack :: JString -> IO () + +main' file verbosity = + do t <- parseFile file + if verbosity > 0 + then + putStrLn $ (PP.displayS + $ PP.renderPretty (0.9) 80 + $ prettyPrintTree (coalesceFlatHeadlessNodes t)) + "" + else + return () + doc <- return ((fromTree $ coalesceFlatHeadlessNodes t) :: Doc) + if verbosity > 1 + then + putStrLn $ (PP.displayS + $ PP.renderPretty (0.9) 80 + $ PP.pretty doc) + "" + else + return () + js <- packJString $ toHtml doc + putBack js + +\end{code} + + + + diff --git a/src/indent.g b/src/indent.g new file mode 100644 index 0000000..42fa84d --- /dev/null +++ b/src/indent.g @@ -0,0 +1,16 @@ + +// productions which match indentation-balanced text regions; that is, +// regions in which the indentation level of all parts of the region +// is strictly non-negative relative to the start of the region. + +// A region which is indentation-balanced: no prefix of the region +// contains more outdents than indents. +Balanced! = Balanced ~[>><<] + | Balanced >> Balanced << + | () + +// A region which is unbalanced: some prefix has more outdents than +// indents. May be faster to match than (~~Balanced). +UnBalanced! = << + | ~[>><<] UnBalanced + | >> UnBalanced UnBalanced diff --git a/src/tokens.g b/src/tokens.g new file mode 100644 index 0000000..0116327 --- /dev/null +++ b/src/tokens.g @@ -0,0 +1,7 @@ +alpha = [a-zA-Z] +alphanum = alpha | digit +digit = [0-9] +Int:: = digit++ +escaped = lf:: "\\n" + | cr:: "\\r" + | "\\" ~[nr] diff --git a/src/url.g b/src/url.g new file mode 100644 index 0000000..05ba612 --- /dev/null +++ b/src/url.g @@ -0,0 +1,32 @@ +// URLs ////////////////////////////////////////////////////////////////////////////// + +#import tokens.g as tok + +// "public" ///////////////////////////////////////////////////////////////////// + +Path:: = urlchar+ +Email:: = username "@" Host +URL:: = method "://" Login Host Port (()|"/"|"/" Path) (()|"#"|"#" Path) + -> (~(urlc|[%\#]) | avoidOnUrlTail) + +// An url must not end with these characters if it appears "bare" +// inline within a text block (ie without braces surrounding it). +// This ensures that punctuation doesn't "stick" to the end of the url. +avoidOnUrlTail = [,.;)!] + +// "private" //////////////////////////////////////////////////////////////////// + +Login:: = username "@" + | username ":" password "@" + | () +Host = IP:: tok.digit "." tok.digit "." tok.digit "." tok.digit + | DNS:: (part:: [a-zA-Z0-9\-]++) ++/ "." +Port:: = (":" `tok.Int)? + +username:: = [a-zA-Z0-9;/?:&=$\-_.+]++ +password:: = [a-zA-Z0-9;/?:&=$\-_.+]++ +method:: = [+\-.a-z0-9]+ +urlchar = urlc + | "%":: "%" [0-9] [0-9] +urlc = [a-zA-Z0-9;/?:&=$\-_.+] + | [@~,] // technically illegal (RFC1738) diff --git a/src/wix.g b/src/wix.g new file mode 100644 index 0000000..8289970 --- /dev/null +++ b/src/wix.g @@ -0,0 +1,114 @@ +// WiX Grammar ////////////////////////////////////////////////////////////////////////////// + +#import url.g as url +#import indent.g as indent + +//#import tokens.g as tokens // disabled until transinclusion bug is fixed + +s = Doc + +Doc:: = Header Body +Header:: = () // not yet specified +Body:: = Section */ br + +// multi-line header by indenting second line? +Section:: = SectionHeader + | SectionHeader (nl|br) >> Pars << + > SectionHeader (nl|br) Pars +SectionHeader:: = (^"=" [=]++) ws (Text &~ ... "\n" ...) + +Pars:: = (Par <<* & indent.Balanced &~ "=" ...) */ br + +// An "unbalanced" paragraph (may need trailing outdents to balance) +Par = NonTextParagraph + > TextParagraph + +TextParagraph:: = Text + +NonTextParagraph = UL:: `("":: ("*" BBullet nl)*) "*" UBullet + | OL:: `("":: (tok.Int! [.\)]! BBullet nl)*) tok.Int! [.\)]! UBullet + | Blockquote:: "\"\"" ws >> Pars + | HR:: "---" "-"* + | Verbatim:: "\\pre" " "* "\n" (I:: [ ]*) >> Verbatim << + +// a "balanced bullet" +BBullet = (UBullet <<*) & indent.Balanced + +// an "unbalanced bullet" +UBullet = whitespace (LI:: Par */ br) + & ~[\n]* ("\n" [ \r\n]* >> ...)? + +Verbatim = VerbatimBrace:: Verbatim >> Verbatim << + | Verbatim:: Verbatim ~[>><<] + | Verbatim:: () + +// note that Text and TextWS are left-recursive because this is more +// efficient in LR parsers + +// text followed by optional whitespace +TextWS = () + | Text + | Text:: `Text (WS:: whitespace) + +Text:: = `Text nl >> (Pars:: NonTextParagraph) + > `TextWS (Link:: Atom "->" ws href) + > `TextWS styled + > `TextWS Ordinal + | `TextWS Fraction + > `TextWS Atom + | `TextWS (Command:: "\\" ("":: [a-zA-Z0-9:]++ &~ "pre") Block) + | `TextWS (Command:: "\\" ("":: [a-zA-Z0-9:]++ &~ "pre") -> ~"{") + | `TextWS (Quotes:: "\"" (Text &~ ... "\"" ...) "\"") + | `TextWS glyph + > `TextWS (Word:: sym) + > `Text (WS:: br) >> (Pars -> <<) // subparagraph + +href = url.Email + > url.URL &~ ... url.avoidOnUrlTail + > url.Path &~ ... url.avoidOnUrlTail + | "{" ws url.Email ws "}" + > "{" ws url.URL ws "}" + > "{" ws url.Path ws "}" + // > Citation:: "[" tok.alphanum++ "]" + +Atom = Word | Block +Block = "{" ws TextWS "}" +Word:: = tok.alphanum++ +Fraction:: = ("":: [0-9]+) "/" ("":: [0-9]+) +Ordinal:: = `("":: [0-9]*) ( [1] "st" + | [2] "nd" + | [3] "rd" + | [04-9] "th" ) + +styled = Underline:: "__" Text "__" + | Footnote:: "((" Text "))" + | TT:: "[[" ws (Text &~ ... "]]" ...) ws "]]" + | Strikethrough:: "!!" Text "!!" + | Superscript:: "^^" Atom + | Subscript:: ",," Atom + | Bold:: "++" (Text &~ ... "++" ...) "++" + | Highlight:: "##" (Text &~ ... "##" ...) "##" + | Math:: "$$" (~[$] | ([$] -> ~[$]))+ "$$" + | Keyword:: "!" Atom + | Italic:: "**" (Text &~ ... "**" ...) "**" + +glyph = ^"(e)" | ^"(r)" | ^"(c)" | ^"(tm)" | ^"--" + | ^"..." | ^"<-" | ^"<=" | ^"=>" | ^"<->" + | ^"<=>" | ^"<-" | ^"^o" + // ^"->" + +// Chars /////////////////////////////////////////////////////////////// + +sym = "\\" ~tok.alphanum + | ~(tok.alphanum | [\r\n \\{}>><<]) + +hws! = [ \r>>] +ws! = (hws* "\n")? hws* -> ~[ \n\r] +whitespace! = hws+ -> ~[ \n\r] + | hws* "\n" hws* -> ~[ \n\r] + +// these don't include indents-as-whitespace +nl! = [ \r]* "\n" [ \r]* -> ~[ \n\r] +br! = [ \r]* "\n" [ \r]* "\n" [ \r\n]* -> ~[ \n\r] + +