2 -- Haddock - A Haskell Documentation Tool
4 -- (c) Simon Marlow 2002
6 -- This file was modified and integrated into GHC by David Waern 2006
11 -- The above warning suppression flag is a temporary kludge.
12 -- While working on this module you are encouraged to remove it and fix
13 -- any warnings in the module. See
14 -- http://hackage.haskell.org/trac/ghc/wiki/Commentary/CodingStyle#Warnings
22 import Lexer hiding (Token)
23 import Parser ( parseIdentifier )
31 import System.IO.Unsafe
-- Character classes shared by the lexer rules below.
36 $hexdigit = [0-9a-fA-F]
38 $alphanum = [A-Za-z0-9]
-- Characters allowed inside a quoted identifier; covers operator symbols
-- as well as alphanumeric names (backslashes are escaped for Alex).
39 $ident = [$alphanum \'\_\.\!\#\$\%\&\*\+\/\<\=\>\?\@\\\\\^\|\-\~]
43 -- beginning of a paragraph
46 $ws* \> { begin birdtrack }
47 $ws* [\*\-] { token TokBullet `andBegin` string }
48 $ws* \[ { token TokDefStart `andBegin` def }
49 $ws* \( $digit+ \) { token TokNumber `andBegin` string }
53 -- beginning of a line
55 $ws* \> { begin birdtrack }
56 $ws* \n { token TokPara `andBegin` para }
57 -- Here, we really want to be able to say
58 -- $ws* (\n | <eof>) { token TokPara `andBegin` para}
59 -- because otherwise a trailing line of whitespace will result in
60 -- a spurious TokString at the end of a docstring. We don't have <eof>,
61 -- though (NOW I realise what it was for :-). To get around this, we always
62 -- append \n to the end of a docstring.
66 <birdtrack> .* \n? { strtoken TokBirdTrack `andBegin` line }
69 $special { strtoken $ \s -> TokSpecial (head s) }
70 \<\<.*\>\> { strtoken $ \s -> TokPic (init $ init $ tail $ tail s) }
71 \<.*\> { strtoken $ \s -> TokURL (init (tail s)) }
72 \#.*\# { strtoken $ \s -> TokAName (init (tail s)) }
73 \/ [^\/]* \/ { strtoken $ \s -> TokEmphasis (init (tail s)) }
74 [\'\`] $ident+ [\'\`] { ident }
75 \\ . { strtoken (TokString . tail) }
76 "&#" $digit+ \; { strtoken $ \s -> TokString [chr (read (init (drop 2 s)))] }
77 "&#" [xX] $hexdigit+ \; { strtoken $ \s -> case readHex (init (drop 3 s)) of [(n,_)] -> TokString [chr n] }
78 -- allow special characters through if they don't fit one of the previous
80 [\/\'\`\<\#\&\\] { strtoken TokString }
81 [^ $special \/ \< \# \n \'\` \& \\ \]]* \n { strtoken TokString `andBegin` line }
82 [^ $special \/ \< \# \n \'\` \& \\ \]]+ { strtoken TokString }
86 \] { token TokDefEnd `andBegin` string }
89 -- ']' doesn't have any special meaning outside of the [...] at the beginning
90 -- of a definition paragraph.
92 \] { strtoken TokString }
109 | TokBirdTrack String
112 -- -----------------------------------------------------------------------------
113 -- Alex support stuff
-- | A lexer action: given the matched string, the current start code, and
-- a continuation (which expects the start code to resume lexing in),
-- produce the remaining token stream.
116 type Action = String -> StartCode -> (StartCode -> [Token]) -> [Token]
-- | Alex input: the previously consumed character paired with the rest of
-- the input string.
118 type AlexInput = (Char,String)
-- Fetch the next character; the character returned becomes the new
-- "previous" character in the advanced input. Nothing at end of input.
120 alexGetChar (_, []) = Nothing
121 alexGetChar (_, c:cs) = Just (c, (c,cs))
-- Required by Alex: the character consumed just before the current position
-- (used by left-context patterns).
123 alexInputPrevChar (c,_) = c
125 tokenise :: String -> [Token]
-- | Lex a docstring into tokens, starting in the 'para' start code.
-- 'eofHack' appends a final newline so the end-of-line rules always fire
-- (see the "beginning of a line" comment above); the initial previous
-- character is '\n' so beginning-of-line patterns match at position 0.
126 tokenise str = let toks = go ('\n', eofHack str) para in {-trace (show toks)-} toks
127 where go inp@(_,str) sc =
128 case alexScan inp sc of
-- NOTE(review): the AlexEOF alternative appears to be on a line elided
-- from this view — confirm it returns [] against the full file.
130 AlexError _ -> error "lexical error"
131 AlexSkip inp' len -> go inp' sc
-- Hand the matched prefix to the rule's action; the action continues
-- lexing via 'go', possibly in a different start code.
132 AlexToken inp' len act -> act (take len str) sc (\sc -> go inp' sc)
134 -- NB. we add a final \n to the string, (see comment in the beginning of line
135 -- production above).
-- | Guarantee a trailing newline on the docstring so the line-oriented
-- rules can terminate the final line (there is no <eof> rule; see above).
eofHack :: String -> String
eofHack = (++ "\n")
-- | Run the given action, but with the start code replaced by @new@;
-- the start code we were actually called with is discarded.
andBegin :: Action -> StartCode -> Action
andBegin act new str _old cont = act str new cont
-- | Emit a fixed token (ignoring the matched text) and continue lexing in
-- the current start code.
token :: Token -> Action
token t _match sc cont = t : cont sc
-- | Emit a token computed from the matched string, keeping the current
-- start code.
strtoken :: (String -> Token) -> Action
strtoken mk match sc cont = mk match : cont sc
-- | Emit no token and switch the lexer into the given start code.
begin :: StartCode -> Action
begin new _match _old cont = cont new
150 -- -----------------------------------------------------------------------------
151 -- Lex a string as a Haskell identifier
-- Try to parse the quoted text as (possibly qualified) Haskell name(s);
-- fall back to a plain string token (with quotes intact) when it fails.
155 case strToHsQNames id of
156 Just names -> TokIdent names : cont sc
157 Nothing -> TokString str : cont sc
-- 'id' is the match with the surrounding quote characters stripped.
158 where id = init (tail str)
-- | Parse a string as a Haskell identifier using GHC's own parser,
-- returning the parsed names on success.
160 strToHsQNames :: String -> Maybe [RdrName]
-- NOTE(review): unsafePerformIO is used to build a StringBuffer from a
-- pure String for GHC's parser; confirm stringToStringBuffer has no
-- observable side effects in this GHC version.
162 let buffer = unsafePerformIO (stringToStringBuffer str0)
163 pstate = mkPState buffer noSrcLoc defaultDynFlags
164 lex = lexer (\t -> return t)
-- Run GHC's identifier parser over the buffer.
165 result = unP parseIdentifier pstate
-- Success case; the failure alternative is on a line elided from this view.
167 POk _ name -> Just [unLoc name]