1 -----------------------------------------------------------------------------
3 -- (c) The University of Glasgow, 2004-2006
5 -- Lexer for concrete Cmm. We try to stay close to the C-- spec, but there
6 -- are a few minor differences:
8 -- * extra keywords for our macros, and float32/float64 types
9 -- * global registers (Sp,Hp, etc.)
11 -----------------------------------------------------------------------------
14 {-# OPTIONS -Wwarn -w #-}
15 -- The above warning supression flag is a temporary kludge.
16 -- While working on this module you are encouraged to remove it and fix
17 -- any warnings in the module. See
18 -- http://hackage.haskell.org/trac/ghc/wiki/Commentary/CodingStyle#Warnings
25 #include "HsVersions.h"
39 $whitechar = [\ \t\n\r\f\v\xa0] -- \xa0 is Unicode no-break space
40 $white_no_nl = $whitechar # \n
43 $unidigit = \x01 -- Trick Alex into handling Unicode. See alexGetChar.
44 $digit = [$ascdigit $unidigit]
46 $hexit = [$digit A-F a-f]
48 $unilarge = \x03 -- Trick Alex into handling Unicode. See alexGetChar.
49 $asclarge = [A-Z \xc0-\xd6 \xd8-\xde]
50 $large = [$asclarge $unilarge]
52 $unismall = \x04 -- Trick Alex into handling Unicode. See alexGetChar.
53 $ascsmall = [a-z \xdf-\xf6 \xf8-\xff]
54 $small = [$ascsmall $unismall \_]
56 $namebegin = [$large $small \. \$ \@]
57 $namechar = [$namebegin $digit]
61 @hexadecimal = $hexit+
62 @exponent = [eE] [\-\+]? @decimal
64 @floating_point = @decimal \. @decimal @exponent? | @decimal @exponent
66 @escape = \\ ([abfnrt\\\'\"\?] | x $hexit{1,2} | $octit{1,3})
67 @strchar = ($printable # [\"\\]) | @escape
72 ^\# pragma .* \n ; -- Apple GCC 3.3 CPP generates pragmas in its output
74 ^\# (line)? { begin line_prag }
76 -- single-line line pragmas, of the form
77 -- # <line> "<file>" <extra-stuff> \n
78 <line_prag> $digit+ { setLine line_prag1 }
79 <line_prag1> \" ($printable # \")* \" { setFile line_prag2 }
80 <line_prag2> .* { pop }
85 [\:\;\{\}\[\]\(\)\=\`\~\/\*\%\-\+\&\^\|\>\<\,\!] { special_char }
87 ".." { kw CmmT_DotDot }
88 "::" { kw CmmT_DoubleColon }
95 "&&" { kw CmmT_BoolAnd }
96 "||" { kw CmmT_BoolOr }
98 P@decimal { global_regN (\n -> VanillaReg n VGcPtr) }
99 R@decimal { global_regN (\n -> VanillaReg n VNonGcPtr) }
100 F@decimal { global_regN FloatReg }
101 D@decimal { global_regN DoubleReg }
102 L@decimal { global_regN LongReg }
104 SpLim { global_reg SpLim }
106 HpLim { global_reg HpLim }
107 CurrentTSO { global_reg CurrentTSO }
108 CurrentNursery { global_reg CurrentNursery }
109 HpAlloc { global_reg HpAlloc }
110 BaseReg { global_reg BaseReg }
112 $namebegin $namechar* { name }
114 0 @octal { tok_octal }
115 @decimal { tok_decimal }
116 0[xX] @hexadecimal { tok_hexadecimal }
117 @floating_point { strtoken tok_float }
119 \" @strchar* \" { strtoken tok_string }
137 | CmmT_INFO_TABLE_RET
138 | CmmT_INFO_TABLE_FUN
139 | CmmT_INFO_TABLE_CONSTR
140 | CmmT_INFO_TABLE_SELECTOR
164 | CmmT_GlobalReg GlobalReg
165 | CmmT_Name FastString
168 | CmmT_Float Rational
174 -- -----------------------------------------------------------------------------
177 type Action = SrcSpan -> StringBuffer -> Int -> P (Located CmmToken)
179 begin :: Int -> Action
180 begin code _span _str _len = do pushLexState code; lexToken
183 pop _span _buf _len = do popLexState; lexToken
185 special_char :: Action
186 special_char span buf len = return (L span (CmmT_SpecChar (currentChar buf)))
188 kw :: CmmToken -> Action
189 kw tok span buf len = return (L span tok)
191 global_regN :: (Int -> GlobalReg) -> Action
192 global_regN con span buf len
193 = return (L span (CmmT_GlobalReg (con (fromIntegral n))))
194 where buf' = stepOn buf
195 n = parseUnsignedInteger buf' (len-1) 10 octDecDigit
197 global_reg :: GlobalReg -> Action
198 global_reg r span buf len = return (L span (CmmT_GlobalReg r))
200 strtoken :: (String -> CmmToken) -> Action
201 strtoken f span buf len =
202 return (L span $! (f $! lexemeToString buf len))
206 case lookupUFM reservedWordsFM fs of
207 Just tok -> return (L span tok)
208 Nothing -> return (L span (CmmT_Name fs))
210 fs = lexemeToFastString buf len
212 reservedWordsFM = listToUFM $
213 map (\(x, y) -> (mkFastString x, y)) [
214 ( "CLOSURE", CmmT_CLOSURE ),
215 ( "INFO_TABLE", CmmT_INFO_TABLE ),
216 ( "INFO_TABLE_RET", CmmT_INFO_TABLE_RET ),
217 ( "INFO_TABLE_FUN", CmmT_INFO_TABLE_FUN ),
218 ( "INFO_TABLE_CONSTR", CmmT_INFO_TABLE_CONSTR ),
219 ( "INFO_TABLE_SELECTOR",CmmT_INFO_TABLE_SELECTOR ),
220 ( "else", CmmT_else ),
221 ( "export", CmmT_export ),
222 ( "section", CmmT_section ),
223 ( "align", CmmT_align ),
224 ( "goto", CmmT_goto ),
226 ( "jump", CmmT_jump ),
227 ( "foreign", CmmT_foreign ),
228 ( "never", CmmT_never ),
229 ( "prim", CmmT_prim ),
230 ( "return", CmmT_return ),
231 ( "returns", CmmT_returns ),
232 ( "import", CmmT_import ),
233 ( "switch", CmmT_switch ),
234 ( "case", CmmT_case ),
235 ( "default", CmmT_default ),
236 ( "bits8", CmmT_bits8 ),
237 ( "bits16", CmmT_bits16 ),
238 ( "bits32", CmmT_bits32 ),
239 ( "bits64", CmmT_bits64 ),
240 ( "float32", CmmT_float32 ),
241 ( "float64", CmmT_float64 ),
243 ( "b8", CmmT_bits8 ),
244 ( "b16", CmmT_bits16 ),
245 ( "b32", CmmT_bits32 ),
246 ( "b64", CmmT_bits64 ),
247 ( "f32", CmmT_float32 ),
248 ( "f64", CmmT_float64 ),
249 ( "gcptr", CmmT_gcptr )
252 tok_decimal span buf len
253 = return (L span (CmmT_Int $! parseUnsignedInteger buf len 10 octDecDigit))
255 tok_octal span buf len
256 = return (L span (CmmT_Int $! parseUnsignedInteger (offsetBytes 1 buf) (len-1) 8 octDecDigit))
258 tok_hexadecimal span buf len
259 = return (L span (CmmT_Int $! parseUnsignedInteger (offsetBytes 2 buf) (len-2) 16 hexDigit))
261 tok_float str = CmmT_Float $! readRational str
263 tok_string str = CmmT_String (read str)
264 -- urk, not quite right, but it'll do for now
266 -- -----------------------------------------------------------------------------
269 setLine :: Int -> Action
270 setLine code span buf len = do
271 let line = parseUnsignedInteger buf len 10 octDecDigit
272 setSrcLoc (mkSrcLoc (srcSpanFile span) (fromIntegral line - 1) 0)
273 -- subtract one: the line number refers to the *following* line
274 -- trace ("setLine " ++ show line) $ do
279 setFile :: Int -> Action
280 setFile code span buf len = do
281 let file = lexemeToFastString (stepOn buf) (len-2)
282 setSrcLoc (mkSrcLoc file (srcSpanEndLine span) (srcSpanEndCol span))
287 -- -----------------------------------------------------------------------------
288 -- This is the top-level function: called from the parser each time a
289 -- new token is to be read from the input.
291 cmmlex :: (Located CmmToken -> P a) -> P a
293 tok@(L _ tok__) <- lexToken
294 --trace ("token: " ++ show tok__) $ do
297 lexToken :: P (Located CmmToken)
299 inp@(loc1,buf) <- getInput
301 case alexScan inp sc of
302 AlexEOF -> do let span = mkSrcSpan loc1 loc1
303 setLastToken span 0 0
304 return (L span CmmT_EOF)
305 AlexError (loc2,_) -> do failLocMsgP loc1 loc2 "lexical error"
306 AlexSkip inp2 _ -> do
309 AlexToken inp2@(end,buf2) len t -> do
311 let span = mkSrcSpan loc1 end
312 span `seq` setLastToken span len len
315 -- -----------------------------------------------------------------------------
318 -- Stuff that Alex needs to know about our input type:
319 type AlexInput = (SrcLoc,StringBuffer)
321 alexInputPrevChar :: AlexInput -> Char
322 alexInputPrevChar (_,s) = prevChar s '\n'
324 alexGetChar :: AlexInput -> Maybe (Char,AlexInput)
327 | otherwise = c `seq` loc' `seq` s' `seq` Just (c, (loc', s'))
328 where c = currentChar s
329 loc' = advanceSrcLoc loc c
332 getInput :: P AlexInput
333 getInput = P $ \s@PState{ loc=l, buffer=b } -> POk s (l,b)
335 setInput :: AlexInput -> P ()
336 setInput (l,b) = P $ \s -> POk s{ loc=l, buffer=b } ()