1 -----------------------------------------------------------------------------
3 -- (c) The University of Glasgow, 2004-2006
5 -- Lexer for concrete Cmm. We try to stay close to the C-- spec, but there
6 -- are a few minor differences:
8 -- * extra keywords for our macros, and float32/float64 types
9 -- * global registers (Sp,Hp, etc.)
11 -----------------------------------------------------------------------------
18 #include "HsVersions.h"
32 $whitechar = [\ \t\n\r\f\v\xa0]
33 $white_no_nl = $whitechar # \n
37 $digit = [$ascdigit $unidigit]
39 $hexit = [$digit A-F a-f]
42 $asclarge = [A-Z \xc0-\xd6 \xd8-\xde]
43 $large = [$asclarge $unilarge]
46 $ascsmall = [a-z \xdf-\xf6 \xf8-\xff]
47 $small = [$ascsmall $unismall \_]
49 $namebegin = [$large $small \_ \. \$ \@]
50 $namechar = [$namebegin $digit]
54 @hexadecimal = $hexit+
55 @exponent = [eE] [\-\+]? @decimal
57 @floating_point = @decimal \. @decimal @exponent? | @decimal @exponent
59 @escape = \\ ([abfnrt\\\'\"\?] | x @hexadecimal | @octal)
60 @strchar = ($printable # [\"\\]) | @escape
65 ^\# pragma .* \n ; -- Apple GCC 3.3 CPP generates pragmas in its output
67 ^\# (line)? { begin line_prag }
69 -- single-line line pragmas, of the form
70 -- # <line> "<file>" <extra-stuff> \n
71 <line_prag> $digit+ { setLine line_prag1 }
72 <line_prag1> \" ($printable # \")* \" { setFile line_prag2 }
73 <line_prag2> .* { pop }
78 [\:\;\{\}\[\]\(\)\=\`\~\/\*\%\-\+\&\^\|\>\<\,\!] { special_char }
80 ".." { kw CmmT_DotDot }
81 "::" { kw CmmT_DoubleColon }
88 "&&" { kw CmmT_BoolAnd }
89 "||" { kw CmmT_BoolOr }
91 R@decimal { global_regN VanillaReg }
92 F@decimal { global_regN FloatReg }
93 D@decimal { global_regN DoubleReg }
94 L@decimal { global_regN LongReg }
96 SpLim { global_reg SpLim }
98 HpLim { global_reg HpLim }
99 CurrentTSO { global_reg CurrentTSO }
100 CurrentNursery { global_reg CurrentNursery }
101 HpAlloc { global_reg HpAlloc }
102 BaseReg { global_reg BaseReg }
104 $namebegin $namechar* { name }
106 0 @octal { tok_octal }
107 @decimal { tok_decimal }
108 0[xX] @hexadecimal { tok_hexadecimal }
109 @floating_point { strtoken tok_float }
111 \" @strchar* \" { strtoken tok_string }
129 | CmmT_INFO_TABLE_RET
130 | CmmT_INFO_TABLE_FUN
131 | CmmT_INFO_TABLE_CONSTR
132 | CmmT_INFO_TABLE_SELECTOR
152 | CmmT_GlobalReg GlobalReg
153 | CmmT_Name FastString
156 | CmmT_Float Rational
162 -- -----------------------------------------------------------------------------
165 type Action = SrcSpan -> StringBuffer -> Int -> P (Located CmmToken)
167 begin :: Int -> Action
168 begin code _span _str _len = do pushLexState code; lexToken
171 pop _span _buf _len = do popLexState; lexToken
173 special_char :: Action
174 special_char span buf len = return (L span (CmmT_SpecChar (currentChar buf)))
176 kw :: CmmToken -> Action
177 kw tok span buf len = return (L span tok)
179 global_regN :: (Int -> GlobalReg) -> Action
180 global_regN con span buf len
181 = return (L span (CmmT_GlobalReg (con (fromIntegral n))))
182 where buf' = stepOn buf
183 n = parseInteger buf' (len-1) 10 octDecDigit
185 global_reg :: GlobalReg -> Action
186 global_reg r span buf len = return (L span (CmmT_GlobalReg r))
188 strtoken :: (String -> CmmToken) -> Action
189 strtoken f span buf len =
190 return (L span $! (f $! lexemeToString buf len))
194 case lookupUFM reservedWordsFM fs of
195 Just tok -> return (L span tok)
196 Nothing -> return (L span (CmmT_Name fs))
198 fs = lexemeToFastString buf len
200 reservedWordsFM = listToUFM $
201 map (\(x, y) -> (mkFastString x, y)) [
202 ( "CLOSURE", CmmT_CLOSURE ),
203 ( "INFO_TABLE", CmmT_INFO_TABLE ),
204 ( "INFO_TABLE_RET", CmmT_INFO_TABLE_RET ),
205 ( "INFO_TABLE_FUN", CmmT_INFO_TABLE_FUN ),
206 ( "INFO_TABLE_CONSTR", CmmT_INFO_TABLE_CONSTR ),
207 ( "INFO_TABLE_SELECTOR",CmmT_INFO_TABLE_SELECTOR ),
208 ( "else", CmmT_else ),
209 ( "export", CmmT_export ),
210 ( "section", CmmT_section ),
211 ( "align", CmmT_align ),
212 ( "goto", CmmT_goto ),
214 ( "jump", CmmT_jump ),
215 ( "foreign", CmmT_foreign ),
216 ( "prim", CmmT_prim ),
217 ( "import", CmmT_import ),
218 ( "switch", CmmT_switch ),
219 ( "case", CmmT_case ),
220 ( "default", CmmT_default ),
221 ( "bits8", CmmT_bits8 ),
222 ( "bits16", CmmT_bits16 ),
223 ( "bits32", CmmT_bits32 ),
224 ( "bits64", CmmT_bits64 ),
225 ( "float32", CmmT_float32 ),
226 ( "float64", CmmT_float64 )
229 tok_decimal span buf len
230 = return (L span (CmmT_Int $! parseInteger buf len 10 octDecDigit))
232 tok_octal span buf len
233 = return (L span (CmmT_Int $! parseInteger (offsetBytes 1 buf) (len-1) 8 octDecDigit))
235 tok_hexadecimal span buf len
236 = return (L span (CmmT_Int $! parseInteger (offsetBytes 2 buf) (len-2) 16 hexDigit))
238 tok_float str = CmmT_Float $! readRational str
240 tok_string str = CmmT_String (read str)
241 -- urk, not quite right, but it'll do for now
243 -- -----------------------------------------------------------------------------
246 setLine :: Int -> Action
247 setLine code span buf len = do
248 let line = parseInteger buf len 10 octDecDigit
249 setSrcLoc (mkSrcLoc (srcSpanFile span) (fromIntegral line - 1) 0)
250 -- subtract one: the line number refers to the *following* line
251 -- trace ("setLine " ++ show line) $ do
256 setFile :: Int -> Action
257 setFile code span buf len = do
258 let file = lexemeToFastString (stepOn buf) (len-2)
259 setSrcLoc (mkSrcLoc file (srcSpanEndLine span) (srcSpanEndCol span))
264 -- -----------------------------------------------------------------------------
265 -- This is the top-level function: called from the parser each time a
266 -- new token is to be read from the input.
268 cmmlex :: (Located CmmToken -> P a) -> P a
270 tok@(L _ tok__) <- lexToken
271 --trace ("token: " ++ show tok__) $ do
274 lexToken :: P (Located CmmToken)
276 inp@(loc1,buf) <- getInput
278 case alexScan inp sc of
279 AlexEOF -> do let span = mkSrcSpan loc1 loc1
280 setLastToken span 0 0
281 return (L span CmmT_EOF)
282 AlexError (loc2,_) -> do failLocMsgP loc1 loc2 "lexical error"
283 AlexSkip inp2 _ -> do
286 AlexToken inp2@(end,buf2) len t -> do
288 let span = mkSrcSpan loc1 end
289 span `seq` setLastToken span len len
292 -- -----------------------------------------------------------------------------
295 -- Stuff that Alex needs to know about our input type:
296 type AlexInput = (SrcLoc,StringBuffer)
298 alexInputPrevChar :: AlexInput -> Char
299 alexInputPrevChar (_,s) = prevChar s '\n'
301 alexGetChar :: AlexInput -> Maybe (Char,AlexInput)
304 | otherwise = c `seq` loc' `seq` s' `seq` Just (c, (loc', s'))
305 where c = currentChar s
306 loc' = advanceSrcLoc loc c
309 getInput :: P AlexInput
310 getInput = P $ \s@PState{ loc=l, buffer=b } -> POk s (l,b)
312 setInput :: AlexInput -> P ()
313 setInput (l,b) = P $ \s -> POk s{ loc=l, buffer=b } ()