1 -----------------------------------------------------------------------------
3 -- (c) The University of Glasgow, 2004-2006
5 -- Parser for concrete Cmm.
7 -----------------------------------------------------------------------------
10 module CmmParse ( parseCmmFile ) where
50 import Data.Char ( ord )
53 #include "HsVersions.h"
57 ':' { L _ (CmmT_SpecChar ':') }
58 ';' { L _ (CmmT_SpecChar ';') }
59 '{' { L _ (CmmT_SpecChar '{') }
60 '}' { L _ (CmmT_SpecChar '}') }
61 '[' { L _ (CmmT_SpecChar '[') }
62 ']' { L _ (CmmT_SpecChar ']') }
63 '(' { L _ (CmmT_SpecChar '(') }
64 ')' { L _ (CmmT_SpecChar ')') }
65 '=' { L _ (CmmT_SpecChar '=') }
66 '`' { L _ (CmmT_SpecChar '`') }
67 '~' { L _ (CmmT_SpecChar '~') }
68 '/' { L _ (CmmT_SpecChar '/') }
69 '*' { L _ (CmmT_SpecChar '*') }
70 '%' { L _ (CmmT_SpecChar '%') }
71 '-' { L _ (CmmT_SpecChar '-') }
72 '+' { L _ (CmmT_SpecChar '+') }
73 '&' { L _ (CmmT_SpecChar '&') }
74 '^' { L _ (CmmT_SpecChar '^') }
75 '|' { L _ (CmmT_SpecChar '|') }
76 '>' { L _ (CmmT_SpecChar '>') }
77 '<' { L _ (CmmT_SpecChar '<') }
78 ',' { L _ (CmmT_SpecChar ',') }
79 '!' { L _ (CmmT_SpecChar '!') }
81 '..' { L _ (CmmT_DotDot) }
82 '::' { L _ (CmmT_DoubleColon) }
83 '>>' { L _ (CmmT_Shr) }
84 '<<' { L _ (CmmT_Shl) }
85 '>=' { L _ (CmmT_Ge) }
86 '<=' { L _ (CmmT_Le) }
87 '==' { L _ (CmmT_Eq) }
88 '!=' { L _ (CmmT_Ne) }
89 '&&' { L _ (CmmT_BoolAnd) }
90 '||' { L _ (CmmT_BoolOr) }
92 'CLOSURE' { L _ (CmmT_CLOSURE) }
93 'INFO_TABLE' { L _ (CmmT_INFO_TABLE) }
94 'INFO_TABLE_RET'{ L _ (CmmT_INFO_TABLE_RET) }
95 'INFO_TABLE_FUN'{ L _ (CmmT_INFO_TABLE_FUN) }
96 'INFO_TABLE_CONSTR'{ L _ (CmmT_INFO_TABLE_CONSTR) }
97 'INFO_TABLE_SELECTOR'{ L _ (CmmT_INFO_TABLE_SELECTOR) }
98 'else' { L _ (CmmT_else) }
99 'export' { L _ (CmmT_export) }
100 'section' { L _ (CmmT_section) }
101 'align' { L _ (CmmT_align) }
102 'goto' { L _ (CmmT_goto) }
103 'if' { L _ (CmmT_if) }
104 'jump' { L _ (CmmT_jump) }
105 'foreign' { L _ (CmmT_foreign) }
106 'prim' { L _ (CmmT_prim) }
107 'return' { L _ (CmmT_return) }
108 'import' { L _ (CmmT_import) }
109 'switch' { L _ (CmmT_switch) }
110 'case' { L _ (CmmT_case) }
111 'default' { L _ (CmmT_default) }
112 'bits8' { L _ (CmmT_bits8) }
113 'bits16' { L _ (CmmT_bits16) }
114 'bits32' { L _ (CmmT_bits32) }
115 'bits64' { L _ (CmmT_bits64) }
116 'float32' { L _ (CmmT_float32) }
117 'float64' { L _ (CmmT_float64) }
119 GLOBALREG { L _ (CmmT_GlobalReg $$) }
120 NAME { L _ (CmmT_Name $$) }
121 STRING { L _ (CmmT_String $$) }
122 INT { L _ (CmmT_Int $$) }
123 FLOAT { L _ (CmmT_Float $$) }
125 %monad { P } { >>= } { return }
126 %lexer { cmmlex } { L _ CmmT_EOF }
128 %tokentype { Located CmmToken }
130 -- C-- operator precedences, taken from the C-- spec
131 %right '||' -- non-std extension, called %disjoin in C--
132 %right '&&' -- non-std extension, called %conjoin in C--
134 %nonassoc '>=' '>' '<=' '<' '!=' '=='
146 : {- empty -} { return () }
147 | cmmtop cmm { do $1; $2 }
149 cmmtop :: { ExtCode }
153 | 'CLOSURE' '(' NAME ',' NAME lits ')' ';'
154 { do lits <- sequence $6;
155 staticClosure $3 $5 (map getLit lits) }
157 -- The only static closures in the RTS are dummy closures like
158 -- stg_END_TSO_QUEUE_closure and stg_dummy_ret. We don't need
159 -- to provide the full generality of static closures here.
161 -- * CCS can always be CCS_DONT_CARE
162 -- * closure is always extern
163 -- * payload is always empty
164 -- * we can derive closure and info table labels from a single NAME
166 cmmdata :: { ExtCode }
167 : 'section' STRING '{' statics '}'
168 { do ss <- sequence $4;
169 code (emitData (section $2) (concat ss)) }
171 statics :: { [ExtFCode [CmmStatic]] }
173 | static statics { $1 : $2 }
175 -- Strings aren't used much in the RTS HC code, so it doesn't seem
176 -- worth allowing inline strings. C-- doesn't allow them anyway.
177 static :: { ExtFCode [CmmStatic] }
178 : NAME ':' { return [CmmDataLabel (mkRtsDataLabelFS $1)] }
179 | type expr ';' { do e <- $2;
180 return [CmmStaticLit (getLit e)] }
181 | type ';' { return [CmmUninitialised
182 (machRepByteWidth $1)] }
183 | 'bits8' '[' ']' STRING ';' { return [mkString $4] }
184 | 'bits8' '[' INT ']' ';' { return [CmmUninitialised
186 | typenot8 '[' INT ']' ';' { return [CmmUninitialised
187 (machRepByteWidth $1 *
189 | 'align' INT ';' { return [CmmAlign (fromIntegral $2)] }
190 | 'CLOSURE' '(' NAME lits ')'
191 { do lits <- sequence $4;
192 return $ map CmmStaticLit $
193 mkStaticClosure (mkRtsInfoLabelFS $3)
194 dontCareCCS (map getLit lits) [] [] [] }
195 -- arrays of closures required for the CHARLIKE & INTLIKE arrays
197 lits :: { [ExtFCode CmmExpr] }
199 | ',' expr lits { $2 : $3 }
201 cmmproc :: { ExtCode }
202 -- TODO: add real SRT/info tables to parsed Cmm
203 : info maybe_formals maybe_frame maybe_gc_block '{' body '}'
204 { do ((info_lbl, entry_ret_label, info, live, formals, frame, gc_block), stmts) <-
205 getCgStmtsEC' $ loopDecls $ do {
206 (info_lbl, entry_ret_label, info, live) <- $1;
207 formals <- sequence $2;
211 return (info_lbl, entry_ret_label, info, live, formals, frame, gc_block) }
212 blks <- code (cgStmtsToBlocks stmts)
213 code (emitInfoTableAndCode entry_ret_label (CmmInfo gc_block frame info) formals blks) }
215 | info maybe_formals ';'
216 { do (info_lbl, entry_ret_label, info, live) <- $1;
217 formals <- sequence $2;
218 code (emitInfoTableAndCode entry_ret_label (CmmInfo Nothing Nothing info) formals []) }
220 | NAME maybe_formals maybe_frame maybe_gc_block '{' body '}'
221 { do ((formals, frame, gc_block), stmts) <-
222 getCgStmtsEC' $ loopDecls $ do {
223 formals <- sequence $2;
227 return (formals, frame, gc_block) }
228 blks <- code (cgStmtsToBlocks stmts)
229 code (emitProc (CmmInfo gc_block frame CmmNonInfoTable) (mkRtsCodeLabelFS $1) formals blks) }
231 info :: { ExtFCode (CLabel, CLabel, CmmInfoTable, [Maybe LocalReg]) }
232 : 'INFO_TABLE' '(' NAME ',' INT ',' INT ',' INT ',' STRING ',' STRING ')'
233 -- ptrs, nptrs, closure type, description, type
234 { do prof <- profilingInfo $11 $13
235 let infoLabel = mkRtsInfoLabelFS $3
236 return (infoLabel, infoLblToEntryLbl infoLabel,
237 CmmInfoTable prof (fromIntegral $9)
238 (ThunkInfo (fromIntegral $5, fromIntegral $7) NoC_SRT),
241 | 'INFO_TABLE_FUN' '(' NAME ',' INT ',' INT ',' INT ',' STRING ',' STRING ',' INT ')'
242 -- ptrs, nptrs, closure type, description, type, fun type
243 { do prof <- profilingInfo $11 $13
244 let infoLabel = mkRtsInfoLabelFS $3
245 return (infoLabel, infoLblToEntryLbl infoLabel,
246 CmmInfoTable prof (fromIntegral $9)
247 (FunInfo (fromIntegral $5, fromIntegral $7) NoC_SRT (fromIntegral $15) 0
251 -- we leave most of the fields zero here. This is only used
252 -- to generate the BCO info table in the RTS at the moment.
254 | 'INFO_TABLE_CONSTR' '(' NAME ',' INT ',' INT ',' INT ',' INT ',' STRING ',' STRING ')'
255 -- ptrs, nptrs, tag, closure type, description, type
256 { do prof <- profilingInfo $13 $15
257 -- If profiling is on, this string gets duplicated,
258 -- but that's the way the old code did it we can fix it some other time.
259 desc_lit <- code $ mkStringCLit $13
260 let infoLabel = mkRtsInfoLabelFS $3
261 return (infoLabel, infoLblToEntryLbl infoLabel,
262 CmmInfoTable prof (fromIntegral $11)
263 (ConstrInfo (fromIntegral $5, fromIntegral $7) (fromIntegral $9) desc_lit),
266 | 'INFO_TABLE_SELECTOR' '(' NAME ',' INT ',' INT ',' STRING ',' STRING ')'
267 -- selector, closure type, description, type
268 { do prof <- profilingInfo $9 $11
269 let infoLabel = mkRtsInfoLabelFS $3
270 return (infoLabel, infoLblToEntryLbl infoLabel,
271 CmmInfoTable prof (fromIntegral $7)
272 (ThunkSelectorInfo (fromIntegral $5) NoC_SRT),
275 | 'INFO_TABLE_RET' '(' NAME ',' INT ')'
276 -- closure type (no live regs)
277 { do let infoLabel = mkRtsInfoLabelFS $3
278 return (infoLabel, infoLblToRetLbl infoLabel,
279 CmmInfoTable (ProfilingInfo zeroCLit zeroCLit) (fromIntegral $5)
280 (ContInfo [] NoC_SRT),
283 | 'INFO_TABLE_RET' '(' NAME ',' INT ',' formals0 ')'
284 -- closure type, live regs
285 { do live <- sequence (map (liftM Just) $7)
286 let infoLabel = mkRtsInfoLabelFS $3
287 return (infoLabel, infoLblToRetLbl infoLabel,
288 CmmInfoTable (ProfilingInfo zeroCLit zeroCLit) (fromIntegral $5)
289 (ContInfo live NoC_SRT),
293 : {- empty -} { return () }
294 | decl body { do $1; $2 }
295 | stmt body { do $1; $2 }
298 : type names ';' { mapM_ (newLocal defaultKind $1) $2 }
299 | STRING type names ';' {% do k <- parseKind $1;
300 return $ mapM_ (newLocal k $2) $3 }
302 | 'import' names ';' { return () } -- ignore imports
303 | 'export' names ';' { return () } -- ignore exports
305 names :: { [FastString] }
307 | NAME ',' names { $1 : $3 }
313 { do l <- newLabel $1; code (labelC l) }
316 { do reg <- $1; e <- $3; stmtEC (CmmAssign reg e) }
317 | type '[' expr ']' '=' expr ';'
320 -- Gah! We really want to say "maybe_results" but that causes
321 -- a shift/reduce conflict with assignment. We either
322 -- we expand out the no-result and single result cases or
323 -- we tweak the syntax to avoid the conflict. The later
324 -- option is taken here because the other way would require
325 -- multiple levels of expanding and get unwieldy.
326 | maybe_results 'foreign' STRING expr '(' hint_exprs0 ')' safety vols ';'
327 {% foreignCall $3 $1 $4 $6 $9 $8 }
328 | maybe_results 'prim' '%' NAME '(' hint_exprs0 ')' safety vols ';'
329 {% primCall $1 $4 $6 $9 $8 }
330 -- stmt-level macros, stealing syntax from ordinary C-- function calls.
331 -- Perhaps we ought to use the %%-form?
332 | NAME '(' exprs0 ')' ';'
334 | 'switch' maybe_range expr '{' arms default '}'
335 { doSwitch $2 $3 $5 $6 }
337 { do l <- lookupLabel $2; stmtEC (CmmBranch l) }
338 | 'jump' expr maybe_actuals ';'
339 { do e1 <- $2; e2 <- sequence $3; stmtEC (CmmJump e1 e2) }
340 | 'return' maybe_actuals ';'
341 { do e <- sequence $2; stmtEC (CmmReturn e) }
342 | 'if' bool_expr '{' body '}' else
343 { ifThenElse $2 $4 $6 }
345 bool_expr :: { ExtFCode BoolExpr }
347 | expr { do e <- $1; return (BoolTest e) }
349 bool_op :: { ExtFCode BoolExpr }
350 : bool_expr '&&' bool_expr { do e1 <- $1; e2 <- $3;
351 return (BoolAnd e1 e2) }
352 | bool_expr '||' bool_expr { do e1 <- $1; e2 <- $3;
353 return (BoolOr e1 e2) }
354 | '!' bool_expr { do e <- $2; return (BoolNot e) }
355 | '(' bool_op ')' { $2 }
357 -- This is not C-- syntax. What to do?
358 safety :: { CmmSafety }
359 : {- empty -} { CmmUnsafe } -- Default may change soon
360 | STRING {% parseSafety $1 }
362 -- This is not C-- syntax. What to do?
363 vols :: { Maybe [GlobalReg] }
364 : {- empty -} { Nothing }
365 | '[' ']' { Just [] }
366 | '[' globals ']' { Just $2 }
368 globals :: { [GlobalReg] }
370 | GLOBALREG ',' globals { $1 : $3 }
372 maybe_range :: { Maybe (Int,Int) }
373 : '[' INT '..' INT ']' { Just (fromIntegral $2, fromIntegral $4) }
374 | {- empty -} { Nothing }
376 arms :: { [([Int],ExtCode)] }
378 | arm arms { $1 : $2 }
380 arm :: { ([Int],ExtCode) }
381 : 'case' ints ':' '{' body '}' { ($2, $5) }
384 : INT { [ fromIntegral $1 ] }
385 | INT ',' ints { fromIntegral $1 : $3 }
387 default :: { Maybe ExtCode }
388 : 'default' ':' '{' body '}' { Just $4 }
389 -- taking a few liberties with the C-- syntax here; C-- doesn't have
390 -- 'default' branches
391 | {- empty -} { Nothing }
394 : {- empty -} { nopEC }
395 | 'else' '{' body '}' { $3 }
397 -- we have to write this out longhand so that Happy's precedence rules
399 expr :: { ExtFCode CmmExpr }
400 : expr '/' expr { mkMachOp MO_U_Quot [$1,$3] }
401 | expr '*' expr { mkMachOp MO_Mul [$1,$3] }
402 | expr '%' expr { mkMachOp MO_U_Rem [$1,$3] }
403 | expr '-' expr { mkMachOp MO_Sub [$1,$3] }
404 | expr '+' expr { mkMachOp MO_Add [$1,$3] }
405 | expr '>>' expr { mkMachOp MO_U_Shr [$1,$3] }
406 | expr '<<' expr { mkMachOp MO_Shl [$1,$3] }
407 | expr '&' expr { mkMachOp MO_And [$1,$3] }
408 | expr '^' expr { mkMachOp MO_Xor [$1,$3] }
409 | expr '|' expr { mkMachOp MO_Or [$1,$3] }
410 | expr '>=' expr { mkMachOp MO_U_Ge [$1,$3] }
411 | expr '>' expr { mkMachOp MO_U_Gt [$1,$3] }
412 | expr '<=' expr { mkMachOp MO_U_Le [$1,$3] }
413 | expr '<' expr { mkMachOp MO_U_Lt [$1,$3] }
414 | expr '!=' expr { mkMachOp MO_Ne [$1,$3] }
415 | expr '==' expr { mkMachOp MO_Eq [$1,$3] }
416 | '~' expr { mkMachOp MO_Not [$2] }
417 | '-' expr { mkMachOp MO_S_Neg [$2] }
418 | expr0 '`' NAME '`' expr0 {% do { mo <- nameToMachOp $3 ;
419 return (mkMachOp mo [$1,$5]) } }
422 expr0 :: { ExtFCode CmmExpr }
423 : INT maybe_ty { return (CmmLit (CmmInt $1 $2)) }
424 | FLOAT maybe_ty { return (CmmLit (CmmFloat $1 $2)) }
425 | STRING { do s <- code (mkStringCLit $1);
428 | type '[' expr ']' { do e <- $3; return (CmmLoad e $1) }
429 | '%' NAME '(' exprs0 ')' {% exprOp $2 $4 }
430 | '(' expr ')' { $2 }
433 -- leaving out the type of a literal gives you the native word size in C--
434 maybe_ty :: { MachRep }
435 : {- empty -} { wordRep }
438 maybe_actuals :: { [ExtFCode (CmmExpr, MachHint)] }
440 | '(' hint_exprs0 ')' { $2 }
442 hint_exprs0 :: { [ExtFCode (CmmExpr, MachHint)] }
446 hint_exprs :: { [ExtFCode (CmmExpr, MachHint)] }
448 | hint_expr ',' hint_exprs { $1 : $3 }
450 hint_expr :: { ExtFCode (CmmExpr, MachHint) }
451 : expr { do e <- $1; return (e, inferHint e) }
452 | expr STRING {% do h <- parseHint $2;
454 e <- $1; return (e,h) }
456 exprs0 :: { [ExtFCode CmmExpr] }
460 exprs :: { [ExtFCode CmmExpr] }
462 | expr ',' exprs { $1 : $3 }
464 reg :: { ExtFCode CmmExpr }
465 : NAME { lookupName $1 }
466 | GLOBALREG { return (CmmReg (CmmGlobal $1)) }
468 maybe_results :: { [ExtFCode (CmmFormal, MachHint)] }
470 | '(' hint_lregs ')' '=' { $2 }
472 hint_lregs :: { [ExtFCode (CmmFormal, MachHint)] }
474 | hint_lreg ',' { [$1] }
475 | hint_lreg ',' hint_lregs { $1 : $3 }
477 hint_lreg :: { ExtFCode (CmmFormal, MachHint) }
478 : local_lreg { do e <- $1; return (e, inferHint (CmmReg (CmmLocal e))) }
479 | STRING local_lreg {% do h <- parseHint $1;
481 e <- $2; return (e,h) }
483 local_lreg :: { ExtFCode LocalReg }
484 : NAME { do e <- lookupName $1;
487 CmmReg (CmmLocal r) -> r
488 other -> pprPanic "CmmParse:" (ftext $1 <> text " not a local register") }
490 lreg :: { ExtFCode CmmReg }
491 : NAME { do e <- lookupName $1;
495 other -> pprPanic "CmmParse:" (ftext $1 <> text " not a register") }
496 | GLOBALREG { return (CmmGlobal $1) }
498 maybe_formals :: { [ExtFCode LocalReg] }
500 | '(' formals0 ')' { $2 }
502 formals0 :: { [ExtFCode LocalReg] }
506 formals :: { [ExtFCode LocalReg] }
507 : formal ',' { [$1] }
509 | formal ',' formals { $1 : $3 }
511 formal :: { ExtFCode LocalReg }
512 : type NAME { newLocal defaultKind $1 $2 }
513 | STRING type NAME {% do k <- parseKind $1;
514 return $ newLocal k $2 $3 }
516 maybe_frame :: { ExtFCode (Maybe UpdateFrame) }
517 : {- empty -} { return Nothing }
518 | 'jump' expr '(' exprs0 ')' { do { target <- $2;
520 return $ Just (UpdateFrame target args) } }
522 maybe_gc_block :: { ExtFCode (Maybe BlockId) }
523 : {- empty -} { return Nothing }
525 { do l <- lookupLabel $2; return (Just l) }
531 typenot8 :: { MachRep }
538 section :: String -> Section
539 section "text" = Text
540 section "data" = Data
541 section "rodata" = ReadOnlyData
542 section "relrodata" = RelocatableReadOnlyData
543 section "bss" = UninitialisedData
544 section s = OtherSection s
546 mkString :: String -> CmmStatic
547 mkString s = CmmString (map (fromIntegral.ord) s)
549 -- mkMachOp infers the type of the MachOp from the type of its first
550 -- argument. We assume that this is correct: for MachOps that don't have
551 -- symmetrical args (e.g. shift ops), the first arg determines the type of
553 mkMachOp :: (MachRep -> MachOp) -> [ExtFCode CmmExpr] -> ExtFCode CmmExpr
554 mkMachOp fn args = do
555 arg_exprs <- sequence args
556 return (CmmMachOp (fn (cmmExprRep (head arg_exprs))) arg_exprs)
558 getLit :: CmmExpr -> CmmLit
559 getLit (CmmLit l) = l
560 getLit (CmmMachOp (MO_S_Neg _) [CmmLit (CmmInt i r)]) = CmmInt (negate i) r
561 getLit _ = panic "invalid literal" -- TODO messy failure
563 nameToMachOp :: FastString -> P (MachRep -> MachOp)
565 case lookupUFM machOps name of
566 Nothing -> fail ("unknown primitive " ++ unpackFS name)
569 exprOp :: FastString -> [ExtFCode CmmExpr] -> P (ExtFCode CmmExpr)
570 exprOp name args_code =
571 case lookupUFM exprMacros name of
572 Just f -> return $ do
573 args <- sequence args_code
576 mo <- nameToMachOp name
577 return $ mkMachOp mo args_code
579 exprMacros :: UniqFM ([CmmExpr] -> CmmExpr)
580 exprMacros = listToUFM [
581 ( FSLIT("ENTRY_CODE"), \ [x] -> entryCode x ),
582 ( FSLIT("INFO_PTR"), \ [x] -> closureInfoPtr x ),
583 ( FSLIT("STD_INFO"), \ [x] -> infoTable x ),
584 ( FSLIT("FUN_INFO"), \ [x] -> funInfoTable x ),
585 ( FSLIT("GET_ENTRY"), \ [x] -> entryCode (closureInfoPtr x) ),
586 ( FSLIT("GET_STD_INFO"), \ [x] -> infoTable (closureInfoPtr x) ),
587 ( FSLIT("GET_FUN_INFO"), \ [x] -> funInfoTable (closureInfoPtr x) ),
588 ( FSLIT("INFO_TYPE"), \ [x] -> infoTableClosureType x ),
589 ( FSLIT("INFO_PTRS"), \ [x] -> infoTablePtrs x ),
590 ( FSLIT("INFO_NPTRS"), \ [x] -> infoTableNonPtrs x )
593 -- we understand a subset of C-- primitives:
594 machOps = listToUFM $
595 map (\(x, y) -> (mkFastString x, y)) [
602 ( "quot", MO_S_Quot ),
604 ( "divu", MO_U_Quot ),
605 ( "modu", MO_U_Rem ),
623 ( "fneg", MO_S_Neg ),
630 ( "shrl", MO_U_Shr ),
631 ( "shra", MO_S_Shr ),
633 ( "lobits8", flip MO_U_Conv I8 ),
634 ( "lobits16", flip MO_U_Conv I16 ),
635 ( "lobits32", flip MO_U_Conv I32 ),
636 ( "lobits64", flip MO_U_Conv I64 ),
637 ( "sx16", flip MO_S_Conv I16 ),
638 ( "sx32", flip MO_S_Conv I32 ),
639 ( "sx64", flip MO_S_Conv I64 ),
640 ( "zx16", flip MO_U_Conv I16 ),
641 ( "zx32", flip MO_U_Conv I32 ),
642 ( "zx64", flip MO_U_Conv I64 ),
643 ( "f2f32", flip MO_S_Conv F32 ), -- TODO; rounding mode
644 ( "f2f64", flip MO_S_Conv F64 ), -- TODO; rounding mode
645 ( "f2i8", flip MO_S_Conv I8 ),
646 ( "f2i16", flip MO_S_Conv I16 ),
647 ( "f2i32", flip MO_S_Conv I32 ),
648 ( "f2i64", flip MO_S_Conv I64 ),
649 ( "i2f32", flip MO_S_Conv F32 ),
650 ( "i2f64", flip MO_S_Conv F64 )
653 callishMachOps = listToUFM $
654 map (\(x, y) -> (mkFastString x, y)) [
655 ( "write_barrier", MO_WriteBarrier )
656 -- ToDo: the rest, maybe
659 parseSafety :: String -> P CmmSafety
660 parseSafety "safe" = return (CmmSafe NoC_SRT)
661 parseSafety "unsafe" = return CmmUnsafe
662 parseSafety str = fail ("unrecognised safety: " ++ str)
664 parseHint :: String -> P MachHint
665 parseHint "ptr" = return PtrHint
666 parseHint "signed" = return SignedHint
667 parseHint "float" = return FloatHint
668 parseHint str = fail ("unrecognised hint: " ++ str)
670 parseKind :: String -> P Kind
671 parseKind "ptr" = return KindPtr
672 parseKind str = fail ("unrecognized kin: " ++ str)
675 defaultKind = KindNonPtr
677 -- labels are always pointers, so we might as well infer the hint
678 inferHint :: CmmExpr -> MachHint
679 inferHint (CmmLit (CmmLabel _)) = PtrHint
680 inferHint (CmmReg (CmmGlobal g)) | isPtrGlobalReg g = PtrHint
683 isPtrGlobalReg Sp = True
684 isPtrGlobalReg SpLim = True
685 isPtrGlobalReg Hp = True
686 isPtrGlobalReg HpLim = True
687 isPtrGlobalReg CurrentTSO = True
688 isPtrGlobalReg CurrentNursery = True
689 isPtrGlobalReg _ = False
692 happyError = srcParseFail
694 -- -----------------------------------------------------------------------------
695 -- Statement-level macros
697 stmtMacro :: FastString -> [ExtFCode CmmExpr] -> P ExtCode
698 stmtMacro fun args_code = do
699 case lookupUFM stmtMacros fun of
700 Nothing -> fail ("unknown macro: " ++ unpackFS fun)
701 Just fcode -> return $ do
702 args <- sequence args_code
705 stmtMacros :: UniqFM ([CmmExpr] -> Code)
706 stmtMacros = listToUFM [
707 ( FSLIT("CCS_ALLOC"), \[words,ccs] -> profAlloc words ccs ),
708 ( FSLIT("CLOSE_NURSERY"), \[] -> emitCloseNursery ),
709 ( FSLIT("ENTER_CCS_PAP_CL"), \[e] -> enterCostCentrePAP e ),
710 ( FSLIT("ENTER_CCS_THUNK"), \[e] -> enterCostCentreThunk e ),
711 ( FSLIT("HP_CHK_GEN"), \[words,liveness,reentry] ->
712 hpChkGen words liveness reentry ),
713 ( FSLIT("HP_CHK_NP_ASSIGN_SP0"), \[e,f] -> hpChkNodePointsAssignSp0 e f ),
714 ( FSLIT("LOAD_THREAD_STATE"), \[] -> emitLoadThreadState ),
715 ( FSLIT("LDV_ENTER"), \[e] -> ldvEnter e ),
716 ( FSLIT("LDV_RECORD_CREATE"), \[e] -> ldvRecordCreate e ),
717 ( FSLIT("OPEN_NURSERY"), \[] -> emitOpenNursery ),
718 ( FSLIT("PUSH_UPD_FRAME"), \[sp,e] -> emitPushUpdateFrame sp e ),
719 ( FSLIT("SAVE_THREAD_STATE"), \[] -> emitSaveThreadState ),
720 ( FSLIT("SET_HDR"), \[ptr,info,ccs] ->
721 emitSetDynHdr ptr info ccs ),
722 ( FSLIT("STK_CHK_GEN"), \[words,liveness,reentry] ->
723 stkChkGen words liveness reentry ),
724 ( FSLIT("STK_CHK_NP"), \[e] -> stkChkNodePoints e ),
725 ( FSLIT("TICK_ALLOC_PRIM"), \[hdr,goods,slop] ->
726 tickyAllocPrim hdr goods slop ),
727 ( FSLIT("TICK_ALLOC_PAP"), \[goods,slop] ->
728 tickyAllocPAP goods slop ),
729 ( FSLIT("TICK_ALLOC_UP_THK"), \[goods,slop] ->
730 tickyAllocThunk goods slop ),
731 ( FSLIT("UPD_BH_UPDATABLE"), \[] -> emitBlackHoleCode False ),
732 ( FSLIT("UPD_BH_SINGLE_ENTRY"), \[] -> emitBlackHoleCode True ),
734 ( FSLIT("RET_P"), \[a] -> emitRetUT [(PtrArg,a)]),
735 ( FSLIT("RET_N"), \[a] -> emitRetUT [(NonPtrArg,a)]),
736 ( FSLIT("RET_PP"), \[a,b] -> emitRetUT [(PtrArg,a),(PtrArg,b)]),
737 ( FSLIT("RET_NN"), \[a,b] -> emitRetUT [(NonPtrArg,a),(NonPtrArg,b)]),
738 ( FSLIT("RET_NP"), \[a,b] -> emitRetUT [(NonPtrArg,a),(PtrArg,b)]),
739 ( FSLIT("RET_PPP"), \[a,b,c] -> emitRetUT [(PtrArg,a),(PtrArg,b),(PtrArg,c)]),
740 ( FSLIT("RET_NPP"), \[a,b,c] -> emitRetUT [(NonPtrArg,a),(PtrArg,b),(PtrArg,c)]),
741 ( FSLIT("RET_NNP"), \[a,b,c] -> emitRetUT [(NonPtrArg,a),(NonPtrArg,b),(PtrArg,c)]),
742 ( FSLIT("RET_NNNP"), \[a,b,c,d] -> emitRetUT [(NonPtrArg,a),(NonPtrArg,b),(NonPtrArg,c),(PtrArg,d)]),
743 ( FSLIT("RET_NPNP"), \[a,b,c,d] -> emitRetUT [(NonPtrArg,a),(PtrArg,b),(NonPtrArg,c),(PtrArg,d)])
747 -- -----------------------------------------------------------------------------
748 -- Our extended FCode monad.
750 -- We add a mapping from names to CmmExpr, to support local variable names in
751 -- the concrete C-- code. The unique supply of the underlying FCode monad
752 -- is used to grab a new unique for each local variable.
754 -- In C--, a local variable can be declared anywhere within a proc,
755 -- and it scopes from the beginning of the proc to the end. Hence, we have
756 -- to collect declarations as we parse the proc, and feed the environment
757 -- back in circularly (to avoid a two-pass algorithm).
759 data Named = Var CmmExpr | Label BlockId
760 type Decls = [(FastString,Named)]
761 type Env = UniqFM Named
763 newtype ExtFCode a = EC { unEC :: Env -> Decls -> FCode (Decls, a) }
765 type ExtCode = ExtFCode ()
767 returnExtFC a = EC $ \e s -> return (s, a)
768 thenExtFC (EC m) k = EC $ \e s -> do (s',r) <- m e s; unEC (k r) e s'
770 instance Monad ExtFCode where
774 -- This function takes the variable decarations and imports and makes
775 -- an environment, which is looped back into the computation. In this
776 -- way, we can have embedded declarations that scope over the whole
777 -- procedure, and imports that scope over the entire module.
778 loopDecls :: ExtFCode a -> ExtFCode a
779 loopDecls (EC fcode) =
780 EC $ \e s -> fixC (\ ~(decls,a) -> fcode (addListToUFM e decls) [])
782 getEnv :: ExtFCode Env
783 getEnv = EC $ \e s -> return (s, e)
785 addVarDecl :: FastString -> CmmExpr -> ExtCode
786 addVarDecl var expr = EC $ \e s -> return ((var, Var expr):s, ())
788 addLabel :: FastString -> BlockId -> ExtCode
789 addLabel name block_id = EC $ \e s -> return ((name, Label block_id):s, ())
791 newLocal :: Kind -> MachRep -> FastString -> ExtFCode LocalReg
792 newLocal kind ty name = do
794 let reg = LocalReg u ty kind
795 addVarDecl name (CmmReg (CmmLocal reg))
798 newLabel :: FastString -> ExtFCode BlockId
801 addLabel name (BlockId u)
804 lookupLabel :: FastString -> ExtFCode BlockId
805 lookupLabel name = do
808 case lookupUFM env name of
810 _other -> BlockId (newTagUnique (getUnique name) 'L')
812 -- Unknown names are treated as if they had been 'import'ed.
813 -- This saves us a lot of bother in the RTS sources, at the expense of
814 -- deferring some errors to link time.
815 lookupName :: FastString -> ExtFCode CmmExpr
819 case lookupUFM env name of
821 _other -> CmmLit (CmmLabel (mkRtsCodeLabelFS name))
823 -- Lifting FCode computations into the ExtFCode monad:
824 code :: FCode a -> ExtFCode a
825 code fc = EC $ \e s -> do r <- fc; return (s, r)
827 code2 :: (FCode (Decls,b) -> FCode ((Decls,b),c))
828 -> ExtFCode b -> ExtFCode c
829 code2 f (EC ec) = EC $ \e s -> do ((s',b),c) <- f (ec e s); return (s',c)
832 stmtEC stmt = code (stmtC stmt)
833 stmtsEC stmts = code (stmtsC stmts)
834 getCgStmtsEC = code2 getCgStmts'
835 getCgStmtsEC' = code2 (\m -> getCgStmts' m >>= f)
836 where f ((decl, b), c) = return ((decl, b), (b, c))
838 forkLabelledCodeEC ec = do
839 stmts <- getCgStmtsEC ec
840 code (forkCgStmts stmts)
843 profilingInfo desc_str ty_str = do
844 lit1 <- if opt_SccProfilingOn
845 then code $ mkStringCLit desc_str
846 else return (mkIntCLit 0)
847 lit2 <- if opt_SccProfilingOn
848 then code $ mkStringCLit ty_str
849 else return (mkIntCLit 0)
850 return (ProfilingInfo lit1 lit2)
853 staticClosure :: FastString -> FastString -> [CmmLit] -> ExtCode
854 staticClosure cl_label info payload
855 = code $ emitDataLits (mkRtsDataLabelFS cl_label) lits
856 where lits = mkStaticClosure (mkRtsInfoLabelFS info) dontCareCCS payload [] [] []
860 -> [ExtFCode (CmmFormal,MachHint)]
862 -> [ExtFCode (CmmExpr,MachHint)]
866 foreignCall conv_string results_code expr_code args_code vols safety
867 = do convention <- case conv_string of
868 "C" -> return CCallConv
869 "C--" -> return CmmCallConv
870 _ -> fail ("unknown calling convention: " ++ conv_string)
872 results <- sequence results_code
874 args <- sequence args_code
875 --code (stmtC (CmmCall (CmmForeignCall expr convention) results args safety))
877 -- Temporary hack so at least some functions are CmmSafe
878 CmmCallConv -> code (stmtC (CmmCall (CmmForeignCall expr convention) results args safety))
881 code (emitForeignCall' PlayRisky results
882 (CmmForeignCall expr convention) args vols NoC_SRT)
884 code (emitForeignCall' (PlaySafe unused) results
885 (CmmForeignCall expr convention) args vols NoC_SRT) where
886 unused = panic "not used by emitForeignCall'"
889 :: [ExtFCode (CmmFormal,MachHint)]
891 -> [ExtFCode (CmmExpr,MachHint)]
895 primCall results_code name args_code vols safety
896 = case lookupUFM callishMachOps name of
897 Nothing -> fail ("unknown primitive " ++ unpackFS name)
898 Just p -> return $ do
899 results <- sequence results_code
900 args <- sequence args_code
903 code (emitForeignCall' PlayRisky results
904 (CmmPrim p) args vols NoC_SRT)
906 code (emitForeignCall' (PlaySafe unused) results
907 (CmmPrim p) args vols NoC_SRT) where
908 unused = panic "not used by emitForeignCall'"
910 doStore :: MachRep -> ExtFCode CmmExpr -> ExtFCode CmmExpr -> ExtCode
911 doStore rep addr_code val_code
912 = do addr <- addr_code
914 -- if the specified store type does not match the type of the expr
915 -- on the rhs, then we insert a coercion that will cause the type
916 -- mismatch to be flagged by cmm-lint. If we don't do this, then
917 -- the store will happen at the wrong type, and the error will not
920 | cmmExprRep val /= rep = CmmMachOp (MO_U_Conv rep rep) [val]
922 stmtEC (CmmStore addr coerce_val)
924 -- Return an unboxed tuple.
925 emitRetUT :: [(CgRep,CmmExpr)] -> Code
927 tickyUnboxedTupleReturn (length args) -- TICK
928 (sp, stmts) <- pushUnboxedTuple 0 args
930 when (sp /= 0) $ stmtC (CmmAssign spReg (cmmRegOffW spReg (-sp)))
931 stmtC (CmmJump (entryCode (CmmLoad (cmmRegOffW spReg sp) wordRep)) [])
932 -- TODO (when using CPS): emitStmt (CmmReturn (map snd args))
934 -- -----------------------------------------------------------------------------
935 -- If-then-else and boolean expressions
938 = BoolExpr `BoolAnd` BoolExpr
939 | BoolExpr `BoolOr` BoolExpr
943 -- ToDo: smart constructors which simplify the boolean expression.
945 ifThenElse cond then_part else_part = do
946 then_id <- code newLabelC
947 join_id <- code newLabelC
951 stmtEC (CmmBranch join_id)
952 code (labelC then_id)
954 -- fall through to join
955 code (labelC join_id)
957 -- 'emitCond cond true_id' emits code to test whether the cond is true,
958 -- branching to true_id if so, and falling through otherwise.
959 emitCond (BoolTest e) then_id = do
960 stmtEC (CmmCondBranch e then_id)
961 emitCond (BoolNot (BoolTest (CmmMachOp op args))) then_id
962 | Just op' <- maybeInvertComparison op
963 = emitCond (BoolTest (CmmMachOp op' args)) then_id
964 emitCond (BoolNot e) then_id = do
965 else_id <- code newLabelC
967 stmtEC (CmmBranch then_id)
968 code (labelC else_id)
969 emitCond (e1 `BoolOr` e2) then_id = do
972 emitCond (e1 `BoolAnd` e2) then_id = do
973 -- we'd like to invert one of the conditionals here to avoid an
974 -- extra branch instruction, but we can't use maybeInvertComparison
975 -- here because we can't look too closely at the expression since
977 and_id <- code newLabelC
978 else_id <- code newLabelC
980 stmtEC (CmmBranch else_id)
983 code (labelC else_id)
986 -- -----------------------------------------------------------------------------
989 -- We use a simplified form of C-- switch statements for now. A
990 -- switch statement always compiles to a table jump. Each arm can
991 -- specify a list of values (not ranges), and there can be a single
992 -- default branch. The range of the table is given either by the
993 -- optional range on the switch (eg. switch [0..7] {...}), or by
994 -- the minimum/maximum values from the branches.
996 doSwitch :: Maybe (Int,Int) -> ExtFCode CmmExpr -> [([Int],ExtCode)]
997 -> Maybe ExtCode -> ExtCode
998 doSwitch mb_range scrut arms deflt
1000 -- Compile code for the default branch
1003 Nothing -> return Nothing
1004 Just e -> do b <- forkLabelledCodeEC e; return (Just b)
1006 -- Compile each case branch
1007 table_entries <- mapM emitArm arms
1009 -- Construct the table
1011 all_entries = concat table_entries
1012 ixs = map fst all_entries
1014 | Just (l,u) <- mb_range = (l,u)
1015 | otherwise = (minimum ixs, maximum ixs)
1017 entries = elems (accumArray (\_ a -> Just a) dflt_entry (min,max)
1020 -- ToDo: check for out of range and jump to default if necessary
1021 stmtEC (CmmSwitch expr entries)
1023 emitArm :: ([Int],ExtCode) -> ExtFCode [(Int,BlockId)]
1024 emitArm (ints,code) = do
1025 blockid <- forkLabelledCodeEC code
1026 return [ (i,blockid) | i <- ints ]
1029 -- -----------------------------------------------------------------------------
1030 -- Putting it all together
1032 -- The initial environment: we define some constants that the compiler
1033 -- knows about here.
1035 initEnv = listToUFM [
1036 ( FSLIT("SIZEOF_StgHeader"),
1037 Var (CmmLit (CmmInt (fromIntegral (fixedHdrSize * wORD_SIZE)) wordRep) )),
1038 ( FSLIT("SIZEOF_StgInfoTable"),
1039 Var (CmmLit (CmmInt (fromIntegral stdInfoTableSizeB) wordRep) ))
1042 parseCmmFile :: DynFlags -> FilePath -> IO (Maybe Cmm)
1043 parseCmmFile dflags filename = do
1044 showPass dflags "ParseCmm"
1045 buf <- hGetStringBuffer filename
1047 init_loc = mkSrcLoc (mkFastString filename) 1 0
1048 init_state = (mkPState buf init_loc dflags) { lex_state = [0] }
1049 -- reset the lex_state: the Lexer monad leaves some stuff
1050 -- in there we don't want.
1051 case unP cmmParse init_state of
1052 PFailed span err -> do printError span err; return Nothing
1054 cmm <- initC dflags no_module (getCmm (unEC code initEnv [] >> return ()))
1055 let ms = getMessages pst
1056 printErrorsAndWarnings dflags ms
1057 when (errorsFound dflags ms) $ exitWith (ExitFailure 1)
1058 dumpIfSet_dyn dflags Opt_D_dump_cmm "Cmm" (pprCmms [cmm])
1061 no_module = panic "parseCmmFile: no module"