2 % (c) The University of Glasgow 2006
3 % (c) The GRASP/AQUA Project, Glasgow University, 1992-1998
8 -- The above warning supression flag is a temporary kludge.
9 -- While working on this module you are encouraged to remove it and fix
10 -- any warnings in the module. See
11 -- http://hackage.haskell.org/trac/ghc/wiki/Commentary/CodingStyle#Warnings
14 module CgCase ( cgCase, saveVolatileVarsAndRegs,
15 restoreCurrentCostCentre
18 #include "HsVersions.h"
20 import {-# SOURCE #-} CgExpr ( cgExpr )
55 = GCMayHappen -- The scrutinee may involve GC, so everything must be
56 -- tidy before the code for the scrutinee.
58 | NoGC -- The scrutinee is a primitive value, or a call to a
59 -- primitive op which does no GC. Hence the case can
60 -- be done inline, without tidying up first.
63 It is quite interesting to decide whether to put a heap-check
64 at the start of each alternative. Of course we certainly have
65 to do so if the case forces an evaluation, or if there is a primitive
66 op which can trigger GC.
68 A more interesting situation is this:
75 default -> !C!; ...C...
78 where \tr{!x!} indicates a possible heap-check point. The heap checks
79 in the alternatives {\em can} be omitted, in which case the topmost
80 heapcheck will take their worst case into account.
82 In favour of omitting \tr{!B!}, \tr{!C!}:
84 - {\em May} save a heap overflow test,
85 if ...A... allocates anything. The other advantage
86 of this is that we can use relative addressing
87 from a single Hp to get at all the closures so allocated.
89 - No need to save volatile vars etc across the case
93 - May do more allocation than reqd. This sometimes bites us
94 badly. For example, nfib (ha!) allocates about 30\% more space if the
95 worst-casing is done, because many many calls to nfib are leaf calls
96 which don't need to allocate anything.
98 This never hurts us if there is only one alternative.
110 Special case #1: case of literal.
113 cgCase (StgLit lit) live_in_whole_case live_in_alts bndr
114 alt_type@(PrimAlt tycon) alts
115 = do { tmp_reg <- bindNewToTemp bndr
116 ; cm_lit <- cgLit lit
117 ; stmtC (CmmAssign (CmmLocal tmp_reg) (CmmLit cm_lit))
118 ; cgPrimAlts NoGC alt_type (CmmLocal tmp_reg) alts }
121 Special case #2: scrutinising a primitive-typed variable. No
122 evaluation required. We don't save volatile variables, nor do we do a
123 heap-check in the alternatives. Instead, the heap usage of the
124 alternatives is worst-cased and passed upstream. This can result in
125 allocating more heap than strictly necessary, but it will sometimes
126 eliminate a heap check altogether.
129 cgCase (StgApp v []) live_in_whole_case live_in_alts bndr
130 alt_type@(PrimAlt tycon) alts
131 = do { -- Careful! we can't just bind the default binder to the same thing
132 -- as the scrutinee, since it might be a stack location, and having
133 -- two bindings pointing at the same stack locn doesn't work (it
134 -- confuses nukeDeadBindings). Hence, use a new temp.
135 v_info <- getCgIdInfo v
136 ; amode <- idInfoToAmode v_info
137 ; tmp_reg <- bindNewToTemp bndr
138 ; stmtC (CmmAssign (CmmLocal tmp_reg) amode)
139 ; cgPrimAlts NoGC alt_type (CmmLocal tmp_reg) alts }
142 Special case #3: inline PrimOps and foreign calls.
145 cgCase (StgOpApp op@(StgPrimOp primop) args _)
146 live_in_whole_case live_in_alts bndr alt_type alts
147 | not (primOpOutOfLine primop)
148 = cgInlinePrimOp primop args bndr alt_type live_in_alts alts
151 TODO: Case-of-case of primop can probably be done inline too (but
152 maybe better to translate it out beforehand). See
153 ghc/lib/misc/PackedString.lhs for examples where this crops up (with
156 Special case #4: inline foreign calls: an unsafe foreign call can be done
157 right here, just like an inline primop.
160 cgCase (StgOpApp op@(StgFCallOp fcall _) args _)
161 live_in_whole_case live_in_alts bndr alt_type alts
162 | unsafe_foreign_call
163 = ASSERT( isSingleton alts )
164 do -- *must* be an unboxed tuple alt.
165 -- exactly like the cgInlinePrimOp case for unboxed tuple alts..
166 { res_tmps <- mapFCs bindNewToTemp non_void_res_ids
167 ; let res_hints = map (typeHint.idType) non_void_res_ids
168 ; cgForeignCall (zipWith CmmHinted res_tmps res_hints) fcall args live_in_alts
171 (_, res_ids, _, rhs) = head alts
172 non_void_res_ids = filter (nonVoidArg . idCgRep) res_ids
176 CCall (CCallSpec _ _ s) -> not (playSafe s)
180 Special case: scrutinising a non-primitive variable.
181 This can be done a little better than the general case, because
182 we can reuse/trim the stack slot holding the variable (if it is in one).
185 cgCase (StgApp fun args)
186 live_in_whole_case live_in_alts bndr alt_type alts
187 = do { fun_info <- getCgIdInfo fun
188 ; arg_amodes <- getArgAmodes args
190 -- Nuking dead bindings *before* calculating the saves is the
191 -- value-add here. We might end up freeing up some slots currently
192 -- occupied by variables only required for the call.
193 -- NOTE: we need to look up the variables used in the call before
194 -- doing this, because some of them may not be in the environment
196 ; nukeDeadBindings live_in_alts
197 ; (save_assts, alts_eob_info, maybe_cc_slot)
198 <- saveVolatileVarsAndRegs live_in_alts
201 <- forkEval alts_eob_info
202 (allocStackTop retAddrSizeW >> nopC)
203 (do { deAllocStackTop retAddrSizeW
204 ; cgEvalAlts maybe_cc_slot bndr alt_type alts })
206 ; setEndOfBlockInfo scrut_eob_info
207 (performTailCall fun_info arg_amodes save_assts) }
210 Note about return addresses: we *always* push a return address, even
211 if because of an optimisation we end up jumping direct to the return
212 code (not through the address itself). The alternatives always assume
213 that the return address is on the stack. The return address is
214 required in case the alternative performs a heap check, since it
215 encodes the liveness of the slots in the activation record.
217 On entry to the case alternative, we can re-use the slot containing
218 the return address immediately after the heap check. That's what the
219 deAllocStackTop call is doing above.
221 Finally, here is the general case.
224 cgCase expr live_in_whole_case live_in_alts bndr alt_type alts
225 = do { -- Figure out what volatile variables to save
226 nukeDeadBindings live_in_whole_case
228 ; (save_assts, alts_eob_info, maybe_cc_slot)
229 <- saveVolatileVarsAndRegs live_in_alts
231 -- Save those variables right now!
232 ; emitStmts save_assts
234 -- generate code for the alts
236 <- forkEval alts_eob_info
237 (do { nukeDeadBindings live_in_alts
238 ; allocStackTop retAddrSizeW -- space for retn address
240 (do { deAllocStackTop retAddrSizeW
241 ; cgEvalAlts maybe_cc_slot bndr alt_type alts })
243 ; setEndOfBlockInfo scrut_eob_info (cgExpr expr)
247 There's a lot of machinery going on behind the scenes to manage the
248 stack pointer here. forkEval takes the virtual Sp and free list from
249 the first argument, and turns that into the *real* Sp for the second
250 argument. It also uses this virtual Sp as the args-Sp in the EOB info
251 returned, so that the scrutinee will trim the real Sp back to the
252 right place before doing whatever it does.
253 --SDM (who just spent an hour figuring this out, and didn't want to
256 Why don't we push the return address just before evaluating the
257 scrutinee? Because the slot reserved for the return address might
258 contain something useful, so we wait until performing a tail call or
259 return before pushing the return address (see
260 CgTailCall.pushReturnAddress).
262 This also means that the environment doesn't need to know about the
263 free stack slot for the return address (for generating bitmaps),
264 because we don't reserve it until just before the eval.
266 TODO!! Problem: however, we have to save the current cost centre
267 stack somewhere, because at the eval point the current CCS might be
268 different. So we pick a free stack slot and save CCCS in it. One
269 consequence of this is that activation records on the stack don't
270 follow the layout of closures when we're profiling. The CCS could be
271 anywhere within the record).
273 %************************************************************************
277 %************************************************************************
280 cgInlinePrimOp primop args bndr (PrimAlt tycon) live_in_alts alts
281 | isVoidArg (idCgRep bndr)
282 = ASSERT( con == DEFAULT && isSingleton alts && null bs )
283 do { -- VOID RESULT; just sequencing,
284 -- so get in there and do it
285 cgPrimOp [] primop args live_in_alts
288 (con,bs,_,rhs) = head alts
290 cgInlinePrimOp primop args bndr (PrimAlt tycon) live_in_alts alts
291 = do { -- PRIMITIVE ALTS, with non-void result
292 tmp_reg <- bindNewToTemp bndr
293 ; cgPrimOp [tmp_reg] primop args live_in_alts
294 ; cgPrimAlts NoGC (PrimAlt tycon) (CmmLocal tmp_reg) alts }
296 cgInlinePrimOp primop args bndr (UbxTupAlt tycon) live_in_alts alts
297 = ASSERT( isSingleton alts )
298 do { -- UNBOXED TUPLE ALTS
299 -- No heap check, no yield, just get in there and do it.
300 -- NB: the case binder isn't bound to anything;
301 -- it has a unboxed tuple type
303 res_tmps <- mapFCs bindNewToTemp non_void_res_ids
304 ; cgPrimOp res_tmps primop args live_in_alts
307 (_, res_ids, _, rhs) = head alts
308 non_void_res_ids = filter (nonVoidArg . idCgRep) res_ids
310 cgInlinePrimOp primop args bndr (AlgAlt tycon) live_in_alts alts
311 = do { -- ENUMERATION TYPE RETURN
312 -- Typical: case a ># b of { True -> ..; False -> .. }
313 -- The primop itself returns an index into the table of
314 -- closures for the enumeration type.
315 tag_amode <- ASSERT( isEnumerationTyCon tycon )
316 do_enum_primop primop
318 -- Bind the default binder if necessary
319 -- (avoiding it avoids the assignment)
320 -- The deadness info is set by StgVarInfo
321 ; this_pkg <- getThisPackage
322 ; whenC (not (isDeadBinder bndr))
323 (do { tmp_reg <- bindNewToTemp bndr
326 (tagToClosure tycon tag_amode)) })
329 ; (branches, mb_deflt) <- cgAlgAlts NoGC Nothing{-cc_slot-}
333 ; emitSwitch tag_amode branches mb_deflt 0 (tyConFamilySize tycon - 1)
337 do_enum_primop :: PrimOp -> FCode CmmExpr -- Returns amode for result
338 do_enum_primop TagToEnumOp -- No code!
340 (_,e) <- getArgAmode arg
342 do_enum_primop primop
343 = do tmp <- newNonPtrTemp wordRep
344 cgPrimOp [tmp] primop args live_in_alts
345 returnFC (CmmReg (CmmLocal tmp))
347 cgInlinePrimOp primop arg_amodes bndr PolyAlt live_in_alts alts
348 = pprPanic "cgCase: case of primop has polymorphic type" (ppr bndr)
351 %************************************************************************
353 \subsection[CgCase-alts]{Alternatives}
355 %************************************************************************
357 @cgEvalAlts@ returns an addressing mode for a continuation for the
358 alternatives of a @case@, used in a context when there
359 is some evaluation to be done.
362 cgEvalAlts :: Maybe VirtualSpOffset -- Offset of cost-centre to be restored, if any
366 -> FCode Sequel -- Any addr modes inside are guaranteed
367 -- to be a label so that we can duplicate it
368 -- without risk of duplicating code
370 cgEvalAlts cc_slot bndr alt_type@(PrimAlt tycon) alts
371 = do { let rep = tyConCgRep tycon
372 reg = dataReturnConvPrim rep -- Bottom for voidRep
374 ; abs_c <- forkProc $ do
375 { -- Bind the case binder, except if it's void
376 -- (reg is bottom in that case)
377 whenC (nonVoidArg rep) $
378 bindNewToReg bndr reg (mkLFArgument bndr)
379 ; restoreCurrentCostCentre cc_slot True
380 ; cgPrimAlts GCMayHappen alt_type reg alts }
382 ; lbl <- emitReturnTarget (idName bndr) abs_c
383 ; returnFC (CaseAlts lbl Nothing bndr) }
385 cgEvalAlts cc_slot bndr (UbxTupAlt _) [(con,args,_,rhs)]
386 = -- Unboxed tuple case
387 -- By now, the simplifier should have have turned it
388 -- into case e of (# a,b #) -> e
389 -- There shouldn't be a
390 -- case e of DEFAULT -> e
391 ASSERT2( case con of { DataAlt _ -> True; other -> False },
392 text "cgEvalAlts: dodgy case of unboxed tuple type" )
393 do { -- forkAbsC for the RHS, so that the envt is
394 -- not changed for the emitReturn call
395 abs_c <- forkProc $ do
396 { (live_regs, ptrs, nptrs, _) <- bindUnboxedTupleComponents args
397 -- Restore the CC *after* binding the tuple components,
398 -- so that we get the stack offset of the saved CC right.
399 ; restoreCurrentCostCentre cc_slot True
400 -- Generate a heap check if necessary
401 -- and finally the code for the alternative
402 ; unbxTupleHeapCheck live_regs ptrs nptrs noStmts
404 ; lbl <- emitReturnTarget (idName bndr) abs_c
405 ; returnFC (CaseAlts lbl Nothing bndr) }
407 cgEvalAlts cc_slot bndr alt_type alts
408 = -- Algebraic and polymorphic case
409 do { -- Bind the default binder
410 bindNewToReg bndr nodeReg (mkLFArgument bndr)
412 -- Generate sequel info for use downstream
413 -- At the moment, we only do it if the type is vector-returnable.
414 -- Reason: if not, then it costs extra to label the
415 -- alternatives, because we'd get return code like:
417 -- switch TagReg { 0 : JMP(alt_1); 1 : JMP(alt_2) ..etc }
419 -- which is worse than having the alt code in the switch statement
421 ; (alts, mb_deflt) <- cgAlgAlts GCMayHappen cc_slot alt_type alts
423 ; (lbl, branches) <- emitAlgReturnTarget (idName bndr)
426 ; returnFC (CaseAlts lbl branches bndr) }
428 fam_sz = case alt_type of
429 AlgAlt tc -> tyConFamilySize tc
434 HWL comment on {\em GrAnSim\/} (adding GRAN_YIELDs for context switch): If
435 we do an inlining of the case no separate functions for returning are
436 created, so we don't have to generate a GRAN_YIELD in that case. This info
437 must be propagated to cgAlgAltRhs (where the GRAN_YIELD macro might be
438 emitted). Hence, the new Bool arg to cgAlgAltRhs.
440 %************************************************************************
442 \subsection[CgCase-alg-alts]{Algebraic alternatives}
444 %************************************************************************
446 In @cgAlgAlts@, none of the binders in the alternatives are
447 assumed to be yet bound.
449 HWL comment on {\em GrAnSim\/} (adding GRAN_YIELDs for context switch): The
450 last arg of cgAlgAlts indicates if we want a context switch at the
451 beginning of each alternative. Normally we want that. The only exception
452 are inlined alternatives.
456 -> Maybe VirtualSpOffset
457 -> AltType -- ** AlgAlt or PolyAlt only **
458 -> [StgAlt] -- The alternatives
459 -> FCode ( [(ConTagZ, CgStmts)], -- The branches
460 Maybe CgStmts ) -- The default case
462 cgAlgAlts gc_flag cc_slot alt_type alts
463 = do alts <- forkAlts [ cgAlgAlt gc_flag cc_slot alt_type alt | alt <- alts]
465 mb_deflt = case alts of -- DEFAULT is always first, if present
466 ((DEFAULT,blks) : _) -> Just blks
469 branches = [(dataConTagZ con, blks)
470 | (DataAlt con, blks) <- alts]
472 return (branches, mb_deflt)
476 -> Maybe VirtualSpOffset -- Turgid state
477 -> AltType -- ** AlgAlt or PolyAlt only **
479 -> FCode (AltCon, CgStmts)
481 cgAlgAlt gc_flag cc_slot alt_type (con, args, use_mask, rhs)
482 = do { abs_c <- getCgStmts $ do
483 { bind_con_args con args
484 ; restoreCurrentCostCentre cc_slot True
485 ; maybeAltHeapCheck gc_flag alt_type (cgExpr rhs) }
486 ; return (con, abs_c) }
488 bind_con_args DEFAULT args = nopC
489 bind_con_args (DataAlt dc) args = bindConArgs dc args
493 %************************************************************************
495 \subsection[CgCase-prim-alts]{Primitive alternatives}
497 %************************************************************************
499 @cgPrimAlts@ generates suitable a @CSwitch@
500 for dealing with the alternatives of a primitive @case@, given an
501 addressing mode for the thing to scrutinise. It also keeps track of
502 the maximum stack depth encountered down any branch.
504 As usual, no binders in the alternatives are yet bound.
508 -> AltType -- Always PrimAlt, but passed to maybeAltHeapCheck
509 -> CmmReg -- Scrutinee
510 -> [StgAlt] -- Alternatives
512 -- NB: cgPrimAlts emits code that does the case analysis.
513 -- It's often used in inline situations, rather than to genearte
514 -- a labelled return point. That's why its interface is a little
515 -- different to cgAlgAlts
517 -- INVARIANT: the default binder is already bound
518 cgPrimAlts gc_flag alt_type scrutinee alts
519 = do { tagged_absCs <- forkAlts (map (cgPrimAlt gc_flag alt_type) alts)
520 ; let ((DEFAULT, deflt_absC) : others) = tagged_absCs -- There is always a default
521 alt_absCs = [(lit,rhs) | (LitAlt lit, rhs) <- others]
522 ; emitLitSwitch (CmmReg scrutinee) alt_absCs deflt_absC }
526 -> StgAlt -- The alternative
527 -> FCode (AltCon, CgStmts) -- Its compiled form
529 cgPrimAlt gc_flag alt_type (con, [], [], rhs)
530 = ASSERT( case con of { DEFAULT -> True; LitAlt _ -> True; other -> False } )
531 do { abs_c <- getCgStmts (maybeAltHeapCheck gc_flag alt_type (cgExpr rhs))
532 ; returnFC (con, abs_c) }
536 %************************************************************************
538 \subsection[CgCase-tidy]{Code for tidying up prior to an eval}
540 %************************************************************************
545 -> AltType -- PolyAlt, PrimAlt, AlgAlt, but *not* UbxTupAlt
546 -> Code -- Continuation
548 maybeAltHeapCheck NoGC _ code = code
549 maybeAltHeapCheck GCMayHappen alt_type code = altHeapCheck alt_type code
551 saveVolatileVarsAndRegs
552 :: StgLiveVars -- Vars which should be made safe
553 -> FCode (CmmStmts, -- Assignments to do the saves
554 EndOfBlockInfo, -- sequel for the alts
555 Maybe VirtualSpOffset) -- Slot for current cost centre
557 saveVolatileVarsAndRegs vars
558 = do { var_saves <- saveVolatileVars vars
559 ; (maybe_cc_slot, cc_save) <- saveCurrentCostCentre
560 ; eob_info <- getEndOfBlockInfo
561 ; returnFC (var_saves `plusStmts` cc_save,
566 saveVolatileVars :: StgLiveVars -- Vars which should be made safe
567 -> FCode CmmStmts -- Assignments to to the saves
569 saveVolatileVars vars
570 = do { stmts_s <- mapFCs save_it (varSetElems vars)
571 ; return (foldr plusStmts noStmts stmts_s) }
574 = do { v <- getCAddrModeIfVolatile var
576 Nothing -> return noStmts -- Non-volatile
577 Just vol_amode -> save_var var vol_amode -- Aha! It's volatile
580 save_var var vol_amode
581 = do { slot <- allocPrimStack (idCgRep var)
582 ; rebindToStack var slot
583 ; sp_rel <- getSpRelOffset slot
584 ; returnFC (oneStmt (CmmStore sp_rel vol_amode)) }
587 ---------------------------------------------------------------------------
589 When we save the current cost centre (which is done for lexical
590 scoping), we allocate a free stack location, and return (a)~the
591 virtual offset of the location, to pass on to the alternatives, and
592 (b)~the assignment to do the save (just as for @saveVolatileVars@).
595 saveCurrentCostCentre ::
596 FCode (Maybe VirtualSpOffset, -- Where we decide to store it
597 CmmStmts) -- Assignment to save it
599 saveCurrentCostCentre
600 | not opt_SccProfilingOn
601 = returnFC (Nothing, noStmts)
603 = do { slot <- allocPrimStack PtrArg
604 ; sp_rel <- getSpRelOffset slot
605 ; returnFC (Just slot,
606 oneStmt (CmmStore sp_rel curCCS)) }
608 -- Sometimes we don't free the slot containing the cost centre after restoring it
609 -- (see CgLetNoEscape.cgLetNoEscapeBody).
610 restoreCurrentCostCentre :: Maybe VirtualSpOffset -> Bool -> Code
611 restoreCurrentCostCentre Nothing _freeit = nopC
612 restoreCurrentCostCentre (Just slot) freeit
613 = do { sp_rel <- getSpRelOffset slot
614 ; whenC freeit (freeStackSlots [slot])
615 ; stmtC (CmmStore curCCSAddr (CmmLoad sp_rel wordRep)) }