2 % (c) The University of Glasgow 2006
3 % (c) The GRASP/AQUA Project, Glasgow University, 1992-1998
8 -- The above warning supression flag is a temporary kludge.
9 -- While working on this module you are encouraged to remove it and fix
10 -- any warnings in the module. See
11 -- http://hackage.haskell.org/trac/ghc/wiki/Commentary/CodingStyle#Warnings
14 module CgCase ( cgCase, saveVolatileVarsAndRegs,
15 restoreCurrentCostCentre
18 #include "HsVersions.h"
20 import {-# SOURCE #-} CgExpr ( cgExpr )
54 = GCMayHappen -- The scrutinee may involve GC, so everything must be
55 -- tidy before the code for the scrutinee.
57 | NoGC -- The scrutinee is a primitive value, or a call to a
58 -- primitive op which does no GC. Hence the case can
59 -- be done inline, without tidying up first.
62 It is quite interesting to decide whether to put a heap-check
63 at the start of each alternative. Of course we certainly have
64 to do so if the case forces an evaluation, or if there is a primitive
65 op which can trigger GC.
67 A more interesting situation is this:
74 default -> !C!; ...C...
77 where \tr{!x!} indicates a possible heap-check point. The heap checks
78 in the alternatives {\em can} be omitted, in which case the topmost
79 heapcheck will take their worst case into account.
81 In favour of omitting \tr{!B!}, \tr{!C!}:
83 - {\em May} save a heap overflow test,
84 if ...A... allocates anything. The other advantage
85 of this is that we can use relative addressing
86 from a single Hp to get at all the closures so allocated.
88 - No need to save volatile vars etc across the case
92 - May do more allocation than reqd. This sometimes bites us
93 badly. For example, nfib (ha!) allocates about 30\% more space if the
94 worst-casing is done, because many many calls to nfib are leaf calls
95 which don't need to allocate anything.
97 This never hurts us if there is only one alternative.
109 Special case #1: case of literal.
112 cgCase (StgLit lit) live_in_whole_case live_in_alts bndr
113 alt_type@(PrimAlt tycon) alts
114 = do { tmp_reg <- bindNewToTemp bndr
115 ; cm_lit <- cgLit lit
116 ; stmtC (CmmAssign (CmmLocal tmp_reg) (CmmLit cm_lit))
117 ; cgPrimAlts NoGC alt_type (CmmLocal tmp_reg) alts }
120 Special case #2: scrutinising a primitive-typed variable. No
121 evaluation required. We don't save volatile variables, nor do we do a
122 heap-check in the alternatives. Instead, the heap usage of the
123 alternatives is worst-cased and passed upstream. This can result in
124 allocating more heap than strictly necessary, but it will sometimes
125 eliminate a heap check altogether.
128 cgCase (StgApp v []) live_in_whole_case live_in_alts bndr
129 alt_type@(PrimAlt tycon) alts
130 = do { -- Careful! we can't just bind the default binder to the same thing
131 -- as the scrutinee, since it might be a stack location, and having
132 -- two bindings pointing at the same stack locn doesn't work (it
133 -- confuses nukeDeadBindings). Hence, use a new temp.
134 v_info <- getCgIdInfo v
135 ; amode <- idInfoToAmode v_info
136 ; tmp_reg <- bindNewToTemp bndr
137 ; stmtC (CmmAssign (CmmLocal tmp_reg) amode)
138 ; cgPrimAlts NoGC alt_type (CmmLocal tmp_reg) alts }
141 Special case #3: inline PrimOps and foreign calls.
144 cgCase (StgOpApp op@(StgPrimOp primop) args _)
145 live_in_whole_case live_in_alts bndr alt_type alts
146 | not (primOpOutOfLine primop)
147 = cgInlinePrimOp primop args bndr alt_type live_in_alts alts
150 TODO: Case-of-case of primop can probably be done inline too (but
151 maybe better to translate it out beforehand). See
152 ghc/lib/misc/PackedString.lhs for examples where this crops up (with
155 Special case #4: inline foreign calls: an unsafe foreign call can be done
156 right here, just like an inline primop.
159 cgCase (StgOpApp op@(StgFCallOp fcall _) args _)
160 live_in_whole_case live_in_alts bndr alt_type alts
161 | unsafe_foreign_call
162 = ASSERT( isSingleton alts )
163 do -- *must* be an unboxed tuple alt.
164 -- exactly like the cgInlinePrimOp case for unboxed tuple alts..
165 { res_tmps <- mapFCs bindNewToTemp non_void_res_ids
166 ; let res_hints = map (typeForeignHint.idType) non_void_res_ids
167 ; cgForeignCall (zipWith CmmHinted res_tmps res_hints) fcall args live_in_alts
170 (_, res_ids, _, rhs) = head alts
171 non_void_res_ids = filter (nonVoidArg . idCgRep) res_ids
175 CCall (CCallSpec _ _ s) -> not (playSafe s)
179 Special case: scrutinising a non-primitive variable.
180 This can be done a little better than the general case, because
181 we can reuse/trim the stack slot holding the variable (if it is in one).
184 cgCase (StgApp fun args)
185 live_in_whole_case live_in_alts bndr alt_type alts
186 = do { fun_info <- getCgIdInfo fun
187 ; arg_amodes <- getArgAmodes args
189 -- Nuking dead bindings *before* calculating the saves is the
190 -- value-add here. We might end up freeing up some slots currently
191 -- occupied by variables only required for the call.
192 -- NOTE: we need to look up the variables used in the call before
193 -- doing this, because some of them may not be in the environment
195 ; nukeDeadBindings live_in_alts
196 ; (save_assts, alts_eob_info, maybe_cc_slot)
197 <- saveVolatileVarsAndRegs live_in_alts
200 <- forkEval alts_eob_info
201 (allocStackTop retAddrSizeW >> nopC)
202 (do { deAllocStackTop retAddrSizeW
203 ; cgEvalAlts maybe_cc_slot bndr alt_type alts })
205 ; setEndOfBlockInfo scrut_eob_info
206 (performTailCall fun_info arg_amodes save_assts) }
209 Note about return addresses: we *always* push a return address, even
210 if because of an optimisation we end up jumping direct to the return
211 code (not through the address itself). The alternatives always assume
212 that the return address is on the stack. The return address is
213 required in case the alternative performs a heap check, since it
214 encodes the liveness of the slots in the activation record.
216 On entry to the case alternative, we can re-use the slot containing
217 the return address immediately after the heap check. That's what the
218 deAllocStackTop call is doing above.
220 Finally, here is the general case.
223 cgCase expr live_in_whole_case live_in_alts bndr alt_type alts
224 = do { -- Figure out what volatile variables to save
225 nukeDeadBindings live_in_whole_case
227 ; (save_assts, alts_eob_info, maybe_cc_slot)
228 <- saveVolatileVarsAndRegs live_in_alts
230 -- Save those variables right now!
231 ; emitStmts save_assts
233 -- generate code for the alts
235 <- forkEval alts_eob_info
236 (do { nukeDeadBindings live_in_alts
237 ; allocStackTop retAddrSizeW -- space for retn address
239 (do { deAllocStackTop retAddrSizeW
240 ; cgEvalAlts maybe_cc_slot bndr alt_type alts })
242 ; setEndOfBlockInfo scrut_eob_info (cgExpr expr)
246 There's a lot of machinery going on behind the scenes to manage the
247 stack pointer here. forkEval takes the virtual Sp and free list from
248 the first argument, and turns that into the *real* Sp for the second
249 argument. It also uses this virtual Sp as the args-Sp in the EOB info
250 returned, so that the scrutinee will trim the real Sp back to the
251 right place before doing whatever it does.
252 --SDM (who just spent an hour figuring this out, and didn't want to
255 Why don't we push the return address just before evaluating the
256 scrutinee? Because the slot reserved for the return address might
257 contain something useful, so we wait until performing a tail call or
258 return before pushing the return address (see
259 CgTailCall.pushReturnAddress).
261 This also means that the environment doesn't need to know about the
262 free stack slot for the return address (for generating bitmaps),
263 because we don't reserve it until just before the eval.
265 TODO!! Problem: however, we have to save the current cost centre
266 stack somewhere, because at the eval point the current CCS might be
267 different. So we pick a free stack slot and save CCCS in it. One
268 consequence of this is that activation records on the stack don't
269 follow the layout of closures when we're profiling. The CCS could be
270 anywhere within the record).
272 %************************************************************************
276 %************************************************************************
279 cgInlinePrimOp primop args bndr (PrimAlt tycon) live_in_alts alts
280 | isVoidArg (idCgRep bndr)
281 = ASSERT( con == DEFAULT && isSingleton alts && null bs )
282 do { -- VOID RESULT; just sequencing,
283 -- so get in there and do it
284 cgPrimOp [] primop args live_in_alts
287 (con,bs,_,rhs) = head alts
289 cgInlinePrimOp primop args bndr (PrimAlt tycon) live_in_alts alts
290 = do { -- PRIMITIVE ALTS, with non-void result
291 tmp_reg <- bindNewToTemp bndr
292 ; cgPrimOp [tmp_reg] primop args live_in_alts
293 ; cgPrimAlts NoGC (PrimAlt tycon) (CmmLocal tmp_reg) alts }
295 cgInlinePrimOp primop args bndr (UbxTupAlt tycon) live_in_alts alts
296 = ASSERT( isSingleton alts )
297 do { -- UNBOXED TUPLE ALTS
298 -- No heap check, no yield, just get in there and do it.
299 -- NB: the case binder isn't bound to anything;
300 -- it has a unboxed tuple type
302 res_tmps <- mapFCs bindNewToTemp non_void_res_ids
303 ; cgPrimOp res_tmps primop args live_in_alts
306 (_, res_ids, _, rhs) = head alts
307 non_void_res_ids = filter (nonVoidArg . idCgRep) res_ids
309 cgInlinePrimOp primop args bndr (AlgAlt tycon) live_in_alts alts
310 = do { -- ENUMERATION TYPE RETURN
311 -- Typical: case a ># b of { True -> ..; False -> .. }
312 -- The primop itself returns an index into the table of
313 -- closures for the enumeration type.
314 tag_amode <- ASSERT( isEnumerationTyCon tycon )
315 do_enum_primop primop
317 -- Bind the default binder if necessary
318 -- (avoiding it avoids the assignment)
319 -- The deadness info is set by StgVarInfo
320 ; this_pkg <- getThisPackage
321 ; whenC (not (isDeadBinder bndr))
322 (do { tmp_reg <- bindNewToTemp bndr
325 (tagToClosure tycon tag_amode)) })
328 ; (branches, mb_deflt) <- cgAlgAlts NoGC Nothing{-cc_slot-}
332 ; emitSwitch tag_amode branches mb_deflt 0 (tyConFamilySize tycon - 1)
336 do_enum_primop :: PrimOp -> FCode CmmExpr -- Returns amode for result
337 do_enum_primop TagToEnumOp -- No code!
339 (_,e) <- getArgAmode arg
341 do_enum_primop primop
342 = do tmp <- newTemp bWord
343 cgPrimOp [tmp] primop args live_in_alts
344 returnFC (CmmReg (CmmLocal tmp))
346 cgInlinePrimOp primop arg_amodes bndr PolyAlt live_in_alts alts
347 = pprPanic "cgCase: case of primop has polymorphic type" (ppr bndr)
350 %************************************************************************
352 \subsection[CgCase-alts]{Alternatives}
354 %************************************************************************
356 @cgEvalAlts@ returns an addressing mode for a continuation for the
357 alternatives of a @case@, used in a context when there
358 is some evaluation to be done.
361 cgEvalAlts :: Maybe VirtualSpOffset -- Offset of cost-centre to be restored, if any
365 -> FCode Sequel -- Any addr modes inside are guaranteed
366 -- to be a label so that we can duplicate it
367 -- without risk of duplicating code
369 cgEvalAlts cc_slot bndr alt_type@(PrimAlt tycon) alts
370 = do { let rep = tyConCgRep tycon
371 reg = dataReturnConvPrim rep -- Bottom for voidRep
373 ; abs_c <- forkProc $ do
374 { -- Bind the case binder, except if it's void
375 -- (reg is bottom in that case)
376 whenC (nonVoidArg rep) $
377 bindNewToReg bndr reg (mkLFArgument bndr)
378 ; restoreCurrentCostCentre cc_slot True
379 ; cgPrimAlts GCMayHappen alt_type reg alts }
381 ; lbl <- emitReturnTarget (idName bndr) abs_c
382 ; returnFC (CaseAlts lbl Nothing bndr) }
384 cgEvalAlts cc_slot bndr (UbxTupAlt _) [(con,args,_,rhs)]
385 = -- Unboxed tuple case
386 -- By now, the simplifier should have have turned it
387 -- into case e of (# a,b #) -> e
388 -- There shouldn't be a
389 -- case e of DEFAULT -> e
390 ASSERT2( case con of { DataAlt _ -> True; other -> False },
391 text "cgEvalAlts: dodgy case of unboxed tuple type" )
392 do { -- forkAbsC for the RHS, so that the envt is
393 -- not changed for the emitReturn call
394 abs_c <- forkProc $ do
395 { (live_regs, ptrs, nptrs, _) <- bindUnboxedTupleComponents args
396 -- Restore the CC *after* binding the tuple components,
397 -- so that we get the stack offset of the saved CC right.
398 ; restoreCurrentCostCentre cc_slot True
399 -- Generate a heap check if necessary
400 -- and finally the code for the alternative
401 ; unbxTupleHeapCheck live_regs ptrs nptrs noStmts
403 ; lbl <- emitReturnTarget (idName bndr) abs_c
404 ; returnFC (CaseAlts lbl Nothing bndr) }
406 cgEvalAlts cc_slot bndr alt_type alts
407 = -- Algebraic and polymorphic case
408 do { -- Bind the default binder
409 bindNewToReg bndr nodeReg (mkLFArgument bndr)
411 -- Generate sequel info for use downstream
412 -- At the moment, we only do it if the type is vector-returnable.
413 -- Reason: if not, then it costs extra to label the
414 -- alternatives, because we'd get return code like:
416 -- switch TagReg { 0 : JMP(alt_1); 1 : JMP(alt_2) ..etc }
418 -- which is worse than having the alt code in the switch statement
420 ; (alts, mb_deflt) <- cgAlgAlts GCMayHappen cc_slot alt_type alts
422 ; (lbl, branches) <- emitAlgReturnTarget (idName bndr)
425 ; returnFC (CaseAlts lbl branches bndr) }
427 fam_sz = case alt_type of
428 AlgAlt tc -> tyConFamilySize tc
433 HWL comment on {\em GrAnSim\/} (adding GRAN_YIELDs for context switch): If
434 we do an inlining of the case no separate functions for returning are
435 created, so we don't have to generate a GRAN_YIELD in that case. This info
436 must be propagated to cgAlgAltRhs (where the GRAN_YIELD macro might be
437 emitted). Hence, the new Bool arg to cgAlgAltRhs.
439 %************************************************************************
441 \subsection[CgCase-alg-alts]{Algebraic alternatives}
443 %************************************************************************
445 In @cgAlgAlts@, none of the binders in the alternatives are
446 assumed to be yet bound.
448 HWL comment on {\em GrAnSim\/} (adding GRAN_YIELDs for context switch): The
449 last arg of cgAlgAlts indicates if we want a context switch at the
450 beginning of each alternative. Normally we want that. The only exception
451 are inlined alternatives.
455 -> Maybe VirtualSpOffset
456 -> AltType -- ** AlgAlt or PolyAlt only **
457 -> [StgAlt] -- The alternatives
458 -> FCode ( [(ConTagZ, CgStmts)], -- The branches
459 Maybe CgStmts ) -- The default case
461 cgAlgAlts gc_flag cc_slot alt_type alts
462 = do alts <- forkAlts [ cgAlgAlt gc_flag cc_slot alt_type alt | alt <- alts]
464 mb_deflt = case alts of -- DEFAULT is always first, if present
465 ((DEFAULT,blks) : _) -> Just blks
468 branches = [(dataConTagZ con, blks)
469 | (DataAlt con, blks) <- alts]
471 return (branches, mb_deflt)
475 -> Maybe VirtualSpOffset -- Turgid state
476 -> AltType -- ** AlgAlt or PolyAlt only **
478 -> FCode (AltCon, CgStmts)
480 cgAlgAlt gc_flag cc_slot alt_type (con, args, use_mask, rhs)
481 = do { abs_c <- getCgStmts $ do
482 { bind_con_args con args
483 ; restoreCurrentCostCentre cc_slot True
484 ; maybeAltHeapCheck gc_flag alt_type (cgExpr rhs) }
485 ; return (con, abs_c) }
487 bind_con_args DEFAULT args = nopC
488 bind_con_args (DataAlt dc) args = bindConArgs dc args
492 %************************************************************************
494 \subsection[CgCase-prim-alts]{Primitive alternatives}
496 %************************************************************************
498 @cgPrimAlts@ generates suitable a @CSwitch@
499 for dealing with the alternatives of a primitive @case@, given an
500 addressing mode for the thing to scrutinise. It also keeps track of
501 the maximum stack depth encountered down any branch.
503 As usual, no binders in the alternatives are yet bound.
507 -> AltType -- Always PrimAlt, but passed to maybeAltHeapCheck
508 -> CmmReg -- Scrutinee
509 -> [StgAlt] -- Alternatives
511 -- NB: cgPrimAlts emits code that does the case analysis.
512 -- It's often used in inline situations, rather than to genearte
513 -- a labelled return point. That's why its interface is a little
514 -- different to cgAlgAlts
516 -- INVARIANT: the default binder is already bound
517 cgPrimAlts gc_flag alt_type scrutinee alts
518 = do { tagged_absCs <- forkAlts (map (cgPrimAlt gc_flag alt_type) alts)
519 ; let ((DEFAULT, deflt_absC) : others) = tagged_absCs -- There is always a default
520 alt_absCs = [(lit,rhs) | (LitAlt lit, rhs) <- others]
521 ; emitLitSwitch (CmmReg scrutinee) alt_absCs deflt_absC }
525 -> StgAlt -- The alternative
526 -> FCode (AltCon, CgStmts) -- Its compiled form
528 cgPrimAlt gc_flag alt_type (con, [], [], rhs)
529 = ASSERT( case con of { DEFAULT -> True; LitAlt _ -> True; other -> False } )
530 do { abs_c <- getCgStmts (maybeAltHeapCheck gc_flag alt_type (cgExpr rhs))
531 ; returnFC (con, abs_c) }
535 %************************************************************************
537 \subsection[CgCase-tidy]{Code for tidying up prior to an eval}
539 %************************************************************************
544 -> AltType -- PolyAlt, PrimAlt, AlgAlt, but *not* UbxTupAlt
545 -> Code -- Continuation
547 maybeAltHeapCheck NoGC _ code = code
548 maybeAltHeapCheck GCMayHappen alt_type code = altHeapCheck alt_type code
550 saveVolatileVarsAndRegs
551 :: StgLiveVars -- Vars which should be made safe
552 -> FCode (CmmStmts, -- Assignments to do the saves
553 EndOfBlockInfo, -- sequel for the alts
554 Maybe VirtualSpOffset) -- Slot for current cost centre
556 saveVolatileVarsAndRegs vars
557 = do { var_saves <- saveVolatileVars vars
558 ; (maybe_cc_slot, cc_save) <- saveCurrentCostCentre
559 ; eob_info <- getEndOfBlockInfo
560 ; returnFC (var_saves `plusStmts` cc_save,
565 saveVolatileVars :: StgLiveVars -- Vars which should be made safe
566 -> FCode CmmStmts -- Assignments to to the saves
568 saveVolatileVars vars
569 = do { stmts_s <- mapFCs save_it (varSetElems vars)
570 ; return (foldr plusStmts noStmts stmts_s) }
573 = do { v <- getCAddrModeIfVolatile var
575 Nothing -> return noStmts -- Non-volatile
576 Just vol_amode -> save_var var vol_amode -- Aha! It's volatile
579 save_var var vol_amode
580 = do { slot <- allocPrimStack (idCgRep var)
581 ; rebindToStack var slot
582 ; sp_rel <- getSpRelOffset slot
583 ; returnFC (oneStmt (CmmStore sp_rel vol_amode)) }
586 ---------------------------------------------------------------------------
588 When we save the current cost centre (which is done for lexical
589 scoping), we allocate a free stack location, and return (a)~the
590 virtual offset of the location, to pass on to the alternatives, and
591 (b)~the assignment to do the save (just as for @saveVolatileVars@).
594 saveCurrentCostCentre ::
595 FCode (Maybe VirtualSpOffset, -- Where we decide to store it
596 CmmStmts) -- Assignment to save it
598 saveCurrentCostCentre
599 | not opt_SccProfilingOn
600 = returnFC (Nothing, noStmts)
602 = do { slot <- allocPrimStack PtrArg
603 ; sp_rel <- getSpRelOffset slot
604 ; returnFC (Just slot,
605 oneStmt (CmmStore sp_rel curCCS)) }
607 -- Sometimes we don't free the slot containing the cost centre after restoring it
608 -- (see CgLetNoEscape.cgLetNoEscapeBody).
609 restoreCurrentCostCentre :: Maybe VirtualSpOffset -> Bool -> Code
610 restoreCurrentCostCentre Nothing _freeit = nopC
611 restoreCurrentCostCentre (Just slot) freeit
612 = do { sp_rel <- getSpRelOffset slot
613 ; whenC freeit (freeStackSlots [slot])
614 ; stmtC (CmmStore curCCSAddr (CmmLoad sp_rel bWord)) }