2 % (c) The University of Glasgow 2006
3 % (c) The GRASP/AQUA Project, Glasgow University, 1992-1998
7 module CgCase ( cgCase, saveVolatileVarsAndRegs,
8 restoreCurrentCostCentre
11 #include "HsVersions.h"
12 #include "../includes/ClosureTypes.h"
14 import {-# SOURCE #-} CgExpr ( cgExpr )
50 = GCMayHappen -- The scrutinee may involve GC, so everything must be
51 -- tidy before the code for the scrutinee.
53 | NoGC -- The scrutinee is a primitive value, or a call to a
54 -- primitive op which does no GC. Hence the case can
55 -- be done inline, without tidying up first.
58 It is quite interesting to decide whether to put a heap-check
59 at the start of each alternative. Of course we certainly have
60 to do so if the case forces an evaluation, or if there is a primitive
61 op which can trigger GC.
63 A more interesting situation is this:
70 default -> !C!; ...C...
73 where \tr{!x!} indicates a possible heap-check point. The heap checks
74 in the alternatives {\em can} be omitted, in which case the topmost
75 heapcheck will take their worst case into account.
77 In favour of omitting \tr{!B!}, \tr{!C!}:
79 - {\em May} save a heap overflow test,
80 if ...A... allocates anything. The other advantage
81 of this is that we can use relative addressing
82 from a single Hp to get at all the closures so allocated.
84 - No need to save volatile vars etc across the case
88 - May do more allocation than reqd. This sometimes bites us
89 badly. For example, nfib (ha!) allocates about 30\% more space if the
90 worst-casing is done, because many many calls to nfib are leaf calls
91 which don't need to allocate anything.
93 This never hurts us if there is only one alternative.
106 Special case #1: case of literal.
109 cgCase (StgLit lit) live_in_whole_case live_in_alts bndr srt
110 alt_type@(PrimAlt tycon) alts
111 = do { tmp_reg <- bindNewToTemp bndr
112 ; cm_lit <- cgLit lit
113 ; stmtC (CmmAssign tmp_reg (CmmLit cm_lit))
114 ; cgPrimAlts NoGC alt_type tmp_reg alts }
117 Special case #2: scrutinising a primitive-typed variable. No
118 evaluation required. We don't save volatile variables, nor do we do a
119 heap-check in the alternatives. Instead, the heap usage of the
120 alternatives is worst-cased and passed upstream. This can result in
121 allocating more heap than strictly necessary, but it will sometimes
122 eliminate a heap check altogether.
125 cgCase (StgApp v []) live_in_whole_case live_in_alts bndr srt
126 alt_type@(PrimAlt tycon) alts
127 = do { -- Careful! we can't just bind the default binder to the same thing
128 -- as the scrutinee, since it might be a stack location, and having
129 -- two bindings pointing at the same stack locn doesn't work (it
130 -- confuses nukeDeadBindings). Hence, use a new temp.
131 v_info <- getCgIdInfo v
132 ; amode <- idInfoToAmode v_info
133 ; tmp_reg <- bindNewToTemp bndr
134 ; stmtC (CmmAssign tmp_reg amode)
135 ; cgPrimAlts NoGC alt_type tmp_reg alts }
138 Special case #3: inline PrimOps and foreign calls.
141 cgCase (StgOpApp op@(StgPrimOp primop) args _)
142 live_in_whole_case live_in_alts bndr srt alt_type alts
143 | not (primOpOutOfLine primop)
144 = cgInlinePrimOp primop args bndr alt_type live_in_alts alts
147 TODO: Case-of-case of primop can probably be done inline too (but
148 maybe better to translate it out beforehand). See
149 ghc/lib/misc/PackedString.lhs for examples where this crops up (with
152 Special case #4: inline foreign calls: an unsafe foreign call can be done
153 right here, just like an inline primop.
156 cgCase (StgOpApp op@(StgFCallOp fcall _) args _)
157 live_in_whole_case live_in_alts bndr srt alt_type alts
158 | unsafe_foreign_call
159 = ASSERT( isSingleton alts )
160 do -- *must* be an unboxed tuple alt.
161 -- exactly like the cgInlinePrimOp case for unboxed tuple alts..
162 { res_tmps <- mapFCs bindNewToTemp non_void_res_ids
163 ; let res_hints = map (typeHint.idType) non_void_res_ids
164 ; cgForeignCall (zip res_tmps res_hints) fcall args live_in_alts
167 (_, res_ids, _, rhs) = head alts
168 non_void_res_ids = filter (nonVoidArg . idCgRep) res_ids
172 CCall (CCallSpec _ _ s) -> not (playSafe s)
176 Special case: scrutinising a non-primitive variable. This is where we
177 want to do semi-tagging. The code generated will be something like this:
183 <info table goes here>
186 type = infoptr[-4] // or something
187 if (type > 8) goto no_cons
189 if (tag == 1) ... etc.
194 cgCase (StgApp fun [])
195 live_in_whole_case live_in_alts bndr srt (AlgAlt tycon) alts
196 = do { fun_info <- getCgIdInfo fun
197 ; fun_amode <- idInfoToAmode fun_info
199 ; nukeDeadBindings live_in_alts
200 ; (save_assts, alts_eob_info, maybe_cc_slot)
201 <- saveVolatileVarsAndRegs live_in_alts
204 <- forkEval alts_eob_info
205 (allocStackTop retAddrSizeW >> nopC)
206 (do { deAllocStackTop retAddrSizeW
207 ; cgEvalAltsSemiTag maybe_cc_slot bndr srt
210 -- jump to the continuation immediately
211 ; case scrut_eob_info of
212 EndOfBlockInfo sp (CaseAlts lbl _ _ _) -> do
213 let node_asst = oneStmt (CmmAssign nodeReg fun_amode)
214 emitSimultaneously (node_asst `plusStmts` save_assts)
215 let jmp = stmtC (CmmJump (CmmLit (CmmLabel lbl)) [])
216 setEndOfBlockInfo scrut_eob_info $
217 doFinalJump sp False jmp
221 Special case: scrutinising a non-primitive application. This can be
222 done a little better than the general case, because we can reuse/trim
223 the stack slot holding the variables involved in the application.
226 cgCase (StgApp fun args)
227 live_in_whole_case live_in_alts bndr srt alt_type alts
228 = do { fun_info <- getCgIdInfo fun
229 ; arg_amodes <- getArgAmodes args
231 -- Nuking dead bindings *before* calculating the saves is the
232 -- value-add here. We might end up freeing up some slots currently
233 -- occupied by variables only required for the call.
234 -- NOTE: we need to look up the variables used in the call before
235 -- doing this, because some of them may not be in the environment
237 ; nukeDeadBindings live_in_alts
238 ; (save_assts, alts_eob_info, maybe_cc_slot)
239 <- saveVolatileVarsAndRegs live_in_alts
242 <- forkEval alts_eob_info
243 (allocStackTop retAddrSizeW >> nopC)
244 (do { deAllocStackTop retAddrSizeW
245 ; cgEvalAlts maybe_cc_slot bndr srt alt_type alts })
247 ; setEndOfBlockInfo (maybeReserveSeqFrame alt_type scrut_eob_info)
248 (performTailCall fun_info arg_amodes save_assts) }
251 Note about return addresses: we *always* push a return address, even
252 if because of an optimisation we end up jumping direct to the return
253 code (not through the address itself). The alternatives always assume
254 that the return address is on the stack. The return address is
255 required in case the alternative performs a heap check, since it
256 encodes the liveness of the slots in the activation record.
258 On entry to the case alternative, we can re-use the slot containing
259 the return address immediately after the heap check. That's what the
260 deAllocStackTop call is doing above.
262 Finally, here is the general case.
265 cgCase expr live_in_whole_case live_in_alts bndr srt alt_type alts
266 = do { -- Figure out what volatile variables to save
267 nukeDeadBindings live_in_whole_case
269 ; (save_assts, alts_eob_info, maybe_cc_slot)
270 <- saveVolatileVarsAndRegs live_in_alts
272 -- Save those variables right now!
273 ; emitStmts save_assts
275 -- generate code for the alts
277 <- forkEval alts_eob_info
278 (do { nukeDeadBindings live_in_alts
279 ; allocStackTop retAddrSizeW -- space for retn address
281 (do { deAllocStackTop retAddrSizeW
282 ; cgEvalAlts maybe_cc_slot bndr srt alt_type alts })
284 ; setEndOfBlockInfo (maybeReserveSeqFrame alt_type scrut_eob_info)
289 There's a lot of machinery going on behind the scenes to manage the
290 stack pointer here. forkEval takes the virtual Sp and free list from
291 the first argument, and turns that into the *real* Sp for the second
292 argument. It also uses this virtual Sp as the args-Sp in the EOB info
293 returned, so that the scrutinee will trim the real Sp back to the
294 right place before doing whatever it does.
295 --SDM (who just spent an hour figuring this out, and didn't want to
298 Why don't we push the return address just before evaluating the
299 scrutinee? Because the slot reserved for the return address might
300 contain something useful, so we wait until performing a tail call or
301 return before pushing the return address (see
302 CgTailCall.pushReturnAddress).
304 This also means that the environment doesn't need to know about the
305 free stack slot for the return address (for generating bitmaps),
306 because we don't reserve it until just before the eval.
308 TODO!! Problem: however, we have to save the current cost centre
309 stack somewhere, because at the eval point the current CCS might be
310 different. So we pick a free stack slot and save CCCS in it. One
311 consequence of this is that activation records on the stack don't
312 follow the layout of closures when we're profiling. The CCS could be
313 anywhere within the record).
316 maybeReserveSeqFrame PolyAlt (EndOfBlockInfo args_sp (CaseAlts amode stuff bndr _))
317 = EndOfBlockInfo (args_sp + retAddrSizeW) (CaseAlts amode stuff bndr True)
318 maybeReserveSeqFrame other scrut_eob_info = scrut_eob_info
322 %************************************************************************
326 %************************************************************************
329 cgInlinePrimOp primop args bndr (PrimAlt tycon) live_in_alts alts
330 | isVoidArg (idCgRep bndr)
331 = ASSERT( con == DEFAULT && isSingleton alts && null bs )
332 do { -- VOID RESULT; just sequencing,
333 -- so get in there and do it
334 cgPrimOp [] primop args live_in_alts
337 (con,bs,_,rhs) = head alts
339 cgInlinePrimOp primop args bndr (PrimAlt tycon) live_in_alts alts
340 = do { -- PRIMITIVE ALTS, with non-void result
341 tmp_reg <- bindNewToTemp bndr
342 ; cgPrimOp [tmp_reg] primop args live_in_alts
343 ; cgPrimAlts NoGC (PrimAlt tycon) tmp_reg alts }
345 cgInlinePrimOp primop args bndr (UbxTupAlt tycon) live_in_alts alts
346 = ASSERT( isSingleton alts )
347 do { -- UNBOXED TUPLE ALTS
348 -- No heap check, no yield, just get in there and do it.
349 -- NB: the case binder isn't bound to anything;
350 -- it has a unboxed tuple type
352 res_tmps <- mapFCs bindNewToTemp non_void_res_ids
353 ; cgPrimOp res_tmps primop args live_in_alts
356 (_, res_ids, _, rhs) = head alts
357 non_void_res_ids = filter (nonVoidArg . idCgRep) res_ids
359 cgInlinePrimOp primop args bndr (AlgAlt tycon) live_in_alts alts
360 = do { -- ENUMERATION TYPE RETURN
361 -- Typical: case a ># b of { True -> ..; False -> .. }
362 -- The primop itself returns an index into the table of
363 -- closures for the enumeration type.
364 tag_amode <- ASSERT( isEnumerationTyCon tycon )
365 do_enum_primop primop
367 -- Bind the default binder if necessary
368 -- (avoiding it avoids the assignment)
369 -- The deadness info is set by StgVarInfo
370 ; this_pkg <- getThisPackage
371 ; whenC (not (isDeadBinder bndr))
372 (do { tmp_reg <- bindNewToTemp bndr
373 ; stmtC (CmmAssign tmp_reg (tagToClosure this_pkg tycon tag_amode)) })
376 ; (branches, mb_deflt) <- cgAlgAlts NoGC Nothing{-cc_slot-}
380 ; emitSwitch tag_amode branches mb_deflt 0 (tyConFamilySize tycon - 1)
384 do_enum_primop :: PrimOp -> FCode CmmExpr -- Returns amode for result
385 do_enum_primop TagToEnumOp -- No code!
387 (_,e) <- getArgAmode arg
389 do_enum_primop primop
390 = do tmp <- newTemp wordRep
391 cgPrimOp [tmp] primop args live_in_alts
392 returnFC (CmmReg tmp)
394 cgInlinePrimOp primop arg_amodes bndr PolyAlt live_in_alts alts
395 = pprPanic "cgCase: case of primop has polymorphic type" (ppr bndr)
398 %************************************************************************
400 \subsection[CgCase-alts]{Alternatives}
402 %************************************************************************
404 @cgEvalAlts@ returns an addressing mode for a continuation for the
405 alternatives of a @case@, used in a context when there
406 is some evaluation to be done.
409 cgEvalAlts :: Maybe VirtualSpOffset -- Offset of cost-centre to be restored, if any
411 -> SRT -- SRT for the continuation
414 -> FCode Sequel -- Any addr modes inside are guaranteed
415 -- to be a label so that we can duplicate it
416 -- without risk of duplicating code
418 cgEvalAlts cc_slot bndr srt alt_type@(PrimAlt tycon) alts
419 = do { let rep = tyConCgRep tycon
420 reg = dataReturnConvPrim rep -- Bottom for voidRep
422 ; abs_c <- forkProc $ do
423 { -- Bind the case binder, except if it's void
424 -- (reg is bottom in that case)
425 whenC (nonVoidArg rep) $
426 bindNewToReg bndr reg (mkLFArgument bndr)
427 ; restoreCurrentCostCentre cc_slot True
428 ; cgPrimAlts GCMayHappen alt_type reg alts }
430 ; lbl <- emitDirectReturnTarget (idName bndr) abs_c srt
431 ; returnFC (CaseAlts lbl Nothing bndr False) }
433 cgEvalAlts cc_slot bndr srt (UbxTupAlt _) [(con,args,_,rhs)]
434 = -- Unboxed tuple case
435 -- By now, the simplifier should have have turned it
436 -- into case e of (# a,b #) -> e
437 -- There shouldn't be a
438 -- case e of DEFAULT -> e
439 ASSERT2( case con of { DataAlt _ -> True; other -> False },
440 text "cgEvalAlts: dodgy case of unboxed tuple type" )
441 do { -- forkAbsC for the RHS, so that the envt is
442 -- not changed for the emitDirectReturn call
443 abs_c <- forkProc $ do
444 { (live_regs, ptrs, nptrs, _) <- bindUnboxedTupleComponents args
445 -- Restore the CC *after* binding the tuple components,
446 -- so that we get the stack offset of the saved CC right.
447 ; restoreCurrentCostCentre cc_slot True
448 -- Generate a heap check if necessary
449 -- and finally the code for the alternative
450 ; unbxTupleHeapCheck live_regs ptrs nptrs noStmts
452 ; lbl <- emitDirectReturnTarget (idName bndr) abs_c srt
453 ; returnFC (CaseAlts lbl Nothing bndr False) }
455 cgEvalAlts cc_slot bndr srt alt_type alts
456 = -- Algebraic and polymorphic case
457 do { -- Bind the default binder
458 bindNewToReg bndr nodeReg (mkLFArgument bndr)
460 ; (alts, mb_deflt) <- cgAlgAlts GCMayHappen cc_slot alt_type alts
462 ; (lbl, branches) <- emitAlgReturnTarget (idName bndr)
463 alts mb_deflt srt ret_conv
465 ; returnFC (CaseAlts lbl branches bndr False) }
467 ret_conv = case alt_type of
468 AlgAlt tc -> ctrlReturnConvAlg tc
469 PolyAlt -> UnvectoredReturn 0
472 -- Alternatives for a semi-tagging case expression
473 cgEvalAltsSemiTag cc_slot bndr srt tycon alts
474 = do -- Bind the default binder
475 bindNewToReg bndr nodeReg (mkLFArgument bndr)
477 blks <- getCgStmts $ cgEvalAltsSemiTag' cc_slot tycon alts
478 lbl <- emitDirectReturnTarget (idName bndr) blks srt
479 return (CaseAlts lbl Nothing bndr False)
481 cgEvalAltsSemiTag' cc_slot tycon alts
483 (alts, mb_deflt) <- cgAlgAlts GCMayHappen cc_slot (AlgAlt tycon) alts
485 iptr <- newTemp wordRep
486 stmtC (CmmAssign iptr (closureInfoPtr (CmmReg nodeReg)))
487 -- share the iptr between ctype and tag, below
489 -- we don't have a 1-indexed tag field, we have to use the type
490 -- field first to find out whether the closure is a constructor
491 not_constr <- newLabelC
493 let highCons = CmmLit (CmmInt CONSTR_NOCAF_STATIC halfWordRep)
494 stmtC (CmmCondBranch (CmmMachOp (MO_U_Gt halfWordRep)
495 [infoTableClosureType (infoTable (CmmReg iptr)),
499 let tag_expr = CmmMachOp (MO_U_Conv halfWordRep wordRep)
500 [infoTableConstrTag (infoTable (CmmReg iptr))]
502 let family_size = tyConFamilySize tycon
503 emitSwitch tag_expr alts mb_deflt 0 (family_size - 1)
506 stmtC (CmmJump (entryCode (CmmReg iptr)) [])
510 HWL comment on {\em GrAnSim\/} (adding GRAN_YIELDs for context switch): If
511 we do an inlining of the case no separate functions for returning are
512 created, so we don't have to generate a GRAN_YIELD in that case. This info
513 must be propagated to cgAlgAltRhs (where the GRAN_YIELD macro might be
514 emitted). Hence, the new Bool arg to cgAlgAltRhs.
516 %************************************************************************
518 \subsection[CgCase-alg-alts]{Algebraic alternatives}
520 %************************************************************************
522 In @cgAlgAlts@, none of the binders in the alternatives are
523 assumed to be yet bound.
525 HWL comment on {\em GrAnSim\/} (adding GRAN_YIELDs for context switch): The
526 last arg of cgAlgAlts indicates if we want a context switch at the
527 beginning of each alternative. Normally we want that. The only exception
528 are inlined alternatives.
532 -> Maybe VirtualSpOffset
533 -> AltType -- ** AlgAlt or PolyAlt only **
534 -> [StgAlt] -- The alternatives
535 -> FCode ( [(ConTagZ, CgStmts)], -- The branches
536 Maybe CgStmts ) -- The default case
538 cgAlgAlts gc_flag cc_slot alt_type alts
539 = do alts <- forkAlts [ cgAlgAlt gc_flag cc_slot alt_type alt | alt <- alts]
541 mb_deflt = case alts of -- DEFAULT is always first, if present
542 ((DEFAULT,blks) : _) -> Just blks
545 branches = [(dataConTagZ con, blks)
546 | (DataAlt con, blks) <- alts]
548 return (branches, mb_deflt)
552 -> Maybe VirtualSpOffset -- Turgid state
553 -> AltType -- ** AlgAlt or PolyAlt only **
555 -> FCode (AltCon, CgStmts)
557 cgAlgAlt gc_flag cc_slot alt_type (con, args, use_mask, rhs)
558 = do { abs_c <- getCgStmts $ do
559 { bind_con_args con args
560 ; restoreCurrentCostCentre cc_slot True
561 ; maybeAltHeapCheck gc_flag alt_type (cgExpr rhs) }
562 ; return (con, abs_c) }
564 bind_con_args DEFAULT args = nopC
565 bind_con_args (DataAlt dc) args = bindConArgs dc args
569 %************************************************************************
571 \subsection[CgCase-prim-alts]{Primitive alternatives}
573 %************************************************************************
575 @cgPrimAlts@ generates suitable a @CSwitch@
576 for dealing with the alternatives of a primitive @case@, given an
577 addressing mode for the thing to scrutinise. It also keeps track of
578 the maximum stack depth encountered down any branch.
580 As usual, no binders in the alternatives are yet bound.
584 -> AltType -- Always PrimAlt, but passed to maybeAltHeapCheck
585 -> CmmReg -- Scrutinee
586 -> [StgAlt] -- Alternatives
588 -- NB: cgPrimAlts emits code that does the case analysis.
589 -- It's often used in inline situations, rather than to genearte
590 -- a labelled return point. That's why its interface is a little
591 -- different to cgAlgAlts
593 -- INVARIANT: the default binder is already bound
594 cgPrimAlts gc_flag alt_type scrutinee alts
595 = do { tagged_absCs <- forkAlts (map (cgPrimAlt gc_flag alt_type) alts)
596 ; let ((DEFAULT, deflt_absC) : others) = tagged_absCs -- There is always a default
597 alt_absCs = [(lit,rhs) | (LitAlt lit, rhs) <- others]
598 ; emitLitSwitch (CmmReg scrutinee) alt_absCs deflt_absC }
602 -> StgAlt -- The alternative
603 -> FCode (AltCon, CgStmts) -- Its compiled form
605 cgPrimAlt gc_flag alt_type (con, [], [], rhs)
606 = ASSERT( case con of { DEFAULT -> True; LitAlt _ -> True; other -> False } )
607 do { abs_c <- getCgStmts (maybeAltHeapCheck gc_flag alt_type (cgExpr rhs))
608 ; returnFC (con, abs_c) }
612 %************************************************************************
614 \subsection[CgCase-tidy]{Code for tidying up prior to an eval}
616 %************************************************************************
621 -> AltType -- PolyAlt, PrimAlt, AlgAlt, but *not* UbxTupAlt
622 -> Code -- Continuation
624 maybeAltHeapCheck NoGC _ code = code
625 maybeAltHeapCheck GCMayHappen alt_type code = altHeapCheck alt_type code
627 saveVolatileVarsAndRegs
628 :: StgLiveVars -- Vars which should be made safe
629 -> FCode (CmmStmts, -- Assignments to do the saves
630 EndOfBlockInfo, -- sequel for the alts
631 Maybe VirtualSpOffset) -- Slot for current cost centre
633 saveVolatileVarsAndRegs vars
634 = do { var_saves <- saveVolatileVars vars
635 ; (maybe_cc_slot, cc_save) <- saveCurrentCostCentre
636 ; eob_info <- getEndOfBlockInfo
637 ; returnFC (var_saves `plusStmts` cc_save,
642 saveVolatileVars :: StgLiveVars -- Vars which should be made safe
643 -> FCode CmmStmts -- Assignments to to the saves
645 saveVolatileVars vars
646 = do { stmts_s <- mapFCs save_it (varSetElems vars)
647 ; return (foldr plusStmts noStmts stmts_s) }
650 = do { v <- getCAddrModeIfVolatile var
652 Nothing -> return noStmts -- Non-volatile
653 Just vol_amode -> save_var var vol_amode -- Aha! It's volatile
656 save_var var vol_amode
657 = do { slot <- allocPrimStack (idCgRep var)
658 ; rebindToStack var slot
659 ; sp_rel <- getSpRelOffset slot
660 ; returnFC (oneStmt (CmmStore sp_rel vol_amode)) }
663 ---------------------------------------------------------------------------
665 When we save the current cost centre (which is done for lexical
666 scoping), we allocate a free stack location, and return (a)~the
667 virtual offset of the location, to pass on to the alternatives, and
668 (b)~the assignment to do the save (just as for @saveVolatileVars@).
671 saveCurrentCostCentre ::
672 FCode (Maybe VirtualSpOffset, -- Where we decide to store it
673 CmmStmts) -- Assignment to save it
675 saveCurrentCostCentre
676 | not opt_SccProfilingOn
677 = returnFC (Nothing, noStmts)
679 = do { slot <- allocPrimStack PtrArg
680 ; sp_rel <- getSpRelOffset slot
681 ; returnFC (Just slot,
682 oneStmt (CmmStore sp_rel curCCS)) }
684 -- Sometimes we don't free the slot containing the cost centre after restoring it
685 -- (see CgLetNoEscape.cgLetNoEscapeBody).
686 restoreCurrentCostCentre :: Maybe VirtualSpOffset -> Bool -> Code
687 restoreCurrentCostCentre Nothing _freeit = nopC
688 restoreCurrentCostCentre (Just slot) freeit
689 = do { sp_rel <- getSpRelOffset slot
690 ; whenC freeit (freeStackSlots [slot])
691 ; stmtC (CmmStore curCCSAddr (CmmLoad sp_rel wordRep)) }