1 -----------------------------------------------------------------------------
3 -- Stg to C--: heap management functions
5 -- (c) The University of Glasgow 2004-2006
7 -----------------------------------------------------------------------------
10 getVirtHp, setVirtHp, setRealHp,
11 getHpRelOffset, hpRel,
13 entryHeapCheck, altHeapCheck,
15 layOutDynConstr, layOutStaticConstr,
16 mkVirtHeapOffsets, mkStaticClosureFields, mkStaticClosure,
18 allocDynClosure, allocDynClosureCmm, emitSetDynHdr
21 #include "HsVersions.h"
45 import FastString( mkFastString, fsLit )
48 -----------------------------------------------------------
49 -- Layout of heap objects
50 -----------------------------------------------------------
-- Lay out a data constructor's payload in a closure: returns the
-- ClosureInfo for the object plus each non-void argument paired with
-- its VirtualHpOffset inside the object.
52 layOutDynConstr, layOutStaticConstr
53 :: DataCon -> [(PrimRep, a)]
54 -> (ClosureInfo, [(NonVoid a, VirtualHpOffset)])
55 -- No Void arguments in result
-- The dynamic and static variants differ only in the is_static flag
-- they pass on to layOutConstr.
57 layOutDynConstr = layOutConstr False
58 layOutStaticConstr = layOutConstr True
-- Shared worker for layOutDynConstr/layOutStaticConstr: computes the
-- heap offsets of the constructor arguments via mkVirtHeapOffsets and
-- builds the constructor's ClosureInfo from the resulting word counts.
-- NOTE(review): this listing elides some original lines here (e.g. the
-- 'where' keyword and the ptr_wds part of the tuple binding) -- confirm
-- against the full file.
60 layOutConstr :: Bool -> DataCon -> [(PrimRep, a)]
61 -> (ClosureInfo, [(NonVoid a, VirtualHpOffset)])
62 layOutConstr is_static data_con args
63 = (mkConInfo is_static data_con tot_wds ptr_wds,
66 (tot_wds, -- #ptr_wds + #nonptr_wds
68 things_w_offsets) = mkVirtHeapOffsets False{-not a thunk-} args
71 -----------------------------------------------------------
72 -- Initialise dynamic heap objects
73 -----------------------------------------------------------
-- allocDynClosure: allocate a dynamic closure in the heap and bump the
-- virtual Hp.  NOTE(review): the leading lines of this signature are
-- elided in this listing.
77 -> CmmExpr -- Cost Centre to stick in the object
78 -> CmmExpr -- Cost Centre to blame for this alloc
79 -- (usually the same; sometimes "OVERHEAD")
81 -> [(NonVoid StgArg, VirtualHpOffset)] -- Offsets from start of object
82 -- ie Info ptr has offset zero.
83 -- No void args in here
84 -> FCode (LocalReg, CmmAGraph)
-- Signature for allocDynClosureCmm (its name line is elided in this
-- listing): same job, but the payload is already-computed CmmExprs.
87 :: ClosureInfo -> CmmExpr -> CmmExpr
88 -> [(CmmExpr, VirtualHpOffset)]
89 -> FCode (LocalReg, CmmAGraph)
91 -- allocDynClosure allocates the thing in the heap,
92 -- and modifies the virtual Hp to account for this.
93 -- The second return value is the graph that sets the value of the
94 -- returned LocalReg, which should point to the closure after executing
97 -- Note [Return a LocalReg]
98 -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
99 -- allocDynClosure returns a LocalReg, not a (Hp+8) CmmExpr.
101 -- ...allocate object...
104 -- ...here obj is still valid,
105 -- but Hp+8 means something quite different...
-- Convert the StgArg payload to Cmm amodes, then defer to the Cmm
-- variant.  blame_cc is currently unused (see comment in the Cmm
-- variant below).
108 allocDynClosure cl_info use_cc _blame_cc args_w_offsets
109 = do { let (args, offsets) = unzip args_w_offsets
110 ; cmm_args <- mapM getArgAmode args -- No void args
111 ; allocDynClosureCmm cl_info use_cc _blame_cc (zip cmm_args offsets)
-- allocDynClosureCmm: the workhorse.  Emits ticky/profiling hooks,
-- writes the closure header and payload words into the heap at the
-- current virtual Hp, bumps the virtual Hp by the closure size, and
-- returns a LocalReg bound to the new closure's address (see Note
-- [Return a LocalReg] above).
114 allocDynClosureCmm cl_info use_cc _blame_cc amodes_w_offsets
115 = do { virt_hp <- getVirtHp
117 -- SAY WHAT WE ARE ABOUT TO DO
118 ; tickyDynAlloc cl_info
119 ; profDynAlloc cl_info use_cc
120 -- ToDo: This is almost certainly wrong
121 -- We're ignoring blame_cc. But until we've
122 -- fixed the boxing hack in chooseDynCostCentres etc,
123 -- we're worried about making things worse by "fixing"
124 -- this part to use blame_cc!
126 -- FIND THE OFFSET OF THE INFO-PTR WORD
127 ; let info_offset = virt_hp + 1
128 -- info_offset is the VirtualHpOffset of the first
129 -- word of the new object
130 -- Remember, virtHp points to last allocated word,
131 -- ie 1 *before* the info-ptr word of new object.
133 info_ptr = CmmLit (CmmLabel (infoTableLabelFromCI cl_info))
135 -- ALLOCATE THE OBJECT
136 ; base <- getHpRelOffset info_offset
137 ; emit (mkComment $ mkFastString "allocDynClosure")
138 ; emitSetDynHdr base info_ptr use_cc
139 ; let (cmm_args, offsets) = unzip amodes_w_offsets
140 ; hpStore base cmm_args offsets
142 -- BUMP THE VIRTUAL HEAP POINTER
143 ; setVirtHp (virt_hp + closureSize cl_info)
145 -- Assign to a temporary and return
146 -- Note [Return a LocalReg]
147 ; hp_rel <- getHpRelOffset info_offset
148 ; getCodeR $ assignTemp hp_rel }
-- Write the closure header -- the info pointer followed by the dynamic
-- profiling header words for cost-centre stack ccs -- into consecutive
-- words starting at 'base'.
150 emitSetDynHdr :: CmmExpr -> CmmExpr -> CmmExpr -> FCode ()
151 emitSetDynHdr base info_ptr ccs
152 = hpStore base header [0..]
-- NOTE(review): the 'where' keyword line is elided in this listing.
155 header = [info_ptr] ++ dynProfHdr ccs
156 -- ToDo: Gransim stuff
157 -- ToDo: Parallel stuff
160 hpStore :: CmmExpr -> [CmmExpr] -> [VirtualHpOffset] -> FCode ()
161 -- Store the item (expr,off) in base[off]
-- Emits one word-offset store per (val, off) pair; any excess in the
-- longer list is ignored (zipWith semantics).
-- NOTE(review): the 'where' keyword line for mk_store is elided here.
162 hpStore base vals offs
163 = emit (catAGraphs (zipWith mk_store vals offs))
165 mk_store val off = mkStore (cmmOffsetW base off) val
168 -----------------------------------------------------------
169 -- Layout of static closures
170 -----------------------------------------------------------
172 -- Make a static closure, adding on any extra padding needed for CAFs,
173 -- and adding a static link field if necessary.
-- Build the complete list of CmmLits for a static closure: header,
-- payload, and (for CAFs / closures needing a static link) the extra
-- static_link and saved_info fields.
-- NOTE(review): several lines are elided in this listing (parts of the
-- signature, the CAF-layout diagram, and some guard heads and binding
-- names below) -- confirm against the full file.
175 mkStaticClosureFields
178 -> Bool -- Has CAF refs
179 -> [CmmLit] -- Payload
180 -> [CmmLit] -- The full closure
181 mkStaticClosureFields cl_info ccs caf_refs payload
182 = mkStaticClosure info_lbl ccs payload padding
183 static_link_field saved_info_field
185 info_lbl = infoTableLabelFromCI cl_info
187 -- CAFs must have consistent layout, regardless of whether they
188 -- are actually updatable or not. The layout of a CAF is:
195 -- the static_link and saved_info fields must always be in the same
196 -- place. So we use closureNeedsUpdSpace rather than
197 -- closureUpdReqd here:
199 is_caf = closureNeedsUpdSpace cl_info
203 | otherwise = ASSERT(null payload) [mkIntCLit 0]
206 | is_caf || staticClosureNeedsLink cl_info = [static_link_value]
210 | is_caf = [mkIntCLit 0]
213 -- for a static constructor which has NoCafRefs, we set the
214 -- static link field to a non-zero value so the garbage
215 -- collector will ignore it.
217 | caf_refs = mkIntCLit 0
218 | otherwise = mkIntCLit 1
-- Assemble the static closure image: info-table label, variable header
-- words, then each payload literal padded out to a whole word.
221 mkStaticClosure :: CLabel -> CostCentreStack -> [CmmLit]
222 -> [CmmLit] -> [CmmLit] -> [CmmLit] -> [CmmLit]
223 mkStaticClosure info_lbl ccs payload padding static_link_field saved_info_field
224 = [CmmLabel info_lbl]
225 ++ variable_header_words
226 ++ concatMap padLitToWord payload
-- NOTE(review): the remainder of this definition (appending padding and
-- link fields, and the where-clause defining variable_header_words) is
-- elided in this listing.
231 variable_header_words
237 -- JD: Simon had elided this padding, but without it the C back end asserts
238 -- failure. Maybe it's a bad assertion, and this padding is indeed unnecessary?
-- | Pad a literal out to a whole machine word: emit the literal itself
-- followed by zero literals until the next word boundary is reached.
padLitToWord :: CmmLit -> [CmmLit]
padLitToWord lit =
    lit : zeros (wORD_SIZE - widthInBytes (typeWidth (cmmLitType lit)))
  where
    -- Emit zero literals totalling n bytes, using the widest
    -- naturally-aligned chunk available at each step.
    zeros :: Int -> [CmmLit]
    zeros n
      | n <= 0         = []
      | n `rem` 2 /= 0 = CmmInt 0 W8  : zeros (n - 1)
      | n `rem` 4 /= 0 = CmmInt 0 W16 : zeros (n - 2)
      | n `rem` 8 /= 0 = CmmInt 0 W32 : zeros (n - 4)
      | otherwise      = CmmInt 0 W64 : zeros (n - 8)
250 -----------------------------------------------------------
251 -- Heap overflow checking
252 -----------------------------------------------------------
254 {- Note [Heap checks]
256 Heap checks come in various forms. We provide the following entry
257 points to the runtime system, all of which use the native C-- entry
260 * gc() performs garbage collection and returns
261 nothing to its caller
263 * A series of canned entry points like
265 where r is a pointer. This performs gc, and
266 then returns its argument r to its caller.
268 * A series of canned entry points like
270 where f is a function closure of arity 2
271 This performs garbage collection, keeping alive the
272 three argument ptrs, and then tail-calls f(x,y)
274 These are used in the following circumstances
276 * entryHeapCheck: Function entry
277 (a) With a canned GC entry sequence
278 f( f_clo, x:ptr, y:ptr ) {
283 jump gcfun_2p( f_clo, x, y ) }
284 Note the tail call to the garbage collector;
285 it should do no register shuffling
287 (b) No canned sequence
288 f( f_clo, x:ptr, y:ptr, ...etc... ) {
293 call gc() -- Needs an info table
296 * altHeapCheck: Immediately following an eval
298 case f x y of r { (p,q) -> rhs }
299 (a) With a canned sequence for the results of f
300 (which is the very common case since
301 all boxed cases return just one pointer
304 K: -- K needs an info table
312 Here, the info table needed by the call
313 to gc_1p should be the *same* as the
314 one for the call to f; the C-- optimiser
315 spots this sharing opportunity)
317 (b) No canned sequence for results of f
318 Note second info table
320 (r1,r2,r3) = call f( x, y )
326 L: call gc() -- Extra info table here
329 * generalHeapCheck: Anywhere else
331 case branch *not* following eval,
333 Exactly the same as the previous case:
335 K: -- K needs an info table
344 --------------------------------------------------------------
345 -- A heap/stack check at a function or thunk entry point.
-- Heap/stack check at a function or thunk entry point.  Wraps 'code'
-- with a check (heapCheck True) whose failure path sets R1 to the node
-- register (or the closure's static label) and transfers to the
-- appropriate GC entry: GCEnter1 for thunks, GCFun for fast-call
-- functions, and a slow foreign jump to GCFun otherwise.
-- NOTE(review): many lines of this definition are elided in this
-- listing (signature lines, guard heads, 'where' keywords) -- confirm
-- against the full file before editing.
347 entryHeapCheck :: ClosureInfo
349 -> Maybe LocalReg -- Function (closure environment)
350 -> Int -- Arity -- not same as len args b/c of voids
351 -> [LocalReg] -- Non-void args (empty for thunk)
355 entryHeapCheck cl_info offset nodeSet arity args code
356 = do updfr_sz <- getUpdFrameOff
357 heapCheck True (gc_call updfr_sz) code
360 is_thunk = arity == 0
361 is_fastf = case closureFunInfo cl_info of
362 Just (_, ArgGen _) -> False
365 args' = map (CmmReg . CmmLocal) args
-- setN: load R1 with the node -- either the given LocalReg or the
-- closure's static label when no node register is supplied.
366 setN = case nodeSet of
367 Just n -> mkAssign nodeReg (CmmReg $ CmmLocal n)
368 Nothing -> mkAssign nodeReg $
369 CmmLit (CmmLabel $ closureLabelFromCI cl_info)
371 {- Thunks: Set R1 = node, jump GCEnter1
372 Function (fast): Set R1 = node, jump GCFun
373 Function (slow): Set R1 = node, call generic_gc -}
374 gc_call upd = setN <*> gc_lbl upd
376 | is_thunk = mkDirectJump (CmmReg $ CmmGlobal GCEnter1) [] sp
377 | is_fastf = mkDirectJump (CmmReg $ CmmGlobal GCFun) [] sp
378 | otherwise = mkForeignJump Slow (CmmReg $ CmmGlobal GCFun) args' upd
379 where sp = max offset upd
380 {- DT (12/08/10) This is a little fishy, mainly the sp fix up amount.
381 - This is since the ncg inserts spills before the stack/heap check.
382 - This should be fixed up and then we won't need to fix up the Sp on
383 - GC calls, but until then this fishy code works -}
386 -- This code is slightly outdated now and we could easily keep the above
387 -- GC methods. However, there may be some performance gains to be made by
388 -- using more specialised GC entry points. Since the semi generic GCFun
389 -- entry needs to check the node and figure out what registers to save...
390 -- if we provided and used more specialised GC entry points then these
391 -- runtime decisions could be turned into compile time decisions.
393 args' = case fun of Just f -> f : args
395 arg_exprs = map (CmmReg . CmmLocal) args'
397 | arity == 0 = mkJumpGC (CmmReg (CmmGlobal GCEnter1)) arg_exprs updfr_sz
400 Just _lbl -> panic "StgCmmHeap.entryHeapCheck: not finished"
401 -- mkJumpGC (CmmLit (CmmLabel (mkRtsCodeLabel lbl)))
402 -- arg_exprs updfr_sz
403 Nothing -> mkCall generic_gc (GC, GC) [] [] updfr_sz
-- gc_lbl: choose a canned GC entry point name from the types of the
-- live registers, when one exists.
405 gc_lbl :: [LocalReg] -> Maybe FastString
407 | isGcPtrType ty = Just (sLit "stg_gc_unpt_r1") -- "stg_gc_fun_1p"
408 | isFloatType ty = case width of
409 W32 -> Just (sLit "stg_gc_f1")
410 W64 -> Just (sLit "stg_gc_d1")
412 | width == wordWidth = Just (mkGcLabel "stg_gc_unbx_r1")
413 | width == W64 = Just (mkGcLabel "stg_gc_l1")
414 | otherwise = Nothing
416 ty = localRegType reg
419 gc_lbl regs = gc_lbl_ptrs (map (isGcPtrType . localRegType) regs)
421 gc_lbl_ptrs :: [Bool] -> Maybe FastString
422 -- JD: TEMPORARY -- UNTIL THESE FUNCTIONS EXIST...
423 --gc_lbl_ptrs [True,True] = Just (sLit "stg_gc_fun_2p")
424 --gc_lbl_ptrs [True,True,True] = Just (sLit "stg_gc_fun_3p")
425 gc_lbl_ptrs _ = Nothing
429 --------------------------------------------------------------
430 -- A heap/stack check in a case alternative
-- Heap check immediately following an eval (case alternative).  On
-- failure, calls a canned RTS GC entry matched to the single live
-- register's type when one exists, otherwise the generic GC.
-- NOTE(review): some guard heads and 'where' lines are elided in this
-- listing.
432 altHeapCheck :: [LocalReg] -> FCode a -> FCode a
433 altHeapCheck regs code
434 = do updfr_sz <- getUpdFrameOff
435 heapCheck False (gc_call updfr_sz) code
438 reg_exprs = map (CmmReg . CmmLocal) regs
441 case rts_label regs of
442 Just gc -> mkCall (CmmLit gc) (GC, GC) regs reg_exprs sp
443 Nothing -> mkCall generic_gc (GC, GC) [] [] sp
-- rts_label: pick the canned entry by the live register's type --
-- pointer, float/double, word-sized unboxed, or 64-bit long.
446 | isGcPtrType ty = Just (mkGcLabel "stg_gc_unpt_r1")
447 | isFloatType ty = case width of
448 W32 -> Just (mkGcLabel "stg_gc_f1")
449 W64 -> Just (mkGcLabel "stg_gc_d1")
452 | width == wordWidth = Just (mkGcLabel "stg_gc_unbx_r1")
453 | width == W64 = Just (mkGcLabel "stg_gc_l1")
454 | otherwise = Nothing
456 ty = localRegType reg
459 rts_label _ = Nothing
462 -- | The generic GC procedure; no params, no results
-- Points at the RTS entry "stg_gc_noregs".
463 generic_gc :: CmmExpr
464 generic_gc = CmmLit $ mkGcLabel "stg_gc_noregs"
-- | Create a CmmLit label for calling a garbage-collector entry point
-- in the RTS package, given the entry point's name.
mkGcLabel :: String -> CmmLit
mkGcLabel name = CmmLabel (mkCmmCodeLabel rtsPackageId (fsLit name))
470 -------------------------------
-- Wrap 'code' with a heap (and, when checkStack, stack) check: collect
-- the code's heap usage via getHeapUsage, then emit check code for that
-- many words ahead of it, plus ticky/GranSim allocation accounting.
471 heapCheck :: Bool -> CmmAGraph -> FCode a -> FCode a
472 heapCheck checkStack do_gc code
473 = getHeapUsage $ \ hpHw ->
474 -- Emit heap checks, but be sure to do it lazily so
475 -- that the conditionals on hpHw don't cause a black hole
476 do { emit $ do_checks checkStack hpHw do_gc
477 ; tickyAllocHeap hpHw
478 ; doGranAllocate hpHw
-- NOTE(review): the tail of this definition is elided in this listing.
-- Emit the actual check code: optionally test Sp against the stack
-- limit; bump Hp by 'alloc' words and test it against HpLim; on heap
-- overflow record the request in HpAlloc, run do_gc out of line, and
-- branch back to the loop label to retry the check.
-- NOTE(review): several lines of this definition are elided in this
-- listing (e.g. the start of the emitted graph and parts of the
-- where-clause) -- confirm against the full file.
482 do_checks :: Bool -- Should we check the stack?
483 -> WordOff -- Heap headroom
484 -> CmmAGraph -- What to do on failure
486 do_checks checkStack alloc do_gc
487 = withFreshLabel "gc" $ \ loop_id ->
488 withFreshLabel "gc" $ \ gc_id ->
490 <*> (let hpCheck = if alloc == 0 then mkNop
491 else mkAssign hpReg bump_hp <*>
492 mkCmmIfThen hp_oflo (alloc_n <*> mkBranch gc_id)
494 then mkCmmIfThenElse sp_oflo (mkBranch gc_id) hpCheck
496 <*> mkComment (mkFastString "outOfLine should follow:")
497 <*> outOfLine (mkLabel gc_id
498 <*> mkComment (mkFastString "outOfLine here")
500 <*> mkBranch loop_id)
501 -- Test for stack pointer exhaustion, then
502 -- bump heap pointer, and test for heap exhaustion
503 -- Note that we don't move the heap pointer unless the
504 -- stack check succeeds. Otherwise we might end up
505 -- with slop at the end of the current block, which can
506 -- confuse the LDV profiler.
508 alloc_lit = CmmLit (mkIntCLit (alloc*wORD_SIZE)) -- Bytes
509 bump_hp = cmmOffsetExprB (CmmReg hpReg) alloc_lit
511 -- Sp overflow if (Sp - CmmHighStack < SpLim)
512 sp_oflo = CmmMachOp mo_wordULt
513 [CmmMachOp (MO_Sub (typeWidth (cmmRegType spReg)))
514 [CmmReg spReg, CmmLit CmmHighStackMark],
517 -- Hp overflow if (Hp > HpLim)
518 -- (Hp has been incremented by now)
519 -- HpLim points to the LAST WORD of valid allocation space.
520 hp_oflo = CmmMachOp mo_wordUGt
521 [CmmReg hpReg, CmmReg (CmmGlobal HpLim)]
523 alloc_n = mkAssign (CmmGlobal HpAlloc) alloc_lit
527 {- Unboxed tuple alternatives and let-no-escapes (the two most annoying
528 constructs to generate code for!) For unboxed tuple returns, there
529 are an arbitrary number of possibly unboxed return values, some of
530 which will be in registers, and the others will be on the stack. We
531 always organise the stack-resident fields into pointers &
532 non-pointers, and pass the number of each to the heap check code. -}
-- Heap check for unboxed-tuple returns / let-no-escapes: passes the
-- register/stack liveness mask to the RTS in VanillaReg 9 and fails to
-- the generic unboxed-tuple GC entry "stg_gc_ut".
-- NOTE(review): the signature's name line and parts of the body are
-- elided in this listing -- confirm against the full file.
535 :: [(Id, GlobalReg)] -- Live registers
536 -> WordOff -- no. of stack slots containing ptrs
537 -> WordOff -- no. of stack slots containing nonptrs
538 -> CmmAGraph -- code to insert in the failure path
542 unbxTupleHeapCheck regs ptrs nptrs fail_code code
543 -- We can't manage more than 255 pointers/non-pointers
544 -- in a generic heap check.
545 | ptrs > 255 || nptrs > 255 = panic "altHeapCheck"
547 = initHeapUsage $ \ hpHw -> do
548 { codeOnly $ do { do_checks 0 {- no stack check -} hpHw
549 full_fail_code rts_label
550 ; tickyAllocHeap hpHw }
554 full_fail_code = fail_code `plusStmts` oneStmt assign_liveness
555 assign_liveness = CmmAssign (CmmGlobal (VanillaReg 9)) -- Ho ho ho!
556 (CmmLit (mkWordCLit liveness))
557 liveness = mkRegLiveness regs ptrs nptrs
558 rts_label = CmmLit (CmmLabel (mkRtsCodeLabel (sLit "stg_gc_ut")))
561 {- Old Gransim com -- I have no idea whether it still makes sense (SLPJ Sep07)
562 For GrAnSim the code for doing a heap check and doing a context switch
563 has been separated. Especially, the HEAP_CHK macro only performs a
564 heap check. THREAD_CONTEXT_SWITCH should be used for doing a context
565 switch. GRAN_FETCH_AND_RESCHEDULE must be put at the beginning of
566 every slow entry code in order to simulate the fetching of
567 closures. If fetching is necessary (i.e. current closure is not local)
568 then an automatic context switch is done. -}
571 When failing a check, we save a return address on the stack and
572 jump to a pre-compiled code fragment that saves the live registers
573 and returns to the scheduler.
575 The return address in most cases will be the beginning of the basic
576 block in which the check resides, since we need to perform the check
577 again on re-entry because someone else might have stolen the resource
580 %************************************************************************
582 Generic Heap/Stack Checks - used in the RTS
584 %************************************************************************
-- Generic heap check used from the RTS: stashes the liveness mask in
-- VanillaReg 9 and the re-entry point in VanillaReg 10 before failing
-- to stg_gc_gen.
587 hpChkGen :: CmmExpr -> CmmExpr -> CmmExpr -> FCode ()
588 hpChkGen bytes liveness reentry
589 = do_checks' bytes True assigns stg_gc_gen
-- NOTE(review): the 'where assigns = ...' line opening this statement
-- list is elided in this listing.
592 CmmAssign (CmmGlobal (VanillaReg 9)) liveness,
593 CmmAssign (CmmGlobal (VanillaReg 10)) reentry
-- A heap check where R1 points to the closure to enter on return, and
-- we want to assign to Sp[0] on failure (used in AutoApply.cmm:BUILD_PAP).
hpChkNodePointsAssignSp0 :: CmmExpr -> CmmExpr -> FCode ()
hpChkNodePointsAssignSp0 bytes sp0 =
    do_checks' bytes True (oneStmt (CmmStore (CmmReg spReg) sp0)) stg_gc_enter1
603 stg_gc_gen = CmmLit (CmmLabel (mkRtsCodeLabel (sLit "stg_gc_gen")))