2 % (c) The University of Glasgow 2006
3 % (c) The GRASP/AQUA Project, Glasgow University, 1992-1998
5 \section[CgHeapery]{Heap management functions}
9 initHeapUsage, getVirtHp, setVirtHp, setRealHp,
10 getHpRelOffset, hpRel,
12 funEntryChecks, thunkEntryChecks,
13 altHeapCheck, unbxTupleHeapCheck,
14 hpChkGen, hpChkNodePointsAssignSp0,
15 stkChkGen, stkChkNodePoints,
17 layOutDynConstr, layOutStaticConstr,
18 mkVirtHeapOffsets, mkStaticClosureFields, mkStaticClosure,
20 allocDynClosure, emitSetDynHdr
23 #include "HsVersions.h"
52 %************************************************************************
54 \subsection[CgUsages-heapery]{Monad things for fiddling with heap usage}
56 %************************************************************************
58 The heap always grows upwards, so hpRel is easy
61 hpRel :: VirtualHpOffset -- virtual offset of Hp
62 -> VirtualHpOffset -- virtual offset of The Thing
63 -> WordOff -- integer word offset
64 hpRel hp off = off - hp
67 @initHeapUsage@ applies a function to the amount of heap that it uses.
68 It initialises the heap usage to zeros, and passes on an unchanged
71 It is usually a prelude to performing a GC check, so everything must
72 be in a tidy and consistent state.
74 rje: Note the slightly suble fixed point behaviour needed here
77 initHeapUsage :: (VirtualHpOffset -> Code) -> Code
79 = do { orig_hp_usage <- getHpUsage
80 ; setHpUsage initHpUsage
81 ; fixC (\heap_usage2 -> do
82 { fcode (heapHWM heap_usage2)
84 ; setHpUsage orig_hp_usage }
86 setVirtHp :: VirtualHpOffset -> Code
88 = do { hp_usage <- getHpUsage
89 ; setHpUsage (hp_usage {virtHp = new_virtHp}) }
91 getVirtHp :: FCode VirtualHpOffset
93 = do { hp_usage <- getHpUsage
94 ; return (virtHp hp_usage) }
96 setRealHp :: VirtualHpOffset -> Code
98 = do { hp_usage <- getHpUsage
99 ; setHpUsage (hp_usage {realHp = new_realHp}) }
101 getHpRelOffset :: VirtualHpOffset -> FCode CmmExpr
102 getHpRelOffset virtual_offset
103 = do { hp_usg <- getHpUsage
104 ; return (cmmRegOffW hpReg (hpRel (realHp hp_usg) virtual_offset)) }
108 %************************************************************************
110 Layout of heap objects
112 %************************************************************************
115 layOutDynConstr, layOutStaticConstr
119 [(a,VirtualHpOffset)])
121 layOutDynConstr = layOutConstr False
122 layOutStaticConstr = layOutConstr True
124 layOutConstr :: Bool -> DataCon -> [(CgRep, a)]
125 -> (ClosureInfo, [(a, VirtualHpOffset)])
126 layOutConstr is_static data_con args
127 = (mkConInfo is_static data_con tot_wds ptr_wds,
130 (tot_wds, -- #ptr_wds + #nonptr_wds
132 things_w_offsets) = mkVirtHeapOffsets False{-not a thunk-} args
135 @mkVirtHeapOffsets@ always returns boxed things with smaller offsets
136 than the unboxed things, and furthermore, the offsets in the result
141 :: Bool -- True <=> is a thunk
142 -> [(CgRep,a)] -- Things to make offsets for
143 -> (WordOff, -- _Total_ number of words allocated
144 WordOff, -- Number of words allocated for *pointers*
145 [(a, VirtualHpOffset)])
146 -- Things with their offsets from start of
147 -- object in order of increasing offset
149 -- First in list gets lowest offset, which is initial offset + 1.
151 mkVirtHeapOffsets is_thunk things
152 = let non_void_things = filterOut (isVoidArg . fst) things
153 (ptrs, non_ptrs) = separateByPtrFollowness non_void_things
154 (wds_of_ptrs, ptrs_w_offsets) = mapAccumL computeOffset 0 ptrs
155 (tot_wds, non_ptrs_w_offsets) = mapAccumL computeOffset wds_of_ptrs non_ptrs
157 (tot_wds, wds_of_ptrs, ptrs_w_offsets ++ non_ptrs_w_offsets)
159 hdr_size | is_thunk = thunkHdrSize
160 | otherwise = fixedHdrSize
162 computeOffset wds_so_far (rep, thing)
163 = (wds_so_far + cgRepSizeW rep, (thing, hdr_size + wds_so_far))
167 %************************************************************************
169 Lay out a static closure
171 %************************************************************************
173 Make a static closure, adding on any extra padding needed for CAFs,
174 and adding a static link field if necessary.
177 mkStaticClosureFields
180 -> Bool -- Has CAF refs
181 -> [CmmLit] -- Payload
182 -> [CmmLit] -- The full closure
183 mkStaticClosureFields cl_info ccs caf_refs payload
184 = mkStaticClosure info_lbl ccs payload padding_wds
185 static_link_field saved_info_field
187 info_lbl = infoTableLabelFromCI cl_info $ clHasCafRefs cl_info
189 -- CAFs must have consistent layout, regardless of whether they
190 -- are actually updatable or not. The layout of a CAF is:
197 -- the static_link and saved_info fields must always be in the same
198 -- place. So we use closureNeedsUpdSpace rather than
199 -- closureUpdReqd here:
201 is_caf = closureNeedsUpdSpace cl_info
205 | otherwise = ASSERT(null payload) [mkIntCLit 0]
208 | is_caf || staticClosureNeedsLink cl_info = [static_link_value]
212 | is_caf = [mkIntCLit 0]
215 -- for a static constructor which has NoCafRefs, we set the
216 -- static link field to a non-zero value so the garbage
217 -- collector will ignore it.
219 | caf_refs = mkIntCLit 0
220 | otherwise = mkIntCLit 1
222 mkStaticClosure :: CLabel -> CostCentreStack -> [CmmLit]
223 -> [CmmLit] -> [CmmLit] -> [CmmLit] -> [CmmLit]
224 mkStaticClosure info_lbl ccs payload padding_wds static_link_field saved_info_field
225 = [CmmLabel info_lbl]
226 ++ variable_header_words
227 ++ concatMap padLitToWord payload
232 variable_header_words
238 padLitToWord :: CmmLit -> [CmmLit]
239 padLitToWord lit = lit : padding pad_length
240 where width = typeWidth (cmmLitType lit)
241 pad_length = wORD_SIZE - widthInBytes width :: Int
243 padding n | n <= 0 = []
244 | n `rem` 2 /= 0 = CmmInt 0 W8 : padding (n-1)
245 | n `rem` 4 /= 0 = CmmInt 0 W16 : padding (n-2)
246 | n `rem` 8 /= 0 = CmmInt 0 W32 : padding (n-4)
247 | otherwise = CmmInt 0 W64 : padding (n-8)
250 %************************************************************************
252 \subsection[CgHeapery-heap-overflow]{Heap overflow checking}
254 %************************************************************************
256 The new code for heapChecks. For GrAnSim the code for doing a heap check
257 and doing a context switch has been separated. Especially, the HEAP_CHK
258 macro only performs a heap check. THREAD_CONTEXT_SWITCH should be used for
259 doing a context switch. GRAN_FETCH_AND_RESCHEDULE must be put at the
260 beginning of every slow entry code in order to simulate the fetching of
261 closures. If fetching is necessary (i.e. current closure is not local) then
262 an automatic context switch is done.
264 --------------------------------------------------------------
265 A heap/stack check at a function or thunk entry point.
268 funEntryChecks :: ClosureInfo -> CmmStmts -> Code -> Code
269 funEntryChecks cl_info reg_save_code code
270 = hpStkCheck cl_info True reg_save_code code
272 thunkEntryChecks :: ClosureInfo -> Code -> Code
273 thunkEntryChecks cl_info code
274 = hpStkCheck cl_info False noStmts code
276 hpStkCheck :: ClosureInfo -- Function closure
277 -> Bool -- Is a function? (not a thunk)
278 -> CmmStmts -- Register saves
282 hpStkCheck cl_info is_fun reg_save_code code
283 = getFinalStackHW $ \ spHw -> do
285 ; let stk_words = spHw - sp
286 ; initHeapUsage $ \ hpHw -> do
287 { -- Emit heap checks, but be sure to do it lazily so
288 -- that the conditionals on hpHw don't cause a black hole
290 { do_checks stk_words hpHw full_save_code rts_label
291 ; tickyAllocHeap hpHw }
297 | nodeMustPointToIt (closureLFInfo cl_info)
300 = oneStmt (CmmAssign nodeReg (CmmLit (CmmLabel closure_lbl)))
301 -- Strictly speaking, we should tag node here. But if
302 -- node doesn't point to the closure, the code for the closure
303 -- cannot depend on the value of R1 anyway, so we're safe.
304 closure_lbl = closureLabelFromCI cl_info (clHasCafRefs cl_info)
306 full_save_code = node_asst `plusStmts` reg_save_code
308 rts_label | is_fun = CmmReg (CmmGlobal GCFun)
309 -- Function entry point
310 | otherwise = CmmReg (CmmGlobal GCEnter1)
311 -- Thunk or case return
312 -- In the thunk/case-return case, R1 points to a closure
313 -- which should be (re)-entered after GC
316 Heap checks in a case alternative are nice and easy, provided this is
317 a bog-standard algebraic case. We have in our hand:
319 * one return address, on the stack,
320 * one return value, in Node.
322 the canned code for this heap check failure just pushes Node on the
323 stack, saying 'EnterGHC' to return. The scheduler will return by
324 entering the top value on the stack, which in turn will return through
325 the return address, getting us back to where we were. This is
326 therefore only valid if the return value is *lifted* (just being
327 boxed isn't good enough).
329 For primitive returns, we have an unlifted value in some register
330 (either R1 or FloatReg1 or DblReg1). This means using specialised
331 heap-check code for these cases.
335 :: AltType -- PolyAlt, PrimAlt, AlgAlt, but *not* UbxTupAlt
336 -- (Unboxed tuples are dealt with by ubxTupleHeapCheck)
337 -> Code -- Continuation
339 altHeapCheck alt_type code
340 = initHeapUsage $ \ hpHw -> do
342 { do_checks 0 {- no stack chk -} hpHw
343 noStmts {- nothign to save -}
345 ; tickyAllocHeap hpHw }
349 rts_label PolyAlt = CmmLit (CmmLabel (mkRtsCodeLabel (sLit "stg_gc_unpt_r1")))
350 -- Do *not* enter R1 after a heap check in
351 -- a polymorphic case. It might be a function
352 -- and the entry code for a function (currently)
355 -- However R1 is guaranteed to be a pointer
357 rts_label (AlgAlt _) = stg_gc_enter1
358 -- Enter R1 after the heap check; it's a pointer
360 rts_label (PrimAlt tc)
361 = CmmLit $ CmmLabel $
362 case primRepToCgRep (tyConPrimRep tc) of
363 VoidArg -> mkRtsCodeLabel (sLit "stg_gc_noregs")
364 FloatArg -> mkRtsCodeLabel (sLit "stg_gc_f1")
365 DoubleArg -> mkRtsCodeLabel (sLit "stg_gc_d1")
366 LongArg -> mkRtsCodeLabel (sLit "stg_gc_l1")
367 -- R1 is boxed but unlifted:
368 PtrArg -> mkRtsCodeLabel (sLit "stg_gc_unpt_r1")
370 NonPtrArg -> mkRtsCodeLabel (sLit "stg_gc_unbx_r1")
372 rts_label (UbxTupAlt _) = panic "altHeapCheck"
376 Unboxed tuple alternatives and let-no-escapes (the two most annoying
377 constructs to generate code for!) For unboxed tuple returns, there
378 are an arbitrary number of possibly unboxed return values, some of
379 which will be in registers, and the others will be on the stack. We
380 always organise the stack-resident fields into pointers &
381 non-pointers, and pass the number of each to the heap check code.
385 :: [(Id, GlobalReg)] -- Live registers
386 -> WordOff -- no. of stack slots containing ptrs
387 -> WordOff -- no. of stack slots containing nonptrs
388 -> CmmStmts -- code to insert in the failure path
392 unbxTupleHeapCheck regs ptrs nptrs fail_code code
393 -- We can't manage more than 255 pointers/non-pointers
394 -- in a generic heap check.
395 | ptrs > 255 || nptrs > 255 = panic "altHeapCheck"
397 = initHeapUsage $ \ hpHw -> do
398 { codeOnly $ do { do_checks 0 {- no stack check -} hpHw
399 full_fail_code rts_label
400 ; tickyAllocHeap hpHw }
404 full_fail_code = fail_code `plusStmts` oneStmt assign_liveness
405 assign_liveness = CmmAssign (CmmGlobal (VanillaReg 9 VNonGcPtr)) -- Ho ho ho!
406 (CmmLit (mkWordCLit liveness))
407 liveness = mkRegLiveness regs ptrs nptrs
408 rts_label = CmmLit (CmmLabel (mkRtsCodeLabel (sLit "stg_gc_ut")))
413 %************************************************************************
417 %************************************************************************
419 When failing a check, we save a return address on the stack and
420 jump to a pre-compiled code fragment that saves the live registers
421 and returns to the scheduler.
423 The return address in most cases will be the beginning of the basic
424 block in which the check resides, since we need to perform the check
425 again on re-entry because someone else might have stolen the resource
429 do_checks :: WordOff -- Stack headroom
430 -> WordOff -- Heap headroom
431 -> CmmStmts -- Assignments to perform on failure
432 -> CmmExpr -- Rts address to jump to on failure
434 do_checks 0 0 _ _ = nopC
435 do_checks stk hp reg_save_code rts_lbl
436 = do_checks' (CmmLit (mkIntCLit (stk*wORD_SIZE)))
437 (CmmLit (mkIntCLit (hp*wORD_SIZE)))
438 (stk /= 0) (hp /= 0) reg_save_code rts_lbl
440 -- The offsets are now in *bytes*
441 do_checks' :: CmmExpr -> CmmExpr -> Bool -> Bool -> CmmStmts -> CmmExpr -> Code
442 do_checks' stk_expr hp_expr stk_nonzero hp_nonzero reg_save_code rts_lbl
443 = do { doGranAllocate hp_expr
445 -- Emit a block for the heap-check-failure code
446 ; blk_id <- forkLabelledCode $ do
448 stmtC (CmmAssign (CmmGlobal HpAlloc) hp_expr)
449 ; emitStmts reg_save_code
450 ; stmtC (CmmJump rts_lbl []) }
452 -- Check for stack overflow *FIRST*; otherwise
453 -- we might bumping Hp and then failing stack oflo
455 (stmtC (CmmCondBranch stk_oflo blk_id))
458 (stmtsC [CmmAssign hpReg
459 (cmmOffsetExprB (CmmReg hpReg) hp_expr),
460 CmmCondBranch hp_oflo blk_id])
461 -- Bump heap pointer, and test for heap exhaustion
462 -- Note that we don't move the heap pointer unless the
463 -- stack check succeeds. Otherwise we might end up
464 -- with slop at the end of the current block, which can
465 -- confuse the LDV profiler.
468 -- Stk overflow if (Sp - stk_bytes < SpLim)
469 stk_oflo = CmmMachOp mo_wordULt
470 [CmmMachOp mo_wordSub [CmmReg spReg, stk_expr],
471 CmmReg (CmmGlobal SpLim)]
473 -- Hp overflow if (Hp > HpLim)
474 -- (Hp has been incremented by now)
475 -- HpLim points to the LAST WORD of valid allocation space.
476 hp_oflo = CmmMachOp mo_wordUGt
477 [CmmReg hpReg, CmmReg (CmmGlobal HpLim)]
480 %************************************************************************
482 Generic Heap/Stack Checks - used in the RTS
484 %************************************************************************
487 hpChkGen :: CmmExpr -> CmmExpr -> CmmExpr -> Code
488 hpChkGen bytes liveness reentry
489 = do_checks' (CmmLit (mkIntCLit 0)) bytes False True assigns stg_gc_gen
491 assigns = mkStmts [ mk_vanilla_assignment 9 liveness,
492 mk_vanilla_assignment 10 reentry ]
494 -- a heap check where R1 points to the closure to enter on return, and
495 -- we want to assign to Sp[0] on failure (used in AutoApply.cmm:BUILD_PAP).
496 hpChkNodePointsAssignSp0 :: CmmExpr -> CmmExpr -> Code
497 hpChkNodePointsAssignSp0 bytes sp0
498 = do_checks' (CmmLit (mkIntCLit 0)) bytes False True assign stg_gc_enter1
499 where assign = oneStmt (CmmStore (CmmReg spReg) sp0)
501 stkChkGen :: CmmExpr -> CmmExpr -> CmmExpr -> Code
502 stkChkGen bytes liveness reentry
503 = do_checks' bytes (CmmLit (mkIntCLit 0)) True False assigns stg_gc_gen
505 assigns = mkStmts [ mk_vanilla_assignment 9 liveness,
506 mk_vanilla_assignment 10 reentry ]
508 mk_vanilla_assignment :: Int -> CmmExpr -> CmmStmt
509 mk_vanilla_assignment n e
510 = CmmAssign (CmmGlobal (VanillaReg n (vgcFlag (cmmExprType e)))) e
512 stkChkNodePoints :: CmmExpr -> Code
513 stkChkNodePoints bytes
514 = do_checks' bytes (CmmLit (mkIntCLit 0)) True False noStmts stg_gc_enter1
516 stg_gc_gen :: CmmExpr
517 stg_gc_gen = CmmLit (CmmLabel (mkRtsCodeLabel (sLit "stg_gc_gen")))
518 stg_gc_enter1 :: CmmExpr
519 stg_gc_enter1 = CmmReg (CmmGlobal GCEnter1)
522 %************************************************************************
524 \subsection[initClosure]{Initialise a dynamic closure}
526 %************************************************************************
528 @allocDynClosure@ puts the thing in the heap, and modifies the virtual Hp
534 -> CmmExpr -- Cost Centre to stick in the object
535 -> CmmExpr -- Cost Centre to blame for this alloc
536 -- (usually the same; sometimes "OVERHEAD")
538 -> [(CmmExpr, VirtualHpOffset)] -- Offsets from start of the object
539 -- ie Info ptr has offset zero.
540 -> FCode VirtualHpOffset -- Returns virt offset of object
542 allocDynClosure cl_info use_cc _blame_cc amodes_with_offsets
543 = do { virt_hp <- getVirtHp
545 -- FIND THE OFFSET OF THE INFO-PTR WORD
546 ; let info_offset = virt_hp + 1
547 -- info_offset is the VirtualHpOffset of the first
548 -- word of the new object
549 -- Remember, virtHp points to last allocated word,
550 -- ie 1 *before* the info-ptr word of new object.
552 info_ptr = CmmLit (CmmLabel (infoTableLabelFromCI cl_info
553 (clHasCafRefs cl_info)))
554 hdr_w_offsets = initDynHdr info_ptr use_cc `zip` [0..]
556 -- SAY WHAT WE ARE ABOUT TO DO
557 ; profDynAlloc cl_info use_cc
558 -- ToDo: This is almost certainly wrong
559 -- We're ignoring blame_cc. But until we've
560 -- fixed the boxing hack in chooseDynCostCentres etc,
561 -- we're worried about making things worse by "fixing"
562 -- this part to use blame_cc!
564 ; tickyDynAlloc cl_info
566 -- ALLOCATE THE OBJECT
567 ; base <- getHpRelOffset info_offset
568 ; hpStore base (hdr_w_offsets ++ amodes_with_offsets)
570 -- BUMP THE VIRTUAL HEAP POINTER
571 ; setVirtHp (virt_hp + closureSize cl_info)
573 -- RETURN PTR TO START OF OBJECT
574 ; returnFC info_offset }
577 initDynHdr :: CmmExpr
578 -> CmmExpr -- Cost centre to put in object
580 initDynHdr info_ptr cc
582 -- ToDo: Gransim stuff
583 -- ToDo: Parallel stuff
587 hpStore :: CmmExpr -> [(CmmExpr, VirtualHpOffset)] -> Code
588 -- Store the item (expr,off) in base[off]
590 = stmtsC [ CmmStore (cmmOffsetW base off) val
593 emitSetDynHdr :: CmmExpr -> CmmExpr -> CmmExpr -> Code
594 emitSetDynHdr base info_ptr ccs
595 = hpStore base (zip (initDynHdr info_ptr ccs) [0..])