% (c) The University of Glasgow 2006
% (c) The GRASP/AQUA Project, Glasgow University, 1992-1998
\section[CgHeapery]{Heap management functions}

module CgHeapery (
        initHeapUsage, getVirtHp, setVirtHp, setRealHp,
        getHpRelOffset, hpRel,

        funEntryChecks, thunkEntryChecks,
        altHeapCheck, unbxTupleHeapCheck,
        hpChkGen, hpChkNodePointsAssignSp0,
        stkChkGen, stkChkNodePoints,

        layOutDynConstr, layOutStaticConstr,
        mkVirtHeapOffsets, mkStaticClosureFields, mkStaticClosure,

        allocDynClosure, emitSetDynHdr
    ) where

#include "HsVersions.h"
%************************************************************************
\subsection[CgUsages-heapery]{Monad things for fiddling with heap usage}
%************************************************************************
The heap always grows upwards, so hpRel is easy.

hpRel :: VirtualHpOffset        -- virtual offset of Hp
      -> VirtualHpOffset        -- virtual offset of The Thing
      -> WordOff                -- integer word offset
hpRel hp off = off - hp
@initHeapUsage@ applies a function to the amount of heap that it uses.
It initialises the heap usage to zeros, and passes on an unchanged
heap usage.

It is usually a prelude to performing a GC check, so everything must
be in a tidy and consistent state.

rje: Note the slightly subtle fixed point behaviour needed here.

initHeapUsage :: (VirtualHpOffset -> Code) -> Code
initHeapUsage fcode
  = do  { orig_hp_usage <- getHpUsage
        ; setHpUsage initHpUsage
        ; fixC (\heap_usage2 -> do
                { fcode (heapHWM heap_usage2)
                ; getHpUsage })
        ; setHpUsage orig_hp_usage }
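
The fixed point is needed because the code passed to @fcode@ must mention the
final heap high-water mark, which is only known once @fcode@ itself has been
run; @fixC@ ties that knot.  Below is a minimal standalone sketch of the same
trick in an ordinary lazy state monad; the @Gen@, @emit@, @alloc@ and
@withHeapCheck@ names are hypothetical and stand in for GHC's @FCode@, code
emission and @fixC@ machinery.

{-# LANGUAGE RecursiveDo #-}
import Control.Monad.State

type Gen = State (Int, [String])   -- (words allocated so far, code emitted so far)

emit :: String -> Gen ()
emit s = modify (\(n, code) -> (n, code ++ [s]))

alloc :: Int -> Gen ()
alloc k = modify (\(n, code) -> (n + k, code))

-- Emit a heap check *before* the body, mentioning the body's total allocation.
withHeapCheck :: Gen () -> Gen ()
withHeapCheck body = mdo
  emit ("HP_CHK " ++ show total)   -- 'total' is only available after 'body'
  body
  (total, _) <- get
  return ()

-- snd (execState (withHeapCheck (alloc 3 >> emit "fill in closure" >> alloc 2)) (0, []))
--   ==> ["HP_CHK 5","fill in closure"]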
setVirtHp :: VirtualHpOffset -> Code
setVirtHp new_virtHp
  = do  { hp_usage <- getHpUsage
        ; setHpUsage (hp_usage {virtHp = new_virtHp}) }

getVirtHp :: FCode VirtualHpOffset
getVirtHp
  = do  { hp_usage <- getHpUsage
        ; return (virtHp hp_usage) }

setRealHp :: VirtualHpOffset -> Code
setRealHp new_realHp
  = do  { hp_usage <- getHpUsage
        ; setHpUsage (hp_usage {realHp = new_realHp}) }

getHpRelOffset :: VirtualHpOffset -> FCode CmmExpr
getHpRelOffset virtual_offset
  = do  { hp_usg <- getHpUsage
        ; return (cmmRegOffW hpReg (hpRel (realHp hp_usg) virtual_offset)) }
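
A small worked example (illustrative numbers only): suppose the real Hp has
been advanced to virtual offset 10 (@realHp = 10@) and we ask for the word
that was allocated at virtual offset 8.  Then

    hpRel 10 8  ==  8 - 10  ==  -2

so @getHpRelOffset 8@ returns an address two words below the @Hp@ register:
the heap grows upwards and @Hp@ points at the most recently allocated real
word.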
%************************************************************************
                Layout of heap objects
%************************************************************************
layOutDynConstr, layOutStaticConstr
        :: PackageId
        -> DataCon
        -> [(CgRep,a)]
        -> (ClosureInfo,
            [(a,VirtualHpOffset)])

layOutDynConstr    = layOutConstr False
layOutStaticConstr = layOutConstr True

layOutConstr is_static this_pkg data_con args
  = (mkConInfo this_pkg is_static data_con tot_wds ptr_wds,
     things_w_offsets)
  where
    (tot_wds,            -- #ptr_wds + #nonptr_wds
     ptr_wds,            -- #ptr_wds
     things_w_offsets) = mkVirtHeapOffsets False{-not a thunk-} args
@mkVirtHeapOffsets@ always returns boxed things with smaller offsets
than the unboxed things, and furthermore, the offsets in the result
are in increasing order.
mkVirtHeapOffsets
        :: Bool                 -- True <=> is a thunk
        -> [(CgRep,a)]          -- Things to make offsets for
        -> (WordOff,            -- _Total_ number of words allocated
            WordOff,            -- Number of words allocated for *pointers*
            [(a, VirtualHpOffset)])
                -- Things with their offsets from start of
                -- object in order of increasing offset

-- First in list gets lowest offset, which is initial offset + 1.
mkVirtHeapOffsets is_thunk things
  = let non_void_things               = filterOut (isVoidArg . fst) things
        (ptrs, non_ptrs)              = separateByPtrFollowness non_void_things
        (wds_of_ptrs, ptrs_w_offsets) = mapAccumL computeOffset 0 ptrs
        (tot_wds, non_ptrs_w_offsets) = mapAccumL computeOffset wds_of_ptrs non_ptrs
    in
    (tot_wds, wds_of_ptrs, ptrs_w_offsets ++ non_ptrs_w_offsets)
  where
    hdr_size | is_thunk  = thunkHdrSize
             | otherwise = fixedHdrSize

    computeOffset wds_so_far (rep, thing)
      = (wds_so_far + cgRepSizeW rep, (thing, hdr_size + wds_so_far))
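
A standalone sketch of the same layout algorithm, with hypothetical
@Rep@/@sizeW@ standing in for @CgRep@/@cgRepSizeW@ and @partition@ for
@separateByPtrFollowness@: pointers get the lower offsets, non-pointers
follow, and every offset starts just past the header.

import Data.List (mapAccumL, partition)

data Rep = Ptr | NonPtr Int            -- NonPtr n occupies n words
  deriving (Eq, Show)

sizeW :: Rep -> Int
sizeW Ptr        = 1
sizeW (NonPtr n) = n

-- (total words, pointer words, things with offsets from start of object)
layout :: Int -> [(Rep, String)] -> (Int, Int, [(String, Int)])
layout hdrSize things = (totWds, ptrWds, ptrsOff ++ nonPtrsOff)
  where
    (ptrs, nonPtrs)       = partition ((== Ptr) . fst) things
    (ptrWds, ptrsOff)     = mapAccumL offset 0 ptrs
    (totWds, nonPtrsOff)  = mapAccumL offset ptrWds nonPtrs
    offset sofar (rep, x) = (sofar + sizeW rep, (x, hdrSize + sofar))

-- layout 1 [(NonPtr 2, "n"), (Ptr, "p")]
--   ==> (3, 1, [("p",1),("n",2)])   -- the pointer gets the lower offset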
%************************************************************************
                Lay out a static closure
%************************************************************************
Make a static closure, adding on any extra padding needed for CAFs,
and adding a static link field if necessary.
mkStaticClosureFields
        :: ClosureInfo
        -> CostCentreStack
        -> Bool                 -- Has CAF refs
        -> [CmmLit]             -- Payload
        -> [CmmLit]             -- The full closure
mkStaticClosureFields cl_info ccs caf_refs payload
  = mkStaticClosure info_lbl ccs payload padding_wds
        static_link_field saved_info_field
  where
    info_lbl = infoTableLabelFromCI cl_info

    -- CAFs must have consistent layout, regardless of whether they
    -- are actually updatable or not.  The layout of a CAF is:
    --
    --        info pointer, payload, padding, static link, saved info
    --
    -- The static_link and saved_info fields must always be in the same
    -- place.  So we use closureNeedsUpdSpace rather than
    -- closureUpdReqd here:

    is_caf = closureNeedsUpdSpace cl_info

    padding_wds
        | not is_caf = []
        | otherwise  = ASSERT(null payload) [mkIntCLit 0]

    static_link_field
        | is_caf || staticClosureNeedsLink cl_info = [static_link_value]
        | otherwise                                = []

    saved_info_field
        | is_caf    = [mkIntCLit 0]
        | otherwise = []

        -- For a static constructor which has NoCafRefs, we set the
        -- static link field to a non-zero value so the garbage
        -- collector will ignore it.
    static_link_value
        | caf_refs  = mkIntCLit 0
        | otherwise = mkIntCLit 1
mkStaticClosure :: CLabel -> CostCentreStack -> [CmmLit]
  -> [CmmLit] -> [CmmLit] -> [CmmLit] -> [CmmLit]
mkStaticClosure info_lbl ccs payload padding_wds static_link_field saved_info_field
  =  [CmmLabel info_lbl]
  ++ variable_header_words
  ++ payload
  ++ padding_wds
  ++ static_link_field
  ++ saved_info_field
  where
    variable_header_words
        =  staticGranHdr
        ++ staticParHdr
        ++ staticProfHdr ccs
        ++ staticTickyHdr
%************************************************************************
\subsection[CgHeapery-heap-overflow]{Heap overflow checking}
%************************************************************************
The new code for heapChecks.  For GrAnSim the code for doing a heap check
and doing a context switch has been separated.  In particular, the HEAP_CHK
macro only performs a heap check.  THREAD_CONTEXT_SWITCH should be used for
doing a context switch.  GRAN_FETCH_AND_RESCHEDULE must be put at the
beginning of every slow entry code in order to simulate the fetching of
closures.  If fetching is necessary (i.e. the current closure is not local),
an automatic context switch is done.
--------------------------------------------------------------
A heap/stack check at a function or thunk entry point.

funEntryChecks :: ClosureInfo -> CmmStmts -> Code -> Code
funEntryChecks cl_info reg_save_code code
  = hpStkCheck cl_info True reg_save_code code

thunkEntryChecks :: ClosureInfo -> Code -> Code
thunkEntryChecks cl_info code
  = hpStkCheck cl_info False noStmts code
hpStkCheck :: ClosureInfo       -- Function closure
           -> Bool              -- Is a function? (not a thunk)
           -> CmmStmts          -- Register saves
           -> Code
           -> Code

hpStkCheck cl_info is_fun reg_save_code code
  = getFinalStackHW $ \ spHw -> do
        { sp <- getRealSp
        ; let stk_words = spHw - sp
        ; initHeapUsage $ \ hpHw -> do
            {   -- Emit heap checks, but be sure to do it lazily so
                -- that the conditionals on hpHw don't cause a black hole
              codeOnly $ do
                { do_checks stk_words hpHw full_save_code rts_label
                ; tickyAllocHeap hpHw }
            ; setRealHp hpHw
            ; code }
        }
  where
    node_asst
        | nodeMustPointToIt (closureLFInfo cl_info)
        = noStmts
        | otherwise
        = oneStmt (CmmAssign nodeReg (CmmLit (CmmLabel closure_lbl)))
    closure_lbl = closureLabelFromCI cl_info

    full_save_code = node_asst `plusStmts` reg_save_code

    rts_label | is_fun    = CmmReg (CmmGlobal GCFun)
                                -- Function entry point
              | otherwise = CmmReg (CmmGlobal GCEnter1)
                                -- Thunk or case return
        -- In the thunk/case-return case, R1 points to a closure
        -- which should be (re)-entered after GC
Heap checks in a case alternative are nice and easy, provided this is
a bog-standard algebraic case.  We have in our hand:

       * one return address, on the stack,
       * one return value, in Node.

The canned code for this heap-check failure just pushes Node on the
stack, saying 'EnterGHC' to return.  The scheduler will return by
entering the top value on the stack, which in turn will return through
the return address, getting us back to where we were.  This is
therefore only valid if the return value is *lifted* (just being
boxed isn't good enough).

For primitive returns, we have an unlifted value in some register
(either R1 or FloatReg1 or DblReg1).  This means using specialised
heap-check code for these cases.
altHeapCheck
    :: AltType  -- PolyAlt, PrimAlt, AlgAlt, but *not* UbxTupAlt
                --      (Unboxed tuples are dealt with by ubxTupleHeapCheck)
    -> Code     -- Continuation
    -> Code
altHeapCheck alt_type code
  = initHeapUsage $ \ hpHw -> do
        { codeOnly $ do
             { do_checks 0 {- no stack check -} hpHw
                         noStmts {- nothing to save -}
                         (rts_label alt_type)
             ; tickyAllocHeap hpHw }
        ; setRealHp hpHw
        ; code }
  where
    rts_label PolyAlt = CmmLit (CmmLabel (mkRtsCodeLabel SLIT("stg_gc_unpt_r1")))
        -- Do *not* enter R1 after a heap check in
        -- a polymorphic case.  It might be a function
        -- and the entry code for a function (currently)
        -- applies it
        --
        -- However R1 is guaranteed to be a pointer

    rts_label (AlgAlt tc) = stg_gc_enter1
        -- Enter R1 after the heap check; it's a pointer

    rts_label (PrimAlt tc)
      = CmmLit $ CmmLabel $
        case primRepToCgRep (tyConPrimRep tc) of
          VoidArg   -> mkRtsCodeLabel SLIT("stg_gc_noregs")
          FloatArg  -> mkRtsCodeLabel SLIT("stg_gc_f1")
          DoubleArg -> mkRtsCodeLabel SLIT("stg_gc_d1")
          LongArg   -> mkRtsCodeLabel SLIT("stg_gc_l1")
                        -- R1 is boxed but unlifted:
          PtrArg    -> mkRtsCodeLabel SLIT("stg_gc_unpt_r1")
                        -- R1 is unboxed:
          NonPtrArg -> mkRtsCodeLabel SLIT("stg_gc_unbx_r1")

    rts_label (UbxTupAlt _) = panic "altHeapCheck"
Unboxed tuple alternatives and let-no-escapes (the two most annoying
constructs to generate code for!)  For unboxed tuple returns, there
are an arbitrary number of possibly unboxed return values, some of
which will be in registers, and the others will be on the stack.  We
always organise the stack-resident fields into pointers &
non-pointers, and pass the number of each to the heap check code.
unbxTupleHeapCheck
        :: [(Id, GlobalReg)]    -- Live registers
        -> WordOff              -- no. of stack slots containing ptrs
        -> WordOff              -- no. of stack slots containing nonptrs
        -> CmmStmts             -- code to insert in the failure path
        -> Code
        -> Code

unbxTupleHeapCheck regs ptrs nptrs fail_code code
  -- We can't manage more than 255 pointers/non-pointers
  -- in a generic heap check.
  | ptrs > 255 || nptrs > 255 = panic "altHeapCheck"
  | otherwise
  = initHeapUsage $ \ hpHw -> do
        { codeOnly $ do { do_checks 0 {- no stack check -} hpHw
                                    full_fail_code rts_label
                        ; tickyAllocHeap hpHw }
        ; setRealHp hpHw
        ; code }
  where
    full_fail_code  = fail_code `plusStmts` oneStmt assign_liveness
    assign_liveness = CmmAssign (CmmGlobal (VanillaReg 9))      -- Ho ho ho!
                                (CmmLit (mkWordCLit liveness))
    liveness        = mkRegLiveness regs ptrs nptrs
    rts_label       = CmmLit (CmmLabel (mkRtsCodeLabel SLIT("stg_gc_ut")))
%************************************************************************
                Heap/Stack Checks
%************************************************************************
When failing a check, we save a return address on the stack and
jump to a pre-compiled code fragment that saves the live registers
and returns to the scheduler.

The return address in most cases will be the beginning of the basic
block in which the check resides, since we need to perform the check
again on re-entry because someone else might have stolen the resource
in the meantime.
do_checks :: WordOff            -- Stack headroom
          -> WordOff            -- Heap headroom
          -> CmmStmts           -- Assignments to perform on failure
          -> CmmExpr            -- Rts address to jump to on failure
          -> Code

do_checks 0 0 _ _ = nopC
do_checks stk hp reg_save_code rts_lbl
  = do_checks' (CmmLit (mkIntCLit (stk*wORD_SIZE)))
               (CmmLit (mkIntCLit (hp*wORD_SIZE)))
               (stk /= 0) (hp /= 0) reg_save_code rts_lbl
-- The offsets are now in *bytes*
do_checks' stk_expr hp_expr stk_nonzero hp_nonzero reg_save_code rts_lbl
  = do  { doGranAllocate hp_expr

        -- Emit a block for the heap-check-failure code
        ; blk_id <- forkLabelledCode $ do
                        { whenC hp_nonzero $
                             stmtC (CmmAssign (CmmGlobal HpAlloc) hp_expr)
                        ; emitStmts reg_save_code
                        ; stmtC (CmmJump rts_lbl []) }

        -- Check for stack overflow *FIRST*; otherwise
        -- we might bump Hp and then fail the stack overflow check
        ; whenC stk_nonzero
                (stmtC (CmmCondBranch stk_oflo blk_id))

        ; whenC hp_nonzero
                (stmtsC [CmmAssign hpReg
                                (cmmOffsetExprB (CmmReg hpReg) hp_expr),
                         CmmCondBranch hp_oflo blk_id])
                -- Bump heap pointer, and test for heap exhaustion
                -- Note that we don't move the heap pointer unless the
                -- stack check succeeds.  Otherwise we might end up
                -- with slop at the end of the current block, which can
                -- confuse the LDV profiler.
        }
  where
        -- Stk overflow if (Sp - stk_bytes < SpLim)
    stk_oflo = CmmMachOp mo_wordULt
                  [CmmMachOp mo_wordSub [CmmReg spReg, stk_expr],
                   CmmReg (CmmGlobal SpLim)]

        -- Hp overflow if (Hp > HpLim)
        -- (Hp has been incremented by now)
        -- HpLim points to the LAST WORD of valid allocation space.
    hp_oflo = CmmMachOp mo_wordUGt
                  [CmmReg hpReg, CmmReg (CmmGlobal HpLim)]
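
A worked example of the arithmetic (assuming an 8-byte word, i.e.
@wORD_SIZE = 8@): a call @do_checks 2 4 saves lbl@ passes 16 stack bytes and
32 heap bytes to @do_checks'@, which forks a failure block that sets
@HpAlloc@ to 32, runs @saves@, and jumps to @lbl@; inline, it first emits the
branch on @Sp - 16 < SpLim@, and only then @Hp = Hp + 32@ followed by the
branch on @Hp > HpLim@, both targeting that failure block.  Here @saves@ and
@lbl@ are just placeholder names for the register-save statements and the RTS
label.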
%************************************************************************
                Generic Heap/Stack Checks - used in the RTS
%************************************************************************
hpChkGen :: CmmExpr -> CmmExpr -> CmmExpr -> Code
hpChkGen bytes liveness reentry
  = do_checks' (CmmLit (mkIntCLit 0)) bytes False True assigns stg_gc_gen
  where
    assigns = mkStmts [
                CmmAssign (CmmGlobal (VanillaReg 9))  liveness,
                CmmAssign (CmmGlobal (VanillaReg 10)) reentry
                ]

-- A heap check where R1 points to the closure to enter on return, and
-- we want to assign to Sp[0] on failure (used in AutoApply.cmm:BUILD_PAP).
hpChkNodePointsAssignSp0 :: CmmExpr -> CmmExpr -> Code
hpChkNodePointsAssignSp0 bytes sp0
  = do_checks' (CmmLit (mkIntCLit 0)) bytes False True assign stg_gc_enter1
  where assign = oneStmt (CmmStore (CmmReg spReg) sp0)

stkChkGen :: CmmExpr -> CmmExpr -> CmmExpr -> Code
stkChkGen bytes liveness reentry
  = do_checks' bytes (CmmLit (mkIntCLit 0)) True False assigns stg_gc_gen
  where
    assigns = mkStmts [
                CmmAssign (CmmGlobal (VanillaReg 9))  liveness,
                CmmAssign (CmmGlobal (VanillaReg 10)) reentry
                ]

stkChkNodePoints :: CmmExpr -> Code
stkChkNodePoints bytes
  = do_checks' bytes (CmmLit (mkIntCLit 0)) True False noStmts stg_gc_enter1

stg_gc_gen    = CmmLit (CmmLabel (mkRtsCodeLabel SLIT("stg_gc_gen")))
stg_gc_enter1 = CmmReg (CmmGlobal GCEnter1)
%************************************************************************
\subsection[initClosure]{Initialise a dynamic closure}
%************************************************************************
@allocDynClosure@ puts the thing in the heap, and modifies the virtual Hp
to account for this.

allocDynClosure
        :: ClosureInfo
        -> CmmExpr              -- Cost Centre to stick in the object
        -> CmmExpr              -- Cost Centre to blame for this alloc
                                -- (usually the same; sometimes "OVERHEAD")

        -> [(CmmExpr, VirtualHpOffset)] -- Offsets from start of the object
                                        -- ie Info ptr has offset zero.
        -> FCode VirtualHpOffset        -- Returns virt offset of object
allocDynClosure cl_info use_cc blame_cc amodes_with_offsets
  = do  { virt_hp <- getVirtHp

        -- FIND THE OFFSET OF THE INFO-PTR WORD
        ; let info_offset = virt_hp + 1
                -- info_offset is the VirtualHpOffset of the first
                -- word of the new object
                -- Remember, virtHp points to last allocated word,
                -- ie 1 *before* the info-ptr word of new object.

              info_ptr      = CmmLit (CmmLabel (infoTableLabelFromCI cl_info))
              hdr_w_offsets = initDynHdr info_ptr use_cc `zip` [0..]

        -- SAY WHAT WE ARE ABOUT TO DO
        ; profDynAlloc cl_info use_cc
                -- ToDo: This is almost certainly wrong
                -- We're ignoring blame_cc. But until we've
                -- fixed the boxing hack in chooseDynCostCentres etc,
                -- we're worried about making things worse by "fixing"
                -- this part to use blame_cc!

        ; tickyDynAlloc cl_info

        -- ALLOCATE THE OBJECT
        ; base <- getHpRelOffset info_offset
        ; hpStore base (hdr_w_offsets ++ amodes_with_offsets)

        -- BUMP THE VIRTUAL HEAP POINTER
        ; setVirtHp (virt_hp + closureSize cl_info)

        -- RETURN PTR TO START OF OBJECT
        ; returnFC info_offset }
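
A small worked example of the virtual-Hp bookkeeping (illustrative numbers
only): if @virtHp@ is 3 on entry (pointing at the last allocated word) and
@closureSize cl_info@ is 4 words, then @info_offset@ is 4, the header and
payload occupy virtual offsets 4 to 7, and @setVirtHp@ leaves @virtHp@ at 7,
once again pointing at the last allocated word.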
initDynHdr :: CmmExpr
           -> CmmExpr           -- Cost centre to put in object
           -> [CmmExpr]
initDynHdr info_ptr cc
  =  [info_ptr]
        -- ToDo: Gransim stuff
        -- ToDo: Parallel stuff
  ++ dynProfHdr cc
hpStore :: CmmExpr -> [(CmmExpr, VirtualHpOffset)] -> Code
-- Store the item (expr,off) in base[off]
hpStore base es
  = stmtsC [ CmmStore (cmmOffsetW base off) val
           | (val, off) <- es ]
emitSetDynHdr :: CmmExpr -> CmmExpr -> CmmExpr -> Code
emitSetDynHdr base info_ptr ccs
  = hpStore base (zip (initDynHdr info_ptr ccs) [0..])