2 % (c) The University of Glasgow 2006
3 % (c) The GRASP/AQUA Project, Glasgow University, 1992-1998
5 \section[CgHeapery]{Heap management functions}
9 initHeapUsage, getVirtHp, setVirtHp, setRealHp,
10 getHpRelOffset, hpRel,
12 funEntryChecks, thunkEntryChecks,
13 altHeapCheck, unbxTupleHeapCheck,
14 hpChkGen, hpChkNodePointsAssignSp0,
15 stkChkGen, stkChkNodePoints,
17 layOutDynConstr, layOutStaticConstr,
18 mkVirtHeapOffsets, mkStaticClosureFields, mkStaticClosure,
20 allocDynClosure, emitSetDynHdr
23 #include "HsVersions.h"
51 %************************************************************************
53 \subsection[CgUsages-heapery]{Monad things for fiddling with heap usage}
55 %************************************************************************
57 The heap always grows upwards, so hpRel is easy
-- Word distance from the heap pointer to a heap object.  Both arguments
-- are virtual heap offsets; the result is the object's offset minus Hp's.
hpRel :: VirtualHpOffset    -- virtual offset of Hp
      -> VirtualHpOffset    -- virtual offset of the object being addressed
      -> WordOff            -- word offset of the object relative to Hp
hpRel hp_off thing_off = thing_off - hp_off
66 @initHeapUsage@ applies a function to the amount of heap that it uses.
67 It initialises the heap usage to zeros, and passes on an unchanged
70 It is usually a prelude to performing a GC check, so everything must
71 be in a tidy and consistent state.
73 rje: Note the slightly subtle fixed point behaviour needed here
-- Runs the supplied code generator against a freshly initialised heap-usage
-- record, passing it the high-water mark (heapHWM) of a heap usage obtained
-- through fixC, and finally reinstates the heap usage current on entry.
76 initHeapUsage :: (VirtualHpOffset -> Code) -> Code
78 = do { orig_hp_usage <- getHpUsage -- saved so it can be restored at the end
79 ; setHpUsage initHpUsage
-- NOTE(review): heap_usage2 is supplied by fixC, so it is presumably the
-- usage produced by this very block; fcode would then have to consume the
-- high-water mark lazily.  Confirm against the definition of fixC.
80 ; fixC (\heap_usage2 -> do
81 { fcode (heapHWM heap_usage2)
83 ; setHpUsage orig_hp_usage }
-- Replaces the virtHp field of the current heap usage with new_virtHp;
-- the record's other fields are carried over unchanged.
85 setVirtHp :: VirtualHpOffset -> Code
87 = do { hp_usage <- getHpUsage
88 ; setHpUsage (hp_usage {virtHp = new_virtHp}) }
-- Returns the virtHp field of the current heap usage.
90 getVirtHp :: FCode VirtualHpOffset
92 = do { hp_usage <- getHpUsage
93 ; return (virtHp hp_usage) }
-- Replaces the realHp field of the current heap usage with new_realHp;
-- the record's other fields are carried over unchanged.
95 setRealHp :: VirtualHpOffset -> Code
97 = do { hp_usage <- getHpUsage
98 ; setHpUsage (hp_usage {realHp = new_realHp}) }
-- Builds the expression that addresses a virtual heap offset: the Hp
-- register displaced by the word distance (hpRel) between the current
-- real Hp and the requested offset.
getHpRelOffset :: VirtualHpOffset -> FCode CmmExpr
getHpRelOffset virtual_offset = do
    usage <- getHpUsage
    let words_from_hp = hpRel (realHp usage) virtual_offset
    return (cmmRegOffW hpReg words_from_hp)
107 %************************************************************************
109 Layout of heap objects
111 %************************************************************************
-- layOutDynConstr and layOutStaticConstr are the same routine, layOutConstr,
-- applied to is_static = False and is_static = True respectively.
114 layOutDynConstr, layOutStaticConstr
119 [(a,VirtualHpOffset)])
121 layOutDynConstr = layOutConstr False
122 layOutStaticConstr = layOutConstr True
-- mkConInfo is handed tot_wds, which is taken from the mkVirtHeapOffsets
-- result bound below; constructors are laid out in non-thunk mode.
124 layOutConstr is_static this_pkg data_con args
125 = (mkConInfo this_pkg is_static data_con tot_wds ptr_wds,
128 (tot_wds, -- #ptr_wds + #nonptr_wds
130 things_w_offsets) = mkVirtHeapOffsets False{-not a thunk-} args
133 @mkVirtHeapOffsets@ always returns boxed things with smaller offsets
134 than the unboxed things, and furthermore, the offsets in the result
-- Assigns a word offset to each thing that survives the isVoidArg filter.
-- Pointers are numbered first, starting from 0 words; non-pointers carry on
-- from wds_of_ptrs; every reported offset has hdr_size added to it.
139 :: Bool -- True <=> is a thunk
140 -> [(CgRep,a)] -- Things to make offsets for
141 -> (WordOff, -- _Total_ number of words allocated
142 WordOff, -- Number of words allocated for *pointers*
143 [(a, VirtualHpOffset)])
144 -- Things with their offsets from start of
145 -- object in order of increasing offset
147 -- First in list gets lowest offset, which is initial offset + 1.
149 mkVirtHeapOffsets is_thunk things
150 = let non_void_things = filterOut (isVoidArg . fst) things
151 (ptrs, non_ptrs) = separateByPtrFollowness non_void_things
152 (wds_of_ptrs, ptrs_w_offsets) = mapAccumL computeOffset 0 ptrs
153 (tot_wds, non_ptrs_w_offsets) = mapAccumL computeOffset wds_of_ptrs non_ptrs
155 (tot_wds, wds_of_ptrs, ptrs_w_offsets ++ non_ptrs_w_offsets)
-- Header size is selected by is_thunk: thunkHdrSize for thunks,
-- fixedHdrSize otherwise.
157 hdr_size | is_thunk = thunkHdrSize
158 | otherwise = fixedHdrSize
-- Accumulator step for mapAccumL: grow the running size by this thing's
-- width (cgRepSizeW) and record the thing at hdr_size + size-so-far.
160 computeOffset wds_so_far (rep, thing)
161 = (wds_so_far + cgRepSizeW rep, (thing, hdr_size + wds_so_far))
165 %************************************************************************
167 Lay out a static closure
169 %************************************************************************
171 Make a static closure, adding on any extra padding needed for CAFs,
172 and adding a static link field if necessary.
-- Delegates to mkStaticClosure, passing the info label derived from cl_info
-- together with ccs, the payload, and the padding, static-link and
-- saved-info fields selected by the guards below.
175 mkStaticClosureFields
178 -> Bool -- Has CAF refs
179 -> [CmmLit] -- Payload
180 -> [CmmLit] -- The full closure
181 mkStaticClosureFields cl_info ccs caf_refs payload
182 = mkStaticClosure info_lbl ccs payload padding_wds
183 static_link_field saved_info_field
185 info_lbl = infoTableLabelFromCI cl_info
187 -- CAFs must have consistent layout, regardless of whether they
188 -- are actually updatable or not. The layout of a CAF is:
195 -- the static_link and saved_info fields must always be in the same
196 -- place. So we use closureNeedsUpdSpace rather than
197 -- closureUpdReqd here:
199 is_caf = closureNeedsUpdSpace cl_info
203 | otherwise = ASSERT(null payload) [mkIntCLit 0]
206 | is_caf || staticClosureNeedsLink cl_info = [static_link_value]
210 | is_caf = [mkIntCLit 0]
213 -- for a static constructor which has NoCafRefs, we set the
214 -- static link field to a non-zero value so the garbage
215 -- collector will ignore it.
-- NOTE(review): the next two guards presumably define static_link_value
-- (the head line is not visible here): 0 when caf_refs holds, 1 otherwise.
217 | caf_refs = mkIntCLit 0
218 | otherwise = mkIntCLit 1
-- Concatenates the words that make up a static closure.  As far as is
-- visible here, the info label comes first and is followed by
-- variable_header_words.
221 mkStaticClosure :: CLabel -> CostCentreStack -> [CmmLit]
222 -> [CmmLit] -> [CmmLit] -> [CmmLit] -> [CmmLit]
223 mkStaticClosure info_lbl ccs payload padding_wds static_link_field saved_info_field
224 = [CmmLabel info_lbl]
225 ++ variable_header_words
231 variable_header_words
238 %************************************************************************
240 \subsection[CgHeapery-heap-overflow]{Heap overflow checking}
242 %************************************************************************
244 The new code for heapChecks. For GrAnSim the code for doing a heap check
245 and doing a context switch has been separated. Especially, the HEAP_CHK
246 macro only performs a heap check. THREAD_CONTEXT_SWITCH should be used for
247 doing a context switch. GRAN_FETCH_AND_RESCHEDULE must be put at the
248 beginning of every slow entry code in order to simulate the fetching of
249 closures. If fetching is necessary (i.e. current closure is not local) then
250 an automatic context switch is done.
252 --------------------------------------------------------------
253 A heap/stack check at a function or thunk entry point.
-- Heap/stack check for a function entry point: hpStkCheck with the
-- "is a function" flag set and the caller's register-save statements.
funEntryChecks :: ClosureInfo -> CmmStmts -> Code -> Code
funEntryChecks cl_info reg_saves body
  = hpStkCheck cl_info is_function reg_saves body
  where
    is_function = True
-- Heap/stack check for a thunk entry point: hpStkCheck with the
-- "is a function" flag cleared and no register-save statements.
thunkEntryChecks :: ClosureInfo -> Code -> Code
thunkEntryChecks cl_info body
  = hpStkCheck cl_info is_function no_reg_saves body
  where
    is_function  = False
    no_reg_saves = noStmts
-- Combined stack and heap check for an entry point.  The stack requirement
-- is spHw - sp words (spHw comes from getFinalStackHW) and the heap
-- requirement is the hpHw value supplied by initHeapUsage.
264 hpStkCheck :: ClosureInfo -- Function closure
265 -> Bool -- Is a function? (not a thunk)
266 -> CmmStmts -- Register saves
270 hpStkCheck cl_info is_fun reg_save_code code
271 = getFinalStackHW $ \ spHw -> do
273 ; let stk_words = spHw - sp
274 ; initHeapUsage $ \ hpHw -> do
275 { -- Emit heap checks, but be sure to do it lazily so
276 -- that the conditionals on hpHw don't cause a black hole
278 { do_checks stk_words hpHw full_save_code rts_label
279 ; tickyAllocHeap hpHw }
285 | nodeMustPointToIt (closureLFInfo cl_info)
288 = oneStmt (CmmAssign nodeReg (CmmLit (CmmLabel closure_lbl)))
289 closure_lbl = closureLabelFromCI cl_info
-- Failure-path statements: node_asst combined (on the left) with the
-- caller-supplied register saves.
291 full_save_code = node_asst `plusStmts` reg_save_code
-- Failure target: GCFun for functions, GCEnter1 otherwise.
293 rts_label | is_fun = CmmReg (CmmGlobal GCFun)
294 -- Function entry point
295 | otherwise = CmmReg (CmmGlobal GCEnter1)
296 -- Thunk or case return
297 -- In the thunk/case-return case, R1 points to a closure
298 -- which should be (re)-entered after GC
301 Heap checks in a case alternative are nice and easy, provided this is
302 a bog-standard algebraic case. We have in our hand:
304 * one return address, on the stack,
305 * one return value, in Node.
307 the canned code for this heap check failure just pushes Node on the
308 stack, saying 'EnterGHC' to return. The scheduler will return by
309 entering the top value on the stack, which in turn will return through
310 the return address, getting us back to where we were. This is
311 therefore only valid if the return value is *lifted* (just being
312 boxed isn't good enough).
314 For primitive returns, we have an unlifted value in some register
315 (either R1 or FloatReg1 or DblReg1). This means using specialised
316 heap-check code for these cases.
-- Heap check for a case alternative.  No stack check is requested and no
-- statements are saved on failure; the failure target is chosen by
-- rts_label, which has one equation per AltType constructor below.
320 :: AltType -- PolyAlt, PrimAlt, AlgAlt, but *not* UbxTupAlt
321 -- (Unboxed tuples are dealt with by unbxTupleHeapCheck)
322 -> Code -- Continuation
324 altHeapCheck alt_type code
325 = initHeapUsage $ \ hpHw -> do
327 { do_checks 0 {- no stack chk -} hpHw
328 noStmts {- nothing to save -}
330 ; tickyAllocHeap hpHw }
334 rts_label PolyAlt = CmmLit (CmmLabel (mkRtsCodeLabel SLIT( "stg_gc_unpt_r1")))
335 -- Do *not* enter R1 after a heap check in
336 -- a polymorphic case. It might be a function
337 -- and the entry code for a function (currently)
340 -- However R1 is guaranteed to be a pointer
342 rts_label (AlgAlt tc) = stg_gc_enter1
343 -- Enter R1 after the heap check; it's a pointer
-- Primitive alternatives pick the RTS label from the CgRep of the
-- type constructor's primitive representation.
345 rts_label (PrimAlt tc)
346 = CmmLit $ CmmLabel $
347 case primRepToCgRep (tyConPrimRep tc) of
348 VoidArg -> mkRtsCodeLabel SLIT( "stg_gc_noregs")
349 FloatArg -> mkRtsCodeLabel SLIT( "stg_gc_f1")
350 DoubleArg -> mkRtsCodeLabel SLIT( "stg_gc_d1")
351 LongArg -> mkRtsCodeLabel SLIT( "stg_gc_l1")
352 -- R1 is boxed but unlifted:
353 PtrArg -> mkRtsCodeLabel SLIT( "stg_gc_unpt_r1")
355 NonPtrArg -> mkRtsCodeLabel SLIT( "stg_gc_unbx_r1")
-- Unboxed-tuple alternatives are not supported here (see the signature).
357 rts_label (UbxTupAlt _) = panic "altHeapCheck"
361 Unboxed tuple alternatives and let-no-escapes (the two most annoying
362 constructs to generate code for!) For unboxed tuple returns, there
363 are an arbitrary number of possibly unboxed return values, some of
364 which will be in registers, and the others will be on the stack. We
365 always organise the stack-resident fields into pointers &
366 non-pointers, and pass the number of each to the heap check code.
-- Heap check for an unboxed-tuple return.  On failure the caller's
-- fail_code is combined with an assignment of the register-liveness word
-- (mkRegLiveness regs ptrs nptrs) to VanillaReg 9, and control goes to
-- the stg_gc_ut label.
370 :: [(Id, GlobalReg)] -- Live registers
371 -> WordOff -- no. of stack slots containing ptrs
372 -> WordOff -- no. of stack slots containing nonptrs
373 -> CmmStmts -- code to insert in the failure path
377 unbxTupleHeapCheck regs ptrs nptrs fail_code code
378 -- We can't manage more than 255 pointers/non-pointers
379 -- in a generic heap check.
-- NOTE(review): the panic message below says "altHeapCheck" although this
-- is unbxTupleHeapCheck; a panic raised here is misattributed.
380 | ptrs > 255 || nptrs > 255 = panic "altHeapCheck"
382 = initHeapUsage $ \ hpHw -> do
383 { codeOnly $ do { do_checks 0 {- no stack check -} hpHw
384 full_fail_code rts_label
385 ; tickyAllocHeap hpHw }
389 full_fail_code = fail_code `plusStmts` oneStmt assign_liveness
390 assign_liveness = CmmAssign (CmmGlobal (VanillaReg 9)) -- Ho ho ho!
391 (CmmLit (mkWordCLit liveness))
392 liveness = mkRegLiveness regs ptrs nptrs
393 rts_label = CmmLit (CmmLabel (mkRtsCodeLabel SLIT("stg_gc_ut")))
398 %************************************************************************
402 %************************************************************************
404 When failing a check, we save a return address on the stack and
405 jump to a pre-compiled code fragment that saves the live registers
406 and returns to the scheduler.
408 The return address in most cases will be the beginning of the basic
409 block in which the check resides, since we need to perform the check
410 again on re-entry because someone else might have stolen the resource
-- Word-denominated front end to do_checks'.  Emits nothing when both
-- headrooms are zero; otherwise scales each headroom to bytes with
-- wORD_SIZE and tells do_checks' which of the two is non-zero.
414 do_checks :: WordOff -- Stack headroom
415 -> WordOff -- Heap headroom
416 -> CmmStmts -- Assignments to perform on failure
417 -> CmmExpr -- Rts address to jump to on failure
419 do_checks 0 0 _ _ = nopC
420 do_checks stk hp reg_save_code rts_lbl
421 = do_checks' (CmmLit (mkIntCLit (stk*wORD_SIZE)))
422 (CmmLit (mkIntCLit (hp*wORD_SIZE)))
423 (stk /= 0) (hp /= 0) reg_save_code rts_lbl
-- Byte-denominated check emitter.  A separate failure block records the
-- requested heap bytes in HpAlloc, runs reg_save_code and jumps to rts_lbl;
-- the stack-overflow and heap-overflow tests both branch to that block.
425 -- The offsets are now in *bytes*
426 do_checks' stk_expr hp_expr stk_nonzero hp_nonzero reg_save_code rts_lbl
427 = do { doGranAllocate hp_expr
429 -- Emit a block for the heap-check-failure code
430 ; blk_id <- forkLabelledCode $ do
432 stmtC (CmmAssign (CmmGlobal HpAlloc) hp_expr)
433 ; emitStmts reg_save_code
434 ; stmtC (CmmJump rts_lbl []) }
436 -- Check for stack overflow *FIRST*; otherwise
437 -- we might end up bumping Hp and then failing the stack overflow check
439 (stmtC (CmmCondBranch stk_oflo blk_id))
442 (stmtsC [CmmAssign hpReg
443 (cmmOffsetExprB (CmmReg hpReg) hp_expr),
444 CmmCondBranch hp_oflo blk_id])
445 -- Bump heap pointer, and test for heap exhaustion
446 -- Note that we don't move the heap pointer unless the
447 -- stack check succeeds. Otherwise we might end up
448 -- with slop at the end of the current block, which can
449 -- confuse the LDV profiler.
452 -- Stk overflow if (Sp - stk_bytes < SpLim)
453 stk_oflo = CmmMachOp mo_wordULt
454 [CmmMachOp mo_wordSub [CmmReg spReg, stk_expr],
455 CmmReg (CmmGlobal SpLim)]
457 -- Hp overflow if (Hp > HpLim)
458 -- (Hp has been incremented by now)
459 -- HpLim points to the LAST WORD of valid allocation space.
460 hp_oflo = CmmMachOp mo_wordUGt
461 [CmmReg hpReg, CmmReg (CmmGlobal HpLim)]
464 %************************************************************************
466 Generic Heap/Stack Checks - used in the RTS
468 %************************************************************************
-- Generic heap-only check: zero stack bytes, 'bytes' of heap, failure
-- target stg_gc_gen.  The failure-path assignments include storing
-- 'liveness' in VanillaReg 9 and 'reentry' in VanillaReg 10.
471 hpChkGen :: CmmExpr -> CmmExpr -> CmmExpr -> Code
472 hpChkGen bytes liveness reentry
473 = do_checks' (CmmLit (mkIntCLit 0)) bytes False True assigns stg_gc_gen
476 CmmAssign (CmmGlobal (VanillaReg 9)) liveness,
477 CmmAssign (CmmGlobal (VanillaReg 10)) reentry
-- Heap-only check for code in which R1 points to the closure to enter on
-- return.  The failure path stores 'sp0' to Sp[0] and goes to
-- stg_gc_enter1 (used in AutoApply.cmm:BUILD_PAP).
hpChkNodePointsAssignSp0 :: CmmExpr -> CmmExpr -> Code
hpChkNodePointsAssignSp0 bytes sp0
  = do_checks' no_stack_bytes bytes False True store_sp0 stg_gc_enter1
  where
    no_stack_bytes = CmmLit (mkIntCLit 0)
    store_sp0      = oneStmt (CmmStore (CmmReg spReg) sp0)
-- Generic stack-only check: 'bytes' of stack, zero heap bytes, failure
-- target stg_gc_gen.  The failure-path assignments include storing
-- 'liveness' in VanillaReg 9 and 'reentry' in VanillaReg 10.
487 stkChkGen :: CmmExpr -> CmmExpr -> CmmExpr -> Code
488 stkChkGen bytes liveness reentry
489 = do_checks' bytes (CmmLit (mkIntCLit 0)) True False assigns stg_gc_gen
492 CmmAssign (CmmGlobal (VanillaReg 9)) liveness,
493 CmmAssign (CmmGlobal (VanillaReg 10)) reentry
-- Stack-only check with nothing to save on failure; the failure path
-- goes to stg_gc_enter1.
stkChkNodePoints :: CmmExpr -> Code
stkChkNodePoints bytes
  = do_checks' bytes no_heap_bytes True False noStmts stg_gc_enter1
  where
    no_heap_bytes = CmmLit (mkIntCLit 0)
-- Failure targets handed to do_checks' by the generic checks above.
stg_gc_gen, stg_gc_enter1 :: CmmExpr
stg_gc_gen    = CmmLit $ CmmLabel $ mkRtsCodeLabel SLIT("stg_gc_gen")
stg_gc_enter1 = CmmReg $ CmmGlobal GCEnter1
504 %************************************************************************
506 \subsection[initClosure]{Initialise a dynamic closure}
508 %************************************************************************
510 @allocDynClosure@ puts the thing in the heap, and modifies the virtual Hp
-- Emits the stores that build a dynamic closure one word past the current
-- virtual Hp: header words from initDynHdr at offsets 0.., then the
-- caller's (expression, offset) pairs.  Afterwards the virtual Hp is
-- advanced by closureSize cl_info and the object's virtual offset returned.
516 -> CmmExpr -- Cost Centre to stick in the object
517 -> CmmExpr -- Cost Centre to blame for this alloc
518 -- (usually the same; sometimes "OVERHEAD")
520 -> [(CmmExpr, VirtualHpOffset)] -- Offsets from start of the object
521 -- ie Info ptr has offset zero.
522 -> FCode VirtualHpOffset -- Returns virt offset of object
524 allocDynClosure cl_info use_cc blame_cc amodes_with_offsets
525 = do { virt_hp <- getVirtHp
527 -- FIND THE OFFSET OF THE INFO-PTR WORD
528 ; let info_offset = virt_hp + 1
529 -- info_offset is the VirtualHpOffset of the first
530 -- word of the new object
531 -- Remember, virtHp points to last allocated word,
532 -- ie 1 *before* the info-ptr word of new object.
534 info_ptr = CmmLit (CmmLabel (infoTableLabelFromCI cl_info))
535 hdr_w_offsets = initDynHdr info_ptr use_cc `zip` [0..]
537 -- SAY WHAT WE ARE ABOUT TO DO
538 ; profDynAlloc cl_info use_cc
539 -- ToDo: This is almost certainly wrong
540 -- We're ignoring blame_cc. But until we've
541 -- fixed the boxing hack in chooseDynCostCentres etc,
542 -- we're worried about making things worse by "fixing"
543 -- this part to use blame_cc!
545 ; tickyDynAlloc cl_info
547 -- ALLOCATE THE OBJECT
-- base is the address expression for info_offset relative to the real Hp
548 ; base <- getHpRelOffset info_offset
549 ; hpStore base (hdr_w_offsets ++ amodes_with_offsets)
551 -- BUMP THE VIRTUAL HEAP POINTER
552 ; setVirtHp (virt_hp + closureSize cl_info)
554 -- RETURN PTR TO START OF OBJECT
555 ; returnFC info_offset }
-- Produces the header words of a dynamic closure from its info pointer and
-- cost centre.  Callers (allocDynClosure, emitSetDynHdr) zip the result
-- with the word offsets [0..] before storing it.
558 initDynHdr :: CmmExpr
559 -> CmmExpr -- Cost centre to put in object
561 initDynHdr info_ptr cc
563 -- ToDo: Gransim stuff
564 -- ToDo: Parallel stuff
-- Emits one CmmStore per (value, offset) pair; each store targets the
-- address 'base' displaced by 'off' words (cmmOffsetW).
568 hpStore :: CmmExpr -> [(CmmExpr, VirtualHpOffset)] -> Code
569 -- Store the item (expr,off) in base[off]
571 = stmtsC [ CmmStore (cmmOffsetW base off) val
-- Writes a dynamic-closure header at 'base': the words produced by
-- initDynHdr are paired with consecutive word offsets from 0 and stored.
emitSetDynHdr :: CmmExpr -> CmmExpr -> CmmExpr -> Code
emitSetDynHdr base info_ptr ccs
  = hpStore base hdr_w_offsets
  where
    hdr_w_offsets = initDynHdr info_ptr ccs `zip` [0..]