% (c) The University of Glasgow 2006
% (c) The GRASP/AQUA Project, Glasgow University, 1992-1998

\section[CgHeapery]{Heap management functions}
module CgHeapery (
        initHeapUsage, getVirtHp, setVirtHp, setRealHp,
        getHpRelOffset, hpRel,

        funEntryChecks, thunkEntryChecks,
        altHeapCheck, unbxTupleHeapCheck,
        hpChkGen, hpChkNodePointsAssignSp0,
        stkChkGen, stkChkNodePoints,

        layOutDynConstr, layOutStaticConstr,
        mkVirtHeapOffsets, mkStaticClosureFields, mkStaticClosure,

        allocDynClosure, emitSetDynHdr
    ) where

#include "HsVersions.h"
%************************************************************************
\subsection[CgUsages-heapery]{Monad things for fiddling with heap usage}
%************************************************************************
The heap always grows upwards, so hpRel is easy.

hpRel :: VirtualHpOffset        -- virtual offset of Hp
      -> VirtualHpOffset        -- virtual offset of The Thing
      -> WordOff                -- integer word offset
hpRel hp off = off - hp
@initHeapUsage@ applies a function to the amount of heap that it uses.
It initialises the heap usage to zeros, and passes on an unchanged
heap usage.

It is usually a prelude to performing a GC check, so everything must
be in a tidy and consistent state.

rje: Note the slightly subtle fixed-point behaviour needed here
initHeapUsage :: (VirtualHpOffset -> Code) -> Code
initHeapUsage fcode
  = do  { orig_hp_usage <- getHpUsage
        ; setHpUsage initHpUsage
        ; fixC_ (\heap_usage2 -> do
                { fcode (heapHWM heap_usage2)
                ; getHpUsage })
        ; setHpUsage orig_hp_usage }
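The knot-tying here can be seen in miniature with @MonadFix@.  The
following stand-alone sketch (a toy @State@ monad over a running word
count, *not* GHC's @FCode@; all names are illustrative) emits a heap
check that quotes the final allocation total of the code generated
after it:

import Control.Monad.Fix (mfix)
import Control.Monad.State (State, evalState, gets, modify)

type Gen = State (Int, [String])   -- (words allocated, emitted code, reversed)

emit :: String -> Gen ()
emit s = modify (\(w, code) -> (w, s : code))

alloc :: Int -> Gen ()
alloc n = modify (\(w, code) -> (w + n, code)) >> emit ("Hp = Hp + " ++ show n)

-- Tie the knot: the check emitted *first* mentions the total
-- allocation of the body that runs *after* it (cf. initHeapUsage/fixC_).
withHeapCheck :: Gen () -> Gen ()
withHeapCheck body
  = do { _ <- mfix (\hwm -> do
             { emit ("if (Hp + " ++ show (hwm :: Int) ++ " > HpLim) goto gc")
             ; body
             ; gets fst })
       ; return () }

-- > mapM_ putStrLn (reverse (evalState (withHeapCheck (alloc 3 >> alloc 2) >> gets snd) (0, [])))
-- if (Hp + 5 > HpLim) goto gc
-- Hp = Hp + 3
-- Hp = Hp + 2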
setVirtHp :: VirtualHpOffset -> Code
setVirtHp new_virtHp
  = do  { hp_usage <- getHpUsage
        ; setHpUsage (hp_usage {virtHp = new_virtHp}) }

getVirtHp :: FCode VirtualHpOffset
getVirtHp
  = do  { hp_usage <- getHpUsage
        ; return (virtHp hp_usage) }

setRealHp :: VirtualHpOffset -> Code
setRealHp new_realHp
  = do  { hp_usage <- getHpUsage
        ; setHpUsage (hp_usage {realHp = new_realHp}) }

getHpRelOffset :: VirtualHpOffset -> FCode CmmExpr
getHpRelOffset virtual_offset
  = do  { hp_usg <- getHpUsage
        ; return (cmmRegOffW hpReg (hpRel (realHp hp_usg) virtual_offset)) }
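For example, here is a hedged, self-contained model of the arithmetic
(types simplified to Int, not the real ones): if the real Hp register
last caught up with virtual offset 8, then the word at virtual offset
11 is addressed three words above Hp.

type VirtualHpOffset' = Int    -- simplified stand-ins for illustration
type WordOff' = Int

hpRel' :: VirtualHpOffset' -> VirtualHpOffset' -> WordOff'
hpRel' hp off = off - hp

-- getHpRelOffset would turn this into the Cmm expression Hp + 3 words:
example :: WordOff'
example = hpRel' 8 11    -- == 3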
%************************************************************************
        Layout of heap objects
%************************************************************************
layOutDynConstr, layOutStaticConstr
        :: DataCon -> [(CgRep, a)]
        -> (ClosureInfo,
            [(a, VirtualHpOffset)])

layOutDynConstr    = layOutConstr False
layOutStaticConstr = layOutConstr True

layOutConstr :: Bool -> DataCon -> [(CgRep, a)]
             -> (ClosureInfo, [(a, VirtualHpOffset)])
layOutConstr is_static data_con args
   = (mkConInfo is_static data_con tot_wds ptr_wds,
      things_w_offsets)
  where
    (tot_wds,            -- #ptr_wds + #nonptr_wds
     ptr_wds,            -- #ptr_wds
     things_w_offsets) = mkVirtHeapOffsets False{-not a thunk-} args
@mkVirtHeapOffsets@ always returns boxed things with smaller offsets
than the unboxed things, and furthermore, the offsets in the result
list increase monotonically.

mkVirtHeapOffsets
          :: Bool               -- True <=> is a thunk
          -> [(CgRep,a)]        -- Things to make offsets for
          -> (WordOff,          -- _Total_ number of words allocated
              WordOff,          -- Number of words allocated for *pointers*
              [(a, VirtualHpOffset)])
                -- Things with their offsets from start of
                -- object in order of increasing offset

-- First in list gets lowest offset, which is initial offset + 1.
mkVirtHeapOffsets is_thunk things
  = let non_void_things               = filterOut (isVoidArg . fst) things
        (ptrs, non_ptrs)              = separateByPtrFollowness non_void_things
        (wds_of_ptrs, ptrs_w_offsets) = mapAccumL computeOffset 0 ptrs
        (tot_wds, non_ptrs_w_offsets) = mapAccumL computeOffset wds_of_ptrs non_ptrs
    in
    (tot_wds, wds_of_ptrs, ptrs_w_offsets ++ non_ptrs_w_offsets)
  where
    hdr_size | is_thunk  = thunkHdrSize
             | otherwise = fixedHdrSize

    computeOffset wds_so_far (rep, thing)
      = (wds_so_far + cgRepSizeW rep, (thing, hdr_size + wds_so_far))
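As a worked example, here is a hedged stand-alone model of this layout
rule (Rep, sizeW, and the one-word header are simplifications, not
GHC's types): pointers get the lowest offsets, then non-pointers, with
offsets starting just past the header.

import Data.List (mapAccumL, partition)

data Rep = Ptr | NonPtr Int      -- a non-pointer field and its size in words
  deriving Show

sizeW :: Rep -> Int
sizeW Ptr        = 1
sizeW (NonPtr n) = n

-- Model of mkVirtHeapOffsets for a constructor (header = 1 word).
offsets :: [(Rep, String)] -> (Int, Int, [(String, Int)])
offsets things = (tot_wds, ptr_wds, ptrs_w_offs ++ nonptrs_w_offs)
  where
    hdr_size = 1
    (ptrs, nonptrs) = partition (isPtr . fst) things
    isPtr Ptr = True
    isPtr _   = False
    step acc (rep, x) = (acc + sizeW rep, (x, hdr_size + acc))
    (ptr_wds, ptrs_w_offs)    = mapAccumL step 0 ptrs
    (tot_wds, nonptrs_w_offs) = mapAccumL step ptr_wds nonptrs

-- > offsets [(NonPtr 2, "d"), (Ptr, "p"), (Ptr, "q")]
-- (4,2,[("p",1),("q",2),("d",3)])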
%************************************************************************
        Lay out a static closure
%************************************************************************
Make a static closure, adding on any extra padding needed for CAFs,
and adding a static link field if necessary.
mkStaticClosureFields
        :: ClosureInfo
        -> CostCentreStack
        -> Bool                 -- Has CAF refs
        -> [CmmLit]             -- Payload
        -> [CmmLit]             -- The full closure
mkStaticClosureFields cl_info ccs caf_refs payload
  = mkStaticClosure info_lbl ccs payload padding_wds
        static_link_field saved_info_field
  where
    info_lbl = infoTableLabelFromCI cl_info $ clHasCafRefs cl_info

    -- CAFs must have consistent layout, regardless of whether they
    -- are actually updatable or not: the static_link and saved_info
    -- fields must always be in the same place.  So we use
    -- closureNeedsUpdSpace rather than closureUpdReqd here:

    is_caf = closureNeedsUpdSpace cl_info

    padding_wds
        | not is_caf = []
        | otherwise  = ASSERT(null payload) [mkIntCLit 0]

    static_link_field
        | is_caf || staticClosureNeedsLink cl_info = [static_link_value]
        | otherwise                                = []

    saved_info_field
        | is_caf    = [mkIntCLit 0]
        | otherwise = []

    -- For a static constructor which has NoCafRefs, we set the
    -- static link field to a non-zero value so the garbage
    -- collector will ignore it.
    static_link_value
        | caf_refs  = mkIntCLit 0
        | otherwise = mkIntCLit 1
mkStaticClosure :: CLabel -> CostCentreStack -> [CmmLit]
  -> [CmmLit] -> [CmmLit] -> [CmmLit] -> [CmmLit]
mkStaticClosure info_lbl ccs payload padding_wds static_link_field saved_info_field
  =  [CmmLabel info_lbl]
  ++ variable_header_words
  ++ concatMap padLitToWord payload
  ++ padding_wds
  ++ static_link_field
  ++ saved_info_field
  where
    variable_header_words
        =  staticGranHdr
        ++ staticParHdr
        ++ staticProfHdr ccs
        ++ staticTickyHdr
padLitToWord :: CmmLit -> [CmmLit]
padLitToWord lit = lit : padding pad_length
  where width = typeWidth (cmmLitType lit)
        pad_length = wORD_SIZE - widthInBytes width :: Int

        padding n | n <= 0         = []
                  | n `rem` 2 /= 0 = CmmInt 0 W8  : padding (n-1)
                  | n `rem` 4 /= 0 = CmmInt 0 W16 : padding (n-2)
                  | n `rem` 8 /= 0 = CmmInt 0 W32 : padding (n-4)
                  | otherwise      = CmmInt 0 W64 : padding (n-8)
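The recursion can be checked with a small stand-alone model (padWidths
is illustrative only; the real code emits CmmInt 0 literals of widths
W8/W16/W32/W64):

-- Split n leftover bytes into zero literals of width 1, 2, 4, then 8
-- bytes, keeping each literal naturally aligned.
padWidths :: Int -> [Int]
padWidths n | n <= 0         = []
            | n `rem` 2 /= 0 = 1 : padWidths (n-1)
            | n `rem` 4 /= 0 = 2 : padWidths (n-2)
            | n `rem` 8 /= 0 = 4 : padWidths (n-4)
            | otherwise      = 8 : padWidths (n-8)

-- On a 64-bit target, a 1-byte literal needs 7 bytes of padding:
-- > padWidths 7  ==  [1,2,4]
-- i.e. the emitted padding is CmmInt 0 W8, CmmInt 0 W16, CmmInt 0 W32.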
%************************************************************************
\subsection[CgHeapery-heap-overflow]{Heap overflow checking}
%************************************************************************
The new code for heap checks.  For GrAnSim the code for doing a heap
check and the code for doing a context switch have been separated.  In
particular, the HEAP_CHK macro only performs a heap check;
THREAD_CONTEXT_SWITCH should be used for doing a context switch.
GRAN_FETCH_AND_RESCHEDULE must be put at the beginning of every slow
entry code in order to simulate the fetching of closures.  If fetching
is necessary (i.e. the current closure is not local) then an automatic
context switch is done.
--------------------------------------------------------------
A heap/stack check at a function or thunk entry point.
funEntryChecks :: ClosureInfo -> CmmStmts -> Code -> Code
funEntryChecks cl_info reg_save_code code
  = hpStkCheck cl_info True reg_save_code code

thunkEntryChecks :: ClosureInfo -> Code -> Code
thunkEntryChecks cl_info code
  = hpStkCheck cl_info False noStmts code

hpStkCheck :: ClosureInfo       -- Function closure
           -> Bool              -- Is a function? (not a thunk)
           -> CmmStmts          -- Register saves
           -> Code
           -> Code

hpStkCheck cl_info is_fun reg_save_code code
  = getFinalStackHW $ \ spHw -> do
        { sp <- getRealSp
        ; let stk_words = spHw - sp
        ; initHeapUsage $ \ hpHw -> do
            {   -- Emit heap checks, but be sure to do it lazily so
                -- that the conditionals on hpHw don't cause a black hole
              codeOnly $ do
                { do_checks stk_words hpHw full_save_code rts_label
                ; tickyAllocHeap hpHw }
            ; setRealHp hpHw
            ; code }
        }
  where
    node_asst
        | nodeMustPointToIt (closureLFInfo cl_info)
        = noStmts
        | otherwise
        = oneStmt (CmmAssign nodeReg (CmmLit (CmmLabel closure_lbl)))
            -- Strictly speaking, we should tag node here.  But if
            -- node doesn't point to the closure, the code for the closure
            -- cannot depend on the value of R1 anyway, so we're safe.
    closure_lbl = closureLabelFromCI cl_info (clHasCafRefs cl_info)

    full_save_code = node_asst `plusStmts` reg_save_code

    rts_label | is_fun    = CmmReg (CmmGlobal GCFun)
                                -- Function entry point
              | otherwise = CmmReg (CmmGlobal GCEnter1)
                                -- Thunk or case return
        -- In the thunk/case-return case, R1 points to a closure
        -- which should be (re)-entered after GC
Heap checks in a case alternative are nice and easy, provided this is
a bog-standard algebraic case.  We have in our hand:

       * one return address, on the stack,
       * one return value, in Node.

The canned code for this heap check failure just pushes Node on the
stack, saying 'EnterGHC' to return.  The scheduler will return by
entering the top value on the stack, which in turn will return through
the return address, getting us back to where we were.  This is
therefore only valid if the return value is *lifted* (just being boxed
isn't good enough): an Array#, say, is boxed but unlifted and must not
be entered, whereas a thunk of type Int can safely be re-entered.

For primitive returns, we have an unlifted value in some register
(either R1 or FloatReg1 or DblReg1).  This means using specialised
heap-check code for these cases.
altHeapCheck
    :: AltType  -- PolyAlt, PrimAlt, AlgAlt, but *not* UbxTupAlt
                --      (Unboxed tuples are dealt with by ubxTupleHeapCheck)
    -> Code     -- Continuation
    -> Code
altHeapCheck alt_type code
  = initHeapUsage $ \ hpHw -> do
        { codeOnly $ do
             { do_checks 0 {- no stack chk -} hpHw
                         noStmts {- nothing to save -}
                         (rts_label alt_type)
             ; tickyAllocHeap hpHw }
        ; setRealHp hpHw
        ; code }
  where
    rts_label PolyAlt = CmmLit (CmmLabel (mkCmmCodeLabel rtsPackageId (fsLit "stg_gc_unpt_r1")))
        -- Do *not* enter R1 after a heap check in
        -- a polymorphic case.  It might be a function
        -- and the entry code for a function (currently)
        -- applies it
        --
        -- However R1 is guaranteed to be a pointer

    rts_label (AlgAlt _) = stg_gc_enter1
        -- Enter R1 after the heap check; it's a pointer

    rts_label (PrimAlt tc)
      = CmmLit $ CmmLabel $
        case primRepToCgRep (tyConPrimRep tc) of
          VoidArg   -> mkCmmCodeLabel rtsPackageId (fsLit "stg_gc_noregs")
          FloatArg  -> mkCmmCodeLabel rtsPackageId (fsLit "stg_gc_f1")
          DoubleArg -> mkCmmCodeLabel rtsPackageId (fsLit "stg_gc_d1")
          LongArg   -> mkCmmCodeLabel rtsPackageId (fsLit "stg_gc_l1")
                        -- R1 is boxed but unlifted:
          PtrArg    -> mkCmmCodeLabel rtsPackageId (fsLit "stg_gc_unpt_r1")
                        -- R1 is unboxed:
          NonPtrArg -> mkCmmCodeLabel rtsPackageId (fsLit "stg_gc_unbx_r1")

    rts_label (UbxTupAlt _) = panic "altHeapCheck"
Unboxed tuple alternatives and let-no-escapes (the two most annoying
constructs to generate code for!).  For unboxed tuple returns, there
are an arbitrary number of possibly unboxed return values, some of
which will be in registers, and the others will be on the stack.  We
always organise the stack-resident fields into pointers &
non-pointers, and pass the number of each to the heap check code,
along with a liveness mask; a simplified sketch of that mask follows.
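This packing is an assumption for illustration, *not* GHC's exact
mkRegLiveness encoding: one bit per vanilla register that holds a
pointer, plus the two byte-sized stack-slot counts, which is where the
255 limit below comes from.

import Data.Bits ((.|.), shiftL)
import Data.Word (Word64)

packLiveness :: [Int]   -- numbers of vanilla registers holding pointers
             -> Int     -- stack slots containing pointers     (<= 255)
             -> Int     -- stack slots containing non-pointers (<= 255)
             -> Word64
packLiveness ptr_regs ptrs nptrs
  | ptrs > 255 || nptrs > 255 = error "packLiveness: counts must fit in a byte"
  | otherwise =
      foldr (\r acc -> acc .|. (1 `shiftL` (r - 1))) 0 ptr_regs
        .|. (fromIntegral ptrs  `shiftL` 16)   -- byte of pointer slots
        .|. (fromIntegral nptrs `shiftL` 24)   -- byte of non-pointer slots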
unbxTupleHeapCheck
        :: [(Id, GlobalReg)]    -- Live registers
        -> WordOff              -- no. of stack slots containing ptrs
        -> WordOff              -- no. of stack slots containing nonptrs
        -> CmmStmts             -- code to insert in the failure path
        -> Code
        -> Code

unbxTupleHeapCheck regs ptrs nptrs fail_code code
  -- We can't manage more than 255 pointers/non-pointers
  -- in a generic heap check.
  | ptrs > 255 || nptrs > 255 = panic "altHeapCheck"
  | otherwise
  = initHeapUsage $ \ hpHw -> do
        { codeOnly $ do { do_checks 0 {- no stack check -} hpHw
                                    full_fail_code rts_label
                        ; tickyAllocHeap hpHw }
        ; setRealHp hpHw
        ; code }
  where
    full_fail_code  = fail_code `plusStmts` oneStmt assign_liveness
    assign_liveness = CmmAssign (CmmGlobal (VanillaReg 9 VNonGcPtr))    -- Ho ho ho!
                                (CmmLit (mkWordCLit liveness))
    liveness        = mkRegLiveness regs ptrs nptrs
    rts_label       = CmmLit (CmmLabel (mkCmmCodeLabel rtsPackageId (fsLit "stg_gc_ut")))
%************************************************************************
        Heap/Stack checks
%************************************************************************
When failing a check, we save a return address on the stack and
jump to a pre-compiled code fragment that saves the live registers
and returns to the scheduler.

The return address in most cases will be the beginning of the basic
block in which the check resides, since we need to perform the check
again on re-entry because someone else might have stolen the resource
in the meantime.
do_checks :: WordOff    -- Stack headroom
          -> WordOff    -- Heap  headroom
          -> CmmStmts   -- Assignments to perform on failure
          -> CmmExpr    -- Rts address to jump to on failure
          -> Code
do_checks 0 0 _ _ = nopC

do_checks _ hp _ _
  | hp > bLOCKS_PER_MBLOCK * bLOCK_SIZE_W
  = sorry (unlines [
        "Trying to allocate more than " ++ show (bLOCKS_PER_MBLOCK * bLOCK_SIZE) ++ " bytes.",
        "",
        "See: http://hackage.haskell.org/trac/ghc/ticket/4505",
        "Suggestion: read data from a file instead of having large static data",
        "structures in the code."])
do_checks stk hp reg_save_code rts_lbl
  = do_checks' (CmmLit (mkIntCLit (stk*wORD_SIZE)))
               (CmmLit (mkIntCLit (hp*wORD_SIZE)))
               (stk /= 0) (hp /= 0) reg_save_code rts_lbl
-- The offsets are now in *bytes*
do_checks' :: CmmExpr -> CmmExpr -> Bool -> Bool -> CmmStmts -> CmmExpr -> Code
do_checks' stk_expr hp_expr stk_nonzero hp_nonzero reg_save_code rts_lbl
  = do  { doGranAllocate hp_expr

        -- The failure block: this saves the registers and jumps to
        -- the appropriate RTS stub.
        ; exit_blk_id <- forkLabelledCode $ do {
                        ; emitStmts reg_save_code
                        ; stmtC (CmmJump rts_lbl []) }

        -- In the case of a heap-check failure, we must also set
        -- HpAlloc.  NB. HpAlloc is *only* set if Hp has been
        -- incremented by the heap check; it must not be set in the
        -- event that a stack check failed, because the RTS stub will
        -- retreat Hp by HpAlloc.
        ; hp_blk_id <- if hp_nonzero
                          then forkLabelledCode $ do
                                  stmtC (CmmAssign (CmmGlobal HpAlloc) hp_expr)
                                  stmtC (CmmBranch exit_blk_id)
                          else return exit_blk_id

        -- Check for stack overflow *FIRST*; otherwise
        -- we might bump Hp and then fail the stack check
        ; whenC stk_nonzero
                (stmtC (CmmCondBranch stk_oflo exit_blk_id))

        ; whenC hp_nonzero
                (stmtsC [CmmAssign hpReg
                                (cmmOffsetExprB (CmmReg hpReg) hp_expr),
                         CmmCondBranch hp_oflo hp_blk_id])
                -- Bump heap pointer, and test for heap exhaustion
                -- Note that we don't move the heap pointer unless the
                -- stack check succeeds.  Otherwise we might end up
                -- with slop at the end of the current block, which can
                -- confuse the LDV profiler.
    }
  where
        -- Stk overflow if (Sp - stk_bytes < SpLim)
    stk_oflo = CmmMachOp mo_wordULt
                  [CmmMachOp mo_wordSub [CmmReg spReg, stk_expr],
                   CmmReg (CmmGlobal SpLim)]

        -- Hp overflow if (Hp > HpLim)
        -- (Hp has been incremented by now)
        -- HpLim points to the LAST WORD of valid allocation space.
    hp_oflo = CmmMachOp mo_wordUGt
                  [CmmReg hpReg, CmmReg (CmmGlobal HpLim)]
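To make the emitted control flow concrete, here is a hedged toy
renderer of the block structure do_checks' produces (plain strings,
not GHC's Cmm types; the byte counts are illustrative).  Note how the
stack test branches straight to the exit block, while the heap test
goes through the block that sets HpAlloc:

renderChecks :: Int -> Int -> [String]
renderChecks stk_bytes hp_bytes =
  [ "if (Sp - " ++ show stk_bytes ++ " < SpLim) goto exit;"
  , "Hp = Hp + " ++ show hp_bytes ++ ";"
  , "if (Hp > HpLim) goto hp_fail;"
  , "-- fall through into the rest of the basic block"
  , "hp_fail: HpAlloc = " ++ show hp_bytes ++ "; goto exit;"
  , "exit: <register saves>; jump <rts stub>;"
  ]

-- > mapM_ putStrLn (renderChecks 24 64)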
%************************************************************************
        Generic Heap/Stack Checks - used in the RTS
%************************************************************************
hpChkGen :: CmmExpr -> CmmExpr -> CmmExpr -> Code
hpChkGen bytes liveness reentry
  = do_checks' (CmmLit (mkIntCLit 0)) bytes False True assigns stg_gc_gen
  where
    assigns = mkStmts [ mk_vanilla_assignment 9 liveness,
                        mk_vanilla_assignment 10 reentry ]

-- A heap check where R1 points to the closure to enter on return, and
-- we want to assign to Sp[0] on failure (used in AutoApply.cmm:BUILD_PAP).
hpChkNodePointsAssignSp0 :: CmmExpr -> CmmExpr -> Code
hpChkNodePointsAssignSp0 bytes sp0
  = do_checks' (CmmLit (mkIntCLit 0)) bytes False True assign stg_gc_enter1
  where assign = oneStmt (CmmStore (CmmReg spReg) sp0)

stkChkGen :: CmmExpr -> CmmExpr -> CmmExpr -> Code
stkChkGen bytes liveness reentry
  = do_checks' bytes (CmmLit (mkIntCLit 0)) True False assigns stg_gc_gen
  where
    assigns = mkStmts [ mk_vanilla_assignment 9 liveness,
                        mk_vanilla_assignment 10 reentry ]
mk_vanilla_assignment :: Int -> CmmExpr -> CmmStmt
mk_vanilla_assignment n e
  = CmmAssign (CmmGlobal (VanillaReg n (vgcFlag (cmmExprType e)))) e

stkChkNodePoints :: CmmExpr -> Code
stkChkNodePoints bytes
  = do_checks' bytes (CmmLit (mkIntCLit 0)) True False noStmts stg_gc_enter1

stg_gc_gen :: CmmExpr
stg_gc_gen = CmmLit (CmmLabel (mkCmmCodeLabel rtsPackageId (fsLit "stg_gc_gen")))
stg_gc_enter1 :: CmmExpr
stg_gc_enter1 = CmmReg (CmmGlobal GCEnter1)
%************************************************************************
\subsection[initClosure]{Initialise a dynamic closure}
%************************************************************************
@allocDynClosure@ puts the thing in the heap, and modifies the virtual
Hp to account for this.

allocDynClosure
        :: ClosureInfo
        -> CmmExpr              -- Cost Centre to stick in the object
        -> CmmExpr              -- Cost Centre to blame for this alloc
                                -- (usually the same; sometimes "OVERHEAD")

        -> [(CmmExpr, VirtualHpOffset)] -- Offsets from start of the object
                                        -- ie Info ptr has offset zero.
        -> FCode VirtualHpOffset        -- Returns virt offset of object
allocDynClosure cl_info use_cc _blame_cc amodes_with_offsets
  = do  { virt_hp <- getVirtHp

        -- FIND THE OFFSET OF THE INFO-PTR WORD
        ; let info_offset = virt_hp + 1
                -- info_offset is the VirtualHpOffset of the first
                -- word of the new object
                -- Remember, virtHp points to last allocated word,
                -- ie 1 *before* the info-ptr word of new object.

              info_ptr = CmmLit (CmmLabel (infoTableLabelFromCI cl_info
                                           (clHasCafRefs cl_info)))
              hdr_w_offsets = initDynHdr info_ptr use_cc `zip` [0..]

        -- SAY WHAT WE ARE ABOUT TO DO
        ; profDynAlloc cl_info use_cc
                -- ToDo: This is almost certainly wrong
                -- We're ignoring blame_cc. But until we've
                -- fixed the boxing hack in chooseDynCostCentres etc,
                -- we're worried about making things worse by "fixing"
                -- this part to use blame_cc!

        ; tickyDynAlloc cl_info

        -- ALLOCATE THE OBJECT
        ; base <- getHpRelOffset info_offset
        ; hpStore base (hdr_w_offsets ++ amodes_with_offsets)

        -- BUMP THE VIRTUAL HEAP POINTER
        ; setVirtHp (virt_hp + closureSize cl_info)

        -- RETURN PTR TO START OF OBJECT
        ; returnFC info_offset }
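A sketch of how a caller might combine the pieces in this module,
modelled loosely on GHC's constructor-allocation code: the function
name is invented, the zero-literal cost centre is a placeholder, and
the surrounding FCode context and imports of this module are assumed.

buildConSketch :: DataCon -> [(CgRep, CmmExpr)] -> FCode CmmExpr
buildConSketch data_con amodes
  = do  { let (closure_info, amodes_w_offsets)
                  = layOutDynConstr data_con amodes
        ; let use_cc = CmmLit (mkIntCLit 0)   -- placeholder cost centre
        ; virt_off <- allocDynClosure closure_info use_cc use_cc
                                      amodes_w_offsets
        ; getHpRelOffset virt_off }   -- pointer to the new object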
initDynHdr :: CmmExpr
           -> CmmExpr           -- Cost centre to put in object
           -> [CmmExpr]
initDynHdr info_ptr cc
  =  [info_ptr]
        -- ToDo: Gransim stuff
        -- ToDo: Parallel stuff
  ++ dynProfHdr cc
hpStore :: CmmExpr -> [(CmmExpr, VirtualHpOffset)] -> Code
-- Store the item (expr,off) in base[off]
hpStore base es
  = stmtsC [ CmmStore (cmmOffsetW base off) val
           | (val, off) <- es ]
emitSetDynHdr :: CmmExpr -> CmmExpr -> CmmExpr -> Code
emitSetDynHdr base info_ptr ccs
  = hpStore base (zip (initDynHdr info_ptr ccs) [0..])