1 /* -----------------------------------------------------------------------------
3 * (c) The GHC Team, 1998-2008
5 * Storage manager front end
7 * Documentation on the architecture of the Storage Manager can be
8 * found in the online commentary:
10 * http://hackage.haskell.org/trac/ghc/wiki/Commentary/Rts/Storage
12 * ---------------------------------------------------------------------------*/
14 #include "PosixSource.h"
20 #include "BlockAlloc.h"
25 #include "OSThreads.h"
26 #include "Capability.h"
29 #include "RetainerProfile.h" // for counting memory blocks (memInventory)
41 * All these globals must be accessed with sm_mutex held in THREADED_RTS mode.
43 StgClosure *caf_list = NULL;
44 StgClosure *revertible_caf_list = NULL;
47 bdescr *pinned_object_block; /* allocate pinned objects into this block */
48 nat alloc_blocks; /* number of allocate()d blocks since GC */
49 nat alloc_blocks_lim; /* approximate limit on alloc_blocks */
51 static bdescr *exec_block;
53 generation *generations = NULL; /* all the generations */
54 generation *g0 = NULL; /* generation 0, for convenience */
55 generation *oldest_gen = NULL; /* oldest generation, for convenience */
56 step *g0s0 = NULL; /* generation 0, step 0, for convenience */
59 step *all_steps = NULL; /* single array of steps */
61 ullong total_allocated = 0; /* total memory allocated during run */
63 nat n_nurseries = 0; /* == RtsFlags.ParFlags.nNodes, convenience */
64 step *nurseries = NULL; /* array of nurseries, >1 only if THREADED_RTS */
68 * Storage manager mutex: protects all the above state from
69 * simultaneous access by two STG threads.
76 initStep (step *stp, int g, int s)
79 stp->abs_no = RtsFlags.GcFlags.steps * g + s;
83 stp->live_estimate = 0;
84 stp->old_blocks = NULL;
85 stp->n_old_blocks = 0;
86 stp->gen = &generations[g];
88 stp->large_objects = NULL;
89 stp->n_large_blocks = 0;
90 stp->scavenged_large_objects = NULL;
91 stp->n_scavenged_large_blocks = 0;
96 initSpinLock(&stp->sync_large_objects);
98 stp->threads = END_TSO_QUEUE;
99 stp->old_threads = END_TSO_QUEUE;
108 if (generations != NULL) {
109 // multi-init protection
115 /* Sanity check to make sure the LOOKS_LIKE_ macros appear to be
116 * doing something reasonable.
118 /* We use the NOT_NULL variant, otherwise gcc warns that the test is always true */
119 ASSERT(LOOKS_LIKE_INFO_PTR_NOT_NULL((StgWord)&stg_BLACKHOLE_info));
120 ASSERT(LOOKS_LIKE_CLOSURE_PTR(&stg_dummy_ret_closure));
121 ASSERT(!HEAP_ALLOCED(&stg_dummy_ret_closure));
123 if (RtsFlags.GcFlags.maxHeapSize != 0 &&
124 RtsFlags.GcFlags.heapSizeSuggestion >
125 RtsFlags.GcFlags.maxHeapSize) {
126 RtsFlags.GcFlags.maxHeapSize = RtsFlags.GcFlags.heapSizeSuggestion;
129 if (RtsFlags.GcFlags.maxHeapSize != 0 &&
130 RtsFlags.GcFlags.minAllocAreaSize >
131 RtsFlags.GcFlags.maxHeapSize) {
132 errorBelch("maximum heap size (-M) is smaller than minimum alloc area size (-A)");
133 RtsFlags.GcFlags.minAllocAreaSize = RtsFlags.GcFlags.maxHeapSize;
136 initBlockAllocator();
138 #if defined(THREADED_RTS)
139 initMutex(&sm_mutex);
144 /* allocate generation info array */
145 generations = (generation *)stgMallocBytes(RtsFlags.GcFlags.generations
146 * sizeof(struct generation_),
147 "initStorage: gens");
149 /* allocate all the steps into an array. It is important that we do
150 it this way, because we need the invariant that two step pointers
151 can be directly compared to see which is the oldest.
152 Remember that the last generation has only one step. */
153 total_steps = 1 + (RtsFlags.GcFlags.generations - 1) * RtsFlags.GcFlags.steps;
154 all_steps = stgMallocBytes(total_steps * sizeof(struct step_),
155 "initStorage: steps");
157 /* Initialise all generations */
158 for(g = 0; g < RtsFlags.GcFlags.generations; g++) {
159 gen = &generations[g];
161 gen->mut_list = allocBlock();
162 gen->collections = 0;
163 gen->par_collections = 0;
164 gen->failed_promotions = 0;
168 /* A couple of convenience pointers */
169 g0 = &generations[0];
170 oldest_gen = &generations[RtsFlags.GcFlags.generations-1];
172 /* Allocate step structures in each generation */
173 if (RtsFlags.GcFlags.generations > 1) {
174 /* Only for multiple-generations */
176 /* Oldest generation: one step */
177 oldest_gen->n_steps = 1;
178 oldest_gen->steps = all_steps + (RtsFlags.GcFlags.generations - 1)
179 * RtsFlags.GcFlags.steps;
181 /* set up all except the oldest generation with the requested number of steps */
182 for(g = 0; g < RtsFlags.GcFlags.generations-1; g++) {
183 generations[g].n_steps = RtsFlags.GcFlags.steps;
184 generations[g].steps = all_steps + g * RtsFlags.GcFlags.steps;
188 /* single generation, i.e. a two-space collector */
190 g0->steps = all_steps;
194 n_nurseries = n_capabilities;
198 nurseries = stgMallocBytes (n_nurseries * sizeof(struct step_),
199 "initStorage: nurseries");
201 /* Initialise all steps */
202 for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
203 for (s = 0; s < generations[g].n_steps; s++) {
204 initStep(&generations[g].steps[s], g, s);
208 for (s = 0; s < n_nurseries; s++) {
209 initStep(&nurseries[s], 0, s);
212 /* Set up the destination pointers in each younger gen. step */
213 for (g = 0; g < RtsFlags.GcFlags.generations-1; g++) {
214 for (s = 0; s < generations[g].n_steps-1; s++) {
215 generations[g].steps[s].to = &generations[g].steps[s+1];
217 generations[g].steps[s].to = &generations[g+1].steps[0];
219 oldest_gen->steps[0].to = &oldest_gen->steps[0];
221 for (s = 0; s < n_nurseries; s++) {
222 nurseries[s].to = generations[0].steps[0].to;
225 /* The oldest generation has one step. */
226 if (RtsFlags.GcFlags.compact || RtsFlags.GcFlags.sweep) {
227 if (RtsFlags.GcFlags.generations == 1) {
228 errorBelch("WARNING: compact/sweep is incompatible with -G1; disabled");
230 oldest_gen->steps[0].mark = 1;
231 if (RtsFlags.GcFlags.compact)
232 oldest_gen->steps[0].compact = 1;
236 generations[0].max_blocks = 0;
237 g0s0 = &generations[0].steps[0];
239 /* The allocation area. Policy: keep the allocation area
240 * small to begin with, even if we have a large suggested heap
241 * size. Reason: we're going to do a major collection first, and we
242 * don't want it to be a big one. This vague idea is borne out by
243 * rigorous experimental evidence.
247 weak_ptr_list = NULL;
249 revertible_caf_list = NULL;
251 /* initialise the allocate() interface */
253 alloc_blocks_lim = RtsFlags.GcFlags.minAllocAreaSize;
258 initSpinLock(&gc_alloc_block_sync);
266 IF_DEBUG(gc, statDescribeGens());
274 stat_exit(calcAllocated());
280 stgFree(g0s0); // frees all the steps (g0s0 points at the all_steps array)
281 stgFree(generations);
283 #if defined(THREADED_RTS)
284 closeMutex(&sm_mutex);
290 /* -----------------------------------------------------------------------------
293 The entry code for every CAF does the following:
295 - builds a CAF_BLACKHOLE in the heap
296 - pushes an update frame pointing to the CAF_BLACKHOLE
297 - invokes UPD_CAF(), which:
298 - calls newCaf, below
299 - updates the CAF with a static indirection to the CAF_BLACKHOLE
301 Why do we build a BLACKHOLE in the heap rather than just updating
302 the thunk directly? It's so that we only need one kind of update
303 frame - otherwise we'd need a static version of the update frame too.
305 newCaf() does the following:
307 - it puts the CAF on the oldest generation's mut-once list.
308 This is so that we can treat the CAF as a root when collecting
311 For GHCi, we have additional requirements when dealing with CAFs:
313 - we must *retain* all dynamically-loaded CAFs ever entered,
314 just in case we need them again.
315 - we must be able to *revert* CAFs that have been evaluated, to
316 their pre-evaluated form.
318 To do this, we use an additional CAF list. When newCaf() is
319 called on a dynamically-loaded CAF, we add it to the CAF list
320 instead of the old-generation mutable list, and save away its
321 old info pointer (in caf->saved_info) for later reversion.
323 To revert all the CAFs, we traverse the CAF list and reset the
324 info pointer to caf->saved_info, then throw away the CAF list.
325 (see GC.c:revertCAFs()).
329 -------------------------------------------------------------------------- */
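/* For illustration only (a rough sketch, not the actual generated entry
 * code; "push an update frame" and "jump to the body" are pseudo-code):
 *
 *     bh = allocate(sizeofW(StgInd));           // a CAF_BLACKHOLE in the heap
 *     SET_INFO(bh, &stg_CAF_BLACKHOLE_info);
 *     push an update frame pointing to bh;
 *     newCAF(caf);                              // see below
 *     ((StgIndStatic *)caf)->indirectee = bh;   // static indirection to the blackhole
 *     SET_INFO(caf, &stg_IND_STATIC_info);
 *     jump to the CAF's body;
 */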
332 newCAF(StgClosure* caf)
339 // If we are in GHCi _and_ we are using dynamic libraries,
340 // then we can't redirect newCAF calls to newDynCAF (see below),
341 // so we make newCAF behave almost like newDynCAF.
342 // The dynamic libraries might be used by both the interpreted
343 // program and GHCi itself, so they must not be reverted.
344 // This also means that in GHCi with dynamic libraries, CAFs are not
345 // garbage collected. If this turns out to be a problem, we could
346 // do another hack here and do an address range test on caf to figure
347 // out whether it is from a dynamic library.
348 ((StgIndStatic *)caf)->saved_info = (StgInfoTable *)caf->header.info;
349 ((StgIndStatic *)caf)->static_link = caf_list;
354 /* Put this CAF on the mutable list for the old generation.
355 * This is a HACK - the IND_STATIC closure doesn't really have
356 * a mut_link field, but we pretend it has - in fact we re-use
357 * the STATIC_LINK field for the time being, because when we
358 * come to do a major GC we won't need the mut_link field
359 * any more and can use it as a STATIC_LINK.
361 ((StgIndStatic *)caf)->saved_info = NULL;
362 recordMutableGen(caf, oldest_gen->no);
368 // An alternate version of newCaf which is used for dynamically loaded
369 // object code in GHCi. In this case we want to retain *all* CAFs in
370 // the object code, because they might be demanded at any time from an
371 // expression evaluated on the command line.
372 // Also, GHCi might want to revert CAFs, so we add these to the
373 // revertible_caf_list.
375 // The linker hackily arranges that references to newCaf from dynamic
376 // code end up pointing to newDynCAF.
378 newDynCAF(StgClosure *caf)
382 ((StgIndStatic *)caf)->saved_info = (StgInfoTable *)caf->header.info;
383 ((StgIndStatic *)caf)->static_link = revertible_caf_list;
384 revertible_caf_list = caf;
389 /* -----------------------------------------------------------------------------
391 -------------------------------------------------------------------------- */
394 allocNursery (step *stp, bdescr *tail, nat blocks)
399 // Allocate a nursery: we allocate fresh blocks one at a time and
400 // cons them on to the front of the list, not forgetting to update
401 // the back pointer on the tail of the list to point to the new block.
402 for (i=0; i < blocks; i++) {
405 processNursery() in LdvProfile.c assumes that every block group in
406 the nursery contains only a single block. So, if a block group is
407 given multiple blocks, change processNursery() accordingly.
411 // double-link the nursery: we might need to insert blocks
418 bd->free = bd->start;
426 assignNurseriesToCapabilities (void)
431 for (i = 0; i < n_nurseries; i++) {
432 capabilities[i].r.rNursery = &nurseries[i];
433 capabilities[i].r.rCurrentNursery = nurseries[i].blocks;
434 capabilities[i].r.rCurrentAlloc = NULL;
436 #else /* THREADED_RTS */
437 MainCapability.r.rNursery = &nurseries[0];
438 MainCapability.r.rCurrentNursery = nurseries[0].blocks;
439 MainCapability.r.rCurrentAlloc = NULL;
444 allocNurseries( void )
448 for (i = 0; i < n_nurseries; i++) {
449 nurseries[i].blocks =
450 allocNursery(&nurseries[i], NULL,
451 RtsFlags.GcFlags.minAllocAreaSize);
452 nurseries[i].n_blocks = RtsFlags.GcFlags.minAllocAreaSize;
453 nurseries[i].old_blocks = NULL;
454 nurseries[i].n_old_blocks = 0;
456 assignNurseriesToCapabilities();
460 resetNurseries( void )
466 for (i = 0; i < n_nurseries; i++) {
468 for (bd = stp->blocks; bd; bd = bd->link) {
469 bd->free = bd->start;
470 ASSERT(bd->gen_no == 0);
471 ASSERT(bd->step == stp);
472 IF_DEBUG(sanity,memset(bd->start, 0xaa, BLOCK_SIZE));
475 assignNurseriesToCapabilities();
479 countNurseryBlocks (void)
484 for (i = 0; i < n_nurseries; i++) {
485 blocks += nurseries[i].n_blocks;
491 resizeNursery ( step *stp, nat blocks )
496 nursery_blocks = stp->n_blocks;
497 if (nursery_blocks == blocks) return;
499 if (nursery_blocks < blocks) {
500 debugTrace(DEBUG_gc, "increasing size of nursery to %d blocks",
502 stp->blocks = allocNursery(stp, stp->blocks, blocks-nursery_blocks);
507 debugTrace(DEBUG_gc, "decreasing size of nursery to %d blocks",
511 while (nursery_blocks > blocks) {
513 next_bd->u.back = NULL;
514 nursery_blocks -= bd->blocks; // might be a large block
519 // might have gone just under, by freeing a large block, so make
520 // up the difference.
521 if (nursery_blocks < blocks) {
522 stp->blocks = allocNursery(stp, stp->blocks, blocks-nursery_blocks);
526 stp->n_blocks = blocks;
527 ASSERT(countBlocks(stp->blocks) == stp->n_blocks);
531 // Resize each of the nurseries to the specified size.
534 resizeNurseriesFixed (nat blocks)
537 for (i = 0; i < n_nurseries; i++) {
538 resizeNursery(&nurseries[i], blocks);
543 // Resize the nurseries to the total specified size.
546 resizeNurseries (nat blocks)
548 // If there are multiple nurseries, then we just divide the number
549 // of available blocks between them.
550 resizeNurseriesFixed(blocks / n_nurseries);
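// For example (arithmetic only): with 4 capabilities and a request for
// 1024 blocks in total, each nursery is resized to 1024/4 = 256 blocks.
// Integer division may leave a few blocks unused when n_nurseries does
// not divide the request evenly.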
554 /* -----------------------------------------------------------------------------
555 move_TSO is called to update the TSO structure after it has been
556 moved from one place to another.
557 -------------------------------------------------------------------------- */
560 move_TSO (StgTSO *src, StgTSO *dest)
564 // relocate the stack pointer...
565 diff = (StgPtr)dest - (StgPtr)src; // In *words*
566 dest->sp = (StgPtr)dest->sp + diff;
569 /* -----------------------------------------------------------------------------
570 The allocate() interface
572 The allocateInGen() function allocates memory directly into a specific
573 generation. It always succeeds, and returns a chunk of memory n
574 words long. n can be larger than the size of a block if necessary,
575 in which case a contiguous block group will be allocated.
577 allocate(n) is equivalent to allocateInGen(g0).
578 -------------------------------------------------------------------------- */
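/* A minimal usage sketch (illustrative only; 'info' is a placeholder for
 * some closure's info table):
 *
 *     StgPtr p = allocate(n);                     // == allocateInGen(g0, n)
 *     SET_HDR((StgClosure *)p, info, CCS_SYSTEM);
 *
 * For n >= LARGE_OBJECT_THRESHOLD/sizeof(W_), the words come from a fresh
 * block group chained onto the step's large_objects list rather than from
 * an ordinary allocation block.
 */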
581 allocateInGen (generation *g, lnat n)
589 TICK_ALLOC_HEAP_NOCTR(n);
594 if (n >= LARGE_OBJECT_THRESHOLD/sizeof(W_))
596 lnat req_blocks = (lnat)BLOCK_ROUND_UP(n*sizeof(W_)) / BLOCK_SIZE;
598 // Attempting to allocate an object larger than maxHeapSize
599 // should definitely be disallowed. (bug #1791)
600 if (RtsFlags.GcFlags.maxHeapSize > 0 &&
601 req_blocks >= RtsFlags.GcFlags.maxHeapSize) {
603 // heapOverflow() doesn't exit (see #2592), but we aren't
604 // in a position to do a clean shutdown here: we
605 // either have to allocate the memory or exit now.
606 // Allocating the memory would be bad, because the user
607 // has requested that we not exceed maxHeapSize, so we just exit.
609 stg_exit(EXIT_HEAPOVERFLOW);
612 bd = allocGroup(req_blocks);
613 dbl_link_onto(bd, &stp->large_objects);
614 stp->n_large_blocks += bd->blocks; // might be larger than req_blocks
615 alloc_blocks += bd->blocks;
618 bd->flags = BF_LARGE;
619 bd->free = bd->start + n;
624 /* small allocation (<LARGE_OBJECT_THRESHOLD) */
626 if (bd == NULL || bd->free + n > bd->start + BLOCK_SIZE_W) {
631 bd->link = stp->blocks;
648 return allocateInGen(g0,n);
652 allocatedBytes( void )
656 allocated = alloc_blocks * BLOCK_SIZE_W;
657 if (pinned_object_block != NULL) {
658 allocated -= (pinned_object_block->start + BLOCK_SIZE_W) -
659 pinned_object_block->free;
665 // split N blocks off the front of the given bdescr, returning the
666 // new block group. We treat the remainder as if it
667 // had been freshly allocated in generation 0.
669 splitLargeBlock (bdescr *bd, nat blocks)
673 // subtract the original number of blocks from the counter first
674 bd->step->n_large_blocks -= bd->blocks;
676 new_bd = splitBlockGroup (bd, blocks);
678 dbl_link_onto(new_bd, &g0s0->large_objects);
679 g0s0->n_large_blocks += new_bd->blocks;
680 new_bd->gen_no = g0s0->no;
682 new_bd->flags = BF_LARGE;
683 new_bd->free = bd->free;
684 ASSERT(new_bd->free <= new_bd->start + new_bd->blocks * BLOCK_SIZE_W);
686 // add the new number of blocks to the counter. Due to the gaps
687 // for block descriptors, new_bd->blocks + bd->blocks might not be
688 // equal to the original bd->blocks, which is why we do it this way.
689 bd->step->n_large_blocks += bd->blocks;
694 /* -----------------------------------------------------------------------------
697 This allocates memory in the current thread - it is intended for
698 use primarily from STG-land where we have a Capability. It is
699 better than allocate() because it doesn't require taking the
700 sm_mutex lock in the common case.
702 Memory is allocated directly from the nursery if possible (but not
703 from the current nursery block, so as not to interfere with Hp/HpLim).
705 -------------------------------------------------------------------------- */
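/* A rough usage sketch (illustrative; 'init' is a placeholder value):
 *
 *     StgMutVar *mv = (StgMutVar *)allocateLocal(cap, sizeofW(StgMutVar));
 *     SET_HDR((StgClosure *)mv, &stg_MUT_VAR_DIRTY_info, CCS_SYSTEM);
 *     mv->var = init;
 */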
708 allocateLocal (Capability *cap, lnat n)
713 if (n >= LARGE_OBJECT_THRESHOLD/sizeof(W_)) {
714 return allocateInGen(g0,n);
717 /* small allocation (<LARGE_OBJECT_THRESHOLD) */
719 TICK_ALLOC_HEAP_NOCTR(n);
722 bd = cap->r.rCurrentAlloc;
723 if (bd == NULL || bd->free + n > bd->start + BLOCK_SIZE_W) {
725 // The CurrentAlloc block is full, we need to find another
726 // one. First, we try taking the next block from the
728 bd = cap->r.rCurrentNursery->link;
730 if (bd == NULL || bd->free + n > bd->start + BLOCK_SIZE_W) {
731 // The nursery is empty, or the next block is already
732 // full: allocate a fresh block (we can't fail here).
735 cap->r.rNursery->n_blocks++;
738 bd->step = cap->r.rNursery;
740 // NO: alloc_blocks++;
741 // calcAllocated() uses the size of the nursery, and we've
742 // already bumped nursery->n_blocks above. We'll GC
743 // pretty quickly now anyway, because MAYBE_GC() will
744 // notice that CurrentNursery->link is NULL.
746 // we have a block in the nursery: take it and put
747 // it at the *front* of the nursery list, and use it
748 // to allocate() from.
749 cap->r.rCurrentNursery->link = bd->link;
750 if (bd->link != NULL) {
751 bd->link->u.back = cap->r.rCurrentNursery;
754 dbl_link_onto(bd, &cap->r.rNursery->blocks);
755 cap->r.rCurrentAlloc = bd;
756 IF_DEBUG(sanity, checkNurserySanity(cap->r.rNursery));
763 /* ---------------------------------------------------------------------------
764 Allocate a fixed/pinned object.
766 We allocate small pinned objects into a single block, allocating a
767 new block when the current one overflows. The block is chained
768 onto the large_object_list of generation 0 step 0.
770 NOTE: The GC can't in general handle pinned objects. This
771 interface is only safe to use for ByteArrays, which have no
772 pointers and don't require scavenging. It works because the
773 block's descriptor has the BF_LARGE flag set, so the block is
774 treated as a large object and chained onto various lists, rather
775 than the individual objects being copied. However, when it comes
776 to scavenging the block, the GC will only scavenge the first object.
777 The reason is that the GC can't linearly scan a block of pinned
778 objects at the moment (doing so would require using the
779 mostly-copying techniques). But since we're restricting ourselves
780 to pinned ByteArrays, not scavenging is ok.
782 This function is called by newPinnedByteArray# which immediately
783 fills the allocated memory with a MutableByteArray#.
784 ------------------------------------------------------------------------- */
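/* An illustrative sketch of the caller's side (the real
 * newPinnedByteArray# is written in Cmm; this is only an approximation):
 *
 *     n = sizeofW(StgArrWords) + ROUNDUP_BYTES_TO_WDS(bytes);
 *     p = allocatePinned(n);
 *     SET_HDR((StgClosure *)p, &stg_ARR_WORDS_info, CCS_SYSTEM);
 *     // ...then fill in the array's size field and return it as a
 *     // MutableByteArray#.
 */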
787 allocatePinned( lnat n )
790 bdescr *bd = pinned_object_block;
792 // If the request is for a large object, then allocate()
793 // will give us a pinned object anyway.
794 if (n >= LARGE_OBJECT_THRESHOLD/sizeof(W_)) {
796 Bdescr(p)->flags |= BF_PINNED;
802 TICK_ALLOC_HEAP_NOCTR(n);
805 // If we don't have a block of pinned objects yet, or the current
806 // one isn't large enough to hold the new object, allocate a new one.
807 if (bd == NULL || (bd->free + n) > (bd->start + BLOCK_SIZE_W)) {
808 pinned_object_block = bd = allocBlock();
809 dbl_link_onto(bd, &g0s0->large_objects);
810 g0s0->n_large_blocks++;
813 bd->flags = BF_PINNED | BF_LARGE;
814 bd->free = bd->start;
824 /* -----------------------------------------------------------------------------
826 -------------------------------------------------------------------------- */
829 This is the write barrier for MUT_VARs, a.k.a. IORefs. A
830 MUT_VAR_CLEAN object is not on the mutable list; a MUT_VAR_DIRTY
831 is. When written to, a MUT_VAR_CLEAN turns into a MUT_VAR_DIRTY
832 and is put on the mutable list.
835 dirty_MUT_VAR(StgRegTable *reg, StgClosure *p)
837 Capability *cap = regTableToCapability(reg);
839 if (p->header.info == &stg_MUT_VAR_CLEAN_info) {
840 p->header.info = &stg_MUT_VAR_DIRTY_info;
841 bd = Bdescr((StgPtr)p);
842 if (bd->gen_no > 0) recordMutableCap(p,cap,bd->gen_no);
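// Illustrative sketch (the real write side is in Cmm): a mutation of a
// MUT_VAR from RTS C code would pair the store with this barrier, e.g.
//
//     mv->var = new_value;                       // the mutation
//     dirty_MUT_VAR(&cap->r, (StgClosure *)mv);  // keep the mutable list honest
//
// so that an old-generation MUT_VAR now pointing into a younger
// generation is on a mutable list before the next minor GC.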
846 // Setting a TSO's link field with a write barrier.
847 // It is *not* necessary to call this function when
848 // * setting the link field to END_TSO_QUEUE
849 // * putting a TSO on the blackhole_queue
850 // * setting the link field of the currently running TSO, as it
851 // will already be dirty.
853 setTSOLink (Capability *cap, StgTSO *tso, StgTSO *target)
856 if ((tso->flags & (TSO_DIRTY|TSO_LINK_DIRTY)) == 0) {
857 tso->flags |= TSO_LINK_DIRTY;
858 bd = Bdescr((StgPtr)tso);
859 if (bd->gen_no > 0) recordMutableCap((StgClosure*)tso,cap,bd->gen_no);
865 dirty_TSO (Capability *cap, StgTSO *tso)
868 if ((tso->flags & (TSO_DIRTY|TSO_LINK_DIRTY)) == 0) {
869 bd = Bdescr((StgPtr)tso);
870 if (bd->gen_no > 0) recordMutableCap((StgClosure*)tso,cap,bd->gen_no);
872 tso->flags |= TSO_DIRTY;
876 This is the write barrier for MVARs. An MVAR_CLEAN object is not
877 on the mutable list; an MVAR_DIRTY is. When written to, an
878 MVAR_CLEAN turns into an MVAR_DIRTY and is put on the mutable list.
879 The check for MVAR_CLEAN is inlined at the call site for speed;
880 this really does make a difference on concurrency-heavy benchmarks
881 such as Chameneos and cheap-concurrency.
884 dirty_MVAR(StgRegTable *reg, StgClosure *p)
886 Capability *cap = regTableToCapability(reg);
888 bd = Bdescr((StgPtr)p);
889 if (bd->gen_no > 0) recordMutableCap(p,cap,bd->gen_no);
892 /* -----------------------------------------------------------------------------
894 * -------------------------------------------------------------------------- */
896 /* -----------------------------------------------------------------------------
899 * Approximate how much we've allocated: number of blocks in the
900 * nursery + blocks allocated via allocate() - unused nursery blocks.
901 * This leaves a little slop at the end of each block, and doesn't
902 * take into account large objects (ToDo).
903 * -------------------------------------------------------------------------- */
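// Informally, the computation below is:
//
//   allocated ~= words allocated via allocate()      (allocatedBytes())
//              + nursery blocks * BLOCK_SIZE_W       (countNurseryBlocks())
//              - words in nursery blocks never reached
//
// where the subtraction walks the blocks after each rCurrentNursery and
// the unused tail of the current nursery block itself.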
906 calcAllocated( void )
911 allocated = allocatedBytes();
912 allocated += countNurseryBlocks() * BLOCK_SIZE_W;
917 for (i = 0; i < n_nurseries; i++) {
919 for ( bd = capabilities[i].r.rCurrentNursery->link;
920 bd != NULL; bd = bd->link ) {
921 allocated -= BLOCK_SIZE_W;
923 cap = &capabilities[i];
924 if (cap->r.rCurrentNursery->free <
925 cap->r.rCurrentNursery->start + BLOCK_SIZE_W) {
926 allocated -= (cap->r.rCurrentNursery->start + BLOCK_SIZE_W)
927 - cap->r.rCurrentNursery->free;
931 bdescr *current_nursery = MainCapability.r.rCurrentNursery;
933 for ( bd = current_nursery->link; bd != NULL; bd = bd->link ) {
934 allocated -= BLOCK_SIZE_W;
936 if (current_nursery->free < current_nursery->start + BLOCK_SIZE_W) {
937 allocated -= (current_nursery->start + BLOCK_SIZE_W)
938 - current_nursery->free;
943 total_allocated += allocated;
947 /* Approximate the amount of live data in the heap. To be called just
948 * after garbage collection (see GarbageCollect()).
957 if (RtsFlags.GcFlags.generations == 1) {
958 return g0s0->n_large_blocks + g0s0->n_blocks;
961 for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
962 for (s = 0; s < generations[g].n_steps; s++) {
963 /* approximate amount of live data (doesn't take into account slop
964 * at end of each block).
966 if (g == 0 && s == 0) {
969 stp = &generations[g].steps[s];
970 live += stp->n_large_blocks + stp->n_blocks;
977 countOccupied(bdescr *bd)
982 for (; bd != NULL; bd = bd->link) {
983 ASSERT(bd->free <= bd->start + bd->blocks * BLOCK_SIZE_W);
984 words += bd->free - bd->start;
989 // Return an accurate count of the live data in the heap, excluding
998 if (RtsFlags.GcFlags.generations == 1) {
999 return g0s0->n_words + countOccupied(g0s0->large_objects);
1003 for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
1004 for (s = 0; s < generations[g].n_steps; s++) {
1005 if (g == 0 && s == 0) continue;
1006 stp = &generations[g].steps[s];
1007 live += stp->n_words + countOccupied(stp->large_objects);
1013 /* Approximate the number of blocks that will be needed at the next
1014 * garbage collection.
1016 * Assume: all data currently live will remain live. Steps that will
1017 * be collected next time will therefore need twice as many blocks
1018 * since all the data will be copied.
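 *
 * For example (informal): a step holding 100 blocks of live data that
 * will be copied at the next collection needs roughly 100 + 100 = 200
 * blocks (the old copy plus space for the new one), whereas a step that
 * will not be collected only needs its current 100 blocks.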
1027 for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
1028 for (s = 0; s < generations[g].n_steps; s++) {
1029 if (g == 0 && s == 0) { continue; }
1030 stp = &generations[g].steps[s];
1032 // we need at least this much space
1033 needed += stp->n_blocks + stp->n_large_blocks;
1035 // any additional space needed to collect this gen next time?
1036 if (g == 0 || // always collect gen 0
1037 (generations[g].steps[0].n_blocks +
1038 generations[g].steps[0].n_large_blocks
1039 > generations[g].max_blocks)) {
1040 // we will collect this gen next time
1043 needed += stp->n_blocks / BITS_IN(W_);
1045 needed += stp->n_blocks / 100;
1048 continue; // no additional space needed for compaction
1050 needed += stp->n_blocks;
1058 /* ----------------------------------------------------------------------------
1061 Executable memory must be managed separately from non-executable
1062 memory. Most OSs these days require you to jump through hoops to
1063 dynamically allocate executable memory, due to various security
1066 Here we provide a small memory allocator for executable memory.
1067 Memory is managed with a page granularity; we allocate linearly
1068 in the page, and when the page is emptied (all objects on the page
1069 are free) we free the page again, not forgetting to make it non-executable.
1072 TODO: The inability to handle objects bigger than BLOCK_SIZE_W means that
1073 the linker cannot use allocateExec for loading object code files
1074 on Windows. Once allocateExec can handle larger objects, the linker
1075 should be modified to use allocateExec instead of VirtualAlloc.
1076 ------------------------------------------------------------------------- */
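/* Illustrative usage (a sketch; 'code' and 'len' are placeholders):
 *
 *     void *exec_p;
 *     void *wr_p = allocateExec(len, &exec_p); // returns the writable address;
 *                                              // the executable address goes in exec_p
 *     memcpy(wr_p, code, len);                 // fill in via the writable mapping
 *     // ... call through exec_p ...
 *     freeExec(exec_p);                        // free using the *executable* address
 */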
1078 #if defined(linux_HOST_OS)
1080 // On Linux we need to use libffi for allocating executable memory,
1081 // because it knows how to work around the restrictions put in place
1084 void *allocateExec (nat bytes, void **exec_ret)
1088 ret = ffi_closure_alloc (sizeof(void *) + (size_t)bytes, (void**)&exec);
1090 if (ret == NULL) return ret;
1091 *ret = ret; // save the address of the writable mapping, for freeExec().
1092 *exec_ret = exec + 1;
1096 // freeExec gets passed the executable address, not the writable address.
1097 void freeExec (void *addr)
1100 writable = *((void**)addr - 1);
1102 ffi_closure_free (writable);
1108 void *allocateExec (nat bytes, void **exec_ret)
1115 // round up to words.
1116 n = (bytes + sizeof(W_) + 1) / sizeof(W_);
1118 if (n+1 > BLOCK_SIZE_W) {
1119 barf("allocateExec: can't handle large objects");
1122 if (exec_block == NULL ||
1123 exec_block->free + n + 1 > exec_block->start + BLOCK_SIZE_W) {
1125 lnat pagesize = getPageSize();
1126 bd = allocGroup(stg_max(1, pagesize / BLOCK_SIZE));
1127 debugTrace(DEBUG_gc, "allocate exec block %p", bd->start);
1129 bd->flags = BF_EXEC;
1130 bd->link = exec_block;
1131 if (exec_block != NULL) {
1132 exec_block->u.back = bd;
1135 setExecutable(bd->start, bd->blocks * BLOCK_SIZE, rtsTrue);
1138 *(exec_block->free) = n; // store the size of this chunk
1139 exec_block->gen_no += n; // gen_no stores the number of words allocated
1140 ret = exec_block->free + 1;
1141 exec_block->free += n + 1;
1148 void freeExec (void *addr)
1150 StgPtr p = (StgPtr)addr - 1;
1151 bdescr *bd = Bdescr((StgPtr)p);
1153 if ((bd->flags & BF_EXEC) == 0) {
1154 barf("freeExec: not executable");
1157 if (*(StgPtr)p == 0) {
1158 barf("freeExec: already free?");
1163 bd->gen_no -= *(StgPtr)p;
1166 if (bd->gen_no == 0) {
1167 // Free the block if it is empty, but not if it is the block at
1168 // the head of the queue.
1169 if (bd != exec_block) {
1170 debugTrace(DEBUG_gc, "free exec block %p", bd->start);
1171 dbl_link_remove(bd, &exec_block);
1172 setExecutable(bd->start, bd->blocks * BLOCK_SIZE, rtsFalse);
1175 bd->free = bd->start;
1182 #endif /* mingw32_HOST_OS */
1184 /* -----------------------------------------------------------------------------
1187 memInventory() checks for memory leaks by counting up all the
1188 blocks we know about and comparing that to the number of blocks
1189 allegedly floating around in the system.
1190 -------------------------------------------------------------------------- */
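// Informally, the invariant checked below is
//
//     live_blocks + free_blocks == mblocks_allocated * BLOCKS_PER_MBLOCK
//
// i.e. every block handed out by the megablock allocator is either
// accounted for somewhere in the RTS or sitting on the free list.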
1194 // Useful for finding partially full blocks in gdb
1195 void findSlop(bdescr *bd);
1196 void findSlop(bdescr *bd)
1200 for (; bd != NULL; bd = bd->link) {
1201 slop = (bd->blocks * BLOCK_SIZE_W) - (bd->free - bd->start);
1202 if (slop > (1024/sizeof(W_))) {
1203 debugBelch("block at %p (bdescr %p) has %ldKB slop\n",
1204 bd->start, bd, slop / (1024/sizeof(W_)));
1210 countBlocks(bdescr *bd)
1213 for (n=0; bd != NULL; bd=bd->link) {
1219 // (*1) Just like countBlocks, except that we adjust the count for a
1220 // megablock group so that it doesn't include the extra few blocks
1221 // that would be taken up by block descriptors in the second and
1222 // subsequent megablocks. This is so we can tally the count with the
1223 // number of blocks allocated in the system, for memInventory().
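// For example (informal): a group occupying k megablocks has
// bd->blocks == BLOCKS_PER_MBLOCK + (k-1)*(MBLOCK_SIZE/BLOCK_SIZE); the
// adjustment below reduces its contribution to k*BLOCKS_PER_MBLOCK,
// which is what mblocks_allocated*BLOCKS_PER_MBLOCK counts for it in
// memInventory().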
1225 countAllocdBlocks(bdescr *bd)
1228 for (n=0; bd != NULL; bd=bd->link) {
1230 // hack for megablock groups: see (*1) above
1231 if (bd->blocks > BLOCKS_PER_MBLOCK) {
1232 n -= (MBLOCK_SIZE / BLOCK_SIZE - BLOCKS_PER_MBLOCK)
1233 * (bd->blocks/(MBLOCK_SIZE/BLOCK_SIZE));
1240 stepBlocks (step *stp)
1242 ASSERT(countBlocks(stp->blocks) == stp->n_blocks);
1243 ASSERT(countBlocks(stp->large_objects) == stp->n_large_blocks);
1244 return stp->n_blocks + stp->n_old_blocks +
1245 countAllocdBlocks(stp->large_objects);
1248 // If memInventory() calculates that we have a memory leak, this
1249 // function will try to find the block(s) that are leaking by marking
1250 // all the ones that we know about, and search through memory to find
1251 // blocks that are not marked. In the debugger this can help to give
1252 // us a clue about what kind of block leaked. In the future we might
1253 // annotate blocks with their allocation site to give more helpful
1256 findMemoryLeak (void)
1259 for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
1260 for (i = 0; i < n_capabilities; i++) {
1261 markBlocks(capabilities[i].mut_lists[g]);
1263 markBlocks(generations[g].mut_list);
1264 for (s = 0; s < generations[g].n_steps; s++) {
1265 markBlocks(generations[g].steps[s].blocks);
1266 markBlocks(generations[g].steps[s].large_objects);
1270 for (i = 0; i < n_nurseries; i++) {
1271 markBlocks(nurseries[i].blocks);
1272 markBlocks(nurseries[i].large_objects);
1277 // if (RtsFlags.ProfFlags.doHeapProfile == HEAP_BY_RETAINER) {
1278 // markRetainerBlocks();
1282 // count the blocks allocated by the arena allocator
1284 // markArenaBlocks();
1286 // count the blocks containing executable memory
1287 markBlocks(exec_block);
1289 reportUnmarkedBlocks();
1294 memInventory (rtsBool show)
1298 lnat gen_blocks[RtsFlags.GcFlags.generations];
1299 lnat nursery_blocks, retainer_blocks,
1300 arena_blocks, exec_blocks;
1301 lnat live_blocks = 0, free_blocks = 0;
1304 // count the blocks we currently have
1306 for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
1308 for (i = 0; i < n_capabilities; i++) {
1309 gen_blocks[g] += countBlocks(capabilities[i].mut_lists[g]);
1311 gen_blocks[g] += countAllocdBlocks(generations[g].mut_list);
1312 for (s = 0; s < generations[g].n_steps; s++) {
1313 stp = &generations[g].steps[s];
1314 gen_blocks[g] += stepBlocks(stp);
1319 for (i = 0; i < n_nurseries; i++) {
1320 nursery_blocks += stepBlocks(&nurseries[i]);
1323 retainer_blocks = 0;
1325 if (RtsFlags.ProfFlags.doHeapProfile == HEAP_BY_RETAINER) {
1326 retainer_blocks = retainerStackBlocks();
1330 // count the blocks allocated by the arena allocator
1331 arena_blocks = arenaBlocks();
1333 // count the blocks containing executable memory
1334 exec_blocks = countAllocdBlocks(exec_block);
1336 /* count the blocks on the free list */
1337 free_blocks = countFreeList();
1340 for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
1341 live_blocks += gen_blocks[g];
1343 live_blocks += nursery_blocks +
1344 + retainer_blocks + arena_blocks + exec_blocks;
1346 #define MB(n) (((n) * BLOCK_SIZE_W) / ((1024*1024)/sizeof(W_)))
1348 leak = live_blocks + free_blocks != mblocks_allocated * BLOCKS_PER_MBLOCK;
1353 debugBelch("Memory leak detected:\n");
1355 debugBelch("Memory inventory:\n");
1357 for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
1358 debugBelch(" gen %d blocks : %5lu blocks (%lu MB)\n", g,
1359 gen_blocks[g], MB(gen_blocks[g]));
1361 debugBelch(" nursery : %5lu blocks (%lu MB)\n",
1362 nursery_blocks, MB(nursery_blocks));
1363 debugBelch(" retainer : %5lu blocks (%lu MB)\n",
1364 retainer_blocks, MB(retainer_blocks));
1365 debugBelch(" arena blocks : %5lu blocks (%lu MB)\n",
1366 arena_blocks, MB(arena_blocks));
1367 debugBelch(" exec : %5lu blocks (%lu MB)\n",
1368 exec_blocks, MB(exec_blocks));
1369 debugBelch(" free : %5lu blocks (%lu MB)\n",
1370 free_blocks, MB(free_blocks));
1371 debugBelch(" total : %5lu blocks (%lu MB)\n",
1372 live_blocks + free_blocks, MB(live_blocks+free_blocks));
1374 debugBelch("\n in system : %5lu blocks (%lu MB)\n",
1375 mblocks_allocated * BLOCKS_PER_MBLOCK, mblocks_allocated);
1383 ASSERT(n_alloc_blocks == live_blocks);
1388 /* Full heap sanity check. */
1394 if (RtsFlags.GcFlags.generations == 1) {
1395 checkHeap(g0s0->blocks);
1396 checkLargeObjects(g0s0->large_objects);
1399 for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
1400 for (s = 0; s < generations[g].n_steps; s++) {
1401 if (g == 0 && s == 0) { continue; }
1402 ASSERT(countBlocks(generations[g].steps[s].blocks)
1403 == generations[g].steps[s].n_blocks);
1404 ASSERT(countBlocks(generations[g].steps[s].large_objects)
1405 == generations[g].steps[s].n_large_blocks);
1406 checkHeap(generations[g].steps[s].blocks);
1407 checkLargeObjects(generations[g].steps[s].large_objects);
1411 for (s = 0; s < n_nurseries; s++) {
1412 ASSERT(countBlocks(nurseries[s].blocks)
1413 == nurseries[s].n_blocks);
1414 ASSERT(countBlocks(nurseries[s].large_objects)
1415 == nurseries[s].n_large_blocks);
1418 checkFreeListSanity();
1421 #if defined(THREADED_RTS)
1422 // check the stacks too in threaded mode, because we don't do a
1423 // full heap sanity check in this case (see checkHeap())
1424 checkMutableLists(rtsTrue);
1426 checkMutableLists(rtsFalse);
1430 /* Nursery sanity check */
1432 checkNurserySanity( step *stp )
1438 for (bd = stp->blocks; bd != NULL; bd = bd->link) {
1439 ASSERT(bd->u.back == prev);
1441 blocks += bd->blocks;
1443 ASSERT(blocks == stp->n_blocks);
1446 // handy function for use in gdb, because Bdescr() is inlined.
1447 extern bdescr *_bdescr( StgPtr p );