/* -----------------------------------------------------------------------------
 *
 * (c) The GHC Team, 1998-2008
 *
 * Storage manager front end
 *
 * Documentation on the architecture of the Storage Manager can be
 * found in the online commentary:
 *
 *   http://hackage.haskell.org/trac/ghc/wiki/Commentary/Rts/Storage
 *
 * ---------------------------------------------------------------------------*/

#include "PosixSource.h"
#include "BlockAlloc.h"
#include "OSThreads.h"
#include "Capability.h"
#include "RetainerProfile.h"    // for counting memory blocks (memInventory)
/*
 * All these globals require sm_mutex to be held when accessed in THREADED_RTS mode.
 */

StgClosure *caf_list = NULL;
StgClosure *revertible_caf_list = NULL;

bdescr *pinned_object_block;    /* allocate pinned objects into this block */
nat alloc_blocks;               /* number of allocate()d blocks since GC */
nat alloc_blocks_lim;           /* approximate limit on alloc_blocks */

static bdescr *exec_block;

generation *generations = NULL; /* all the generations */
generation *g0 = NULL;          /* generation 0, for convenience */
generation *oldest_gen = NULL;  /* oldest generation, for convenience */
step *g0s0 = NULL;              /* generation 0, step 0, for convenience */

step *all_steps = NULL;         /* single array of steps */

ullong total_allocated = 0;     /* total memory allocated during run */

nat n_nurseries = 0;            /* == RtsFlags.ParFlags.nNodes, convenience */
step *nurseries = NULL;         /* array of nurseries, >1 only if THREADED_RTS */

/*
 * Storage manager mutex: protects all the above state from
 * simultaneous access by two STG threads.
 */

static void *stgAllocForGMP   (size_t size_in_bytes);
static void *stgReallocForGMP (void *ptr, size_t old_size, size_t new_size);
static void  stgDeallocForGMP (void *ptr, size_t size);
initStep (step *stp, int g, int s)
    stp->abs_no = RtsFlags.GcFlags.steps * g + s;
    stp->live_estimate = 0;
    stp->old_blocks = NULL;
    stp->n_old_blocks = 0;
    stp->gen = &generations[g];
    stp->large_objects = NULL;
    stp->n_large_blocks = 0;
    stp->scavenged_large_objects = NULL;
    stp->n_scavenged_large_blocks = 0;
    initSpinLock(&stp->sync_large_objects);
    stp->threads = END_TSO_QUEUE;
    stp->old_threads = END_TSO_QUEUE;
    if (generations != NULL) {
        // multi-init protection

    /* Sanity check to make sure the LOOKS_LIKE_ macros appear to be
     * doing something reasonable.
     */
    /* We use the NOT_NULL variant or gcc warns that the test is always true */
    ASSERT(LOOKS_LIKE_INFO_PTR_NOT_NULL((StgWord)&stg_BLACKHOLE_info));
    ASSERT(LOOKS_LIKE_CLOSURE_PTR(&stg_dummy_ret_closure));
    ASSERT(!HEAP_ALLOCED(&stg_dummy_ret_closure));

    if (RtsFlags.GcFlags.maxHeapSize != 0 &&
        RtsFlags.GcFlags.heapSizeSuggestion >
        RtsFlags.GcFlags.maxHeapSize) {
        RtsFlags.GcFlags.maxHeapSize = RtsFlags.GcFlags.heapSizeSuggestion;

    if (RtsFlags.GcFlags.maxHeapSize != 0 &&
        RtsFlags.GcFlags.minAllocAreaSize >
        RtsFlags.GcFlags.maxHeapSize) {
        errorBelch("maximum heap size (-M) is smaller than minimum alloc area size (-A)");
        RtsFlags.GcFlags.minAllocAreaSize = RtsFlags.GcFlags.maxHeapSize;

    initBlockAllocator();

#if defined(THREADED_RTS)
    initMutex(&sm_mutex);
    /* allocate generation info array */
    generations = (generation *)stgMallocBytes(RtsFlags.GcFlags.generations
                                               * sizeof(struct generation_),
                                               "initStorage: gens");

    /* allocate all the steps into an array.  It is important that we do
       it this way, because we need the invariant that two step pointers
       can be directly compared to see which is the oldest.
       Remember that the last generation has only one step. */
    total_steps = 1 + (RtsFlags.GcFlags.generations - 1) * RtsFlags.GcFlags.steps;
    all_steps = stgMallocBytes(total_steps * sizeof(struct step_),
                               "initStorage: steps");
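
    /* Illustrative note (not from the original file): because all_steps[] is
     * laid out from the youngest step of generation 0 up to the single step of
     * the oldest generation, comparing two step pointers directly tells you
     * their relative age, e.g. (a sketch, assuming stp1 and stp2 both point
     * into all_steps[]):
     *
     *     if (stp1 > stp2) {
     *         // stp1 belongs to an older (generation, step) pair than stp2
     *     }
     */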
    /* Initialise all generations */
    for(g = 0; g < RtsFlags.GcFlags.generations; g++) {
        gen = &generations[g];
        gen->mut_list = allocBlock();
        gen->collections = 0;
        gen->par_collections = 0;
        gen->failed_promotions = 0;

    /* A couple of convenience pointers */
    g0 = &generations[0];
    oldest_gen = &generations[RtsFlags.GcFlags.generations-1];

    /* Allocate step structures in each generation */
    if (RtsFlags.GcFlags.generations > 1) {
        /* Only for multiple-generations */

        /* Oldest generation: one step */
        oldest_gen->n_steps = 1;
        oldest_gen->steps = all_steps + (RtsFlags.GcFlags.generations - 1)
                                        * RtsFlags.GcFlags.steps;

        /* set up all except the oldest generation with 2 steps */
        for(g = 0; g < RtsFlags.GcFlags.generations-1; g++) {
            generations[g].n_steps = RtsFlags.GcFlags.steps;
            generations[g].steps = all_steps + g * RtsFlags.GcFlags.steps;

        /* single generation, i.e. a two-space collector */
        g0->steps = all_steps;
    n_nurseries = n_capabilities;
    nurseries = stgMallocBytes (n_nurseries * sizeof(struct step_),
                                "initStorage: nurseries");

    /* Initialise all steps */
    for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
        for (s = 0; s < generations[g].n_steps; s++) {
            initStep(&generations[g].steps[s], g, s);

    for (s = 0; s < n_nurseries; s++) {
        initStep(&nurseries[s], 0, s);

    /* Set up the destination pointers in each younger gen. step */
    for (g = 0; g < RtsFlags.GcFlags.generations-1; g++) {
        for (s = 0; s < generations[g].n_steps-1; s++) {
            generations[g].steps[s].to = &generations[g].steps[s+1];
        generations[g].steps[s].to = &generations[g+1].steps[0];

    oldest_gen->steps[0].to = &oldest_gen->steps[0];

    for (s = 0; s < n_nurseries; s++) {
        nurseries[s].to = generations[0].steps[0].to;

    /* The oldest generation has one step. */
    if (RtsFlags.GcFlags.compact || RtsFlags.GcFlags.sweep) {
        if (RtsFlags.GcFlags.generations == 1) {
            errorBelch("WARNING: compact/sweep is incompatible with -G1; disabled");
        oldest_gen->steps[0].mark = 1;
        if (RtsFlags.GcFlags.compact)
            oldest_gen->steps[0].compact = 1;

    generations[0].max_blocks = 0;
    g0s0 = &generations[0].steps[0];
    /* The allocation area.  Policy: keep the allocation area
     * small to begin with, even if we have a large suggested heap
     * size.  Reason: we're going to do a major collection first, and we
     * don't want it to be a big one.  This vague idea is borne out by
     * rigorous experimental evidence.
     */
    weak_ptr_list = NULL;
    revertible_caf_list = NULL;

    /* initialise the allocate() interface */
    alloc_blocks_lim = RtsFlags.GcFlags.minAllocAreaSize;

    /* Tell GNU multi-precision pkg about our custom alloc functions */
    mp_set_memory_functions(stgAllocForGMP, stgReallocForGMP, stgDeallocForGMP);

    initSpinLock(&gc_alloc_block_sync);

    IF_DEBUG(gc, statDescribeGens());

    stat_exit(calcAllocated());

    stgFree(g0s0); // frees all the steps
    stgFree(generations);

#if defined(THREADED_RTS)
    closeMutex(&sm_mutex);
/* -----------------------------------------------------------------------------
   The entry code for every CAF does the following:

      - builds a CAF_BLACKHOLE in the heap
      - pushes an update frame pointing to the CAF_BLACKHOLE
      - invokes UPD_CAF(), which:
          - calls newCaf, below
          - updates the CAF with a static indirection to the CAF_BLACKHOLE

   Why do we build a BLACKHOLE in the heap rather than just updating
   the thunk directly?  It's so that we only need one kind of update
   frame - otherwise we'd need a static version of the update frame too.

   newCaf() does the following:

      - it puts the CAF on the oldest generation's mut-once list.
        This is so that we can treat the CAF as a root when collecting

   For GHCI, we have additional requirements when dealing with CAFs:

      - we must *retain* all dynamically-loaded CAFs ever entered,
        just in case we need them again.
      - we must be able to *revert* CAFs that have been evaluated, to
        their pre-evaluated form.

   To do this, we use an additional CAF list.  When newCaf() is
   called on a dynamically-loaded CAF, we add it to the CAF list
   instead of the old-generation mutable list, and save away its
   old info pointer (in caf->saved_info) for later reversion.

   To revert all the CAFs, we traverse the CAF list and reset the
   info pointer to caf->saved_info, then throw away the CAF list.
   (see GC.c:revertCAFs()).

   -------------------------------------------------------------------------- */
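
/* Rough sketch (illustrative, not the actual code) of the reversion walk
 * described above; the real implementation lives in GC.c:revertCAFs().  It
 * assumes the StgIndStatic fields saved_info and static_link used below:
 *
 *     StgIndStatic *c, *next;
 *     for (c = (StgIndStatic *)revertible_caf_list; c != NULL; c = next) {
 *         next = (StgIndStatic *)c->static_link;
 *         SET_INFO((StgClosure *)c, c->saved_info);  // back to the pre-evaluated form
 *         c->saved_info = NULL;
 *     }
 *     revertible_caf_list = NULL;                    // throw the list away
 */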
newCAF(StgClosure* caf)

    // If we are in GHCi _and_ we are using dynamic libraries,
    // then we can't redirect newCAF calls to newDynCAF (see below),
    // so we make newCAF behave almost like newDynCAF.
    // The dynamic libraries might be used by both the interpreted
    // program and GHCi itself, so they must not be reverted.
    // This also means that in GHCi with dynamic libraries, CAFs are not
    // garbage collected. If this turns out to be a problem, we could
    // do another hack here and do an address range test on caf to figure
    // out whether it is from a dynamic library.
    ((StgIndStatic *)caf)->saved_info  = (StgInfoTable *)caf->header.info;
    ((StgIndStatic *)caf)->static_link = caf_list;

    /* Put this CAF on the mutable list for the old generation.
     * This is a HACK - the IND_STATIC closure doesn't really have
     * a mut_link field, but we pretend it has - in fact we re-use
     * the STATIC_LINK field for the time being, because when we
     * come to do a major GC we won't need the mut_link field
     * any more and can use it as a STATIC_LINK.
     */
    ((StgIndStatic *)caf)->saved_info = NULL;
    recordMutableGen(caf, oldest_gen->no);

// An alternate version of newCaf which is used for dynamically loaded
// object code in GHCi.  In this case we want to retain *all* CAFs in
// the object code, because they might be demanded at any time from an
// expression evaluated on the command line.
// Also, GHCi might want to revert CAFs, so we add these to the
// revertible_caf_list.
//
// The linker hackily arranges that references to newCaf from dynamic
// code end up pointing to newDynCAF.

newDynCAF(StgClosure *caf)
    ((StgIndStatic *)caf)->saved_info  = (StgInfoTable *)caf->header.info;
    ((StgIndStatic *)caf)->static_link = revertible_caf_list;
    revertible_caf_list = caf;
/* -----------------------------------------------------------------------------
   -------------------------------------------------------------------------- */

allocNursery (step *stp, bdescr *tail, nat blocks)
    // Allocate a nursery: we allocate fresh blocks one at a time and
    // cons them on to the front of the list, not forgetting to update
    // the back pointer on the tail of the list to point to the new block.
    for (i=0; i < blocks; i++) {
        /* processNursery() in LdvProfile.c assumes that every block group in
           the nursery contains only a single block. So, if a block group is
           given multiple blocks, change processNursery() accordingly.
         */
        // double-link the nursery: we might need to insert blocks
        bd->free = bd->start;

assignNurseriesToCapabilities (void)
    for (i = 0; i < n_nurseries; i++) {
        capabilities[i].r.rNursery        = &nurseries[i];
        capabilities[i].r.rCurrentNursery = nurseries[i].blocks;
        capabilities[i].r.rCurrentAlloc   = NULL;
#else /* THREADED_RTS */
    MainCapability.r.rNursery        = &nurseries[0];
    MainCapability.r.rCurrentNursery = nurseries[0].blocks;
    MainCapability.r.rCurrentAlloc   = NULL;

allocNurseries( void )
    for (i = 0; i < n_nurseries; i++) {
        nurseries[i].blocks =
            allocNursery(&nurseries[i], NULL,
                         RtsFlags.GcFlags.minAllocAreaSize);
        nurseries[i].n_blocks = RtsFlags.GcFlags.minAllocAreaSize;
        nurseries[i].old_blocks = NULL;
        nurseries[i].n_old_blocks = 0;
    assignNurseriesToCapabilities();
resetNurseries( void )
    for (i = 0; i < n_nurseries; i++) {
        for (bd = stp->blocks; bd; bd = bd->link) {
            bd->free = bd->start;
            ASSERT(bd->gen_no == 0);
            ASSERT(bd->step == stp);
            IF_DEBUG(sanity,memset(bd->start, 0xaa, BLOCK_SIZE));
    assignNurseriesToCapabilities();

countNurseryBlocks (void)
    for (i = 0; i < n_nurseries; i++) {
        blocks += nurseries[i].n_blocks;

resizeNursery ( step *stp, nat blocks )
    nursery_blocks = stp->n_blocks;
    if (nursery_blocks == blocks) return;

    if (nursery_blocks < blocks) {
        debugTrace(DEBUG_gc, "increasing size of nursery to %d blocks",
        stp->blocks = allocNursery(stp, stp->blocks, blocks-nursery_blocks);

        debugTrace(DEBUG_gc, "decreasing size of nursery to %d blocks",

        while (nursery_blocks > blocks) {
            next_bd->u.back = NULL;
            nursery_blocks -= bd->blocks; // might be a large block

        // might have gone just under, by freeing a large block, so make
        // up the difference.
        if (nursery_blocks < blocks) {
            stp->blocks = allocNursery(stp, stp->blocks, blocks-nursery_blocks);

    stp->n_blocks = blocks;
    ASSERT(countBlocks(stp->blocks) == stp->n_blocks);
// Resize each of the nurseries to the specified size.
resizeNurseriesFixed (nat blocks)
    for (i = 0; i < n_nurseries; i++) {
        resizeNursery(&nurseries[i], blocks);

// Resize the nurseries to the total specified size.
resizeNurseries (nat blocks)
    // If there are multiple nurseries, then we just divide the number
    // of available blocks between them.
    resizeNurseriesFixed(blocks / n_nurseries);
/* -----------------------------------------------------------------------------
   move_TSO is called to update the TSO structure after it has been
   moved from one place to another.
   -------------------------------------------------------------------------- */

move_TSO (StgTSO *src, StgTSO *dest)
    // relocate the stack pointer...
    diff = (StgPtr)dest - (StgPtr)src; // In *words*
    dest->sp = (StgPtr)dest->sp + diff;

/* -----------------------------------------------------------------------------
   The allocate() interface

   The allocateInGen() function allocates memory directly into a specific
   generation.  It always succeeds, and returns a chunk of memory n
   words long.  n can be larger than the size of a block if necessary,
   in which case a contiguous block group will be allocated.

   allocate(n) is equivalent to allocateInGen(g0).
   -------------------------------------------------------------------------- */
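
/* Usage sketch (illustrative only; it mirrors what stgAllocForGMP further
 * down does): carve an StgArrWords with 'payload' words of data out of
 * generation 0.  'payload' is a hypothetical variable assumed to be in scope:
 *
 *     StgArrWords *arr = (StgArrWords *)allocate(sizeofW(StgArrWords) + payload);
 *     SET_ARR_HDR(arr, &stg_ARR_WORDS_info, CCCS, payload);
 */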
allocateInGen (generation *g, lnat n)
    TICK_ALLOC_HEAP_NOCTR(n);

    if (n >= LARGE_OBJECT_THRESHOLD/sizeof(W_))
        lnat req_blocks = (lnat)BLOCK_ROUND_UP(n*sizeof(W_)) / BLOCK_SIZE;

        // Attempting to allocate an object larger than maxHeapSize
        // should definitely be disallowed.  (bug #1791)
        if (RtsFlags.GcFlags.maxHeapSize > 0 &&
            req_blocks >= RtsFlags.GcFlags.maxHeapSize) {
            // heapOverflow() doesn't exit (see #2592), but we aren't
            // in a position to do a clean shutdown here: we
            // either have to allocate the memory or exit now.
            // Allocating the memory would be bad, because the user
            // has requested that we not exceed maxHeapSize, so we
            stg_exit(EXIT_HEAPOVERFLOW);

        bd = allocGroup(req_blocks);
        dbl_link_onto(bd, &stp->large_objects);
        stp->n_large_blocks += bd->blocks; // might be larger than req_blocks
        alloc_blocks += bd->blocks;
        bd->flags = BF_LARGE;
        bd->free = bd->start + n;

    // small allocation (<LARGE_OBJECT_THRESHOLD)
    if (bd == NULL || bd->free + n > bd->start + BLOCK_SIZE_W) {
        bd->link = stp->blocks;

    return allocateInGen(g0,n);

allocatedBytes( void )
    allocated = alloc_blocks * BLOCK_SIZE_W;
    if (pinned_object_block != NULL) {
        allocated -= (pinned_object_block->start + BLOCK_SIZE_W) -
                     pinned_object_block->free;
// split N blocks off the front of the given bdescr, returning the
// new block group.  We treat the remainder as if it
// had been freshly allocated in generation 0.
splitLargeBlock (bdescr *bd, nat blocks)
    // subtract the original number of blocks from the counter first
    bd->step->n_large_blocks -= bd->blocks;

    new_bd = splitBlockGroup (bd, blocks);

    dbl_link_onto(new_bd, &g0s0->large_objects);
    g0s0->n_large_blocks += new_bd->blocks;
    new_bd->gen_no = g0s0->no;
    new_bd->flags = BF_LARGE;
    new_bd->free = bd->free;
    ASSERT(new_bd->free <= new_bd->start + new_bd->blocks * BLOCK_SIZE_W);

    // add the new number of blocks to the counter.  Due to the gaps
    // for block descriptors, new_bd->blocks + bd->blocks might not be
    // equal to the original bd->blocks, which is why we do it this way.
    bd->step->n_large_blocks += bd->blocks;
/* -----------------------------------------------------------------------------
   This allocates memory in the current thread - it is intended for
   use primarily from STG-land where we have a Capability.  It is
   better than allocate() because it doesn't require taking the
   sm_mutex lock in the common case.

   Memory is allocated directly from the nursery if possible (but not
   from the current nursery block, so as not to interfere with
   -------------------------------------------------------------------------- */

allocateLocal (Capability *cap, lnat n)
    if (n >= LARGE_OBJECT_THRESHOLD/sizeof(W_)) {
        return allocateInGen(g0,n);

    /* small allocation (<LARGE_OBJECT_THRESHOLD) */

    TICK_ALLOC_HEAP_NOCTR(n);

    bd = cap->r.rCurrentAlloc;
    if (bd == NULL || bd->free + n > bd->start + BLOCK_SIZE_W) {

        // The CurrentAlloc block is full, we need to find another
        // one.  First, we try taking the next block from the
        bd = cap->r.rCurrentNursery->link;

        if (bd == NULL || bd->free + n > bd->start + BLOCK_SIZE_W) {
            // The nursery is empty, or the next block is already
            // full: allocate a fresh block (we can't fail here).
            cap->r.rNursery->n_blocks++;
            bd->step = cap->r.rNursery;
            // NO: alloc_blocks++;
            // calcAllocated() uses the size of the nursery, and we've
            // already bumped nursery->n_blocks above.  We'll GC
            // pretty quickly now anyway, because MAYBE_GC() will
            // notice that CurrentNursery->link is NULL.

            // we have a block in the nursery: take it and put
            // it at the *front* of the nursery list, and use it
            // to allocate() from.
            cap->r.rCurrentNursery->link = bd->link;
            if (bd->link != NULL) {
                bd->link->u.back = cap->r.rCurrentNursery;

        dbl_link_onto(bd, &cap->r.rNursery->blocks);
        cap->r.rCurrentAlloc = bd;
        IF_DEBUG(sanity, checkNurserySanity(cap->r.rNursery));
/* ---------------------------------------------------------------------------
   Allocate a fixed/pinned object.

   We allocate small pinned objects into a single block, allocating a
   new block when the current one overflows.  The block is chained
   onto the large_object_list of generation 0 step 0.

   NOTE: The GC can't in general handle pinned objects.  This
   interface is only safe to use for ByteArrays, which have no
   pointers and don't require scavenging.  It works because the
   block's descriptor has the BF_LARGE flag set, so the block is
   treated as a large object and chained onto various lists, rather
   than the individual objects being copied.  However, when it comes
   to scavenge the block, the GC will only scavenge the first object.
   The reason is that the GC can't linearly scan a block of pinned
   objects at the moment (doing so would require using the
   mostly-copying techniques).  But since we're restricting ourselves
   to pinned ByteArrays, not scavenging is ok.

   This function is called by newPinnedByteArray# which immediately
   fills the allocated memory with a MutableByteArray#.
   ------------------------------------------------------------------------- */
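
/* Sketch of the caller described above (illustrative C, not the actual Cmm in
 * PrimOps.cmm; 'payload_words' is a hypothetical, already rounded-up size):
 *
 *     StgArrWords *mba =
 *         (StgArrWords *)allocatePinned(sizeofW(StgArrWords) + payload_words);
 *     SET_ARR_HDR(mba, &stg_ARR_WORDS_info, CCCS, payload_words);
 *     // the payload then starts at mba->payload and never moves
 */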
allocatePinned( lnat n )
    bdescr *bd = pinned_object_block;

    // If the request is for a large object, then allocate()
    // will give us a pinned object anyway.
    if (n >= LARGE_OBJECT_THRESHOLD/sizeof(W_)) {
        Bdescr(p)->flags |= BF_PINNED;

    TICK_ALLOC_HEAP_NOCTR(n);

    // If we don't have a block of pinned objects yet, or the current
    // one isn't large enough to hold the new object, allocate a new one.
    if (bd == NULL || (bd->free + n) > (bd->start + BLOCK_SIZE_W)) {
        pinned_object_block = bd = allocBlock();
        dbl_link_onto(bd, &g0s0->large_objects);
        g0s0->n_large_blocks++;
        bd->flags = BF_PINNED | BF_LARGE;
        bd->free = bd->start;
/* -----------------------------------------------------------------------------
   -------------------------------------------------------------------------- */

/*
   This is the write barrier for MUT_VARs, a.k.a. IORefs.  A
   MUT_VAR_CLEAN object is not on the mutable list; a MUT_VAR_DIRTY
   is.  When written to, a MUT_VAR_CLEAN turns into a MUT_VAR_DIRTY
   and is put on the mutable list.
*/
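
/* Call-site pattern implied above (a sketch; 'mv' is a hypothetical StgMutVar*
 * and 'cap' the current Capability): perform the write, then let the barrier
 * decide whether the MUT_VAR needs to move onto the mutable list:
 *
 *     mv->var = v;
 *     dirty_MUT_VAR(&cap->r, (StgClosure *)mv);
 */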
dirty_MUT_VAR(StgRegTable *reg, StgClosure *p)
    Capability *cap = regTableToCapability(reg);
    if (p->header.info == &stg_MUT_VAR_CLEAN_info) {
        p->header.info = &stg_MUT_VAR_DIRTY_info;
        bd = Bdescr((StgPtr)p);
        if (bd->gen_no > 0) recordMutableCap(p,cap,bd->gen_no);

// Setting a TSO's link field with a write barrier.
// It is *not* necessary to call this function when
//    * setting the link field to END_TSO_QUEUE
//    * putting a TSO on the blackhole_queue
//    * setting the link field of the currently running TSO, as it
//      will already be dirty.
setTSOLink (Capability *cap, StgTSO *tso, StgTSO *target)
    if ((tso->flags & (TSO_DIRTY|TSO_LINK_DIRTY)) == 0) {
        tso->flags |= TSO_LINK_DIRTY;
        bd = Bdescr((StgPtr)tso);
        if (bd->gen_no > 0) recordMutableCap((StgClosure*)tso,cap,bd->gen_no);

dirty_TSO (Capability *cap, StgTSO *tso)
    if ((tso->flags & (TSO_DIRTY|TSO_LINK_DIRTY)) == 0) {
        bd = Bdescr((StgPtr)tso);
        if (bd->gen_no > 0) recordMutableCap((StgClosure*)tso,cap,bd->gen_no);

    tso->flags |= TSO_DIRTY;
/*
   This is the write barrier for MVARs.  An MVAR_CLEAN object is not
   on the mutable list; an MVAR_DIRTY is.  When written to, an
   MVAR_CLEAN turns into an MVAR_DIRTY and is put on the mutable list.
   The check for MVAR_CLEAN is inlined at the call site for speed;
   this really does make a difference on concurrency-heavy benchmarks
   such as Chameneos and cheap-concurrency.
*/
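
/* The inlined check mentioned above looks roughly like this at a call site
 * (a sketch; 'mvar' is a hypothetical StgMVar* and 'cap' the current
 * Capability):
 *
 *     if (GET_INFO((StgClosure *)mvar) == &stg_MVAR_CLEAN_info) {
 *         dirty_MVAR(&cap->r, (StgClosure *)mvar);
 *     }
 */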
dirty_MVAR(StgRegTable *reg, StgClosure *p)
    Capability *cap = regTableToCapability(reg);
    bd = Bdescr((StgPtr)p);
    if (bd->gen_no > 0) recordMutableCap(p,cap,bd->gen_no);
/* -----------------------------------------------------------------------------
   Allocation functions for GMP.

   These all use the allocate() interface - we can't have any garbage
   collection going on during a gmp operation, so we use allocate()
   which always succeeds.  The gmp operations which might need to
   allocate will ask the storage manager (via doYouWantToGC()) whether
   a garbage collection is required, in case we get into a loop doing
   only allocate() style allocation.
   -------------------------------------------------------------------------- */
stgAllocForGMP (size_t size_in_bytes)
    nat data_size_in_words, total_size_in_words;

    /* round up to a whole number of words */
    data_size_in_words  = (size_in_bytes + sizeof(W_) + 1) / sizeof(W_);
    total_size_in_words = sizeofW(StgArrWords) + data_size_in_words;

    /* allocate and fill it in. */
#if defined(THREADED_RTS)
    arr = (StgArrWords *)allocateLocal(myTask()->cap, total_size_in_words);

    arr = (StgArrWords *)allocateLocal(&MainCapability, total_size_in_words);

    SET_ARR_HDR(arr, &stg_ARR_WORDS_info, CCCS, data_size_in_words);

    /* and return a ptr to the goods inside the array */

stgReallocForGMP (void *ptr, size_t old_size, size_t new_size)
    void *new_stuff_ptr = stgAllocForGMP(new_size);
    char *p = (char *) ptr;
    char *q = (char *) new_stuff_ptr;

    min_size = old_size < new_size ? old_size : new_size;
    for (; i < min_size; i++, p++, q++) {

    return(new_stuff_ptr);

stgDeallocForGMP (void *ptr STG_UNUSED,
                  size_t size STG_UNUSED)
    /* easy for us: the garbage collector does the dealloc'n */
/* -----------------------------------------------------------------------------
 * -------------------------------------------------------------------------- */

/* -----------------------------------------------------------------------------
 * Approximate how much we've allocated: number of blocks in the
 * nursery + blocks allocated via allocate() - unused nursery blocks.
 * This leaves a little slop at the end of each block, and doesn't
 * take into account large objects (ToDo).
 * -------------------------------------------------------------------------- */
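
/* Worked example of the estimate above (illustrative numbers): with a
 * 512-block nursery of which 20 blocks are still untouched at GC time, and 10
 * blocks handed out via allocate(), we report roughly
 * (512 + 10 - 20) * BLOCK_SIZE_W words, ignoring the slop at the end of each
 * partially-filled block.
 */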
calcAllocated( void )
    allocated  = allocatedBytes();
    allocated += countNurseryBlocks() * BLOCK_SIZE_W;

    for (i = 0; i < n_nurseries; i++) {
        for ( bd = capabilities[i].r.rCurrentNursery->link;
              bd != NULL; bd = bd->link ) {
            allocated -= BLOCK_SIZE_W;
        cap = &capabilities[i];
        if (cap->r.rCurrentNursery->free <
            cap->r.rCurrentNursery->start + BLOCK_SIZE_W) {
            allocated -= (cap->r.rCurrentNursery->start + BLOCK_SIZE_W)
                         - cap->r.rCurrentNursery->free;

    bdescr *current_nursery = MainCapability.r.rCurrentNursery;

    for ( bd = current_nursery->link; bd != NULL; bd = bd->link ) {
        allocated -= BLOCK_SIZE_W;
    if (current_nursery->free < current_nursery->start + BLOCK_SIZE_W) {
        allocated -= (current_nursery->start + BLOCK_SIZE_W)
                     - current_nursery->free;

    total_allocated += allocated;
/* Approximate the amount of live data in the heap.  To be called just
 * after garbage collection (see GarbageCollect()).
 */
calcLiveBlocks(void)
    if (RtsFlags.GcFlags.generations == 1) {
        return g0s0->n_large_blocks + g0s0->n_blocks;

    for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
        for (s = 0; s < generations[g].n_steps; s++) {
            /* approximate amount of live data (doesn't take into account slop
             * at end of each block).
             */
            if (g == 0 && s == 0) {
            stp = &generations[g].steps[s];
            live += stp->n_large_blocks + stp->n_blocks;

countOccupied(bdescr *bd)
    for (; bd != NULL; bd = bd->link) {
        ASSERT(bd->free <= bd->start + bd->blocks * BLOCK_SIZE_W);
        words += bd->free - bd->start;

// Return an accurate count of the live data in the heap, excluding
    if (RtsFlags.GcFlags.generations == 1) {
        return g0s0->n_words + countOccupied(g0s0->large_objects);

    for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
        for (s = 0; s < generations[g].n_steps; s++) {
            if (g == 0 && s == 0) continue;
            stp = &generations[g].steps[s];
            live += stp->n_words + countOccupied(stp->large_objects);
/* Approximate the number of blocks that will be needed at the next
 * garbage collection.
 *
 * Assume: all data currently live will remain live.  Steps that will
 * be collected next time will therefore need twice as many blocks
 * since all the data will be copied.
 */
    for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
        for (s = 0; s < generations[g].n_steps; s++) {
            if (g == 0 && s == 0) { continue; }
            stp = &generations[g].steps[s];

            // we need at least this much space
            needed += stp->n_blocks + stp->n_large_blocks;

            // any additional space needed to collect this gen next time?
            if (g == 0 || // always collect gen 0
                (generations[g].steps[0].n_blocks +
                 generations[g].steps[0].n_large_blocks
                 > generations[g].max_blocks)) {
                // we will collect this gen next time
                needed += stp->n_blocks / BITS_IN(W_);
                needed += stp->n_blocks / 100;
                continue; // no additional space needed for compaction
                needed += stp->n_blocks;
/* ----------------------------------------------------------------------------
   Executable memory must be managed separately from non-executable
   memory.  Most OSs these days require you to jump through hoops to
   dynamically allocate executable memory, due to various security

   Here we provide a small memory allocator for executable memory.
   Memory is managed with a page granularity; we allocate linearly
   in the page, and when the page is emptied (all objects on the page
   are free) we free the page again, not forgetting to make it

   TODO: The inability to handle objects bigger than BLOCK_SIZE_W means that
   the linker cannot use allocateExec for loading object code files
   on Windows.  Once allocateExec can handle larger objects, the linker
   should be modified to use allocateExec instead of VirtualAlloc.
   ------------------------------------------------------------------------- */
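
/* Usage sketch (illustrative): the caller gets back a writable pointer and,
 * via the out-parameter, a separate executable pointer; the object is later
 * freed through the executable address.  'code' and 'code_size' are
 * hypothetical:
 *
 *     void *exec_addr;
 *     void *wr_addr = allocateExec(code_size, &exec_addr);
 *     if (wr_addr != NULL) {
 *         memcpy(wr_addr, code, code_size);  // fill in through the writable mapping
 *         // ... run the code at exec_addr ...
 *         freeExec(exec_addr);               // free using the executable address
 *     }
 */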
#if defined(linux_HOST_OS)

// On Linux we need to use libffi for allocating executable memory,
// because it knows how to work around the restrictions put in place
void *allocateExec (nat bytes, void **exec_ret)
    ret = ffi_closure_alloc (sizeof(void *) + (size_t)bytes, (void**)&exec);
    if (ret == NULL) return ret;
    *ret = ret; // save the address of the writable mapping, for freeExec().
    *exec_ret = exec + 1;

// freeExec gets passed the executable address, not the writable address.
void freeExec (void *addr)
    writable = *((void**)addr - 1);
    ffi_closure_free (writable);
void *allocateExec (nat bytes, void **exec_ret)
    // round up to words.
    n = (bytes + sizeof(W_) + 1) / sizeof(W_);

    if (n+1 > BLOCK_SIZE_W) {
        barf("allocateExec: can't handle large objects");

    if (exec_block == NULL ||
        exec_block->free + n + 1 > exec_block->start + BLOCK_SIZE_W) {
        lnat pagesize = getPageSize();
        bd = allocGroup(stg_max(1, pagesize / BLOCK_SIZE));
        debugTrace(DEBUG_gc, "allocate exec block %p", bd->start);
        bd->flags = BF_EXEC;
        bd->link = exec_block;
        if (exec_block != NULL) {
            exec_block->u.back = bd;
        setExecutable(bd->start, bd->blocks * BLOCK_SIZE, rtsTrue);

    *(exec_block->free) = n;  // store the size of this chunk
    exec_block->gen_no += n;  // gen_no stores the number of words allocated
    ret = exec_block->free + 1;
    exec_block->free += n + 1;

void freeExec (void *addr)
    StgPtr p = (StgPtr)addr - 1;
    bdescr *bd = Bdescr((StgPtr)p);

    if ((bd->flags & BF_EXEC) == 0) {
        barf("freeExec: not executable");

    if (*(StgPtr)p == 0) {
        barf("freeExec: already free?");

    bd->gen_no -= *(StgPtr)p;

    if (bd->gen_no == 0) {
        // Free the block if it is empty, but not if it is the block at
        // the head of the queue.
        if (bd != exec_block) {
            debugTrace(DEBUG_gc, "free exec block %p", bd->start);
            dbl_link_remove(bd, &exec_block);
            setExecutable(bd->start, bd->blocks * BLOCK_SIZE, rtsFalse);
            bd->free = bd->start;

#endif /* mingw32_HOST_OS */
/* -----------------------------------------------------------------------------
   memInventory() checks for memory leaks by counting up all the
   blocks we know about and comparing that to the number of blocks
   allegedly floating around in the system.
   -------------------------------------------------------------------------- */

// Useful for finding partially full blocks in gdb
void findSlop(bdescr *bd);
void findSlop(bdescr *bd)
    for (; bd != NULL; bd = bd->link) {
        slop = (bd->blocks * BLOCK_SIZE_W) - (bd->free - bd->start);
        if (slop > (1024/sizeof(W_))) {
            debugBelch("block at %p (bdescr %p) has %ldKB slop\n",
                       bd->start, bd, slop / (1024/sizeof(W_)));
countBlocks(bdescr *bd)
    for (n=0; bd != NULL; bd=bd->link) {

// (*1) Just like countBlocks, except that we adjust the count for a
// megablock group so that it doesn't include the extra few blocks
// that would be taken up by block descriptors in the second and
// subsequent megablocks.  This is so we can tally the count with the
// number of blocks allocated in the system, for memInventory().
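
// Worked example of the adjustment (illustrative, assuming 4KB blocks in 1MB
// megablocks, i.e. MBLOCK_SIZE/BLOCK_SIZE = 256 and BLOCKS_PER_MBLOCK = 252):
// a group spanning two megablocks has bd->blocks = 252 + 256 = 508, so we
// subtract (256 - 252) * (508 / 256) = 4, leaving 504 = 2 * BLOCKS_PER_MBLOCK,
// which tallies with mblocks_allocated * BLOCKS_PER_MBLOCK.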
countAllocdBlocks(bdescr *bd)
    for (n=0; bd != NULL; bd=bd->link) {
        // hack for megablock groups: see (*1) above
        if (bd->blocks > BLOCKS_PER_MBLOCK) {
            n -= (MBLOCK_SIZE / BLOCK_SIZE - BLOCKS_PER_MBLOCK)
                  * (bd->blocks/(MBLOCK_SIZE/BLOCK_SIZE));

stepBlocks (step *stp)
    ASSERT(countBlocks(stp->blocks) == stp->n_blocks);
    ASSERT(countBlocks(stp->large_objects) == stp->n_large_blocks);
    return stp->n_blocks + stp->n_old_blocks +
           countAllocdBlocks(stp->large_objects);
// If memInventory() calculates that we have a memory leak, this
// function will try to find the block(s) that are leaking by marking
// all the ones that we know about, and searching through memory to find
// blocks that are not marked.  In the debugger this can help to give
// us a clue about what kind of block leaked.  In the future we might
// annotate blocks with their allocation site to give more helpful
findMemoryLeak (void)
    for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
        for (i = 0; i < n_capabilities; i++) {
            markBlocks(capabilities[i].mut_lists[g]);
        markBlocks(generations[g].mut_list);
        for (s = 0; s < generations[g].n_steps; s++) {
            markBlocks(generations[g].steps[s].blocks);
            markBlocks(generations[g].steps[s].large_objects);

    for (i = 0; i < n_nurseries; i++) {
        markBlocks(nurseries[i].blocks);
        markBlocks(nurseries[i].large_objects);

//    if (RtsFlags.ProfFlags.doHeapProfile == HEAP_BY_RETAINER) {
//        markRetainerBlocks();

    // count the blocks allocated by the arena allocator
//    markArenaBlocks();

    // count the blocks containing executable memory
    markBlocks(exec_block);

    reportUnmarkedBlocks();
memInventory (rtsBool show)
    lnat gen_blocks[RtsFlags.GcFlags.generations];
    lnat nursery_blocks, retainer_blocks,
         arena_blocks, exec_blocks;
    lnat live_blocks = 0, free_blocks = 0;

    // count the blocks we currently have
    for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
        for (i = 0; i < n_capabilities; i++) {
            gen_blocks[g] += countBlocks(capabilities[i].mut_lists[g]);
        gen_blocks[g] += countAllocdBlocks(generations[g].mut_list);
        for (s = 0; s < generations[g].n_steps; s++) {
            stp = &generations[g].steps[s];
            gen_blocks[g] += stepBlocks(stp);

    for (i = 0; i < n_nurseries; i++) {
        nursery_blocks += stepBlocks(&nurseries[i]);

    retainer_blocks = 0;
    if (RtsFlags.ProfFlags.doHeapProfile == HEAP_BY_RETAINER) {
        retainer_blocks = retainerStackBlocks();

    // count the blocks allocated by the arena allocator
    arena_blocks = arenaBlocks();

    // count the blocks containing executable memory
    exec_blocks = countAllocdBlocks(exec_block);

    /* count the blocks on the free list */
    free_blocks = countFreeList();

    for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
        live_blocks += gen_blocks[g];
    live_blocks += nursery_blocks +
                   retainer_blocks + arena_blocks + exec_blocks;

#define MB(n) (((n) * BLOCK_SIZE_W) / ((1024*1024)/sizeof(W_)))

    leak = live_blocks + free_blocks != mblocks_allocated * BLOCKS_PER_MBLOCK;

        debugBelch("Memory leak detected:\n");
        debugBelch("Memory inventory:\n");
        for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
            debugBelch(" gen %d blocks : %5lu blocks (%lu MB)\n", g,
                       gen_blocks[g], MB(gen_blocks[g]));
        debugBelch(" nursery : %5lu blocks (%lu MB)\n",
                   nursery_blocks, MB(nursery_blocks));
        debugBelch(" retainer : %5lu blocks (%lu MB)\n",
                   retainer_blocks, MB(retainer_blocks));
        debugBelch(" arena blocks : %5lu blocks (%lu MB)\n",
                   arena_blocks, MB(arena_blocks));
        debugBelch(" exec : %5lu blocks (%lu MB)\n",
                   exec_blocks, MB(exec_blocks));
        debugBelch(" free : %5lu blocks (%lu MB)\n",
                   free_blocks, MB(free_blocks));
        debugBelch(" total : %5lu blocks (%lu MB)\n",
                   live_blocks + free_blocks, MB(live_blocks+free_blocks));
        debugBelch("\n in system : %5lu blocks (%lu MB)\n",
                   mblocks_allocated * BLOCKS_PER_MBLOCK, mblocks_allocated);

    ASSERT(n_alloc_blocks == live_blocks);
/* Full heap sanity check. */
    if (RtsFlags.GcFlags.generations == 1) {
        checkHeap(g0s0->blocks);
        checkLargeObjects(g0s0->large_objects);

    for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
        for (s = 0; s < generations[g].n_steps; s++) {
            if (g == 0 && s == 0) { continue; }
            ASSERT(countBlocks(generations[g].steps[s].blocks)
                   == generations[g].steps[s].n_blocks);
            ASSERT(countBlocks(generations[g].steps[s].large_objects)
                   == generations[g].steps[s].n_large_blocks);
            checkHeap(generations[g].steps[s].blocks);
            checkLargeObjects(generations[g].steps[s].large_objects);

    for (s = 0; s < n_nurseries; s++) {
        ASSERT(countBlocks(nurseries[s].blocks)
               == nurseries[s].n_blocks);
        ASSERT(countBlocks(nurseries[s].large_objects)
               == nurseries[s].n_large_blocks);

    checkFreeListSanity();

#if defined(THREADED_RTS)
    // check the stacks too in threaded mode, because we don't do a
    // full heap sanity check in this case (see checkHeap())
    checkMutableLists(rtsTrue);
    checkMutableLists(rtsFalse);

/* Nursery sanity check */
checkNurserySanity( step *stp )
    for (bd = stp->blocks; bd != NULL; bd = bd->link) {
        ASSERT(bd->u.back == prev);
        blocks += bd->blocks;

    ASSERT(blocks == stp->n_blocks);

// handy function for use in gdb, because Bdescr() is inlined.
extern bdescr *_bdescr( StgPtr p );