1 /* -----------------------------------------------------------------------------
3 * (c) The GHC Team, 1998-2008
5 * Storage manager front end
7 * Documentation on the architecture of the Storage Manager can be
8 * found in the online commentary:
10 * http://hackage.haskell.org/trac/ghc/wiki/Commentary/Rts/Storage
12 * ---------------------------------------------------------------------------*/
14 #include "PosixSource.h"
20 #include "BlockAlloc.h"
25 #include "OSThreads.h"
26 #include "Capability.h"
29 #include "RetainerProfile.h" // for counting memory blocks (memInventory)
39 * All these globals require sm_mutex to be held when accessed in THREADED_RTS mode.
41 StgClosure *caf_list = NULL;
42 StgClosure *revertible_caf_list = NULL;
45 bdescr *pinned_object_block; /* allocate pinned objects into this block */
46 nat alloc_blocks; /* number of allocate()d blocks since GC */
47 nat alloc_blocks_lim; /* approximate limit on alloc_blocks */
49 generation *generations = NULL; /* all the generations */
50 generation *g0 = NULL; /* generation 0, for convenience */
51 generation *oldest_gen = NULL; /* oldest generation, for convenience */
52 step *g0s0 = NULL; /* generation 0, step 0, for convenience */
55 step *all_steps = NULL; /* single array of steps */
57 ullong total_allocated = 0; /* total memory allocated during run */
59 nat n_nurseries = 0; /* == RtsFlags.ParFlags.nNodes, convenience */
60 step *nurseries = NULL; /* array of nurseries, >1 only if THREADED_RTS */
64 * Storage manager mutex: protects all the above state from
65 * simultaneous access by two STG threads.
69 * This mutex is used by atomicModifyMutVar# only
71 Mutex atomic_modify_mutvar_mutex;
78 static void *stgAllocForGMP (size_t size_in_bytes);
79 static void *stgReallocForGMP (void *ptr, size_t old_size, size_t new_size);
80 static void stgDeallocForGMP (void *ptr, size_t size);
83 initStep (step *stp, int g, int s)
86 stp->abs_no = RtsFlags.GcFlags.steps * g + s;
90 stp->old_blocks = NULL;
91 stp->n_old_blocks = 0;
92 stp->gen = &generations[g];
94 stp->large_objects = NULL;
95 stp->n_large_blocks = 0;
96 stp->scavenged_large_objects = NULL;
97 stp->n_scavenged_large_blocks = 0;
98 stp->is_compacted = 0;
101 initSpinLock(&stp->sync_todo);
102 initSpinLock(&stp->sync_large_objects);
104 stp->threads = END_TSO_QUEUE;
105 stp->old_threads = END_TSO_QUEUE;
114 if (generations != NULL) {
115 // multi-init protection
121 /* Sanity check to make sure the LOOKS_LIKE_ macros appear to be
122 * doing something reasonable.
124 /* We use the NOT_NULL variant, otherwise gcc warns that the test is always true */
125 ASSERT(LOOKS_LIKE_INFO_PTR_NOT_NULL(&stg_BLACKHOLE_info));
126 ASSERT(LOOKS_LIKE_CLOSURE_PTR(&stg_dummy_ret_closure));
127 ASSERT(!HEAP_ALLOCED(&stg_dummy_ret_closure));
129 if (RtsFlags.GcFlags.maxHeapSize != 0 &&
130 RtsFlags.GcFlags.heapSizeSuggestion >
131 RtsFlags.GcFlags.maxHeapSize) {
132 RtsFlags.GcFlags.maxHeapSize = RtsFlags.GcFlags.heapSizeSuggestion;
135 if (RtsFlags.GcFlags.maxHeapSize != 0 &&
136 RtsFlags.GcFlags.minAllocAreaSize >
137 RtsFlags.GcFlags.maxHeapSize) {
138 errorBelch("maximum heap size (-M) is smaller than minimum alloc area size (-A)");
139 RtsFlags.GcFlags.minAllocAreaSize = RtsFlags.GcFlags.maxHeapSize;
142 initBlockAllocator();
144 #if defined(THREADED_RTS)
145 initMutex(&sm_mutex);
146 initMutex(&atomic_modify_mutvar_mutex);
151 /* allocate generation info array */
152 generations = (generation *)stgMallocBytes(RtsFlags.GcFlags.generations
153 * sizeof(struct generation_),
154 "initStorage: gens");
156 /* allocate all the steps into an array. It is important that we do
157 it this way, because we need the invariant that two step pointers
158 can be directly compared to see which is the oldest.
159 Remember that the last generation has only one step. */
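/* For example (illustrative, assuming the default of 2 generations and
   2 steps per young generation): all_steps[] then holds gen0/step0,
   gen0/step1 and gen1/step0 in that order, so for two step pointers
   s1 and s2,

       s1 < s2   means   s1 belongs to a younger step than s2

   without having to consult generation or step numbers. */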
160 total_steps = 1 + (RtsFlags.GcFlags.generations - 1) * RtsFlags.GcFlags.steps;
161 all_steps = stgMallocBytes(total_steps * sizeof(struct step_),
162 "initStorage: steps");
164 /* Initialise all generations */
165 for(g = 0; g < RtsFlags.GcFlags.generations; g++) {
166 gen = &generations[g];
168 gen->mut_list = allocBlock();
169 gen->collections = 0;
170 gen->par_collections = 0;
171 gen->failed_promotions = 0;
175 /* A couple of convenience pointers */
176 g0 = &generations[0];
177 oldest_gen = &generations[RtsFlags.GcFlags.generations-1];
179 /* Allocate step structures in each generation */
180 if (RtsFlags.GcFlags.generations > 1) {
181 /* Only for multiple-generations */
183 /* Oldest generation: one step */
184 oldest_gen->n_steps = 1;
185 oldest_gen->steps = all_steps + (RtsFlags.GcFlags.generations - 1)
186 * RtsFlags.GcFlags.steps;
188 /* set up all generations except the oldest with the requested number of steps */
189 for(g = 0; g < RtsFlags.GcFlags.generations-1; g++) {
190 generations[g].n_steps = RtsFlags.GcFlags.steps;
191 generations[g].steps = all_steps + g * RtsFlags.GcFlags.steps;
195 /* single generation, i.e. a two-space collector */
197 g0->steps = all_steps;
201 n_nurseries = n_capabilities;
205 nurseries = stgMallocBytes (n_nurseries * sizeof(struct step_),
206 "initStorage: nurseries");
208 /* Initialise all steps */
209 for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
210 for (s = 0; s < generations[g].n_steps; s++) {
211 initStep(&generations[g].steps[s], g, s);
215 for (s = 0; s < n_nurseries; s++) {
216 initStep(&nurseries[s], 0, s);
219 /* Set up the destination pointers in each younger gen. step */
220 for (g = 0; g < RtsFlags.GcFlags.generations-1; g++) {
221 for (s = 0; s < generations[g].n_steps-1; s++) {
222 generations[g].steps[s].to = &generations[g].steps[s+1];
224 generations[g].steps[s].to = &generations[g+1].steps[0];
226 oldest_gen->steps[0].to = &oldest_gen->steps[0];
228 for (s = 0; s < n_nurseries; s++) {
229 nurseries[s].to = generations[0].steps[0].to;
232 /* The oldest generation has one step. */
233 if (RtsFlags.GcFlags.compact) {
234 if (RtsFlags.GcFlags.generations == 1) {
235 errorBelch("WARNING: compaction is incompatible with -G1; disabled");
237 oldest_gen->steps[0].is_compacted = 1;
241 generations[0].max_blocks = 0;
242 g0s0 = &generations[0].steps[0];
244 /* The allocation area. Policy: keep the allocation area
245 * small to begin with, even if we have a large suggested heap
246 * size. Reason: we're going to do a major collection first, and we
247 * don't want it to be a big one. This vague idea is borne out by
248 * rigorous experimental evidence.
252 weak_ptr_list = NULL;
254 revertible_caf_list = NULL;
256 /* initialise the allocate() interface */
258 alloc_blocks_lim = RtsFlags.GcFlags.minAllocAreaSize;
260 /* Tell GNU multi-precision pkg about our custom alloc functions */
261 mp_set_memory_functions(stgAllocForGMP, stgReallocForGMP, stgDeallocForGMP);
264 initSpinLock(&gc_alloc_block_sync);
265 initSpinLock(&recordMutableGen_sync);
269 IF_DEBUG(gc, statDescribeGens());
277 stat_exit(calcAllocated());
283 stgFree(g0s0); // frees all the steps
284 stgFree(generations);
286 #if defined(THREADED_RTS)
287 closeMutex(&sm_mutex);
288 closeMutex(&atomic_modify_mutvar_mutex);
293 /* -----------------------------------------------------------------------------
296 The entry code for every CAF does the following:
298 - builds a CAF_BLACKHOLE in the heap
299 - pushes an update frame pointing to the CAF_BLACKHOLE
300 - invokes UPD_CAF(), which:
301 - calls newCaf, below
302 - updates the CAF with a static indirection to the CAF_BLACKHOLE
304 Why do we build a BLACKHOLE in the heap rather than just updating
305 the thunk directly? It's so that we only need one kind of update
306 frame - otherwise we'd need a static version of the update frame too.
308 newCaf() does the following:
310 - it puts the CAF on the oldest generation's mut-once list.
311 This is so that we can treat the CAF as a root when collecting
314 For GHCI, we have additional requirements when dealing with CAFs:
316 - we must *retain* all dynamically-loaded CAFs ever entered,
317 just in case we need them again.
318 - we must be able to *revert* CAFs that have been evaluated, to
319 their pre-evaluated form.
321 To do this, we use an additional CAF list. When newCaf() is
322 called on a dynamically-loaded CAF, we add it to the CAF list
323 instead of the old-generation mutable list, and save away its
324 old info pointer (in caf->saved_info) for later reversion.
326 To revert all the CAFs, we traverse the CAF list and reset the
327 info pointer to caf->saved_info, then throw away the CAF list.
328 (see GC.c:revertCAFs()).
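
   For illustration, that traversal looks roughly like this (a sketch
   only; the real code is GC.c:revertCAFs()):

       StgIndStatic *c;
       for (c = (StgIndStatic *)revertible_caf_list; c != NULL;
            c = (StgIndStatic *)c->static_link) {
           SET_INFO(c, c->saved_info);
           c->saved_info = NULL;
       }
       revertible_caf_list = NULL;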
332 -------------------------------------------------------------------------- */
335 newCAF(StgClosure* caf)
342 // If we are in GHCi _and_ we are using dynamic libraries,
343 // then we can't redirect newCAF calls to newDynCAF (see below),
344 // so we make newCAF behave almost like newDynCAF.
345 // The dynamic libraries might be used by both the interpreted
346 // program and GHCi itself, so they must not be reverted.
347 // This also means that in GHCi with dynamic libraries, CAFs are not
348 // garbage collected. If this turns out to be a problem, we could
349 // do another hack here and do an address range test on caf to figure
350 // out whether it is from a dynamic library.
351 ((StgIndStatic *)caf)->saved_info = (StgInfoTable *)caf->header.info;
352 ((StgIndStatic *)caf)->static_link = caf_list;
357 /* Put this CAF on the mutable list for the old generation.
358 * This is a HACK - the IND_STATIC closure doesn't really have
359 * a mut_link field, but we pretend it has - in fact we re-use
360 * the STATIC_LINK field for the time being, because when we
361 * come to do a major GC we won't need the mut_link field
362 * any more and can use it as a STATIC_LINK.
364 ((StgIndStatic *)caf)->saved_info = NULL;
365 recordMutableGen(caf, oldest_gen);
371 // An alternate version of newCaf which is used for dynamically loaded
372 // object code in GHCi. In this case we want to retain *all* CAFs in
373 // the object code, because they might be demanded at any time from an
374 // expression evaluated on the command line.
375 // Also, GHCi might want to revert CAFs, so we add these to the
376 // revertible_caf_list.
378 // The linker hackily arranges that references to newCaf from dynamic
379 // code end up pointing to newDynCAF.
381 newDynCAF(StgClosure *caf)
385 ((StgIndStatic *)caf)->saved_info = (StgInfoTable *)caf->header.info;
386 ((StgIndStatic *)caf)->static_link = revertible_caf_list;
387 revertible_caf_list = caf;
392 /* -----------------------------------------------------------------------------
394 -------------------------------------------------------------------------- */
397 allocNursery (step *stp, bdescr *tail, nat blocks)
402 // Allocate a nursery: we allocate fresh blocks one at a time and
403 // cons them on to the front of the list, not forgetting to update
404 // the back pointer on the tail of the list to point to the new block.
405 for (i=0; i < blocks; i++) {
408 processNursery() in LdvProfile.c assumes that every block group in
409 the nursery contains only a single block. So, if a block group is
410 given multiple blocks, change processNursery() accordingly.
414 // double-link the nursery: we might need to insert blocks
421 bd->free = bd->start;
429 assignNurseriesToCapabilities (void)
434 for (i = 0; i < n_nurseries; i++) {
435 capabilities[i].r.rNursery = &nurseries[i];
436 capabilities[i].r.rCurrentNursery = nurseries[i].blocks;
437 capabilities[i].r.rCurrentAlloc = NULL;
439 #else /* THREADED_RTS */
440 MainCapability.r.rNursery = &nurseries[0];
441 MainCapability.r.rCurrentNursery = nurseries[0].blocks;
442 MainCapability.r.rCurrentAlloc = NULL;
447 allocNurseries( void )
451 for (i = 0; i < n_nurseries; i++) {
452 nurseries[i].blocks =
453 allocNursery(&nurseries[i], NULL,
454 RtsFlags.GcFlags.minAllocAreaSize);
455 nurseries[i].n_blocks = RtsFlags.GcFlags.minAllocAreaSize;
456 nurseries[i].old_blocks = NULL;
457 nurseries[i].n_old_blocks = 0;
459 assignNurseriesToCapabilities();
463 resetNurseries( void )
469 for (i = 0; i < n_nurseries; i++) {
471 for (bd = stp->blocks; bd; bd = bd->link) {
472 bd->free = bd->start;
473 ASSERT(bd->gen_no == 0);
474 ASSERT(bd->step == stp);
475 IF_DEBUG(sanity,memset(bd->start, 0xaa, BLOCK_SIZE));
478 assignNurseriesToCapabilities();
482 countNurseryBlocks (void)
487 for (i = 0; i < n_nurseries; i++) {
488 blocks += nurseries[i].n_blocks;
494 resizeNursery ( step *stp, nat blocks )
499 nursery_blocks = stp->n_blocks;
500 if (nursery_blocks == blocks) return;
502 if (nursery_blocks < blocks) {
503 debugTrace(DEBUG_gc, "increasing size of nursery to %d blocks",
505 stp->blocks = allocNursery(stp, stp->blocks, blocks-nursery_blocks);
510 debugTrace(DEBUG_gc, "decreasing size of nursery to %d blocks",
514 while (nursery_blocks > blocks) {
516 next_bd->u.back = NULL;
517 nursery_blocks -= bd->blocks; // might be a large block
522 // might have gone just under, by freeing a large block, so make
523 // up the difference.
524 if (nursery_blocks < blocks) {
525 stp->blocks = allocNursery(stp, stp->blocks, blocks-nursery_blocks);
529 stp->n_blocks = blocks;
530 ASSERT(countBlocks(stp->blocks) == stp->n_blocks);
534 // Resize each of the nurseries to the specified size.
537 resizeNurseriesFixed (nat blocks)
540 for (i = 0; i < n_nurseries; i++) {
541 resizeNursery(&nurseries[i], blocks);
546 // Resize the nurseries to the total specified size.
549 resizeNurseries (nat blocks)
551 // If there are multiple nurseries, then we just divide the number
552 // of available blocks between them.
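  // For example (illustrative): with 4 nurseries, resizeNurseries(512)
  // resizes each nursery to 128 blocks.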
553 resizeNurseriesFixed(blocks / n_nurseries);
556 /* -----------------------------------------------------------------------------
557 The allocate() interface
559 The allocateInGen() function allocates memory directly into a specific
560 generation. It always succeeds, and returns a chunk of memory n
561 words long. n can be larger than the size of a block if necessary,
562 in which case a contiguous block group will be allocated.
564 allocate(n) is equivalent to allocateInGen(g0).
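
   For example (an illustrative sketch, not code from this file), an
   ARR_WORDS object with n payload words could be built on top of it:

       StgArrWords *arr;
       arr = (StgArrWords *)allocate(sizeofW(StgArrWords) + n);
       SET_ARR_HDR(arr, &stg_ARR_WORDS_info, CCCS, n);

   (stgAllocForGMP() below uses the same pattern via allocateLocal().)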
565 -------------------------------------------------------------------------- */
568 allocateInGen (generation *g, nat n)
576 TICK_ALLOC_HEAP_NOCTR(n);
581 if (n >= LARGE_OBJECT_THRESHOLD/sizeof(W_))
583 lnat req_blocks = (lnat)BLOCK_ROUND_UP(n*sizeof(W_)) / BLOCK_SIZE;
585 // Attempting to allocate an object larger than maxHeapSize
586 // should definitely be disallowed. (bug #1791)
587 if (RtsFlags.GcFlags.maxHeapSize > 0 &&
588 req_blocks >= RtsFlags.GcFlags.maxHeapSize) {
592 bd = allocGroup(req_blocks);
593 dbl_link_onto(bd, &stp->large_objects);
594 stp->n_large_blocks += bd->blocks; // might be larger than req_blocks
597 bd->flags = BF_LARGE;
598 bd->free = bd->start + n;
603 /* small allocation (<LARGE_OBJECT_THRESHOLD) */
605 if (bd == NULL || bd->free + n > bd->start + BLOCK_SIZE_W) {
610 bd->link = stp->blocks;
627 return allocateInGen(g0,n);
631 allocatedBytes( void )
635 allocated = alloc_blocks * BLOCK_SIZE_W;
636 if (pinned_object_block != NULL) {
637 allocated -= (pinned_object_block->start + BLOCK_SIZE_W) -
638 pinned_object_block->free;
644 // split N blocks off the start of the given bdescr, returning the
645 // remainder as a new block group. We treat the remainder as if it
646 // had been freshly allocated in generation 0.
648 splitLargeBlock (bdescr *bd, nat blocks)
652 // subtract the original number of blocks from the counter first
653 bd->step->n_large_blocks -= bd->blocks;
655 new_bd = splitBlockGroup (bd, blocks);
657 dbl_link_onto(new_bd, &g0s0->large_objects);
658 g0s0->n_large_blocks += new_bd->blocks;
659 new_bd->gen_no = g0s0->no;
661 new_bd->flags = BF_LARGE;
662 new_bd->free = bd->free;
664 // add the new number of blocks to the counter. Due to the gaps
665 // for block descriptors, new_bd->blocks + bd->blocks might not be
666 // equal to the original bd->blocks, which is why we do it this way.
667 bd->step->n_large_blocks += bd->blocks;
672 /* -----------------------------------------------------------------------------
675 This allocates memory in the current thread - it is intended for
676 use primarily from STG-land where we have a Capability. It is
677 better than allocate() because it doesn't require taking the
678 sm_mutex lock in the common case.
680 Memory is allocated directly from the nursery if possible (but not
681 from the current nursery block, so as not to interfere with
683 -------------------------------------------------------------------------- */
686 allocateLocal (Capability *cap, nat n)
691 if (n >= LARGE_OBJECT_THRESHOLD/sizeof(W_)) {
692 return allocateInGen(g0,n);
695 /* small allocation (<LARGE_OBJECT_THRESHOLD) */
697 TICK_ALLOC_HEAP_NOCTR(n);
700 bd = cap->r.rCurrentAlloc;
701 if (bd == NULL || bd->free + n > bd->start + BLOCK_SIZE_W) {
703 // The CurrentAlloc block is full, we need to find another
704 // one. First, we try taking the next block from the
706 bd = cap->r.rCurrentNursery->link;
708 if (bd == NULL || bd->free + n > bd->start + BLOCK_SIZE_W) {
709 // The nursery is empty, or the next block is already
710 // full: allocate a fresh block (we can't fail here).
713 cap->r.rNursery->n_blocks++;
716 bd->step = cap->r.rNursery;
718 // NO: alloc_blocks++;
719 // calcAllocated() uses the size of the nursery, and we've
720 // already bumped nursery->n_blocks above.
722 // we have a block in the nursery: take it and put
723 // it at the *front* of the nursery list, and use it
724 // to allocate() from.
725 cap->r.rCurrentNursery->link = bd->link;
726 if (bd->link != NULL) {
727 bd->link->u.back = cap->r.rCurrentNursery;
730 dbl_link_onto(bd, &cap->r.rNursery->blocks);
731 cap->r.rCurrentAlloc = bd;
732 IF_DEBUG(sanity, checkNurserySanity(cap->r.rNursery));
739 /* ---------------------------------------------------------------------------
740 Allocate a fixed/pinned object.
742 We allocate small pinned objects into a single block, allocating a
743 new block when the current one overflows. The block is chained
744 onto the large_object_list of generation 0 step 0.
746 NOTE: The GC can't in general handle pinned objects. This
747 interface is only safe to use for ByteArrays, which have no
748 pointers and don't require scavenging. It works because the
749 block's descriptor has the BF_LARGE flag set, so the block is
750 treated as a large object and chained onto various lists, rather
751 than the individual objects being copied. However, when it comes
752 to scavenge the block, the GC will only scavenge the first object.
753 The reason is that the GC can't linearly scan a block of pinned
754 objects at the moment (doing so would require using the
755 mostly-copying techniques). But since we're restricting ourselves
756 to pinned ByteArrays, not scavenging is ok.
758 This function is called by newPinnedByteArray# which immediately
759 fills the allocated memory with a MutableByteArray#.
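
   Roughly, the caller does something like this (an illustrative sketch;
   the real caller is the newPinnedByteArray# primop, written in .cmm):

       StgArrWords *arr;
       arr = (StgArrWords *)allocatePinned(sizeofW(StgArrWords) + payload_words);
       SET_ARR_HDR(arr, &stg_ARR_WORDS_info, CCCS, payload_words);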
760 ------------------------------------------------------------------------- */
763 allocatePinned( nat n )
766 bdescr *bd = pinned_object_block;
768 // If the request is for a large object, then allocate()
769 // will give us a pinned object anyway.
770 if (n >= LARGE_OBJECT_THRESHOLD/sizeof(W_)) {
776 TICK_ALLOC_HEAP_NOCTR(n);
779 // we always return 8-byte aligned memory. bd->free must be
780 // 8-byte aligned to begin with, so we just round up n to
781 // the nearest multiple of 8 bytes.
782 if (sizeof(StgWord) == 4) { n = (n+1) & ~1; }  // round up to an even number of words (8 bytes)
786 // If we don't have a block of pinned objects yet, or the current
787 // one isn't large enough to hold the new object, allocate a new one.
788 if (bd == NULL || (bd->free + n) > (bd->start + BLOCK_SIZE_W)) {
789 pinned_object_block = bd = allocBlock();
790 dbl_link_onto(bd, &g0s0->large_objects);
791 g0s0->n_large_blocks++;
794 bd->flags = BF_PINNED | BF_LARGE;
795 bd->free = bd->start;
805 /* -----------------------------------------------------------------------------
807 -------------------------------------------------------------------------- */
810 This is the write barrier for MUT_VARs, a.k.a. IORefs. A
811 MUT_VAR_CLEAN object is not on the mutable list; a MUT_VAR_DIRTY
812 is. When written to, a MUT_VAR_CLEAN turns into a MUT_VAR_DIRTY
813 and is put on the mutable list.
816 dirty_MUT_VAR(StgRegTable *reg, StgClosure *p)
818 Capability *cap = regTableToCapability(reg);
820 if (p->header.info == &stg_MUT_VAR_CLEAN_info) {
821 p->header.info = &stg_MUT_VAR_DIRTY_info;
822 bd = Bdescr((StgPtr)p);
823 if (bd->gen_no > 0) recordMutableCap(p,cap,bd->gen_no);
827 // Setting a TSO's link field with a write barrier.
828 // It is *not* necessary to call this function when
829 // * setting the link field to END_TSO_QUEUE
830 // * putting a TSO on the blackhole_queue
831 // * setting the link field of the currently running TSO, as it
832 // will already be dirty.
834 setTSOLink (Capability *cap, StgTSO *tso, StgTSO *target)
837 if ((tso->flags & (TSO_DIRTY|TSO_LINK_DIRTY)) == 0) {
838 tso->flags |= TSO_LINK_DIRTY;
839 bd = Bdescr((StgPtr)tso);
840 if (bd->gen_no > 0) recordMutableCap((StgClosure*)tso,cap,bd->gen_no);
846 dirty_TSO (Capability *cap, StgTSO *tso)
849 if ((tso->flags & (TSO_DIRTY|TSO_LINK_DIRTY)) == 0) {
850 bd = Bdescr((StgPtr)tso);
851 if (bd->gen_no > 0) recordMutableCap((StgClosure*)tso,cap,bd->gen_no);
853 tso->flags |= TSO_DIRTY;
857 This is the write barrier for MVARs. An MVAR_CLEAN object is not
858 on the mutable list; a MVAR_DIRTY is. When written to, a
859 MVAR_CLEAN turns into a MVAR_DIRTY and is put on the mutable list.
860 The check for MVAR_CLEAN is inlined at the call site for speed,
861 this really does make a difference on concurrency-heavy benchmarks
862 such as Chameneos and cheap-concurrency.
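
   In C terms the inlined test at a call site looks roughly like this
   (an illustrative sketch; the real call sites are in the .cmm primops):

       if (GET_INFO(mvar) == &stg_MVAR_CLEAN_info) {
           dirty_MVAR(reg, (StgClosure *)mvar);
       }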
865 dirty_MVAR(StgRegTable *reg, StgClosure *p)
867 Capability *cap = regTableToCapability(reg);
869 bd = Bdescr((StgPtr)p);
870 if (bd->gen_no > 0) recordMutableCap(p,cap,bd->gen_no);
873 /* -----------------------------------------------------------------------------
874 Allocation functions for GMP.
876 These all use the allocate() interface - we can't have any garbage
877 collection going on during a gmp operation, so we use allocate()
878 which always succeeds. The gmp operations which might need to
879 allocate will ask the storage manager (via doYouWantToGC()) whether
880 a garbage collection is required, in case we get into a loop doing
881 only allocate() style allocation.
882 -------------------------------------------------------------------------- */
885 stgAllocForGMP (size_t size_in_bytes)
888 nat data_size_in_words, total_size_in_words;
890 /* round up to a whole number of words */
891 data_size_in_words = (size_in_bytes + sizeof(W_) + 1) / sizeof(W_);
892 total_size_in_words = sizeofW(StgArrWords) + data_size_in_words;
894 /* allocate and fill it in. */
895 #if defined(THREADED_RTS)
896 arr = (StgArrWords *)allocateLocal(myTask()->cap, total_size_in_words);
898 arr = (StgArrWords *)allocateLocal(&MainCapability, total_size_in_words);
900 SET_ARR_HDR(arr, &stg_ARR_WORDS_info, CCCS, data_size_in_words);
902 /* and return a ptr to the goods inside the array */
907 stgReallocForGMP (void *ptr, size_t old_size, size_t new_size)
909 void *new_stuff_ptr = stgAllocForGMP(new_size);
911 char *p = (char *) ptr;
912 char *q = (char *) new_stuff_ptr;
914 for (; i < old_size; i++, p++, q++) {
918 return(new_stuff_ptr);
922 stgDeallocForGMP (void *ptr STG_UNUSED,
923 size_t size STG_UNUSED)
925 /* easy for us: the garbage collector does the dealloc'n */
928 /* -----------------------------------------------------------------------------
930 * -------------------------------------------------------------------------- */
932 /* -----------------------------------------------------------------------------
935 * Approximate how much we've allocated: number of blocks in the
936 * nursery + blocks allocated via allocate() - unused nursery blocks.
937 * This leaves a little slop at the end of each block, and doesn't
938 * take into account large objects (ToDo).
939 * -------------------------------------------------------------------------- */
942 calcAllocated( void )
947 allocated = allocatedBytes();
948 allocated += countNurseryBlocks() * BLOCK_SIZE_W;
953 for (i = 0; i < n_nurseries; i++) {
955 for ( bd = capabilities[i].r.rCurrentNursery->link;
956 bd != NULL; bd = bd->link ) {
957 allocated -= BLOCK_SIZE_W;
959 cap = &capabilities[i];
960 if (cap->r.rCurrentNursery->free <
961 cap->r.rCurrentNursery->start + BLOCK_SIZE_W) {
962 allocated -= (cap->r.rCurrentNursery->start + BLOCK_SIZE_W)
963 - cap->r.rCurrentNursery->free;
967 bdescr *current_nursery = MainCapability.r.rCurrentNursery;
969 for ( bd = current_nursery->link; bd != NULL; bd = bd->link ) {
970 allocated -= BLOCK_SIZE_W;
972 if (current_nursery->free < current_nursery->start + BLOCK_SIZE_W) {
973 allocated -= (current_nursery->start + BLOCK_SIZE_W)
974 - current_nursery->free;
979 total_allocated += allocated;
983 /* Approximate the amount of live data in the heap. To be called just
984 * after garbage collection (see GarbageCollect()).
993 if (RtsFlags.GcFlags.generations == 1) {
994 return g0s0->n_large_blocks + g0s0->n_blocks;
997 for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
998 for (s = 0; s < generations[g].n_steps; s++) {
999 /* approximate amount of live data (doesn't take into account slop
1000 * at end of each block).
1002 if (g == 0 && s == 0) {
1005 stp = &generations[g].steps[s];
1006 live += stp->n_large_blocks + stp->n_blocks;
1013 countOccupied(bdescr *bd)
1018 for (; bd != NULL; bd = bd->link) {
1019 words += bd->free - bd->start;
1024 // Return an accurate count of the live data in the heap, excluding
1033 if (RtsFlags.GcFlags.generations == 1) {
1034 return g0s0->n_words + countOccupied(g0s0->large_objects);
1038 for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
1039 for (s = 0; s < generations[g].n_steps; s++) {
1040 if (g == 0 && s == 0) continue;
1041 stp = &generations[g].steps[s];
1042 live += stp->n_words + countOccupied(stp->large_objects);
1048 /* Approximate the number of blocks that will be needed at the next
1049 * garbage collection.
1051 * Assume: all data currently live will remain live. Steps that will
1052 * be collected next time will therefore need twice as many blocks
1053 * since all the data will be copied.
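 *
 * For example (illustrative): a step holding 100 blocks of live data that
 * will be copy-collected next time contributes 200 blocks to the estimate,
 * whereas a step that will not be collected (or will be compacted in
 * place) contributes just its current 100 blocks.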
1062 for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
1063 for (s = 0; s < generations[g].n_steps; s++) {
1064 if (g == 0 && s == 0) { continue; }
1065 stp = &generations[g].steps[s];
1066 if (g == 0 || // always collect gen 0
1067 (generations[g].steps[0].n_blocks +
1068 generations[g].steps[0].n_large_blocks
1069 > generations[g].max_blocks
1070 && stp->is_compacted == 0)) {
1071 needed += 2 * stp->n_blocks + stp->n_large_blocks;
1073 needed += stp->n_blocks + stp->n_large_blocks;
1080 /* ----------------------------------------------------------------------------
1083 Executable memory must be managed separately from non-executable
1084 memory. Most OSs these days require you to jump through hoops to
1085 dynamically allocate executable memory, due to various security
1088 Here we provide a small memory allocator for executable memory.
1089 Memory is managed with a page granularity; we allocate linearly
1090 in the page, and when the page is emptied (all objects on the page
1091 are free) we free the page again, not forgetting to make it
1094 TODO: The inability to handle objects bigger than BLOCK_SIZE_W means that
1095 the linker cannot use allocateExec for loading object code files
1096 on Windows. Once allocateExec can handle larger objects, the linker
1097 should be modified to use allocateExec instead of VirtualAlloc.
1098 ------------------------------------------------------------------------- */
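
/* Illustrative usage sketch (an assumed client, e.g. the adjustor code;
 * not code from this file):
 *
 *     void *code = allocateExec(len);   // get writable + executable memory
 *     memcpy(code, codeTemplate, len);  // copy the machine code into it
 *     ...                               // ... and jump to it
 *     freeExec(code);                   // the page is recycled once every
 *                                       // object on it has been freed
 */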
1100 static bdescr *exec_block;
1102 void *allocateExec (nat bytes)
1109 // round up to words.
1110 n = (bytes + sizeof(W_) + 1) / sizeof(W_);
1112 if (n+1 > BLOCK_SIZE_W) {
1113 barf("allocateExec: can't handle large objects");
1116 if (exec_block == NULL ||
1117 exec_block->free + n + 1 > exec_block->start + BLOCK_SIZE_W) {
1119 lnat pagesize = getPageSize();
1120 bd = allocGroup(stg_max(1, pagesize / BLOCK_SIZE));
1121 debugTrace(DEBUG_gc, "allocate exec block %p", bd->start);
1123 bd->flags = BF_EXEC;
1124 bd->link = exec_block;
1125 if (exec_block != NULL) {
1126 exec_block->u.back = bd;
1129 setExecutable(bd->start, bd->blocks * BLOCK_SIZE, rtsTrue);
1132 *(exec_block->free) = n; // store the size of this chunk
1133 exec_block->gen_no += n; // gen_no stores the number of words allocated
1134 ret = exec_block->free + 1;
1135 exec_block->free += n + 1;
1141 void freeExec (void *addr)
1143 StgPtr p = (StgPtr)addr - 1;
1144 bdescr *bd = Bdescr((StgPtr)p);
1146 if ((bd->flags & BF_EXEC) == 0) {
1147 barf("freeExec: not executable");
1150 if (*(StgPtr)p == 0) {
1151 barf("freeExec: already free?");
1156 bd->gen_no -= *(StgPtr)p;
1159 if (bd->gen_no == 0) {
1160 // Free the block if it is empty, but not if it is the block at
1161 // the head of the queue.
1162 if (bd != exec_block) {
1163 debugTrace(DEBUG_gc, "free exec block %p", bd->start);
1164 dbl_link_remove(bd, &exec_block);
1165 setExecutable(bd->start, bd->blocks * BLOCK_SIZE, rtsFalse);
1168 bd->free = bd->start;
1175 /* -----------------------------------------------------------------------------
1178 memInventory() checks for memory leaks by counting up all the
1179 blocks we know about and comparing that to the number of blocks
1180 allegedly floating around in the system.
1181 -------------------------------------------------------------------------- */
1185 // Useful for finding partially full blocks in gdb
1186 void findSlop(bdescr *bd);
1187 void findSlop(bdescr *bd)
1191 for (; bd != NULL; bd = bd->link) {
1192 slop = (bd->blocks * BLOCK_SIZE_W) - (bd->free - bd->start);
1193 if (slop > (1024/sizeof(W_))) {
1194 debugBelch("block at %p (bdescr %p) has %ldKB slop\n",
1195 bd->start, bd, slop / (1024/sizeof(W_)));
1201 countBlocks(bdescr *bd)
1204 for (n=0; bd != NULL; bd=bd->link) {
1210 // (*1) Just like countBlocks, except that we adjust the count for a
1211 // megablock group so that it doesn't include the extra few blocks
1212 // that would be taken up by block descriptors in the second and
1213 // subsequent megablocks. This is so we can tally the count with the
1214 // number of blocks allocated in the system, for memInventory().
1216 countAllocdBlocks(bdescr *bd)
1219 for (n=0; bd != NULL; bd=bd->link) {
1221 // hack for megablock groups: see (*1) above
1222 if (bd->blocks > BLOCKS_PER_MBLOCK) {
1223 n -= (MBLOCK_SIZE / BLOCK_SIZE - BLOCKS_PER_MBLOCK)
1224 * (bd->blocks/(MBLOCK_SIZE/BLOCK_SIZE));
1231 stepBlocks (step *stp)
1233 ASSERT(countBlocks(stp->blocks) == stp->n_blocks);
1234 ASSERT(countBlocks(stp->large_objects) == stp->n_large_blocks);
1235 return stp->n_blocks + stp->n_old_blocks +
1236 countAllocdBlocks(stp->large_objects);
1240 memInventory (rtsBool show)
1244 lnat gen_blocks[RtsFlags.GcFlags.generations];
1245 lnat nursery_blocks, retainer_blocks,
1246 arena_blocks, exec_blocks;
1247 lnat live_blocks = 0, free_blocks = 0;
1250 // count the blocks we currently have
1252 for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
1254 for (i = 0; i < n_capabilities; i++) {
1255 gen_blocks[g] += countBlocks(capabilities[i].mut_lists[g]);
1257 gen_blocks[g] += countAllocdBlocks(generations[g].mut_list);
1258 for (s = 0; s < generations[g].n_steps; s++) {
1259 stp = &generations[g].steps[s];
1260 gen_blocks[g] += stepBlocks(stp);
1265 for (i = 0; i < n_nurseries; i++) {
1266 nursery_blocks += stepBlocks(&nurseries[i]);
1269 retainer_blocks = 0;
1271 if (RtsFlags.ProfFlags.doHeapProfile == HEAP_BY_RETAINER) {
1272 retainer_blocks = retainerStackBlocks();
1276 // count the blocks allocated by the arena allocator
1277 arena_blocks = arenaBlocks();
1279 // count the blocks containing executable memory
1280 exec_blocks = countAllocdBlocks(exec_block);
1282 /* count the blocks on the free list */
1283 free_blocks = countFreeList();
1286 for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
1287 live_blocks += gen_blocks[g];
1289 live_blocks += nursery_blocks +
1290 retainer_blocks + arena_blocks + exec_blocks;
1292 #define MB(n) (((n) * BLOCK_SIZE_W) / ((1024*1024)/sizeof(W_)))
1294 leak = live_blocks + free_blocks != mblocks_allocated * BLOCKS_PER_MBLOCK;
1298 debugBelch("Memory leak detected:\n");
1300 debugBelch("Memory inventory:\n");
1302 for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
1303 debugBelch(" gen %d blocks : %5lu blocks (%lu MB)\n", g,
1304 gen_blocks[g], MB(gen_blocks[g]));
1306 debugBelch(" nursery : %5lu blocks (%lu MB)\n",
1307 nursery_blocks, MB(nursery_blocks));
1308 debugBelch(" retainer : %5lu blocks (%lu MB)\n",
1309 retainer_blocks, MB(retainer_blocks));
1310 debugBelch(" arena blocks : %5lu blocks (%lu MB)\n",
1311 arena_blocks, MB(arena_blocks));
1312 debugBelch(" exec : %5lu blocks (%lu MB)\n",
1313 exec_blocks, MB(exec_blocks));
1314 debugBelch(" free : %5lu blocks (%lu MB)\n",
1315 free_blocks, MB(free_blocks));
1316 debugBelch(" total : %5lu blocks (%lu MB)\n",
1317 live_blocks + free_blocks, MB(live_blocks+free_blocks));
1319 debugBelch("\n in system : %5lu blocks (%lu MB)\n",
1320 mblocks_allocated * BLOCKS_PER_MBLOCK, mblocks_allocated);
1326 /* Full heap sanity check. */
1332 if (RtsFlags.GcFlags.generations == 1) {
1333 checkHeap(g0s0->blocks);
1334 checkChain(g0s0->large_objects);
1337 for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
1338 for (s = 0; s < generations[g].n_steps; s++) {
1339 if (g == 0 && s == 0) { continue; }
1340 ASSERT(countBlocks(generations[g].steps[s].blocks)
1341 == generations[g].steps[s].n_blocks);
1342 ASSERT(countBlocks(generations[g].steps[s].large_objects)
1343 == generations[g].steps[s].n_large_blocks);
1344 checkHeap(generations[g].steps[s].blocks);
1345 checkChain(generations[g].steps[s].large_objects);
1347 checkMutableList(generations[g].mut_list, g);
1352 for (s = 0; s < n_nurseries; s++) {
1353 ASSERT(countBlocks(nurseries[s].blocks)
1354 == nurseries[s].n_blocks);
1355 ASSERT(countBlocks(nurseries[s].large_objects)
1356 == nurseries[s].n_large_blocks);
1359 checkFreeListSanity();
1363 /* Nursery sanity check */
1365 checkNurserySanity( step *stp )
1371 for (bd = stp->blocks; bd != NULL; bd = bd->link) {
1372 ASSERT(bd->u.back == prev);
1374 blocks += bd->blocks;
1376 ASSERT(blocks == stp->n_blocks);
1379 // handy function for use in gdb, because Bdescr() is inlined.
1380 extern bdescr *_bdescr( StgPtr p );