/* -----------------------------------------------------------------------------
 *
 * (c) The GHC Team, 1998-2004
 *
 * Storage manager front end
 *
 * ---------------------------------------------------------------------------*/

#include "PosixSource.h"
#include "Rts.h"
#include "RtsUtils.h"
#include "RtsFlags.h"
#include "BlockAlloc.h"
#include "MBlock.h"
#include "OSThreads.h"
#include "Storage.h"
#include "RetainerProfile.h"   // for counting memory blocks (memInventory)
StgClosure *caf_list = NULL;

bdescr *small_alloc_list;      /* allocate()d small objects */
bdescr *pinned_object_block;   /* allocate pinned objects into this block */
nat alloc_blocks;              /* number of allocate()d blocks since GC */
nat alloc_blocks_lim;          /* approximate limit on alloc_blocks */

StgPtr alloc_Hp    = NULL;     /* next free byte in small_alloc_list */
StgPtr alloc_HpLim = NULL;     /* end of block at small_alloc_list */

generation *generations = NULL; /* all the generations */
generation *g0          = NULL; /* generation 0, for convenience */
generation *oldest_gen  = NULL; /* oldest generation, for convenience */
step *g0s0              = NULL; /* generation 0, step 0, for convenience */

ullong total_allocated = 0;    /* total memory allocated during run */

/*
 * Storage manager mutex: protects all the above state from
 * simultaneous access by two STG threads.
 */
#ifdef SMP
Mutex sm_mutex = INIT_MUTEX_VAR;
#endif

/* Forward references to the GMP allocation hooks, registered in
 * initStorage() below. */
static void *stgAllocForGMP   (size_t size_in_bytes);
static void *stgReallocForGMP (void *ptr, size_t old_size, size_t new_size);
static void  stgDeallocForGMP (void *ptr, size_t size);
void
initStorage( void )
{
  nat g, s;
  step *stp;
  generation *gen;

  if (generations != NULL) {
      // multi-init protection
      return;
  }

  /* Sanity check to make sure the LOOKS_LIKE_ macros appear to be
   * doing something reasonable.
   */
  ASSERT(LOOKS_LIKE_INFO_PTR(&stg_BLACKHOLE_info));
  ASSERT(LOOKS_LIKE_CLOSURE_PTR(&stg_dummy_ret_closure));
  ASSERT(!HEAP_ALLOCED(&stg_dummy_ret_closure));

  if (RtsFlags.GcFlags.maxHeapSize != 0 &&
      RtsFlags.GcFlags.heapSizeSuggestion >
      RtsFlags.GcFlags.maxHeapSize) {
    RtsFlags.GcFlags.maxHeapSize = RtsFlags.GcFlags.heapSizeSuggestion;
  }

  if (RtsFlags.GcFlags.maxHeapSize != 0 &&
      RtsFlags.GcFlags.minAllocAreaSize >
      RtsFlags.GcFlags.maxHeapSize) {
      errorBelch("maximum heap size (-M) is smaller than minimum alloc area size (-A)");
      exit(1);
  }

  initBlockAllocator();

  /* allocate generation info array */
  generations = (generation *)stgMallocBytes(RtsFlags.GcFlags.generations
                                             * sizeof(struct _generation),
                                             "initStorage: gens");

  /* Initialise all generations */
  for(g = 0; g < RtsFlags.GcFlags.generations; g++) {
    gen = &generations[g];
    gen->no = g;
    gen->mut_list = allocBlock();
    gen->collections = 0;
    gen->failed_promotions = 0;
    gen->max_blocks = 0;
  }

  /* A couple of convenience pointers */
  g0 = &generations[0];
  oldest_gen = &generations[RtsFlags.GcFlags.generations-1];

  /* Allocate step structures in each generation */
  if (RtsFlags.GcFlags.generations > 1) {
    /* Only for multiple-generations */

    /* Oldest generation: one step */
    oldest_gen->n_steps = 1;
    oldest_gen->steps =
      stgMallocBytes(1 * sizeof(struct _step), "initStorage: last step");

    /* set up all except the oldest generation with 2 steps */
    for(g = 0; g < RtsFlags.GcFlags.generations-1; g++) {
      generations[g].n_steps = RtsFlags.GcFlags.steps;
      generations[g].steps =
        stgMallocBytes (RtsFlags.GcFlags.steps * sizeof(struct _step),
                        "initStorage: steps");
    }
  } else {
    /* single generation, i.e. a two-space collector */
    g0->n_steps = 1;
    g0->steps = stgMallocBytes (sizeof(struct _step), "initStorage: steps");
  }

  /* Initialise all steps */
  for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
    for (s = 0; s < generations[g].n_steps; s++) {
      stp = &generations[g].steps[s];
      stp->no = s;
      stp->blocks = NULL;
      stp->n_blocks = 0;
      stp->n_to_blocks = 0;
      stp->gen = &generations[g];
      stp->gen_no = g;
      stp->hp = NULL;
      stp->hpLim = NULL;
      stp->hp_bd = NULL;
      stp->large_objects = NULL;
      stp->n_large_blocks = 0;
      stp->new_large_objects = NULL;
      stp->scavenged_large_objects = NULL;
      stp->n_scavenged_large_blocks = 0;
      stp->is_compacted = 0;
    }
  }

  /* Set up the destination pointers in each younger gen. step */
  for (g = 0; g < RtsFlags.GcFlags.generations-1; g++) {
    for (s = 0; s < generations[g].n_steps-1; s++) {
      generations[g].steps[s].to = &generations[g].steps[s+1];
    }
    generations[g].steps[s].to = &generations[g+1].steps[0];
  }

  /* The oldest generation has one step and it is compacted. */
  if (RtsFlags.GcFlags.compact) {
      if (RtsFlags.GcFlags.generations == 1) {
          errorBelch("WARNING: compaction is incompatible with -G1; disabled");
      } else {
          oldest_gen->steps[0].is_compacted = 1;
      }
  }
  oldest_gen->steps[0].to = &oldest_gen->steps[0];

  /* generation 0 is special: that's the nursery */
  generations[0].max_blocks = 0;

  /* G0S0: the allocation area.  Policy: keep the allocation area
   * small to begin with, even if we have a large suggested heap
   * size.  Reason: we're going to do a major collection first, and we
   * don't want it to be a big one.  This vague idea is borne out by
   * rigorous experimental evidence.
   */
  g0s0 = &generations[0].steps[0];

  allocNurseries();

  weak_ptr_list = NULL;
  caf_list = NULL;

  /* initialise the allocate() interface */
  small_alloc_list = NULL;
  alloc_blocks = 0;
  alloc_blocks_lim = RtsFlags.GcFlags.minAllocAreaSize;

  /* Tell GNU multi-precision pkg about our custom alloc functions */
  mp_set_memory_functions(stgAllocForGMP, stgReallocForGMP, stgDeallocForGMP);

  IF_DEBUG(gc, statDescribeGens());
}

void
exitStorage (void)
{
    stat_exit(calcAllocated());
}
/* -----------------------------------------------------------------------------
   CAF management.

   The entry code for every CAF does the following:

      - builds a CAF_BLACKHOLE in the heap
      - pushes an update frame pointing to the CAF_BLACKHOLE
      - invokes UPD_CAF(), which:
          - calls newCaf, below
          - updates the CAF with a static indirection to the CAF_BLACKHOLE

   Why do we build a BLACKHOLE in the heap rather than just updating
   the thunk directly?  It's so that we only need one kind of update
   frame - otherwise we'd need a static version of the update frame too.

   newCaf() does the following:

      - it puts the CAF on the oldest generation's mut-once list.
        This is so that we can treat the CAF as a root when collecting
        younger generations.

   For GHCI, we have additional requirements when dealing with CAFs:

      - we must *retain* all dynamically-loaded CAFs ever entered,
        just in case we need them again.
      - we must be able to *revert* CAFs that have been evaluated, to
        their pre-evaluated form.

   To do this, we use an additional CAF list.  When newCaf() is
   called on a dynamically-loaded CAF, we add it to the CAF list
   instead of the old-generation mutable list, and save away its
   old info pointer (in caf->saved_info) for later reversion.

   To revert all the CAFs, we traverse the CAF list and reset the
   info pointer to caf->saved_info, then throw away the CAF list.
   (see GC.c:revertCAFs()).

   -------------------------------------------------------------------------- */
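/* Illustrative sketch (not the RTS implementation; see GC.c:revertCAFs()
 * for the real code): reverting every CAF on caf_list as described
 * above, by restoring the info pointer saved in saved_info.  Kept in
 * an #if 0 block so it is not compiled into the RTS.
 */
#if 0
static void revertCAFs_sketch( void )
{
    StgIndStatic *c, *next;
    for (c = (StgIndStatic *)caf_list; c != NULL; c = next) {
        next = (StgIndStatic *)c->static_link;
        SET_INFO((StgClosure *)c, c->saved_info);  /* undo the update */
        c->saved_info = NULL;
    }
    caf_list = NULL;                               /* throw the list away */
}
#endif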
void
newCAF(StgClosure* caf)
{
  /* Put this CAF on the mutable list for the old generation.
   * This is a HACK - the IND_STATIC closure doesn't really have
   * a mut_link field, but we pretend it has - in fact we re-use
   * the STATIC_LINK field for the time being, because when we
   * come to do a major GC we won't need the mut_link field
   * any more and can use it as a STATIC_LINK.
   */
  ACQUIRE_SM_LOCK;
  ((StgIndStatic *)caf)->saved_info = NULL;
  recordMutableGen(caf, oldest_gen);
  RELEASE_SM_LOCK;

#ifdef PAR
  /* If we are PAR or DIST then we never forget a CAF */
  { globalAddr *newGA;
    //debugBelch("<##> Globalising CAF %08x %s",caf,info_type(caf));
    newGA = makeGlobal(caf,rtsTrue); /* given full weight */
    ASSERT(newGA);
  }
#endif /* PAR */
}
// An alternate version of newCaf which is used for dynamically loaded
// object code in GHCi.  In this case we want to retain *all* CAFs in
// the object code, because they might be demanded at any time from an
// expression evaluated on the command line.
//
// The linker hackily arranges that references to newCaf from dynamic
// code end up pointing to newDynCAF.
void
newDynCAF(StgClosure *caf)
{
    ACQUIRE_SM_LOCK;

    ((StgIndStatic *)caf)->saved_info  = (StgInfoTable *)caf->header.info;
    ((StgIndStatic *)caf)->static_link = caf_list;
    caf_list = caf;

    RELEASE_SM_LOCK;
}
/* -----------------------------------------------------------------------------
   Nursery management.
   -------------------------------------------------------------------------- */
void
allocNurseries( void )
{
#ifdef SMP
  Capability *cap;
  bdescr *bd;

  g0s0->blocks = NULL;
  g0s0->n_blocks = 0;
  for (cap = free_capabilities; cap != NULL; cap = cap->link) {
    cap->r.rNursery = allocNursery(NULL, RtsFlags.GcFlags.minAllocAreaSize);
    cap->r.rCurrentNursery = cap->r.rNursery;
    /* Set the back links to be equal to the Capability,
     * so we can do slightly better informed locking.
     */
    for (bd = cap->r.rNursery; bd != NULL; bd = bd->link) {
      bd->u.back = (bdescr *)cap;
    }
  }
#else /* !SMP */
  g0s0->blocks      = allocNursery(NULL, RtsFlags.GcFlags.minAllocAreaSize);
  g0s0->n_blocks    = RtsFlags.GcFlags.minAllocAreaSize;
  g0s0->to_blocks   = NULL;
  g0s0->n_to_blocks = 0;
  MainCapability.r.rNursery        = g0s0->blocks;
  MainCapability.r.rCurrentNursery = g0s0->blocks;
  /* hp, hpLim, hp_bd, to_space etc. aren't used in G0S0 */
#endif
}
void
resetNurseries( void )
{
  bdescr *bd;
#ifdef SMP
  Capability *cap;

  /* All tasks must be stopped */
  ASSERT(n_free_capabilities == RtsFlags.ParFlags.nNodes);

  for (cap = free_capabilities; cap != NULL; cap = cap->link) {
    for (bd = cap->r.rNursery; bd; bd = bd->link) {
      bd->free = bd->start;
      ASSERT(bd->gen_no == 0);
      ASSERT(bd->step == g0s0);
      IF_DEBUG(sanity,memset(bd->start, 0xaa, BLOCK_SIZE));
    }
    cap->r.rCurrentNursery = cap->r.rNursery;
  }
#else
  for (bd = g0s0->blocks; bd; bd = bd->link) {
    bd->free = bd->start;
    ASSERT(bd->gen_no == 0);
    ASSERT(bd->step == g0s0);
    IF_DEBUG(sanity,memset(bd->start, 0xaa, BLOCK_SIZE));
  }
  MainCapability.r.rNursery = g0s0->blocks;
  MainCapability.r.rCurrentNursery = g0s0->blocks;
#endif
}
bdescr *
allocNursery (bdescr *tail, nat blocks)
{
  bdescr *bd;
  nat i;

  // Allocate a nursery: we allocate fresh blocks one at a time and
  // cons them on to the front of the list, not forgetting to update
  // the back pointer on the tail of the list to point to the new block.
  for (i=0; i < blocks; i++) {
    /* processNursery() in LdvProfile.c assumes that every block group in
       the nursery contains only a single block.  So, if a block group is
       given multiple blocks, change processNursery() accordingly. */
    bd = allocBlock();
    bd->link = tail;
    // double-link the nursery: we might need to insert blocks
    if (tail != NULL) {
      tail->u.back = bd;
    }
    bd->step = g0s0;
    bd->gen_no = 0;
    bd->flags = 0;
    bd->free = bd->start;
    tail = bd;
  }
  return tail;
}
void
resizeNursery ( nat blocks )
{
  bdescr *bd;
  nat nursery_blocks;

#ifdef SMP
  barf("resizeNursery: can't resize in SMP mode");
#endif

  nursery_blocks = g0s0->n_blocks;
  if (nursery_blocks == blocks) {
    return;
  }
  else if (nursery_blocks < blocks) {
    IF_DEBUG(gc, debugBelch("Increasing size of nursery to %d blocks\n",
                            blocks));
    g0s0->blocks = allocNursery(g0s0->blocks, blocks-nursery_blocks);
  }
  else {
    bdescr *next_bd;
    IF_DEBUG(gc, debugBelch("Decreasing size of nursery to %d blocks\n",
                            blocks));
    bd = g0s0->blocks;
    while (nursery_blocks > blocks) {
        next_bd = bd->link;
        next_bd->u.back = NULL;
        nursery_blocks -= bd->blocks; // might be a large block
        freeGroup(bd);
        bd = next_bd;
    }
    g0s0->blocks = bd;
    // might have gone just under, by freeing a large block, so make
    // up the difference.
    if (nursery_blocks < blocks) {
        g0s0->blocks = allocNursery(g0s0->blocks, blocks-nursery_blocks);
    }
  }

  g0s0->n_blocks = blocks;
  ASSERT(countBlocks(g0s0->blocks) == g0s0->n_blocks);
}
/* -----------------------------------------------------------------------------
   The allocate() interface

   allocate(n) always succeeds, and returns a chunk of memory n words
   long.  n can be larger than the size of a block if necessary, in
   which case a contiguous block group will be allocated.
   -------------------------------------------------------------------------- */
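/* Illustrative sketch (standalone, not RTS code; kept in #if 0 so it is
 * not compiled): how a request of n words is classified and rounded by
 * allocate() below.  DEMO_BLOCK_SIZE and DEMO_THRESHOLD are hypothetical
 * stand-ins for BLOCK_SIZE and LARGE_OBJECT_THRESHOLD from the real
 * headers.
 */
#if 0
#include <stdio.h>
#define DEMO_BLOCK_SIZE  4096                          /* bytes per block */
#define DEMO_THRESHOLD   (DEMO_BLOCK_SIZE * 8 / 10)    /* large-object cutoff */
int main(void)
{
    size_t n = 2000;                                   /* request, in words */
    size_t bytes = n * sizeof(size_t);
    if (bytes >= DEMO_THRESHOLD) {
        /* large: round the byte count up to whole blocks */
        size_t blocks = (bytes + DEMO_BLOCK_SIZE - 1) / DEMO_BLOCK_SIZE;
        printf("large object: %zu block(s)\n", blocks);
    } else {
        printf("small object: bump-allocated in the current block\n");
    }
    return 0;
}
#endif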
StgPtr
allocate( nat n )
{
  bdescr *bd;
  StgPtr p;

  ACQUIRE_SM_LOCK;

  TICK_ALLOC_HEAP_NOCTR(n);
  CCS_ALLOC(CCCS,n);

  /* big allocation (>LARGE_OBJECT_THRESHOLD) */
  /* ToDo: allocate directly into generation 1 */
  if (n >= LARGE_OBJECT_THRESHOLD/sizeof(W_)) {
    nat req_blocks = (lnat)BLOCK_ROUND_UP(n*sizeof(W_)) / BLOCK_SIZE;
    bd = allocGroup(req_blocks);
    dbl_link_onto(bd, &g0s0->large_objects);
    g0s0->n_large_blocks += req_blocks;
    bd->gen_no = 0;
    bd->step = g0s0;
    bd->flags = BF_LARGE;
    bd->free = bd->start + n;
    alloc_blocks += req_blocks;
    RELEASE_SM_LOCK;
    return bd->start;

    /* small allocation (<LARGE_OBJECT_THRESHOLD) */
  } else if (small_alloc_list == NULL || alloc_Hp + n > alloc_HpLim) {
    if (small_alloc_list) {
      small_alloc_list->free = alloc_Hp;
    }
    bd = allocBlock();
    bd->link = small_alloc_list;
    small_alloc_list = bd;
    bd->gen_no = 0;
    bd->step = g0s0;
    bd->flags = 0;
    alloc_Hp = bd->start;
    alloc_HpLim = bd->start + BLOCK_SIZE_W;
    alloc_blocks++;
  }

  p = alloc_Hp;
  alloc_Hp += n;
  RELEASE_SM_LOCK;
  return p;
}
lnat
allocated_bytes( void )
{
    lnat allocated;
    allocated = alloc_blocks * BLOCK_SIZE_W - (alloc_HpLim - alloc_Hp);
    if (pinned_object_block != NULL) {
        allocated -= (pinned_object_block->start + BLOCK_SIZE_W) -
            pinned_object_block->free;
    }
    return allocated;
}
void
tidyAllocateLists (void)
{
    if (small_alloc_list != NULL) {
        ASSERT(alloc_Hp >= small_alloc_list->start &&
               alloc_Hp <= small_alloc_list->start + BLOCK_SIZE);
        small_alloc_list->free = alloc_Hp;
    }
}
/* ---------------------------------------------------------------------------
   Allocate a fixed/pinned object.

   We allocate small pinned objects into a single block, allocating a
   new block when the current one overflows.  The block is chained
   onto the large_object_list of generation 0 step 0.

   NOTE: The GC can't in general handle pinned objects.  This
   interface is only safe to use for ByteArrays, which have no
   pointers and don't require scavenging.  It works because the
   block's descriptor has the BF_LARGE flag set, so the block is
   treated as a large object and chained onto various lists, rather
   than the individual objects being copied.  However, when it comes
   to scavenge the block, the GC will only scavenge the first object.
   The reason is that the GC can't linearly scan a block of pinned
   objects at the moment (doing so would require using the
   mostly-copying techniques).  But since we're restricting ourselves
   to pinned ByteArrays, not scavenging is ok.

   This function is called by newPinnedByteArray# which immediately
   fills the allocated memory with a MutableByteArray#.
   ------------------------------------------------------------------------- */
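/* Illustrative sketch (standalone, not RTS code; kept in #if 0 so it is
 * not compiled): the 8-byte round-up performed below on 32-bit
 * platforms, where a word is 4 bytes.  An odd word count is bumped to
 * the next even one so consecutive pinned objects stay 8-byte aligned.
 */
#if 0
#include <stdio.h>
int main(void)
{
    unsigned n;
    for (n = 1; n <= 4; n++) {
        /* (n+1) & ~1 rounds a word count up to a multiple of 2 */
        printf("%u word(s) -> %u word(s)\n", n, (n + 1) & ~1u);
    }
    return 0;
}
#endif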
StgPtr
allocatePinned( nat n )
{
    StgPtr p;
    bdescr *bd = pinned_object_block;

    // If the request is for a large object, then allocate()
    // will give us a pinned object anyway.
    if (n >= LARGE_OBJECT_THRESHOLD/sizeof(W_)) {
        return allocate(n);
    }

    ACQUIRE_SM_LOCK;

    TICK_ALLOC_HEAP_NOCTR(n);
    CCS_ALLOC(CCCS,n);

    // we always return 8-byte aligned memory.  bd->free must be
    // 8-byte aligned to begin with, so we just round up n to
    // the nearest multiple of 8 bytes.
    if (sizeof(StgWord) == 4) {
        n = (n+1) & ~1;
    }

    // If we don't have a block of pinned objects yet, or the current
    // one isn't large enough to hold the new object, allocate a new one.
    if (bd == NULL || (bd->free + n) > (bd->start + BLOCK_SIZE_W)) {
        pinned_object_block = bd = allocBlock();
        dbl_link_onto(bd, &g0s0->large_objects);
        bd->gen_no = 0;
        bd->step   = g0s0;
        bd->flags  = BF_PINNED | BF_LARGE;
        bd->free   = bd->start;
        alloc_blocks++;
    }

    p = bd->free;
    bd->free += n;
    RELEASE_SM_LOCK;
    return p;
}
/* -----------------------------------------------------------------------------
   Allocation functions for GMP.

   These all use the allocate() interface - we can't have any garbage
   collection going on during a gmp operation, so we use allocate()
   which always succeeds.  The gmp operations which might need to
   allocate will ask the storage manager (via doYouWantToGC()) whether
   a garbage collection is required, in case we get into a loop doing
   only allocate() style allocation.
   -------------------------------------------------------------------------- */
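/* Standalone sketch (not RTS code; kept in #if 0 so it is not compiled):
 * registering custom allocators with GMP via mp_set_memory_functions(),
 * the same hook mechanism used in initStorage() above.  Here the hooks
 * just wrap malloc/realloc/free; the names my_alloc etc. are
 * hypothetical.
 */
#if 0
#include <gmp.h>
#include <stdlib.h>

static void *my_alloc (size_t n)                      { return malloc(n); }
static void *my_realloc (void *p, size_t o, size_t n) { (void)o; return realloc(p, n); }
static void  my_free (void *p, size_t n)              { (void)n; free(p); }

int main(void)
{
    mpz_t x;
    mp_set_memory_functions(my_alloc, my_realloc, my_free);
    mpz_init_set_ui(x, 42);   /* allocations now go through our hooks */
    mpz_clear(x);
    return 0;
}
#endif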
static void *
stgAllocForGMP (size_t size_in_bytes)
{
  StgArrWords* arr;
  nat data_size_in_words, total_size_in_words;

  /* round up to a whole number of words */
  data_size_in_words  = (size_in_bytes + sizeof(W_) - 1) / sizeof(W_);
  total_size_in_words = sizeofW(StgArrWords) + data_size_in_words;

  /* allocate and fill it in. */
  arr = (StgArrWords *)allocate(total_size_in_words);
  SET_ARR_HDR(arr, &stg_ARR_WORDS_info, CCCS, data_size_in_words);

  /* and return a ptr to the goods inside the array */
  return arr->payload;
}
static void *
stgReallocForGMP (void *ptr, size_t old_size, size_t new_size)
{
    void *new_stuff_ptr = stgAllocForGMP(new_size);
    nat i = 0;
    char *p = (char *) ptr;
    char *q = (char *) new_stuff_ptr;
    for (; i < old_size; i++, p++, q++) {
        *q = *p;
    }
    return(new_stuff_ptr);
}
static void
stgDeallocForGMP (void *ptr STG_UNUSED,
                  size_t size STG_UNUSED)
{
    /* easy for us: the garbage collector does the dealloc'n */
}
/* -----------------------------------------------------------------------------
 * Stats and stuff
 * -------------------------------------------------------------------------- */
/* -----------------------------------------------------------------------------
 * calcAllocated()
 *
 * Approximate how much we've allocated: number of blocks in the
 * nursery + blocks allocated via allocate() - unused nursery blocks.
 * This leaves a little slop at the end of each block, and doesn't
 * take into account large objects (ToDo).
 * -------------------------------------------------------------------------- */
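/* Worked example (hypothetical numbers): with a 256-block nursery in
 * which the mutator stopped 6 blocks before the end, and assuming an
 * empty allocate() area, the result is
 *
 *     256*BLOCK_SIZE_W + 0 - 6*BLOCK_SIZE_W
 *
 * words, i.e. 250 blocks' worth, minus the unused tail of the block we
 * stopped in.
 */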
lnat
calcAllocated( void )
{
#ifdef SMP
  nat allocated;
  Capability *cap;
  bdescr *bd;

  /* All tasks must be stopped.  Can't assert that all the
     capabilities are owned by the scheduler, though: one or more
     tasks might have been stopped while they were running (non-main)
     threads. */
  /* ASSERT(n_free_capabilities == RtsFlags.ParFlags.nNodes); */

  allocated =
    n_free_capabilities * RtsFlags.GcFlags.minAllocAreaSize * BLOCK_SIZE_W
    + allocated_bytes();

  for (cap = free_capabilities; cap != NULL; cap = cap->link) {
    for ( bd = cap->r.rCurrentNursery->link; bd != NULL; bd = bd->link ) {
      allocated -= BLOCK_SIZE_W;
    }
    if (cap->r.rCurrentNursery->free < cap->r.rCurrentNursery->start
        + BLOCK_SIZE_W) {
      allocated -= (cap->r.rCurrentNursery->start + BLOCK_SIZE_W)
        - cap->r.rCurrentNursery->free;
    }
  }
#else /* !SMP */
  bdescr *current_nursery = MainCapability.r.rCurrentNursery;
  lnat allocated;
  bdescr *bd;

  allocated = (g0s0->n_blocks * BLOCK_SIZE_W) + allocated_bytes();
  for ( bd = current_nursery->link; bd != NULL; bd = bd->link ) {
    allocated -= BLOCK_SIZE_W;
  }
  if (current_nursery->free < current_nursery->start + BLOCK_SIZE_W) {
    allocated -= (current_nursery->start + BLOCK_SIZE_W)
      - current_nursery->free;
  }
#endif

  total_allocated += allocated;
  return allocated;
}
/* Approximate the amount of live data in the heap.  To be called just
 * after garbage collection (see GarbageCollect()).
 */
extern lnat
calcLive(void)
{
  nat g, s;
  lnat live = 0;
  step *stp;

  if (RtsFlags.GcFlags.generations == 1) {
    live = (g0s0->n_to_blocks - 1) * BLOCK_SIZE_W +
      ((lnat)g0s0->hp_bd->free - (lnat)g0s0->hp_bd->start) / sizeof(W_);
    return live;
  }

  for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
    for (s = 0; s < generations[g].n_steps; s++) {
      /* approximate amount of live data (doesn't take into account slop
       * at end of each block).
       */
      if (g == 0 && s == 0) { continue; }
      stp = &generations[g].steps[s];
      live += (stp->n_large_blocks + stp->n_blocks - 1) * BLOCK_SIZE_W;
      if (stp->hp_bd != NULL) {
          live += ((lnat)stp->hp_bd->free - (lnat)stp->hp_bd->start)
              / sizeof(W_);
      }
    }
  }
  return live;
}
/* Approximate the number of blocks that will be needed at the next
 * garbage collection.
 *
 * Assume: all data currently live will remain live.  Steps that will
 * be collected next time will therefore need twice as many blocks
 * since all the data will be copied.
 */
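/* Worked example (hypothetical numbers): a step holding 100 blocks of
 * live data that will be copy-collected contributes 2 * 100 = 200
 * blocks to the estimate, because from-space and to-space coexist
 * during the collection; a compacted step contributes just its own
 * 100 blocks.
 */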
extern lnat
calcNeeded(void)
{
    lnat needed = 0;
    nat g, s;
    step *stp;

    for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
        for (s = 0; s < generations[g].n_steps; s++) {
            if (g == 0 && s == 0) { continue; }
            stp = &generations[g].steps[s];
            if (generations[g].steps[0].n_blocks +
                generations[g].steps[0].n_large_blocks
                > generations[g].max_blocks
                && stp->is_compacted == 0) {
                needed += 2 * stp->n_blocks;
            } else {
                needed += stp->n_blocks;
            }
        }
    }
    return needed;
}
/* -----------------------------------------------------------------------------
   Debugging

   memInventory() checks for memory leaks by counting up all the
   blocks we know about and comparing that to the number of blocks
   allegedly floating around in the system.
   -------------------------------------------------------------------------- */
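/* The invariant checked at the end of memInventory(), informally: every
 * block the block allocator has carved out of its megablocks must be
 * accounted for, either as in use somewhere in the lists walked below
 * (total_blocks) or as sitting on the free list (free_blocks):
 *
 *     total_blocks + free_blocks == mblocks_allocated * BLOCKS_PER_MBLOCK
 */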
#ifdef DEBUG

void
memInventory(void)
{
  nat g, s;
  step *stp;
  bdescr *bd;
  lnat total_blocks = 0, free_blocks = 0;

  /* count the blocks we currently have */

  for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
      for (bd = generations[g].mut_list; bd != NULL; bd = bd->link) {
          total_blocks += bd->blocks;
      }
      for (s = 0; s < generations[g].n_steps; s++) {
          stp = &generations[g].steps[s];
          total_blocks += stp->n_blocks;
          if (RtsFlags.GcFlags.generations == 1) {
              /* two-space collector has a to-space too :-) */
              total_blocks += g0s0->n_to_blocks;
          }
          for (bd = stp->large_objects; bd; bd = bd->link) {
              total_blocks += bd->blocks;
              /* hack for megablock groups: they have an extra block or two in
                 the second and subsequent megablocks where the block
                 descriptors would normally go.
              */
              if (bd->blocks > BLOCKS_PER_MBLOCK) {
                  total_blocks -= (MBLOCK_SIZE / BLOCK_SIZE - BLOCKS_PER_MBLOCK)
                      * (bd->blocks/(MBLOCK_SIZE/BLOCK_SIZE));
              }
          }
      }
  }

  /* any blocks held by allocate() */
  for (bd = small_alloc_list; bd; bd = bd->link) {
      total_blocks += bd->blocks;
  }

#ifdef PROFILING
  if (RtsFlags.ProfFlags.doHeapProfile == HEAP_BY_RETAINER) {
      total_blocks += retainerStackBlocks();
  }
#endif

  // count the blocks allocated by the arena allocator
  total_blocks += arenaBlocks();

  /* count the blocks on the free list */
  free_blocks = countFreeList();

  if (total_blocks + free_blocks != mblocks_allocated *
      BLOCKS_PER_MBLOCK) {
    debugBelch("Blocks: %ld live + %ld free = %ld total (%ld around)\n",
            total_blocks, free_blocks, total_blocks + free_blocks,
            mblocks_allocated * BLOCKS_PER_MBLOCK);
  }

  ASSERT(total_blocks + free_blocks == mblocks_allocated * BLOCKS_PER_MBLOCK);
}
nat
countBlocks(bdescr *bd)
{
    nat n;
    for (n=0; bd != NULL; bd=bd->link) {
        n += bd->blocks;
    }
    return n;
}

/* Full heap sanity check. */
void
checkSanity( void )
{
    nat g, s;

    if (RtsFlags.GcFlags.generations == 1) {
        checkHeap(g0s0->to_blocks);
        checkChain(g0s0->large_objects);
    } else {
        for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
            for (s = 0; s < generations[g].n_steps; s++) {
                ASSERT(countBlocks(generations[g].steps[s].blocks)
                       == generations[g].steps[s].n_blocks);
                ASSERT(countBlocks(generations[g].steps[s].large_objects)
                       == generations[g].steps[s].n_large_blocks);
                if (g == 0 && s == 0) { continue; }
                checkHeap(generations[g].steps[s].blocks);
                checkChain(generations[g].steps[s].large_objects);
            }
            checkMutableList(generations[g].mut_list, g);
        }
        checkFreeListSanity();
    }
}
// handy function for use in gdb, because Bdescr() is inlined.
extern bdescr *_bdescr( StgPtr p );

bdescr *
_bdescr( StgPtr p )
{
    return Bdescr(p);
}

#endif /* DEBUG */