/* -----------------------------------------------------------------------------
 * $Id: Storage.c,v 1.58 2002/01/24 01:45:55 sof Exp $
 * (c) The GHC Team, 1998-1999
 * Storage manager front end
 * ---------------------------------------------------------------------------*/
#include "PosixSource.h"
#include "BlockAlloc.h"
#include "StoragePriv.h"
#include "RetainerProfile.h"    // for counting memory blocks (memInventory)
StgClosure *caf_list = NULL;

bdescr *small_alloc_list;    /* allocate()d small objects */
bdescr *large_alloc_list;    /* allocate()d large objects */
bdescr *pinned_object_block; /* allocate pinned objects into this block */
nat alloc_blocks;            /* number of allocate()d blocks since GC */
nat alloc_blocks_lim;        /* approximate limit on alloc_blocks */

StgPtr alloc_Hp    = NULL;   /* next free byte in small_alloc_list */
StgPtr alloc_HpLim = NULL;   /* end of block at small_alloc_list */

generation *generations;     /* all the generations */
generation *g0;              /* generation 0, for convenience */
generation *oldest_gen;      /* oldest generation, for convenience */
step *g0s0;                  /* generation 0, step 0, for convenience */

lnat total_allocated = 0;    /* total memory allocated during run */

/*
 * Storage manager mutex: protects all the above state from
 * simultaneous access by two STG threads.
 */
pthread_mutex_t sm_mutex = PTHREAD_MUTEX_INITIALIZER;
static void *stgAllocForGMP   (size_t size_in_bytes);
static void *stgReallocForGMP (void *ptr, size_t old_size, size_t new_size);
static void  stgDeallocForGMP (void *ptr, size_t size);
  if (RtsFlags.GcFlags.maxHeapSize != 0 &&
      RtsFlags.GcFlags.heapSizeSuggestion >
      RtsFlags.GcFlags.maxHeapSize) {
    RtsFlags.GcFlags.maxHeapSize = RtsFlags.GcFlags.heapSizeSuggestion;

  if (RtsFlags.GcFlags.maxHeapSize != 0 &&
      RtsFlags.GcFlags.minAllocAreaSize >
      RtsFlags.GcFlags.maxHeapSize) {
    prog_belch("maximum heap size (-M) is smaller than minimum alloc area size (-A)");

  /* allocate generation info array */
  generations = (generation *)stgMallocBytes(RtsFlags.GcFlags.generations
                                             * sizeof(struct _generation),

  /* Initialise all generations */
  for(g = 0; g < RtsFlags.GcFlags.generations; g++) {
    gen = &generations[g];
    gen->mut_list = END_MUT_LIST;
    gen->mut_once_list = END_MUT_LIST;
    gen->failed_promotions = 0;

  /* A couple of convenience pointers */
  g0 = &generations[0];
  oldest_gen = &generations[RtsFlags.GcFlags.generations-1];
  /* Allocate step structures in each generation */
  if (RtsFlags.GcFlags.generations > 1) {
    /* Only for multiple generations */

    /* Oldest generation: one step */
    oldest_gen->n_steps = 1;
      stgMallocBytes(1 * sizeof(struct _step), "initStorage: last step");

    /* set up all except the oldest generation with 2 steps */
    for(g = 0; g < RtsFlags.GcFlags.generations-1; g++) {
      generations[g].n_steps = RtsFlags.GcFlags.steps;
      generations[g].steps =
        stgMallocBytes (RtsFlags.GcFlags.steps * sizeof(struct _step),
                        "initStorage: steps");

    /* single generation, i.e. a two-space collector */
    g0->steps = stgMallocBytes (sizeof(struct _step), "initStorage: steps");
  /* Initialise all steps */
  for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
    for (s = 0; s < generations[g].n_steps; s++) {
      stp = &generations[g].steps[s];
      stp->gen = &generations[g];
      stp->large_objects = NULL;
      stp->n_large_blocks = 0;
      stp->new_large_objects = NULL;
      stp->scavenged_large_objects = NULL;
      stp->n_scavenged_large_blocks = 0;
      stp->is_compacted = 0;
  /* Set up the destination pointers in each younger gen. step */
  for (g = 0; g < RtsFlags.GcFlags.generations-1; g++) {
    for (s = 0; s < generations[g].n_steps-1; s++) {
      generations[g].steps[s].to = &generations[g].steps[s+1];
    generations[g].steps[s].to = &generations[g+1].steps[0];

  /* The oldest generation has one step and it is compacted. */
  if (RtsFlags.GcFlags.compact) {
    if (RtsFlags.GcFlags.generations == 1) {
      belch("WARNING: compaction is incompatible with -G1; disabled");
      oldest_gen->steps[0].is_compacted = 1;
  oldest_gen->steps[0].to = &oldest_gen->steps[0];

  /* generation 0 is special: that's the nursery */
  generations[0].max_blocks = 0;

  /* G0S0: the allocation area.  Policy: keep the allocation area
   * small to begin with, even if we have a large suggested heap
   * size.  Reason: we're going to do a major collection first, and we
   * don't want it to be a big one.  This vague idea is borne out by
   * rigorous experimental evidence.
   */
  g0s0 = &generations[0].steps[0];
  weak_ptr_list = NULL;

  /* initialise the allocate() interface */
  small_alloc_list = NULL;
  large_alloc_list = NULL;
  alloc_blocks_lim = RtsFlags.GcFlags.minAllocAreaSize;

  /* Tell GNU multi-precision pkg about our custom alloc functions */
  mp_set_memory_functions(stgAllocForGMP, stgReallocForGMP, stgDeallocForGMP);

  pthread_mutex_init(&sm_mutex, NULL);

  IF_DEBUG(gc, statDescribeGens());

  stat_exit(calcAllocated());
/* -----------------------------------------------------------------------------
   The entry code for every CAF does the following:

      - builds a CAF_BLACKHOLE in the heap
      - pushes an update frame pointing to the CAF_BLACKHOLE
      - invokes UPD_CAF(), which:
          - calls newCaf, below
          - updates the CAF with a static indirection to the CAF_BLACKHOLE

   Why do we build a BLACKHOLE in the heap rather than just updating
   the thunk directly?  It's so that we only need one kind of update
   frame - otherwise we'd need a static version of the update frame too.

   newCaf() does the following:

      - it puts the CAF on the oldest generation's mut-once list.
        This is so that we can treat the CAF as a root when collecting
        younger generations.

   For GHCI, we have additional requirements when dealing with CAFs:

      - we must *retain* all dynamically-loaded CAFs ever entered,
        just in case we need them again.
      - we must be able to *revert* CAFs that have been evaluated, to
        their pre-evaluated form.

   To do this, we use an additional CAF list.  When newCaf() is
   called on a dynamically-loaded CAF, we add it to the CAF list
   instead of the old-generation mutable list, and save away its
   old info pointer (in caf->saved_info) for later reversion.

   To revert all the CAFs, we traverse the CAF list and reset the
   info pointer to caf->saved_info, then throw away the CAF list.
   (see GC.c:revertCAFs()).

   -------------------------------------------------------------------------- */
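/* Illustrative sketch only (the real code lives in GC.c:revertCAFs()): one
 * plausible shape of the reversion walk described above, written with only
 * the fields mentioned in this file (saved_info, static_link, caf_list).
 * Treat the body as an assumption, not the actual implementation.
 */
#if 0
static void
revertCAFs_sketch( void )
{
  StgIndStatic *c, *next;

  for (c = (StgIndStatic *)caf_list; c != NULL; c = next) {
    next = (StgIndStatic *)c->static_link;
    c->header.info = c->saved_info;   /* put back the pre-evaluation info ptr */
    c->saved_info  = NULL;
  }
  caf_list = NULL;                    /* throw away the CAF list */
}
#endif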
newCAF(StgClosure* caf)

  /* Put this CAF on the mutable list for the old generation.
   * This is a HACK - the IND_STATIC closure doesn't really have
   * a mut_link field, but we pretend it has - in fact we re-use
   * the STATIC_LINK field for the time being, because when we
   * come to do a major GC we won't need the mut_link field
   * any more and can use it as a STATIC_LINK.
   */
  ACQUIRE_LOCK(&sm_mutex);

  if (is_dynamically_loaded_rwdata_ptr((StgPtr)caf)) {
      ((StgIndStatic *)caf)->saved_info  = (StgInfoTable *)caf->header.info;
      ((StgIndStatic *)caf)->static_link = caf_list;
      ((StgIndStatic *)caf)->saved_info = NULL;
      ((StgMutClosure *)caf)->mut_link = oldest_gen->mut_once_list;
      oldest_gen->mut_once_list = (StgMutClosure *)caf;

  RELEASE_LOCK(&sm_mutex);

  /* If we are PAR or DIST then we never forget a CAF */
  //belch("<##> Globalising CAF %08x %s",caf,info_type(caf));
  newGA=makeGlobal(caf,rtsTrue); /*given full weight*/
/* -----------------------------------------------------------------------------
   -------------------------------------------------------------------------- */

allocNurseries( void )

  for (cap = free_capabilities; cap != NULL; cap = cap->link) {
    cap->r.rNursery = allocNursery(NULL, RtsFlags.GcFlags.minAllocAreaSize);
    cap->r.rCurrentNursery = cap->r.rNursery;
    for (bd = cap->r.rNursery; bd != NULL; bd = bd->link) {
      bd->u.back = (bdescr *)cap;
    /* Set the back links to be equal to the Capability,
     * so we can do slightly better informed locking.
     */

  g0s0->blocks = allocNursery(NULL, RtsFlags.GcFlags.minAllocAreaSize);
  g0s0->n_blocks = RtsFlags.GcFlags.minAllocAreaSize;
  g0s0->to_blocks = NULL;
  g0s0->n_to_blocks = 0;
  MainCapability.r.rNursery = g0s0->blocks;
  MainCapability.r.rCurrentNursery = g0s0->blocks;
  /* hp, hpLim, hp_bd, to_space etc. aren't used in G0S0 */
resetNurseries( void )

  /* All tasks must be stopped */
  ASSERT(n_free_capabilities == RtsFlags.ParFlags.nNodes);

  for (cap = free_capabilities; cap != NULL; cap = cap->link) {
    for (bd = cap->r.rNursery; bd; bd = bd->link) {
      bd->free = bd->start;
      ASSERT(bd->gen_no == 0);
      ASSERT(bd->step == g0s0);
      IF_DEBUG(sanity,memset(bd->start, 0xaa, BLOCK_SIZE));
    cap->r.rCurrentNursery = cap->r.rNursery;

  for (bd = g0s0->blocks; bd; bd = bd->link) {
    bd->free = bd->start;
    ASSERT(bd->gen_no == 0);
    ASSERT(bd->step == g0s0);
    IF_DEBUG(sanity,memset(bd->start, 0xaa, BLOCK_SIZE));
  MainCapability.r.rNursery = g0s0->blocks;
  MainCapability.r.rCurrentNursery = g0s0->blocks;
allocNursery (bdescr *tail, nat blocks)

  // Allocate a nursery: we allocate fresh blocks one at a time and
  // cons them on to the front of the list, not forgetting to update
  // the back pointer on the tail of the list to point to the new block.
  for (i=0; i < blocks; i++) {
    /*
      processNursery() in LdvProfile.c assumes that every block group in
      the nursery contains only a single block.  So, if a block group is
      given multiple blocks, change processNursery() accordingly.
     */
    // double-link the nursery: we might need to insert blocks
    bd->free = bd->start;
resizeNursery ( nat blocks )

  barf("resizeNursery: can't resize in SMP mode");

  nursery_blocks = g0s0->n_blocks;
  if (nursery_blocks == blocks) {
  else if (nursery_blocks < blocks) {
    IF_DEBUG(gc, fprintf(stderr, "Increasing size of nursery to %d blocks\n",
    g0s0->blocks = allocNursery(g0s0->blocks, blocks-nursery_blocks);

    IF_DEBUG(gc, fprintf(stderr, "Decreasing size of nursery to %d blocks\n",
    while (nursery_blocks > blocks) {
      next_bd->u.back = NULL;
      nursery_blocks -= bd->blocks;  // might be a large block

    // might have gone just under, by freeing a large block, so make
    // up the difference.
    if (nursery_blocks < blocks) {
      g0s0->blocks = allocNursery(g0s0->blocks, blocks-nursery_blocks);

  g0s0->n_blocks = blocks;
  ASSERT(countBlocks(g0s0->blocks) == g0s0->n_blocks);
/* -----------------------------------------------------------------------------
   The allocate() interface

   allocate(n) always succeeds, and returns a chunk of memory n words
   long.  n can be larger than the size of a block if necessary, in
   which case a contiguous block group will be allocated.
   -------------------------------------------------------------------------- */
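/* A minimal caller-side sketch (not part of this file): roughly how an RTS
 * client can use allocate() to build a word array, in the same way
 * stgAllocForGMP() does further down.  The helper name and the use of CCCS
 * as the cost centre are assumptions for illustration only.
 */
#if 0
static StgArrWords *
allocateArrWords_sketch( nat payload_words )
{
  StgArrWords *arr;

  /* header + payload; allocate() always succeeds, and requests larger
   * than a block are turned into contiguous block groups. */
  arr = (StgArrWords *)allocate(sizeofW(StgArrWords) + payload_words);
  SET_ARR_HDR(arr, &stg_ARR_WORDS_info, CCCS, payload_words);
  return arr;
}
#endif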
  ACQUIRE_LOCK(&sm_mutex);

  TICK_ALLOC_HEAP_NOCTR(n);

  /* big allocation (>LARGE_OBJECT_THRESHOLD) */
  /* ToDo: allocate directly into generation 1 */
  if (n >= LARGE_OBJECT_THRESHOLD/sizeof(W_)) {
    nat req_blocks = (lnat)BLOCK_ROUND_UP(n*sizeof(W_)) / BLOCK_SIZE;
    bd = allocGroup(req_blocks);
    dbl_link_onto(bd, &g0s0->large_objects);
    bd->flags = BF_LARGE;
    bd->free = bd->start;
    /* don't add these blocks to alloc_blocks, since we're assuming
     * that large objects are likely to remain live for quite a while
     * (eg. running threads), so garbage collecting early won't make
     * much difference.
     */
    alloc_blocks += req_blocks;
    RELEASE_LOCK(&sm_mutex);

  /* small allocation (<LARGE_OBJECT_THRESHOLD) */
  } else if (small_alloc_list == NULL || alloc_Hp + n > alloc_HpLim) {
    if (small_alloc_list) {
      small_alloc_list->free = alloc_Hp;
    bd->link = small_alloc_list;
    small_alloc_list = bd;
    alloc_Hp = bd->start;
    alloc_HpLim = bd->start + BLOCK_SIZE_W;

  RELEASE_LOCK(&sm_mutex);

allocated_bytes( void )

  return (alloc_blocks * BLOCK_SIZE_W - (alloc_HpLim - alloc_Hp));
/* ---------------------------------------------------------------------------
   Allocate a fixed/pinned object.

   We allocate small pinned objects into a single block, allocating a
   new block when the current one overflows.  The block is chained
   onto the large_object_list of generation 0 step 0.

   NOTE: The GC can't in general handle pinned objects.  This
   interface is only safe to use for ByteArrays, which have no
   pointers and don't require scavenging.  It works because the
   block's descriptor has the BF_LARGE flag set, so the block is
   treated as a large object and chained onto various lists, rather
   than the individual objects being copied.  However, when it comes
   to scavenge the block, the GC will only scavenge the first object.
   The reason is that the GC can't linearly scan a block of pinned
   objects at the moment (doing so would require using the
   mostly-copying techniques).  But since we're restricting ourselves
   to pinned ByteArrays, not scavenging is ok.

   This function is called by newPinnedByteArray# which immediately
   fills the allocated memory with a MutableByteArray#.
   ------------------------------------------------------------------------- */
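/* Caller-side sketch only: roughly the pattern the comment above ascribes to
 * newPinnedByteArray# - grab pinned space, then immediately lay down an
 * ARR_WORDS-style header so the GC sees a well-formed, pointer-free closure.
 * The helper name and the choice of info table/cost centre are assumptions.
 */
#if 0
static StgArrWords *
pinnedByteArray_sketch( nat payload_words )
{
  StgArrWords *arr;

  arr = (StgArrWords *)allocatePinned(sizeofW(StgArrWords) + payload_words);
  SET_ARR_HDR(arr, &stg_ARR_WORDS_info, CCCS, payload_words);
  return arr;
}
#endif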
allocatePinned( nat n )

  bdescr *bd = pinned_object_block;

  ACQUIRE_LOCK(&sm_mutex);

  TICK_ALLOC_HEAP_NOCTR(n);

  // If the request is for a large object, then allocate()
  // will give us a pinned object anyway.
  if (n >= LARGE_OBJECT_THRESHOLD/sizeof(W_)) {
    RELEASE_LOCK(&sm_mutex);

  // If we don't have a block of pinned objects yet, or the current
  // one isn't large enough to hold the new object, allocate a new one.
  if (bd == NULL || (bd->free + n) > (bd->start + BLOCK_SIZE_W)) {
    pinned_object_block = bd = allocBlock();
    dbl_link_onto(bd, &g0s0->large_objects);
    bd->flags = BF_LARGE;
    bd->free = bd->start;

  RELEASE_LOCK(&sm_mutex);
/* -----------------------------------------------------------------------------
   Allocation functions for GMP.

   These all use the allocate() interface - we can't have any garbage
   collection going on during a gmp operation, so we use allocate()
   which always succeeds.  The gmp operations which might need to
   allocate will ask the storage manager (via doYouWantToGC()) whether
   a garbage collection is required, in case we get into a loop doing
   only allocate() style allocation.
   -------------------------------------------------------------------------- */
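/* Illustrative sketch only, not RTS code: once initStorage() has called
 * mp_set_memory_functions() (above), ordinary GMP operations like the ones
 * below obtain their limb storage via stgAllocForGMP()/stgReallocForGMP(),
 * i.e. on the GHC heap.  The function name is made up for the example.
 */
#if 0
#include <gmp.h>

static void
gmpHooks_demo( void )
{
  mpz_t x, y;

  mpz_init_set_ui(x, 12345);   /* limb storage comes from stgAllocForGMP() */
  mpz_init_set_ui(y, 67890);
  mpz_mul(x, x, y);            /* growing x may go via stgReallocForGMP() */
  mpz_clear(x);                /* "freeing" hits stgDeallocForGMP(), a no-op */
  mpz_clear(y);
}
#endif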
stgAllocForGMP (size_t size_in_bytes)

  nat data_size_in_words, total_size_in_words;

  /* should be a multiple of sizeof(StgWord) (whole no. of limbs) */
  ASSERT(size_in_bytes % sizeof(W_) == 0);

  data_size_in_words  = size_in_bytes / sizeof(W_);
  total_size_in_words = sizeofW(StgArrWords) + data_size_in_words;

  /* allocate and fill it in. */
  arr = (StgArrWords *)allocate(total_size_in_words);
  SET_ARR_HDR(arr, &stg_ARR_WORDS_info, CCCS, data_size_in_words);

  /* and return a ptr to the goods inside the array */
  return(BYTE_ARR_CTS(arr));

stgReallocForGMP (void *ptr, size_t old_size, size_t new_size)

  void *new_stuff_ptr = stgAllocForGMP(new_size);
  char *p = (char *) ptr;
  char *q = (char *) new_stuff_ptr;

  for (; i < old_size; i++, p++, q++) {

  return(new_stuff_ptr);

stgDeallocForGMP (void *ptr STG_UNUSED,
                  size_t size STG_UNUSED)

  /* easy for us: the garbage collector does the dealloc'n */
/* -----------------------------------------------------------------------------
 * -------------------------------------------------------------------------- */

/* -----------------------------------------------------------------------------
 * Approximate how much we've allocated: number of blocks in the
 * nursery + blocks allocated via allocate() - unused nursery blocks.
 * This leaves a little slop at the end of each block, and doesn't
 * take into account large objects (ToDo).
 * -------------------------------------------------------------------------- */
calcAllocated( void )

  /* All tasks must be stopped.  Can't assert that all the
     capabilities are owned by the scheduler, though: one or more
     tasks might have been stopped while they were running (non-main)
   */
  /* ASSERT(n_free_capabilities == RtsFlags.ParFlags.nNodes); */

    n_free_capabilities * RtsFlags.GcFlags.minAllocAreaSize * BLOCK_SIZE_W

  for (cap = free_capabilities; cap != NULL; cap = cap->link) {
    for ( bd = cap->r.rCurrentNursery->link; bd != NULL; bd = bd->link ) {
      allocated -= BLOCK_SIZE_W;
    if (cap->r.rCurrentNursery->free < cap->r.rCurrentNursery->start
      allocated -= (cap->r.rCurrentNursery->start + BLOCK_SIZE_W)
                   - cap->r.rCurrentNursery->free;

  bdescr *current_nursery = MainCapability.r.rCurrentNursery;

  allocated = (g0s0->n_blocks * BLOCK_SIZE_W) + allocated_bytes();
  for ( bd = current_nursery->link; bd != NULL; bd = bd->link ) {
    allocated -= BLOCK_SIZE_W;
  if (current_nursery->free < current_nursery->start + BLOCK_SIZE_W) {
    allocated -= (current_nursery->start + BLOCK_SIZE_W)
                 - current_nursery->free;

  total_allocated += allocated;
/* Approximate the amount of live data in the heap.  To be called just
 * after garbage collection (see GarbageCollect()).
 */

  if (RtsFlags.GcFlags.generations == 1) {
    live = (g0s0->n_to_blocks - 1) * BLOCK_SIZE_W +
      ((lnat)g0s0->hp_bd->free - (lnat)g0s0->hp_bd->start) / sizeof(W_);

  for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
    for (s = 0; s < generations[g].n_steps; s++) {
      /* approximate amount of live data (doesn't take into account slop
       * at end of each block).
       */
      if (g == 0 && s == 0) {
      stp = &generations[g].steps[s];
      live += (stp->n_large_blocks + stp->n_blocks - 1) * BLOCK_SIZE_W;
      if (stp->hp_bd != NULL) {
        live += ((lnat)stp->hp_bd->free - (lnat)stp->hp_bd->start)
/* Approximate the number of blocks that will be needed at the next
 * garbage collection.
 *
 * Assume: all data currently live will remain live.  Steps that will
 * be collected next time will therefore need twice as many blocks
 * since all the data will be copied.
 */

  for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
    for (s = 0; s < generations[g].n_steps; s++) {
      if (g == 0 && s == 0) { continue; }
      stp = &generations[g].steps[s];
      if (generations[g].steps[0].n_blocks +
          generations[g].steps[0].n_large_blocks
          > generations[g].max_blocks
          && stp->is_compacted == 0) {
        needed += 2 * stp->n_blocks;
        needed += stp->n_blocks;
/* -----------------------------------------------------------------------------
   memInventory() checks for memory leaks by counting up all the
   blocks we know about and comparing that to the number of blocks
   allegedly floating around in the system.
   -------------------------------------------------------------------------- */
  lnat total_blocks = 0, free_blocks = 0;

  /* count the blocks we currently have */
  for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
    for (s = 0; s < generations[g].n_steps; s++) {
      stp = &generations[g].steps[s];
      total_blocks += stp->n_blocks;
      if (RtsFlags.GcFlags.generations == 1) {
        /* two-space collector has a to-space too :-) */
        total_blocks += g0s0->n_to_blocks;
      for (bd = stp->large_objects; bd; bd = bd->link) {
        total_blocks += bd->blocks;
        /* hack for megablock groups: they have an extra block or two in
           the second and subsequent megablocks where the block
           descriptors would normally go.
         */
        if (bd->blocks > BLOCKS_PER_MBLOCK) {
          total_blocks -= (MBLOCK_SIZE / BLOCK_SIZE - BLOCKS_PER_MBLOCK)
                          * (bd->blocks/(MBLOCK_SIZE/BLOCK_SIZE));

  /* any blocks held by allocate() */
  for (bd = small_alloc_list; bd; bd = bd->link) {
    total_blocks += bd->blocks;
  for (bd = large_alloc_list; bd; bd = bd->link) {
    total_blocks += bd->blocks;

  if (RtsFlags.ProfFlags.doHeapProfile == HEAP_BY_RETAINER) {
    for (bd = firstStack; bd != NULL; bd = bd->link)
      total_blocks += bd->blocks;

  // count the blocks allocated by the arena allocator
  total_blocks += arenaBlocks();

  /* count the blocks on the free list */
  free_blocks = countFreeList();

  if (total_blocks + free_blocks != mblocks_allocated *
    fprintf(stderr, "Blocks: %ld live + %ld free = %ld total (%ld around)\n",
            total_blocks, free_blocks, total_blocks + free_blocks,
            mblocks_allocated * BLOCKS_PER_MBLOCK);

  ASSERT(total_blocks + free_blocks == mblocks_allocated * BLOCKS_PER_MBLOCK);
countBlocks(bdescr *bd)

  for (n=0; bd != NULL; bd=bd->link) {

/* Full heap sanity check. */

  if (RtsFlags.GcFlags.generations == 1) {
    checkHeap(g0s0->to_blocks);
    checkChain(g0s0->large_objects);

  for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
    for (s = 0; s < generations[g].n_steps; s++) {
      ASSERT(countBlocks(generations[g].steps[s].blocks)
             == generations[g].steps[s].n_blocks);
      ASSERT(countBlocks(generations[g].steps[s].large_objects)
             == generations[g].steps[s].n_large_blocks);
      if (g == 0 && s == 0) { continue; }
      checkHeap(generations[g].steps[s].blocks);
      checkChain(generations[g].steps[s].large_objects);
    checkMutableList(generations[g].mut_list, g);
    checkMutOnceList(generations[g].mut_once_list, g);

  checkFreeListSanity();

// handy function for use in gdb, because Bdescr() is inlined.
extern bdescr *_bdescr( StgPtr p );