1 /* -----------------------------------------------------------------------------
2 * $Id: Storage.c,v 1.56 2001/11/28 14:30:32 simonmar Exp $
4 * (c) The GHC Team, 1998-1999
6 * Storage manager front end
8 * ---------------------------------------------------------------------------*/
10 #include "PosixSource.h"
16 #include "BlockAlloc.h"
24 #include "StoragePriv.h"
26 #include "RetainerProfile.h" // for counting memory blocks (memInventory)
28 StgClosure *caf_list = NULL;
30 bdescr *small_alloc_list; /* allocate()d small objects */
31 bdescr *large_alloc_list; /* allocate()d large objects */
32 bdescr *pinned_object_block; /* allocate pinned objects into this block */
33 nat alloc_blocks; /* number of allocate()d blocks since GC */
34 nat alloc_blocks_lim; /* approximate limit on alloc_blocks */
36 StgPtr alloc_Hp = NULL; /* next free byte in small_alloc_list */
37 StgPtr alloc_HpLim = NULL; /* end of block at small_alloc_list */
39 generation *generations; /* all the generations */
40 generation *g0; /* generation 0, for convenience */
41 generation *oldest_gen; /* oldest generation, for convenience */
42 step *g0s0; /* generation 0, step 0, for convenience */
44 lnat total_allocated = 0; /* total memory allocated during run */
47 * Storage manager mutex: protects all the above state from
48 * simultaneous access by two STG threads.
51 pthread_mutex_t sm_mutex = PTHREAD_MUTEX_INITIALIZER;
57 static void *stgAllocForGMP (size_t size_in_bytes);
58 static void *stgReallocForGMP (void *ptr, size_t old_size, size_t new_size);
59 static void stgDeallocForGMP (void *ptr, size_t size);
68 if (RtsFlags.GcFlags.maxHeapSize != 0 &&
69 RtsFlags.GcFlags.heapSizeSuggestion >
70 RtsFlags.GcFlags.maxHeapSize) {
71 RtsFlags.GcFlags.maxHeapSize = RtsFlags.GcFlags.heapSizeSuggestion;
74 if (RtsFlags.GcFlags.maxHeapSize != 0 &&
75 RtsFlags.GcFlags.minAllocAreaSize >
76 RtsFlags.GcFlags.maxHeapSize) {
77 prog_belch("maximum heap size (-M) is smaller than minimum alloc area size (-A)");
83 /* allocate generation info array */
84 generations = (generation *)stgMallocBytes(RtsFlags.GcFlags.generations
85 * sizeof(struct _generation),
88 /* Initialise all generations */
89 for(g = 0; g < RtsFlags.GcFlags.generations; g++) {
90 gen = &generations[g];
92 gen->mut_list = END_MUT_LIST;
93 gen->mut_once_list = END_MUT_LIST;
95 gen->failed_promotions = 0;
99 /* A couple of convenience pointers */
100 g0 = &generations[0];
101 oldest_gen = &generations[RtsFlags.GcFlags.generations-1];
103 /* Allocate step structures in each generation */
104 if (RtsFlags.GcFlags.generations > 1) {
105 /* Only for multiple-generations */
107 /* Oldest generation: one step */
108 oldest_gen->n_steps = 1;
110 stgMallocBytes(1 * sizeof(struct _step), "initStorage: last step");
112 /* set up all generations except the oldest with the requested number of steps (RtsFlags.GcFlags.steps) */
113 for(g = 0; g < RtsFlags.GcFlags.generations-1; g++) {
114 generations[g].n_steps = RtsFlags.GcFlags.steps;
115 generations[g].steps =
116 stgMallocBytes (RtsFlags.GcFlags.steps * sizeof(struct _step),
117 "initStorage: steps");
121 /* single generation, i.e. a two-space collector */
123 g0->steps = stgMallocBytes (sizeof(struct _step), "initStorage: steps");
126 /* Initialise all steps */
127 for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
128 for (s = 0; s < generations[g].n_steps; s++) {
129 stp = &generations[g].steps[s];
133 stp->gen = &generations[g];
140 stp->large_objects = NULL;
141 stp->n_large_blocks = 0;
142 stp->new_large_objects = NULL;
143 stp->scavenged_large_objects = NULL;
144 stp->n_scavenged_large_blocks = 0;
145 stp->is_compacted = 0;
150 /* Set up the destination pointers in each younger gen. step */
151 for (g = 0; g < RtsFlags.GcFlags.generations-1; g++) {
152 for (s = 0; s < generations[g].n_steps-1; s++) {
153 generations[g].steps[s].to = &generations[g].steps[s+1];
155 generations[g].steps[s].to = &generations[g+1].steps[0];
158 /* The oldest generation has one step and it is compacted. */
159 if (RtsFlags.GcFlags.compact) {
160 if (RtsFlags.GcFlags.generations == 1) {
161 belch("WARNING: compaction is incompatible with -G1; disabled");
163 oldest_gen->steps[0].is_compacted = 1;
166 oldest_gen->steps[0].to = &oldest_gen->steps[0];
168 /* generation 0 is special: that's the nursery */
169 generations[0].max_blocks = 0;
171 /* G0S0: the allocation area. Policy: keep the allocation area
172 * small to begin with, even if we have a large suggested heap
173 * size. Reason: we're going to do a major collection first, and we
174 * don't want it to be a big one. This vague idea is borne out by
175 * rigorous experimental evidence.
177 g0s0 = &generations[0].steps[0];
181 weak_ptr_list = NULL;
184 /* initialise the allocate() interface */
185 small_alloc_list = NULL;
186 large_alloc_list = NULL;
188 alloc_blocks_lim = RtsFlags.GcFlags.minAllocAreaSize;
190 /* Tell GNU multi-precision pkg about our custom alloc functions */
191 mp_set_memory_functions(stgAllocForGMP, stgReallocForGMP, stgDeallocForGMP);
194 pthread_mutex_init(&sm_mutex, NULL);
197 IF_DEBUG(gc, statDescribeGens());
203 stat_exit(calcAllocated());
206 /* -----------------------------------------------------------------------------
209 The entry code for every CAF does the following:
211 - builds a CAF_BLACKHOLE in the heap
212 - pushes an update frame pointing to the CAF_BLACKHOLE
213 - invokes UPD_CAF(), which:
214 - calls newCaf, below
215 - updates the CAF with a static indirection to the CAF_BLACKHOLE
217 Why do we build a BLACKHOLE in the heap rather than just updating
218 the thunk directly? It's so that we only need one kind of update
219 frame - otherwise we'd need a static version of the update frame too.
221 newCaf() does the following:
223 - it puts the CAF on the oldest generation's mut-once list.
224 This is so that we can treat the CAF as a root when collecting younger generations.
227 For GHCi, we have additional requirements when dealing with CAFs:
229 - we must *retain* all dynamically-loaded CAFs ever entered,
230 just in case we need them again.
231 - we must be able to *revert* CAFs that have been evaluated, to
232 their pre-evaluated form.
234 To do this, we use an additional CAF list. When newCaf() is
235 called on a dynamically-loaded CAF, we add it to the CAF list
236 instead of the old-generation mutable list, and save away its
237 old info pointer (in caf->saved_info) for later reversion.
239 To revert all the CAFs, we traverse the CAF list and reset the
240 info pointer to caf->saved_info, then throw away the CAF list.
241 (see GC.c:revertCAFs()).
245 -------------------------------------------------------------------------- */
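/* A minimal sketch (not compiled) of the reversion walk described above,
 * using only the caf_list / saved_info / static_link fields that newCAF()
 * below manipulates.  The real code lives in GC.c:revertCAFs() and may
 * differ in detail.
 */
#if 0
static void revertCAFs_sketch( void )
{
  StgIndStatic *c = (StgIndStatic *)caf_list;
  StgIndStatic *next;

  while (c != NULL) {
    next = (StgIndStatic *)c->static_link;
    SET_INFO((StgClosure *)c, c->saved_info);  /* restore the pre-evaluated entry code */
    c->saved_info = NULL;
    c = next;
  }
  caf_list = NULL;                             /* throw the CAF list away */
}
#endif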
248 newCAF(StgClosure* caf)
250 /* Put this CAF on the mutable list for the old generation.
251 * This is a HACK - the IND_STATIC closure doesn't really have
252 * a mut_link field, but we pretend it has - in fact we re-use
253 * the STATIC_LINK field for the time being, because when we
254 * come to do a major GC we won't need the mut_link field
255 * any more and can use it as a STATIC_LINK.
257 ACQUIRE_LOCK(&sm_mutex);
259 if (is_dynamically_loaded_rwdata_ptr((StgPtr)caf)) {
260 ((StgIndStatic *)caf)->saved_info = (StgInfoTable *)caf->header.info;
261 ((StgIndStatic *)caf)->static_link = caf_list;
264 ((StgIndStatic *)caf)->saved_info = NULL;
265 ((StgMutClosure *)caf)->mut_link = oldest_gen->mut_once_list;
266 oldest_gen->mut_once_list = (StgMutClosure *)caf;
269 RELEASE_LOCK(&sm_mutex);
272 /* If we are PAR or DIST then we never forget a CAF */
274 //belch("<##> Globalising CAF %08x %s",caf,info_type(caf));
275 newGA = makeGlobal(caf, rtsTrue);  /* given full weight */
281 /* -----------------------------------------------------------------------------
283 -------------------------------------------------------------------------- */
286 allocNurseries( void )
295 for (cap = free_capabilities; cap != NULL; cap = cap->link) {
296 cap->rNursery = allocNursery(NULL, RtsFlags.GcFlags.minAllocAreaSize);
297 cap->rCurrentNursery = cap->rNursery;
298 for (bd = cap->rNursery; bd != NULL; bd = bd->link) {
299 bd->u.back = (bdescr *)cap;
302 /* Set the back links to be equal to the Capability,
303 * so we can do slightly better informed locking.
307 g0s0->blocks = allocNursery(NULL, RtsFlags.GcFlags.minAllocAreaSize);
308 g0s0->n_blocks = RtsFlags.GcFlags.minAllocAreaSize;
309 g0s0->to_blocks = NULL;
310 g0s0->n_to_blocks = 0;
311 MainCapability.r.rNursery = g0s0->blocks;
312 MainCapability.r.rCurrentNursery = g0s0->blocks;
313 /* hp, hpLim, hp_bd, to_space etc. aren't used in G0S0 */
318 resetNurseries( void )
324 /* All tasks must be stopped */
325 ASSERT(n_free_capabilities == RtsFlags.ParFlags.nNodes);
327 for (cap = free_capabilities; cap != NULL; cap = cap->link) {
328 for (bd = cap->rNursery; bd; bd = bd->link) {
329 bd->free = bd->start;
330 ASSERT(bd->gen_no == 0);
331 ASSERT(bd->step == g0s0);
332 IF_DEBUG(sanity,memset(bd->start, 0xaa, BLOCK_SIZE));
334 cap->rCurrentNursery = cap->rNursery;
337 for (bd = g0s0->blocks; bd; bd = bd->link) {
339 // Reset every word in the nursery to zero when doing LDV profiling.
340 // This relieves the mutator of the burden of zeroing every new closure
341 // that it allocates in the nursery.
343 // ToDo: make it more efficient, e.g. memset()
346 if (RtsFlags.ProfFlags.doHeapProfile == HEAP_BY_LDV) {
347 for (p = bd->start; p < bd->start + BLOCK_SIZE_W; p++)
351 bd->free = bd->start;
352 ASSERT(bd->gen_no == 0);
353 ASSERT(bd->step == g0s0);
354 IF_DEBUG(sanity,memset(bd->start, 0xaa, BLOCK_SIZE));
356 MainCapability.r.rNursery = g0s0->blocks;
357 MainCapability.r.rCurrentNursery = g0s0->blocks;
362 allocNursery (bdescr *tail, nat blocks)
367 // Allocate a nursery: we allocate fresh blocks one at a time and
368 // cons them on to the front of the list, not forgetting to update
369 // the back pointer on the tail of the list to point to the new block.
370 for (i=0; i < blocks; i++) {
373 processNursery() in LdvProfile.c assumes that every block group in
374 the nursery contains only a single block. So, if a block group is
375 given multiple blocks, change processNursery() accordingly.
379 // double-link the nursery: we might need to insert blocks
386 bd->free = bd->start;
394 resizeNursery ( nat blocks )
400 barf("resizeNursery: can't resize in SMP mode");
403 nursery_blocks = g0s0->n_blocks;
404 if (nursery_blocks == blocks) {
408 else if (nursery_blocks < blocks) {
409 IF_DEBUG(gc, fprintf(stderr, "Increasing size of nursery to %d blocks\n",
411 g0s0->blocks = allocNursery(g0s0->blocks, blocks-nursery_blocks);
417 IF_DEBUG(gc, fprintf(stderr, "Decreasing size of nursery to %d blocks\n",
421 while (nursery_blocks > blocks) {
423 next_bd->u.back = NULL;
424 nursery_blocks -= bd->blocks; // might be a large block
429 // might have gone just under, by freeing a large block, so make
430 // up the difference.
431 if (nursery_blocks < blocks) {
432 g0s0->blocks = allocNursery(g0s0->blocks, blocks-nursery_blocks);
436 g0s0->n_blocks = blocks;
437 ASSERT(countBlocks(g0s0->blocks) == g0s0->n_blocks);
440 /* -----------------------------------------------------------------------------
441 The allocate() interface
443 allocate(n) always succeeds, and returns a chunk of memory n words
444 long. n can be larger than the size of a block if necessary, in
445 which case a contiguous block group will be allocated.
446 -------------------------------------------------------------------------- */
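/* A hypothetical caller (not compiled), to make the contract above concrete:
 * ask for a number of words and get a pointer back; requests of
 * LARGE_OBJECT_THRESHOLD bytes or more are satisfied from a contiguous block
 * group.  This mirrors what stgAllocForGMP() does further down; `payload' is
 * purely illustrative.
 */
#if 0
static void allocate_usage_sketch( void )
{
  nat payload = 100;                    /* payload size in words (illustrative) */
  StgArrWords *arr;

  arr = (StgArrWords *)allocate(sizeofW(StgArrWords) + payload);
  SET_ARR_HDR(arr, &stg_ARR_WORDS_info, CCCS, payload);
}
#endif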
454 ACQUIRE_LOCK(&sm_mutex);
456 TICK_ALLOC_HEAP_NOCTR(n);
459 /* big allocation (>=LARGE_OBJECT_THRESHOLD) */
460 /* ToDo: allocate directly into generation 1 */
461 if (n >= LARGE_OBJECT_THRESHOLD/sizeof(W_)) {
462 nat req_blocks = (lnat)BLOCK_ROUND_UP(n*sizeof(W_)) / BLOCK_SIZE;
463 bd = allocGroup(req_blocks);
464 dbl_link_onto(bd, &g0s0->large_objects);
467 bd->flags = BF_LARGE;
468 bd->free = bd->start;
469 /* don't add these blocks to alloc_blocks, since we're assuming
470 * that large objects are likely to remain live for quite a while
471 * (eg. running threads), so garbage collecting early won't make
474 alloc_blocks += req_blocks;
475 RELEASE_LOCK(&sm_mutex);
478 /* small allocation (<LARGE_OBJECT_THRESHOLD) */
479 } else if (small_alloc_list == NULL || alloc_Hp + n > alloc_HpLim) {
480 if (small_alloc_list) {
481 small_alloc_list->free = alloc_Hp;
484 bd->link = small_alloc_list;
485 small_alloc_list = bd;
489 alloc_Hp = bd->start;
490 alloc_HpLim = bd->start + BLOCK_SIZE_W;
496 RELEASE_LOCK(&sm_mutex);
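/* allocated_bytes() below reports how much has been allocated through this
 * interface since the last GC: all the allocate()d blocks, minus the unused
 * tail of the current block.  For example (illustrative numbers only): with
 * alloc_blocks == 3 and 200 unused words between alloc_Hp and alloc_HpLim it
 * returns 3 * BLOCK_SIZE_W - 200.  As the expression shows, the result is in
 * words, despite the name.
 */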
501 allocated_bytes( void )
503 return (alloc_blocks * BLOCK_SIZE_W - (alloc_HpLim - alloc_Hp));
506 /* ---------------------------------------------------------------------------
507 Allocate a fixed/pinned object.
509 We allocate small pinned objects into a single block, allocating a
510 new block when the current one overflows. The block is chained
511 onto the large_objects list of generation 0, step 0.
513 NOTE: The GC can't in general handle pinned objects. This
514 interface is only safe to use for ByteArrays, which have no
515 pointers and don't require scavenging. It works because the
516 block's descriptor has the BF_LARGE flag set, so the block is
517 treated as a large object and chained onto various lists, rather
518 than the individual objects being copied. However, when it comes
519 to scavenge the block, the GC will only scavenge the first object.
520 The reason is that the GC can't linearly scan a block of pinned
521 objects at the moment (doing so would require using the
522 mostly-copying techniques). But since we're restricting ourselves
523 to pinned ByteArrays, not scavenging is ok.
525 This function is called by newPinnedByteArray# which immediately
526 fills the allocated memory with a MutableByteArray#.
527 ------------------------------------------------------------------------- */
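/* A hypothetical caller (not compiled) along the lines of newPinnedByteArray#
 * mentioned above: round the byte count up to whole words, allocate pinned
 * storage, and write an array header into it.  stg_ARR_WORDS_info is used
 * here purely for illustration.
 */
#if 0
static void allocatePinned_usage_sketch( void )
{
  nat n = 4096;                                     /* requested size in bytes (illustrative) */
  nat payload = (n + sizeof(W_) - 1) / sizeof(W_);  /* round up to whole words */
  StgArrWords *arr;

  arr = (StgArrWords *)allocatePinned(sizeofW(StgArrWords) + payload);
  SET_ARR_HDR(arr, &stg_ARR_WORDS_info, CCCS, payload);
}
#endif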
530 allocatePinned( nat n )
533 bdescr *bd = pinned_object_block;
535 ACQUIRE_LOCK(&sm_mutex);
537 TICK_ALLOC_HEAP_NOCTR(n);
540 // If the request is for a large object, then allocate()
541 // will give us a pinned object anyway.
542 if (n >= LARGE_OBJECT_THRESHOLD/sizeof(W_)) {
543 RELEASE_LOCK(&sm_mutex);
547 // If we don't have a block of pinned objects yet, or the current
548 // one isn't large enough to hold the new object, allocate a new one.
549 if (bd == NULL || (bd->free + n) > (bd->start + BLOCK_SIZE_W)) {
550 pinned_object_block = bd = allocBlock();
551 dbl_link_onto(bd, &g0s0->large_objects);
554 bd->flags = BF_LARGE;
555 bd->free = bd->start;
561 RELEASE_LOCK(&sm_mutex);
565 /* -----------------------------------------------------------------------------
566 Allocation functions for GMP.
568 These all use the allocate() interface - we can't have any garbage
569 collection going on during a gmp operation, so we use allocate()
570 which always succeeds. The gmp operations which might need to
571 allocate will ask the storage manager (via doYouWantToGC()) whether
572 a garbage collection is required, in case we get into a loop doing
573 only allocate() style allocation.
574 -------------------------------------------------------------------------- */
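/* The doYouWantToGC() test mentioned above presumably just compares the
 * counters declared at the top of this file; a minimal sketch (not compiled)
 * under that assumption.  The real definition lives elsewhere (e.g. in
 * Storage.h) and may differ.
 */
#if 0
static rtsBool doYouWantToGC_sketch( void )
{
  return (alloc_blocks >= alloc_blocks_lim) ? rtsTrue : rtsFalse;
}
#endif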
577 stgAllocForGMP (size_t size_in_bytes)
580 nat data_size_in_words, total_size_in_words;
582 /* should be a multiple of sizeof(StgWord) (whole no. of limbs) */
583 ASSERT(size_in_bytes % sizeof(W_) == 0);
585 data_size_in_words = size_in_bytes / sizeof(W_);
586 total_size_in_words = sizeofW(StgArrWords) + data_size_in_words;
588 /* allocate and fill it in. */
589 arr = (StgArrWords *)allocate(total_size_in_words);
590 SET_ARR_HDR(arr, &stg_ARR_WORDS_info, CCCS, data_size_in_words);
592 /* and return a ptr to the goods inside the array */
593 return(BYTE_ARR_CTS(arr));
597 stgReallocForGMP (void *ptr, size_t old_size, size_t new_size)
599 void *new_stuff_ptr = stgAllocForGMP(new_size);
601 char *p = (char *) ptr;
602 char *q = (char *) new_stuff_ptr;
604 for (; i < old_size; i++, p++, q++) {
608 return(new_stuff_ptr);
612 stgDeallocForGMP (void *ptr STG_UNUSED,
613 size_t size STG_UNUSED)
615 /* easy for us: the garbage collector does the dealloc'n */
618 /* -----------------------------------------------------------------------------
620 * -------------------------------------------------------------------------- */
622 /* -----------------------------------------------------------------------------
625 * Approximate how much we've allocated: number of blocks in the
626 * nursery + blocks allocated via allocate() - unused nursery blocks.
627 * This leaves a little slop at the end of each block, and doesn't
628 * take into account large objects (ToDo).
629 * -------------------------------------------------------------------------- */
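/* In the single-threaded case below this boils down to roughly:
 *
 *   allocated = g0s0->n_blocks * BLOCK_SIZE_W
 *             + allocated_bytes()
 *             - BLOCK_SIZE_W * (whole nursery blocks beyond rCurrentNursery)
 *             - (unused words at the end of the rCurrentNursery block)
 */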
632 calcAllocated( void )
640 /* All tasks must be stopped. Can't assert that all the
641 capabilities are owned by the scheduler, though: one or more
642 tasks might have been stopped while they were running (non-main)
644 /* ASSERT(n_free_capabilities == RtsFlags.ParFlags.nNodes); */
647 n_free_capabilities * RtsFlags.GcFlags.minAllocAreaSize * BLOCK_SIZE_W
650 for (cap = free_capabilities; cap != NULL; cap = cap->link) {
651 for ( bd = cap->rCurrentNursery->link; bd != NULL; bd = bd->link ) {
652 allocated -= BLOCK_SIZE_W;
654 if (cap->rCurrentNursery->free < cap->rCurrentNursery->start + BLOCK_SIZE_W) {
656 allocated -= (cap->rCurrentNursery->start + BLOCK_SIZE_W)
657 - cap->rCurrentNursery->free;
662 bdescr *current_nursery = MainCapability.r.rCurrentNursery;
664 allocated = (g0s0->n_blocks * BLOCK_SIZE_W) + allocated_bytes();
665 for ( bd = current_nursery->link; bd != NULL; bd = bd->link ) {
666 allocated -= BLOCK_SIZE_W;
668 if (current_nursery->free < current_nursery->start + BLOCK_SIZE_W) {
669 allocated -= (current_nursery->start + BLOCK_SIZE_W)
670 - current_nursery->free;
674 total_allocated += allocated;
678 /* Approximate the amount of live data in the heap. To be called just
679 * after garbage collection (see GarbageCollect()).
688 if (RtsFlags.GcFlags.generations == 1) {
689 live = (g0s0->n_to_blocks - 1) * BLOCK_SIZE_W +
690 ((lnat)g0s0->hp_bd->free - (lnat)g0s0->hp_bd->start) / sizeof(W_);
694 for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
695 for (s = 0; s < generations[g].n_steps; s++) {
696 /* approximate amount of live data (doesn't take into account slop
697 * at end of each block).
699 if (g == 0 && s == 0) {
702 stp = &generations[g].steps[s];
703 live += (stp->n_large_blocks + stp->n_blocks - 1) * BLOCK_SIZE_W;
704 if (stp->hp_bd != NULL) {
705 live += ((lnat)stp->hp_bd->free - (lnat)stp->hp_bd->start) / sizeof(W_);
713 /* Approximate the number of blocks that will be needed at the next
714 * garbage collection.
716 * Assume: all data currently live will remain live. Steps that will
717 * be collected next time will therefore need twice as many blocks
718 * since all the data will be copied.
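/* Worked example (illustrative numbers): a 100-block step in a generation
 * that looks due for collection (its step 0 has exceeded the generation's
 * max_blocks) and is not compacted contributes 2 * 100 = 200 blocks to the
 * estimate, because its live data will be copied; any other step contributes
 * its current 100 blocks.
 */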
727 for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
728 for (s = 0; s < generations[g].n_steps; s++) {
729 if (g == 0 && s == 0) { continue; }
730 stp = &generations[g].steps[s];
731 if (generations[g].steps[0].n_blocks +
732 generations[g].steps[0].n_large_blocks
733 > generations[g].max_blocks
734 && stp->is_compacted == 0) {
735 needed += 2 * stp->n_blocks;
737 needed += stp->n_blocks;
744 /* -----------------------------------------------------------------------------
747 memInventory() checks for memory leaks by counting up all the
748 blocks we know about and comparing that to the number of blocks
749 allegedly floating around in the system.
750 -------------------------------------------------------------------------- */
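/* The invariant being checked: every block is accounted for either in some
 * step, a large-object or allocate() list, the retainer-profiler stacks, the
 * arenas, or the free list, so that
 *
 *   total_blocks + free_blocks == mblocks_allocated * BLOCKS_PER_MBLOCK
 */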
760 lnat total_blocks = 0, free_blocks = 0;
762 /* count the blocks we currently have */
764 for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
765 for (s = 0; s < generations[g].n_steps; s++) {
766 stp = &generations[g].steps[s];
767 total_blocks += stp->n_blocks;
768 if (RtsFlags.GcFlags.generations == 1) {
769 /* two-space collector has a to-space too :-) */
770 total_blocks += g0s0->n_to_blocks;
772 for (bd = stp->large_objects; bd; bd = bd->link) {
773 total_blocks += bd->blocks;
774 /* hack for megablock groups: they have an extra block or two in
775 the second and subsequent megablocks where the block
776 descriptors would normally go.
778 if (bd->blocks > BLOCKS_PER_MBLOCK) {
779 total_blocks -= (MBLOCK_SIZE / BLOCK_SIZE - BLOCKS_PER_MBLOCK)
780 * (bd->blocks/(MBLOCK_SIZE/BLOCK_SIZE));
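/* Worked example (illustrative constants only): if MBLOCK_SIZE/BLOCK_SIZE
   were 256 and BLOCKS_PER_MBLOCK were 252, a group spanning two megablocks
   would have bd->blocks == 252 + 256 == 508, and the correction subtracts
   (256 - 252) * (508 / 256) == 4 blocks, leaving 2 * 252, which is exactly
   what the two megablocks contribute to the check below. */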
786 /* any blocks held by allocate() */
787 for (bd = small_alloc_list; bd; bd = bd->link) {
788 total_blocks += bd->blocks;
790 for (bd = large_alloc_list; bd; bd = bd->link) {
791 total_blocks += bd->blocks;
795 if (RtsFlags.ProfFlags.doHeapProfile == HEAP_BY_RETAINER) {
796 for (bd = firstStack; bd != NULL; bd = bd->link)
797 total_blocks += bd->blocks;
801 // count the blocks allocated by the arena allocator
802 total_blocks += arenaBlocks();
804 /* count the blocks on the free list */
805 free_blocks = countFreeList();
807 if (total_blocks + free_blocks != mblocks_allocated * BLOCKS_PER_MBLOCK) {
809 fprintf(stderr, "Blocks: %ld live + %ld free = %ld total (%ld around)\n",
810 total_blocks, free_blocks, total_blocks + free_blocks,
811 mblocks_allocated * BLOCKS_PER_MBLOCK);
814 ASSERT(total_blocks + free_blocks == mblocks_allocated * BLOCKS_PER_MBLOCK);
819 countBlocks(bdescr *bd)
822 for (n=0; bd != NULL; bd=bd->link) {
828 /* Full heap sanity check. */
834 if (RtsFlags.GcFlags.generations == 1) {
835 checkHeap(g0s0->to_blocks);
836 checkChain(g0s0->large_objects);
839 for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
840 for (s = 0; s < generations[g].n_steps; s++) {
841 ASSERT(countBlocks(generations[g].steps[s].blocks)
842 == generations[g].steps[s].n_blocks);
843 ASSERT(countBlocks(generations[g].steps[s].large_objects)
844 == generations[g].steps[s].n_large_blocks);
845 if (g == 0 && s == 0) { continue; }
846 checkHeap(generations[g].steps[s].blocks);
847 checkChain(generations[g].steps[s].large_objects);
849 checkMutableList(generations[g].mut_list, g);
850 checkMutOnceList(generations[g].mut_once_list, g);
854 checkFreeListSanity();
858 // handy function for use in gdb, because Bdescr() is inlined.
859 extern bdescr *_bdescr( StgPtr p );
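/* A plausible body for the gdb helper declared above (not compiled); the real
 * definition may differ.
 */
#if 0
bdescr *_bdescr( StgPtr p )
{
  return Bdescr(p);   /* same as the inlined Bdescr(), but callable from gdb */
}
#endif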