/* -----------------------------------------------------------------------------
 * $Id: Storage.c,v 1.54 2001/11/22 14:25:12 simonmar Exp $
 * (c) The GHC Team, 1998-1999
 * Storage manager front end
 * ---------------------------------------------------------------------------*/

#include "PosixSource.h"
#include "BlockAlloc.h"
#include "StoragePriv.h"
#include "RetainerProfile.h"    // for counting memory blocks (memInventory)

StgClosure *caf_list = NULL;

bdescr *small_alloc_list;       /* allocate()d small objects */
bdescr *large_alloc_list;       /* allocate()d large objects */
bdescr *pinned_object_block;    /* allocate pinned objects into this block */
nat alloc_blocks;               /* number of allocate()d blocks since GC */
nat alloc_blocks_lim;           /* approximate limit on alloc_blocks */

StgPtr alloc_Hp    = NULL;      /* next free byte in small_alloc_list */
StgPtr alloc_HpLim = NULL;      /* end of block at small_alloc_list   */

generation *generations;        /* all the generations */
generation *g0;                 /* generation 0, for convenience */
generation *oldest_gen;         /* oldest generation, for convenience */
step *g0s0;                     /* generation 0, step 0, for convenience */

lnat total_allocated = 0;       /* total memory allocated during run */
/*
 * Storage manager mutex: protects all the above state from
 * simultaneous access by two STG threads.
 */
pthread_mutex_t sm_mutex = PTHREAD_MUTEX_INITIALIZER;
static void *stgAllocForGMP   (size_t size_in_bytes);
static void *stgReallocForGMP (void *ptr, size_t old_size, size_t new_size);
static void  stgDeallocForGMP (void *ptr, size_t size);

  if (RtsFlags.GcFlags.maxHeapSize != 0 &&
      RtsFlags.GcFlags.heapSizeSuggestion >
      RtsFlags.GcFlags.maxHeapSize) {
    RtsFlags.GcFlags.maxHeapSize = RtsFlags.GcFlags.heapSizeSuggestion;

  if (RtsFlags.GcFlags.maxHeapSize != 0 &&
      RtsFlags.GcFlags.minAllocAreaSize >
      RtsFlags.GcFlags.maxHeapSize) {
    prog_belch("maximum heap size (-M) is smaller than minimum alloc area size (-A)");

  /* allocate generation info array */
  generations = (generation *)stgMallocBytes(RtsFlags.GcFlags.generations
                                             * sizeof(struct _generation),

  /* Initialise all generations */
  for(g = 0; g < RtsFlags.GcFlags.generations; g++) {
    gen = &generations[g];
    gen->mut_list = END_MUT_LIST;
    gen->mut_once_list = END_MUT_LIST;
    gen->failed_promotions = 0;

  /* A couple of convenience pointers */
  g0 = &generations[0];
  oldest_gen = &generations[RtsFlags.GcFlags.generations-1];

  /* Allocate step structures in each generation */
  if (RtsFlags.GcFlags.generations > 1) {
    /* Only for multiple generations */

    /* Oldest generation: one step */
    oldest_gen->n_steps = 1;
    oldest_gen->steps =
      stgMallocBytes(1 * sizeof(struct _step), "initStorage: last step");

    /* Set up all generations except the oldest with the requested number of steps */
    for(g = 0; g < RtsFlags.GcFlags.generations-1; g++) {
      generations[g].n_steps = RtsFlags.GcFlags.steps;
      generations[g].steps =
        stgMallocBytes (RtsFlags.GcFlags.steps * sizeof(struct _step),
                        "initStorage: steps");

    /* single generation, i.e. a two-space collector */
    g0->steps = stgMallocBytes (sizeof(struct _step), "initStorage: steps");

  /* Initialise all steps */
  for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
    for (s = 0; s < generations[g].n_steps; s++) {
      stp = &generations[g].steps[s];
      stp->gen = &generations[g];
      stp->large_objects = NULL;
      stp->n_large_blocks = 0;
      stp->new_large_objects = NULL;
      stp->scavenged_large_objects = NULL;
      stp->n_scavenged_large_blocks = 0;
      stp->is_compacted = 0;

  /* Set up the destination pointers in each younger gen. step */
  for (g = 0; g < RtsFlags.GcFlags.generations-1; g++) {
    for (s = 0; s < generations[g].n_steps-1; s++) {
      generations[g].steps[s].to = &generations[g].steps[s+1];
    generations[g].steps[s].to = &generations[g+1].steps[0];

  /* The oldest generation has one step and it is compacted. */
  if (RtsFlags.GcFlags.compact) {
    if (RtsFlags.GcFlags.generations == 1) {
      belch("WARNING: compaction is incompatible with -G1; disabled");
    oldest_gen->steps[0].is_compacted = 1;

  oldest_gen->steps[0].to = &oldest_gen->steps[0];

  /* generation 0 is special: that's the nursery */
  generations[0].max_blocks = 0;
  /* G0S0: the allocation area.  Policy: keep the allocation area
   * small to begin with, even if we have a large suggested heap
   * size.  Reason: we're going to do a major collection first, and we
   * don't want it to be a big one.  This vague idea is borne out by
   * rigorous experimental evidence.
   */
  g0s0 = &generations[0].steps[0];
  weak_ptr_list = NULL;

  /* initialise the allocate() interface */
  small_alloc_list = NULL;
  large_alloc_list = NULL;
  alloc_blocks_lim = RtsFlags.GcFlags.minAllocAreaSize;

  /* Tell GNU multi-precision pkg about our custom alloc functions */
  mp_set_memory_functions(stgAllocForGMP, stgReallocForGMP, stgDeallocForGMP);

  pthread_mutex_init(&sm_mutex, NULL);

  IF_DEBUG(gc, statDescribeGens());

  stat_exit(calcAllocated());

/* -----------------------------------------------------------------------------
   The entry code for every CAF does the following:

      - builds a CAF_BLACKHOLE in the heap
      - pushes an update frame pointing to the CAF_BLACKHOLE
      - invokes UPD_CAF(), which:
          - calls newCaf, below
          - updates the CAF with a static indirection to the CAF_BLACKHOLE

   Why do we build a BLACKHOLE in the heap rather than just updating
   the thunk directly?  It's so that we only need one kind of update
   frame - otherwise we'd need a static version of the update frame too.

   newCaf() does the following:
      - it puts the CAF on the oldest generation's mut-once list.
        This is so that we can treat the CAF as a root when collecting
        younger generations.

   For GHCi, we have additional requirements when dealing with CAFs:
      - we must *retain* all dynamically-loaded CAFs ever entered,
        just in case we need them again.
      - we must be able to *revert* CAFs that have been evaluated, to
        their pre-evaluated form.

   To do this, we use an additional CAF list.  When newCaf() is
   called on a dynamically-loaded CAF, we add it to the CAF list
   instead of the old-generation mutable list, and save away its
   old info pointer (in caf->saved_info) for later reversion.

   To revert all the CAFs, we traverse the CAF list and reset the
   info pointer to caf->saved_info, then throw away the CAF list.
   (see GC.c:revertCAFs()).

   -------------------------------------------------------------------------- */
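/* A sketch of the reversion described above, assuming only what that
 * comment states (the real code lives in GC.c:revertCAFs() and may differ
 * in detail):
 *
 *     StgIndStatic *c, *next;
 *     for (c = (StgIndStatic *)caf_list; c != NULL; c = next) {
 *         next = (StgIndStatic *)c->static_link;
 *         SET_INFO((StgClosure *)c, c->saved_info);   // back to pre-evaluated form
 *         c->saved_info = NULL;
 *     }
 *     caf_list = NULL;                                // throw the list away
 */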
newCAF(StgClosure* caf)
  /* Put this CAF on the mutable list for the old generation.
   * This is a HACK - the IND_STATIC closure doesn't really have
   * a mut_link field, but we pretend it has - in fact we re-use
   * the STATIC_LINK field for the time being, because when we
   * come to do a major GC we won't need the mut_link field
   * any more and can use it as a STATIC_LINK.
   */
  ACQUIRE_LOCK(&sm_mutex);
  if (is_dynamically_loaded_rwdata_ptr((StgPtr)caf)) {
    ((StgIndStatic *)caf)->saved_info  = (StgInfoTable *)caf->header.info;
    ((StgIndStatic *)caf)->static_link = caf_list;

    ((StgIndStatic *)caf)->saved_info = NULL;
    ((StgMutClosure *)caf)->mut_link = oldest_gen->mut_once_list;
    oldest_gen->mut_once_list = (StgMutClosure *)caf;

  RELEASE_LOCK(&sm_mutex);

  /* If we are PAR or DIST then we never forget a CAF */
  //belch("<##> Globalising CAF %08x %s",caf,info_type(caf));
  newGA = makeGlobal(caf, rtsTrue); /* given full weight */

/* -----------------------------------------------------------------------------
   -------------------------------------------------------------------------- */

allocNurseries( void )
  for (cap = free_capabilities; cap != NULL; cap = cap->link) {
    cap->rNursery = allocNursery(NULL, RtsFlags.GcFlags.minAllocAreaSize);
    cap->rCurrentNursery = cap->rNursery;
    for (bd = cap->rNursery; bd != NULL; bd = bd->link) {
      bd->u.back = (bdescr *)cap;
  /* Set the back links to be equal to the Capability,
   * so we can do slightly better informed locking.
   */
  g0s0->blocks = allocNursery(NULL, RtsFlags.GcFlags.minAllocAreaSize);
  g0s0->n_blocks = RtsFlags.GcFlags.minAllocAreaSize;
  g0s0->to_blocks = NULL;
  g0s0->n_to_blocks = 0;
  MainCapability.r.rNursery = g0s0->blocks;
  MainCapability.r.rCurrentNursery = g0s0->blocks;
  /* hp, hpLim, hp_bd, to_space etc. aren't used in G0S0 */

resetNurseries( void )
  /* All tasks must be stopped */
  ASSERT(n_free_capabilities == RtsFlags.ParFlags.nNodes);

  for (cap = free_capabilities; cap != NULL; cap = cap->link) {
    for (bd = cap->rNursery; bd; bd = bd->link) {
      bd->free = bd->start;
      ASSERT(bd->gen_no == 0);
      ASSERT(bd->step == g0s0);
      IF_DEBUG(sanity,memset(bd->start, 0xaa, BLOCK_SIZE));
    cap->rCurrentNursery = cap->rNursery;

  for (bd = g0s0->blocks; bd; bd = bd->link) {
    // Reset every word in the nursery to zero when doing LDV profiling.
    // This relieves the mutator of the burden of zeroing every new closure
    // it stores in the nursery.
    //
    // ToDo: make this more efficient, e.g. with memset().
    if (RtsFlags.ProfFlags.doHeapProfile == HEAP_BY_LDV) {
      for (p = bd->start; p < bd->start + BLOCK_SIZE_W; p++)
        *p = 0;
    bd->free = bd->start;
    ASSERT(bd->gen_no == 0);
    ASSERT(bd->step == g0s0);
    IF_DEBUG(sanity,memset(bd->start, 0xaa, BLOCK_SIZE));
  MainCapability.r.rNursery = g0s0->blocks;
  MainCapability.r.rCurrentNursery = g0s0->blocks;

allocNursery (bdescr *tail, nat blocks)
  // Allocate a nursery: we allocate fresh blocks one at a time and
  // cons them on to the front of the list, not forgetting to update
  // the back pointer on the tail of the list to point to the new block.
  for (i=0; i < blocks; i++) {
    /* processNursery() in LdvProfile.c assumes that every block group in
       the nursery contains only a single block.  So, if a block group is
       given multiple blocks, change processNursery() accordingly.
     */
    // double-link the nursery: we might need to insert blocks
    bd->free = bd->start;

resizeNursery ( nat blocks )
  barf("resizeNursery: can't resize in SMP mode");

  nursery_blocks = g0s0->n_blocks;
  if (nursery_blocks == blocks) {

  else if (nursery_blocks < blocks) {
    IF_DEBUG(gc, fprintf(stderr, "Increasing size of nursery to %d blocks\n",
    g0s0->blocks = allocNursery(g0s0->blocks, blocks-nursery_blocks);

    IF_DEBUG(gc, fprintf(stderr, "Decreasing size of nursery to %d blocks\n",
    while (nursery_blocks > blocks) {
      next_bd->u.back = NULL;
      nursery_blocks -= bd->blocks; // might be a large block

    // might have gone just under, by freeing a large block, so make
    // up the difference.
    if (nursery_blocks < blocks) {
      g0s0->blocks = allocNursery(g0s0->blocks, blocks-nursery_blocks);

  g0s0->n_blocks = blocks;
  ASSERT(countBlocks(g0s0->blocks) == g0s0->n_blocks);

/* -----------------------------------------------------------------------------
   The allocate() interface

   allocate(n) always succeeds, and returns a chunk of memory n words
   long.  n can be larger than the size of a block if necessary, in
   which case a contiguous block group will be allocated.
   -------------------------------------------------------------------------- */
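/* A minimal usage sketch (illustrative only; n_words stands for a payload
 * size in words).  Sizes are given in words and the returned memory is not
 * zeroed.  stgAllocForGMP() below is a real caller that follows this pattern:
 *
 *     StgArrWords *arr;
 *     arr = (StgArrWords *)allocate(sizeofW(StgArrWords) + n_words);
 *     SET_ARR_HDR(arr, &stg_ARR_WORDS_info, CCCS, n_words);
 */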
  ACQUIRE_LOCK(&sm_mutex);

  TICK_ALLOC_HEAP_NOCTR(n);

  /* big allocation (>=LARGE_OBJECT_THRESHOLD) */
  /* ToDo: allocate directly into generation 1 */
  if (n >= LARGE_OBJECT_THRESHOLD/sizeof(W_)) {
    nat req_blocks = (lnat)BLOCK_ROUND_UP(n*sizeof(W_)) / BLOCK_SIZE;
    bd = allocGroup(req_blocks);
    dbl_link_onto(bd, &g0s0->large_objects);
    bd->flags = BF_LARGE;
    bd->free = bd->start;
    /* don't add these blocks to alloc_blocks, since we're assuming
     * that large objects are likely to remain live for quite a while
     * (eg. running threads), so garbage collecting early won't make
     * much difference.
     */
    alloc_blocks += req_blocks;
    RELEASE_LOCK(&sm_mutex);

  /* small allocation (<LARGE_OBJECT_THRESHOLD) */
  } else if (small_alloc_list == NULL || alloc_Hp + n > alloc_HpLim) {
    if (small_alloc_list) {
      small_alloc_list->free = alloc_Hp;
    bd->link = small_alloc_list;
    small_alloc_list = bd;
    alloc_Hp = bd->start;
    alloc_HpLim = bd->start + BLOCK_SIZE_W;

  RELEASE_LOCK(&sm_mutex);

allocated_bytes( void )
  return (alloc_blocks * BLOCK_SIZE_W - (alloc_HpLim - alloc_Hp));

/* ---------------------------------------------------------------------------
   Allocate a fixed/pinned object.

   We allocate small pinned objects into a single block, allocating a
   new block when the current one overflows.  The block is chained
   onto the large_object_list of generation 0 step 0.

   NOTE: The GC can't in general handle pinned objects.  This
   interface is only safe to use for ByteArrays, which have no
   pointers and don't require scavenging.  It works because the
   block's descriptor has the BF_LARGE flag set, so the block is
   treated as a large object and chained onto various lists, rather
   than the individual objects being copied.  However, when it comes
   to scavenge the block, the GC will only scavenge the first object.
   The reason is that the GC can't linearly scan a block of pinned
   objects at the moment (doing so would require using the
   mostly-copying techniques).  But since we're restricting ourselves
   to pinned ByteArrays, not scavenging is ok.

   This function is called by newPinnedByteArray# which immediately
   fills the allocated memory with a MutableByteArray#.
   ------------------------------------------------------------------------- */
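/* An illustrative caller, in the same style as the allocate() sketch above
 * (the real caller is the newPinnedByteArray# primop, which is not written
 * in C; n_words is a hypothetical payload size in words):
 *
 *     StgArrWords *arr;
 *     arr = (StgArrWords *)allocatePinned(sizeofW(StgArrWords) + n_words);
 *     SET_ARR_HDR(arr, &stg_ARR_WORDS_info, CCCS, n_words);
 */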
allocatePinned( nat n )
  bdescr *bd = pinned_object_block;

  ACQUIRE_LOCK(&sm_mutex);

  TICK_ALLOC_HEAP_NOCTR(n);

  // If the request is for a large object, then allocate()
  // will give us a pinned object anyway.
  if (n >= LARGE_OBJECT_THRESHOLD/sizeof(W_)) {
    RELEASE_LOCK(&sm_mutex);

  // If we don't have a block of pinned objects yet, or the current
  // one isn't large enough to hold the new object, allocate a new one.
  if (bd == NULL || (bd->free + n) > (bd->start + BLOCK_SIZE_W)) {
    pinned_object_block = bd = allocBlock();
    dbl_link_onto(bd, &g0s0->large_objects);
    bd->flags = BF_LARGE;
    bd->free = bd->start;

  RELEASE_LOCK(&sm_mutex);

/* -----------------------------------------------------------------------------
   Allocation functions for GMP.

   These all use the allocate() interface - we can't have any garbage
   collection going on during a gmp operation, so we use allocate()
   which always succeeds.  The gmp operations which might need to
   allocate will ask the storage manager (via doYouWantToGC()) whether
   a garbage collection is required, in case we get into a loop doing
   only allocate() style allocation.
   -------------------------------------------------------------------------- */
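/* The call pattern from code that uses GMP looks roughly like this (a sketch,
 * not actual RTS code; doYouWantToGC() is the storage-manager query mentioned
 * above, and mpz_add() stands for any GMP operation that may allocate):
 *
 *     if (doYouWantToGC()) {
 *         // return to the scheduler and garbage collect before retrying
 *     }
 *     mpz_add(result, x, y);   // any allocation inside GMP goes through
 *                              // stgAllocForGMP()/stgReallocForGMP() below
 */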
stgAllocForGMP (size_t size_in_bytes)
  nat data_size_in_words, total_size_in_words;

  /* should be a multiple of sizeof(StgWord) (whole no. of limbs) */
  ASSERT(size_in_bytes % sizeof(W_) == 0);

  data_size_in_words  = size_in_bytes / sizeof(W_);
  total_size_in_words = sizeofW(StgArrWords) + data_size_in_words;

  /* allocate and fill it in. */
  arr = (StgArrWords *)allocate(total_size_in_words);
  SET_ARR_HDR(arr, &stg_ARR_WORDS_info, CCCS, data_size_in_words);

  /* and return a ptr to the goods inside the array */
  return(BYTE_ARR_CTS(arr));

stgReallocForGMP (void *ptr, size_t old_size, size_t new_size)
  void *new_stuff_ptr = stgAllocForGMP(new_size);
  char *p = (char *) ptr;
  char *q = (char *) new_stuff_ptr;

  for (; i < old_size; i++, p++, q++) {

  return(new_stuff_ptr);

stgDeallocForGMP (void *ptr STG_UNUSED,
                  size_t size STG_UNUSED)
  /* easy for us: the garbage collector does the dealloc'n */

/* -----------------------------------------------------------------------------
 * -------------------------------------------------------------------------- */

/* -----------------------------------------------------------------------------
 * Approximate how much we've allocated: number of blocks in the
 * nursery + blocks allocated via allocate() - unused nursery blocks.
 * This leaves a little slop at the end of each block, and doesn't
 * take into account large objects (ToDo).
 * -------------------------------------------------------------------------- */
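/* Roughly, for the non-SMP case, the calculation below amounts to (a sketch,
 * ignoring large objects):
 *
 *     allocated = g0s0->n_blocks * BLOCK_SIZE_W      // the whole nursery
 *               + allocated_bytes()                  // allocate()d words
 *               - BLOCK_SIZE_W * (nursery blocks after rCurrentNursery)
 *               - (unused words left in the current nursery block)
 */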
calcAllocated( void )
  /* All tasks must be stopped.  Can't assert that all the
     capabilities are owned by the scheduler, though: one or more
     tasks might have been stopped while they were running (non-main)
     threads.
  */
  /* ASSERT(n_free_capabilities == RtsFlags.ParFlags.nNodes); */
    n_free_capabilities * RtsFlags.GcFlags.minAllocAreaSize * BLOCK_SIZE_W

  for (cap = free_capabilities; cap != NULL; cap = cap->link) {
    for ( bd = cap->rCurrentNursery->link; bd != NULL; bd = bd->link ) {
      allocated -= BLOCK_SIZE_W;
    if (cap->rCurrentNursery->free < cap->rCurrentNursery->start
      allocated -= (cap->rCurrentNursery->start + BLOCK_SIZE_W)
                   - cap->rCurrentNursery->free;

  bdescr *current_nursery = MainCapability.r.rCurrentNursery;

  allocated = (g0s0->n_blocks * BLOCK_SIZE_W) + allocated_bytes();
  for ( bd = current_nursery->link; bd != NULL; bd = bd->link ) {
    allocated -= BLOCK_SIZE_W;
  if (current_nursery->free < current_nursery->start + BLOCK_SIZE_W) {
    allocated -= (current_nursery->start + BLOCK_SIZE_W)
                 - current_nursery->free;

  total_allocated += allocated;
/* Approximate the amount of live data in the heap.  To be called just
 * after garbage collection (see GarbageCollect()).
 */
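/* For the generational case, the loop below computes, per step other than
 * g0s0 (a sketch):
 *
 *     live += (n_blocks + n_large_blocks - 1) * BLOCK_SIZE_W
 *           + words used in the step's partially-filled hp_bd block
 */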
  if (RtsFlags.GcFlags.generations == 1) {
    live = (g0s0->n_to_blocks - 1) * BLOCK_SIZE_W +
           ((lnat)g0s0->hp_bd->free - (lnat)g0s0->hp_bd->start) / sizeof(W_);

  for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
    for (s = 0; s < generations[g].n_steps; s++) {
      /* approximate amount of live data (doesn't take into account slop
       * at end of each block).
       */
      if (g == 0 && s == 0) {
      stp = &generations[g].steps[s];
      live += (stp->n_large_blocks + stp->n_blocks - 1) * BLOCK_SIZE_W;
      if (stp->hp_bd != NULL) {
        live += ((lnat)stp->hp_bd->free - (lnat)stp->hp_bd->start)
/* Approximate the number of blocks that will be needed at the next
 * garbage collection.
 *
 * Assume: all data currently live will remain live.  Steps that will
 * be collected next time will therefore need twice as many blocks
 * since all the data will be copied.
 */
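/* In other words, the loop below computes (a sketch):
 *
 *     needed = sum, over every step (other than g0s0) whose generation
 *              looks like it will be collected at the next GC, of:
 *                  2 * n_blocks   if the step is copied
 *                  1 * n_blocks   if the step is compacted
 */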
  for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
    for (s = 0; s < generations[g].n_steps; s++) {
      if (g == 0 && s == 0) { continue; }
      stp = &generations[g].steps[s];
      if (generations[g].steps[0].n_blocks +
          generations[g].steps[0].n_large_blocks
          > generations[g].max_blocks
          && stp->is_compacted == 0) {
        needed += 2 * stp->n_blocks;

        needed += stp->n_blocks;

/* -----------------------------------------------------------------------------
   memInventory() checks for memory leaks by counting up all the
   blocks we know about and comparing that to the number of blocks
   allegedly floating around in the system.
   -------------------------------------------------------------------------- */
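/* The invariant we expect to hold at the end (see the ASSERT below) is:
 *
 *     total_blocks + free_blocks == mblocks_allocated * BLOCKS_PER_MBLOCK
 *
 * i.e. every block in every megablock we have allocated is either accounted
 * for somewhere we know about, or is on the block allocator's free list.
 */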
  lnat total_blocks = 0, free_blocks = 0;

  /* count the blocks we currently have */
  for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
    for (s = 0; s < generations[g].n_steps; s++) {
      stp = &generations[g].steps[s];
      total_blocks += stp->n_blocks;
      if (RtsFlags.GcFlags.generations == 1) {
        /* two-space collector has a to-space too :-) */
        total_blocks += g0s0->n_to_blocks;
      for (bd = stp->large_objects; bd; bd = bd->link) {
        total_blocks += bd->blocks;
        /* hack for megablock groups: they have an extra block or two in
           the second and subsequent megablocks where the block
           descriptors would normally go.
         */
        if (bd->blocks > BLOCKS_PER_MBLOCK) {
          total_blocks -= (MBLOCK_SIZE / BLOCK_SIZE - BLOCKS_PER_MBLOCK)
                          * (bd->blocks/(MBLOCK_SIZE/BLOCK_SIZE));

  /* any blocks held by allocate() */
  for (bd = small_alloc_list; bd; bd = bd->link) {
    total_blocks += bd->blocks;
  for (bd = large_alloc_list; bd; bd = bd->link) {
    total_blocks += bd->blocks;

  if (RtsFlags.ProfFlags.doHeapProfile == HEAP_BY_RETAINER) {
    for (bd = firstStack; bd != NULL; bd = bd->link)
      total_blocks += bd->blocks;

  // count the blocks allocated by the arena allocator
  total_blocks += arenaBlocks();

  /* count the blocks on the free list */
  free_blocks = countFreeList();

  if (total_blocks + free_blocks != mblocks_allocated *
    fprintf(stderr, "Blocks: %ld live + %ld free = %ld total (%ld around)\n",
            total_blocks, free_blocks, total_blocks + free_blocks,
            mblocks_allocated * BLOCKS_PER_MBLOCK);

  ASSERT(total_blocks + free_blocks == mblocks_allocated * BLOCKS_PER_MBLOCK);

countBlocks(bdescr *bd)
  for (n=0; bd != NULL; bd=bd->link) {

/* Full heap sanity check. */
  if (RtsFlags.GcFlags.generations == 1) {
    checkHeap(g0s0->to_blocks);
    checkChain(g0s0->large_objects);

  for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
    for (s = 0; s < generations[g].n_steps; s++) {
      ASSERT(countBlocks(generations[g].steps[s].blocks)
             == generations[g].steps[s].n_blocks);
      ASSERT(countBlocks(generations[g].steps[s].large_objects)
             == generations[g].steps[s].n_large_blocks);
      if (g == 0 && s == 0) { continue; }
      checkHeap(generations[g].steps[s].blocks);
      checkChain(generations[g].steps[s].large_objects);
    checkMutableList(generations[g].mut_list, g);
    checkMutOnceList(generations[g].mut_once_list, g);

  checkFreeListSanity();