/* -----------------------------------------------------------------------------
 * $Id: Storage.c,v 1.45 2001/08/08 11:27:17 simonmar Exp $
 *
 * (c) The GHC Team, 1998-1999
 *
 * Storage manager front end
 *
 * ---------------------------------------------------------------------------*/
#include "Rts.h"
#include "RtsUtils.h"
#include "RtsFlags.h"
#include "Stats.h"
#include "BlockAlloc.h"
#include "MBlock.h"
#include "Weak.h"
#include "Sanity.h"

#include "Storage.h"
#include "Schedule.h"
#include "StoragePriv.h"
#ifndef SMP
nat nursery_blocks;             /* number of blocks in the nursery */
#endif

StgClosure *caf_list = NULL;

bdescr *small_alloc_list;       /* allocate()d small objects */
bdescr *large_alloc_list;       /* allocate()d large objects */
bdescr *pinned_object_block;    /* allocate pinned objects into this block */
nat alloc_blocks;               /* number of allocate()d blocks since GC */
nat alloc_blocks_lim;           /* approximate limit on alloc_blocks */

StgPtr alloc_Hp    = NULL;      /* next free word in small_alloc_list */
StgPtr alloc_HpLim = NULL;      /* end of block at small_alloc_list   */
generation *generations;        /* all the generations */
generation *g0;                 /* generation 0, for convenience */
generation *oldest_gen;         /* oldest generation, for convenience */
step *g0s0;                     /* generation 0, step 0, for convenience */

lnat total_allocated = 0;       /* total memory allocated during run */
/*
 * Storage manager mutex: protects all the above state from
 * simultaneous access by two STG threads.
 */
#ifdef SMP
pthread_mutex_t sm_mutex = PTHREAD_MUTEX_INITIALIZER;
#endif
static void *stgAllocForGMP   (size_t size_in_bytes);
static void *stgReallocForGMP (void *ptr, size_t old_size, size_t new_size);
static void  stgDeallocForGMP (void *ptr, size_t size);
void
initStorage( void )
{
  nat g, s;
  step *stp;
  generation *gen;

  /* If we're doing heap profiling, we want a two-space heap with a
   * fixed-size allocation area so that we get roughly even-spaced
   * samples.
   */

  /* As an experiment, try a 2 generation collector
   */
#if defined(PROFILING) || defined(DEBUG)
  if (RtsFlags.ProfFlags.doHeapProfile) {
    RtsFlags.GcFlags.generations = 1;
    RtsFlags.GcFlags.steps = 1;
    RtsFlags.GcFlags.oldGenFactor = 0;
    RtsFlags.GcFlags.heapSizeSuggestion = 0;
  }
#endif
  if (RtsFlags.GcFlags.maxHeapSize != 0 &&
      RtsFlags.GcFlags.heapSizeSuggestion >
      RtsFlags.GcFlags.maxHeapSize) {
    /* the heap-size suggestion wins: raise the maximum to match */
    RtsFlags.GcFlags.maxHeapSize = RtsFlags.GcFlags.heapSizeSuggestion;
  }

  initBlockAllocator();
  /* allocate generation info array */
  generations = (generation *)stgMallocBytes(RtsFlags.GcFlags.generations
                                             * sizeof(struct _generation),
                                             "initStorage: gens");
  /* Initialise all generations */
  for(g = 0; g < RtsFlags.GcFlags.generations; g++) {
    gen = &generations[g];
    gen->no = g;
    gen->mut_list = END_MUT_LIST;
    gen->mut_once_list = END_MUT_LIST;
    gen->collections = 0;
    gen->failed_promotions = 0;
    gen->max_blocks = 0;
  }
  /* A couple of convenience pointers */
  g0 = &generations[0];
  oldest_gen = &generations[RtsFlags.GcFlags.generations-1];
  /* Allocate step structures in each generation */
  if (RtsFlags.GcFlags.generations > 1) {
    /* Only for multiple-generations */

    /* Oldest generation: one step */
    oldest_gen->n_steps = 1;
    oldest_gen->steps =
      stgMallocBytes(1 * sizeof(struct _step), "initStorage: last step");

    /* set up all except the oldest generation with the requested
     * number of steps (RtsFlags.GcFlags.steps, normally 2) */
    for(g = 0; g < RtsFlags.GcFlags.generations-1; g++) {
      generations[g].n_steps = RtsFlags.GcFlags.steps;
      generations[g].steps =
        stgMallocBytes (RtsFlags.GcFlags.steps * sizeof(struct _step),
                        "initStorage: steps");
    }
  } else {
    /* single generation, i.e. a two-space collector */
    g0->n_steps = 1;
    g0->steps = stgMallocBytes (sizeof(struct _step), "initStorage: steps");
  }
  /* Initialise all steps */
  for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
    for (s = 0; s < generations[g].n_steps; s++) {
      stp = &generations[g].steps[s];
      stp->no = s;
      stp->blocks = NULL;
      stp->n_blocks = 0;
      stp->gen = &generations[g];
      stp->gen_no = g;
      stp->hp = NULL;
      stp->hpLim = NULL;
      stp->hp_bd = NULL;
      stp->n_large_blocks = 0;
      stp->large_objects = NULL;
      stp->new_large_objects = NULL;
      stp->scavenged_large_objects = NULL;
      stp->is_compacted = 0;
    }
  }
  /* Set up the destination pointers in each younger gen. step */
  for (g = 0; g < RtsFlags.GcFlags.generations-1; g++) {
    for (s = 0; s < generations[g].n_steps-1; s++) {
      generations[g].steps[s].to = &generations[g].steps[s+1];
    }
    /* the last step in each gen promotes into the next generation */
    generations[g].steps[s].to = &generations[g+1].steps[0];
  }

  /* The oldest generation has one step, and it collects into itself. */
  oldest_gen->steps[0].to = &oldest_gen->steps[0];
  /* generation 0 is special: that's the nursery */
  generations[0].max_blocks = 0;

  /* G0S0: the allocation area.  Policy: keep the allocation area
   * small to begin with, even if we have a large suggested heap
   * size.  Reason: we're going to do a major collection first, and we
   * don't want it to be a big one.  This vague idea is borne out by
   * rigorous experimental evidence.
   */
  g0s0 = &generations[0].steps[0];

  allocNurseries();
  weak_ptr_list = NULL;
  caf_list = NULL;

  /* initialise the allocate() interface */
  small_alloc_list = NULL;
  large_alloc_list = NULL;
  alloc_blocks = 0;
  alloc_blocks_lim = RtsFlags.GcFlags.minAllocAreaSize;

  /* Tell GNU multi-precision pkg about our custom alloc functions */
  mp_set_memory_functions(stgAllocForGMP, stgReallocForGMP, stgDeallocForGMP);
#ifdef SMP
  pthread_mutex_init(&sm_mutex, NULL);
#endif

  IF_DEBUG(gc, statDescribeGens());
}

void
exitStorage (void)
{
  stat_exit(calcAllocated());
}
/* -----------------------------------------------------------------------------
   CAF management.

   The entry code for every CAF does the following:

      - builds a CAF_BLACKHOLE in the heap
      - pushes an update frame pointing to the CAF_BLACKHOLE
      - invokes UPD_CAF(), which:
          - calls newCaf, below
          - updates the CAF with a static indirection to the CAF_BLACKHOLE

   Why do we build a BLACKHOLE in the heap rather than just updating
   the thunk directly?  It's so that we only need one kind of update
   frame - otherwise we'd need a static version of the update frame too.

   newCaf() does the following:

      - it puts the CAF on the oldest generation's mut-once list.
        This is so that we can treat the CAF as a root when collecting
        younger generations.

   For GHCI, we have additional requirements when dealing with CAFs:

      - we must *retain* all dynamically-loaded CAFs ever entered,
        just in case we need them again.
      - we must be able to *revert* CAFs that have been evaluated, to
        their pre-evaluated form.

      To do this, we use an additional CAF list.  When newCaf() is
      called on a dynamically-loaded CAF, we add it to the CAF list
      instead of the old-generation mutable list, and save away its
      old info pointer (in caf->saved_info) for later reversion.

      To revert all the CAFs, we traverse the CAF list and reset the
      info pointer to caf->saved_info, then throw away the CAF list.
      (see GC.c:revertCAFs()).

   -------------------------------------------------------------------------- */
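/* For illustration, a minimal sketch of the reversion walk described
 * above.  This is NOT the real code (that lives in GC.c:revertCAFs());
 * it assumes only the two fields newCAF() below actually uses,
 * saved_info and static_link.
 */
#if 0 /* illustrative sketch only, not compiled */
static void
revertCAFList( void )
{
    StgClosure *c, *next;
    for (c = caf_list; c != NULL; c = next) {
        next = (StgClosure *)((StgIndStatic *)c)->static_link;
        /* put back the pre-evaluation info pointer... */
        SET_INFO(c, ((StgIndStatic *)c)->saved_info);
        ((StgIndStatic *)c)->saved_info = NULL;
    }
    caf_list = NULL;        /* ...and throw away the CAF list */
}
#endif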
void
newCAF(StgClosure* caf)
{
  /* Put this CAF on the mutable list for the old generation.
   * This is a HACK - the IND_STATIC closure doesn't really have
   * a mut_link field, but we pretend it has - in fact we re-use
   * the STATIC_LINK field for the time being, because when we
   * come to do a major GC we won't need the mut_link field
   * any more and can use it as a STATIC_LINK.
   */
  ACQUIRE_LOCK(&sm_mutex);

  if (is_dynamically_loaded_rwdata_ptr((StgPtr)caf)) {
      /* dynamically-loaded CAF: retain it on the CAF list, saving the
       * old info pointer for later reversion */
      ((StgIndStatic *)caf)->saved_info  = (StgInfoTable *)caf->header.info;
      ((StgIndStatic *)caf)->static_link = caf_list;
      caf_list = caf;
  } else {
      ((StgIndStatic *)caf)->saved_info = NULL;
      ((StgMutClosure *)caf)->mut_link = oldest_gen->mut_once_list;
      oldest_gen->mut_once_list = (StgMutClosure *)caf;
  }

  RELEASE_LOCK(&sm_mutex);
#ifdef PAR
  /* If we are PAR or DIST then we never forget a CAF */
  { globalAddr *newGA;
    //belch("<##> Globalising CAF %08x %s",caf,info_type(caf));
    newGA = makeGlobal(caf,rtsTrue); /* given full weight */
    ASSERT(newGA);
  }
#endif /* PAR */
}
/* -----------------------------------------------------------------------------
   Nursery management.
   -------------------------------------------------------------------------- */

void
allocNurseries( void )
{
#ifdef SMP
  {
    Capability *cap;
    bdescr *bd;

    g0s0->blocks = NULL;
    g0s0->n_blocks = 0;
    for (cap = free_capabilities; cap != NULL; cap = cap->link) {
      cap->rNursery = allocNursery(NULL, RtsFlags.GcFlags.minAllocAreaSize);
      cap->rCurrentNursery = cap->rNursery;
      /* Set the back links to be equal to the Capability,
       * so we can do slightly better informed locking.
       */
      for (bd = cap->rNursery; bd != NULL; bd = bd->link) {
        bd->u.back = (bdescr *)cap;
      }
    }
  }
#else /* !SMP */
  nursery_blocks    = RtsFlags.GcFlags.minAllocAreaSize;
  g0s0->blocks      = allocNursery(NULL, nursery_blocks);
  g0s0->n_blocks    = nursery_blocks;
  g0s0->to_blocks   = NULL;
  g0s0->n_to_blocks = 0;
  MainRegTable.rNursery        = g0s0->blocks;
  MainRegTable.rCurrentNursery = g0s0->blocks;
  /* hp, hpLim, hp_bd, to_space etc. aren't used in G0S0 */
#endif /* SMP */
}
void
resetNurseries( void )
{
  bdescr *bd;
#ifdef SMP
  Capability *cap;

  /* All tasks must be stopped */
  ASSERT(n_free_capabilities == RtsFlags.ParFlags.nNodes);

  for (cap = free_capabilities; cap != NULL; cap = cap->link) {
    for (bd = cap->rNursery; bd; bd = bd->link) {
      bd->free = bd->start;
      ASSERT(bd->gen_no == 0);
      ASSERT(bd->step == g0s0);
      IF_DEBUG(sanity,memset(bd->start, 0xaa, BLOCK_SIZE));
    }
    cap->rCurrentNursery = cap->rNursery;
  }
#else /* !SMP */
  for (bd = g0s0->blocks; bd; bd = bd->link) {
    bd->free = bd->start;
    ASSERT(bd->gen_no == 0);
    ASSERT(bd->step == g0s0);
    IF_DEBUG(sanity,memset(bd->start, 0xaa, BLOCK_SIZE));
  }
  MainRegTable.rNursery = g0s0->blocks;
  MainRegTable.rCurrentNursery = g0s0->blocks;
#endif
}
bdescr *
allocNursery (bdescr *last_bd, nat blocks)
{
  bdescr *bd;
  nat i;

  /* Allocate a nursery, chaining the new blocks onto last_bd */
  for (i=0; i < blocks; i++) {
    bd = allocBlock();
    bd->link = last_bd;
    bd->step = g0s0;
    bd->gen_no = 0;
    bd->flags = 0;
    bd->free = bd->start;
    last_bd = bd;
  }
  return last_bd;
}
void
resizeNursery ( nat blocks )
{
  bdescr *bd;

#ifdef SMP
  barf("resizeNursery: can't resize in SMP mode");
#endif

  /* Handle the easy case first: no change in size */
  if (nursery_blocks == blocks) {
    ASSERT(g0s0->n_blocks == blocks);
    return;
  }

  else if (nursery_blocks < blocks) {
    IF_DEBUG(gc, fprintf(stderr, "Increasing size of nursery to %d blocks\n",
                         blocks));
    g0s0->blocks = allocNursery(g0s0->blocks, blocks-nursery_blocks);
  }

  else {
    bdescr *next_bd;

    IF_DEBUG(gc, fprintf(stderr, "Decreasing size of nursery to %d blocks\n",
                         blocks));
    for (bd = g0s0->blocks; nursery_blocks > blocks; nursery_blocks--) {
      next_bd = bd->link;
      freeGroup(bd);
      bd = next_bd;
    }
    g0s0->blocks = bd;
  }

  g0s0->n_blocks = nursery_blocks = blocks;
}
/* -----------------------------------------------------------------------------
   The allocate() interface

   allocate(n) always succeeds, and returns a chunk of memory n words
   long.  n can be larger than the size of a block if necessary, in
   which case a contiguous block group will be allocated.
   -------------------------------------------------------------------------- */
StgPtr
allocate( nat n )
{
  bdescr *bd;
  StgPtr p;

  ACQUIRE_LOCK(&sm_mutex);

  TICK_ALLOC_HEAP_NOCTR(n);
  CCS_ALLOC(CCCS,n);

  /* big allocation (>LARGE_OBJECT_THRESHOLD) */
  /* ToDo: allocate directly into generation 1 */
  if (n >= LARGE_OBJECT_THRESHOLD/sizeof(W_)) {
    nat req_blocks = (lnat)BLOCK_ROUND_UP(n*sizeof(W_)) / BLOCK_SIZE;
    bd = allocGroup(req_blocks);
    dbl_link_onto(bd, &g0s0->large_objects);
    bd->gen_no = 0;
    bd->step   = g0s0;
    bd->flags  = BF_LARGE;
    bd->free   = bd->start;
    /* these blocks do count towards alloc_blocks, even though large
     * objects are likely to remain live for quite a while (eg. running
     * threads), so garbage collecting early won't make much progress
     * on them.
     */
    alloc_blocks += req_blocks;
    RELEASE_LOCK(&sm_mutex);
    return bd->start;

  /* small allocation (<LARGE_OBJECT_THRESHOLD) */
  } else if (small_alloc_list == NULL || alloc_Hp + n > alloc_HpLim) {
    /* retire the current block, recording how far it was filled */
    if (small_alloc_list) {
      small_alloc_list->free = alloc_Hp;
    }
    bd = allocBlock();
    bd->link = small_alloc_list;
    small_alloc_list = bd;
    bd->gen_no = 0;
    bd->step   = g0s0;
    bd->flags  = 0;
    alloc_Hp    = bd->start;
    alloc_HpLim = bd->start + BLOCK_SIZE_W;
    alloc_blocks++;
  }

  /* bump-allocate from the current small-object block */
  p = alloc_Hp;
  alloc_Hp += n;
  RELEASE_LOCK(&sm_mutex);
  return p;
}
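/* A sketch of typical allocate() usage: carve out a closure-sized chunk
 * and set its header before anything can trigger a GC.  The helper name
 * is hypothetical; a real caller in this file is stgAllocForGMP() below.
 */
#if 0 /* illustrative sketch only, not compiled */
static StgPtr
exampleAllocClosure( nat size_in_words, const StgInfoTable *info )
{
    StgPtr p = allocate(size_in_words);   /* always succeeds */
    SET_HDR((StgClosure *)p, info, CCCS);
    return p;
}
#endif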
lnat
allocated_bytes( void )
{
  /* all blocks handed out by allocate(), minus the unused tail of the
   * current block (note: despite the name, the result is in words) */
  return (alloc_blocks * BLOCK_SIZE_W - (alloc_HpLim - alloc_Hp));
}
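/* Worked example: if three blocks have been filled and 100 words of a
 * fourth are in use, alloc_blocks == 4 and (alloc_HpLim - alloc_Hp) ==
 * BLOCK_SIZE_W - 100, so the result is 3*BLOCK_SIZE_W + 100 words.
 * Slop at the end of the three retired blocks is still counted, which
 * is the "little slop" mentioned in calcAllocated() below.
 */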
/* ---------------------------------------------------------------------------
   Allocate a fixed/pinned object.

   We allocate small pinned objects into a single block, allocating a
   new block when the current one overflows.  The block is chained
   onto the large_objects list of generation 0 step 0.

   NOTE: The GC can't in general handle pinned objects.  This
   interface is only safe to use for ByteArrays, which have no
   pointers and don't require scavenging.  It works because the
   block's descriptor has the BF_LARGE flag set, so the block is
   treated as a large object and chained onto various lists, rather
   than the individual objects being copied.  However, when it comes
   to scavenge the block, the GC will only scavenge the first object.
   The reason is that the GC can't linearly scan a block of pinned
   objects at the moment (doing so would require using the
   mostly-copying techniques).  But since we're restricting ourselves
   to pinned ByteArrays, not scavenging is ok.

   This function is called by newPinnedByteArray# which immediately
   fills the allocated memory with a MutableByteArray#.
   ------------------------------------------------------------------------- */
StgPtr
allocatePinned( nat n )
{
    StgPtr p;
    bdescr *bd = pinned_object_block;

    ACQUIRE_LOCK(&sm_mutex);

    TICK_ALLOC_HEAP_NOCTR(n);
    CCS_ALLOC(CCCS,n);

    // If the request is for a large object, then allocate()
    // will give us a pinned object anyway.
    if (n >= LARGE_OBJECT_THRESHOLD/sizeof(W_)) {
        RELEASE_LOCK(&sm_mutex);
        return allocate(n);
    }

    // If we don't have a block of pinned objects yet, or the current
    // one isn't large enough to hold the new object, allocate a new one.
    if (bd == NULL || (bd->free + n) > (bd->start + BLOCK_SIZE_W)) {
        pinned_object_block = bd = allocBlock();
        dbl_link_onto(bd, &g0s0->large_objects);
        bd->gen_no = 0;
        bd->step   = g0s0;
        bd->flags  = BF_LARGE;
        bd->free   = bd->start;
        alloc_blocks++;
    }

    p = bd->free;
    bd->free += n;
    RELEASE_LOCK(&sm_mutex);
    return p;
}
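/* Sketch of the calling pattern described above (cf. newPinnedByteArray#):
 * allocate pinned space, then immediately write an ARR_WORDS header so
 * the GC sees a well-formed ByteArray.  The helper name is hypothetical.
 */
#if 0 /* illustrative sketch only, not compiled */
static StgArrWords *
examplePinnedByteArray( nat data_size_in_words )
{
    StgArrWords *arr = (StgArrWords *)
        allocatePinned(sizeofW(StgArrWords) + data_size_in_words);
    SET_ARR_HDR(arr, &stg_ARR_WORDS_info, CCCS, data_size_in_words);
    return arr;    /* BYTE_ARR_CTS(arr) is the pinned payload */
}
#endif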
/* -----------------------------------------------------------------------------
   Allocation functions for GMP.

   These all use the allocate() interface - we can't have any garbage
   collection going on during a gmp operation, so we use allocate()
   which always succeeds.  The gmp operations which might need to
   allocate will ask the storage manager (via doYouWantToGC()) whether
   a garbage collection is required, in case we get into a loop doing
   only allocate() style allocation.
   -------------------------------------------------------------------------- */
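/* The layout assumed by the functions below: each GMP limb array lives
 * in the payload of an ordinary ARR_WORDS heap object, so the GC manages
 * it like any other ByteArray:
 *
 *     +-----------------+---------------------------+
 *     | StgArrWords hdr | limb data (whole words)   |
 *     +-----------------+---------------------------+
 *     ^ arr               ^ BYTE_ARR_CTS(arr) -- the pointer GMP sees
 */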
static void *
stgAllocForGMP (size_t size_in_bytes)
{
  StgArrWords* arr;
  nat data_size_in_words, total_size_in_words;

  /* should be a multiple of sizeof(StgWord) (whole no. of limbs) */
  ASSERT(size_in_bytes % sizeof(W_) == 0);

  data_size_in_words  = size_in_bytes / sizeof(W_);
  total_size_in_words = sizeofW(StgArrWords) + data_size_in_words;

  /* allocate and fill it in. */
  arr = (StgArrWords *)allocate(total_size_in_words);
  SET_ARR_HDR(arr, &stg_ARR_WORDS_info, CCCS, data_size_in_words);

  /* and return a ptr to the goods inside the array */
  return(BYTE_ARR_CTS(arr));
}
static void *
stgReallocForGMP (void *ptr, size_t old_size, size_t new_size)
{
    size_t min_size;
    void *new_stuff_ptr = stgAllocForGMP(new_size);
    nat i = 0;
    char *p = (char *) ptr;
    char *q = (char *) new_stuff_ptr;

    /* copy only as much as both buffers can hold: GMP may shrink as
     * well as grow an allocation */
    min_size = old_size < new_size ? old_size : new_size;
    for (; i < min_size; i++, p++, q++) {
        *q = *p;
    }

    return(new_stuff_ptr);
}
static void
stgDeallocForGMP (void *ptr STG_UNUSED,
                  size_t size STG_UNUSED)
{
    /* easy for us: the garbage collector does the dealloc'n */
}
/* -----------------------------------------------------------------------------
 * Stats and stuff
 * -------------------------------------------------------------------------- */
/* -----------------------------------------------------------------------------
 * calcAllocated()
 *
 * Approximate how much we've allocated: number of blocks in the
 * nursery + blocks allocated via allocate() - unused nursery blocks.
 * This leaves a little slop at the end of each block, and doesn't
 * take into account large objects (ToDo).
 * -------------------------------------------------------------------------- */
lnat
calcAllocated( void )
{
  nat allocated;
  bdescr *bd;

#ifdef SMP
  Capability *cap;

  /* All tasks must be stopped.  Can't assert that all the
     capabilities are owned by the scheduler, though: one or more
     tasks might have been stopped while they were running (non-main)
     threads. */
  /* ASSERT(n_free_capabilities == RtsFlags.ParFlags.nNodes); */

  allocated =
    n_free_capabilities * RtsFlags.GcFlags.minAllocAreaSize * BLOCK_SIZE_W
    + allocated_bytes();

  for (cap = free_capabilities; cap != NULL; cap = cap->link) {
    for ( bd = cap->rCurrentNursery->link; bd != NULL; bd = bd->link ) {
      allocated -= BLOCK_SIZE_W;
    }
    if (cap->rCurrentNursery->free < cap->rCurrentNursery->start
        + BLOCK_SIZE_W) {
      allocated -= (cap->rCurrentNursery->start + BLOCK_SIZE_W)
        - cap->rCurrentNursery->free;
    }
  }

#else /* !SMP */
  bdescr *current_nursery = MainRegTable.rCurrentNursery;

  allocated = (nursery_blocks * BLOCK_SIZE_W) + allocated_bytes();
  for ( bd = current_nursery->link; bd != NULL; bd = bd->link ) {
    allocated -= BLOCK_SIZE_W;
  }
  if (current_nursery->free < current_nursery->start + BLOCK_SIZE_W) {
    allocated -= (current_nursery->start + BLOCK_SIZE_W)
      - current_nursery->free;
  }
#endif

  total_allocated += allocated;
  return allocated;
}
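/* Worked example (non-SMP): with a 256-block nursery whose current block
 * is half full and 10 blocks still untouched beyond rCurrentNursery, we
 * start from 256*BLOCK_SIZE_W + allocated_bytes(), subtract BLOCK_SIZE_W
 * for each of the 10 unused blocks, and subtract the unused half of the
 * current block -- leaving roughly the words consumed since the last GC.
 */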
/* Approximate the amount of live data in the heap.  To be called just
 * after garbage collection (see GarbageCollect()).
 */
extern lnat
calcLive(void)
{
  nat g, s;
  lnat live = 0;
  step *stp;

  if (RtsFlags.GcFlags.generations == 1) {
    live = (g0s0->n_to_blocks - 1) * BLOCK_SIZE_W +
      ((lnat)g0s0->hp_bd->free - (lnat)g0s0->hp_bd->start) / sizeof(W_);
    return live;
  }

  for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
    for (s = 0; s < generations[g].n_steps; s++) {
      /* approximate amount of live data (doesn't take into account slop
       * at end of each block).
       */
      if (g == 0 && s == 0) {
        continue;
      }
      stp = &generations[g].steps[s];
      live += (stp->n_large_blocks + stp->n_blocks - 1) * BLOCK_SIZE_W;
      if (stp->hp_bd != NULL) {
        live += ((lnat)stp->hp_bd->free - (lnat)stp->hp_bd->start)
          / sizeof(W_);
      }
    }
  }
  return live;
}
/* Approximate the number of blocks that will be needed at the next
 * garbage collection.
 *
 * Assume: all data currently live will remain live.  Steps that will
 * be collected next time will therefore need twice as many blocks
 * since all the data will be copied.
 */
extern lnat
calcNeeded(void)
{
    lnat needed = 0;
    nat g, s;
    step *stp;

    for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
        for (s = 0; s < generations[g].n_steps; s++) {
            if (g == 0 && s == 0) { continue; }
            stp = &generations[g].steps[s];
            if (generations[g].steps[0].n_blocks +
                generations[g].steps[0].n_large_blocks
                > generations[g].max_blocks
                && stp->is_compacted == 0) {
                needed += 2 * stp->n_blocks;
            } else {
                needed += stp->n_blocks;
            }
        }
    }
    return needed;
}
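/* Example of the doubling rule above: a step holding 100 blocks of live
 * data in a generation over its max_blocks limit will be copied at the
 * next GC, so it contributes 200 blocks; a compacted step, or one in a
 * generation under its limit, contributes just its own 100 blocks.
 */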
/* -----------------------------------------------------------------------------
   Debugging

   memInventory() checks for memory leaks by counting up all the
   blocks we know about and comparing that to the number of blocks
   allegedly floating around in the system.
   -------------------------------------------------------------------------- */

#ifdef DEBUG
void
memInventory(void)
{
  nat g, s;
  step *stp;
  bdescr *bd;
  lnat total_blocks = 0, free_blocks = 0;

  /* count the blocks we currently have */

  for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
    for (s = 0; s < generations[g].n_steps; s++) {
      stp = &generations[g].steps[s];
      total_blocks += stp->n_blocks;
      if (RtsFlags.GcFlags.generations == 1) {
        /* two-space collector has a to-space too :-) */
        total_blocks += g0s0->n_to_blocks;
      }
      for (bd = stp->large_objects; bd; bd = bd->link) {
        total_blocks += bd->blocks;
        /* hack for megablock groups: they have an extra block or two in
           the second and subsequent megablocks where the block
           descriptors would normally go.
        */
        if (bd->blocks > BLOCKS_PER_MBLOCK) {
          total_blocks -= (MBLOCK_SIZE / BLOCK_SIZE - BLOCKS_PER_MBLOCK)
                          * (bd->blocks/(MBLOCK_SIZE/BLOCK_SIZE));
        }
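        /* Example (illustrative figures): if a megablock holds 256
         * block-sized slots of which BLOCKS_PER_MBLOCK == 254 are usable,
         * a group with bd->blocks == 512 spans two megablocks, so we
         * subtract 2 * (256 - 254) = 4 over-counted descriptor slots.
         */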
      }
    }
  }

  /* any blocks held by allocate() */
  for (bd = small_alloc_list; bd; bd = bd->link) {
    total_blocks += bd->blocks;
  }
  for (bd = large_alloc_list; bd; bd = bd->link) {
    total_blocks += bd->blocks;
  }

  /* count the blocks on the free list */
  free_blocks = countFreeList();

  if (total_blocks + free_blocks != mblocks_allocated *
      BLOCKS_PER_MBLOCK) {
    fprintf(stderr, "Blocks: %ld live + %ld free = %ld total (%ld around)\n",
            total_blocks, free_blocks, total_blocks + free_blocks,
            mblocks_allocated * BLOCKS_PER_MBLOCK);
  }

  ASSERT(total_blocks + free_blocks == mblocks_allocated * BLOCKS_PER_MBLOCK);
}
nat
countBlocks(bdescr *bd)
{
    nat n;
    for (n=0; bd != NULL; bd=bd->link) {
        n += bd->blocks;
    }
    return n;
}
/* Full heap sanity check. */

extern void
checkSanity(void)
{
    nat g, s;

    if (RtsFlags.GcFlags.generations == 1) {
        checkHeap(g0s0->to_blocks);
        checkChain(g0s0->large_objects);
    } else {
        for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
            for (s = 0; s < generations[g].n_steps; s++) {
                if (g == 0 && s == 0) { continue; }
                checkHeap(generations[g].steps[s].blocks);
                checkChain(generations[g].steps[s].large_objects);
                ASSERT(countBlocks(generations[g].steps[s].blocks)
                       == generations[g].steps[s].n_blocks);
                ASSERT(countBlocks(generations[g].steps[s].large_objects)
                       == generations[g].steps[s].n_large_blocks);
            }
            checkMutableList(generations[g].mut_list, g);
            checkMutOnceList(generations[g].mut_once_list, g);
        }
    }
    checkFreeListSanity();
}

#endif /* DEBUG */