/* -----------------------------------------------------------------------------
 * $Id: Storage.c,v 1.44 2001/08/08 10:50:37 simonmar Exp $
 *
 * (c) The GHC Team, 1998-1999
 *
 * Storage manager front end
 * ---------------------------------------------------------------------------*/
#include "Rts.h"
#include "RtsUtils.h"
#include "RtsFlags.h"
#include "BlockAlloc.h"
#include "StoragePriv.h"

#include "gmp.h"
nat nursery_blocks;             /* number of blocks in the nursery */

StgClosure *caf_list = NULL;

bdescr *small_alloc_list;       /* allocate()d small objects */
bdescr *large_alloc_list;       /* allocate()d large objects */
bdescr *pinned_object_block;    /* allocate pinned objects into this block */
nat alloc_blocks;               /* number of allocate()d blocks since GC */
nat alloc_blocks_lim;           /* approximate limit on alloc_blocks */

StgPtr alloc_Hp    = NULL;      /* next free byte in small_alloc_list */
StgPtr alloc_HpLim = NULL;      /* end of block at small_alloc_list   */

generation *generations;        /* all the generations */
generation *g0;                 /* generation 0, for convenience */
generation *oldest_gen;         /* oldest generation, for convenience */
step *g0s0;                     /* generation 0, step 0, for convenience */

lnat total_allocated = 0;       /* total memory allocated during run */
/*
 * Storage manager mutex: protects all the above state from
 * simultaneous access by two STG threads.
 */
pthread_mutex_t sm_mutex = PTHREAD_MUTEX_INITIALIZER;
static void *stgAllocForGMP   (size_t size_in_bytes);
static void *stgReallocForGMP (void *ptr, size_t old_size, size_t new_size);
static void  stgDeallocForGMP (void *ptr, size_t size);
void
initStorage( void )
{
  nat g, s;
  step *stp;
  generation *gen;

  /* If we're doing heap profiling, we want a two-space heap with a
   * fixed-size allocation area so that we get roughly even-spaced
   * samples.
   */

  /* As an experiment, try a 2 generation collector */

#if defined(PROFILING) || defined(DEBUG)
  if (RtsFlags.ProfFlags.doHeapProfile) {
    RtsFlags.GcFlags.generations = 1;
    RtsFlags.GcFlags.steps = 1;
    RtsFlags.GcFlags.oldGenFactor = 0;
    RtsFlags.GcFlags.heapSizeSuggestion = 0;
  }
#endif

  if (RtsFlags.GcFlags.heapSizeSuggestion >
      RtsFlags.GcFlags.maxHeapSize) {
    RtsFlags.GcFlags.maxHeapSize = RtsFlags.GcFlags.heapSizeSuggestion;
  }
  /* allocate generation info array */
  generations = (generation *)stgMallocBytes(RtsFlags.GcFlags.generations
                                             * sizeof(struct _generation),
                                             "initStorage: gens");
  /* Initialise all generations */
  for(g = 0; g < RtsFlags.GcFlags.generations; g++) {
    gen = &generations[g];
    gen->mut_list = END_MUT_LIST;
    gen->mut_once_list = END_MUT_LIST;
    gen->collections = 0;
    gen->failed_promotions = 0;
  }
  /* A couple of convenience pointers */
  g0 = &generations[0];
  oldest_gen = &generations[RtsFlags.GcFlags.generations-1];
  /* Allocate step structures in each generation */
  if (RtsFlags.GcFlags.generations > 1) {
    /* Only for multiple-generations */

    /* Oldest generation: one step */
    oldest_gen->n_steps = 1;
    oldest_gen->steps =
      stgMallocBytes(1 * sizeof(struct _step), "initStorage: last step");

    /* set up all except the oldest generation with 2 steps */
    for(g = 0; g < RtsFlags.GcFlags.generations-1; g++) {
      generations[g].n_steps = RtsFlags.GcFlags.steps;
      generations[g].steps =
        stgMallocBytes (RtsFlags.GcFlags.steps * sizeof(struct _step),
                        "initStorage: steps");
    }
  } else {
    /* single generation, i.e. a two-space collector */
    g0->n_steps = 1;
    g0->steps = stgMallocBytes (sizeof(struct _step), "initStorage: steps");
  }
  /* Initialise all steps */
  for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
    for (s = 0; s < generations[g].n_steps; s++) {
      stp = &generations[g].steps[s];
      stp->gen = &generations[g];
      stp->large_objects = NULL;
      stp->new_large_objects = NULL;
      stp->scavenged_large_objects = NULL;
      stp->is_compacted = 0;
    }
  }
  /* Set up the destination pointers in each younger gen. step */
  for (g = 0; g < RtsFlags.GcFlags.generations-1; g++) {
    for (s = 0; s < generations[g].n_steps-1; s++) {
      generations[g].steps[s].to = &generations[g].steps[s+1];
    }
    generations[g].steps[s].to = &generations[g+1].steps[0];
  }

  /* The oldest generation has one step. */
  oldest_gen->steps[0].to = &oldest_gen->steps[0];
  /* generation 0 is special: that's the nursery */
  generations[0].max_blocks = 0;

  /* G0S0: the allocation area.  Policy: keep the allocation area
   * small to begin with, even if we have a large suggested heap
   * size.  Reason: we're going to do a major collection first, and we
   * don't want it to be a big one.  This vague idea is borne out by
   * rigorous experimental evidence.
   */
  g0s0 = &generations[0].steps[0];
  weak_ptr_list = NULL;

  /* initialise the allocate() interface */
  small_alloc_list = NULL;
  large_alloc_list = NULL;
  alloc_blocks_lim = RtsFlags.GcFlags.minAllocAreaSize;
  /* Tell GNU multi-precision pkg about our custom alloc functions */
  mp_set_memory_functions(stgAllocForGMP, stgReallocForGMP, stgDeallocForGMP);

  pthread_mutex_init(&sm_mutex, NULL);

  IF_DEBUG(gc, statDescribeGens());
}

void
exitStorage (void)
{
  stat_exit(calcAllocated());
}
/* -----------------------------------------------------------------------------
   The entry code for every CAF does the following:

      - builds a CAF_BLACKHOLE in the heap
      - pushes an update frame pointing to the CAF_BLACKHOLE
      - invokes UPD_CAF(), which:
          - calls newCaf, below
          - updates the CAF with a static indirection to the CAF_BLACKHOLE

   Why do we build a BLACKHOLE in the heap rather than just updating
   the thunk directly?  It's so that we only need one kind of update
   frame - otherwise we'd need a static version of the update frame too.

   newCaf() does the following:

      - it puts the CAF on the oldest generation's mut-once list.
        This is so that we can treat the CAF as a root when collecting
        younger generations.

   For GHCI, we have additional requirements when dealing with CAFs:

      - we must *retain* all dynamically-loaded CAFs ever entered,
        just in case we need them again.
      - we must be able to *revert* CAFs that have been evaluated, to
        their pre-evaluated form.

   To do this, we use an additional CAF list.  When newCaf() is
   called on a dynamically-loaded CAF, we add it to the CAF list
   instead of the old-generation mutable list, and save away its
   old info pointer (in caf->saved_info) for later reversion.

   To revert all the CAFs, we traverse the CAF list and reset the
   info pointer to caf->saved_info, then throw away the CAF list.
   (see GC.c:revertCAFs()).

   -------------------------------------------------------------------------- */
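/* A rough sketch of the reversion walk described above (illustration
 * only; the real code lives in GC.c:revertCAFs(), and the fields used
 * here are the ones set up by newCAF() below):
 *
 *     StgIndStatic *c, *next;
 *     for (c = (StgIndStatic *)caf_list; c != NULL; c = next) {
 *         next = (StgIndStatic *)c->static_link;
 *         SET_INFO((StgClosure *)c, c->saved_info);  // back to the pre-evaluated form
 *     }
 *     caf_list = NULL;                               // throw away the CAF list
 */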
void
newCAF(StgClosure* caf)
{
  /* Put this CAF on the mutable list for the old generation.
   * This is a HACK - the IND_STATIC closure doesn't really have
   * a mut_link field, but we pretend it has - in fact we re-use
   * the STATIC_LINK field for the time being, because when we
   * come to do a major GC we won't need the mut_link field
   * any more and can use it as a STATIC_LINK.
   */
  ACQUIRE_LOCK(&sm_mutex);

  if (is_dynamically_loaded_rwdata_ptr((StgPtr)caf)) {
    ((StgIndStatic *)caf)->saved_info  = (StgInfoTable *)caf->header.info;
    ((StgIndStatic *)caf)->static_link = caf_list;
    caf_list = caf;
  } else {
    ((StgIndStatic *)caf)->saved_info = NULL;
    ((StgMutClosure *)caf)->mut_link = oldest_gen->mut_once_list;
    oldest_gen->mut_once_list = (StgMutClosure *)caf;
  }

  RELEASE_LOCK(&sm_mutex);
  /* If we are PAR or DIST then we never forget a CAF */
  //belch("<##> Globalising CAF %08x %s",caf,info_type(caf));
  newGA = makeGlobal(caf, rtsTrue); /* given full weight */
}
/* -----------------------------------------------------------------------------
   Nursery management.
   -------------------------------------------------------------------------- */
void
allocNurseries( void )
{
#ifdef SMP
  Capability *cap;
  bdescr *bd;

  for (cap = free_capabilities; cap != NULL; cap = cap->link) {
    cap->rNursery = allocNursery(NULL, RtsFlags.GcFlags.minAllocAreaSize);
    cap->rCurrentNursery = cap->rNursery;
    for (bd = cap->rNursery; bd != NULL; bd = bd->link) {
      bd->u.back = (bdescr *)cap;
    }
    /* Set the back links to be equal to the Capability,
     * so we can do slightly better informed locking.
     */
  }
#else /* !SMP */
  nursery_blocks    = RtsFlags.GcFlags.minAllocAreaSize;
  g0s0->blocks      = allocNursery(NULL, nursery_blocks);
  g0s0->n_blocks    = nursery_blocks;
  g0s0->to_blocks   = NULL;
  g0s0->n_to_blocks = 0;
  MainRegTable.rNursery        = g0s0->blocks;
  MainRegTable.rCurrentNursery = g0s0->blocks;
  /* hp, hpLim, hp_bd, to_space etc. aren't used in G0S0 */
#endif
}
void
resetNurseries( void )
{
  bdescr *bd;
#ifdef SMP
  Capability *cap;

  /* All tasks must be stopped */
  ASSERT(n_free_capabilities == RtsFlags.ParFlags.nNodes);

  for (cap = free_capabilities; cap != NULL; cap = cap->link) {
    for (bd = cap->rNursery; bd; bd = bd->link) {
      bd->free = bd->start;
      ASSERT(bd->gen_no == 0);
      ASSERT(bd->step == g0s0);
      IF_DEBUG(sanity,memset(bd->start, 0xaa, BLOCK_SIZE));
    }
    cap->rCurrentNursery = cap->rNursery;
  }
#else /* !SMP */
  for (bd = g0s0->blocks; bd; bd = bd->link) {
    bd->free = bd->start;
    ASSERT(bd->gen_no == 0);
    ASSERT(bd->step == g0s0);
    IF_DEBUG(sanity,memset(bd->start, 0xaa, BLOCK_SIZE));
  }
  MainRegTable.rNursery = g0s0->blocks;
  MainRegTable.rCurrentNursery = g0s0->blocks;
#endif
}
bdescr *
allocNursery (bdescr *last_bd, nat blocks)
{
  bdescr *bd;
  nat i;

  /* Allocate a nursery, chaining the new blocks onto last_bd */
  for (i=0; i < blocks; i++) {
    bd = allocBlock();
    bd->link = last_bd;
    bd->free = bd->start;
    last_bd = bd;
  }
  return last_bd;
}
void
resizeNursery ( nat blocks )
{
  bdescr *bd;

#ifdef SMP
  barf("resizeNursery: can't resize in SMP mode");
#endif

  if (nursery_blocks == blocks) {
    ASSERT(g0s0->n_blocks == blocks);
    return;
  }
  else if (nursery_blocks < blocks) {
    /* need to extend the nursery */
    IF_DEBUG(gc, fprintf(stderr, "Increasing size of nursery to %d blocks\n",
                         blocks));
    g0s0->blocks = allocNursery(g0s0->blocks, blocks-nursery_blocks);
  }
  else {
    /* need to shrink the nursery: free surplus blocks from the front */
    bdescr *next_bd;
    IF_DEBUG(gc, fprintf(stderr, "Decreasing size of nursery to %d blocks\n",
                         blocks));
    for (bd = g0s0->blocks; nursery_blocks > blocks; nursery_blocks--) {
      next_bd = bd->link;
      freeGroup(bd);
      bd = next_bd;
    }
    g0s0->blocks = bd;
  }

  g0s0->n_blocks = nursery_blocks = blocks;
}
/* -----------------------------------------------------------------------------
   The allocate() interface

   allocate(n) always succeeds, and returns a chunk of memory n words
   long.  n can be larger than the size of a block if necessary, in
   which case a contiguous block group will be allocated.
   -------------------------------------------------------------------------- */
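/* A minimal usage sketch (illustration only, not code from this file):
 * the caller gets back a pointer to n uninitialised words and must lay
 * down a valid closure header itself, as stgAllocForGMP() does below:
 *
 *     nat data_words = 10;
 *     StgArrWords *arr =
 *         (StgArrWords *)allocate(sizeofW(StgArrWords) + data_words);
 *     SET_ARR_HDR(arr, &stg_ARR_WORDS_info, CCCS, data_words);
 */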
StgPtr
allocate( nat n )
{
  bdescr *bd;
  StgPtr p;

  ACQUIRE_LOCK(&sm_mutex);

  TICK_ALLOC_HEAP_NOCTR(n);

  /* big allocation (>LARGE_OBJECT_THRESHOLD) */
  /* ToDo: allocate directly into generation 1 */
  if (n >= LARGE_OBJECT_THRESHOLD/sizeof(W_)) {
    nat req_blocks = (lnat)BLOCK_ROUND_UP(n*sizeof(W_)) / BLOCK_SIZE;
    bd = allocGroup(req_blocks);
    dbl_link_onto(bd, &g0s0->large_objects);
    bd->flags = BF_LARGE;
    bd->free  = bd->start;
    /* don't add these blocks to alloc_blocks, since we're assuming
     * that large objects are likely to remain live for quite a while
     * (eg. running threads), so garbage collecting early won't make
     * much difference.
     */
    alloc_blocks += req_blocks;
    RELEASE_LOCK(&sm_mutex);
    return bd->start;

  /* small allocation (<LARGE_OBJECT_THRESHOLD) */
  } else if (small_alloc_list == NULL || alloc_Hp + n > alloc_HpLim) {
    if (small_alloc_list) {
      small_alloc_list->free = alloc_Hp;
    }
    bd = allocBlock();
    bd->link = small_alloc_list;
    small_alloc_list = bd;
    alloc_Hp    = bd->start;
    alloc_HpLim = bd->start + BLOCK_SIZE_W;
    alloc_blocks++;
  }

  /* bump-allocate from the current small-allocation block */
  p = alloc_Hp;
  alloc_Hp += n;
  RELEASE_LOCK(&sm_mutex);
  return p;
}
lnat
allocated_bytes( void )
{
  return (alloc_blocks * BLOCK_SIZE_W - (alloc_HpLim - alloc_Hp));
}
/* ---------------------------------------------------------------------------
   Allocate a fixed/pinned object.

   We allocate small pinned objects into a single block, allocating a
   new block when the current one overflows.  The block is chained
   onto the large_object_list of generation 0 step 0.

   NOTE: The GC can't in general handle pinned objects.  This
   interface is only safe to use for ByteArrays, which have no
   pointers and don't require scavenging.  It works because the
   block's descriptor has the BF_LARGE flag set, so the block is
   treated as a large object and chained onto various lists, rather
   than the individual objects being copied.  However, when it comes
   to scavenge the block, the GC will only scavenge the first object.
   The reason is that the GC can't linearly scan a block of pinned
   objects at the moment (doing so would require using the
   mostly-copying techniques).  But since we're restricting ourselves
   to pinned ByteArrays, not scavenging is ok.

   This function is called by newPinnedByteArray# which immediately
   fills the allocated memory with a MutableByteArray#.
   ------------------------------------------------------------------------- */
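/* An illustrative caller (sketch only): this is roughly the pattern
 * newPinnedByteArray# follows, writing an ARR_WORDS-style header
 * straight after the raw allocation:
 *
 *     nat data_words = 10;
 *     StgArrWords *arr =
 *         (StgArrWords *)allocatePinned(sizeofW(StgArrWords) + data_words);
 *     SET_ARR_HDR(arr, &stg_ARR_WORDS_info, CCCS, data_words);
 */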
StgPtr
allocatePinned( nat n )
{
  StgPtr p;
  bdescr *bd = pinned_object_block;

  ACQUIRE_LOCK(&sm_mutex);

  TICK_ALLOC_HEAP_NOCTR(n);

  // If the request is for a large object, then allocate()
  // will give us a pinned object anyway.
  if (n >= LARGE_OBJECT_THRESHOLD/sizeof(W_)) {
    RELEASE_LOCK(&sm_mutex);
    return allocate(n);
  }

  // If we don't have a block of pinned objects yet, or the current
  // one isn't large enough to hold the new object, allocate a new one.
  if (bd == NULL || (bd->free + n) > (bd->start + BLOCK_SIZE_W)) {
    pinned_object_block = bd = allocBlock();
    dbl_link_onto(bd, &g0s0->large_objects);
    bd->flags = BF_LARGE;
    bd->free  = bd->start;
  }

  // bump-allocate within the pinned-object block
  p = bd->free;
  bd->free += n;

  RELEASE_LOCK(&sm_mutex);
  return p;
}
/* -----------------------------------------------------------------------------
   Allocation functions for GMP.

   These all use the allocate() interface - we can't have any garbage
   collection going on during a gmp operation, so we use allocate()
   which always succeeds.  The gmp operations which might need to
   allocate will ask the storage manager (via doYouWantToGC()) whether
   a garbage collection is required, in case we get into a loop doing
   only allocate() style allocation.
   -------------------------------------------------------------------------- */
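/* Illustrative calling pattern (sketch only; the particular gmp entry
 * point is just an example):
 *
 *     if (doYouWantToGC()) {
 *         // arrange for a garbage collection before going into gmp
 *     }
 *     mpz_add(result, x, y);   // gmp allocates via stgAllocForGMP()
 */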
static void *
stgAllocForGMP (size_t size_in_bytes)
{
  StgArrWords *arr;
  nat data_size_in_words, total_size_in_words;

  /* should be a multiple of sizeof(StgWord) (whole no. of limbs) */
  ASSERT(size_in_bytes % sizeof(W_) == 0);

  data_size_in_words  = size_in_bytes / sizeof(W_);
  total_size_in_words = sizeofW(StgArrWords) + data_size_in_words;

  /* allocate and fill it in. */
  arr = (StgArrWords *)allocate(total_size_in_words);
  SET_ARR_HDR(arr, &stg_ARR_WORDS_info, CCCS, data_size_in_words);

  /* and return a ptr to the goods inside the array */
  return(BYTE_ARR_CTS(arr));
}
static void *
stgReallocForGMP (void *ptr, size_t old_size, size_t new_size)
{
  void *new_stuff_ptr = stgAllocForGMP(new_size);
  nat i = 0;
  char *p = (char *) ptr;
  char *q = (char *) new_stuff_ptr;

  /* copy the old contents byte for byte into the new space */
  for (; i < old_size; i++, p++, q++) {
    *q = *p;
  }

  return(new_stuff_ptr);
}
static void
stgDeallocForGMP (void *ptr STG_UNUSED,
                  size_t size STG_UNUSED)
{
  /* easy for us: the garbage collector does the dealloc'n */
}
/* -----------------------------------------------------------------------------
 * Stats and stuff
 * -------------------------------------------------------------------------- */
/* -----------------------------------------------------------------------------
 * calcAllocated()
 *
 * Approximate how much we've allocated: number of blocks in the
 * nursery + blocks allocated via allocate() - unused nursery blocks.
 * This leaves a little slop at the end of each block, and doesn't
 * take into account large objects (ToDo).
 * -------------------------------------------------------------------------- */
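/* In other words, for the non-SMP case below:
 *
 *     allocated = nursery_blocks * BLOCK_SIZE_W + allocated_bytes()
 *                 - (whole nursery blocks not yet touched)
 *                 - (unused words in the current nursery block)
 */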
lnat
calcAllocated( void )
{
  nat allocated;
  bdescr *bd;
#ifdef SMP
  Capability *cap;

  /* All tasks must be stopped.  Can't assert that all the
     capabilities are owned by the scheduler, though: one or more
     tasks might have been stopped while they were running (non-main)
     threads. */
  /*  ASSERT(n_free_capabilities == RtsFlags.ParFlags.nNodes); */

  allocated =
    n_free_capabilities * RtsFlags.GcFlags.minAllocAreaSize * BLOCK_SIZE_W
    + allocated_bytes();

  for (cap = free_capabilities; cap != NULL; cap = cap->link) {
    for ( bd = cap->rCurrentNursery->link; bd != NULL; bd = bd->link ) {
      allocated -= BLOCK_SIZE_W;
    }
    if (cap->rCurrentNursery->free < cap->rCurrentNursery->start
        + BLOCK_SIZE_W) {
      allocated -= (cap->rCurrentNursery->start + BLOCK_SIZE_W)
        - cap->rCurrentNursery->free;
    }
  }
#else /* !SMP */
  bdescr *current_nursery = MainRegTable.rCurrentNursery;

  allocated = (nursery_blocks * BLOCK_SIZE_W) + allocated_bytes();
  for ( bd = current_nursery->link; bd != NULL; bd = bd->link ) {
    allocated -= BLOCK_SIZE_W;
  }
  if (current_nursery->free < current_nursery->start + BLOCK_SIZE_W) {
    allocated -= (current_nursery->start + BLOCK_SIZE_W)
      - current_nursery->free;
  }
#endif

  total_allocated += allocated;
  return allocated;
}
/* Approximate the amount of live data in the heap.  To be called just
 * after garbage collection (see GarbageCollect()).
 */
lnat
calcLive(void)
{
  nat g, s;
  lnat live = 0;
  step *stp;

  if (RtsFlags.GcFlags.generations == 1) {
    live = (g0s0->n_to_blocks - 1) * BLOCK_SIZE_W +
      ((lnat)g0s0->hp_bd->free - (lnat)g0s0->hp_bd->start) / sizeof(W_);
    return live;
  }

  for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
    for (s = 0; s < generations[g].n_steps; s++) {
      /* approximate amount of live data (doesn't take into account slop
       * at end of each block).
       */
      if (g == 0 && s == 0) { continue; }
      stp = &generations[g].steps[s];
      live += (stp->n_large_blocks + stp->n_blocks - 1) * BLOCK_SIZE_W;
      if (stp->hp_bd != NULL) {
        live += ((lnat)stp->hp_bd->free - (lnat)stp->hp_bd->start)
          / sizeof(W_);
      }
    }
  }
  return live;
}
/* Approximate the number of blocks that will be needed at the next
 * garbage collection.
 *
 * Assume: all data currently live will remain live.  Steps that will
 * be collected next time will therefore need twice as many blocks
 * since all the data will be copied.
 */
lnat
calcNeeded(void)
{
  lnat needed = 0;
  nat g, s;
  step *stp;

  for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
    for (s = 0; s < generations[g].n_steps; s++) {
      if (g == 0 && s == 0) { continue; }
      stp = &generations[g].steps[s];
      if (generations[g].steps[0].n_blocks +
          generations[g].steps[0].n_large_blocks
          > generations[g].max_blocks
          && stp->is_compacted == 0) {
        needed += 2 * stp->n_blocks;
      } else {
        needed += stp->n_blocks;
      }
    }
  }
  return needed;
}
/* -----------------------------------------------------------------------------
   memInventory() checks for memory leaks by counting up all the
   blocks we know about and comparing that to the number of blocks
   allegedly floating around in the system.
   -------------------------------------------------------------------------- */
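/* The invariant being checked (it is also ASSERTed at the end of
 * memInventory()):
 *
 *     total_blocks + free_blocks == mblocks_allocated * BLOCKS_PER_MBLOCK
 */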
void
memInventory(void)
{
  nat g, s;
  step *stp;
  bdescr *bd;
  lnat total_blocks = 0, free_blocks = 0;

  /* count the blocks we currently have */
  for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
    for (s = 0; s < generations[g].n_steps; s++) {
      stp = &generations[g].steps[s];
      total_blocks += stp->n_blocks;
      if (RtsFlags.GcFlags.generations == 1) {
        /* two-space collector has a to-space too :-) */
        total_blocks += g0s0->n_to_blocks;
      }
      for (bd = stp->large_objects; bd; bd = bd->link) {
        total_blocks += bd->blocks;
        /* hack for megablock groups: they have an extra block or two in
           the second and subsequent megablocks where the block
           descriptors would normally go.
        */
        if (bd->blocks > BLOCKS_PER_MBLOCK) {
          total_blocks -= (MBLOCK_SIZE / BLOCK_SIZE - BLOCKS_PER_MBLOCK)
            * (bd->blocks/(MBLOCK_SIZE/BLOCK_SIZE));
        }
      }
    }
  }
  /* any blocks held by allocate() */
  for (bd = small_alloc_list; bd; bd = bd->link) {
    total_blocks += bd->blocks;
  }
  for (bd = large_alloc_list; bd; bd = bd->link) {
    total_blocks += bd->blocks;
  }

  /* count the blocks on the free list */
  free_blocks = countFreeList();

  if (total_blocks + free_blocks != mblocks_allocated *
      BLOCKS_PER_MBLOCK) {
    fprintf(stderr, "Blocks: %ld live + %ld free = %ld total (%ld around)\n",
            total_blocks, free_blocks, total_blocks + free_blocks,
            mblocks_allocated * BLOCKS_PER_MBLOCK);
  }

  ASSERT(total_blocks + free_blocks == mblocks_allocated * BLOCKS_PER_MBLOCK);
}
nat
countBlocks(bdescr *bd)
{
  nat n;
  for (n=0; bd != NULL; bd=bd->link) {
    n += bd->blocks;
  }
  return n;
}
/* Full heap sanity check. */
void
checkSanity( void )
{
  nat g, s;

  if (RtsFlags.GcFlags.generations == 1) {
    checkHeap(g0s0->to_blocks);
    checkChain(g0s0->large_objects);
  } else {
    for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
      for (s = 0; s < generations[g].n_steps; s++) {
        if (g == 0 && s == 0) { continue; }
        checkHeap(generations[g].steps[s].blocks);
        checkChain(generations[g].steps[s].large_objects);
        ASSERT(countBlocks(generations[g].steps[s].blocks)
               == generations[g].steps[s].n_blocks);
        ASSERT(countBlocks(generations[g].steps[s].large_objects)
               == generations[g].steps[s].n_large_blocks);
      }
      checkMutableList(generations[g].mut_list, g);
      checkMutOnceList(generations[g].mut_once_list, g);
    }
  }
  checkFreeListSanity();
}