1 /* -----------------------------------------------------------------------------
2 * $Id: Storage.c,v 1.39 2001/07/19 07:28:00 andy Exp $
4 * (c) The GHC Team, 1998-1999
6 * Storage manager front end
8 * ---------------------------------------------------------------------------*/
15 #include "BlockAlloc.h"
22 #include "StoragePriv.h"
25 nat nursery_blocks; /* number of blocks in the nursery */
28 StgClosure *caf_list = NULL;
30 bdescr *small_alloc_list; /* allocate()d small objects */
31 bdescr *large_alloc_list; /* allocate()d large objects */
32 nat alloc_blocks; /* number of allocate()d blocks since GC */
33 nat alloc_blocks_lim; /* approximate limit on alloc_blocks */
35 StgPtr alloc_Hp = NULL; /* next free byte in small_alloc_list */
36 StgPtr alloc_HpLim = NULL; /* end of block at small_alloc_list */
38 generation *generations; /* all the generations */
39 generation *g0; /* generation 0, for convenience */
40 generation *oldest_gen; /* oldest generation, for convenience */
41 step *g0s0; /* generation 0, step 0, for convenience */
43 lnat total_allocated = 0; /* total memory allocated during run */
/*
 * Storage manager mutex: protects all the above state from
 * simultaneous access by two STG threads.
 */
#ifdef SMP
pthread_mutex_t sm_mutex = PTHREAD_MUTEX_INITIALIZER;
#endif
56 static void *stgAllocForGMP (size_t size_in_bytes);
57 static void *stgReallocForGMP (void *ptr, size_t old_size, size_t new_size);
58 static void stgDeallocForGMP (void *ptr, size_t size);
void
initStorage (void)
{
  nat g, s;
  step *stp;
  generation *gen;

  /* If we're doing heap profiling, we want a two-space heap with a
   * fixed-size allocation area so that we get roughly even-spaced
   * samples.
   */

  /* As an experiment, try a 2 generation collector
   */
75 #if defined(PROFILING) || defined(DEBUG)
76 if (RtsFlags.ProfFlags.doHeapProfile) {
77 RtsFlags.GcFlags.generations = 1;
78 RtsFlags.GcFlags.steps = 1;
79 RtsFlags.GcFlags.oldGenFactor = 0;
    RtsFlags.GcFlags.heapSizeSuggestion = 0;
  }
#endif
84 if (RtsFlags.GcFlags.heapSizeSuggestion >
85 RtsFlags.GcFlags.maxHeapSize) {
    RtsFlags.GcFlags.maxHeapSize = RtsFlags.GcFlags.heapSizeSuggestion;
  }
91 /* allocate generation info array */
92 generations = (generation *)stgMallocBytes(RtsFlags.GcFlags.generations
                                             * sizeof(struct _generation),
                                             "initStorage: gens");
96 /* Initialise all generations */
97 for(g = 0; g < RtsFlags.GcFlags.generations; g++) {
98 gen = &generations[g];
100 gen->mut_list = END_MUT_LIST;
101 gen->mut_once_list = END_MUT_LIST;
102 gen->collections = 0;
    gen->failed_promotions = 0;
  }
107 /* A couple of convenience pointers */
108 g0 = &generations[0];
109 oldest_gen = &generations[RtsFlags.GcFlags.generations-1];
111 /* Allocate step structures in each generation */
112 if (RtsFlags.GcFlags.generations > 1) {
113 /* Only for multiple-generations */
115 /* Oldest generation: one step */
116 oldest_gen->n_steps = 1;
    oldest_gen->steps =
      stgMallocBytes(1 * sizeof(struct _step), "initStorage: last step");
    /* set up all except the oldest generation with the requested
     * number of steps */
121 for(g = 0; g < RtsFlags.GcFlags.generations-1; g++) {
122 generations[g].n_steps = RtsFlags.GcFlags.steps;
123 generations[g].steps =
124 stgMallocBytes (RtsFlags.GcFlags.steps * sizeof(struct _step),
125 "initStorage: steps");
129 /* single generation, i.e. a two-space collector */
131 g0->steps = stgMallocBytes (sizeof(struct _step), "initStorage: steps");
134 /* Initialise all steps */
135 for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
136 for (s = 0; s < generations[g].n_steps; s++) {
137 stp = &generations[g].steps[s];
141 stp->gen = &generations[g];
147 stp->large_objects = NULL;
148 stp->new_large_objects = NULL;
      stp->scavenged_large_objects = NULL;
    }
  }
153 /* Set up the destination pointers in each younger gen. step */
154 for (g = 0; g < RtsFlags.GcFlags.generations-1; g++) {
155 for (s = 0; s < generations[g].n_steps-1; s++) {
156 generations[g].steps[s].to = &generations[g].steps[s+1];
158 generations[g].steps[s].to = &generations[g+1].steps[0];
  /* The oldest generation has one step and its destination is the
   * same step. */
163 oldest_gen->steps[0].to = &oldest_gen->steps[0];
165 /* generation 0 is special: that's the nursery */
166 generations[0].max_blocks = 0;
168 /* G0S0: the allocation area. Policy: keep the allocation area
169 * small to begin with, even if we have a large suggested heap
170 * size. Reason: we're going to do a major collection first, and we
171 * don't want it to be a big one. This vague idea is borne out by
   * rigorous experimental evidence.
   */
174 g0s0 = &generations[0].steps[0];
178 weak_ptr_list = NULL;
181 /* initialise the allocate() interface */
182 small_alloc_list = NULL;
183 large_alloc_list = NULL;
  alloc_blocks = 0;
  alloc_blocks_lim = RtsFlags.GcFlags.minAllocAreaSize;
187 /* Tell GNU multi-precision pkg about our custom alloc functions */
188 mp_set_memory_functions(stgAllocForGMP, stgReallocForGMP, stgDeallocForGMP);
#ifdef SMP
  pthread_mutex_init(&sm_mutex, NULL);
#endif
194 IF_DEBUG(gc, stat_describe_gens());
}

void
exitStorage (void)
{
  stat_exit(calcAllocated());
}
203 /* -----------------------------------------------------------------------------
   CAF management.

   The entry code for every CAF does the following:
208 - builds a CAF_BLACKHOLE in the heap
209 - pushes an update frame pointing to the CAF_BLACKHOLE
210 - invokes UPD_CAF(), which:
211 - calls newCaf, below
212 - updates the CAF with a static indirection to the CAF_BLACKHOLE
214 Why do we build a BLACKHOLE in the heap rather than just updating
215 the thunk directly? It's so that we only need one kind of update
216 frame - otherwise we'd need a static version of the update frame too.
218 newCaf() does the following:
220 - it puts the CAF on the oldest generation's mut-once list.
        This is so that we can treat the CAF as a root when collecting
        younger generations.
224 For GHCI, we have additional requirements when dealing with CAFs:
226 - we must *retain* all dynamically-loaded CAFs ever entered,
227 just in case we need them again.
228 - we must be able to *revert* CAFs that have been evaluated, to
229 their pre-evaluated form.
231 To do this, we use an additional CAF list. When newCaf() is
232 called on a dynamically-loaded CAF, we add it to the CAF list
233 instead of the old-generation mutable list, and save away its
234 old info pointer (in caf->saved_info) for later reversion.
236 To revert all the CAFs, we traverse the CAF list and reset the
237 info pointer to caf->saved_info, then throw away the CAF list.
238 (see GC.c:revertCAFs()).
242 -------------------------------------------------------------------------- */
void
newCAF(StgClosure* caf)
{
247 /* Put this CAF on the mutable list for the old generation.
248 * This is a HACK - the IND_STATIC closure doesn't really have
249 * a mut_link field, but we pretend it has - in fact we re-use
250 * the STATIC_LINK field for the time being, because when we
251 * come to do a major GC we won't need the mut_link field
   * any more and can use it as a STATIC_LINK.
   */
254 ACQUIRE_LOCK(&sm_mutex);
256 if (is_dynamically_loaded_rwdata_ptr((StgPtr)caf)) {
257 ((StgIndStatic *)caf)->saved_info = (StgInfoTable *)caf->header.info;
    ((StgIndStatic *)caf)->static_link = caf_list;
    caf_list = caf;
  } else {
261 ((StgIndStatic *)caf)->saved_info = NULL;
262 ((StgMutClosure *)caf)->mut_link = oldest_gen->mut_once_list;
    oldest_gen->mut_once_list = (StgMutClosure *)caf;
  }
266 RELEASE_LOCK(&sm_mutex);
#if defined(PAR) || defined(DIST)
  /* If we are PAR or DIST then we never forget a CAF */
  { globalAddr *newGA;
    //belch("<##> Globalising CAF %08x %s",caf,info_type(caf));
    newGA = makeGlobal(caf,rtsTrue); /* given full weight */
    ASSERT(newGA);
  }
#endif /* PAR || DIST */
}
278 /* -----------------------------------------------------------------------------
280 -------------------------------------------------------------------------- */
void
allocNurseries( void )
{
#ifdef SMP
  {
    Capability *cap;
    bdescr *bd;

292 for (cap = free_capabilities; cap != NULL; cap = cap->link) {
293 cap->rNursery = allocNursery(NULL, RtsFlags.GcFlags.minAllocAreaSize);
294 cap->rCurrentNursery = cap->rNursery;
      /* Set the back links to point to the owning Capability, so we
       * can do slightly better informed locking.
       */
      for (bd = cap->rNursery; bd != NULL; bd = bd->link) {
        bd->back = (bdescr *)cap;
      }
    }
  }
#else /* SMP */
304 nursery_blocks = RtsFlags.GcFlags.minAllocAreaSize;
305 g0s0->blocks = allocNursery(NULL, nursery_blocks);
306 g0s0->n_blocks = nursery_blocks;
307 g0s0->to_space = NULL;
308 MainRegTable.rNursery = g0s0->blocks;
309 MainRegTable.rCurrentNursery = g0s0->blocks;
  /* hp, hpLim, hp_bd, to_space etc. aren't used in G0S0 */
#endif /* SMP */
}
void
resetNurseries( void )
{
  bdescr *bd;
#ifdef SMP
  Capability *cap;

321 /* All tasks must be stopped */
322 ASSERT(n_free_capabilities == RtsFlags.ParFlags.nNodes);
324 for (cap = free_capabilities; cap != NULL; cap = cap->link) {
325 for (bd = cap->rNursery; bd; bd = bd->link) {
326 bd->free = bd->start;
327 ASSERT(bd->gen == g0);
328 ASSERT(bd->step == g0s0);
      IF_DEBUG(sanity,memset(bd->start, 0xaa, BLOCK_SIZE));
    }
    cap->rCurrentNursery = cap->rNursery;
  }
#else
334 for (bd = g0s0->blocks; bd; bd = bd->link) {
335 bd->free = bd->start;
336 ASSERT(bd->gen == g0);
337 ASSERT(bd->step == g0s0);
    IF_DEBUG(sanity,memset(bd->start, 0xaa, BLOCK_SIZE));
  }
340 MainRegTable.rNursery = g0s0->blocks;
  MainRegTable.rCurrentNursery = g0s0->blocks;
#endif
}
bdescr *
allocNursery (bdescr *last_bd, nat blocks)
{
  bdescr *bd;
  nat i;

  /* Allocate a nursery: a chain of blocks linked through bd->link */
  for (i=0; i < blocks; i++) {
    bd = allocBlock();
    bd->link = last_bd;
    bd->step = g0s0;
    bd->gen  = g0;
    bd->free = bd->start;
    last_bd = bd;
  }
  return last_bd;
}
void
resizeNursery ( nat blocks )
{
  bdescr *bd;

#ifdef SMP
  barf("resizeNursery: can't resize in SMP mode");
#endif
373 if (nursery_blocks == blocks) {
    ASSERT(g0s0->n_blocks == blocks);
    return;
  }
378 else if (nursery_blocks < blocks) {
379 IF_DEBUG(gc, fprintf(stderr, "Increasing size of nursery to %d blocks\n",
    g0s0->blocks = allocNursery(g0s0->blocks, blocks-nursery_blocks);
  }

  else {
    bdescr *next_bd;

387 IF_DEBUG(gc, fprintf(stderr, "Decreasing size of nursery to %d blocks\n",
    for (bd = g0s0->blocks; nursery_blocks > blocks; nursery_blocks--) {
      next_bd = bd->link;
      freeGroup(bd);
      bd = next_bd;
    }
    g0s0->blocks = bd;
  }
  g0s0->n_blocks = nursery_blocks = blocks;
}
400 /* -----------------------------------------------------------------------------
401 The allocate() interface
403 allocate(n) always succeeds, and returns a chunk of memory n words
404 long. n can be larger than the size of a block if necessary, in
405 which case a contiguous block group will be allocated.
406 -------------------------------------------------------------------------- */
414 ACQUIRE_LOCK(&sm_mutex);
416 TICK_ALLOC_HEAP_NOCTR(n);
419 /* big allocation (>LARGE_OBJECT_THRESHOLD) */
420 /* ToDo: allocate directly into generation 1 */
421 if (n >= LARGE_OBJECT_THRESHOLD/sizeof(W_)) {
422 nat req_blocks = (lnat)BLOCK_ROUND_UP(n*sizeof(W_)) / BLOCK_SIZE;
423 bd = allocGroup(req_blocks);
424 dbl_link_onto(bd, &g0s0->large_objects);
    bd->gen  = g0;
    bd->step = g0s0;
    bd->evacuated = 0;
    bd->free = bd->start;
    /* Large objects are likely to remain live for quite a while (eg.
     * running threads), so collecting early won't gain much; but we
     * still count these blocks in alloc_blocks so that the GC trigger
     * accounts for them.
     */
    alloc_blocks += req_blocks;
    RELEASE_LOCK(&sm_mutex);
    return bd->start;
438 /* small allocation (<LARGE_OBJECT_THRESHOLD) */
439 } else if (small_alloc_list == NULL || alloc_Hp + n > alloc_HpLim) {
440 if (small_alloc_list) {
      small_alloc_list->free = alloc_Hp;
    }
    bd = allocBlock();
444 bd->link = small_alloc_list;
445 small_alloc_list = bd;
    bd->gen  = g0;
    bd->step = g0s0;
    bd->evacuated = 0;
    alloc_Hp = bd->start;
    alloc_HpLim = bd->start + BLOCK_SIZE_W;
    alloc_blocks++;
  }
456 RELEASE_LOCK(&sm_mutex);
/* NB: despite its name, this returns a count in words, not bytes
 * (BLOCK_SIZE_W and the Hp pointers are both measured in words). */
lnat allocated_bytes(void)
{
  return (alloc_blocks * BLOCK_SIZE_W - (alloc_HpLim - alloc_Hp));
}
465 /* -----------------------------------------------------------------------------
466 Allocation functions for GMP.
468 These all use the allocate() interface - we can't have any garbage
469 collection going on during a gmp operation, so we use allocate()
470 which always succeeds. The gmp operations which might need to
471 allocate will ask the storage manager (via doYouWantToGC()) whether
472 a garbage collection is required, in case we get into a loop doing
473 only allocate() style allocation.
474 -------------------------------------------------------------------------- */
static void *
stgAllocForGMP (size_t size_in_bytes)
{
  StgArrWords* arr;
  nat data_size_in_words, total_size_in_words;
482 /* should be a multiple of sizeof(StgWord) (whole no. of limbs) */
483 ASSERT(size_in_bytes % sizeof(W_) == 0);
485 data_size_in_words = size_in_bytes / sizeof(W_);
486 total_size_in_words = sizeofW(StgArrWords) + data_size_in_words;
488 /* allocate and fill it in. */
489 arr = (StgArrWords *)allocate(total_size_in_words);
490 SET_ARR_HDR(arr, &stg_ARR_WORDS_info, CCCS, data_size_in_words);
492 /* and return a ptr to the goods inside the array */
  return(BYTE_ARR_CTS(arr));
}
static void *
stgReallocForGMP (void *ptr, size_t old_size, size_t new_size)
{
    /* copy the smaller of the two sizes: new_size may be less than
     * old_size, and copying old_size bytes would overrun the new block */
    size_t copy_size = (old_size < new_size) ? old_size : new_size;
    void *new_stuff_ptr = stgAllocForGMP(new_size);
    nat i = 0;
    char *p = (char *) ptr;
    char *q = (char *) new_stuff_ptr;

    for (; i < copy_size; i++, p++, q++) {
        *q = *p;
    }

    return(new_stuff_ptr);
}
static void
stgDeallocForGMP (void *ptr STG_UNUSED,
                  size_t size STG_UNUSED)
{
    /* easy for us: the garbage collector does the dealloc'n */
}
518 /* -----------------------------------------------------------------------------
520 * -------------------------------------------------------------------------- */
522 /* -----------------------------------------------------------------------------
 * calcAllocated()
 *
 * Approximate how much we've allocated: number of blocks in the
 * nursery + blocks allocated via allocate() - unused nursery blocks.
527 * This leaves a little slop at the end of each block, and doesn't
528 * take into account large objects (ToDo).
529 * -------------------------------------------------------------------------- */
lnat
calcAllocated( void )
{
  nat allocated;
  bdescr *bd;

#ifdef SMP
  Capability *cap;

  /* All tasks must be stopped.  Can't assert that all the
541 capabilities are owned by the scheduler, though: one or more
     tasks might have been stopped while they were running (non-main)
     threads. */
544 /* ASSERT(n_free_capabilities == RtsFlags.ParFlags.nNodes); */
  allocated =
    n_free_capabilities * RtsFlags.GcFlags.minAllocAreaSize * BLOCK_SIZE_W
    + allocated_bytes();
550 for (cap = free_capabilities; cap != NULL; cap = cap->link) {
551 for ( bd = cap->rCurrentNursery->link; bd != NULL; bd = bd->link ) {
      allocated -= BLOCK_SIZE_W;
    }
    if (cap->rCurrentNursery->free < cap->rCurrentNursery->start
        + BLOCK_SIZE_W) {
556 allocated -= (cap->rCurrentNursery->start + BLOCK_SIZE_W)
        - cap->rCurrentNursery->free;
    }
  }

#else /* !SMP */
562 bdescr *current_nursery = MainRegTable.rCurrentNursery;
564 allocated = (nursery_blocks * BLOCK_SIZE_W) + allocated_bytes();
565 for ( bd = current_nursery->link; bd != NULL; bd = bd->link ) {
    allocated -= BLOCK_SIZE_W;
  }
568 if (current_nursery->free < current_nursery->start + BLOCK_SIZE_W) {
569 allocated -= (current_nursery->start + BLOCK_SIZE_W)
      - current_nursery->free;
  }
#endif
  total_allocated += allocated;
  return allocated;
}
578 /* Approximate the amount of live data in the heap. To be called just
 * after garbage collection (see GarbageCollect()).
 */
extern lnat
calcLive(void)
{
  nat g, s;
  lnat live = 0;
  step *stp;

588 if (RtsFlags.GcFlags.generations == 1) {
589 live = (g0s0->to_blocks - 1) * BLOCK_SIZE_W +
      ((lnat)g0s0->hp_bd->free - (lnat)g0s0->hp_bd->start) / sizeof(W_);
    return live;
  }
594 for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
595 for (s = 0; s < generations[g].n_steps; s++) {
596 /* approximate amount of live data (doesn't take into account slop
       * at end of each block).
       */
      if (g == 0 && s == 0) {
          continue;
      }
602 stp = &generations[g].steps[s];
603 live += (stp->n_blocks - 1) * BLOCK_SIZE_W +
        ((lnat)stp->hp_bd->free - (lnat)stp->hp_bd->start) / sizeof(W_);
    }
  }
  return live;
}
610 /* Approximate the number of blocks that will be needed at the next
611 * garbage collection.
613 * Assume: all data currently live will remain live. Steps that will
614 * be collected next time will therefore need twice as many blocks
 * since all the data will be copied.  (e.g. a step holding N blocks of
 * live data that is due for collection contributes 2N blocks to the
 * estimate.)
 * -------------------------------------------------------------------------- */
extern lnat
calcNeeded(void)
{
    lnat needed = 0;
    nat g, s;
    step *stp;

624 for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
625 for (s = 0; s < generations[g].n_steps; s++) {
626 if (g == 0 && s == 0) { continue; }
627 stp = &generations[g].steps[s];
628 if (generations[g].steps[0].n_blocks > generations[g].max_blocks) {
629 needed += 2 * stp->n_blocks;
            } else {
                needed += stp->n_blocks;
            }
        }
    }
    return needed;
}
638 /* -----------------------------------------------------------------------------
641 memInventory() checks for memory leaks by counting up all the
642 blocks we know about and comparing that to the number of blocks
643 allegedly floating around in the system.
644 -------------------------------------------------------------------------- */
#ifdef DEBUG

extern void
memInventory(void)
{
  nat g, s;
  step *stp;
  bdescr *bd;
  lnat total_blocks = 0, free_blocks = 0;
  /* count the blocks we currently have */
658 for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
659 for (s = 0; s < generations[g].n_steps; s++) {
660 stp = &generations[g].steps[s];
661 total_blocks += stp->n_blocks;
662 if (RtsFlags.GcFlags.generations == 1) {
663 /* two-space collector has a to-space too :-) */
        total_blocks += g0s0->to_blocks;
      }
666 for (bd = stp->large_objects; bd; bd = bd->link) {
667 total_blocks += bd->blocks;
668 /* hack for megablock groups: they have an extra block or two in
669 the second and subsequent megablocks where the block
           descriptors would normally go.
        */
672 if (bd->blocks > BLOCKS_PER_MBLOCK) {
673 total_blocks -= (MBLOCK_SIZE / BLOCK_SIZE - BLOCKS_PER_MBLOCK)
                          * (bd->blocks/(MBLOCK_SIZE/BLOCK_SIZE));
        }
      }
    }
  }
680 /* any blocks held by allocate() */
681 for (bd = small_alloc_list; bd; bd = bd->link) {
    total_blocks += bd->blocks;
  }
684 for (bd = large_alloc_list; bd; bd = bd->link) {
    total_blocks += bd->blocks;
  }
688 /* count the blocks on the free list */
689 free_blocks = countFreeList();
691 ASSERT(total_blocks + free_blocks == mblocks_allocated * BLOCKS_PER_MBLOCK);
#if 0
  if (total_blocks + free_blocks != mblocks_allocated *
      BLOCKS_PER_MBLOCK) {
696 fprintf(stderr, "Blocks: %ld live + %ld free = %ld total (%ld around)\n",
697 total_blocks, free_blocks, total_blocks + free_blocks,
            mblocks_allocated * BLOCKS_PER_MBLOCK);
  }
#endif
}
/* Full heap sanity check. */

extern void
checkSanity(nat N)
{
  nat g, s;

710 if (RtsFlags.GcFlags.generations == 1) {
711 checkHeap(g0s0->to_space, NULL);
    checkChain(g0s0->large_objects);
  } else {
715 for (g = 0; g <= N; g++) {
716 for (s = 0; s < generations[g].n_steps; s++) {
717 if (g == 0 && s == 0) { continue; }
        checkHeap(generations[g].steps[s].blocks, NULL);
      }
    }
721 for (g = N+1; g < RtsFlags.GcFlags.generations; g++) {
722 for (s = 0; s < generations[g].n_steps; s++) {
723 checkHeap(generations[g].steps[s].blocks,
724 generations[g].steps[s].blocks->start);
        checkChain(generations[g].steps[s].large_objects);
      }
    }
    checkFreeListSanity();
  }
}

#endif /* DEBUG */