/* -----------------------------------------------------------------------------
 *
 * (c) The GHC Team 1998-2008
 *
 * Generational garbage collector
 *
 * Documentation on the architecture of the Garbage Collector can be
 * found in the online commentary:
 *
 *   http://hackage.haskell.org/trac/ghc/wiki/Commentary/Rts/Storage/GC
 *
 * ---------------------------------------------------------------------------*/
// #include "PosixSource.h"
#include "OSThreads.h"
#include "LdvProfile.h"
#include "BlockAlloc.h"
#include "ParTicky.h"  // ToDo: move into Rts.h
#include "RtsSignals.h"
#if defined(RTS_GTK_FRONTPANEL)
#include "FrontPanel.h"
#endif
#include "RetainerProfile.h"
#include "RaiseAsync.h"

#include <string.h>    // for memset()
/* -----------------------------------------------------------------------------
   Global variables
   -------------------------------------------------------------------------- */
/* STATIC OBJECT LIST.
 *
 * We maintain a linked list of static objects that are still live.
 * The requirements for this list are:
 *
 *  - we need to scan the list while adding to it, in order to
 *    scavenge all the static objects (in the same way that
 *    breadth-first scavenging works for dynamic objects).
 *
 *  - we need to be able to tell whether an object is already on
 *    the list, to break loops.
 *
 * Each static object has a "static link field", which we use for
 * linking objects on to the list.  We use a stack-type list, consing
 * objects on the front as they are added (this means that the
 * scavenge phase is depth-first, not breadth-first, but that
 * shouldn't matter).
 *
 * A separate list is kept for objects that have been scavenged
 * already - this is so that we can zero all the marks afterwards.
 *
 * An object is on the list if its static link field is non-zero; this
 * means that we have to mark the end of the list with '1', not NULL.
 *
 * Extra notes for generational GC:
 *
 * Each generation has a static object list associated with it.  When
 * collecting generations up to N, we treat the static object lists
 * from generations > N as roots.
 *
 * We build up a static object list while collecting generations 0..N,
 * which is then appended to the static object list of generation N+1.
 */
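/* A minimal sketch (illustrative only, not part of the collector) of
 * the consing protocol described above.  link_of()/set_link() are
 * hypothetical accessors standing in for *STATIC_LINK(info, p); the
 * terminator is the non-NULL END_OF_STATIC_LIST value, so a NULL link
 * reliably means "not on the list":
 */
#if 0
static void
push_static_sketch (StgClosure *obj, StgClosure **static_list)
{
    if (link_of(obj) != NULL) return;  // non-zero link: already listed,
                                       // which is how we break loops
    set_link(obj, *static_list);       // cons onto the front; this is
    *static_list = obj;                // what makes scavenging depth-first
}
#endif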
/* N is the oldest generation being collected, where the generations
 * are numbered starting at 0.  A major GC (indicated by the major_gc
 * flag) is when we're collecting all generations.  We only attempt to
 * deal with static objects and GC CAFs when doing a major GC.
 */
nat N;
rtsBool major_gc;
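/* Example (not in the original comments): with the default of two
 * generations, N == 1 selects both of them, so major_gc is set and the
 * static object lists and CAFs are swept; an N == 0 minor collection
 * leaves them alone.
 */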
/* Data used for allocation area sizing.
 */
static lnat g0s0_pcnt_kept = 30; // percentage of g0s0 live at last minor GC
/* Thread-local data for each GC thread
 */
gc_thread **gc_threads = NULL;
// gc_thread *gct = NULL;  // this thread's gct TODO: make thread-local

// Number of threads running in *this* GC.  Affects how many
// step->todos[] lists we have to look in to find work.
nat n_gc_threads;

long copied;        // *words* copied & scavenged during this GC

SpinLock recordMutableGen_sync;
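/* A sketch of the intended use of this lock (an assumption based on
 * the name; the callers live elsewhere in the GC): parallel GC threads
 * serialise their additions to a generation's mutable list with it:
 *
 *     ACQUIRE_SPIN_LOCK(&recordMutableGen_sync);
 *     recordMutableGen(p, gen);
 *     RELEASE_SPIN_LOCK(&recordMutableGen_sync);
 */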
/* -----------------------------------------------------------------------------
   Static function declarations
   -------------------------------------------------------------------------- */
static void mark_root (void *user, StgClosure **root);
static void zero_static_object_list (StgClosure* first_static);
static nat  initialise_N (rtsBool force_major_gc);
static void alloc_gc_threads (void);
static void init_collected_gen (nat g, nat threads);
static void init_uncollected_gen (nat g, nat threads);
static void init_gc_thread (gc_thread *t);
static void update_task_list (void);
static void resize_generations (void);
static void resize_nursery (void);
static void start_gc_threads (void);
static void gc_thread_work (void);
static nat  inc_running (void);
static nat  dec_running (void);
static void wakeup_gc_threads (nat n_threads);
static void shutdown_gc_threads (nat n_threads);

#if 0 && defined(DEBUG)
static void gcCAFs (void);
#endif
/* -----------------------------------------------------------------------------
   The mark bitmap & stack.
   -------------------------------------------------------------------------- */

#define MARK_STACK_BLOCKS 4

bdescr *mark_stack_bdescr;
StgPtr *mark_stack;
StgPtr *mark_sp;
StgPtr *mark_splim;

// Flag and pointers used for falling back to a linear scan when the
// mark stack overflows.
rtsBool mark_stack_overflowed;
bdescr *oldgen_scan_bd;
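/* An illustrative sketch (the real push lives in the scavenging code)
 * of how these variables cooperate:
 */
#if 0
static void
push_mark_stack_sketch (StgPtr p)
{
    if (mark_sp >= mark_splim) {
        mark_stack_overflowed = rtsTrue;  // overflow: fall back to a
        return;                           // linear scan of the old gen
    }
    *mark_sp++ = p;
}
#endif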
/* -----------------------------------------------------------------------------
   GarbageCollect: the main entry point to the garbage collector.

   Locks held: all capabilities are held throughout GarbageCollect().
   -------------------------------------------------------------------------- */

void
GarbageCollect ( rtsBool force_major_gc )
{
    bdescr *bd;
    step *stp;
    lnat live, allocated, max_copied, avg_copied, slop;
    lnat oldgen_saved_blocks = 0;
    gc_thread *saved_gct;
    nat g, s, t, n;

    // necessary if we stole a callee-saves register for gct:
    saved_gct = gct;

#ifdef PROFILING
    CostCentreStack *prev_CCS;
#endif

#if defined(RTS_USER_SIGNALS)
    if (RtsFlags.MiscFlags.install_signal_handlers) {
        // block signals
        blockUserSignals();
    }
#endif
    ASSERT(sizeof(step_workspace) == 16 * sizeof(StgWord));
    // otherwise adjust the padding in step_workspace.
    // tell the stats department that we've started a GC
    stat_startGC();

    // tell the STM to discard any cached closures it's hoping to re-use
    stmPreGCHook();

    // attribute any costs to CCS_GC
#ifdef PROFILING
    prev_CCS = CCCS;
    CCCS    = CCS_GC;
#endif

    /* Approximate how much we allocated.
     * Todo: only when generating stats?
     */
    allocated = calcAllocated();

    /* Figure out which generation to collect
     */
    n = initialise_N(force_major_gc);
    /* Allocate + initialise the gc_thread structures.
     */
    alloc_gc_threads();

    /* Start threads, so they can be spinning up while we finish initialisation.
     */
    start_gc_threads();

    /* How many threads will be participating in this GC?
     * We don't try to parallelise minor GC.
     */
#if defined(THREADED_RTS)
    if (n < (4*1024*1024 / BLOCK_SIZE)) {
        n_gc_threads = 1;
    } else {
        n_gc_threads = RtsFlags.ParFlags.gcThreads;
    }
#else
    n_gc_threads = 1;
#endif
    trace(TRACE_gc|DEBUG_gc, "GC (gen %d): %dKB to collect, using %d thread(s)",
          N, n * (BLOCK_SIZE / 1024), n_gc_threads);
#ifdef RTS_GTK_FRONTPANEL
    if (RtsFlags.GcFlags.frontpanel) {
        updateFrontPanelBeforeGC(N);
    }
#endif

#ifdef DEBUG
    // check for memory leaks if DEBUG is on
    memInventory(traceClass(DEBUG_gc));
#endif

    // check stack sanity *before* GC (ToDo: check all threads)
    IF_DEBUG(sanity, checkFreeListSanity());
    // Initialise all our gc_thread structures
    for (t = 0; t < n_gc_threads; t++) {
        init_gc_thread(gc_threads[t]);
    }

    // Initialise all the generations/steps that we're collecting.
    for (g = 0; g <= N; g++) {
        init_collected_gen(g,n_gc_threads);
    }

    // Initialise all the generations/steps that we're *not* collecting.
    for (g = N+1; g < RtsFlags.GcFlags.generations; g++) {
        init_uncollected_gen(g,n_gc_threads);
    }
    /* Allocate a mark stack if we're doing a major collection.
     */
    if (major_gc) {
        mark_stack_bdescr = allocGroup(MARK_STACK_BLOCKS);
        mark_stack = (StgPtr *)mark_stack_bdescr->start;
        mark_sp    = mark_stack;
        mark_splim = mark_stack + (MARK_STACK_BLOCKS * BLOCK_SIZE_W);
    } else {
        mark_stack_bdescr = NULL;
    }

    // this is the main thread
    gct = gc_threads[0];
    /* -----------------------------------------------------------------------
     * follow all the roots that we know about:
     *   - mutable lists from each generation > N
     * we want to *scavenge* these roots, not evacuate them: they're not
     * going to move in this GC.
     * Also do them in reverse generation order, for the usual reason:
     * namely to reduce the likelihood of spurious old->new pointers.
     */
    for (g = RtsFlags.GcFlags.generations-1; g > N; g--) {
        generations[g].saved_mut_list = generations[g].mut_list;
        generations[g].mut_list = allocBlock();
        // mut_list always has at least one block.
    }
    // the main thread is running: this prevents any other threads from
    // exiting prematurely, so we can start them now.
    // NB. do this after the mutable lists have been saved above, otherwise
    // the other GC threads will be writing into the old mutable lists.
    inc_running();
    wakeup_gc_threads(n_gc_threads);

    for (g = RtsFlags.GcFlags.generations-1; g > N; g--) {
        scavenge_mutable_list(&generations[g]);
    }
    // follow roots from the CAF list (used by GHCi)
    gct->evac_step = 0;
    markCAFs(mark_root, gct);

    // follow all the roots that the application knows about.
    gct->evac_step = 0;
    markSomeCapabilities(mark_root, gct, gct->thread_index, n_gc_threads);

#if defined(RTS_USER_SIGNALS)
    // mark the signal handlers (signals should be already blocked)
    markSignalHandlers(mark_root, gct);
#endif

    // Mark the weak pointer list, and prepare to detect dead weak pointers.
    markWeakPtrList();
    initWeakForGC();

    // Mark the stable pointer table.
    markStablePtrTable(mark_root, gct);
    /* -------------------------------------------------------------------------
     * Repeatedly scavenge all the areas we know about until there's no
     * more scavenging to be done.
     */
    for (;;)
    {
        gc_thread_work();
        // The other threads are now stopped.  We might recurse back to
        // here, but from now on this is the only thread.

        // if any blackholes are alive, make the threads that wait on
        // them alive too.
        if (traverseBlackholeQueue()) {
            inc_running();
            continue;
        }

        // must be last...  invariant is that everything is fully
        // scavenged at this point.
        if (traverseWeakPtrList()) { // returns rtsTrue if evaced something
            inc_running();
            continue;
        }

        // If we get to here, there's really nothing left to do.
        break;
    }

    shutdown_gc_threads(n_gc_threads);
    // Update pointers from the Task list
    update_task_list();

    // Now see which stable names are still alive.
    gcStablePtrTable();

#ifdef PROFILING
    // We call processHeapClosureForDead() on every closure destroyed during
    // the current garbage collection, so we invoke LdvCensusForDead().
    if (RtsFlags.ProfFlags.doHeapProfile == HEAP_BY_LDV
        || RtsFlags.ProfFlags.bioSelector != NULL)
        LdvCensusForDead(N);
#endif
392 // Finally: compaction of the oldest generation.
393 if (major_gc && oldest_gen->steps[0].is_compacted) {
394 // save number of blocks for stats
395 oldgen_saved_blocks = oldest_gen->steps[0].n_old_blocks;
396 compact(gct->scavenged_static_objects);
399 IF_DEBUG(sanity, checkGlobalTSOList(rtsFalse));
401 // Two-space collector: free the old to-space.
402 // g0s0->old_blocks is the old nursery
403 // g0s0->blocks is to-space from the previous GC
404 if (RtsFlags.GcFlags.generations == 1) {
405 if (g0s0->blocks != NULL) {
406 freeChain(g0s0->blocks);
    // For each workspace, in each thread:
    //   * clear the BF_EVACUATED flag from each copied block
    //   * move the copied blocks to the step
    {
        gc_thread *thr;
        step_workspace *ws;
        bdescr *prev, *next;

        for (t = 0; t < n_gc_threads; t++) {
            thr = gc_threads[t];

            for (s = 1; s < total_steps; s++) {
                ws = &thr->steps[s];

                // Push the final block
                if (ws->todo_bd) {
                    push_scanned_block(ws->todo_bd, ws);
                }

                ASSERT(gct->scan_bd == NULL);
                ASSERT(countBlocks(ws->scavd_list) == ws->n_scavd_blocks);

                prev = NULL;
                for (bd = ws->scavd_list; bd != NULL; bd = bd->link) {
                    bd->flags &= ~BF_EVACUATED;  // now from-space
                    ws->step->n_words += bd->free - bd->start;
                    prev = bd;
                }
                if (prev != NULL) {
                    prev->link = ws->step->blocks;
                    ws->step->blocks = ws->scavd_list;
                }
                ws->step->n_blocks += ws->n_scavd_blocks;

                prev = NULL;
                for (bd = ws->part_list; bd != NULL; bd = next) {
                    next = bd->link;
                    if (bd->free == bd->start) {
                        if (prev == NULL) {
                            ws->part_list = next;
                        } else {
                            prev->link = next;
                        }
                        freeGroup(bd);
                        ws->n_part_blocks--;
                    } else {
                        bd->flags &= ~BF_EVACUATED;  // now from-space
                        ws->step->n_words += bd->free - bd->start;
                        prev = bd;
                    }
                }
                if (prev != NULL) {
                    prev->link = ws->step->blocks;
                    ws->step->blocks = ws->part_list;
                }
                ws->step->n_blocks += ws->n_part_blocks;

                ASSERT(countBlocks(ws->step->blocks) == ws->step->n_blocks);
                ASSERT(countOccupied(ws->step->blocks) == ws->step->n_words);
            }
        }
    }
    // Two-space collector: swap the semi-spaces around.
    // Currently: g0s0->old_blocks is the old nursery
    //            g0s0->blocks     is to-space from this GC
    // We want these the other way around.
    if (RtsFlags.GcFlags.generations == 1) {
        bdescr *nursery_blocks = g0s0->old_blocks;
        nat n_nursery_blocks = g0s0->n_old_blocks;
        g0s0->old_blocks = g0s0->blocks;
        g0s0->n_old_blocks = g0s0->n_blocks;
        g0s0->blocks = nursery_blocks;
        g0s0->n_blocks = n_nursery_blocks;
    }
    /* run through all the generations/steps and tidy up
     */
    copied = 0;
    max_copied = 0;
    avg_copied = 0;
    {
        nat i;
        for (i=0; i < n_gc_threads; i++) {
            if (n_gc_threads > 1) {
                trace(TRACE_gc,"thread %d:", i);
                trace(TRACE_gc,"   copied           %ld", gc_threads[i]->copied * sizeof(W_));
                trace(TRACE_gc,"   scanned          %ld", gc_threads[i]->scanned * sizeof(W_));
                trace(TRACE_gc,"   any_work         %ld", gc_threads[i]->any_work);
                trace(TRACE_gc,"   no_work          %ld", gc_threads[i]->no_work);
                trace(TRACE_gc,"   scav_find_work   %ld", gc_threads[i]->scav_find_work);
            }
            copied += gc_threads[i]->copied;
            max_copied = stg_max(gc_threads[i]->copied, max_copied);
        }
        if (n_gc_threads == 1) {
            max_copied = 0;
            avg_copied = 0;
        } else {
            avg_copied = copied;
        }
    }
    for (g = 0; g < RtsFlags.GcFlags.generations; g++) {

        if (g <= N) {
            generations[g].collections++; // for stats
            if (n_gc_threads > 1) generations[g].par_collections++;
        }

        // Count the mutable list as bytes "copied" for the purposes of
        // stats.  Every mutable list is copied during every GC.
        if (g > 0) {
            nat mut_list_size = 0;
            for (bd = generations[g].mut_list; bd != NULL; bd = bd->link) {
                mut_list_size += bd->free - bd->start;
            }
            copied += mut_list_size;

            debugTrace(DEBUG_gc,
                       "mut_list_size: %lu (%d vars, %d arrays, %d MVARs, %d others)",
                       (unsigned long)(mut_list_size * sizeof(W_)),
                       mutlist_MUTVARS, mutlist_MUTARRS, mutlist_MVARS, mutlist_OTHERS);
        }
        for (s = 0; s < generations[g].n_steps; s++) {
            bdescr *next;
            stp = &generations[g].steps[s];

            // for generations we collected...
            if (g <= N) {

                /* free old memory and shift to-space into from-space for all
                 * the collected steps (except the allocation area).  These
                 * freed blocks will probably be quickly recycled.
                 */
                if (!(g == 0 && s == 0 && RtsFlags.GcFlags.generations > 1)) {
                    if (stp->is_compacted)
                    {
                        // for a compacted step, just shift the new to-space
                        // onto the front of the now-compacted existing blocks.
                        for (bd = stp->blocks; bd != NULL; bd = bd->link) {
                            bd->flags &= ~BF_EVACUATED;  // now from-space
                            stp->n_words += bd->free - bd->start;
                        }
                        // tack the new blocks on the end of the existing blocks
                        if (stp->old_blocks != NULL) {
                            for (bd = stp->old_blocks; bd != NULL; bd = next) {
                                // NB. this step might not be compacted next
                                // time, so reset the BF_COMPACTED flags.
                                // They are set before GC if we're going to
                                // compact.  (search for BF_COMPACTED above)
                                bd->flags &= ~BF_COMPACTED;
                                next = bd->link;
                                if (next == NULL) {
                                    bd->link = stp->blocks;
                                }
                            }
                            stp->blocks = stp->old_blocks;
                        }
                        // add the new blocks to the block tally
                        stp->n_blocks += stp->n_old_blocks;
                        ASSERT(countBlocks(stp->blocks) == stp->n_blocks);
                        ASSERT(countOccupied(stp->blocks) == stp->n_words);
                    }
                    else // not compacted
                    {
                        freeChain(stp->old_blocks);
                    }
                    stp->old_blocks = NULL;
                    stp->n_old_blocks = 0;
                }
                /* LARGE OBJECTS.  The current live large objects are chained on
                 * scavenged_large, having been moved during garbage
                 * collection from large_objects.  Any objects left on the
                 * large_objects list are therefore dead, so we free them here.
                 */
                for (bd = stp->large_objects; bd != NULL; bd = next) {
                    next = bd->link;
                    freeGroup(bd);
                }

                // update the count of blocks used by large objects
                for (bd = stp->scavenged_large_objects; bd != NULL; bd = bd->link) {
                    bd->flags &= ~BF_EVACUATED;
                }
                stp->large_objects  = stp->scavenged_large_objects;
                stp->n_large_blocks = stp->n_scavenged_large_blocks;
            }
            else // for older generations...
            {
                /* For older generations, we need to append the
                 * scavenged_large_object list (i.e. large objects that have been
                 * promoted during this GC) to the large_object list for that step.
                 */
                for (bd = stp->scavenged_large_objects; bd; bd = next) {
                    next = bd->link;
                    bd->flags &= ~BF_EVACUATED;
                    dbl_link_onto(bd, &stp->large_objects);
                }

                // add the new blocks we promoted during this GC
                stp->n_large_blocks += stp->n_scavenged_large_blocks;
            }
        } // for each step
    } // for each generation
    // update the max size of older generations after a major GC
    resize_generations();

    // Calculate the amount of live data for stats.
    live = calcLiveWords();

    // Free the small objects allocated via allocate(), since this will
    // all have been copied into G0S1 now.
    if (RtsFlags.GcFlags.generations > 1) {
        if (g0s0->blocks != NULL) {
            freeChain(g0s0->blocks);
            g0s0->blocks = NULL;
        }
        g0s0->n_blocks = 0;
        g0s0->n_words = 0;
    }
    alloc_blocks = 0;
    alloc_blocks_lim = RtsFlags.GcFlags.minAllocAreaSize;

    // Start a new pinned_object_block
    pinned_object_block = NULL;

    // Free the mark stack.
    if (mark_stack_bdescr != NULL) {
        freeGroup(mark_stack_bdescr);
    }
    // Free any bitmaps.
    for (g = 0; g <= N; g++) {
        for (s = 0; s < generations[g].n_steps; s++) {
            stp = &generations[g].steps[s];
            if (stp->bitmap != NULL) {
                freeGroup(stp->bitmap);
                stp->bitmap = NULL;
            }
        }
    }

    resize_nursery();

    // mark the garbage collected CAFs as dead
#if 0 && defined(DEBUG) // doesn't work at the moment
    if (major_gc) { gcCAFs(); }
#endif
#ifdef PROFILING
    // resetStaticObjectForRetainerProfiling() must be called before
    // zeroing below.
    if (n_gc_threads > 1) {
        barf("profiling is currently broken with multi-threaded GC");
        // ToDo: fix the gct->scavenged_static_objects below
    }
    resetStaticObjectForRetainerProfiling(gct->scavenged_static_objects);
#endif

    // zero the scavenged static object list
    if (major_gc) {
        nat i;
        for (i = 0; i < n_gc_threads; i++) {
            zero_static_object_list(gc_threads[i]->scavenged_static_objects);
        }
    }
    // Reset the nursery
    resetNurseries();

    // start any pending finalizers
    scheduleFinalizers(last_free_capability, old_weak_ptr_list);

    // send exceptions to any threads which were about to die
    resurrectThreads(resurrected_threads);

    // Update the stable pointer hash table.
    updateStablePtrTable(major_gc);

    // check sanity after GC
    IF_DEBUG(sanity, checkSanity());

    // extra GC trace info
    if (traceClass(TRACE_gc|DEBUG_gc)) statDescribeGens();
#ifdef DEBUG
    // symbol-table based profiling
    /* heapCensus(to_blocks); */ /* ToDo */
#endif

    // restore enclosing cost centre
#ifdef PROFILING
    CCCS = prev_CCS;
#endif

#ifdef DEBUG
    // check for memory leaks if DEBUG is on
    memInventory(traceClass(DEBUG_gc));
#endif

#ifdef RTS_GTK_FRONTPANEL
    if (RtsFlags.GcFlags.frontpanel) {
        updateFrontPanelAfterGC( N, live );
    }
#endif

    // ok, GC over: tell the stats department what happened.
    slop = calcLiveBlocks() * BLOCK_SIZE_W - live;
    stat_endGC(allocated, live, copied, N, max_copied, avg_copied, slop);

#if defined(RTS_USER_SIGNALS)
    if (RtsFlags.MiscFlags.install_signal_handlers) {
        // unblock signals again
        unblockUserSignals();
    }
#endif

    gct = saved_gct;
}
/* -----------------------------------------------------------------------------
   Figure out which generation to collect, initialise N and major_gc.

   Also returns the total number of blocks in generations that will be
   collected.
   -------------------------------------------------------------------------- */
static nat
initialise_N (rtsBool force_major_gc)
{
    int g;
    nat s, blocks, blocks_total;

    blocks = 0;
    blocks_total = 0;

    if (force_major_gc) {
        N = RtsFlags.GcFlags.generations - 1;
    } else {
        N = 0;
        for (g = RtsFlags.GcFlags.generations - 1; g >= 0; g--) {
            blocks = 0;
            for (s = 0; s < generations[g].n_steps; s++) {
                blocks += generations[g].steps[s].n_words / BLOCK_SIZE_W;
                blocks += generations[g].steps[s].n_large_blocks;
            }
            if (blocks >= generations[g].max_blocks) {
                N = stg_max(N,g);
            }
            blocks_total += blocks;
        }
    }

    blocks_total += countNurseryBlocks();

    major_gc = (N == RtsFlags.GcFlags.generations-1);
    return blocks_total;
}
/* -----------------------------------------------------------------------------
   Initialise the gc_thread structures.
   -------------------------------------------------------------------------- */

static gc_thread *
alloc_gc_thread (int n)
{
    nat s;
    step_workspace *ws;
    gc_thread *t;

    t = stgMallocBytes(sizeof(gc_thread) + total_steps * sizeof(step_workspace),
                       "alloc_gc_thread");

#ifdef THREADED_RTS
    initCondition(&t->wake_cond);
    initMutex(&t->wake_mutex);
    t->wakeup = rtsTrue;  // starts true, so we can wait for the
                          // thread to start up, see wakeup_gc_threads
    t->exit = rtsFalse;
#endif

    t->thread_index = n;
    t->free_blocks = NULL;

    init_gc_thread(t);

    for (s = 0; s < total_steps; s++)
    {
        ws = &t->steps[s];
        ws->step = &all_steps[s];
        ASSERT(s == ws->step->abs_no);
        ws->gct = t;

        ws->todo_bd = NULL;
        ws->buffer_todo_bd = NULL;

        ws->part_list = NULL;
        ws->n_part_blocks = 0;

        ws->scavd_list = NULL;
        ws->n_scavd_blocks = 0;
    }

    return t;
}
static void
alloc_gc_threads (void)
{
    if (gc_threads == NULL) {
#if defined(THREADED_RTS)
        nat i;
        gc_threads = stgMallocBytes (RtsFlags.ParFlags.gcThreads *
                                     sizeof(gc_thread*),
                                     "alloc_gc_threads");

        for (i = 0; i < RtsFlags.ParFlags.gcThreads; i++) {
            gc_threads[i] = alloc_gc_thread(i);
        }
#else
        gc_threads = stgMallocBytes (sizeof(gc_thread*),
                                     "alloc_gc_threads");
        gc_threads[0] = alloc_gc_thread(0);
#endif
    }
}
/* ----------------------------------------------------------------------------
   Start GC threads
   ------------------------------------------------------------------------- */

static nat gc_running_threads;

#if defined(THREADED_RTS)
static Mutex gc_running_mutex;
#endif

static nat
inc_running (void)
{
    nat n_running;
    ACQUIRE_LOCK(&gc_running_mutex);
    n_running = ++gc_running_threads;
    RELEASE_LOCK(&gc_running_mutex);
    ASSERT(n_running <= n_gc_threads);
    return n_running;
}

static nat
dec_running (void)
{
    nat n_running;
    ACQUIRE_LOCK(&gc_running_mutex);
    ASSERT(n_gc_threads != 0);
    n_running = --gc_running_threads;
    RELEASE_LOCK(&gc_running_mutex);
    return n_running;
}
// gc_thread_work(): Scavenge until there's no work left to do and all
// the running threads are idle.
static void
gc_thread_work (void)
{
    nat r;

    debugTrace(DEBUG_gc, "GC thread %d working", gct->thread_index);

    // gc_running_threads has already been incremented for us; either
    // this is the main thread and we incremented it inside
    // GarbageCollect(), or this is a worker thread and the main
    // thread bumped gc_running_threads before waking us up.

    // Every thread evacuates some roots.
    gct->evac_step = 0;
    markSomeCapabilities(mark_root, gct, gct->thread_index, n_gc_threads);

loop:
    scavenge_loop();
    // scavenge_loop() only exits when there's no work to do
    r = dec_running();

    debugTrace(DEBUG_gc, "GC thread %d idle (%d still running)",
               gct->thread_index, r);

    while (gc_running_threads != 0) {
        if (any_work()) {
            inc_running();
            goto loop;
        }
        // any_work() does not remove the work from the queue, it
        // just checks for the presence of work.  If we find any,
        // then we increment gc_running_threads and go back to
        // scavenge_loop() to perform any pending work.
    }

    // All threads are now stopped
    debugTrace(DEBUG_gc, "GC thread %d finished.", gct->thread_index);
}
#if defined(THREADED_RTS)
static void
gc_thread_mainloop (void)
{
    while (!gct->exit) {

        // Wait until we're told to wake up
        ACQUIRE_LOCK(&gct->wake_mutex);
        gct->wakeup = rtsFalse;
        while (!gct->wakeup) {
            debugTrace(DEBUG_gc, "GC thread %d standing by...",
                       gct->thread_index);
            waitCondition(&gct->wake_cond, &gct->wake_mutex);
        }
        RELEASE_LOCK(&gct->wake_mutex);
        if (gct->exit) break;

#ifdef USE_PAPI
        // start performance counters in this thread...
        if (gct->papi_events == -1) {
            papi_init_eventset(&gct->papi_events);
        }
        papi_thread_start_gc1_count(gct->papi_events);
#endif

        gc_thread_work();

#ifdef USE_PAPI
        // count events in this thread towards the GC totals
        papi_thread_stop_gc1_count(gct->papi_events);
#endif
    }
}
#endif
#if defined(THREADED_RTS)
static void
gc_thread_entry (gc_thread *my_gct)
{
    gct = my_gct;
    debugTrace(DEBUG_gc, "GC thread %d starting...", gct->thread_index);
    gct->id = osThreadId();
    gc_thread_mainloop();
}
#endif
static void
start_gc_threads (void)
{
#if defined(THREADED_RTS)
    nat i;
    OSThreadId id;
    static rtsBool done = rtsFalse;

    gc_running_threads = 0;
    initMutex(&gc_running_mutex);

    if (!done) {
        // Start from 1: the main thread is 0
        for (i = 1; i < RtsFlags.ParFlags.gcThreads; i++) {
            createOSThread(&id, (OSThreadProc*)&gc_thread_entry,
                           gc_threads[i]);
        }
        done = rtsTrue;
    }
#endif
}
static void
wakeup_gc_threads (nat n_threads USED_IF_THREADS)
{
#if defined(THREADED_RTS)
    nat i;
    for (i=1; i < n_threads; i++) {
        inc_running();
        debugTrace(DEBUG_gc, "waking up gc thread %d", i);
        do {
            ACQUIRE_LOCK(&gc_threads[i]->wake_mutex);
            if (gc_threads[i]->wakeup) {
                RELEASE_LOCK(&gc_threads[i]->wake_mutex);
                continue;
            } else {
                break;
            }
        } while (1);
        gc_threads[i]->wakeup = rtsTrue;
        signalCondition(&gc_threads[i]->wake_cond);
        RELEASE_LOCK(&gc_threads[i]->wake_mutex);
    }
#endif
}
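/* Note on the handshake above (as I read the code): a worker sets
 * wakeup = rtsFalse just before blocking on wake_cond (see
 * gc_thread_mainloop), so the do/while spin only proceeds once the
 * target thread is actually waiting; the field starting out rtsTrue
 * in alloc_gc_thread is what lets us detect that a freshly created
 * thread has reached its wait loop at all.
 */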
// After GC is complete, we must wait for all GC threads to enter the
// standby state, otherwise they may still be executing inside
// any_work(), and may even remain awake until the next GC starts.
static void
shutdown_gc_threads (nat n_threads USED_IF_THREADS)
{
#if defined(THREADED_RTS)
    nat i;
    rtsBool wakeup;
    for (i=1; i < n_threads; i++) {
        do {
            ACQUIRE_LOCK(&gc_threads[i]->wake_mutex);
            wakeup = gc_threads[i]->wakeup;
            // wakeup is false while the thread is waiting
            RELEASE_LOCK(&gc_threads[i]->wake_mutex);
        } while (wakeup == rtsTrue);
    }
#endif
}
/* ----------------------------------------------------------------------------
   Initialise a generation that is to be collected
   ------------------------------------------------------------------------- */

static void
init_collected_gen (nat g, nat n_threads)
{
    nat s, t, i;
    step_workspace *ws;
    step *stp;
    bdescr *bd;

    // Throw away the current mutable list.  Invariant: the mutable
    // list always has at least one block; this means we can avoid a
    // check for NULL in recordMutable().
    if (g != 0) {
        freeChain(generations[g].mut_list);
        generations[g].mut_list = allocBlock();
        for (i = 0; i < n_capabilities; i++) {
            freeChain(capabilities[i].mut_lists[g]);
            capabilities[i].mut_lists[g] = allocBlock();
        }
    }
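    /* A hypothetical sketch of the fast path that this invariant buys
     * (the real recordMutable lives elsewhere in the RTS); 'gen' and
     * 'p' here are stand-ins, not variables in this function:
     */
#if 0
    bd = gen->mut_list;                 // never NULL, by the invariant
    if (bd->free >= bd->start + BLOCK_SIZE_W) {
        bd = allocBlock();              // head block full: chain a new one
        bd->link = gen->mut_list;
        gen->mut_list = bd;
    }
    *bd->free++ = (StgWord)p;
#endif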
    for (s = 0; s < generations[g].n_steps; s++) {

        // generation 0, step 0 doesn't need to-space
        if (g == 0 && s == 0 && RtsFlags.GcFlags.generations > 1) {
            continue;
        }

        stp = &generations[g].steps[s];
        ASSERT(stp->gen_no == g);

        // deprecate the existing blocks
        stp->old_blocks   = stp->blocks;
        stp->n_old_blocks = stp->n_blocks;
        stp->blocks       = NULL;
        stp->n_blocks     = 0;
        stp->n_words      = 0;

        // we don't have any to-be-scavenged blocks yet
        stp->todos = NULL;
        stp->todos_last = NULL;
        stp->n_todos = 0;

        // initialise the large object queues.
        stp->scavenged_large_objects = NULL;
        stp->n_scavenged_large_blocks = 0;

        // mark the large objects as not evacuated yet
        for (bd = stp->large_objects; bd; bd = bd->link) {
            bd->flags &= ~BF_EVACUATED;
        }
        // for a compacted step, we need to allocate the bitmap
        if (stp->is_compacted) {
            nat bitmap_size; // in bytes
            bdescr *bitmap_bdescr;
            StgWord *bitmap;

            bitmap_size = stp->n_old_blocks * BLOCK_SIZE / (sizeof(W_)*BITS_PER_BYTE);

            if (bitmap_size > 0) {
                bitmap_bdescr = allocGroup((lnat)BLOCK_ROUND_UP(bitmap_size)
                                           / BLOCK_SIZE);
                stp->bitmap = bitmap_bdescr;
                bitmap = bitmap_bdescr->start;

                debugTrace(DEBUG_gc, "bitmap_size: %d, bitmap: %p",
                           bitmap_size, bitmap);

                // don't forget to fill it with zeros!
                memset(bitmap, 0, bitmap_size);

                // For each block in this step, point to its bitmap from the
                // block descriptor.
                for (bd=stp->old_blocks; bd != NULL; bd = bd->link) {
                    bd->u.bitmap = bitmap;
                    bitmap += BLOCK_SIZE_W / (sizeof(W_)*BITS_PER_BYTE);

                    // Also at this point we set the BF_COMPACTED flag
                    // for this block.  The invariant is that
                    // BF_COMPACTED is always unset, except during GC
                    // when it is set on those blocks which will be
                    // compacted.
                    bd->flags |= BF_COMPACTED;
                }
            }
        }
    }
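    /* Worked example for the bitmap sizing above (illustrative numbers,
     * assuming the common 4096-byte block size): the bitmap needs one
     * bit per word in the step.  On a 32-bit build a block holds
     * 4096/4 = 1024 words, so each block contributes 1024 bits =
     * BLOCK_SIZE / (sizeof(W_) * BITS_PER_BYTE) = 4096/32 = 128 bytes
     * of bitmap; on a 64-bit build it is 4096/64 = 64 bytes per block.
     */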
    // For each GC thread, for each step, allocate a "todo" block to
    // store evacuated objects to be scavenged, and a block to store
    // evacuated objects that do not need to be scavenged.
    for (t = 0; t < n_threads; t++) {
        for (s = 0; s < generations[g].n_steps; s++) {

            // we don't copy objects into g0s0, unless -G0
            if (g==0 && s==0 && RtsFlags.GcFlags.generations > 1) continue;

            ws = &gc_threads[t]->steps[g * RtsFlags.GcFlags.steps + s];

            ws->todo_large_objects = NULL;

            ws->part_list = NULL;
            ws->n_part_blocks = 0;

            // allocate the first to-space block; extra blocks will be
            // chained on as necessary.
            ws->todo_bd = NULL;
            ws->buffer_todo_bd = NULL;
            alloc_todo_block(ws,0);

            ws->scavd_list = NULL;
            ws->n_scavd_blocks = 0;
        }
    }
}
/* ----------------------------------------------------------------------------
   Initialise a generation that is *not* to be collected
   ------------------------------------------------------------------------- */

static void
init_uncollected_gen (nat g, nat threads)
{
    nat s, t, i;
    step_workspace *ws;
    step *stp;
    bdescr *bd;

    for (s = 0; s < generations[g].n_steps; s++) {
        stp = &generations[g].steps[s];
        stp->scavenged_large_objects = NULL;
        stp->n_scavenged_large_blocks = 0;
    }

    for (t = 0; t < threads; t++) {
        for (s = 0; s < generations[g].n_steps; s++) {

            ws = &gc_threads[t]->steps[g * RtsFlags.GcFlags.steps + s];
            stp = ws->step;

            ws->buffer_todo_bd = NULL;
            ws->todo_large_objects = NULL;

            ws->part_list = NULL;
            ws->n_part_blocks = 0;

            ws->scavd_list = NULL;
            ws->n_scavd_blocks = 0;

            // If the block at the head of the list in this generation
            // is less than 3/4 full, then use it as a todo block.
            if (stp->blocks && isPartiallyFull(stp->blocks))
            {
                ws->todo_bd = stp->blocks;
                ws->todo_free = ws->todo_bd->free;
                ws->todo_lim = ws->todo_bd->start + BLOCK_SIZE_W;
                stp->blocks = stp->blocks->link;
                stp->n_blocks -= 1;
                stp->n_words -= ws->todo_bd->free - ws->todo_bd->start;
                ws->todo_bd->link = NULL;
                // we must scan from the current end point.
                ws->todo_bd->u.scan = ws->todo_bd->free;
            }
            else
            {
                ws->todo_bd = NULL;
                alloc_todo_block(ws,0);
            }
        }
    }
    // Move the private mutable lists from each capability onto the
    // main mutable list for the generation.
    for (i = 0; i < n_capabilities; i++) {
        for (bd = capabilities[i].mut_lists[g];
             bd->link != NULL; bd = bd->link) {
            /* nothing */
        }
        bd->link = generations[g].mut_list;
        generations[g].mut_list = capabilities[i].mut_lists[g];
        capabilities[i].mut_lists[g] = allocBlock();
    }
}
/* -----------------------------------------------------------------------------
   Initialise a gc_thread before GC
   -------------------------------------------------------------------------- */

static void
init_gc_thread (gc_thread *t)
{
    t->static_objects = END_OF_STATIC_LIST;
    t->scavenged_static_objects = END_OF_STATIC_LIST;
    t->evac_step = 0;
    t->failed_to_evac = rtsFalse;
    t->eager_promotion = rtsTrue;
    t->thunk_selector_depth = 0;
    t->copied = 0;
    t->scanned = 0;
    t->any_work = 0;
    t->no_work = 0;
    t->scav_find_work = 0;
}
/* -----------------------------------------------------------------------------
   Function we pass to evacuate roots.
   -------------------------------------------------------------------------- */

static void
mark_root(void *user, StgClosure **root)
{
    // we stole a register for gct, but this function is called from
    // *outside* the GC where the register variable is not in effect,
    // so we need to save and restore it here.  NB. only call
    // mark_root() from the main GC thread, otherwise gct will be
    // incorrect.
    gc_thread *saved_gct;
    saved_gct = gct;
    gct = user;

    evacuate(root);

    gct = saved_gct;
}
/* -----------------------------------------------------------------------------
   Initialising the static object & mutable lists
   -------------------------------------------------------------------------- */

static void
zero_static_object_list(StgClosure* first_static)
{
    StgClosure* p;
    StgClosure* link;
    const StgInfoTable *info;

    for (p = first_static; p != END_OF_STATIC_LIST; p = link) {
        info = get_itbl(p);
        link = *STATIC_LINK(info, p);
        *STATIC_LINK(info,p) = NULL;
    }
}
/* ----------------------------------------------------------------------------
   Update the pointers from the task list

   These are treated as weak pointers because we want to allow a main
   thread to get a BlockedOnDeadMVar exception in the same way as any
   other thread.  Note that the threads should all have been retained
   by GC by virtue of being on the all_threads list, we're just
   updating pointers here.
   ------------------------------------------------------------------------- */

static void
update_task_list (void)
{
    Task *task;
    StgTSO *tso;
    for (task = all_tasks; task != NULL; task = task->all_link) {
        if (!task->stopped && task->tso) {
            ASSERT(task->tso->bound == task);
            tso = (StgTSO *) isAlive((StgClosure *)task->tso);
            if (tso == NULL) {
                barf("task %p: main thread %d has been GC'd",
                     (void *)task, task->tso->id);
            }
            task->tso = tso;
        }
    }
}
/* ----------------------------------------------------------------------------
   Reset the sizes of the older generations when we do a major
   collection.

   CURRENT STRATEGY: make all generations except zero the same size.
   We have to stay within the maximum heap size, and leave a certain
   percentage of the maximum heap size available to allocate into.
   ------------------------------------------------------------------------- */
static void
resize_generations (void)
{
    nat g;

    if (major_gc && RtsFlags.GcFlags.generations > 1) {
        nat live, size, min_alloc;
        nat max  = RtsFlags.GcFlags.maxHeapSize;
        nat gens = RtsFlags.GcFlags.generations;

        // live in the oldest generations
        live = (oldest_gen->steps[0].n_words + BLOCK_SIZE_W - 1) / BLOCK_SIZE_W +
               oldest_gen->steps[0].n_large_blocks;

        // default max size for all generations except zero
        size = stg_max(live * RtsFlags.GcFlags.oldGenFactor,
                       RtsFlags.GcFlags.minOldGenSize);

        // minimum size for generation zero
        min_alloc = stg_max((RtsFlags.GcFlags.pcFreeHeap * max) / 200,
                            RtsFlags.GcFlags.minAllocAreaSize);

        // Auto-enable compaction when the residency reaches a
        // certain percentage of the maximum heap size (default: 30%).
        if (RtsFlags.GcFlags.generations > 1 &&
            (RtsFlags.GcFlags.compact ||
             (max > 0 &&
              oldest_gen->steps[0].n_blocks >
              (RtsFlags.GcFlags.compactThreshold * max) / 100))) {
            oldest_gen->steps[0].is_compacted = 1;
            // debugBelch("compaction: on\n", live);
        } else {
            oldest_gen->steps[0].is_compacted = 0;
            // debugBelch("compaction: off\n", live);
        }
        // if we're going to go over the maximum heap size, reduce the
        // size of the generations accordingly.  The calculation is
        // different if compaction is turned on, because we don't need
        // to double the space required to collect the old generation.
        if (max != 0) {

            // this test is necessary to ensure that the calculations
            // below don't have any negative results - we're working
            // with unsigned values here.
            if (max < min_alloc) {
                heapOverflow();
            }

            if (oldest_gen->steps[0].is_compacted) {
                if ( (size + (size - 1) * (gens - 2) * 2) + min_alloc > max ) {
                    size = (max - min_alloc) / ((gens - 1) * 2 - 1);
                }
            } else {
                if ( (size * (gens - 1) * 2) + min_alloc > max ) {
                    size = (max - min_alloc) / ((gens - 1) * 2);
                }
            }

            if (size < live) {
                heapOverflow();
            }
        }
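        /* Rationale for the formulas above (a sketch, not from the
         * original comments): with copying collection each of the
         * gens-1 old generations may need from-space plus to-space,
         * i.e. 2*size blocks, so we solve
         *     size * (gens-1) * 2 + min_alloc <= max
         * for size.  With compaction the oldest generation needs no
         * separate to-space, which removes one 'size' term from the
         * total and gives the ((gens-1)*2 - 1) divisor.
         */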
#if 0
        debugBelch("live: %d, min_alloc: %d, size : %d, max = %d\n", live,
                   min_alloc, size, max);
#endif

        for (g = 0; g < gens; g++) {
            generations[g].max_blocks = size;
        }
    }
}
/* -----------------------------------------------------------------------------
   Calculate the new size of the nursery, and resize it.
   -------------------------------------------------------------------------- */

static void
resize_nursery (void)
{
    if (RtsFlags.GcFlags.generations == 1)
    {   // Two-space collector:
        nat blocks;

        /* set up a new nursery.  Allocate a nursery size based on a
         * function of the amount of live data (by default a factor of 2)
         * Use the blocks from the old nursery if possible, freeing up any
         * left over blocks.
         *
         * If we get near the maximum heap size, then adjust our nursery
         * size accordingly.  If the nursery is the same size as the live
         * data (L), then we need 3L bytes.  We can reduce the size of the
         * nursery to bring the required memory down near 2L bytes.
         *
         * A normal 2-space collector would need 4L bytes to give the same
         * performance we get from 3L bytes, reducing to the same
         * performance at 2L bytes.
         */
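        /* Where the 2L/3L/4L figures come from (a sketch, reasoning
         * from the comment above rather than from the original text):
         * with live data L and a nursery also sized L, the peak is
         * nursery + from-space + to-space = 3L.  Shrinking the nursery
         * towards zero brings the peak towards 2L (just the two
         * semi-spaces), while a conventional 2-space collector sizing
         * its heap at 2L would peak at 4L during copying.
         */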
        blocks = g0s0->n_old_blocks;

        if ( RtsFlags.GcFlags.maxHeapSize != 0 &&
             blocks * RtsFlags.GcFlags.oldGenFactor * 2 >
             RtsFlags.GcFlags.maxHeapSize )
        {
            long adjusted_blocks;  // signed on purpose
            int pc_free;

            adjusted_blocks = (RtsFlags.GcFlags.maxHeapSize - 2 * blocks);

            debugTrace(DEBUG_gc, "near maximum heap size of 0x%x blocks, blocks = %d, adjusted to %ld",
                       RtsFlags.GcFlags.maxHeapSize, blocks, adjusted_blocks);

            pc_free = adjusted_blocks * 100 / RtsFlags.GcFlags.maxHeapSize;
            if (pc_free < RtsFlags.GcFlags.pcFreeHeap) /* might even be < 0 */
            {
                heapOverflow();
            }
            blocks = adjusted_blocks;
        }
        else
        {
            blocks *= RtsFlags.GcFlags.oldGenFactor;
            if (blocks < RtsFlags.GcFlags.minAllocAreaSize)
            {
                blocks = RtsFlags.GcFlags.minAllocAreaSize;
            }
        }
        resizeNurseries(blocks);
    }
    else  // Generational collector
    {
        /*
         * If the user has given us a suggested heap size, adjust our
         * allocation area to make best use of the memory available.
         */
        if (RtsFlags.GcFlags.heapSizeSuggestion)
        {
            long blocks;
            nat needed = calcNeeded();  // approx blocks needed at next GC

            /* Guess how much will be live in generation 0 step 0 next time.
             * A good approximation is obtained by finding the
             * percentage of g0s0 that was live at the last minor GC.
             *
             * We have an accurate figure for the amount of copied data in
             * 'copied', but we must convert this to a number of blocks, with
             * a small adjustment for estimated slop at the end of a block
             * (- 10 words).
             */
            if (N == 0)
            {
                g0s0_pcnt_kept = ((copied / (BLOCK_SIZE_W - 10)) * 100)
                                 / countNurseryBlocks();
            }

            /* Estimate a size for the allocation area based on the
             * information available.  We might end up going slightly under
             * or over the suggested heap size, but we should be pretty
             * close on average.
             *
             * Formula:            suggested - needed
             *                ----------------------------
             *                    1 + g0s0_pcnt_kept/100
             *
             * where 'needed' is the amount of memory needed at the next
             * collection for collecting all steps except g0s0.
             */
            blocks =
                (((long)RtsFlags.GcFlags.heapSizeSuggestion - (long)needed) * 100) /
                (100 + (long)g0s0_pcnt_kept);
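            /* Worked example (illustrative numbers, not from the
             * source): suggested = 6000 blocks, needed = 2000,
             * g0s0_pcnt_kept = 30.  Then
             *     blocks = (6000 - 2000) * 100 / 130  ~=  3076,
             * and indeed 3076 nursery blocks + ~30% of them kept
             * (~923) + the 2000 needed  ~=  6000, the suggestion.
             */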
            if (blocks < (long)RtsFlags.GcFlags.minAllocAreaSize) {
                blocks = RtsFlags.GcFlags.minAllocAreaSize;
            }

            resizeNurseries((nat)blocks);
        }
        else
        {
            // we might have added extra large blocks to the nursery, so
            // resize back to minAllocAreaSize again.
            resizeNurseriesFixed(RtsFlags.GcFlags.minAllocAreaSize);
        }
    }
}
/* -----------------------------------------------------------------------------
   Sanity code for CAF garbage collection.

   With DEBUG turned on, we manage a CAF list in addition to the SRT
   mechanism.  After GC, we run down the CAF list and blackhole any
   CAFs which have been garbage collected.  This means we get an error
   whenever the program tries to enter a garbage collected CAF.

   Any garbage collected CAFs are taken off the CAF list at the same
   time.
   -------------------------------------------------------------------------- */
#if 0 && defined(DEBUG)

static void
gcCAFs(void)
{
    StgClosure*  p;
    StgClosure** pp;
    const StgInfoTable *info;
    nat i;

    i = 0;
    p = caf_list;
    pp = &caf_list;

    while (p != NULL) {
        info = get_itbl(p);

        ASSERT(info->type == IND_STATIC);

        if (STATIC_LINK(info,p) == NULL) {
            debugTrace(DEBUG_gccafs, "CAF gc'd at 0x%04lx", (long)p);
            // black hole it
            SET_INFO(p,&stg_BLACKHOLE_info);
            p = STATIC_LINK2(info,p);
            *pp = p;
        }
        else {
            pp = &STATIC_LINK2(info,p);
            p = *pp;
            i++;
        }
    }

    debugTrace(DEBUG_gccafs, "%d CAFs live", i);
}
#endif