/* -----------------------------------------------------------------------------
 *
 * (c) The GHC Team 1998-2008
 *
 * Generational garbage collector
 *
 * Documentation on the architecture of the Garbage Collector can be
 * found in the online commentary:
 *
 *   http://hackage.haskell.org/trac/ghc/wiki/Commentary/Rts/Storage/GC
 *
 * ---------------------------------------------------------------------------*/

#include "PosixSource.h"
#include "BlockAlloc.h"
#include "RtsSignals.h"
#if defined(RTS_GTK_FRONTPANEL)
#include "FrontPanel.h"
#endif
#include "RetainerProfile.h"
#include "RaiseAsync.h"

#include <string.h> // for memset()

/* -----------------------------------------------------------------------------
   -------------------------------------------------------------------------- */
/* STATIC OBJECT LIST.
 *
 * We maintain a linked list of static objects that are still live.
 * The requirements for this list are:
 *
 *  - we need to scan the list while adding to it, in order to
 *    scavenge all the static objects (in the same way that
 *    breadth-first scavenging works for dynamic objects).
 *
 *  - we need to be able to tell whether an object is already on
 *    the list, to break loops.
 *
 * Each static object has a "static link field", which we use for
 * linking objects on to the list.  We use a stack-type list, consing
 * objects on the front as they are added (this means that the
 * scavenge phase is depth-first, not breadth-first, but that
 * shouldn't matter).
 *
 * A separate list is kept for objects that have been scavenged
 * already - this is so that we can zero all the marks afterwards.
 *
 * An object is on the list if its static link field is non-zero; this
 * means that we have to mark the end of the list with '1', not NULL.
 */
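
/* A minimal sketch (ours, not RTS code) of the consing discipline
 * described above, using a toy node type in place of real closures;
 * the non-NULL END marker plays the role of the '1' end-of-list value.
 */
#if 0 /* illustrative only */
typedef struct Node_ { struct Node_ *static_link; } Node;
#define END ((Node *)1)                  /* non-NULL end-of-list marker */
static Node *static_list = END;

static void push_static (Node *p)
{
    if (p->static_link != NULL) return;  /* non-zero link: already on the list */
    p->static_link = static_list;        /* cons onto the front */
    static_list = p;
}
#endif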
/* Extra notes for generational GC:
 *
 * Each generation has a static object list associated with it.  When
 * collecting generations up to N, we treat the static object lists
 * from generations > N as roots.
 *
 * We build up a static object list while collecting generations 0..N,
 * which is then appended to the static object list of generation N+1.
 */
/* N is the oldest generation being collected, where the generations
 * are numbered starting at 0.  A major GC (indicated by the major_gc
 * flag) is when we're collecting all generations.  We only attempt to
 * deal with static objects and GC CAFs when doing a major GC.
 */
nat N;
rtsBool major_gc;
/* Data used for allocation area sizing.
 */
static lnat g0s0_pcnt_kept = 30; // percentage of g0s0 live at last minor GC

/* Thread-local data for each GC thread
 */
gc_thread **gc_threads = NULL;

#if !defined(THREADED_RTS)
StgWord8 the_gc_thread[sizeof(gc_thread) + 64 * sizeof(step_workspace)];
#endif

// Number of threads running in *this* GC.  Affects how many
// step->todos[] lists we have to look in to find work.
nat n_gc_threads;

long copied;  // *words* copied & scavenged during this GC

rtsBool work_stealing;
/* -----------------------------------------------------------------------------
   Static function declarations
   -------------------------------------------------------------------------- */

static void mark_root               (void *user, StgClosure **root);
static void zero_static_object_list (StgClosure* first_static);
static nat  initialise_N            (rtsBool force_major_gc);
static void init_collected_gen      (nat g, nat threads);
static void init_uncollected_gen    (nat g, nat threads);
static void init_gc_thread          (gc_thread *t);
static void update_task_list        (void);
static void resize_generations      (void);
static void resize_nursery          (void);
static void start_gc_threads        (void);
static void scavenge_until_all_done (void);
static StgWord inc_running          (void);
static StgWord dec_running          (void);
static void wakeup_gc_threads       (nat n_threads, nat me);
static void shutdown_gc_threads     (nat n_threads, nat me);

#if 0 && defined(DEBUG)
static void gcCAFs                  (void);
#endif
/* -----------------------------------------------------------------------------
   The mark bitmap & stack.
   -------------------------------------------------------------------------- */

#define MARK_STACK_BLOCKS 4

bdescr *mark_stack_bdescr;
StgPtr *mark_stack;
StgPtr *mark_sp;
StgPtr *mark_splim;

// Flag and pointers used for falling back to a linear scan when the
// mark stack overflows.
rtsBool mark_stack_overflowed;
bdescr *oldgen_scan_bd;
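
/* A rough sketch (ours, not the RTS's) of how the fallback is expected
 * to behave: if a push would run past mark_splim we record the overflow
 * and drop the pointer; the compactor then recovers by linearly
 * re-scanning the old generation (tracked via oldgen_scan_bd) instead
 * of trusting the stack contents.
 */
#if 0 /* illustrative only */
static void push_mark_stack_sketch (StgPtr p)
{
    if (mark_sp >= mark_splim) {
        mark_stack_overflowed = rtsTrue;  /* remember; linear scan later */
        return;
    }
    *mark_sp++ = p;
}
#endif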
/* -----------------------------------------------------------------------------
   GarbageCollect: the main entry point to the garbage collector.

   Locks held: all capabilities are held throughout GarbageCollect().
   -------------------------------------------------------------------------- */

void
GarbageCollect (rtsBool force_major_gc,
                nat gc_type USED_IF_THREADS,
                Capability *cap)
{
    bdescr *bd, *prev, *next;
    step *stp;
    step_workspace *ws;
    lnat live, allocated, max_copied, avg_copied, slop;
    gc_thread *saved_gct;
    nat g, s, t, n;

    // necessary if we stole a callee-saves register for gct:
    saved_gct = gct;

#ifdef PROFILING
    CostCentreStack *prev_CCS;
#endif

#if defined(RTS_USER_SIGNALS)
    if (RtsFlags.MiscFlags.install_signal_handlers) {
        // block signals
        blockUserSignals();
    }
#endif

    ASSERT(sizeof(step_workspace) == 16 * sizeof(StgWord));
    // otherwise adjust the padding in step_workspace.

    // tell the stats department that we've started a GC
    stat_startGC();

    // tell the STM to discard any cached closures it's hoping to re-use
    stmPreGCHook();

    // lock the StablePtr table
    stablePtrPreGC();

    // attribute any costs to CCS_GC
#ifdef PROFILING
    prev_CCS = CCCS;
    CCCS = CCS_GC;
#endif

    /* Approximate how much we allocated.
     * Todo: only when generating stats?
     */
    allocated = calcAllocated();

    /* Figure out which generation to collect
     */
    n = initialise_N(force_major_gc);
#if defined(THREADED_RTS)
    work_stealing = RtsFlags.ParFlags.parGcLoadBalancing;
    // It's not always a good idea to do load balancing in parallel
    // GC.  In particular, for a parallel program we don't want to
    // lose locality by moving cached data into another CPU's cache
    // (this effect can be quite significant).
    //
    // We could have a more complex way to determine whether to do
    // work stealing or not, e.g. it might be a good idea to do it
    // if the heap is big.  For now, we just turn it on or off with
    // a flag.
#endif
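
    // A sketch of the kind of richer heuristic alluded to above
    // (hypothetical, not what the RTS does): only steal work when the
    // heap is large enough that load imbalance outweighs cache locality.
#if 0 /* illustrative only */
    work_stealing = RtsFlags.ParFlags.parGcLoadBalancing
                 && mblocks_allocated > 256;   // made-up ~256MB threshold
#endif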
    /* Start threads, so they can be spinning up while we finish initialisation.
     */
    start_gc_threads();

#if defined(THREADED_RTS)
    /* How many threads will be participating in this GC?
     * We don't try to parallelise minor GCs (unless the user asks for
     * it with +RTS -gn0), or mark/compact/sweep GC.
     */
    if (gc_type == PENDING_GC_PAR) {
        n_gc_threads = RtsFlags.ParFlags.nNodes;
    } else {
        n_gc_threads = 1;
    }
#else
    n_gc_threads = 1;
#endif

    debugTrace(DEBUG_gc, "GC (gen %d): %d KB to collect, %ld MB in use, using %d thread(s)",
               N, n * (BLOCK_SIZE / 1024), mblocks_allocated, n_gc_threads);

#ifdef RTS_GTK_FRONTPANEL
    if (RtsFlags.GcFlags.frontpanel) {
        updateFrontPanelBeforeGC(N);
    }
#endif

#ifdef DEBUG
    // check for memory leaks if DEBUG is on
    memInventory(traceClass(DEBUG_gc));
#endif

    // check stack sanity *before* GC
    IF_DEBUG(sanity, checkFreeListSanity());
    IF_DEBUG(sanity, checkMutableLists(rtsTrue));
    // Initialise all our gc_thread structures
    for (t = 0; t < n_gc_threads; t++) {
        init_gc_thread(gc_threads[t]);
    }

    // Initialise all the generations/steps that we're collecting.
    for (g = 0; g <= N; g++) {
        init_collected_gen(g, n_gc_threads);
    }

    // Initialise all the generations/steps that we're *not* collecting.
    for (g = N+1; g < RtsFlags.GcFlags.generations; g++) {
        init_uncollected_gen(g, n_gc_threads);
    }

    /* Allocate a mark stack if we're doing a major collection.
     */
    if (major_gc && oldest_gen->steps[0].mark) {
        nat mark_stack_blocks;
        mark_stack_blocks = stg_max(MARK_STACK_BLOCKS,
                                    oldest_gen->steps[0].n_old_blocks / 100);
        mark_stack_bdescr = allocGroup(mark_stack_blocks);
        mark_stack = (StgPtr *)mark_stack_bdescr->start;
        mark_sp = mark_stack;
        mark_splim = mark_stack + (mark_stack_blocks * BLOCK_SIZE_W);
    } else {
        mark_stack_bdescr = NULL;
    }

    // this is the main thread
#ifdef THREADED_RTS
    if (n_gc_threads == 1) {
        SET_GCT(gc_threads[0]);
    } else {
        SET_GCT(gc_threads[cap->no]);
    }
#else
    SET_GCT(gc_threads[0]);
#endif
    /* -----------------------------------------------------------------------
     * follow all the roots that we know about:
     */

    // the main thread is running: this prevents any other threads from
    // exiting prematurely, so we can start them now.
    // NB. do this after the mutable lists have been saved above, otherwise
    // the other GC threads will be writing into the old mutable lists.
    inc_running();
    wakeup_gc_threads(n_gc_threads, gct->thread_index);

    // Mutable lists from each generation > N
    // we want to *scavenge* these roots, not evacuate them: they're not
    // going to move in this GC.
    // Also do them in reverse generation order, for the usual reason:
    // namely to reduce the likelihood of spurious old->new pointers.
    //
    for (g = RtsFlags.GcFlags.generations-1; g > N; g--) {
        scavenge_mutable_list(generations[g].saved_mut_list, &generations[g]);
        freeChain_sync(generations[g].saved_mut_list);
        generations[g].saved_mut_list = NULL;
    }

    // scavenge the capability-private mutable lists.  This isn't part
    // of markSomeCapabilities() because markSomeCapabilities() can only
    // call back into the GC via mark_root() (due to the gct register
    // variable).
    if (n_gc_threads == 1) {
        for (n = 0; n < n_capabilities; n++) {
            scavenge_capability_mut_lists(&capabilities[n]);
        }
    } else {
        scavenge_capability_mut_lists(&capabilities[gct->thread_index]);
    }

    // follow roots from the CAF list (used by GHCi)
    markCAFs(mark_root, gct);

    // follow all the roots that the application knows about.
    markSomeCapabilities(mark_root, gct, gct->thread_index, n_gc_threads,
                         rtsTrue/*prune sparks*/);
#if defined(RTS_USER_SIGNALS)
    // mark the signal handlers (signals should be already blocked)
    markSignalHandlers(mark_root, gct);
#endif

    // Mark the weak pointer list, and prepare to detect dead weak pointers.
    markWeakPtrList();
    initWeakForGC();

    // Mark the stable pointer table.
    markStablePtrTable(mark_root, gct);

    /* -------------------------------------------------------------------------
     * Repeatedly scavenge all the areas we know about until there's no
     * more scavenging to be done.
     */
    for (;;)
    {
        scavenge_until_all_done();
        // The other threads are now stopped.  We might recurse back to
        // here, but from now on this is the only thread.

        // if any blackholes are alive, make the threads that wait on
        // them alive too.
        if (traverseBlackholeQueue()) {
            inc_running();
            continue;
        }

        // must be last...  invariant is that everything is fully
        // scavenged at this point.
        if (traverseWeakPtrList()) { // returns rtsTrue if evaced something
            inc_running();
            continue;
        }

        // If we get to here, there's really nothing left to do.
        break;
    }

    shutdown_gc_threads(n_gc_threads, gct->thread_index);

    // Update pointers from the Task list
    update_task_list();

    // Now see which stable names are still alive.
    gcStablePtrTable();
#ifdef PROFILING
    // We call processHeapClosureForDead() on every closure destroyed during
    // the current garbage collection, so we invoke LdvCensusForDead().
    if (RtsFlags.ProfFlags.doHeapProfile == HEAP_BY_LDV
        || RtsFlags.ProfFlags.bioSelector != NULL)
        LdvCensusForDead(N);
#endif
    // NO MORE EVACUATION AFTER THIS POINT!

    // Two-space collector: free the old to-space.
    // g0s0->old_blocks is the old nursery
    // g0s0->blocks is to-space from the previous GC
    if (RtsFlags.GcFlags.generations == 1) {
        if (g0s0->blocks != NULL) {
            freeChain(g0s0->blocks);
            g0s0->blocks = NULL;
        }
    }
    // For each workspace, in each thread, move the copied blocks to the step
    for (t = 0; t < n_gc_threads; t++) {
        // not step 0 if we're in a generational collector
        if (RtsFlags.GcFlags.generations == 1) {
            s = 0;
        } else {
            s = 1;
        }
        for (; s < total_steps; s++) {
            ws = &gc_threads[t]->steps[s];

            // Push the final block
            if (ws->todo_bd != NULL) {
                push_scanned_block(ws->todo_bd, ws);
            }

            ASSERT(gct->scan_bd == NULL);
            ASSERT(countBlocks(ws->scavd_list) == ws->n_scavd_blocks);

            prev = NULL;
            for (bd = ws->scavd_list; bd != NULL; bd = bd->link) {
                ws->step->n_words += bd->free - bd->start;
                prev = bd;
            }
            if (prev != NULL) {
                prev->link = ws->step->blocks;
                ws->step->blocks = ws->scavd_list;
            }
            ws->step->n_blocks += ws->n_scavd_blocks;
        }
    }
    // Add all the partial blocks *after* we've added all the full
    // blocks.  This is so that we can grab the partial blocks back
    // again and try to fill them up in the next GC.
    for (t = 0; t < n_gc_threads; t++) {
        if (RtsFlags.GcFlags.generations == 1) {
            s = 0;
        } else {
            s = 1;
        }
        for (; s < total_steps; s++) {
            ws = &gc_threads[t]->steps[s];

            prev = NULL;
            for (bd = ws->part_list; bd != NULL; bd = next) {
                next = bd->link;
                if (bd->free == bd->start) {
                    if (prev == NULL) { ws->part_list = next; } else { prev->link = next; }
                    freeGroup(bd);
                    ws->n_part_blocks--;
                } else {
                    ws->step->n_words += bd->free - bd->start;
                    prev = bd;
                }
            }

            if (prev != NULL) {
                prev->link = ws->step->blocks;
                ws->step->blocks = ws->part_list;
            }
            ws->step->n_blocks += ws->n_part_blocks;

            ASSERT(countBlocks(ws->step->blocks) == ws->step->n_blocks);
            ASSERT(countOccupied(ws->step->blocks) == ws->step->n_words);
        }
    }
    // Finally: compact or sweep the oldest generation.
    if (major_gc && oldest_gen->steps[0].mark) {
        if (oldest_gen->steps[0].compact)
            compact(gct->scavenged_static_objects);
        else
            sweep(&oldest_gen->steps[0]);
    }
    /* run through all the generations/steps and tidy up
     */
    copied = 0;
    max_copied = 0;
    avg_copied = 0;
    {
        nat i;
        for (i=0; i < n_gc_threads; i++) {
            if (n_gc_threads > 1) {
                debugTrace(DEBUG_gc, "thread %d:", i);
                debugTrace(DEBUG_gc, "   copied           %ld", gc_threads[i]->copied * sizeof(W_));
                debugTrace(DEBUG_gc, "   scanned          %ld", gc_threads[i]->scanned * sizeof(W_));
                debugTrace(DEBUG_gc, "   any_work         %ld", gc_threads[i]->any_work);
                debugTrace(DEBUG_gc, "   no_work          %ld", gc_threads[i]->no_work);
                debugTrace(DEBUG_gc, "   scav_find_work   %ld", gc_threads[i]->scav_find_work);
            }
            copied += gc_threads[i]->copied;
            max_copied = stg_max(gc_threads[i]->copied, max_copied);
        }
        if (n_gc_threads == 1) {
            max_copied = 0;
            avg_copied = 0;
        } else {
            avg_copied = copied;
        }
    }

    for (g = 0; g < RtsFlags.GcFlags.generations; g++) {

        if (g == N) {
            generations[g].collections++; // for stats
            if (n_gc_threads > 1) generations[g].par_collections++;
        }

        // Count the mutable list as bytes "copied" for the purposes of
        // stats.  Every mutable list is copied during every GC.
        if (g > 0) {
            nat mut_list_size = 0;
            for (bd = generations[g].mut_list; bd != NULL; bd = bd->link) {
                mut_list_size += bd->free - bd->start;
            }
            for (n = 0; n < n_capabilities; n++) {
                for (bd = capabilities[n].mut_lists[g];
                     bd != NULL; bd = bd->link) {
                    mut_list_size += bd->free - bd->start;
                }
            }
            copied += mut_list_size;

            debugTrace(DEBUG_gc,
                       "mut_list_size: %lu (%d vars, %d arrays, %d MVARs, %d others)",
                       (unsigned long)(mut_list_size * sizeof(W_)),
                       mutlist_MUTVARS, mutlist_MUTARRS, mutlist_MVARS, mutlist_OTHERS);
        }
        for (s = 0; s < generations[g].n_steps; s++) {

            stp = &generations[g].steps[s];

            // for generations we collected...
            if (g <= N) {

                /* free old memory and shift to-space into from-space for all
                 * the collected steps (except the allocation area).  These
                 * freed blocks will probably be quickly recycled.
                 */
                if (!(g == 0 && s == 0 && RtsFlags.GcFlags.generations > 1)) {
                    if (stp->mark) {
                        // tack the new blocks on the end of the existing blocks
                        if (stp->old_blocks != NULL) {
                            prev = NULL;
                            for (bd = stp->old_blocks; bd != NULL; bd = next) {
                                next = bd->link;
                                if (!(bd->flags & BF_MARKED)) {
                                    if (prev == NULL) {
                                        stp->old_blocks = next;
                                    } else {
                                        prev->link = next;
                                    }
                                    freeGroup(bd);
                                    stp->n_old_blocks--;
                                } else {
                                    stp->n_words += bd->free - bd->start;

                                    // NB. this step might not be compacted next
                                    // time, so reset the BF_MARKED flags.
                                    // They are set before GC if we're going to
                                    // compact.  (search for BF_MARKED above)
                                    bd->flags &= ~BF_MARKED;

                                    // between GCs, all blocks in the heap except
                                    // for the nursery have the BF_EVACUATED flag set.
                                    bd->flags |= BF_EVACUATED;

                                    prev = bd;
                                }
                            }
                            if (prev != NULL) {
                                prev->link = stp->blocks;
                                stp->blocks = stp->old_blocks;
                            }
                        }
                        // add the new blocks to the block tally
                        stp->n_blocks += stp->n_old_blocks;
                        ASSERT(countBlocks(stp->blocks) == stp->n_blocks);
                        ASSERT(countOccupied(stp->blocks) == stp->n_words);
                    }
                    else // not compacted
                    {
                        freeChain(stp->old_blocks);
                    }
                    stp->old_blocks = NULL;
                    stp->n_old_blocks = 0;
                }
                /* LARGE OBJECTS.  The current live large objects are chained on
                 * scavenged_large, having been moved during garbage
                 * collection from large_objects.  Any objects left on the
                 * large_objects list are therefore dead, so we free them here.
                 */
                for (bd = stp->large_objects; bd != NULL; bd = next) {
                    next = bd->link;
                    freeGroup(bd);
                }

                stp->large_objects  = stp->scavenged_large_objects;
                stp->n_large_blocks = stp->n_scavenged_large_blocks;
            }
            else // for older generations...
            {
                /* For older generations, we need to append the
                 * scavenged_large_object list (i.e. large objects that have been
                 * promoted during this GC) to the large_object list for that step.
                 */
                for (bd = stp->scavenged_large_objects; bd; bd = next) {
                    next = bd->link;
                    dbl_link_onto(bd, &stp->large_objects);
                }

                // add the new blocks we promoted during this GC
                stp->n_large_blocks += stp->n_scavenged_large_blocks;
            }
        }
    }
    // update the max size of older generations after a major GC
    resize_generations();

    // Calculate the amount of live data for stats.
    live = calcLiveWords();

    // Free the small objects allocated via allocate(), since this will
    // all have been copied into G0S1 now.
    if (RtsFlags.GcFlags.generations > 1) {
        if (g0s0->blocks != NULL) {
            freeChain(g0s0->blocks);
            g0s0->blocks = NULL;
        }
    }
    alloc_blocks_lim = RtsFlags.GcFlags.minAllocAreaSize;

    // Start a new pinned_object_block
    pinned_object_block = NULL;

    // Free the mark stack.
    if (mark_stack_bdescr != NULL) {
        freeGroup(mark_stack_bdescr);
    }

    // Free any bitmaps.
    for (g = 0; g <= N; g++) {
        for (s = 0; s < generations[g].n_steps; s++) {
            stp = &generations[g].steps[s];
            if (stp->bitmap != NULL) {
                freeGroup(stp->bitmap);
                stp->bitmap = NULL;
            }
        }
    }
    // mark the garbage collected CAFs as dead
#if 0 && defined(DEBUG) // doesn't work at the moment
    if (major_gc) { gcCAFs(); }
#endif

#ifdef PROFILING
    // resetStaticObjectForRetainerProfiling() must be called before
    // zeroing below.
    if (n_gc_threads > 1) {
        barf("profiling is currently broken with multi-threaded GC");
        // ToDo: fix the gct->scavenged_static_objects below
    }
    resetStaticObjectForRetainerProfiling(gct->scavenged_static_objects);
#endif

    // zero the scavenged static object list
    if (major_gc) {
        nat i;
        for (i = 0; i < n_gc_threads; i++) {
            zero_static_object_list(gc_threads[i]->scavenged_static_objects);
        }
    }
    // start any pending finalizers
    RELEASE_SM_LOCK;
    scheduleFinalizers(cap, old_weak_ptr_list);
    ACQUIRE_SM_LOCK;

    // send exceptions to any threads which were about to die
    RELEASE_SM_LOCK;
    resurrectThreads(resurrected_threads);
    performPendingThrowTos(exception_threads);
    ACQUIRE_SM_LOCK;

    // Update the stable pointer hash table.
    updateStablePtrTable(major_gc);

    // check sanity after GC
    IF_DEBUG(sanity, checkSanity());

    // extra GC trace info
    IF_DEBUG(gc, statDescribeGens());

#ifdef DEBUG
    // symbol-table based profiling
    /* heapCensus(to_blocks); */ /* ToDo */
#endif

    // restore enclosing cost centre
#ifdef PROFILING
    CCCS = prev_CCS;
#endif

#ifdef DEBUG
    // check for memory leaks if DEBUG is on
    memInventory(traceClass(DEBUG_gc));
#endif

#ifdef RTS_GTK_FRONTPANEL
    if (RtsFlags.GcFlags.frontpanel) {
        updateFrontPanelAfterGC( N, live );
    }
#endif

    // ok, GC over: tell the stats department what happened.
    slop = calcLiveBlocks() * BLOCK_SIZE_W - live;
    stat_endGC(allocated, live, copied, N, max_copied, avg_copied, slop);

    // unlock the StablePtr table
    stablePtrPostGC();

    // Guess which generation we'll collect *next* time
    initialise_N(force_major_gc);

#if defined(RTS_USER_SIGNALS)
    if (RtsFlags.MiscFlags.install_signal_handlers) {
        // unblock signals again
        unblockUserSignals();
    }
#endif

    SET_GCT(saved_gct);
}
/* -----------------------------------------------------------------------------
   Figure out which generation to collect, initialise N and major_gc.

   Also returns the total number of blocks in generations that will be
   collected.
   -------------------------------------------------------------------------- */

static nat
initialise_N (rtsBool force_major_gc)
{
    int g;
    nat s, blocks, blocks_total = 0;

    if (force_major_gc) {
        N = RtsFlags.GcFlags.generations - 1;
    } else {
        N = 0;
    }

    for (g = RtsFlags.GcFlags.generations - 1; g >= 0; g--) {
        blocks = 0;
        for (s = 0; s < generations[g].n_steps; s++) {
            blocks += generations[g].steps[s].n_words / BLOCK_SIZE_W;
            blocks += generations[g].steps[s].n_large_blocks;
        }
        if (blocks >= generations[g].max_blocks) {
            N = stg_max(N, (nat)g);
        }
        if ((nat)g <= N) {
            blocks_total += blocks;
        }
    }

    blocks_total += countNurseryBlocks();

    major_gc = (N == RtsFlags.GcFlags.generations-1);
    return blocks_total;
}
/* -----------------------------------------------------------------------------
   Initialise the gc_thread structures.
   -------------------------------------------------------------------------- */

#define GC_THREAD_INACTIVE             0
#define GC_THREAD_STANDING_BY          1
#define GC_THREAD_RUNNING              2
#define GC_THREAD_WAITING_TO_CONTINUE  3
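
// Intended life cycle of a worker, as pieced together from the code
// below: INACTIVE -> STANDING_BY (worker parks in gcWorkerThread)
// -> RUNNING (wakeup_gc_threads releases gc_spin) -> WAITING_TO_CONTINUE
// (worker has finished scavenging) -> INACTIVE (releaseGCThreads).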
static void
new_gc_thread (nat n, gc_thread *t)
{
    nat s;
    step_workspace *ws;

#ifdef THREADED_RTS
    initSpinLock(&t->gc_spin);
    initSpinLock(&t->mut_spin);
    ACQUIRE_SPIN_LOCK(&t->gc_spin);
    t->wakeup = GC_THREAD_INACTIVE;  // starts true, so we can wait for the
                                     // thread to start up, see wakeup_gc_threads
#endif

    t->thread_index = n;
    t->free_blocks = NULL;

    for (s = 0; s < total_steps; s++)
    {
        ws = &t->steps[s];
        ws->step = &all_steps[s];
        ASSERT(s == ws->step->abs_no);

        ws->todo_bd = NULL;
        ws->todo_q = newWSDeque(128);
        ws->todo_overflow = NULL;
        ws->n_todo_overflow = 0;

        ws->part_list = NULL;
        ws->n_part_blocks = 0;

        ws->scavd_list = NULL;
        ws->n_scavd_blocks = 0;
    }
}
void
initGcThreads (void)
{
    if (gc_threads == NULL) {
#if defined(THREADED_RTS)
        nat i;
        gc_threads = stgMallocBytes (RtsFlags.ParFlags.nNodes *
                                     sizeof(gc_thread*), "alloc_gc_threads");
        for (i = 0; i < RtsFlags.ParFlags.nNodes; i++) {
            gc_threads[i] =
                stgMallocBytes(sizeof(gc_thread) + total_steps * sizeof(step_workspace),
                               "alloc_gc_threads");
            new_gc_thread(i, gc_threads[i]);
        }
#else
        gc_threads = stgMallocBytes (sizeof(gc_thread*), "alloc_gc_threads");
        new_gc_thread(0, gc_threads[0]);
#endif
    }
}

void
freeGcThreads (void)
{
    if (gc_threads != NULL) {
#if defined(THREADED_RTS)
        nat i;
        for (i = 0; i < RtsFlags.ParFlags.nNodes; i++) {
            stgFree (gc_threads[i]);
        }
        stgFree (gc_threads);
#else
        stgFree (gc_threads);
#endif
        gc_threads = NULL;
    }
}
/* ----------------------------------------------------------------------------
   ------------------------------------------------------------------------- */

static volatile StgWord gc_running_threads;

static StgWord
inc_running (void)
{
    StgWord new;
    new = atomic_inc(&gc_running_threads);
    ASSERT(new <= n_gc_threads);
    return new;
}

static StgWord
dec_running (void)
{
    ASSERT(gc_running_threads != 0);
    return atomic_dec(&gc_running_threads);
}
static rtsBool
any_work (void)
{
    int s;
    step_workspace *ws;

    // scavenge objects in compacted generation
    if (mark_stack_overflowed || oldgen_scan_bd != NULL ||
        (mark_stack_bdescr != NULL && !mark_stack_empty())) {
        return rtsTrue;
    }

    // Check for global work in any step.  We don't need to check for
    // local work, because we have already exited scavenge_loop(),
    // which means there is no local work for this thread.
    for (s = total_steps-1; s >= 0; s--) {
        if (s == 0 && RtsFlags.GcFlags.generations > 1) {
            continue;
        }
        ws = &gct->steps[s];
        if (ws->todo_large_objects) return rtsTrue;
        if (!looksEmptyWSDeque(ws->todo_q)) return rtsTrue;
        if (ws->todo_overflow) return rtsTrue;
    }

#if defined(THREADED_RTS)
    if (work_stealing) {
        nat n;
        // look for work to steal
        for (n = 0; n < n_gc_threads; n++) {
            if (n == gct->thread_index) continue;
            for (s = total_steps-1; s >= 0; s--) {
                ws = &gc_threads[n]->steps[s];
                if (!looksEmptyWSDeque(ws->todo_q)) return rtsTrue;
            }
        }
    }
#endif

    return rtsFalse;
}
static void
scavenge_until_all_done (void)
{
    nat r;

loop:
    debugTrace(DEBUG_gc, "GC thread %d working", gct->thread_index);

#if defined(THREADED_RTS)
    if (n_gc_threads > 1) {
        scavenge_loop();
    } else {
        scavenge_loop1();
    }
#else
    scavenge_loop();
#endif

    // scavenge_loop() only exits when there's no work to do
    r = dec_running();

    debugTrace(DEBUG_gc, "GC thread %d idle (%d still running)",
               gct->thread_index, r);

    while (gc_running_threads != 0) {
        if (any_work()) {
            inc_running();
            goto loop;
        }
        // any_work() does not remove the work from the queue, it
        // just checks for the presence of work.  If we find any,
        // then we increment gc_running_threads and go back to
        // scavenge_loop() to perform any pending work.
    }

    // All threads are now stopped
    debugTrace(DEBUG_gc, "GC thread %d finished.", gct->thread_index);
}
#if defined(THREADED_RTS)

void
gcWorkerThread (Capability *cap)
{
    cap->in_gc = rtsTrue;

    gct = gc_threads[cap->no];
    gct->id = osThreadId();

    // Wait until we're told to wake up
    RELEASE_SPIN_LOCK(&gct->mut_spin);
    gct->wakeup = GC_THREAD_STANDING_BY;
    debugTrace(DEBUG_gc, "GC thread %d standing by...", gct->thread_index);
    ACQUIRE_SPIN_LOCK(&gct->gc_spin);

#ifdef USE_PAPI
    // start performance counters in this thread...
    if (gct->papi_events == -1) {
        papi_init_eventset(&gct->papi_events);
    }
    papi_thread_start_gc1_count(gct->papi_events);
#endif

    // Every thread evacuates some roots.
    markSomeCapabilities(mark_root, gct, gct->thread_index, n_gc_threads,
                         rtsTrue/*prune sparks*/);
    scavenge_capability_mut_lists(&capabilities[gct->thread_index]);

    scavenge_until_all_done();

#ifdef USE_PAPI
    // count events in this thread towards the GC totals
    papi_thread_stop_gc1_count(gct->papi_events);
#endif

    // Wait until we're told to continue
    RELEASE_SPIN_LOCK(&gct->gc_spin);
    gct->wakeup = GC_THREAD_WAITING_TO_CONTINUE;
    debugTrace(DEBUG_gc, "GC thread %d waiting to continue...",
               gct->thread_index);
    ACQUIRE_SPIN_LOCK(&gct->mut_spin);
    debugTrace(DEBUG_gc, "GC thread %d on my way...", gct->thread_index);
}

#endif
#if defined(THREADED_RTS)

void
waitForGcThreads (Capability *cap USED_IF_THREADS)
{
    nat n_threads = RtsFlags.ParFlags.nNodes;
    nat me = cap->no;
    nat i, j;
    rtsBool retry = rtsTrue;

    while (retry) {
        for (i=0; i < n_threads; i++) {
            if (i == me) continue;
            if (gc_threads[i]->wakeup != GC_THREAD_STANDING_BY) {
                prodCapability(&capabilities[i], cap->running_task);
            }
        }
        for (j=0; j < 10000000; j++) {
            retry = rtsFalse;
            for (i=0; i < n_threads; i++) {
                if (i == me) continue;
                write_barrier();
                setContextSwitches();
                if (gc_threads[i]->wakeup != GC_THREAD_STANDING_BY) {
                    retry = rtsTrue;
                }
            }
            if (!retry) break;
        }
    }
}

#endif // THREADED_RTS
static void
start_gc_threads (void)
{
#if defined(THREADED_RTS)
    gc_running_threads = 0;
#endif
}

static void
wakeup_gc_threads (nat n_threads USED_IF_THREADS, nat me USED_IF_THREADS)
{
#if defined(THREADED_RTS)
    nat i;
    for (i=0; i < n_threads; i++) {
        if (i == me) continue;
        inc_running();
        debugTrace(DEBUG_gc, "waking up gc thread %d", i);
        if (gc_threads[i]->wakeup != GC_THREAD_STANDING_BY) barf("wakeup_gc_threads");

        gc_threads[i]->wakeup = GC_THREAD_RUNNING;
        ACQUIRE_SPIN_LOCK(&gc_threads[i]->mut_spin);
        RELEASE_SPIN_LOCK(&gc_threads[i]->gc_spin);
    }
#endif
}
// After GC is complete, we must wait for all GC threads to enter the
// standby state, otherwise they may still be executing inside
// any_work(), and may even remain awake until the next GC starts.
static void
shutdown_gc_threads (nat n_threads USED_IF_THREADS, nat me USED_IF_THREADS)
{
#if defined(THREADED_RTS)
    nat i;
    for (i=0; i < n_threads; i++) {
        if (i == me) continue;
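        // (Our gloss, not the original comment: the barrier inside the
        // spin loop below also acts as a compiler barrier, forcing
        // gc_threads[i]->wakeup to be re-read on every iteration.)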
        while (gc_threads[i]->wakeup != GC_THREAD_WAITING_TO_CONTINUE) { write_barrier(); }
    }
#endif
}
#if defined(THREADED_RTS)

void
releaseGCThreads (Capability *cap USED_IF_THREADS)
{
    nat n_threads = RtsFlags.ParFlags.nNodes;
    nat me = cap->no;
    nat i;
    for (i=0; i < n_threads; i++) {
        if (i == me) continue;
        if (gc_threads[i]->wakeup != GC_THREAD_WAITING_TO_CONTINUE)
            barf("releaseGCThreads");

        gc_threads[i]->wakeup = GC_THREAD_INACTIVE;
        ACQUIRE_SPIN_LOCK(&gc_threads[i]->gc_spin);
        RELEASE_SPIN_LOCK(&gc_threads[i]->mut_spin);
    }
}

#endif
/* ----------------------------------------------------------------------------
   Initialise a generation that is to be collected
   ------------------------------------------------------------------------- */

static void
init_collected_gen (nat g, nat n_threads)
{
    nat s, t, i;
    step_workspace *ws;
    step *stp;
    bdescr *bd;

    // Throw away the current mutable list.  Invariant: the mutable
    // list always has at least one block; this means we can avoid a
    // check for NULL in recordMutable().
    if (g != 0) {
        freeChain(generations[g].mut_list);
        generations[g].mut_list = allocBlock();
        for (i = 0; i < n_capabilities; i++) {
            freeChain(capabilities[i].mut_lists[g]);
            capabilities[i].mut_lists[g] = allocBlock();
        }
    }
    for (s = 0; s < generations[g].n_steps; s++) {

        stp = &generations[g].steps[s];
        ASSERT(stp->gen_no == g);

        // we'll construct a new list of threads in this step
        // during GC, throw away the current list.
        stp->old_threads = stp->threads;
        stp->threads = END_TSO_QUEUE;

        // generation 0, step 0 doesn't need to-space
        if (g == 0 && s == 0 && RtsFlags.GcFlags.generations > 1) {
            continue;
        }

        // deprecate the existing blocks
        stp->old_blocks   = stp->blocks;
        stp->n_old_blocks = stp->n_blocks;
        stp->blocks       = NULL;
        stp->n_blocks     = 0;
        stp->n_words      = 0;
        stp->live_estimate = 0;

        // initialise the large object queues.
        stp->scavenged_large_objects = NULL;
        stp->n_scavenged_large_blocks = 0;

        // mark the small objects as from-space
        for (bd = stp->old_blocks; bd; bd = bd->link) {
            bd->flags &= ~BF_EVACUATED;
        }

        // mark the large objects as from-space
        for (bd = stp->large_objects; bd; bd = bd->link) {
            bd->flags &= ~BF_EVACUATED;
        }
        // for a compacted step, we need to allocate the bitmap
        if (stp->mark) {

            nat bitmap_size; // in bytes
            bdescr *bitmap_bdescr;
            StgPtr bitmap;

            bitmap_size = stp->n_old_blocks * BLOCK_SIZE / (sizeof(W_)*BITS_PER_BYTE);
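            // One bit per word of heap: a block of BLOCK_SIZE bytes holds
            // BLOCK_SIZE/sizeof(W_) words, so it needs
            // BLOCK_SIZE/(sizeof(W_)*BITS_PER_BYTE) bytes of bitmap; e.g.
            // 4096-byte blocks and 8-byte words would give 4096/64 = 64
            // bitmap bytes per block (example figures only).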
            if (bitmap_size > 0) {
                bitmap_bdescr = allocGroup((lnat)BLOCK_ROUND_UP(bitmap_size)
                                           / BLOCK_SIZE);
                stp->bitmap = bitmap_bdescr;
                bitmap = bitmap_bdescr->start;

                debugTrace(DEBUG_gc, "bitmap_size: %d, bitmap: %p",
                           bitmap_size, bitmap);

                // don't forget to fill it with zeros!
                memset(bitmap, 0, bitmap_size);

                // For each block in this step, point to its bitmap from the
                // block descriptor.
                for (bd = stp->old_blocks; bd != NULL; bd = bd->link) {
                    bd->u.bitmap = bitmap;
                    bitmap += BLOCK_SIZE_W / (sizeof(W_)*BITS_PER_BYTE);

                    // Also at this point we set the BF_MARKED flag
                    // for this block.  The invariant is that
                    // BF_MARKED is always unset, except during GC
                    // when it is set on those blocks which will be
                    // compacted.
                    if (!(bd->flags & BF_FRAGMENTED)) {
                        bd->flags |= BF_MARKED;
                    }
                }
            }
        }
    }
    // For each GC thread, for each step, allocate a "todo" block to
    // store evacuated objects to be scavenged, and a block to store
    // evacuated objects that do not need to be scavenged.
    for (t = 0; t < n_threads; t++) {
        for (s = 0; s < generations[g].n_steps; s++) {

            // we don't copy objects into g0s0, unless -G0
            if (g==0 && s==0 && RtsFlags.GcFlags.generations > 1) continue;

            ws = &gc_threads[t]->steps[g * RtsFlags.GcFlags.steps + s];

            ws->todo_large_objects = NULL;

            ws->part_list = NULL;
            ws->n_part_blocks = 0;

            // allocate the first to-space block; extra blocks will be
            // chained on as necessary.
            ws->todo_bd = NULL;
            ASSERT(looksEmptyWSDeque(ws->todo_q));
            alloc_todo_block(ws,0);

            ws->todo_overflow = NULL;
            ws->n_todo_overflow = 0;

            ws->scavd_list = NULL;
            ws->n_scavd_blocks = 0;
        }
    }
}
/* ----------------------------------------------------------------------------
   Initialise a generation that is *not* to be collected
   ------------------------------------------------------------------------- */

static void
init_uncollected_gen (nat g, nat threads)
{
    nat s, t, n;
    step_workspace *ws;
    step *stp;
    bdescr *bd;

    // save the current mutable lists for this generation, and
    // allocate a fresh block for each one.  We'll traverse these
    // mutable lists as roots early on in the GC.
    generations[g].saved_mut_list = generations[g].mut_list;
    generations[g].mut_list = allocBlock();
    for (n = 0; n < n_capabilities; n++) {
        capabilities[n].saved_mut_lists[g] = capabilities[n].mut_lists[g];
        capabilities[n].mut_lists[g] = allocBlock();
    }

    for (s = 0; s < generations[g].n_steps; s++) {
        stp = &generations[g].steps[s];
        stp->scavenged_large_objects = NULL;
        stp->n_scavenged_large_blocks = 0;
    }
    for (s = 0; s < generations[g].n_steps; s++) {

        stp = &generations[g].steps[s];

        for (t = 0; t < threads; t++) {
            ws = &gc_threads[t]->steps[g * RtsFlags.GcFlags.steps + s];

            ASSERT(looksEmptyWSDeque(ws->todo_q));
            ws->todo_large_objects = NULL;

            ws->part_list = NULL;
            ws->n_part_blocks = 0;

            ws->scavd_list = NULL;
            ws->n_scavd_blocks = 0;

            // If the block at the head of the list in this generation
            // is less than 3/4 full, then use it as a todo block.
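            // (We read isPartiallyFull() as implementing the 3/4 test; a
            // plausible shape would compare bd->free against
            // bd->start + 3*BLOCK_SIZE_W/4, but the real macro is defined
            // elsewhere.)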
            if (stp->blocks && isPartiallyFull(stp->blocks))
            {
                ws->todo_bd = stp->blocks;
                ws->todo_free = ws->todo_bd->free;
                ws->todo_lim = ws->todo_bd->start + BLOCK_SIZE_W;
                stp->blocks = stp->blocks->link;
                stp->n_blocks -= 1;
                stp->n_words -= ws->todo_bd->free - ws->todo_bd->start;
                ws->todo_bd->link = NULL;
                // we must scan from the current end point.
                ws->todo_bd->u.scan = ws->todo_bd->free;
            }
            else
            {
                ws->todo_bd = NULL;
                alloc_todo_block(ws,0);
            }
        }
        // deal out any more partial blocks to the threads' part_lists
        t = 0;
        while (stp->blocks && isPartiallyFull(stp->blocks))
        {
            bd = stp->blocks;
            stp->blocks = bd->link;
            ws = &gc_threads[t]->steps[g * RtsFlags.GcFlags.steps + s];
            bd->link = ws->part_list;
            ws->part_list = bd;
            ws->n_part_blocks += 1;
            bd->u.scan = bd->free;
            stp->n_blocks -= 1;
            stp->n_words -= bd->free - bd->start;
            t++;
            if (t == n_gc_threads) t = 0;
        }
    }
}
/* -----------------------------------------------------------------------------
   Initialise a gc_thread before GC
   -------------------------------------------------------------------------- */

static void
init_gc_thread (gc_thread *t)
{
    t->static_objects = END_OF_STATIC_LIST;
    t->scavenged_static_objects = END_OF_STATIC_LIST;
    t->scan_bd = NULL;
    t->mut_lists = capabilities[t->thread_index].mut_lists;
    t->evac_step = 0;
    t->failed_to_evac = rtsFalse;
    t->eager_promotion = rtsTrue;
    t->thunk_selector_depth = 0;
    t->copied = 0;
    t->scanned = 0;
    t->any_work = 0;
    t->no_work = 0;
    t->scav_find_work = 0;
}
/* -----------------------------------------------------------------------------
   Function we pass to evacuate roots.
   -------------------------------------------------------------------------- */

static void
mark_root(void *user USED_IF_THREADS, StgClosure **root)
{
    // we stole a register for gct, but this function is called from
    // *outside* the GC where the register variable is not in effect,
    // so we need to save and restore it here.  NB. only call
    // mark_root() from the main GC thread, otherwise gct will be
    // incorrect.
    gc_thread *saved_gct;
    saved_gct = gct;
    SET_GCT(user);

    evacuate(root);

    SET_GCT(saved_gct);
}
/* -----------------------------------------------------------------------------
   Initialising the static object & mutable lists
   -------------------------------------------------------------------------- */

static void
zero_static_object_list(StgClosure* first_static)
{
    StgClosure* p;
    StgClosure* link;
    const StgInfoTable *info;

    for (p = first_static; p != END_OF_STATIC_LIST; p = link) {
        info = get_itbl(p);
        link = *STATIC_LINK(info, p);
        *STATIC_LINK(info,p) = NULL;
    }
}
/* ----------------------------------------------------------------------------
   Update the pointers from the task list

   These are treated as weak pointers because we want to allow a main
   thread to get a BlockedOnDeadMVar exception in the same way as any
   other thread.  Note that the threads should all have been retained
   by GC by virtue of being on the all_threads list, we're just
   updating pointers here.
   ------------------------------------------------------------------------- */

static void
update_task_list (void)
{
    Task *task;
    StgTSO *tso;
    for (task = all_tasks; task != NULL; task = task->all_link) {
        if (!task->stopped && task->tso) {
            ASSERT(task->tso->bound == task);
            tso = (StgTSO *) isAlive((StgClosure *)task->tso);
            if (tso == NULL) {
                barf("task %p: main thread %d has been GC'd",
#ifdef THREADED_RTS
                     (void *)task->id,
#else
                     (void *)task,
#endif
                     task->tso->id);
            }
            task->tso = tso;
        }
    }
}
/* ----------------------------------------------------------------------------
   Reset the sizes of the older generations when we do a major
   collection.

   CURRENT STRATEGY: make all generations except zero the same size.
   We have to stay within the maximum heap size, and leave a certain
   percentage of the maximum heap size available to allocate into.
   ------------------------------------------------------------------------- */

static void
resize_generations (void)
{
    nat g;

    if (major_gc && RtsFlags.GcFlags.generations > 1) {
        nat live, size, min_alloc, words;
        nat max  = RtsFlags.GcFlags.maxHeapSize;
        nat gens = RtsFlags.GcFlags.generations;

        // live in the oldest generations
        if (oldest_gen->steps[0].live_estimate != 0) {
            words = oldest_gen->steps[0].live_estimate;
        } else {
            words = oldest_gen->steps[0].n_words;
        }
        live = (words + BLOCK_SIZE_W - 1) / BLOCK_SIZE_W +
               oldest_gen->steps[0].n_large_blocks;
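        // (the "+ BLOCK_SIZE_W - 1" rounds the word count up to a whole
        // number of blocks before the large-object blocks are added)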
        // default max size for all generations except zero
        size = stg_max(live * RtsFlags.GcFlags.oldGenFactor,
                       RtsFlags.GcFlags.minOldGenSize);

        // minimum size for generation zero
        min_alloc = stg_max((RtsFlags.GcFlags.pcFreeHeap * max) / 200,
                            RtsFlags.GcFlags.minAllocAreaSize);

        // Auto-enable compaction when the residency reaches a
        // certain percentage of the maximum heap size (default: 30%).
        if (RtsFlags.GcFlags.generations > 1 &&
            (RtsFlags.GcFlags.compact ||
             (max > 0 &&
              oldest_gen->steps[0].n_blocks >
              (RtsFlags.GcFlags.compactThreshold * max) / 100))) {
            oldest_gen->steps[0].mark = 1;
            oldest_gen->steps[0].compact = 1;
            // debugBelch("compaction: on\n");
        } else {
            oldest_gen->steps[0].mark = 0;
            oldest_gen->steps[0].compact = 0;
            // debugBelch("compaction: off\n");
        }

        if (RtsFlags.GcFlags.sweep) {
            oldest_gen->steps[0].mark = 1;
        }
        // if we're going to go over the maximum heap size, reduce the
        // size of the generations accordingly.  The calculation is
        // different if compaction is turned on, because we don't need
        // to double the space required to collect the old generation.
        if (max != 0) {

            // this test is necessary to ensure that the calculations
            // below don't have any negative results - we're working
            // with unsigned values here.
            if (max < min_alloc) {
                heapOverflow();
            }

            if (oldest_gen->steps[0].compact) {
                if ( (size + (size - 1) * (gens - 2) * 2) + min_alloc > max ) {
                    size = (max - min_alloc) / ((gens - 1) * 2 - 1);
                }
            } else {
                if ( (size * (gens - 1) * 2) + min_alloc > max ) {
                    size = (max - min_alloc) / ((gens - 1) * 2);
                }
            }
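
            /* Our reading of the factor of 2: without compaction every
             * collected generation may need a same-sized to-space while
             * it is copied, hence size * (gens - 1) * 2 + min_alloc <= max;
             * with compaction the oldest generation is collected in place,
             * so one copy of "size" escapes the doubling.
             */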
        }

#if 0
        debugBelch("live: %d, min_alloc: %d, size : %d, max = %d\n", live,
                   min_alloc, size, max);
#endif

        for (g = 0; g < gens; g++) {
            generations[g].max_blocks = size;
        }
    }
}
/* -----------------------------------------------------------------------------
   Calculate the new size of the nursery, and resize it.
   -------------------------------------------------------------------------- */

static void
resize_nursery (void)
{
    if (RtsFlags.GcFlags.generations == 1)
    {   // Two-space collector:
        nat blocks;

        /* set up a new nursery.  Allocate a nursery size based on a
         * function of the amount of live data (by default a factor of 2)
         * Use the blocks from the old nursery if possible, freeing up any
         * left over blocks.
         *
         * If we get near the maximum heap size, then adjust our nursery
         * size accordingly.  If the nursery is the same size as the live
         * data (L), then we need 3L bytes.  We can reduce the size of the
         * nursery to bring the required memory down near 2L bytes.
         *
         * A normal 2-space collector would need 4L bytes to give the same
         * performance we get from 3L bytes, reducing to the same
         * performance at 2L bytes.
         */
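
        /* Worked reading of the numbers above: with L live and the default
         * factor of 2, the nursery is sized at 2L, so live data plus
         * nursery comes to 3L; squeezing the nursery down towards L brings
         * that towards 2L.  A classic two-space collector with 2L
         * semispaces would reserve 2 * 2L = 4L for the same allocation area.
         */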
        blocks = g0s0->n_blocks;

        if ( RtsFlags.GcFlags.maxHeapSize != 0 &&
             blocks * RtsFlags.GcFlags.oldGenFactor * 2 >
             RtsFlags.GcFlags.maxHeapSize )
        {
            long adjusted_blocks;  // signed on purpose
            int pc_free;

            adjusted_blocks = (RtsFlags.GcFlags.maxHeapSize - 2 * blocks);

            debugTrace(DEBUG_gc, "near maximum heap size of 0x%x blocks, blocks = %d, adjusted to %ld",
                       RtsFlags.GcFlags.maxHeapSize, blocks, adjusted_blocks);

            pc_free = adjusted_blocks * 100 / RtsFlags.GcFlags.maxHeapSize;
            if (pc_free < RtsFlags.GcFlags.pcFreeHeap) /* might even be < 0 */
            {
                heapOverflow();
            }
            blocks = adjusted_blocks;
        }
        else
        {
            blocks *= RtsFlags.GcFlags.oldGenFactor;
            if (blocks < RtsFlags.GcFlags.minAllocAreaSize)
            {
                blocks = RtsFlags.GcFlags.minAllocAreaSize;
            }
        }
        resizeNurseries(blocks);
    }
    else  // Generational collector
    {
        /*
         * If the user has given us a suggested heap size, adjust our
         * allocation area to make best use of the memory available.
         */
        if (RtsFlags.GcFlags.heapSizeSuggestion)
        {
            long blocks;
            nat needed = calcNeeded();  // approx blocks needed at next GC

            /* Guess how much will be live in generation 0 step 0 next time.
             * A good approximation is obtained by finding the
             * percentage of g0s0 that was live at the last minor GC.
             *
             * We have an accurate figure for the amount of copied data in
             * 'copied', but we must convert this to a number of blocks, with
             * a small adjustment for estimated slop at the end of a block
             * (10 words).
             */
            if (N == 0)
            {
                g0s0_pcnt_kept = ((copied / (BLOCK_SIZE_W - 10)) * 100)
                                 / countNurseryBlocks();
            }
            /* Estimate a size for the allocation area based on the
             * information available.  We might end up going slightly under
             * or over the suggested heap size, but we should be pretty
             * close on average.
             *
             * Formula:            suggested - needed
             *                ----------------------------
             *                    1 + g0s0_pcnt_kept/100
             *
             * where 'needed' is the amount of memory needed at the next
             * collection for collecting all steps except g0s0.
             */
            blocks =
                (((long)RtsFlags.GcFlags.heapSizeSuggestion - (long)needed) * 100) /
                (100 + (long)g0s0_pcnt_kept);
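
            /* The formula solves, for the new nursery size A (in blocks):
             *
             *     A + A * g0s0_pcnt_kept/100 + needed  ~=  suggested
             *
             * e.g. (figures invented) suggested = 1000, needed = 200 and
             * g0s0_pcnt_kept = 30 give A = 800 * 100 / 130 = 615 blocks
             * (integer division).
             */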
            if (blocks < (long)RtsFlags.GcFlags.minAllocAreaSize) {
                blocks = RtsFlags.GcFlags.minAllocAreaSize;
            }

            resizeNurseries((nat)blocks);
        }
        else
        {
            // we might have added extra large blocks to the nursery, so
            // resize back to minAllocAreaSize again.
            resizeNurseriesFixed(RtsFlags.GcFlags.minAllocAreaSize);
        }
    }
}
/* -----------------------------------------------------------------------------
   Sanity code for CAF garbage collection.

   With DEBUG turned on, we manage a CAF list in addition to the SRT
   mechanism.  After GC, we run down the CAF list and blackhole any
   CAFs which have been garbage collected.  This means we get an error
   whenever the program tries to enter a garbage collected CAF.

   Any garbage collected CAFs are taken off the CAF list at the same
   time.
   -------------------------------------------------------------------------- */

#if 0 && defined(DEBUG)

static void
gcCAFs (void)
{
    StgClosure *p, **pp;
    const StgInfoTable *info;
    nat i = 0;

    for (p = caf_list, pp = &caf_list; p != NULL; ) {
        info = get_itbl(p);
        ASSERT(info->type == IND_STATIC);
        if (STATIC_LINK(info,p) == NULL) {
            debugTrace(DEBUG_gccafs, "CAF gc'd at 0x%04lx", (long)p);
            SET_INFO(p, &stg_BLACKHOLE_info);  // black hole it
            p = STATIC_LINK2(info,p);
            *pp = p;
        } else {
            pp = &STATIC_LINK2(info,p);
            p = *pp;
            i++;
        }
    }

    debugTrace(DEBUG_gccafs, "%d CAFs live", i);
}
#endif