1 /* -----------------------------------------------------------------------------
3 * (c) The GHC Team 2001-2008
5 * Compacting garbage collector
7 * Documentation on the architecture of the Garbage Collector can be
8 * found in the online commentary:
10 * http://hackage.haskell.org/trac/ghc/wiki/Commentary/Rts/Storage/GC
12 * ---------------------------------------------------------------------------*/
14 #include "PosixSource.h"
20 #include "BlockAlloc.h"
30 // Turn off inlining when debugging - it obfuscates things
33 # define STATIC_INLINE static
36 /* ----------------------------------------------------------------------------
37 Threading / unthreading pointers.
39 The basic idea here is to chain together all the fields pointing at
40 a particular object, with the root of the chain in the object's
41 info table field.  The original contents of the info pointer go
42 at the end of the chain.
44 Adding a new field to the chain is a matter of swapping the
45 contents of the field with the contents of the object's info table
48 To unthread the chain, we walk down it updating all the fields on
49 the chain with the new location of the object. We stop when we
50 reach the info pointer at the end.
52 The main difficulty here is that we need to be able to identify the
53 info pointer at the end of the chain. We can't use the low bits of
54 the pointer for this; they are already being used for
55 pointer-tagging. What's more, we need to retain the
56 pointer-tagging tag bits on each pointer during the
57 threading/unthreading process.
59 Our solution is as follows:
60 - an info pointer (chain length zero) is identified by having tag 0
61 - in a threaded chain of length > 0:
62 - the pointer-tagging tag bits are attached to the info pointer
63 - the first entry in the chain has tag 1
64 - second and subsequent entries in the chain have tag 2
66 This exploits the fact that the tag on each pointer to a given
67 closure is normally the same (if they are not the same, then
68 presumably the tag is not essential and it therefore doesn't matter
69 if we throw away some of the tags).
70 ------------------------------------------------------------------------- */
// thread: add the field *p to the threaded chain of the closure it
// points to, per the scheme in the comment block above (tag 0 = bare
// info pointer, tag 1 = first chain entry, tag 2 = later entries).
// NOTE(review): this listing has elided lines (local declarations,
// braces, and case labels are missing) — verify against full source.
73 thread (StgClosure **p)
81     q = (StgPtr)UNTAG_CLOSURE(q0);
83     // It doesn't look like a closure at the moment, because the info
84     // ptr is possibly threaded:
85     // ASSERT(LOOKS_LIKE_CLOSURE_PTR(q));
87     if (HEAP_ALLOCED(q)) {
// Only objects in the compacted region (BF_MARKED blocks) get threaded.
90         if (bd->flags & BF_MARKED)
93             switch (GET_CLOSURE_TAG((StgClosure *)iptr))
96                 // this is the info pointer; we are creating a new chain.
97                 // save the original tag at the end of the chain.
98                 *p = (StgClosure *)((StgWord)iptr + GET_CLOSURE_TAG(q0));
103                 // this is a chain of length 1 or more
104                 *p = (StgClosure *)iptr;
// thread_root: wrapper matching the evac_fn signature used for marking
// GC roots (see the (evac_fn)thread_root casts in compact()); the
// 'user' argument is unused.  NOTE(review): body elided in this listing.
113 thread_root (void *user STG_UNUSED, StgClosure **p)
118 // This version of thread() takes a (void *), used to circumvent
119 // warnings from gcc about pointer punning and strict aliasing.
120 STATIC_INLINE void thread_ (void *p) { thread((StgClosure **)p); }
// unthread: walk the threaded chain rooted at p, writing 'free' (the
// object's new address, with the pointer tag already added by the
// caller — see update_fwd_compact/update_bkwd_compact) into every field
// on the chain, then restore the untagged info pointer at p.
// NOTE(review): the chain-walking loop and tag-1/2 cases are elided here.
123 unthread( StgPtr p, StgWord free )
130     switch (GET_CLOSURE_TAG((StgClosure *)q))
133         // nothing to do; the chain is length zero
137             r = *q0; // r is the info ptr, tagged with the pointer-tag
139     *p = (StgWord)UNTAG_CLOSURE((StgClosure *)r);
152 // Traverse a threaded chain and pull out the info pointer at the end.
153 // The info pointer is also tagged with the appropriate pointer tag
154 // for this closure, which should be attached to the pointer
155 // subsequently passed to unthread().
156 STATIC_INLINE StgWord
157 get_threaded_info( StgPtr p )
161     q = (W_)GET_INFO(UNTAG_CLOSURE((StgClosure *)p));
// NOTE(review): the loop around this switch is elided in this listing;
// presumably it iterates until the tag-0 (bare info pointer) case exits.
164     switch (GET_CLOSURE_TAG((StgClosure *)q))
// Tag 0: q is already the (untagged) info pointer — done.
167         ASSERT(LOOKS_LIKE_INFO_PTR(q));
// Tag 1/2: follow the chain; (q-1) strips the tag to recover the address.
171             StgWord r = *(StgPtr)(q-1);
172             ASSERT(LOOKS_LIKE_INFO_PTR((StgWord)UNTAG_CLOSURE((StgClosure *)r)));
// Unreachable unless the chain is corrupt.
179             barf("get_threaded_info");
183 // A word-aligned memmove will be faster for small objects than libc's or gcc's.
184 // Remember, the two regions *might* overlap, but: to <= from.
186 move(StgPtr to, StgPtr from, nat size)
188 for(; size > 0; --size) {
// thread_static: walk the linked list of static objects starting at p,
// threading the static-link field of each one (and, for IND_STATIC, the
// indirectee too), stopping at END_OF_STATIC_LIST.
// NOTE(review): case labels, loop body head, and braces are elided in
// this listing — verify against the full source before editing.
194 thread_static( StgClosure* p )
196   const StgInfoTable *info;
198   // keep going until we've threaded all the objects on the linked
200   while (p != END_OF_STATIC_LIST) {
203     switch (info->type) {
// IND_STATIC: also thread the indirectee, not just the link field.
206 thread(&((StgInd *)p)->indirectee);
207 p = *IND_STATIC_LINK(p);
211 p = *THUNK_STATIC_LINK(p);
214 p = *FUN_STATIC_LINK(p);
217 p = *STATIC_LINK(info,p);
221 barf("thread_static: strange closure %d", (int)(info->type));
// thread_large_bitmap: thread 'size' words starting at p, consulting a
// large bitmap: a clear bit marks a pointer (thread it), a set bit a
// non-pointer (skip).  The bitmap word is reloaded from the next array
// element every BITS_IN(W_) iterations.
// NOTE(review): the p/i increments and the advance of index 'b' are
// elided in this listing.
228 thread_large_bitmap( StgPtr p, StgLargeBitmap *large_bitmap, nat size )
234     bitmap = large_bitmap->bitmap[b];
235     for (i = 0; i < size; ) {
236 if ((bitmap & 1) == 0) {
237     thread((StgClosure **)p);
241 if (i % BITS_IN(W_) == 0) {
243     bitmap = large_bitmap->bitmap[b];
245     bitmap = bitmap >> 1;
// thread_arg_block: thread a block of function arguments laid out
// according to fun_info's argument bitmap; returns a pointer past the
// end of the arguments (see the RET_FUN caller in thread_stack).
// NOTE(review): case labels are elided; the three visible arms are
// presumably ARG_GEN (small bitmap), ARG_GEN_BIG (large bitmap), and
// the default standard stg_arg_bitmaps patterns — confirm in full source.
251 thread_arg_block (StgFunInfoTable *fun_info, StgClosure **args)
258     switch (fun_info->f.fun_type) {
// small bitmap stored directly in the fun info table
260 bitmap = BITMAP_BITS(fun_info->f.b.bitmap);
261 size = BITMAP_SIZE(fun_info->f.b.bitmap);
// bitmap too large to fit in a single word
264 size = GET_FUN_LARGE_BITMAP(fun_info)->size;
265 thread_large_bitmap(p, GET_FUN_LARGE_BITMAP(fun_info), size);
// standard argument pattern indexed by fun_type
269 bitmap = BITMAP_BITS(stg_arg_bitmaps[fun_info->f.fun_type]);
270 size = BITMAP_SIZE(stg_arg_bitmaps[fun_info->f.fun_type]);
// small-bitmap walk: clear bit = pointer, thread it
273 if ((bitmap & 1) == 0) {
274 thread((StgClosure **)p);
277 bitmap = bitmap >> 1;
// thread_stack: walk the stack between p and stack_end, threading every
// pointer field of each activation record according to its return-info
// table.  Structurally mirrors scavenge_stack.
// NOTE(review): many case labels, braces, and loop heads are elided in
// this listing — verify against the full source before editing.
286 thread_stack(StgPtr p, StgPtr stack_end)
288     const StgRetInfoTable* info;
292     // highly similar to scavenge_stack, but we do pointer threading here.
294     while (p < stack_end) {
296 // *p must be the info pointer of an activation
297 // record. All activation records have 'bitmap' style layout
300 info = get_ret_itbl((StgClosure *)p);
302 switch (info->i.type) {
304     // Dynamic bitmap: the mask is stored on the stack
308     dyn = ((StgRetDyn *)p)->liveness;
310     // traverse the bitmap first
311     bitmap = RET_DYN_LIVENESS(dyn);
312     p = (P_)&((StgRetDyn *)p)->payload[0];
313     size = RET_DYN_BITMAP_SIZE;
// clear bit = pointer word: thread it
315     if ((bitmap & 1) == 0) {
316     thread((StgClosure **)p);
319     bitmap = bitmap >> 1;
323     // skip over the non-ptr words
324     p += RET_DYN_NONPTRS(dyn) + RET_DYN_NONPTR_REGS_SIZE;
326     // follow the ptr words
327     for (size = RET_DYN_PTRS(dyn); size > 0; size--) {
328     thread((StgClosure **)p);
334     // small bitmap (<= 32 entries, or 64 on a 64-bit machine)
335 case CATCH_RETRY_FRAME:
336 case CATCH_STM_FRAME:
337 case ATOMICALLY_FRAME:
339 case UNDERFLOW_FRAME:
343     bitmap = BITMAP_BITS(info->i.layout.bitmap);
344     size = BITMAP_SIZE(info->i.layout.bitmap);
346     // NOTE: the payload starts immediately after the info-ptr, we
347     // don't have an StgHeader in the same sense as a heap closure.
349     if ((bitmap & 1) == 0) {
350     thread((StgClosure **)p);
353     bitmap = bitmap >> 1;
// (case label elided) presumably RET_BCO: thread the BCO closure itself,
// then its payload via the BCO's own bitmap.
364     thread((StgClosure **)p);
366     size = BCO_BITMAP_SIZE(bco);
367     thread_large_bitmap(p, BCO_BITMAP(bco), size);
372     // large bitmap (> 32 entries, or 64 on a 64-bit machine)
375     size = GET_LARGE_BITMAP(&info->i)->size;
376     thread_large_bitmap(p, GET_LARGE_BITMAP(&info->i), size);
// (case label elided) presumably RET_FUN: must fetch the *original* fun
// info table via get_threaded_info, since fun's info word may already
// be threaded...
382     StgRetFun *ret_fun = (StgRetFun *)p;
383     StgFunInfoTable *fun_info;
385     fun_info = FUN_INFO_PTR_TO_STRUCT(UNTAG_CLOSURE((StgClosure *)
386     get_threaded_info((StgPtr)ret_fun->fun)));
387     // *before* threading it!
388     thread(&ret_fun->fun);
389     p = thread_arg_block(fun_info, ret_fun->payload);
394     barf("thread_stack: weird activation record found on stack: %d",
395     (int)(info->i.type));
// thread_PAP_payload: thread the 'size'-word payload of a PAP/AP using
// the function's argument bitmap.  The fun info table must be fetched
// via get_threaded_info because fun's own info word may be threaded.
// NOTE(review): case labels and loop increments are elided here.
401 thread_PAP_payload (StgClosure *fun, StgClosure **payload, StgWord size)
405     StgFunInfoTable *fun_info;
407     fun_info = FUN_INFO_PTR_TO_STRUCT(UNTAG_CLOSURE((StgClosure *)
408     get_threaded_info((StgPtr)fun)));
// A PAP's fun can never itself be a PAP.
409     ASSERT(fun_info->i.type != PAP);
413     switch (fun_info->f.fun_type) {
// small bitmap from the fun info table
415 bitmap = BITMAP_BITS(fun_info->f.b.bitmap);
// large bitmap from the fun info table
418 thread_large_bitmap(p, GET_FUN_LARGE_BITMAP(fun_info), size);
// BCO: the bitmap lives on the BCO closure itself
422 thread_large_bitmap((StgPtr)payload, BCO_BITMAP(fun), size);
// standard argument pattern indexed by fun_type
426 bitmap = BITMAP_BITS(stg_arg_bitmaps[fun_info->f.fun_type]);
// small-bitmap walk: clear bit = pointer, thread it
429     if ((bitmap & 1) == 0) {
430 thread((StgClosure **)p);
433 bitmap = bitmap >> 1;
// thread_PAP: thread a PAP's payload (the fun-field threading and the
// return of the past-the-end pointer are elided in this listing —
// presumably thread(&pap->fun); return p; as in thread_AP).
443 thread_PAP (StgPAP *pap)
446     p = thread_PAP_payload(pap->fun, pap->payload, pap->n_args);
// thread_AP: thread an AP's payload; identical layout handling to
// thread_PAP (fun-field threading and return elided in this listing).
452 thread_AP (StgAP *ap)
455     p = thread_PAP_payload(ap->fun, ap->payload, ap->n_args);
// thread_AP_STACK: thread the captured stack chunk held in an AP_STACK
// and return the address just past the whole object.
// NOTE(review): threading of ap->fun appears elided in this listing.
461 thread_AP_STACK (StgAP_STACK *ap)
464     thread_stack((P_)ap->payload, (P_)ap->payload + ap->size);
465     return (P_)ap + sizeofW(StgAP_STACK) + ap->size;
// thread_TSO: thread the pointer fields of a TSO and return the word
// past it.  block_info.closure is only a GC-managed pointer when the
// thread is blocked on an MVar, black hole, or throwTo message, so it
// is threaded only in those states.
469 thread_TSO (StgTSO *tso)
471     thread_(&tso->_link);
472     thread_(&tso->global_link);
474     if ( tso->why_blocked == BlockedOnMVar
475 || tso->why_blocked == BlockedOnBlackHole
476 || tso->why_blocked == BlockedOnMsgThrowTo
478 thread_(&tso->block_info.closure);
480     thread_(&tso->blocked_exceptions);
485     thread_(&tso->stackobj);
486     return (StgPtr)tso + sizeofW(StgTSO);
// update_fwd_large: forward pass over a chain of large-object blocks —
// thread the interior pointers of the single object in each block.
// Large objects are not compacted, so there is no destination or size
// bookkeeping here.  NOTE(review): several case labels and braces are
// elided in this listing.
491 update_fwd_large( bdescr *bd )
494   const StgInfoTable* info;
496   for (; bd != NULL; bd = bd->link) {
498     // nothing to do in a pinned block; it might not even have an object
500     if (bd->flags & BF_PINNED) continue;
503     info = get_itbl((StgClosure *)p);
505     switch (info->type) {
511     case MUT_ARR_PTRS_CLEAN:
512     case MUT_ARR_PTRS_DIRTY:
513     case MUT_ARR_PTRS_FROZEN:
514     case MUT_ARR_PTRS_FROZEN0:
// mutable pointer arrays: thread every payload element
519 a = (StgMutArrPtrs*)p;
520 for (p = (P_)a->payload; p < (P_)&a->payload[a->ptrs]; p++) {
521     thread((StgClosure **)p);
// (case label elided) presumably STACK: thread the live stack portion
528 StgStack *stack = (StgStack*)p;
529 thread_stack(stack->sp, stack->stack + stack->stack_size);
534 thread_AP_STACK((StgAP_STACK *)p);
538 thread_PAP((StgPAP *)p);
// (case label elided) presumably TREC_CHUNK: thread each STM entry
544 StgTRecChunk *tc = (StgTRecChunk *)p;
545 TRecEntry *e = &(tc -> entries[0]);
546 thread_(&tc->prev_chunk);
547 for (i = 0; i < tc -> next_entry_idx; i ++, e++ ) {
549 thread(&e->expected_value);
550 thread(&e->new_value);
556 barf("update_fwd_large: unknown/strange object %d", (int)(info->type));
// thread_obj: thread all pointer fields of the single closure at p and
// return the address of the word immediately after it.  The switch
// dispatches on closure type; arms are grouped by layout (fixed-size
// header+N / thunk+N shapes, then generic ptrs-first layouts, then
// special-cased closure types).
// NOTE(review): most case labels and closing braces are elided in this
// listing — verify arm boundaries against the full source.
561 // ToDo: too big to inline
562 static /* STATIC_INLINE */ StgPtr
563 thread_obj (StgInfoTable *info, StgPtr p)
565     switch (info->type) {
// fixed-layout arms: thread 0..2 payload words, return header + size
567 return p + sizeofW(StgThunk) + 1;
571 return p + sizeofW(StgHeader) + 1;
575 thread(&((StgClosure *)p)->payload[0]);
576 return p + sizeofW(StgHeader) + 1;
579 thread(&((StgThunk *)p)->payload[0]);
580 return p + sizeofW(StgThunk) + 1;
583 return p + sizeofW(StgThunk) + 2;
587 return p + sizeofW(StgHeader) + 2;
590 thread(&((StgThunk *)p)->payload[0]);
591 return p + sizeofW(StgThunk) + 2;
595 thread(&((StgClosure *)p)->payload[0]);
596 return p + sizeofW(StgHeader) + 2;
599 thread(&((StgThunk *)p)->payload[0]);
600 thread(&((StgThunk *)p)->payload[1]);
601 return p + sizeofW(StgThunk) + 2;
605 thread(&((StgClosure *)p)->payload[0]);
606 thread(&((StgClosure *)p)->payload[1]);
607 return p + sizeofW(StgHeader) + 2;
// BCO: thread instrs/literals (ptrs field elided in listing)
610 StgBCO *bco = (StgBCO *)p;
611 thread_(&bco->instrs);
612 thread_(&bco->literals);
614 return p + bco_sizeW(bco);
// generic thunk layout: thread 'ptrs' words, then skip 'nptrs' words
621 end = (P_)((StgThunk *)p)->payload +
622     info->layout.payload.ptrs;
623 for (p = (P_)((StgThunk *)p)->payload; p < end; p++) {
624     thread((StgClosure **)p);
626 return p + info->layout.payload.nptrs;
// generic closure layout: same scheme, without the thunk header
640 end = (P_)((StgClosure *)p)->payload +
641     info->layout.payload.ptrs;
642 for (p = (P_)((StgClosure *)p)->payload; p < end; p++) {
643     thread((StgClosure **)p);
645 return p + info->layout.payload.nptrs;
// WEAK: thread finalizer fields; key/value/link handling partly elided
650 StgWeak *w = (StgWeak *)p;
651 thread(&w->cfinalizer);
654 thread(&w->finalizer);
655 if (w->link != NULL) {
658 return p + sizeofW(StgWeak);
// MVAR: thread head/tail queue pointers and the stored value
664 StgMVar *mvar = (StgMVar *)p;
665 thread_(&mvar->head);
666 thread_(&mvar->tail);
667 thread(&mvar->value);
668 return p + sizeofW(StgMVar);
// indirections: single indirectee field
673 thread(&((StgInd *)p)->indirectee);
674 return p + sizeofW(StgInd);
678 StgSelector *s = (StgSelector *)p;
679 thread(&s->selectee);
680 return p + THUNK_SELECTOR_sizeW();
684 return thread_AP_STACK((StgAP_STACK *)p);
687 return thread_PAP((StgPAP *)p);
690 return thread_AP((StgAP *)p);
// ARR_WORDS: no pointers, just skip
693 return p + arr_words_sizeW((StgArrWords *)p);
695     case MUT_ARR_PTRS_CLEAN:
696     case MUT_ARR_PTRS_DIRTY:
697     case MUT_ARR_PTRS_FROZEN:
698     case MUT_ARR_PTRS_FROZEN0:
703 a = (StgMutArrPtrs *)p;
704 for (p = (P_)a->payload; p < (P_)&a->payload[a->ptrs]; p++) {
705     thread((StgClosure **)p);
708 return (StgPtr)a + mut_arr_ptrs_sizeW(a);
712 return thread_TSO((StgTSO *)p);
716 StgStack *stack = (StgStack*)p;
717 thread_stack(stack->sp, stack->stack + stack->stack_size);
718 return p + stack_sizeW(stack);
724 StgTRecChunk *tc = (StgTRecChunk *)p;
725 TRecEntry *e = &(tc -> entries[0]);
726 thread_(&tc->prev_chunk);
727 for (i = 0; i < tc -> next_entry_idx; i ++, e++ ) {
729 thread(&e->expected_value);
730 thread(&e->new_value);
732 return p + sizeofW(StgTRecChunk);
736 barf("update_fwd: unknown/strange object %d", (int)(info->type));
// update_fwd: forward pass over an ordinary chain of blocks — linearly
// scan each block and thread the interior pointers of every object via
// thread_obj (which also yields the next object's address).
742 update_fwd( bdescr *blocks )
750     // cycle through all the blocks in the step
751     for (; bd != NULL; bd = bd->link) {
754 // linearly scan the objects in this block
755 while (p < bd->free) {
756     ASSERT(LOOKS_LIKE_CLOSURE_PTR(p));
757     info = get_itbl((StgClosure *)p);
758     p = thread_obj(info, p);
// update_fwd_compact: forward pass over the compacted generation.
// Dead (unmarked) objects are skipped via the mark bitmap; for each
// live object we compute its destination ('free' walks the same block
// list from the start), thread its fields, and unthread its chain so
// every referring field now points at the new location (tagged).
// NOTE(review): declarations, bitmap-skip loop internals, and several
// braces are elided in this listing.
764 update_fwd_compact( bdescr *blocks )
770     bdescr *bd, *free_bd;
777     free = free_bd->start;
779     // cycle through all the blocks in the step
780     for (; bd != NULL; bd = bd->link) {
783 while (p < bd->free ) {
// skip forward past unmarked (dead) words
785     while ( p < bd->free && !is_marked(p,bd) ) {
// reload the mark-bitmap word covering p and shift to p's bit
794 m = * ((StgPtr)bd->u.bitmap + ((p - bd->start) / (BITS_IN(StgWord))));
795 m >>= ((p - bd->start) & (BITS_IN(StgWord) - 1));
797     while ( p < bd->free ) {
802 if (((StgWord)p & (sizeof(W_) * BITS_IN(StgWord))) == 0) {
810 // Problem: we need to know the destination for this cell
811 // in order to unthread its info pointer. But we can't
812 // know the destination without the size, because we may
813 // spill into the next block. So we have to run down the
814 // threaded list and get the info ptr first.
816 // ToDo: one possible avenue of attack is to use the fact
817 // that if (p&BLOCK_MASK) >= (free&BLOCK_MASK), then we
818 // definitely have enough room. Also see bug #1147.
819 iptr = get_threaded_info(p);
820 info = INFO_PTR_TO_STRUCT(UNTAG_CLOSURE((StgClosure *)iptr));
824 p = thread_obj(info, p);
// object would spill past the current destination block: advance 'free'
827 if (free + size > free_bd->start + BLOCK_SIZE_W) {
828     // set the next bit in the bitmap to indicate that
829     // this object needs to be pushed into the next
830     // block. This saves us having to run down the
831     // threaded info pointer list twice during the next pass.
833     free_bd = free_bd->link;
834     free = free_bd->start;
836     ASSERT(!is_marked(q+1,bd));
// write the destination (plus the saved pointer tag) into the chain
839 unthread(q,(StgWord)free + GET_CLOSURE_TAG((StgClosure *)iptr));
// update_bkwd_compact: final pass — slide each live object down to the
// destination computed in the forward pass, unthreading any chains
// created since then, and free the now-unused tail of the block list.
// Returns the number of blocks remaining in use (see caller in
// compact()).  NOTE(review): the size accounting, the move() call for
// non-STACK objects, and the return statement are elided in this listing.
849 update_bkwd_compact( generation *gen )
855     bdescr *bd, *free_bd;
857     nat size, free_blocks;
860     bd = free_bd = gen->old_blocks;
861     free = free_bd->start;
864     // cycle through all the blocks in the step
865     for (; bd != NULL; bd = bd->link) {
868 while (p < bd->free ) {
// skip forward past unmarked (dead) words
870     while ( p < bd->free && !is_marked(p,bd) ) {
// reload the mark-bitmap word covering p and shift to p's bit
879 m = * ((StgPtr)bd->u.bitmap + ((p - bd->start) / (BITS_IN(StgWord))));
880 m >>= ((p - bd->start) & (BITS_IN(StgWord) - 1));
882     while ( p < bd->free ) {
887 if (((StgWord)p & (sizeof(W_) * BITS_IN(StgWord))) == 0) {
// next bit set (by the fwd pass): object spills into the next dest block
895 if (is_marked(p+1,bd)) {
896     // don't forget to update the free ptr in the block desc.
897     free_bd->free = free;
898     free_bd = free_bd->link;
899     free = free_bd->start;
903 iptr = get_threaded_info(p);
904 unthread(p, (StgWord)free + GET_CLOSURE_TAG((StgClosure *)iptr));
// after unthreading, the real info pointer is back in place
905 ASSERT(LOOKS_LIKE_INFO_PTR((StgWord)((StgClosure *)p)->header.info));
906 info = get_itbl((StgClosure *)p);
907 size = closure_sizeW_((StgClosure *)p,info);
// STACKs need their sp/bottom fields fixed up as they move
914 if (info->type == STACK) {
915     move_STACK((StgStack *)p, (StgStack *)free);
926     // free the remaining blocks and count what's left.
927     free_bd->free = free;
928     if (free_bd->link != NULL) {
929 freeChain(free_bd->link);
930 free_bd->link = NULL;
937 compact(StgClosure *static_objects)
942 // 1. thread the roots
943 markCapabilities((evac_fn)thread_root, NULL);
945 markScheduler((evac_fn)thread_root, NULL);
947 // the weak pointer lists...
948 if (weak_ptr_list != NULL) {
949 thread((void *)&weak_ptr_list);
951 if (old_weak_ptr_list != NULL) {
952 thread((void *)&old_weak_ptr_list); // tmp
956 for (g = 1; g < RtsFlags.GcFlags.generations; g++) {
959 for (n = 0; n < n_capabilities; n++) {
960 for (bd = capabilities[n].mut_lists[g];
961 bd != NULL; bd = bd->link) {
962 for (p = bd->start; p < bd->free; p++) {
963 thread((StgClosure **)p);
969 // the global thread list
970 for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
971 thread((void *)&generations[g].threads);
974 // any threads resurrected during this GC
975 thread((void *)&resurrected_threads);
981 for (task = all_tasks; task != NULL; task = task->all_link) {
982 for (incall = task->incall; incall != NULL;
983 incall = incall->prev_stack) {
985 thread_(&incall->tso);
991 // the static objects
992 thread_static(static_objects /* ToDo: ok? */);
994 // the stable pointer table
995 threadStablePtrTable((evac_fn)thread_root, NULL);
997 // the CAF list (used by GHCi)
998 markCAFs((evac_fn)thread_root, NULL);
1000 // 2. update forward ptrs
1001 for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
1002 gen = &generations[g];
1003 debugTrace(DEBUG_gc, "update_fwd: %d", g);
1005 update_fwd(gen->blocks);
1006 for (n = 0; n < n_capabilities; n++) {
1007 update_fwd(gc_threads[n]->gens[g].todo_bd);
1008 update_fwd(gc_threads[n]->gens[g].part_list);
1010 update_fwd_large(gen->scavenged_large_objects);
1011 if (g == RtsFlags.GcFlags.generations-1 && gen->old_blocks != NULL) {
1012 debugTrace(DEBUG_gc, "update_fwd: %d (compact)", g);
1013 update_fwd_compact(gen->old_blocks);
1017 // 3. update backward ptrs
1019 if (gen->old_blocks != NULL) {
1020 blocks = update_bkwd_compact(gen);
1021 debugTrace(DEBUG_gc,
1022 "update_bkwd: %d (compact, old: %d blocks, now %d blocks)",
1023 gen->no, gen->n_old_blocks, blocks);
1024 gen->n_old_blocks = blocks;