1 /* -----------------------------------------------------------------------------
3 * (c) The GHC Team 2001-2006
5 * Compacting garbage collector
7 * Documentation on the architecture of the Garbage Collector can be
8 * found in the online commentary:
10 * http://hackage.haskell.org/trac/ghc/wiki/Commentary/Rts/Storage/GC
12 * ---------------------------------------------------------------------------*/
14 #include "PosixSource.h"
18 #include "OSThreads.h"
19 #include "BlockAlloc.h"
27 // Turn off inlining when debugging - it obfuscates things
30 # define STATIC_INLINE static
33 /* ----------------------------------------------------------------------------
34 Threading / unthreading pointers.
36 The basic idea here is to chain together all the fields pointing at
37 a particular object, with the root of the chain in the object's
38 info table field. The original contents of the info pointer goes
39 at the end of the chain.
41 Adding a new field to the chain is a matter of swapping the
42 contents of the field with the contents of the object's info table
45 To unthread the chain, we walk down it updating all the fields on
46 the chain with the new location of the object. We stop when we
47 reach the info pointer at the end.
49 The main difficulty here is that we need to be able to identify the
50 info pointer at the end of the chain. We can't use the low bits of
51 the pointer for this; they are already being used for
52 pointer-tagging. What's more, we need to retain the
53 pointer-tagging tag bits on each pointer during the
54 threading/unthreading process.
56 Our solution is as follows:
57 - an info pointer (chain length zero) is identified by having tag 0
58 - in a threaded chain of length > 0:
59 - the pointer-tagging tag bits are attached to the info pointer
60 - the first entry in the chain has tag 1
61 - second and subsequent entries in the chain have tag 2
63 This exploits the fact that the tag on each pointer to a given
64 closure is normally the same (if they are not the same, then
65 presumably the tag is not essential and it therefore doesn't matter
66 if we throw away some of the tags).
67 ------------------------------------------------------------------------- */
// thread: add the field at p to the chain of fields pointing at the
// object that *p refers to (see "Threading / unthreading pointers"
// above).  The pointer tag of the original contents is preserved
// according to the tag-0/1/2 scheme described in that comment.
// NOTE(review): several source lines are elided in this excerpt.
70 thread (StgClosure **p)
// strip the pointer tag before treating the target as an address
78 q = (StgPtr)UNTAG_CLOSURE(q0);
80 // It doesn't look like a closure at the moment, because the info
81 // ptr is possibly threaded:
82 // ASSERT(LOOKS_LIKE_CLOSURE_PTR(q));
// only heap-allocated objects participate in threading; static
// closures are never moved by compaction
84 if (HEAP_ALLOCED(q)) {
86 // a handy way to discover whether the ptr is into the
87 // compacted area of the old gen, is that the EVACUATED flag
88 // is zero (it's non-zero for all the other areas of live
90 if ((bd->flags & BF_EVACUATED) == 0)
// dispatch on the tag of the current chain head (iptr)
93 switch (GET_CLOSURE_TAG((StgClosure *)iptr))
96 // this is the info pointer; we are creating a new chain.
97 // save the original tag at the end of the chain.
98 *p = (StgClosure *)((StgWord)iptr + GET_CLOSURE_TAG(q0))
103 // this is a chain of length 1 or more
104 *p = (StgClosure *)iptr;
112 // This version of thread() takes a (void *), used to circumvent
113 // warnings from gcc about pointer punning and strict aliasing.
// NOTE(review): callers must still guarantee that p really addresses
// an StgClosure * field; the cast only silences the diagnostic.
114 STATIC_INLINE void thread_ (void *p) { thread((StgClosure **)p); }
// unthread: walk the chain rooted at object p, updating every field on
// the chain to 'free' (the object's new address, with the appropriate
// pointer tag already attached by the caller), then restore the
// object's info pointer.  See the algorithm description above.
117 unthread( StgPtr p, StgWord free )
// tag of the chain head distinguishes an empty chain (tag 0) from a
// chain with at least one field entry (tag 1/2)
124 switch (GET_CLOSURE_TAG((StgClosure *)q))
127 // nothing to do; the chain is length zero
131 r = *q0; // r is the info ptr, tagged with the pointer-tag
// write the untagged info pointer back into the object's header
133 *p = (StgWord)UNTAG_CLOSURE((StgClosure *)r);
146 // Traverse a threaded chain and pull out the info pointer at the end.
147 // The info pointer is also tagged with the appropriate pointer tag
148 // for this closure, which should be attached to the pointer
149 // subsequently passed to unthread().
150 STATIC_INLINE StgWord
151 get_threaded_info( StgPtr p )
// start from the (possibly threaded) header word of the closure
155 q = (W_)GET_INFO(UNTAG_CLOSURE((StgClosure *)p));
158 switch (GET_CLOSURE_TAG((StgClosure *)q))
// tag 0: q is the info pointer itself (chain of length zero)
161 ASSERT(LOOKS_LIKE_INFO_PTR(q));
// follow one link of the chain; q-1 presumably strips the tag-1 bit
// from the chain pointer — confirm against the tagging scheme above
165 StgWord r = *(StgPtr)(q-1);
166 ASSERT(LOOKS_LIKE_INFO_PTR(UNTAG_CLOSURE((StgClosure *)r)));
// reaching here means the chain head carried an unexpected tag
173 barf("get_threaded_info");
177 // A word-aligned memmove will be faster for small objects than libc's or gcc's.
178 // Remember, the two regions *might* overlap, but: to <= from.
// Copies 'size' words from 'from' to 'to' in ascending address order,
// which is safe for overlap precisely because to <= from.
180 move(StgPtr to, StgPtr from, nat size)
182 for(; size > 0; --size) {
// Thread the pointer fields of every static object on the given list,
// following each object's static-link field to the next one.
188 thread_static( StgClosure* p )
190 const StgInfoTable *info;
192 // keep going until we've threaded all the objects on the linked
// list, which is terminated by END_OF_STATIC_LIST
194 while (p != END_OF_STATIC_LIST) {
197 switch (info->type) {
// static indirection: thread the indirectee, then follow its link
200 thread(&((StgInd *)p)->indirectee);
201 p = *IND_STATIC_LINK(p);
// static thunks and functions carry no pointer payload to thread
// here; just advance down the list via their link fields
205 p = *THUNK_STATIC_LINK(p);
208 p = *FUN_STATIC_LINK(p);
211 p = *STATIC_LINK(info,p);
215 barf("thread_static: strange closure %d", (int)(info->type));
// Thread 'size' words starting at p as described by a large bitmap.
// Convention (as in the small-bitmap walkers below): a bitmap bit of
// 0 marks a pointer word that must be threaded; 1 marks a non-pointer.
222 thread_large_bitmap( StgPtr p, StgLargeBitmap *large_bitmap, nat size )
228 bitmap = large_bitmap->bitmap[b];
229 for (i = 0; i < size; ) {
230 if ((bitmap & 1) == 0) {
231 thread((StgClosure **)p);
// refill the working bitmap word after consuming BITS_IN(W_) bits
235 if (i % BITS_IN(W_) == 0) {
237 bitmap = large_bitmap->bitmap[b];
// shift to expose the bit for the next word
239 bitmap = bitmap >> 1;
// Thread the argument block of a function application, using the
// function's argument bitmap.  Callers (e.g. thread_stack) use the
// return value as the pointer just past the argument block.
245 thread_arg_block (StgFunInfoTable *fun_info, StgClosure **args)
252 switch (fun_info->f.fun_type) {
// small bitmap held inline in the fun info table
254 bitmap = BITMAP_BITS(fun_info->f.b.bitmap);
255 size = BITMAP_SIZE(fun_info->f.b.bitmap);
// bitmap too large for one word: delegate to the large-bitmap walker
258 size = GET_FUN_LARGE_BITMAP(fun_info)->size;
259 thread_large_bitmap(p, GET_FUN_LARGE_BITMAP(fun_info), size);
// otherwise, one of the standard argument patterns indexed by fun_type
263 bitmap = BITMAP_BITS(stg_arg_bitmaps[fun_info->f.fun_type]);
264 size = BITMAP_SIZE(stg_arg_bitmaps[fun_info->f.fun_type]);
// small-bitmap walk: bit 0 => pointer word, thread it
267 if ((bitmap & 1) == 0) {
268 thread((StgClosure **)p);
271 bitmap = bitmap >> 1;
// Thread all the pointers in the stack frames between p and stack_end.
// Mirrors the frame-by-frame structure of scavenge_stack, but instead
// of evacuating pointers it threads them onto the per-object chains.
// NOTE(review): some case labels are elided in this excerpt.
280 thread_stack(StgPtr p, StgPtr stack_end)
282 const StgRetInfoTable* info;
286 // highly similar to scavenge_stack, but we do pointer threading here.
288 while (p < stack_end) {
290 // *p must be the info pointer of an activation
291 // record. All activation records have 'bitmap' style layout
294 info = get_ret_itbl((StgClosure *)p);
296 switch (info->i.type) {
298 // Dynamic bitmap: the mask is stored on the stack
302 dyn = ((StgRetDyn *)p)->liveness;
304 // traverse the bitmap first
305 bitmap = RET_DYN_LIVENESS(dyn);
306 p = (P_)&((StgRetDyn *)p)->payload[0];
307 size = RET_DYN_BITMAP_SIZE;
// bit 0 marks a pointer word, as in the other bitmap walkers
309 if ((bitmap & 1) == 0) {
310 thread((StgClosure **)p);
313 bitmap = bitmap >> 1;
317 // skip over the non-ptr words
318 p += RET_DYN_NONPTRS(dyn) + RET_DYN_NONPTR_REGS_SIZE;
320 // follow the ptr words
321 for (size = RET_DYN_PTRS(dyn); size > 0; size--) {
322 thread((StgClosure **)p);
328 // small bitmap (<= 32 entries, or 64 on a 64-bit machine)
329 case CATCH_RETRY_FRAME:
330 case CATCH_STM_FRAME:
331 case ATOMICALLY_FRAME:
336 bitmap = BITMAP_BITS(info->i.layout.bitmap);
337 size = BITMAP_SIZE(info->i.layout.bitmap);
339 // NOTE: the payload starts immediately after the info-ptr, we
340 // don't have an StgHeader in the same sense as a heap closure.
342 if ((bitmap & 1) == 0) {
343 thread((StgClosure **)p);
346 bitmap = bitmap >> 1;
// BCO frame (presumably — label elided): thread the BCO pointer
// itself, then its stack words via the BCO's own large bitmap
357 thread((StgClosure **)p);
359 size = BCO_BITMAP_SIZE(bco);
360 thread_large_bitmap(p, BCO_BITMAP(bco), size);
365 // large bitmap (> 32 entries, or 64 on a 64-bit machine)
368 size = GET_LARGE_BITMAP(&info->i)->size;
369 thread_large_bitmap(p, GET_LARGE_BITMAP(&info->i), size);
// RET_FUN frame (presumably — label elided): the fun field may
// already be threaded, so fetch its real info via the chain...
375 StgRetFun *ret_fun = (StgRetFun *)p;
376 StgFunInfoTable *fun_info;
378 fun_info = FUN_INFO_PTR_TO_STRUCT(UNTAG_CLOSURE((StgClosure *)
379 get_threaded_info((StgPtr)ret_fun->fun)));
380 // *before* threading it!
381 thread(&ret_fun->fun);
382 p = thread_arg_block(fun_info, ret_fun->payload);
387 barf("thread_stack: weird activation record found on stack: %d",
388 (int)(info->i.type));
// Thread the payload of a PAP or AP: 'size' words of arguments whose
// pointerhood is described by fun's argument bitmap.  fun's info
// pointer may itself be threaded, hence get_threaded_info below.
394 thread_PAP_payload (StgClosure *fun, StgClosure **payload, StgWord size)
398 StgFunInfoTable *fun_info;
400 fun_info = FUN_INFO_PTR_TO_STRUCT(UNTAG_CLOSURE((StgClosure *)
401 get_threaded_info((StgPtr)fun)));
// a PAP's fun is never itself a PAP
402 ASSERT(fun_info->i.type != PAP);
406 switch (fun_info->f.fun_type) {
// small bitmap held inline in the fun info table
408 bitmap = BITMAP_BITS(fun_info->f.b.bitmap);
// large bitmap case
411 thread_large_bitmap(p, GET_FUN_LARGE_BITMAP(fun_info), size);
// BCO case: the layout comes from the BCO's own bitmap
415 thread_large_bitmap((StgPtr)payload, BCO_BITMAP(fun), size);
// standard argument patterns
419 bitmap = BITMAP_BITS(stg_arg_bitmaps[fun_info->f.fun_type]);
// small-bitmap walk: bit 0 => pointer word, thread it
422 if ((bitmap & 1) == 0) {
423 thread((StgClosure **)p);
426 bitmap = bitmap >> 1;
// Thread a PAP closure: delegate its n_args payload words to
// thread_PAP_payload, driven by the function's argument bitmap.
436 thread_PAP (StgPAP *pap)
439 p = thread_PAP_payload(pap->fun, pap->payload, pap->n_args);
// Thread an AP closure; same payload layout handling as thread_PAP.
445 thread_AP (StgAP *ap)
448 p = thread_PAP_payload(ap->fun, ap->payload, ap->n_args);
// Thread an AP_STACK: its payload is a saved stack chunk of 'size'
// words, so walk it with the stack-frame walker.  Returns the address
// just past the closure.
454 thread_AP_STACK (StgAP_STACK *ap)
457 thread_stack((P_)ap->payload, (P_)ap->payload + ap->size);
458 return (P_)ap + sizeofW(StgAP_STACK) + ap->size;
// Thread the pointer fields of a TSO (thread state object), including
// its stack.  Returns the address just past the TSO.
462 thread_TSO (StgTSO *tso)
465 thread_(&tso->global_link);
// block_info.closure is only a closure pointer for these blocking
// reasons; for other values of why_blocked it holds non-pointer data
467 if ( tso->why_blocked == BlockedOnMVar
468 || tso->why_blocked == BlockedOnBlackHole
469 || tso->why_blocked == BlockedOnException
471 thread_(&tso->block_info.closure);
473 thread_(&tso->blocked_exceptions);
// the live part of the stack runs from sp to the end of the stack
477 thread_stack(tso->sp, &(tso->stack[tso->stack_size]));
478 return (StgPtr)tso + tso_sizeW(tso);
// Thread the pointers of every object on a large-object block list.
// Large objects are not moved by compaction, but their outgoing
// pointers still need threading so they get updated to new addresses.
483 update_fwd_large( bdescr *bd )
486 const StgInfoTable* info;
// one object per block on a large-object list
488 for (; bd != NULL; bd = bd->link) {
491 info = get_itbl((StgClosure *)p);
493 switch (info->type) {
499 case MUT_ARR_PTRS_CLEAN:
500 case MUT_ARR_PTRS_DIRTY:
501 case MUT_ARR_PTRS_FROZEN:
502 case MUT_ARR_PTRS_FROZEN0:
// follow everything in the mutable pointer array
507 next = p + mut_arr_ptrs_sizeW((StgMutArrPtrs*)p);
508 for (p = (P_)((StgMutArrPtrs *)p)->payload; p < next; p++) {
509 thread((StgClosure **)p);
515 thread_TSO((StgTSO *)p);
519 thread_AP_STACK((StgAP_STACK *)p);
523 thread_PAP((StgPAP *)p);
// TREC_CHUNK (presumably — label elided): thread the STM log entries
529 StgTRecChunk *tc = (StgTRecChunk *)p;
530 TRecEntry *e = &(tc -> entries[0]);
531 thread_(&tc->prev_chunk);
532 for (i = 0; i < tc -> next_entry_idx; i ++, e++ ) {
534 thread(&e->expected_value);
535 thread(&e->new_value);
541 barf("update_fwd_large: unknown/strange object %d", (int)(info->type));
546 // ToDo: too big to inline
// thread_obj: thread every pointer field of the heap object at p,
// dispatching on its closure type, and return the address of the word
// just past the object (so callers can scan linearly).
// NOTE(review): most case labels are elided in this excerpt; the
// annotations below are inferred from payload shapes — confirm against
// the full source.
547 static /* STATIC_INLINE */ StgPtr
548 thread_obj (StgInfoTable *info, StgPtr p)
550 switch (info->type) {
// thunks / closures with 1 payload word, no pointers to thread
552 return p + sizeofW(StgThunk) + 1;
556 return p + sizeofW(StgHeader) + 1;
// 1-pointer closure
560 thread(&((StgClosure *)p)->payload[0]);
561 return p + sizeofW(StgHeader) + 1;
// 1-pointer thunk
564 thread(&((StgThunk *)p)->payload[0]);
565 return p + sizeofW(StgThunk) + 1;
// 2 non-pointer payload words
568 return p + sizeofW(StgThunk) + 2;
572 return p + sizeofW(StgHeader) + 2;
// 1 pointer + 1 non-pointer
575 thread(&((StgThunk *)p)->payload[0]);
576 return p + sizeofW(StgThunk) + 2;
580 thread(&((StgClosure *)p)->payload[0]);
581 return p + sizeofW(StgHeader) + 2;
// 2 pointers
584 thread(&((StgThunk *)p)->payload[0]);
585 thread(&((StgThunk *)p)->payload[1]);
586 return p + sizeofW(StgThunk) + 2;
590 thread(&((StgClosure *)p)->payload[0]);
591 thread(&((StgClosure *)p)->payload[1]);
592 return p + sizeofW(StgHeader) + 2;
// byte-code objects: thread instrs/literals arrays
595 StgBCO *bco = (StgBCO *)p;
596 thread_(&bco->instrs);
597 thread_(&bco->literals);
599 return p + bco_sizeW(bco);
// generic thunk layout: ptrs first, then nptrs, per the info table
606 end = (P_)((StgThunk *)p)->payload +
607 info->layout.payload.ptrs;
608 for (p = (P_)((StgThunk *)p)->payload; p < end; p++) {
609 thread((StgClosure **)p);
611 return p + info->layout.payload.nptrs;
621 case SE_CAF_BLACKHOLE:
// generic closure layout: ptrs first, then nptrs
627 end = (P_)((StgClosure *)p)->payload +
628 info->layout.payload.ptrs;
629 for (p = (P_)((StgClosure *)p)->payload; p < end; p++) {
630 thread((StgClosure **)p);
632 return p + info->layout.payload.nptrs;
// weak pointers: only some fields are threaded here (elided lines
// may handle key/value); link is handled conditionally
637 StgWeak *w = (StgWeak *)p;
640 thread(&w->finalizer);
641 if (w->link != NULL) {
644 return p + sizeofW(StgWeak);
// MVars: head/tail are TSO queue pointers, value is the payload
650 StgMVar *mvar = (StgMVar *)p;
651 thread_(&mvar->head);
652 thread_(&mvar->tail);
653 thread(&mvar->value);
654 return p + sizeofW(StgMVar);
658 case IND_OLDGEN_PERM:
659 thread(&((StgInd *)p)->indirectee);
660 return p + sizeofW(StgInd);
// thunk selectors: one selectee pointer
664 StgSelector *s = (StgSelector *)p;
665 thread(&s->selectee);
666 return p + THUNK_SELECTOR_sizeW();
670 return thread_AP_STACK((StgAP_STACK *)p);
673 return thread_PAP((StgPAP *)p);
676 return thread_AP((StgAP *)p);
// byte arrays contain no pointers; just skip over them
679 return p + arr_words_sizeW((StgArrWords *)p);
681 case MUT_ARR_PTRS_CLEAN:
682 case MUT_ARR_PTRS_DIRTY:
683 case MUT_ARR_PTRS_FROZEN:
684 case MUT_ARR_PTRS_FROZEN0:
// follow everything in the mutable pointer array
689 next = p + mut_arr_ptrs_sizeW((StgMutArrPtrs*)p);
690 for (p = (P_)((StgMutArrPtrs *)p)->payload; p < next; p++) {
691 thread((StgClosure **)p);
697 return thread_TSO((StgTSO *)p);
699 case TVAR_WATCH_QUEUE:
701 StgTVarWatchQueue *wq = (StgTVarWatchQueue *)p;
702 thread_(&wq->closure);
703 thread_(&wq->next_queue_entry);
704 thread_(&wq->prev_queue_entry);
705 return p + sizeofW(StgTVarWatchQueue);
710 StgTVar *tvar = (StgTVar *)p;
711 thread((void *)&tvar->current_value);
712 thread((void *)&tvar->first_watch_queue_entry);
713 return p + sizeofW(StgTVar);
718 StgTRecHeader *trec = (StgTRecHeader *)p;
719 thread_(&trec->enclosing_trec);
720 thread_(&trec->current_chunk);
721 thread_(&trec->invariants_to_check);
722 return p + sizeofW(StgTRecHeader);
// STM transaction log chunk: thread each expected/new value pair
728 StgTRecChunk *tc = (StgTRecChunk *)p;
729 TRecEntry *e = &(tc -> entries[0]);
730 thread_(&tc->prev_chunk);
731 for (i = 0; i < tc -> next_entry_idx; i ++, e++ ) {
733 thread(&e->expected_value);
734 thread(&e->new_value);
736 return p + sizeofW(StgTRecChunk);
739 case ATOMIC_INVARIANT:
741 StgAtomicInvariant *invariant = (StgAtomicInvariant *)p;
742 thread_(&invariant->code);
743 thread_(&invariant->last_execution);
744 return p + sizeofW(StgAtomicInvariant);
747 case INVARIANT_CHECK_QUEUE:
749 StgInvariantCheckQueue *queue = (StgInvariantCheckQueue *)p;
750 thread_(&queue->invariant);
751 thread_(&queue->my_execution);
752 thread_(&queue->next_queue_entry);
753 return p + sizeofW(StgInvariantCheckQueue);
757 barf("update_fwd: unknown/strange object %d", (int)(info->type));
// update_fwd: linearly scan every object in each block of the list,
// threading all of its pointer fields via thread_obj.
763 update_fwd( bdescr *blocks )
771 // cycle through all the blocks in the step
772 for (; bd != NULL; bd = bd->link) {
775 // linearly scan the objects in this block
776 while (p < bd->free) {
777 ASSERT(LOOKS_LIKE_CLOSURE_PTR(p));
778 info = get_itbl((StgClosure *)p);
// thread_obj returns the address just past this object
779 p = thread_obj(info, p);
// update_fwd_compact: the forward pass over the compacted area.  Scan
// live (marked) objects, thread their fields, compute each object's
// destination address ('free' advancing through free_bd), and unthread
// its chain with that destination.  NOTE(review): lines elided.
785 update_fwd_compact( bdescr *blocks )
791 bdescr *bd, *free_bd;
// 'free' tracks the next destination address for compacted objects
798 free = free_bd->start;
800 // cycle through all the blocks in the step
801 for (; bd != NULL; bd = bd->link) {
804 while (p < bd->free ) {
// skip dead (unmarked) words up to the next live object
806 while ( p < bd->free && !is_marked(p,bd) ) {
// reload the mark-bitmap word covering p and shift to p's bit
815 m = * ((StgPtr)bd->u.bitmap + ((p - bd->start) / (BITS_IN(StgWord))));
816 m >>= ((p - bd->start) & (BITS_IN(StgWord) - 1));
818 while ( p < bd->free ) {
// crossed into the next bitmap word; refill m (presumably — the
// refill itself is elided here)
823 if (((StgWord)p & (sizeof(W_) * BITS_IN(StgWord))) == 0) {
831 // Problem: we need to know the destination for this cell
832 // in order to unthread its info pointer. But we can't
833 // know the destination without the size, because we may
834 // spill into the next block. So we have to run down the
835 // threaded list and get the info ptr first.
837 // ToDo: one possible avenue of attack is to use the fact
838 // that if (p&BLOCK_MASK) >= (free&BLOCK_MASK), then we
839 // definitely have enough room. Also see bug #1147.
840 iptr = get_threaded_info(p);
841 info = INFO_PTR_TO_STRUCT(UNTAG_CLOSURE((StgClosure *)iptr));
// thread this object's fields; p advances past the object
845 p = thread_obj(info, p);
// object won't fit in the current destination block: spill over
848 if (free + size > free_bd->start + BLOCK_SIZE_W) {
849 // unset the next bit in the bitmap to indicate that
850 // this object needs to be pushed into the next
851 // block. This saves us having to run down the
852 // threaded info pointer list twice during the next pass.
854 free_bd = free_bd->link;
855 free = free_bd->start;
857 ASSERT(is_marked(q+1,bd));
// fix up all fields on the chain to the tagged destination address
860 unthread(q,(StgWord)free + GET_CLOSURE_TAG((StgClosure *)iptr));
// update_bkwd_compact: the backward pass.  Walk the marked objects
// again, unthread any remaining chains to their final addresses, and
// actually slide each object down to 'free'.  Frees the tail of the
// block chain afterwards; returns the surviving block count
// (return statement elided in this excerpt).
870 update_bkwd_compact( step *stp )
876 bdescr *bd, *free_bd;
878 nat size, free_blocks;
881 bd = free_bd = stp->old_blocks;
882 free = free_bd->start;
885 // cycle through all the blocks in the step
886 for (; bd != NULL; bd = bd->link) {
889 while (p < bd->free ) {
// skip dead (unmarked) words up to the next live object
891 while ( p < bd->free && !is_marked(p,bd) ) {
// reload the mark-bitmap word covering p and shift to p's bit
900 m = * ((StgPtr)bd->u.bitmap + ((p - bd->start) / (BITS_IN(StgWord))));
901 m >>= ((p - bd->start) & (BITS_IN(StgWord) - 1));
903 while ( p < bd->free ) {
// crossed into the next bitmap word (refill elided here)
908 if (((StgWord)p & (sizeof(W_) * BITS_IN(StgWord))) == 0) {
// the fwd pass cleared bit p+1 for objects that spill into the
// next destination block (see update_fwd_compact)
916 if (!is_marked(p+1,bd)) {
917 // don't forget to update the free ptr in the block desc.
918 free_bd->free = free;
919 free_bd = free_bd->link;
920 free = free_bd->start;
// resolve the chain to the final (tagged) address, restoring the
// object's info pointer so get_itbl below works
924 iptr = get_threaded_info(p);
925 unthread(p, (StgWord)free + GET_CLOSURE_TAG((StgClosure *)iptr));
926 ASSERT(LOOKS_LIKE_INFO_PTR(((StgClosure *)p)->header.info));
927 info = get_itbl((StgClosure *)p);
928 size = closure_sizeW_((StgClosure *)p,info);
// TSOs need extra fixup of internal pointers when moved
935 if (info->type == TSO) {
936 move_TSO((StgTSO *)p, (StgTSO *)free);
947 // free the remaining blocks and count what's left.
948 free_bd->free = free;
949 if (free_bd->link != NULL) {
950 freeChain(free_bd->link);
951 free_bd->link = NULL;
963 // 1. thread the roots
964 GetRoots((evac_fn)thread);
966 // the weak pointer lists...
967 if (weak_ptr_list != NULL) {
968 thread((void *)&weak_ptr_list);
970 if (old_weak_ptr_list != NULL) {
971 thread((void *)&old_weak_ptr_list); // tmp
975 for (g = 1; g < RtsFlags.GcFlags.generations; g++) {
978 for (bd = generations[g].mut_list; bd != NULL; bd = bd->link) {
979 for (p = bd->start; p < bd->free; p++) {
980 thread((StgClosure **)p);
985 // the global thread list
986 thread((void *)&all_threads);
988 // any threads resurrected during this GC
989 thread((void *)&resurrected_threads);
994 for (task = all_tasks; task != NULL; task = task->all_link) {
1001 // the static objects
1002 thread_static(scavenged_static_objects);
1004 // the stable pointer table
1005 threadStablePtrTable((evac_fn)thread);
1007 // the CAF list (used by GHCi)
1008 markCAFs((evac_fn)thread);
1010 // 2. update forward ptrs
1011 for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
1012 for (s = 0; s < generations[g].n_steps; s++) {
1013 if (g==0 && s ==0) continue;
1014 stp = &generations[g].steps[s];
1015 debugTrace(DEBUG_gc, "update_fwd: %d.%d",
1016 stp->gen->no, stp->no);
1018 update_fwd(stp->blocks);
1019 update_fwd_large(stp->scavenged_large_objects);
1020 if (g == RtsFlags.GcFlags.generations-1 && stp->old_blocks != NULL) {
1021 debugTrace(DEBUG_gc, "update_fwd: %d.%d (compact)",
1022 stp->gen->no, stp->no);
1023 update_fwd_compact(stp->old_blocks);
1028 // 3. update backward ptrs
1029 stp = &oldest_gen->steps[0];
1030 if (stp->old_blocks != NULL) {
1031 blocks = update_bkwd_compact(stp);
1032 debugTrace(DEBUG_gc,
1033 "update_bkwd: %d.%d (compact, old: %d blocks, now %d blocks)",
1034 stp->gen->no, stp->no,
1035 stp->n_old_blocks, blocks);
1036 stp->n_old_blocks = blocks;