1 /* -----------------------------------------------------------------------------
3 * (c) The GHC Team 1998-2008
5 * Generational garbage collector
7 * Documentation on the architecture of the Garbage Collector can be
8 * found in the online commentary:
10 * http://hackage.haskell.org/trac/ghc/wiki/Commentary/Rts/Storage/GC
12 * ---------------------------------------------------------------------------*/
17 #include "OSThreads.h"
19 /* -----------------------------------------------------------------------------
22 ToDo: move this to the wiki when the implementation is done.
24 We're only going to try to parallelise the copying GC for now. The
27 Each thread has a gc_thread structure (see below) which holds its
28 thread-local data. We'll keep a pointer to this in a thread-local
29 variable, or possibly in a register.
31 In the gc_thread structure is a step_workspace for each step. The
32 primary purpose of the step_workspace is to hold evacuated objects;
33 when an object is evacuated, it is copied to the "todo" block in
34 the thread's workspace for the appropriate step. When the todo
35 block is full, it is pushed to the global step->todos list, which
36 is protected by a lock. (in fact we intervene a one-place buffer
37 here to reduce contention).
39 A thread repeatedly grabs a block of work from one of the
40 step->todos lists, scavenges it, and keeps the scavenged block on
41 its own ws->scavd_list (this is to avoid unnecessary contention
42 returning the completed buffers back to the step: we can just
43 collect them all later).
45 When there is no global work to do, we start scavenging the todo
46 blocks in the workspaces. This is where the scan_bd field comes
47 in: we can scan the contents of the todo block, when we have
48 scavenged the contents of the todo block (up to todo_bd->free), we
49 don't want to move this block immediately to the scavd_list,
50 because it is probably only partially full. So we remember that we
51 have scanned up to this point by saving the block in ws->scan_bd,
52 with the current scan pointer in ws->scan. Later, when more
53 objects have been copied to this block, we can come back and scan
54 the rest. When we visit this workspace again in the future,
55 scan_bd may still be the same as todo_bd, or it might be different:
56 if enough objects were copied into this block that it filled up,
57 then we will have allocated a new todo block, but *not* pushed the
58 old one to the step, because it is partially scanned.
60 The reason to leave scanning the todo blocks until last is that we
61 want to deal with full blocks as far as possible.
62 ------------------------------------------------------------------------- */
65 /* -----------------------------------------------------------------------------
68 A step workspace exists for each step for each GC thread. The GC
69 thread takes a block from the todos list of the step into the
70 scanbd and then scans it. Objects referred to by those in the scan
71 block are copied into the todo or scavd blocks of the relevant step.
73 ------------------------------------------------------------------------- */
75 typedef struct step_workspace_ {
76 step * step; // the step for this workspace
77 struct gc_thread_ * gct; // the gc_thread that contains this workspace
79 // where objects to be scavenged go
81 StgPtr todo_free; // free ptr for todo_bd
82 StgPtr todo_lim; // lim for todo_bd
84 bdescr * buffer_todo_bd; // buffer to reduce contention
85 // on the step's todos list
87 // where large objects to be scavenged go
88 bdescr * todo_large_objects;
90 // Objects that have already been, scavenged.
92 nat n_scavd_blocks; // count of blocks in this list
94 // Partially-full, scavenged, blocks
96 unsigned int n_part_blocks; // count of above
100 } step_workspace ATTRIBUTE_ALIGNED(64);
101 // align so that computing gct->steps[n] is a shift, not a multiply
102 // fails if the size is <64, which is why we need the pad above
104 /* ----------------------------------------------------------------------------
107 Every GC thread has one of these. It contains all the step specific
108 workspaces and other GC thread local information. At some later
109 point it maybe useful to move this other into the TLS store of the
111 ------------------------------------------------------------------------- */
113 typedef struct gc_thread_ {
115 OSThreadId id; // The OS thread that this struct belongs to
117 Condition wake_cond; // So we can go to sleep between GCs
121 nat thread_index; // a zero based index identifying the thread
123 bdescr * free_blocks; // a buffer of free blocks for this thread
124 // during GC without accessing the block
125 // allocators spin lock.
127 StgClosure* static_objects; // live static objects
128 StgClosure* scavenged_static_objects; // static objects scavenged so far
130 lnat gc_count; // number of GCs this thread has done
132 // block that is currently being scanned
135 // --------------------
138 step *evac_step; // Youngest generation that objects
139 // should be evacuated to in
140 // evacuate(). (Logically an
141 // argument to evacuate, but it's
142 // static a lot of the time so we
143 // optimise it into a per-thread
146 rtsBool failed_to_evac; // failure to evacuate an object typically
147 // Causes it to be recorded in the mutable
150 rtsBool eager_promotion; // forces promotion to the evac gen
151 // instead of the to-space
152 // corresponding to the object
154 lnat thunk_selector_depth; // ummm.... not used as of now
160 // -------------------
169 // -------------------
172 // array of workspaces, indexed by stp->abs_no. This is placed
173 // directly at the end of the gc_thread structure so that we can get from
174 // the gc_thread pointer to a workspace using only pointer
175 // arithmetic, no memory access. This happens in the inner loop
176 // of the GC, see Evac.c:alloc_for_copy().
177 step_workspace steps[];
181 extern nat n_gc_threads;
183 extern gc_thread **gc_threads;
185 /* -----------------------------------------------------------------------------
186 The gct variable is thread-local and points to the current thread's
187 gc_thread structure. It is heavily accessed, so we try to put gct
188 into a global register variable if possible; if we don't have a
189 register then use gcc's __thread extension to create a thread-local
192 Even on x86 where registers are scarce, it is worthwhile using a
193 register variable here: I measured about a 2-5% slowdown with the
195 -------------------------------------------------------------------------- */
197 #define GLOBAL_REG_DECL(type,name,reg) register type name REG(reg);
199 #if defined(REG_Base) && !defined(i386_HOST_ARCH)
200 // on i386, REG_Base is %ebx which is also used for PIC, so we don't
203 GLOBAL_REG_DECL(gc_thread*, gct, REG_Base)
204 #define DECLARE_GCT /* nothing */
206 #elif defined(REG_R1)
208 GLOBAL_REG_DECL(gc_thread*, gct, REG_R1)
209 #define DECLARE_GCT /* nothing */
211 #elif defined(__GNUC__)
213 extern __thread gc_thread* gct;
214 #define DECLARE_GCT __thread gc_thread* gct;
218 #error Cannot find a way to declare the thread-local gct