1 /* -----------------------------------------------------------------------------
3 * (c) The GHC Team 1998-2008
5 * Generational garbage collector
7 * Documentation on the architecture of the Garbage Collector can be
8 * found in the online commentary:
10 * http://hackage.haskell.org/trac/ghc/wiki/Commentary/Rts/Storage/GC
12 * ---------------------------------------------------------------------------*/
21 /* -----------------------------------------------------------------------------
24 ToDo: move this to the wiki when the implementation is done.
26 We're only going to try to parallelise the copying GC for now. The plan is as follows.
29 Each thread has a gc_thread structure (see below) which holds its
30 thread-local data. We'll keep a pointer to this in a thread-local
31 variable, or possibly in a register.
33 In the gc_thread structure is a gen_workspace for each generation. The
34 primary purpose of the gen_workspace is to hold evacuated objects;
35 when an object is evacuated, it is copied to the "todo" block in
36 the thread's workspace for the appropriate generation. When the todo
37 block is full, it is pushed to the global gen->todos list, which
38 is protected by a lock. (in fact we interpose a one-place buffer
39 here to reduce contention).
41 A thread repeatedly grabs a block of work from one of the
42 gen->todos lists, scavenges it, and keeps the scavenged block on
43 its own ws->scavd_list (this is to avoid unnecessary contention
44 returning the completed buffers back to the generation: we can just
45 collect them all later).
47 When there is no global work to do, we start scavenging the todo
48 blocks in the workspaces. This is where the scan_bd field comes
49 in: we can scan the contents of the todo block, but when we have
50 scavenged the contents of the todo block (up to todo_bd->free), we
51 don't want to move this block immediately to the scavd_list,
52 because it is probably only partially full. So we remember that we
53 have scanned up to this point by saving the block in ws->scan_bd,
54 with the current scan pointer in ws->scan. Later, when more
55 objects have been copied to this block, we can come back and scan
56 the rest. When we visit this workspace again in the future,
57 scan_bd may still be the same as todo_bd, or it might be different:
58 if enough objects were copied into this block that it filled up,
59 then we will have allocated a new todo block, but *not* pushed the
60 old one to the generation, because it is partially scanned.
62 The reason to leave scanning the todo blocks until last is that we
63 want to deal with full blocks as far as possible.
64 ------------------------------------------------------------------------- */
67 /* -----------------------------------------------------------------------------
70 A generation workspace exists for each generation for each GC
71 thread. The GC thread takes a block from the todos list of the
72 generation into the scanbd and then scans it. Objects referred to
73 by those in the scan block are copied into the todo or scavd blocks
74 of the relevant generation.
76 ------------------------------------------------------------------------- */
// Workspace for one generation, private to one GC thread.  Evacuated
// objects destined for this generation are copied into the thread's "todo"
// block; the fields below track that block's allocation state and count
// the blocks this thread has already scavenged.
78 typedef struct gen_workspace_ {
79 generation * gen; // the generation this workspace belongs to
80 struct gc_thread_ * my_gct; // back-pointer to the gc_thread that contains this workspace
82 // Where objects to be scavenged go (the current "todo" block, todo_bd).
84 StgPtr todo_free; // allocation (free) pointer for todo_bd
85 StgPtr todo_lim; // allocation limit for todo_bd
88 bdescr * todo_overflow; // NOTE(review): uncommented in the original; from the name, overflow todo blocks -- confirm
91 // Where large objects to be scavenged go.
92 bdescr * todo_large_objects;
94 // Objects that have already been scavenged.
96 nat n_scavd_blocks; // number of blocks on the already-scavenged list
98 // Partially-full, scavenged blocks.
100 unsigned int n_part_blocks; // number of partially-full scavenged blocks
104 } gen_workspace ATTRIBUTE_ALIGNED(64);
105 // align so that computing gct->gens[n] is a shift, not a multiply
106 // fails if the size is <64, which is why we need the pad above
108 /* ----------------------------------------------------------------------------
111 Every GC thread has one of these. It contains all the generation
112 specific workspaces and other GC thread local information. At some
113 later point it may be useful to move this other information into the TLS store.
115 ------------------------------------------------------------------------- */
// State private to one GC thread.  The per-generation workspaces live in
// the trailing gens[] array, directly after the fixed fields.
117 typedef struct gc_thread_ {
119 OSThreadId id; // the OS thread that this struct belongs to
122 volatile rtsBool wakeup; // NOTE(review): uncommented in the original; from the name, a wake-up flag for this thread -- confirm
124 nat thread_index; // a zero-based index identifying the thread
126 bdescr * free_blocks; // a buffer of free blocks for this thread to use
127 // during GC without taking the block
128 // allocator's spin lock.
130 StgClosure* static_objects; // live static objects
131 StgClosure* scavenged_static_objects; // static objects scavenged so far
133 lnat gc_count; // number of GCs this thread has done
135 // block that is currently being scanned
138 // Remembered sets on this CPU. Each GC thread has its own
139 // private per-generation remembered sets, so it can add an item
140 // to the remembered set without taking a lock. The mut_lists
141 // array on a gc_thread is the same as the one on the
142 // corresponding Capability; we stash it here too for easy access
143 // during GC; see recordMutableGen_GC().
146 // --------------------
149 generation *evac_gen; // Youngest generation that objects
150 // should be evacuated to in
151 // evacuate(). (Logically an
152 // argument to evacuate(), but it's
153 // static a lot of the time so we
154 // optimise it into a per-thread variable.)
157 rtsBool failed_to_evac; // failure to evacuate an object typically
158 // causes it to be recorded in the mutable list.
161 rtsBool eager_promotion; // forces promotion to the evac gen
162 // instead of the to-space
163 // corresponding to the object.
165 lnat thunk_selector_depth; // not used as of now
171 // -------------------
180 // -------------------
183 // Array of workspaces, indexed by stp->abs_no. This is placed
184 // directly at the end of the gc_thread structure so that we can get from
185 // the gc_thread pointer to a workspace using only pointer
186 // arithmetic, with no memory access. This happens in the inner loop
187 // of the GC; see Evac.c:alloc_for_copy().
188 gen_workspace gens[];
// Number of GC threads.  NOTE(review): inferred from the name only -- confirm against the definition.
192 extern nat n_gc_threads;
194 /* -----------------------------------------------------------------------------
195 The gct variable is thread-local and points to the current thread's
196 gc_thread structure. It is heavily accessed, so we try to put gct
197 into a global register variable if possible; if we don't have a
198 register then use gcc's __thread extension to create a thread-local variable instead.
201 Even on x86 where registers are scarce, it is worthwhile using a
202 register variable here: I measured about a 2-5% slowdown with the __thread version.
204 -------------------------------------------------------------------------- */
// Pointers to the gc_thread structures.
// NOTE(review): presumably an array of n_gc_threads entries -- confirm.
206 extern gc_thread **gc_threads;
208 #if defined(THREADED_RTS)
// Declares `name` as a register variable of `type` bound to `reg`.
210 #define GLOBAL_REG_DECL(type,name,reg) register type name REG(reg);
// In the threaded RTS gct is a real variable, so SET_GCT is a plain assignment.
212 #define SET_GCT(to) gct = (to)
216 #if (defined(i386_HOST_ARCH) && defined(linux_HOST_OS))
217 // Using __thread is better than stealing a register on x86/Linux, because
218 // we have too few registers available. In my tests it was worth
219 // about 5% in GC performance, but of course that might change as gcc
220 // improves. -- SDM 2009/04/03
222 // We ought to do the same on MacOS X, but __thread is not
223 // supported there yet (gcc 4.0.1).
// DECLARE_GCT expands to the definition matching the extern declaration.
225 extern __thread gc_thread* gct;
226 #define DECLARE_GCT __thread gc_thread* gct;
229 #elif defined(sparc_TARGET_ARCH)
230 // On SPARC we can't pin gct to a register. Names like %l1 are just offsets
231 // into the register window, which change on each function call.
233 // There are eight global (non-window) registers, but they're used for other purposes.
234 // %g0 -- always zero
235 // %g1 -- volatile over function calls, used by the linker
236 // %g2-%g3 -- used as scratch regs by the C compiler (caller saves)
237 // %g4 -- volatile over function calls, used by the linker
238 // %g5-%g7 -- reserved by the OS
// So on SPARC gct is a __thread variable as well.
240 extern __thread gc_thread* gct;
241 #define DECLARE_GCT __thread gc_thread* gct;
244 #elif defined(REG_Base) && !defined(i386_HOST_ARCH)
245 // On i386, REG_Base is %ebx which is also used for PIC, so we don't
// use it for gct there (hence the !defined(i386_HOST_ARCH) test).
// The register declaration itself declares gct, so DECLARE_GCT is empty.
248 GLOBAL_REG_DECL(gc_thread*, gct, REG_Base)
249 #define DECLARE_GCT /* nothing */
252 #elif defined(REG_R1)
// Next choice: keep gct in REG_R1; again DECLARE_GCT is empty.
254 GLOBAL_REG_DECL(gc_thread*, gct, REG_R1)
255 #define DECLARE_GCT /* nothing */
258 #elif defined(__GNUC__)
// No register chosen above: use gcc's __thread extension.
260 extern __thread gc_thread* gct;
261 #define DECLARE_GCT __thread gc_thread* gct;
// Neither a register nor __thread is available.
265 #error Cannot find a way to declare the thread-local gct
269 #else // not the threaded RTS
// Non-threaded RTS: gct is not a variable but a macro that casts the address
// of the_gc_thread (declared as a byte array) to gc_thread*, so SET_GCT and
// DECLARE_GCT expand to nothing.
271 extern StgWord8 the_gc_thread[];
273 #define gct ((gc_thread*)&the_gc_thread)
274 #define SET_GCT(to) /*nothing*/
275 #define DECLARE_GCT /*nothing*/
281 #endif // SM_GCTHREAD_H