rts/sm/GCThread.h

   1 /* -----------------------------------------------------------------------------
   2  *
   3  * (c) The GHC Team 1998-2008
   4  *
   5  * Generational garbage collector
   6  *
   7  * Documentation on the architecture of the Garbage Collector can be
   8  * found in the online commentary:
   9  *
  10  *   http://hackage.haskell.org/trac/ghc/wiki/Commentary/Rts/Storage/GC
  11  *
  12  * ---------------------------------------------------------------------------*/
  13
  14 #ifndef GCTHREAD_H
  15 #define GCTHREAD_H
  16
  17 #include "OSThreads.h"
  18
  19 /* -----------------------------------------------------------------------------
  20    General scheme
  21
  22    ToDo: move this to the wiki when the implementation is done.
  23
  24    We're only going to try to parallelise the copying GC for now.  The
  25    Plan is as follows.
  26
  27    Each thread has a gc_thread structure (see below) which holds its
  28    thread-local data.  We'll keep a pointer to this in a thread-local
  29    variable, or possibly in a register.
  30
  31    In the gc_thread structure is a step_workspace for each step.  The
  32    primary purpose of the step_workspace is to hold evacuated objects;
  33    when an object is evacuated, it is copied to the "todo" block in
  34    the thread's workspace for the appropriate step.  When the todo
  35    block is full, it is pushed to the global step->todos list, which
   36    is protected by a lock.  (In fact we interpose a one-place buffer
   37    here to reduce contention.)
  38
  39    A thread repeatedly grabs a block of work from one of the
  40    step->todos lists, scavenges it, and keeps the scavenged block on
  41    its own ws->scavd_list (this is to avoid unnecessary contention
  42    returning the completed buffers back to the step: we can just
  43    collect them all later).
  44
  45    When there is no global work to do, we start scavenging the todo
  46    blocks in the workspaces.  This is where the scan_bd field comes
   47    in: we can scan the contents of the todo block, but when we have
  48    scavenged the contents of the todo block (up to todo_bd->free), we
  49    don't want to move this block immediately to the scavd_list,
  50    because it is probably only partially full.  So we remember that we
  51    have scanned up to this point by saving the block in ws->scan_bd,
  52    with the current scan pointer in ws->scan.  Later, when more
  53    objects have been copied to this block, we can come back and scan
  54    the rest.  When we visit this workspace again in the future,
  55    scan_bd may still be the same as todo_bd, or it might be different:
  56    if enough objects were copied into this block that it filled up,
  57    then we will have allocated a new todo block, but *not* pushed the
  58    old one to the step, because it is partially scanned.
  59
  60    The reason to leave scanning the todo blocks until last is that we
  61    want to deal with full blocks as far as possible.
  62    ------------------------------------------------------------------------- */
  63
  64
   65 /* -----------------------------------------------------------------------------
   66    Step Workspace
   67
   68    A step workspace exists for each step for each GC thread. The GC
   69    thread takes a block from the todos list of the step into the
   70    scan_bd and then scans it.  Objects referred to by those in the scan
   71    block are copied into the todo or scavd blocks of the relevant step.
   72
   73    ------------------------------------------------------------------------- */
   74
   75 typedef struct step_workspace_ {
   76     step * step;                // the step for this workspace
   77     struct gc_thread_ * gct;    // the gc_thread that contains this workspace
   78
   79     // where objects to be scavenged go
   80     bdescr *     todo_bd;              // the current "todo" block
   81     StgPtr       todo_free;            // free ptr for todo_bd
   82     StgPtr       todo_lim;             // lim for todo_bd
   83
   84     bdescr *     buffer_todo_bd;     // one-place buffer to reduce contention
   85                                      // on the step's todos list
   86
   87     // where large objects to be scavenged go
   88     bdescr *     todo_large_objects;
   89
   90     // Objects that have already been scavenged.
   91     bdescr *     scavd_list;
   92     nat          n_scavd_blocks;     // count of blocks in this list
   93
   94     // Partially-full, scavenged blocks
   95     bdescr *     part_list;
   96     unsigned int n_part_blocks;      // count of above
   97
   98     StgWord pad[5];                  // padding: keeps the size from dropping below 64
   99
  100 } step_workspace ATTRIBUTE_ALIGNED(64);
  101 // Aligned so that computing gct->steps[n] is a shift, not a multiply.
  102 // This fails if the size is <64, which is why we need the pad above.
 103
  104 /* ----------------------------------------------------------------------------
  105    GC thread object
  106
  107    Every GC thread has one of these. It contains all the step specific
  108    workspaces and other GC thread local information. At some later
  109    point it may be useful to move this other information into the TLS
  110    store of the GC threads.
  111    ------------------------------------------------------------------------- */
  112
  113 typedef struct gc_thread_ {
  114 #ifdef THREADED_RTS
  115     OSThreadId id;                 // The OS thread that this struct belongs to
  116     SpinLock   gc_spin;            // NOTE(review): the locking protocol for gc_spin
  117     SpinLock   mut_spin;           //   and mut_spin is not visible in this header
  118     volatile rtsBool wakeup;       // presumably a wake-up flag for this thread -- confirm
  119 #endif // THREADED_RTS
  120     nat thread_index;              // a zero based index identifying the thread
  121
  122     bdescr * free_blocks;          // a buffer of free blocks for this thread,
  123                                    //  usable during GC without accessing the
  124                                    //   block allocator's spin lock.
  125
  126     StgClosure* static_objects;      // live static objects
  127     StgClosure* scavenged_static_objects;   // static objects scavenged so far
  128
  129     lnat gc_count;                 // number of GCs this thread has done
  130
  131     // block that is currently being scanned
  132     bdescr *     scan_bd;
  133
  134     // Remembered sets on this CPU.  Each GC thread has its own
  135     // private per-generation remembered sets, so it can add an item
  136     // to the remembered set without taking a lock.  The mut_lists
  137     // array on a gc_thread is the same as the one on the
  138     // corresponding Capability; we stash it here too for easy access
  139     // during GC; see recordMutableGen_GC().
  140     bdescr **    mut_lists;
  141
  142     // --------------------
  143     // evacuate flags
  144
  145     step *evac_step;               // Youngest generation that objects
  146                                    // should be evacuated to in
  147                                    // evacuate().  (Logically an
  148                                    // argument to evacuate, but it's
  149                                    // static a lot of the time so we
  150                                    // optimise it into a per-thread
  151                                    // variable).
  152
  153     rtsBool failed_to_evac;        // failure to evacuate an object typically
  154                                    // causes it to be recorded in the mutable
  155                                    // object list
  156
  157     rtsBool eager_promotion;       // forces promotion to the evac gen
  158                                    // instead of the to-space
  159                                    // corresponding to the object
  160
  161     lnat thunk_selector_depth;     // not used as of now
  162
  163 #ifdef USE_PAPI
  164     int papi_events;               // presumably a PAPI event-set handle -- confirm
  165 #endif // USE_PAPI
  166
  167     // -------------------
  168     // stats: per-thread counters (NOTE(review): units are not visible here)
  169
  170     lnat copied;
  171     lnat scanned;
  172     lnat any_work;
  173     lnat no_work;
  174     lnat scav_find_work;
  175
  176     // -------------------
  177     // workspaces
  178
  179     // array of workspaces, indexed by stp->abs_no.  This is placed
  180     // directly at the end of the gc_thread structure so that we can get from
  181     // the gc_thread pointer to a workspace using only pointer
  182     // arithmetic, no memory access.  This happens in the inner loop
  183     // of the GC, see Evac.c:alloc_for_copy().
  184     step_workspace steps[];        // flexible array member: must stay the last field
  185 } gc_thread;
 186
 187
  188 extern nat n_gc_threads;        // presumably the number of GC threads -- confirm
  189
  190 extern gc_thread **gc_threads;  // pointers to gc_thread structs; presumably n_gc_threads entries -- confirm
 191
  192 /* -----------------------------------------------------------------------------
  193    The gct variable is thread-local and points to the current thread's
  194    gc_thread structure.  It is heavily accessed, so we try to put gct
  195    into a global register variable if possible; if we don't have a
  196    register then use gcc's __thread extension to create a thread-local
  197    variable.
  198
  199    Even on x86 where registers are scarce, it is worthwhile using a
  200    register variable here: I measured about a 2-5% slowdown with the
  201    __thread version.
  202    -------------------------------------------------------------------------- */
  203 // Declare `name` as a register variable; the expansion ends in its own ';'.
  204 #define GLOBAL_REG_DECL(type,name,reg) register type name REG(reg);
  205 // DECLARE_GCT: defines gct in the __thread cases, empty in the register cases.
  206 #if defined(sparc_HOST_ARCH)
  207 // Don't use REG_Base or R1 for gct on SPARC because they're getting clobbered
  208 //      by something else. Not sure what yet. -- BL 2009/01/03
  209
  210 extern __thread gc_thread* gct;
  211 #define DECLARE_GCT __thread gc_thread* gct;
  212
  213 #elif defined(REG_Base) && !defined(i386_HOST_ARCH)
  214 // On i386, REG_Base is %ebx which is also used for PIC, so we don't
  215 // want to steal it.
  216
  217 GLOBAL_REG_DECL(gc_thread*, gct, REG_Base)
  218 #define DECLARE_GCT /* nothing */
  219
  220 #elif defined(REG_R1)
  221 // No usable REG_Base: keep gct in REG_R1 instead.
  222 GLOBAL_REG_DECL(gc_thread*, gct, REG_R1)
  223 #define DECLARE_GCT /* nothing */
  224
  225 #elif defined(__GNUC__)
  226 // No register available: fall back to gcc's __thread extension.
  227 extern __thread gc_thread* gct;
  228 #define DECLARE_GCT __thread gc_thread* gct;
  229
  230 #else
  231
  232 #error Cannot find a way to declare the thread-local gct
  233
  234 #endif // gct declaration
 235
 236 #endif // GCTHREAD_H
 237