rts/sm/GCThread.h

   1 /* -----------------------------------------------------------------------------
   2  *
   3  * (c) The GHC Team 1998-2008
   4  *
   5  * Generational garbage collector
   6  *
   7  * Documentation on the architecture of the Garbage Collector can be
   8  * found in the online commentary:
   9  *
  10  *   http://hackage.haskell.org/trac/ghc/wiki/Commentary/Rts/Storage/GC
  11  *
  12  * ---------------------------------------------------------------------------*/
  13
  14 #ifndef SM_GCTHREAD_H
  15 #define SM_GCTHREAD_H
  16
  17 #include "WSDeque.h"
  18 #include "GetTime.h" // for Ticks
  19
  20 #include "BeginPrivate.h"
  21
  22 /* -----------------------------------------------------------------------------
  23    General scheme
  24
  25    ToDo: move this to the wiki when the implementation is done.
  26
  27    We're only going to try to parallelise the copying GC for now.  The
  28    Plan is as follows.
  29
  30    Each thread has a gc_thread structure (see below) which holds its
  31    thread-local data.  We'll keep a pointer to this in a thread-local
  32    variable, or possibly in a register.
  33
  34    In the gc_thread structure is a gen_workspace for each generation.  The
  35    primary purpose of the gen_workspace is to hold evacuated objects;
  36    when an object is evacuated, it is copied to the "todo" block in
  37    the thread's workspace for the appropriate generation.  When the todo
  38    block is full, it is pushed to the global gen->todos list, which
  39    is protected by a lock.  (in fact we intervene a one-place buffer
  40    here to reduce contention).
  41
  42    A thread repeatedly grabs a block of work from one of the
  43    gen->todos lists, scavenges it, and keeps the scavenged block on
  44    its own ws->scavd_list (this is to avoid unnecessary contention
  45    returning the completed buffers back to the generation: we can just
  46    collect them all later).
  47
  48    When there is no global work to do, we start scavenging the todo
  49    blocks in the workspaces.  This is where the scan_bd field comes
  50    in: we can scan the contents of the todo block, when we have
  51    scavenged the contents of the todo block (up to todo_bd->free), we
  52    don't want to move this block immediately to the scavd_list,
  53    because it is probably only partially full.  So we remember that we
  54    have scanned up to this point by saving the block in ws->scan_bd,
  55    with the current scan pointer in ws->scan.  Later, when more
  56    objects have been copied to this block, we can come back and scan
  57    the rest.  When we visit this workspace again in the future,
  58    scan_bd may still be the same as todo_bd, or it might be different:
  59    if enough objects were copied into this block that it filled up,
  60    then we will have allocated a new todo block, but *not* pushed the
  61    old one to the generation, because it is partially scanned.
  62
  63    The reason to leave scanning the todo blocks until last is that we
  64    want to deal with full blocks as far as possible.
  65    ------------------------------------------------------------------------- */
  66
  67
  68 /* -----------------------------------------------------------------------------
  69    Generation Workspace
  70
  71    A generation workspace exists for each generation for each GC
  72    thread. The GC thread takes a block from the todos list of the
  73    generation into the scanbd and then scans it.  Objects referred to
  74    by those in the scan block are copied into the todo or scavd blocks
  75    of the relevant generation.
  76
  77    ------------------------------------------------------------------------- */
  78
  79 typedef struct gen_workspace_ {
  80     generation * gen;           // the gen for this workspace
  81     struct gc_thread_ * my_gct; // the gc_thread that contains this workspace
  82
  83     // where objects to be scavenged go
  84     bdescr *     todo_bd;
  85     StgPtr       todo_free;            // free ptr for todo_bd
  86     StgPtr       todo_lim;             // lim for todo_bd
  87
  88     WSDeque *    todo_q;
  89     bdescr *     todo_overflow;
  90     nat          n_todo_overflow;
  91
  92     // where large objects to be scavenged go
  93     bdescr *     todo_large_objects;
  94
  95     // Objects that have already been scavenged.
  96     bdescr *     scavd_list;
  97     nat          n_scavd_blocks;     // count of blocks in this list
  98
  99     // Partially-full, scavenged, blocks
 100     bdescr *     part_list;
 101     unsigned int n_part_blocks;      // count of above
 102
 103     StgWord pad[3];
 104
 105 } gen_workspace ATTRIBUTE_ALIGNED(64);
 106 // align so that computing gct->gens[n] is a shift, not a multiply
 107 // fails if the size is <64, which is why we need the pad above
 108
 109 /* ----------------------------------------------------------------------------
 110    GC thread object
 111
 112    Every GC thread has one of these. It contains all the generation
 113    specific workspaces and other GC thread local information. At some
 114    later point it maybe useful to move this other into the TLS store
 115    of the GC threads
 116    ------------------------------------------------------------------------- */
 117
 118 typedef struct gc_thread_ {
 119     Capability *cap;
 120
 121 #ifdef THREADED_RTS
 122     OSThreadId id;                 // The OS thread that this struct belongs to
 123     SpinLock   gc_spin;
 124     SpinLock   mut_spin;
 125     volatile rtsBool wakeup;
 126 #endif
 127     nat thread_index;              // a zero based index identifying the thread
 128
 129     bdescr * free_blocks;          // a buffer of free blocks for this thread
 130                                    //  during GC without accessing the block
 131                                    //   allocators spin lock.
 132
 133     StgClosure* static_objects;      // live static objects
 134     StgClosure* scavenged_static_objects;   // static objects scavenged so far
 135
 136     lnat gc_count;                 // number of GCs this thread has done
 137
 138     // block that is currently being scanned
 139     bdescr *     scan_bd;
 140
 141     // Remembered sets on this CPU.  Each GC thread has its own
 142     // private per-generation remembered sets, so it can add an item
 143     // to the remembered set without taking a lock.  The mut_lists
 144     // array on a gc_thread is the same as the one on the
 145     // corresponding Capability; we stash it here too for easy access
 146     // during GC; see recordMutableGen_GC().
 147     bdescr **    mut_lists;
 148
 149     // --------------------
 150     // evacuate flags
 151
 152     nat evac_gen_no;               // Youngest generation that objects
 153                                    // should be evacuated to in
 154                                    // evacuate().  (Logically an
 155                                    // argument to evacuate, but it's
 156                                    // static a lot of the time so we
 157                                    // optimise it into a per-thread
 158                                    // variable).
 159
 160     rtsBool failed_to_evac;        // failure to evacuate an object typically
 161                                    // Causes it to be recorded in the mutable
 162                                    // object list
 163
 164     rtsBool eager_promotion;       // forces promotion to the evac gen
 165                                    // instead of the to-space
 166                                    // corresponding to the object
 167
 168     lnat thunk_selector_depth;     // used to avoid unbounded recursion in
 169                                    // evacuate() for THUNK_SELECTOR
 170
 171 #ifdef USE_PAPI
 172     int papi_events;
 173 #endif
 174
 175     // -------------------
 176     // stats
 177
 178     lnat copied;
 179     lnat scanned;
 180     lnat any_work;
 181     lnat no_work;
 182     lnat scav_find_work;
 183
 184     Ticks gc_start_cpu;   // process CPU time
 185     Ticks gc_start_elapsed;  // process elapsed time
 186     Ticks gc_start_thread_cpu; // thread CPU time
 187     lnat gc_start_faults;
 188
 189     // -------------------
 190     // workspaces
 191
 192     // array of workspaces, indexed by gen->abs_no.  This is placed
 193     // directly at the end of the gc_thread structure so that we can get from
 194     // the gc_thread pointer to a workspace using only pointer
 195     // arithmetic, no memory access.  This happens in the inner loop
 196     // of the GC, see Evac.c:alloc_for_copy().
 197     gen_workspace gens[];
 198 } gc_thread;
 199
 200
 201 extern nat n_gc_threads;
 202
 203 extern gc_thread **gc_threads;
 204
 205 #include "EndPrivate.h"
 206
 207 #endif // SM_GCTHREAD_H
 208