X-Git-Url: http://git.megacz.com/?a=blobdiff_plain;f=rts%2Fsm%2FGCThread.h;h=a274bb21bb608dcc15c6afface844d1e0884099d;hb=ef70af356e3229cc5c64359bf7866e5fdf44bb09;hp=8d171ae3ee11cb8318960945adfa7298aae6c7c7;hpb=a370654a872838c43e63bdd6cc279c0ee9913cdf;p=ghc-hetmet.git diff --git a/rts/sm/GCThread.h b/rts/sm/GCThread.h index 8d171ae..a274bb2 100644 --- a/rts/sm/GCThread.h +++ b/rts/sm/GCThread.h @@ -15,6 +15,7 @@ #define GCTHREAD_H #include "OSThreads.h" +#include "WSDeque.h" /* ----------------------------------------------------------------------------- General scheme @@ -74,20 +75,21 @@ typedef struct step_workspace_ { step * step; // the step for this workspace - struct gc_thread_ * gct; // the gc_thread that contains this workspace + struct gc_thread_ * my_gct; // the gc_thread that contains this workspace // where objects to be scavenged go bdescr * todo_bd; StgPtr todo_free; // free ptr for todo_bd StgPtr todo_lim; // lim for todo_bd - bdescr * buffer_todo_bd; // buffer to reduce contention - // on the step's todos list + WSDeque * todo_q; + bdescr * todo_overflow; + nat n_todo_overflow; // where large objects to be scavenged go bdescr * todo_large_objects; - // Objects that have already been, scavenged. + // Objects that have already been scavenged. bdescr * scavd_list; nat n_scavd_blocks; // count of blocks in this list @@ -95,13 +97,17 @@ typedef struct step_workspace_ { bdescr * part_list; unsigned int n_part_blocks; // count of above -} step_workspace; + StgWord pad[3]; + +} step_workspace ATTRIBUTE_ALIGNED(64); +// align so that computing gct->steps[n] is a shift, not a multiply +// fails if the size is <64, which is why we need the pad above /* ---------------------------------------------------------------------------- GC thread object Every GC thread has one of these. It contains all the step specific - workspaces and other GC thread loacl information. At some later + workspaces and other GC thread local information. At some later point it maybe useful to move this other into the TLS store of the GC threads ------------------------------------------------------------------------- */ @@ -109,10 +115,9 @@ typedef struct step_workspace_ { typedef struct gc_thread_ { #ifdef THREADED_RTS OSThreadId id; // The OS thread that this struct belongs to - Mutex wake_mutex; - Condition wake_cond; // So we can go to sleep between GCs - rtsBool wakeup; - rtsBool exit; + SpinLock gc_spin; + SpinLock mut_spin; + volatile rtsBool wakeup; #endif nat thread_index; // a zero based index identifying the thread @@ -128,6 +133,14 @@ typedef struct gc_thread_ { // block that is currently being scanned bdescr * scan_bd; + // Remembered sets on this CPU. Each GC thread has its own + // private per-generation remembered sets, so it can add an item + // to the remembered set without taking a lock. The mut_lists + // array on a gc_thread is the same as the one on the + // corresponding Capability; we stash it here too for easy access + // during GC; see recordMutableGen_GC(). + bdescr ** mut_lists; + // -------------------- // evacuate flags @@ -176,9 +189,70 @@ typedef struct gc_thread_ { extern nat n_gc_threads; +/* ----------------------------------------------------------------------------- + The gct variable is thread-local and points to the current thread's + gc_thread structure. It is heavily accessed, so we try to put gct + into a global register variable if possible; if we don't have a + register then use gcc's __thread extension to create a thread-local + variable. + + Even on x86 where registers are scarce, it is worthwhile using a + register variable here: I measured about a 2-5% slowdown with the + __thread version. + -------------------------------------------------------------------------- */ + extern gc_thread **gc_threads; -register gc_thread *gct __asm__("%rbx"); -// extern gc_thread *gct; // this thread's gct TODO: make thread-local + +#if defined(THREADED_RTS) + +#define GLOBAL_REG_DECL(type,name,reg) register type name REG(reg); + +#define SET_GCT(to) gct = (to) + +#if defined(sparc_HOST_ARCH) || (defined(i386_HOST_ARCH) && defined(linux_HOST_OS)) +// Don't use REG_base or R1 for gct on SPARC because they're getting clobbered +// by something else. Not sure what yet. -- BL 2009/01/03 + +// Using __thread is better than stealing a register on x86/Linux, because +// we have too few registers available. In my tests it was worth +// about 5% in GC performance, but of course that might change as gcc +// improves. -- SDM 2009/04/03 + +extern __thread gc_thread* gct; +#define DECLARE_GCT __thread gc_thread* gct; + +#elif defined(REG_Base) && !defined(i386_HOST_ARCH) +// on i386, REG_Base is %ebx which is also used for PIC, so we don't +// want to steal it + +GLOBAL_REG_DECL(gc_thread*, gct, REG_Base) +#define DECLARE_GCT /* nothing */ + +#elif defined(REG_R1) + +GLOBAL_REG_DECL(gc_thread*, gct, REG_R1) +#define DECLARE_GCT /* nothing */ + +#elif defined(__GNUC__) + +extern __thread gc_thread* gct; +#define DECLARE_GCT __thread gc_thread* gct; + +#else + +#error Cannot find a way to declare the thread-local gct + +#endif + +#else // not the threaded RTS + +extern StgWord8 the_gc_thread[]; + +#define gct ((gc_thread*)&the_gc_thread) +#define SET_GCT(to) /*nothing*/ +#define DECLARE_GCT /*nothing*/ + +#endif #endif // GCTHREAD_H