X-Git-Url: http://git.megacz.com/?p=ghc-hetmet.git;a=blobdiff_plain;f=ghc%2Fincludes%2FSMP.h;h=93af9dfa9f5fd104ece9b6a6fbf35163eb96a06d;hp=319f51022056035ab5cd5f90fd2d8ede9235bb65;hb=eeb22c33a15bc8b2b2c71739b15b0e39181dbbf9;hpb=53057d6fc143017dfb7932c225b6bee7351e412f

diff --git a/ghc/includes/SMP.h b/ghc/includes/SMP.h
index 319f510..93af9df 100644
--- a/ghc/includes/SMP.h
+++ b/ghc/includes/SMP.h
@@ -1,84 +1,160 @@
 /* ----------------------------------------------------------------------------
- * $Id: SMP.h,v 1.4 2002/02/04 20:10:47 sof Exp $
  *
- * (c) The GHC Team, 1999
+ * (c) The GHC Team, 2005
  *
- * Macros for SMP support
+ * Macros for THREADED_RTS support
  *
  * -------------------------------------------------------------------------- */
 
 #ifndef SMP_H
 #define SMP_H
 
-/* SMP is currently not compatible with the following options:
+/* THREADED_RTS is currently not compatible with the following options:
  *
- * INTERPRETER
- * PROFILING
+ * PROFILING (but only 1 CPU supported)
  * TICKY_TICKY
- * and unregisterised builds.
+ * Unregisterised builds are ok, but only 1 CPU supported.
  */
 
-#if defined(SMP)
+#if defined(THREADED_RTS)
 
-#if defined(PROFILING) || defined(TICKY_TICKY)
-#error Build options incompatible with SMP.
+#if defined(TICKY_TICKY)
+#error Build options incompatible with THREADED_RTS.
 #endif
 
-/*
- * CMPXCHG - this instruction is the standard "test & set".  We use it
- * for locking closures in the thunk and blackhole entry code.  If the
- * closure is already locked, or has an unexpected info pointer
- * (because another thread is altering it in parallel), we just jump
- * to the new entry point.
+/*
+ * XCHG - the atomic exchange instruction.  Used for locking closures
+ * during updates (see lockClosure() below) and the MVar primops.
+ *
+ * NB: the xchg instruction is implicitly locked, so we do not need
+ * a lock prefix here.
  */
-#if defined(i386_TARGET_ARCH) && defined(TABLES_NEXT_TO_CODE)
-#define CMPXCHG(p, cmp, new) \
-  __asm__ __volatile__ ( \
-          "lock ; cmpxchg %1, %0\n" \
-          "\tje 1f\n" \
-          "\tjmp *%%eax\n" \
-          "\t1:\n" \
-          : /* no outputs */ \
-          : "m" (p), "r" (new), "r" (cmp) \
-          )
+INLINE_HEADER StgWord
+xchg(StgPtr p, StgWord w)
+{
+    StgWord result;
+#if i386_HOST_ARCH || x86_64_HOST_ARCH
+    result = w;
+    __asm__ __volatile__ (
+        "xchg %1,%0"
+        :"+r" (result), "+m" (*p)
+        : /* no input-only operands */
+        );
+#elif powerpc_HOST_ARCH
+    __asm__ __volatile__ (
+        "1:     lwarx     %0, 0, %2\n"
+        "       stwcx.    %1, 0, %2\n"
+        "       bne-      1b"
+        :"=r" (result)
+        :"r" (w), "r" (p)
+        );
+#else
+#error xchg() unimplemented on this architecture
+#endif
+    return result;
+}
 
 /*
- * XCHG - the atomic exchange instruction.  Used for locking closures
- * during updates (see LOCK_CLOSURE below) and the MVar primops.
+ * CMPXCHG - the single-word atomic compare-and-exchange instruction.  Used
+ * in the STM implementation.
  */
-#define XCHG(reg, obj) \
-  __asm__ __volatile__ ( \
-          "xchgl %1,%0" \
-          :"+r" (reg), "+m" (obj) \
-          : /* no input-only operands */ \
-        )
+INLINE_HEADER StgWord
+cas(StgVolatilePtr p, StgWord o, StgWord n)
+{
+#if i386_HOST_ARCH || x86_64_HOST_ARCH
+    __asm__ __volatile__ (
+        "lock/cmpxchg %3,%1"
+        :"=a"(o), "=m" (*(volatile unsigned int *)p)
+        :"0" (o), "r" (n));
+    return o;
+#elif powerpc_HOST_ARCH
+    StgWord result;
+    __asm__ __volatile__ (
+        "1:     lwarx     %0, 0, %3\n"
+        "       cmpw      %0, %1\n"
+        "       bne       2f\n"
+        "       stwcx.    %2, 0, %3\n"
+        "       bne-      1b\n"
+        "2:"
+        :"=r" (result)
+        :"r" (o), "r" (n), "r" (p)
+        );
+    return result;
+#else
+#error cas() unimplemented on this architecture
+#endif
+}
+/*
+ * Write barrier - ensure that all preceding writes have happened
+ * before all following writes.
+ *
+ * We need to tell both the compiler AND the CPU about the barrier.
+ * This is a brute force solution; better results might be obtained by
+ * using volatile type declarations to get fine-grained ordering
+ * control in C, and optionally a memory barrier instruction on CPUs
+ * that require it (not x86 or x86_64).
+ */
+INLINE_HEADER void
+wb(void) {
+#if i386_HOST_ARCH || x86_64_HOST_ARCH
+    __asm__ __volatile__ ("" : : : "memory");
+#elif powerpc_HOST_ARCH
+    __asm__ __volatile__ ("lwsync" : : : "memory");
 #else
-#error SMP macros not defined for this architecture
+#error memory barriers unimplemented on this architecture
 #endif
+}
 
 /*
- * LOCK_CLOSURE locks the specified closure, busy waiting for any
- * existing locks to be cleared.
+ * Locking/unlocking closures
+ *
+ * This is used primarily in the implementation of MVars.
  */
-#define LOCK_CLOSURE(c) \
-  ({ \
-     const StgInfoTable *__info; \
-     __info = &stg_WHITEHOLE_info; \
-     do { \
-       XCHG(__info,((StgClosure *)(c))->header.info); \
-     } while (__info == &stg_WHITEHOLE_info); \
-     __info; \
-  })
+#define SPIN_COUNT 4000
+
+INLINE_HEADER StgInfoTable *
+lockClosure(StgClosure *p)
+{
+#if i386_HOST_ARCH || x86_64_HOST_ARCH || powerpc_HOST_ARCH
+    StgWord info;
+    do {
+        nat i = 0;
+        do {
+            info = xchg((P_)&p->header.info, (W_)&stg_WHITEHOLE_info);
+            if (info != (W_)&stg_WHITEHOLE_info) return (StgInfoTable *)info;
+        } while (++i < SPIN_COUNT);
+        yieldThread();
+    } while (1);
+#else
+    ACQUIRE_SM_LOCK
+#endif
+}
+
+INLINE_HEADER void
+unlockClosure(StgClosure *p, StgInfoTable *info)
+{
+#if i386_HOST_ARCH || x86_64_HOST_ARCH || powerpc_HOST_ARCH
+    // This is a strictly ordered write, so we need a wb():
+    wb();
+    p->header.info = info;
+#else
+    RELEASE_SM_LOCK;
+#endif
+}
 
-#define LOCK_THUNK(__info) \
-  CMPXCHG(R1.cl->header.info, __info, &stg_WHITEHOLE_info);
+#else /* !THREADED_RTS */
 
-#else /* !SMP */
+#define wb() /* nothing */
 
-#define LOCK_CLOSURE(c) /* nothing */
-#define LOCK_THUNK(__info) /* nothing */
+INLINE_HEADER StgWord
+xchg(StgPtr p, StgWord w)
+{
+    StgWord old = *p;
+    *p = w;
+    return old;
+}
 
-#endif /* SMP */
+#endif /* !THREADED_RTS */
 
 #endif /* SMP_H */