/* ----------------------------------------------------------------------------
 *
 * (c) The GHC Team, 2005-2008
 *
 * Macros for multi-CPU support
 *
 * -------------------------------------------------------------------------- */

/* THREADED_RTS is currently not compatible with the following options:
 *
 *      PROFILING (but only 1 CPU supported)
 *
 * Unregisterised builds are ok, but only 1 CPU supported.
 */
19 #if defined(THREADED_RTS)
21 #if defined(TICKY_TICKY)
22 #error Build options incompatible with THREADED_RTS.
/* ----------------------------------------------------------------------------
 * Atomic operations
 * ------------------------------------------------------------------------- */

// We only want write_barrier() declared in .hc files.  Defining the
// other inline functions here causes type mismatch errors from gcc,
// because the generated C code is assuming that there are no
// prototypes in scope.
36 * The atomic exchange operation: xchg(p,w) exchanges the value
37 * pointed to by p with the value w, returning the old value.
39 * Used for locking closures during updates (see lockClosure() below)
40 * and the MVar primops.
42 EXTERN_INLINE StgWord xchg(StgPtr p, StgWord w);
45 * Compare-and-swap. Atomically does this:
49 * if (r == o) { *p = n };
53 EXTERN_INLINE StgWord cas(StgVolatilePtr p, StgWord o, StgWord n);
55 #endif // !IN_STG_CODE
58 * Various kinds of memory barrier.
59 * write_barrier: prevents future stores occurring before prededing stores.
60 * store_load_barrier: prevents future loads occurring before preceding stores.
61 * load_load_barrier: prevents future loads occurring before earlier stores.
63 * Reference for these: "The JSR-133 Cookbook for Compiler Writers"
64 * http://gee.cs.oswego.edu/dl/jmm/cookbook.html
66 * To check whether you got these right, try the test in
67 * testsuite/tests/ghc-regress/rts/testwsdeque.c
68 * This tests the work-stealing deque implementation, which relies on
69 * properly working store_load and load_load memory barriers.
71 EXTERN_INLINE void write_barrier(void);
72 EXTERN_INLINE void store_load_barrier(void);
73 EXTERN_INLINE void load_load_barrier(void);
/* ----------------------------------------------------------------------------
 * Implementations
 * ------------------------------------------------------------------------- */
82 * NB: the xchg instruction is implicitly locked, so we do not need
86 xchg(StgPtr p, StgWord w)
89 #if i386_HOST_ARCH || x86_64_HOST_ARCH
91 __asm__ __volatile__ (
93 :"+r" (result), "+m" (*p)
94 : /* no input-only operands */
96 #elif powerpc_HOST_ARCH
97 __asm__ __volatile__ (
98 "1: lwarx %0, 0, %2\n"
104 #elif sparc_HOST_ARCH
106 __asm__ __volatile__ (
108 : "+r" (result), "+m" (*p)
109 : /* no input-only operands */
111 #elif !defined(WITHSMP)
115 #error xchg() unimplemented on this architecture
121 * CMPXCHG - the single-word atomic compare-and-exchange instruction. Used
122 * in the STM implementation.
124 EXTERN_INLINE StgWord
125 cas(StgVolatilePtr p, StgWord o, StgWord n)
127 #if i386_HOST_ARCH || x86_64_HOST_ARCH
128 __asm__ __volatile__ (
129 "lock\ncmpxchg %3,%1"
130 :"=a"(o), "=m" (*(volatile unsigned int *)p)
133 #elif powerpc_HOST_ARCH
135 __asm__ __volatile__ (
136 "1: lwarx %0, 0, %3\n"
139 " stwcx. %2, 0, %3\n"
143 :"r" (o), "r" (n), "r" (p)
147 #elif sparc_HOST_ARCH
148 __asm__ __volatile__ (
155 #elif !defined(WITHSMP)
163 #error cas() unimplemented on this architecture
167 #endif // !IN_STG_CODE
170 * We need to tell both the compiler AND the CPU about the barriers.
171 * It's no good preventing the CPU from reordering the operations if
172 * the compiler has already done so - hence the "memory" restriction
173 * on each of the barriers below.
176 write_barrier(void) {
177 #if i386_HOST_ARCH || x86_64_HOST_ARCH
178 __asm__ __volatile__ ("" : : : "memory");
179 #elif powerpc_HOST_ARCH
180 __asm__ __volatile__ ("lwsync" : : : "memory");
181 #elif sparc_HOST_ARCH
182 /* Sparc in TSO mode does not require store/store barriers. */
183 __asm__ __volatile__ ("" : : : "memory");
184 #elif !defined(WITHSMP)
187 #error memory barriers unimplemented on this architecture
192 store_load_barrier(void) {
194 __asm__ __volatile__ ("lock; addl $0,0(%%esp)" : : : "memory");
195 #elif x86_64_HOST_ARCH
196 __asm__ __volatile__ ("lock; addq $0,0(%%rsp)" : : : "memory");
197 #elif powerpc_HOST_ARCH
198 __asm__ __volatile__ ("sync" : : : "memory");
199 #elif sparc_HOST_ARCH
200 __asm__ __volatile__ ("membar #StoreLoad" : : : "memory");
201 #elif !defined(WITHSMP)
204 #error memory barriers unimplemented on this architecture
209 load_load_barrier(void) {
211 __asm__ __volatile__ ("" : : : "memory");
212 #elif x86_64_HOST_ARCH
213 __asm__ __volatile__ ("" : : : "memory");
214 #elif powerpc_HOST_ARCH
215 __asm__ __volatile__ ("lwsync" : : : "memory");
216 #elif sparc_HOST_ARCH
217 /* Sparc in TSO mode does not require load/load barriers. */
218 __asm__ __volatile__ ("" : : : "memory");
219 #elif !defined(WITHSMP)
222 #error memory barriers unimplemented on this architecture
/* ---------------------------------------------------------------------- */
227 #else /* !THREADED_RTS */
229 #define write_barrier() /* nothing */
230 #define store_load_barrier() /* nothing */
231 #define load_load_barrier() /* nothing */
233 INLINE_HEADER StgWord
234 xchg(StgPtr p, StgWord w)
241 STATIC_INLINE StgWord
242 cas(StgVolatilePtr p, StgWord o, StgWord n)
252 #endif /* !THREADED_RTS */