1 /* ----------------------------------------------------------------------------
3 * (c) The GHC Team, 2005-2008
5 * Macros for multi-CPU support
7 * -------------------------------------------------------------------------- */
12 /* THREADED_RTS is currently not compatible with the following options:
14 * PROFILING (but only 1 CPU supported)
16 * Unregisterised builds are ok, but only 1 CPU supported.
19 #if defined(THREADED_RTS)
21 #if defined(TICKY_TICKY)
22 #error Build options incompatible with THREADED_RTS.
25 /* ----------------------------------------------------------------------------
27 ------------------------------------------------------------------------- */
30 // We only want write_barrier() declared in .hc files. Defining the
31 // other inline functions here causes type mismatch errors from gcc,
32 // because the generated C code is assuming that there are no
33 // prototypes in scope.
36 * The atomic exchange operation: xchg(p,w) exchanges the value
37 * pointed to by p with the value w, returning the old value.
39 * Used for locking closures during updates (see lockClosure() below)
40 * and the MVar primops.
42 EXTERN_INLINE StgWord xchg(StgPtr p, StgWord w);
45 * Compare-and-swap. Atomically does this:
49 * if (r == o) { *p = n };
53 EXTERN_INLINE StgWord cas(StgVolatilePtr p, StgWord o, StgWord n);
62 EXTERN_INLINE StgWord atomic_inc(StgVolatilePtr p);
71 EXTERN_INLINE StgWord atomic_dec(StgVolatilePtr p);
73 #endif // !IN_STG_CODE
76 * Various kinds of memory barrier.
 * write_barrier: prevents future stores occurring before preceding stores.
78 * store_load_barrier: prevents future loads occurring before preceding stores.
 * load_load_barrier: prevents future loads occurring before earlier loads.
81 * Reference for these: "The JSR-133 Cookbook for Compiler Writers"
82 * http://gee.cs.oswego.edu/dl/jmm/cookbook.html
84 * To check whether you got these right, try the test in
85 * testsuite/tests/ghc-regress/rts/testwsdeque.c
86 * This tests the work-stealing deque implementation, which relies on
87 * properly working store_load and load_load memory barriers.
89 EXTERN_INLINE void write_barrier(void);
90 EXTERN_INLINE void store_load_barrier(void);
91 EXTERN_INLINE void load_load_barrier(void);
93 /* ----------------------------------------------------------------------------
95 ------------------------------------------------------------------------- */
100 * NB: the xchg instruction is implicitly locked, so we do not need
101 * a lock prefix here.
103 EXTERN_INLINE StgWord
104 xchg(StgPtr p, StgWord w)
107 #if i386_HOST_ARCH || x86_64_HOST_ARCH
109 __asm__ __volatile__ (
111 :"+r" (result), "+m" (*p)
112 : /* no input-only operands */
114 #elif powerpc_HOST_ARCH
115 __asm__ __volatile__ (
116 "1: lwarx %0, 0, %2\n"
117 " stwcx. %1, 0, %2\n"
122 #elif sparc_HOST_ARCH
124 __asm__ __volatile__ (
126 : "+r" (result), "+m" (*p)
127 : /* no input-only operands */
129 #elif !defined(WITHSMP)
133 #error xchg() unimplemented on this architecture
139 * CMPXCHG - the single-word atomic compare-and-exchange instruction. Used
140 * in the STM implementation.
142 EXTERN_INLINE StgWord
143 cas(StgVolatilePtr p, StgWord o, StgWord n)
145 #if i386_HOST_ARCH || x86_64_HOST_ARCH
146 __asm__ __volatile__ (
147 "lock\ncmpxchg %3,%1"
148 :"=a"(o), "=m" (*(volatile unsigned int *)p)
151 #elif powerpc_HOST_ARCH
153 __asm__ __volatile__ (
154 "1: lwarx %0, 0, %3\n"
157 " stwcx. %2, 0, %3\n"
161 :"r" (o), "r" (n), "r" (p)
165 #elif sparc_HOST_ARCH
166 __asm__ __volatile__ (
173 #elif !defined(WITHSMP)
181 #error cas() unimplemented on this architecture
185 EXTERN_INLINE StgWord
186 atomic_inc(StgVolatilePtr p)
188 #if defined(i386_HOST_ARCH) || defined(x86_64_HOST_ARCH)
191 __asm__ __volatile__ (
201 } while (cas(p, old, new) != old);
206 EXTERN_INLINE StgWord
207 atomic_dec(StgVolatilePtr p)
209 #if defined(i386_HOST_ARCH) || defined(x86_64_HOST_ARCH)
212 __asm__ __volatile__ (
222 } while (cas(p, old, new) != old);
227 #endif // !IN_STG_CODE
230 * We need to tell both the compiler AND the CPU about the barriers.
231 * It's no good preventing the CPU from reordering the operations if
232 * the compiler has already done so - hence the "memory" restriction
233 * on each of the barriers below.
236 write_barrier(void) {
237 #if i386_HOST_ARCH || x86_64_HOST_ARCH
238 __asm__ __volatile__ ("" : : : "memory");
239 #elif powerpc_HOST_ARCH
240 __asm__ __volatile__ ("lwsync" : : : "memory");
241 #elif sparc_HOST_ARCH
242 /* Sparc in TSO mode does not require store/store barriers. */
243 __asm__ __volatile__ ("" : : : "memory");
244 #elif !defined(WITHSMP)
247 #error memory barriers unimplemented on this architecture
252 store_load_barrier(void) {
254 __asm__ __volatile__ ("lock; addl $0,0(%%esp)" : : : "memory");
255 #elif x86_64_HOST_ARCH
256 __asm__ __volatile__ ("lock; addq $0,0(%%rsp)" : : : "memory");
257 #elif powerpc_HOST_ARCH
258 __asm__ __volatile__ ("sync" : : : "memory");
259 #elif sparc_HOST_ARCH
260 __asm__ __volatile__ ("membar #StoreLoad" : : : "memory");
261 #elif !defined(WITHSMP)
264 #error memory barriers unimplemented on this architecture
269 load_load_barrier(void) {
271 __asm__ __volatile__ ("" : : : "memory");
272 #elif x86_64_HOST_ARCH
273 __asm__ __volatile__ ("" : : : "memory");
274 #elif powerpc_HOST_ARCH
275 __asm__ __volatile__ ("lwsync" : : : "memory");
276 #elif sparc_HOST_ARCH
277 /* Sparc in TSO mode does not require load/load barriers. */
278 __asm__ __volatile__ ("" : : : "memory");
279 #elif !defined(WITHSMP)
282 #error memory barriers unimplemented on this architecture
286 /* ---------------------------------------------------------------------- */
287 #else /* !THREADED_RTS */
/* Non-threaded RTS: there is no other CPU whose view of memory we need
   to order against, so the barriers compile away to nothing. */
#define write_barrier()      /* nothing */
#define store_load_barrier() /* nothing */
#define load_load_barrier()  /* nothing */
293 INLINE_HEADER StgWord
294 xchg(StgPtr p, StgWord w)
301 STATIC_INLINE StgWord
302 cas(StgVolatilePtr p, StgWord o, StgWord n)
312 INLINE_HEADER StgWord
313 atomic_inc(StgVolatilePtr p)
318 INLINE_HEADER StgWord
319 atomic_dec(StgVolatilePtr p)
324 #endif /* !THREADED_RTS */