/* ----------------------------------------------------------------------------
 *
 * (c) The GHC Team, 2005-2009
 *
 * Macros for multi-CPU support
 *
 * Do not #include this file directly: #include "Rts.h" instead.
 *
 * To understand the structure of the RTS headers, see the wiki:
 *   http://hackage.haskell.org/trac/ghc/wiki/Commentary/SourceTree/Includes
 *
 * -------------------------------------------------------------------------- */

#ifndef SMP_H
#define SMP_H

#if defined(THREADED_RTS)

/* ----------------------------------------------------------------------------
   Atomic operations
   ------------------------------------------------------------------------- */

#if !IN_STG_CODE || IN_STGCRUN
// We only want the barriers, e.g. write_barrier(), declared in .hc
// files.  Defining the other inline functions here causes type
// mismatch errors from gcc, because the generated C code is assuming
// that there are no prototypes in scope.

/*
 * The atomic exchange operation: xchg(p,w) exchanges the value
 * pointed to by p with the value w, returning the old value.
 *
 * Used for locking closures during updates (see lockClosure() below)
 * and the MVar primops.
 */
EXTERN_INLINE StgWord xchg(StgPtr p, StgWord w);

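/*
 * A minimal usage sketch, not part of this header: a spin lock built
 * on xchg(), where 1 means locked and 0 means free.  The names
 * spin_lock() and spin_unlock() are hypothetical.
 *
 *   void spin_lock (StgWord *l) {
 *       while (xchg(l, 1) != 0) {
 *           busy_wait_nop();    // see below: hint that we are spinning
 *       }
 *   }
 *
 *   void spin_unlock (StgWord *l) {
 *       write_barrier();        // make protected stores visible first
 *       *l = 0;
 *   }
 */
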
/*
 * Compare-and-swap.  Atomically does this:
 *
 * cas(p,o,n) {
 *    r = *p;
 *    if (r == o) { *p = n };
 *    return r;
 * }
 */
EXTERN_INLINE StgWord cas(StgVolatilePtr p, StgWord o, StgWord n);

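/*
 * A typical cas() retry loop, for illustration only (atomic_or() is a
 * hypothetical helper, not part of this header): read the current
 * value, compute the desired one, and retry if another CPU raced us.
 *
 *   StgWord atomic_or (StgVolatilePtr p, StgWord bits) {
 *       StgWord old, new;
 *       do {
 *           old = *p;
 *           new = old | bits;
 *       } while (cas(p, old, new) != old);
 *       return new;
 *   }
 *
 * The non-x86 code for atomic_inc() and atomic_dec() below uses
 * exactly this pattern.
 */
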
/*
 * Atomic increment
 *
 * atomic_inc(p) {
 *   return ++(*p);
 * }
 */
EXTERN_INLINE StgWord atomic_inc(StgVolatilePtr p);

/*
 * Atomic decrement
 *
 * atomic_dec(p) {
 *   return --(*p);
 * }
 */
EXTERN_INLINE StgWord atomic_dec(StgVolatilePtr p);

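/*
 * Sketch of a typical use, for illustration only: reference counting
 * where the last reference frees the object.  Because atomic_dec()
 * returns the *new* value, exactly one CPU can observe zero.
 * free_object() and the refcount field are hypothetical.
 *
 *   if (atomic_dec(&obj->refcount) == 0) {
 *       free_object(obj);
 *   }
 */
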
/*
 * Busy-wait nop: this is a hint to the CPU that we are currently in a
 * busy-wait loop waiting for another CPU to change something.  On a
 * hyperthreaded CPU it should yield to another thread, for example.
 */
EXTERN_INLINE void busy_wait_nop(void);

#endif // !IN_STG_CODE

/*
 * Various kinds of memory barrier.
 *  write_barrier: prevents future stores occurring before preceding stores.
 *  store_load_barrier: prevents future loads occurring before preceding stores.
 *  load_load_barrier: prevents future loads occurring before preceding loads.
 *
 * Reference for these: "The JSR-133 Cookbook for Compiler Writers"
 *   http://gee.cs.oswego.edu/dl/jmm/cookbook.html
 *
 * To check whether you got these right, try the test in
 *   testsuite/tests/ghc-regress/rts/testwsdeque.c
 * This tests the work-stealing deque implementation, which relies on
 * properly working store_load and load_load memory barriers.
 */
EXTERN_INLINE void write_barrier(void);
EXTERN_INLINE void store_load_barrier(void);
EXTERN_INLINE void load_load_barrier(void);

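/*
 * A sketch of the publication pattern these barriers support, for
 * illustration only; data and flag are hypothetical shared words,
 * both initially zero:
 *
 *   // CPU 1 (producer)            // CPU 2 (consumer)
 *   data = 42;                     while (VOLATILE_LOAD(&flag) == 0) {
 *   write_barrier();                   busy_wait_nop();
 *   flag = 1;                      }
 *                                  load_load_barrier();
 *                                  r = data;   // sees 42
 *
 * Without the write_barrier() the store to flag could become visible
 * before the store to data; without the load_load_barrier() the load
 * of data could be satisfied before the load of flag.
 */
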
/* ----------------------------------------------------------------------------
   Implementations
   ------------------------------------------------------------------------- */

#if !IN_STG_CODE || IN_STGCRUN

EXTERN_INLINE StgWord
xchg(StgPtr p, StgWord w)
{
    StgWord result;
#if i386_HOST_ARCH || x86_64_HOST_ARCH
    result = w;
    __asm__ __volatile__ (
        // NB: the xchg instruction is implicitly locked, so we do not
        // need a lock prefix here.
        "xchg %1,%0"
        :"+r" (result), "+m" (*p)
        : /* no input-only operands */
    );
#elif powerpc_HOST_ARCH
    __asm__ __volatile__ (
        "1: lwarx     %0, 0, %2\n"
        "   stwcx.    %1, 0, %2\n"
        "   bne-      1b"
        :"=&r" (result)
        :"r" (w), "r" (p)
    );
#elif sparc_HOST_ARCH
    result = w;
    __asm__ __volatile__ (
        "swap %1,%0"
        : "+r" (result), "+m" (*p)
        : /* no input-only operands */
    );
#elif !defined(WITHSMP)
    result = *p;
    *p = w;
#else
#error xchg() unimplemented on this architecture
#endif
    return result;
}

/*
 * CMPXCHG - the single-word atomic compare-and-exchange instruction.  Used
 * in the STM implementation.
 */
EXTERN_INLINE StgWord
cas(StgVolatilePtr p, StgWord o, StgWord n)
{
#if i386_HOST_ARCH || x86_64_HOST_ARCH
    // cmpxchg compares o (in register a) with *p and, if they are
    // equal, stores n; the old value of *p is left in register a.
    __asm__ __volatile__ (
        "lock\ncmpxchg %3,%1"
        :"=a"(o), "=m" (*(volatile unsigned int *)p)
        :"0" (o), "r" (n));
    return o;
#elif powerpc_HOST_ARCH
    StgWord result;
    __asm__ __volatile__ (
        "1: lwarx     %0, 0, %3\n"
        "   cmpw      %0, %1\n"
        "   bne       2f\n"
        "   stwcx.    %2, 0, %3\n"
        "   bne-      1b\n"
        "2:"
        :"=&r" (result)
        :"r" (o), "r" (n), "r" (p)
        :"cc", "memory"
    );
    return result;
#elif sparc_HOST_ARCH
    __asm__ __volatile__ (
        "cas [%1], %2, %0"
        : "+r" (n)
        : "r" (p), "r" (o)
        : "memory"
    );
    return n;
#elif !defined(WITHSMP)
    StgWord result;
    result = *p;
    if (result == o) {
        *p = n;
    }
    return result;
#else
#error cas() unimplemented on this architecture
#endif
}

EXTERN_INLINE StgWord
atomic_inc(StgVolatilePtr p)
{
#if defined(i386_HOST_ARCH) || defined(x86_64_HOST_ARCH)
    StgWord r;
    r = 1;
    // lock xadd adds r to *p, leaving the old value of *p in r,
    // so the incremented value is r+1.
    __asm__ __volatile__ (
        "lock\nxadd %0,%1":
            "+r" (r), "+m" (*p):
    );
    return r+1;
#else
    StgWord old, new;
    do {
        old = *p;
        new = old + 1;
    } while (cas(p, old, new) != old);
    return new;
#endif
}

EXTERN_INLINE StgWord
atomic_dec(StgVolatilePtr p)
{
#if defined(i386_HOST_ARCH) || defined(x86_64_HOST_ARCH)
    StgWord r;
    r = (StgWord)-1;
    // lock xadd adds r (i.e. -1) to *p, leaving the old value of *p
    // in r, so the decremented value is r-1.
    __asm__ __volatile__ (
        "lock\nxadd %0,%1":
            "+r" (r), "+m" (*p):
    );
    return r-1;
#else
    StgWord old, new;
    do {
        old = *p;
        new = old - 1;
    } while (cas(p, old, new) != old);
    return new;
#endif
}

EXTERN_INLINE void
busy_wait_nop(void)
{
#if defined(i386_HOST_ARCH) || defined(x86_64_HOST_ARCH)
    __asm__ __volatile__ ("rep; nop");
#else
    // nothing
#endif
}

#endif // !IN_STG_CODE

/*
 * We need to tell both the compiler AND the CPU about the barriers.
 * It's no good preventing the CPU from reordering the operations if
 * the compiler has already done so - hence the "memory" restriction
 * on each of the barriers below.
 */
EXTERN_INLINE void
write_barrier(void) {
#if i386_HOST_ARCH || x86_64_HOST_ARCH
    __asm__ __volatile__ ("" : : : "memory");
#elif powerpc_HOST_ARCH
    __asm__ __volatile__ ("lwsync" : : : "memory");
#elif sparc_HOST_ARCH
    /* Sparc in TSO mode does not require store/store barriers. */
    __asm__ __volatile__ ("" : : : "memory");
#elif !defined(WITHSMP)
    return;
#else
#error memory barriers unimplemented on this architecture
#endif
}

EXTERN_INLINE void
store_load_barrier(void) {
#if i386_HOST_ARCH
    __asm__ __volatile__ ("lock; addl $0,0(%%esp)" : : : "memory");
#elif x86_64_HOST_ARCH
    __asm__ __volatile__ ("lock; addq $0,0(%%rsp)" : : : "memory");
#elif powerpc_HOST_ARCH
    __asm__ __volatile__ ("sync" : : : "memory");
#elif sparc_HOST_ARCH
    __asm__ __volatile__ ("membar #StoreLoad" : : : "memory");
#elif !defined(WITHSMP)
    return;
#else
#error memory barriers unimplemented on this architecture
#endif
}

EXTERN_INLINE void
load_load_barrier(void) {
#if i386_HOST_ARCH
    __asm__ __volatile__ ("" : : : "memory");
#elif x86_64_HOST_ARCH
    __asm__ __volatile__ ("" : : : "memory");
#elif powerpc_HOST_ARCH
    __asm__ __volatile__ ("lwsync" : : : "memory");
#elif sparc_HOST_ARCH
    /* Sparc in TSO mode does not require load/load barriers. */
    __asm__ __volatile__ ("" : : : "memory");
#elif !defined(WITHSMP)
    return;
#else
#error memory barriers unimplemented on this architecture
#endif
}

// Load a pointer from a memory location that might be being modified
// concurrently.  This prevents the compiler from optimising away
// multiple loads of the memory location, as it might otherwise do in
// a busy wait loop for example.
#define VOLATILE_LOAD(p) (*((StgVolatilePtr)(p)))

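/*
 * For example, the busy-wait loop the comment above has in mind
 * (p is a hypothetical shared location):
 *
 *   while (VOLATILE_LOAD(p) == 0) {
 *       busy_wait_nop();
 *   }
 *
 * Without VOLATILE_LOAD the compiler could hoist the load out of the
 * loop and spin forever on a stale value.
 */
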
/* ---------------------------------------------------------------------- */
#else /* !THREADED_RTS */

#define write_barrier()      /* nothing */
#define store_load_barrier() /* nothing */
#define load_load_barrier()  /* nothing */

INLINE_HEADER StgWord
xchg(StgPtr p, StgWord w)
{
    StgWord old = *p;
    *p = w;
    return old;
}

EXTERN_INLINE StgWord cas(StgVolatilePtr p, StgWord o, StgWord n);
EXTERN_INLINE StgWord
cas(StgVolatilePtr p, StgWord o, StgWord n)
{
    StgWord result;
    result = *p;
    if (result == o) {
        *p = n;
    }
    return result;
}

INLINE_HEADER StgWord
atomic_inc(StgVolatilePtr p)
{
    return ++(*p);
}

INLINE_HEADER StgWord
atomic_dec(StgVolatilePtr p)
{
    return --(*p);
}

#define VOLATILE_LOAD(p) ((StgWord)*((StgWord*)(p)))

#endif /* !THREADED_RTS */

#endif /* SMP_H */