1 /* ----------------------------------------------------------------------------
3 * (c) The GHC Team, 2005-2009
5 * Macros for multi-CPU support
7 * Do not #include this file directly: #include "Rts.h" instead.
9 * To understand the structure of the RTS headers, see the wiki:
10 * http://hackage.haskell.org/trac/ghc/wiki/Commentary/SourceTree/Includes
12 * -------------------------------------------------------------------------- */
17 #if defined(THREADED_RTS)
19 /* ----------------------------------------------------------------------------
21 ------------------------------------------------------------------------- */
// NOTE(review): this chunk is a numbered listing and the embedded line numbers
// are non-contiguous — some original source lines are missing. Code below is
// reproduced verbatim; comments only annotate what is visible.
23 #if !IN_STG_CODE || IN_STGCRUN
24 // We only want the barriers, e.g. write_barrier(), declared in .hc
25 // files. Defining the other inline functions here causes type
26 // mismatch errors from gcc, because the generated C code is assuming
27 // that there are no prototypes in scope.
30 * The atomic exchange operation: xchg(p,w) exchanges the value
31 * pointed to by p with the value w, returning the old value.
33 * Used for locking closures during updates (see lockClosure() below)
34 * and the MVar primops.
36 EXTERN_INLINE StgWord xchg(StgPtr p, StgWord w);
// cas: atomic compare-and-swap — per the doc fragment above, reads r = *p and
// stores n only when r == o. Presumably returns the value read (r); the doc
// lines stating the return value are missing from this listing — confirm.
39 * Compare-and-swap. Atomically does this:
43 * if (r == o) { *p = n };
47 EXTERN_INLINE StgWord cas(StgVolatilePtr p, StgWord o, StgWord n);
// atomic_inc / atomic_dec: atomic increment/decrement of *p. Return-value
// semantics (old vs. new value) are not shown in this listing — confirm
// against the original header before relying on them.
56 EXTERN_INLINE StgWord atomic_inc(StgVolatilePtr p);
65 EXTERN_INLINE StgWord atomic_dec(StgVolatilePtr p);
67 #endif // !IN_STG_CODE
70 * Various kinds of memory barrier.
71 * write_barrier: prevents future stores occurring before preceding stores.
72 * store_load_barrier: prevents future loads occurring before preceding stores.
73 * load_load_barrier: prevents future loads occurring before earlier loads.
75 * Reference for these: "The JSR-133 Cookbook for Compiler Writers"
76 * http://gee.cs.oswego.edu/dl/jmm/cookbook.html
78 * To check whether you got these right, try the test in
79 * testsuite/tests/ghc-regress/rts/testwsdeque.c
80 * This tests the work-stealing deque implementation, which relies on
81 * properly working store_load and load_load memory barriers.
// Barrier prototypes; their semantics are documented in the comment block
// immediately above, and the per-architecture implementations appear below.
// These are visible even to .hc files (i.e. outside !IN_STG_CODE).
83 EXTERN_INLINE void write_barrier(void);
84 EXTERN_INLINE void store_load_barrier(void);
85 EXTERN_INLINE void load_load_barrier(void);
87 /* ----------------------------------------------------------------------------
89 ------------------------------------------------------------------------- */
91 #if !IN_STG_CODE || IN_STGCRUN
// xchg implementation, one branch per architecture.
// NOTE(review): the return-type line, braces, local `result` declaration and
// several asm lines are missing from this listing (embedded line numbers
// skip) — reproduced verbatim, do not edit the asm without the full source.
94 xchg(StgPtr p, StgWord w)
97 #if i386_HOST_ARCH || x86_64_HOST_ARCH
99 __asm__ __volatile__ (
100 // NB: the xchg instruction is implicitly locked, so we do not
101 // need a lock prefix here.
103 :"+r" (result), "+m" (*p)
104 : /* no input-only operands */
106 #elif powerpc_HOST_ARCH
// lwarx/stwcx. is a load-reserve / store-conditional pair; presumably the
// missing lines branch back to label 1 when the store-conditional fails.
107 __asm__ __volatile__ (
108 "1: lwarx %0, 0, %2\n"
109 " stwcx. %1, 0, %2\n"
114 #elif sparc_HOST_ARCH
116 __asm__ __volatile__ (
118 : "+r" (result), "+m" (*p)
119 : /* no input-only operands */
121 #elif !defined(WITHSMP)
// (non-SMP fallback body not visible in this listing)
125 #error xchg() unimplemented on this architecture
131 * CMPXCHG - the single-word atomic compare-and-exchange instruction. Used
132 * in the STM implementation.
// NOTE(review): braces, labels and the return statement are missing from
// this listing — asm reproduced verbatim.
134 EXTERN_INLINE StgWord
135 cas(StgVolatilePtr p, StgWord o, StgWord n)
137 #if i386_HOST_ARCH || x86_64_HOST_ARCH
// "=a"(o) pins the expected value in eax/rax, as the cmpxchg instruction
// requires; the lock prefix makes it atomic on SMP.
138 __asm__ __volatile__ (
139 "lock\ncmpxchg %3,%1"
140 :"=a"(o), "=m" (*(volatile unsigned int *)p)
143 #elif powerpc_HOST_ARCH
// load-reserve / store-conditional loop; the compare and the retry branch
// lines are among those missing from this listing.
145 __asm__ __volatile__ (
146 "1: lwarx %0, 0, %3\n"
149 " stwcx. %2, 0, %3\n"
153 :"r" (o), "r" (n), "r" (p)
157 #elif sparc_HOST_ARCH
158 __asm__ __volatile__ (
165 #elif !defined(WITHSMP)
173 #error cas() unimplemented on this architecture
// atomic_inc: atomic increment of *p. On x86/x86_64 it is inline asm
// (body lines missing from this listing); elsewhere it falls back to a
// cas retry loop, visible in the do/while tail below.
177 EXTERN_INLINE StgWord
178 atomic_inc(StgVolatilePtr p)
180 #if defined(i386_HOST_ARCH) || defined(x86_64_HOST_ARCH)
183 __asm__ __volatile__ (
193 } while (cas(p, old, new) != old);
// atomic_dec: atomic decrement of *p, mirroring atomic_inc: x86/x86_64
// inline asm (body lines missing from this listing), cas retry loop
// elsewhere.
198 EXTERN_INLINE StgWord
199 atomic_dec(StgVolatilePtr p)
201 #if defined(i386_HOST_ARCH) || defined(x86_64_HOST_ARCH)
204 __asm__ __volatile__ (
214 } while (cas(p, old, new) != old);
219 #endif // !IN_STG_CODE
222 * We need to tell both the compiler AND the CPU about the barriers.
223 * It's no good preventing the CPU from reordering the operations if
224 * the compiler has already done so - hence the "memory" restriction
225 * on each of the barriers below.
// NOTE(review): the `EXTERN_INLINE void` line and closing brace are missing
// from this listing.
228 write_barrier(void) {
229 #if i386_HOST_ARCH || x86_64_HOST_ARCH
// Empty asm with a "memory" clobber: a compiler-only barrier — no CPU fence
// instruction is emitted on x86 for store/store ordering.
230 __asm__ __volatile__ ("" : : : "memory");
231 #elif powerpc_HOST_ARCH
232 __asm__ __volatile__ ("lwsync" : : : "memory");
233 #elif sparc_HOST_ARCH
234 /* Sparc in TSO mode does not require store/store barriers. */
235 __asm__ __volatile__ ("" : : : "memory");
236 #elif !defined(WITHSMP)
239 #error memory barriers unimplemented on this architecture
// NOTE(review): the `EXTERN_INLINE void` line and the leading
// `#if i386_HOST_ARCH` guard for the first branch are missing from this
// listing — the addl/esp variant is presumably the i386 branch.
244 store_load_barrier(void) {
// A locked add of 0 to the top of the stack acts as a full fence on x86 —
// a common cheaper alternative to mfence.
246 __asm__ __volatile__ ("lock; addl $0,0(%%esp)" : : : "memory");
247 #elif x86_64_HOST_ARCH
248 __asm__ __volatile__ ("lock; addq $0,0(%%rsp)" : : : "memory");
249 #elif powerpc_HOST_ARCH
250 __asm__ __volatile__ ("sync" : : : "memory");
251 #elif sparc_HOST_ARCH
252 __asm__ __volatile__ ("membar #StoreLoad" : : : "memory");
253 #elif !defined(WITHSMP)
256 #error memory barriers unimplemented on this architecture
// NOTE(review): the `EXTERN_INLINE void` line and the leading
// `#if i386_HOST_ARCH` guard are missing from this listing.
261 load_load_barrier(void) {
// Compiler-only barrier on x86: loads are not reordered with other loads
// on this architecture, so no fence instruction is needed.
263 __asm__ __volatile__ ("" : : : "memory");
264 #elif x86_64_HOST_ARCH
265 __asm__ __volatile__ ("" : : : "memory");
266 #elif powerpc_HOST_ARCH
267 __asm__ __volatile__ ("lwsync" : : : "memory");
268 #elif sparc_HOST_ARCH
269 /* Sparc in TSO mode does not require load/load barriers. */
270 __asm__ __volatile__ ("" : : : "memory");
271 #elif !defined(WITHSMP)
274 #error memory barriers unimplemented on this architecture
278 /* ---------------------------------------------------------------------- */
279 #else /* !THREADED_RTS */
// Single-threaded RTS: no other CPU can observe reordering, so the barriers
// expand to nothing.
281 #define write_barrier() /* nothing */
282 #define store_load_barrier() /* nothing */
283 #define load_load_barrier() /* nothing */
// Non-threaded variants of the atomic ops. All four function bodies are
// missing from this listing (embedded line numbers skip past them);
// presumably they are plain non-atomic implementations — confirm against
// the full source.
// NOTE(review): cas uses STATIC_INLINE while its three siblings use
// INLINE_HEADER — verify this inconsistency is intentional.
285 INLINE_HEADER StgWord
286 xchg(StgPtr p, StgWord w)
293 STATIC_INLINE StgWord
294 cas(StgVolatilePtr p, StgWord o, StgWord n)
304 INLINE_HEADER StgWord
305 atomic_inc(StgVolatilePtr p)
310 INLINE_HEADER StgWord
311 atomic_dec(StgVolatilePtr p)
316 #endif /* !THREADED_RTS */