Add a proper write barrier for MVars
[ghc-hetmet.git] / rts / PrimOps.cmm
index ad761ab..04a753c 100644 (file)
 
 #include "Cmm.h"
 
+#ifdef __PIC__
+import __gmpz_init;
+import __gmpz_add;
+import __gmpz_sub;
+import __gmpz_mul;
+import __gmpz_gcd;
+import __gmpn_gcd_1;
+import __gmpn_cmp;
+import __gmpz_tdiv_q;
+import __gmpz_tdiv_r;
+import __gmpz_tdiv_qr;
+import __gmpz_fdiv_qr;
+import __gmpz_divexact;
+import __gmpz_and;
+import __gmpz_xor;
+import __gmpz_ior;
+import __gmpz_com;
+import base_GHCziIOBase_NestedAtomically_closure;
+import pthread_mutex_lock;
+import pthread_mutex_unlock;
+#endif
+import EnterCriticalSection;
+import LeaveCriticalSection;
+
 /*-----------------------------------------------------------------------------
   Array Primitives
 
@@ -207,7 +231,7 @@ atomicModifyMutVarzh_fast
    HP_CHK_GEN_TICKY(SIZE, R1_PTR & R2_PTR, atomicModifyMutVarzh_fast);
 
 #if defined(THREADED_RTS)
-    foreign "C" ACQUIRE_LOCK(atomic_modify_mutvar_mutex "ptr") [R1,R2];
+    ACQUIRE_LOCK(atomic_modify_mutvar_mutex "ptr") [R1,R2];
 #endif
 
    x = StgMutVar_var(R1);
@@ -238,7 +262,7 @@ atomicModifyMutVarzh_fast
    StgThunk_payload(r,0) = z;
 
 #if defined(THREADED_RTS)
-    foreign "C" RELEASE_LOCK(atomic_modify_mutvar_mutex "ptr") [];
+    RELEASE_LOCK(atomic_modify_mutvar_mutex "ptr") [];
 #endif
 
    RET_P(r);
@@ -1421,7 +1445,7 @@ isEmptyMVarzh_fast
 {
     /* args: R1 = MVar closure */
 
-    if (GET_INFO(R1) == stg_EMPTY_MVAR_info) {
+    if (StgMVar_value(R1) == stg_END_TSO_QUEUE_closure) {
        RET_N(1);
     } else {
        RET_N(0);
@@ -1436,7 +1460,8 @@ newMVarzh_fast
     ALLOC_PRIM ( SIZEOF_StgMVar, NO_PTRS, newMVarzh_fast );
   
     mvar = Hp - SIZEOF_StgMVar + WDS(1);
-    SET_HDR(mvar,stg_EMPTY_MVAR_info,W_[CCCS]);
+    SET_HDR(mvar,stg_MVAR_DIRTY_info,W_[CCCS]);
+        // MVARs start dirty: generation 0 has no mutable list
     StgMVar_head(mvar)  = stg_END_TSO_QUEUE_closure;
     StgMVar_tail(mvar)  = stg_END_TSO_QUEUE_closure;
     StgMVar_value(mvar) = stg_END_TSO_QUEUE_closure;
@@ -1471,11 +1496,15 @@ takeMVarzh_fast
 #else
     info = GET_INFO(mvar);
 #endif
+        
+    if (info == stg_MVAR_CLEAN_info) {
+        foreign "C" dirty_MVAR(BaseReg "ptr", mvar);
+    }
 
     /* If the MVar is empty, put ourselves on its blocking queue,
      * and wait until we're woken up.
      */
-    if (info == stg_EMPTY_MVAR_info) {
+    if (StgMVar_value(mvar) == stg_END_TSO_QUEUE_closure) {
        if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
            StgMVar_head(mvar) = CurrentTSO;
        } else {
@@ -1519,7 +1548,9 @@ takeMVarzh_fast
       }
 
 #if defined(THREADED_RTS)
-      unlockClosure(mvar, stg_FULL_MVAR_info);
+      unlockClosure(mvar, stg_MVAR_DIRTY_info);
+#else
+      SET_INFO(mvar,stg_MVAR_DIRTY_info);
 #endif
       RET_P(val);
   } 
@@ -1529,9 +1560,9 @@ takeMVarzh_fast
       StgMVar_value(mvar) = stg_END_TSO_QUEUE_closure;
  
 #if defined(THREADED_RTS)
-      unlockClosure(mvar, stg_EMPTY_MVAR_info);
+      unlockClosure(mvar, stg_MVAR_DIRTY_info);
 #else
-      SET_INFO(mvar,stg_EMPTY_MVAR_info);
+      SET_INFO(mvar,stg_MVAR_DIRTY_info);
 #endif
 
       RET_P(val);
@@ -1553,9 +1584,9 @@ tryTakeMVarzh_fast
     info = GET_INFO(mvar);
 #endif
 
-    if (info == stg_EMPTY_MVAR_info) {
+    if (StgMVar_value(mvar) == stg_END_TSO_QUEUE_closure) {
 #if defined(THREADED_RTS)
-        unlockClosure(mvar, stg_EMPTY_MVAR_info);
+        unlockClosure(mvar, info);
 #endif
        /* HACK: we need a pointer to pass back, 
         * so we abuse NO_FINALIZER_closure
@@ -1563,6 +1594,10 @@ tryTakeMVarzh_fast
        RET_NP(0, stg_NO_FINALIZER_closure);
     }
 
+    if (info == stg_MVAR_CLEAN_info) {
+        foreign "C" dirty_MVAR(BaseReg "ptr", mvar);
+    }
+
     /* we got the value... */
     val = StgMVar_value(mvar);
 
@@ -1592,7 +1627,9 @@ tryTakeMVarzh_fast
            StgMVar_tail(mvar) = stg_END_TSO_QUEUE_closure;
        }
 #if defined(THREADED_RTS)
-        unlockClosure(mvar, stg_FULL_MVAR_info);
+        unlockClosure(mvar, stg_MVAR_DIRTY_info);
+#else
+        SET_INFO(mvar,stg_MVAR_DIRTY_info);
 #endif
     }
     else 
@@ -1600,9 +1637,9 @@ tryTakeMVarzh_fast
        /* No further putMVars, MVar is now empty */
        StgMVar_value(mvar) = stg_END_TSO_QUEUE_closure;
 #if defined(THREADED_RTS)
-       unlockClosure(mvar, stg_EMPTY_MVAR_info);
+       unlockClosure(mvar, stg_MVAR_DIRTY_info);
 #else
-       SET_INFO(mvar,stg_EMPTY_MVAR_info);
+       SET_INFO(mvar,stg_MVAR_DIRTY_info);
 #endif
     }
     
@@ -1623,7 +1660,11 @@ putMVarzh_fast
     info = GET_INFO(mvar);
 #endif
 
-    if (info == stg_FULL_MVAR_info) {
+    if (info == stg_MVAR_CLEAN_info) {
+        foreign "C" dirty_MVAR(BaseReg "ptr", mvar);
+    }
+
+    if (StgMVar_value(mvar) != stg_END_TSO_QUEUE_closure) {
        if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
            StgMVar_head(mvar) = CurrentTSO;
        } else {
@@ -1662,7 +1703,9 @@ putMVarzh_fast
        }
 
 #if defined(THREADED_RTS)
-       unlockClosure(mvar, stg_EMPTY_MVAR_info);
+       unlockClosure(mvar, stg_MVAR_DIRTY_info);
+#else
+        SET_INFO(mvar,stg_MVAR_DIRTY_info);
 #endif
        jump %ENTRY_CODE(Sp(0));
     }
@@ -1672,9 +1715,9 @@ putMVarzh_fast
        StgMVar_value(mvar) = R2;
 
 #if defined(THREADED_RTS)
-       unlockClosure(mvar, stg_FULL_MVAR_info);
+       unlockClosure(mvar, stg_MVAR_DIRTY_info);
 #else
-       SET_INFO(mvar,stg_FULL_MVAR_info);
+       SET_INFO(mvar,stg_MVAR_DIRTY_info);
 #endif
        jump %ENTRY_CODE(Sp(0));
     }
@@ -1696,13 +1739,17 @@ tryPutMVarzh_fast
     info = GET_INFO(mvar);
 #endif
 
-    if (info == stg_FULL_MVAR_info) {
+    if (StgMVar_value(mvar) != stg_END_TSO_QUEUE_closure) {
 #if defined(THREADED_RTS)
-       unlockClosure(mvar, stg_FULL_MVAR_info);
+       unlockClosure(mvar, info);
 #endif
        RET_N(0);
     }
   
+    if (info == stg_MVAR_CLEAN_info) {
+        foreign "C" dirty_MVAR(BaseReg "ptr", mvar);
+    }
+
     if (StgMVar_head(mvar) != stg_END_TSO_QUEUE_closure) {
 
        /* There are takeMVar(s) waiting: wake up the first one
@@ -1728,7 +1775,9 @@ tryPutMVarzh_fast
        }
 
 #if defined(THREADED_RTS)
-       unlockClosure(mvar, stg_EMPTY_MVAR_info);
+       unlockClosure(mvar, stg_MVAR_DIRTY_info);
+#else
+        SET_INFO(mvar,stg_MVAR_DIRTY_info);
 #endif
     }
     else
@@ -1737,9 +1786,9 @@ tryPutMVarzh_fast
        StgMVar_value(mvar) = R2;
 
 #if defined(THREADED_RTS)
-       unlockClosure(mvar, stg_FULL_MVAR_info);
+       unlockClosure(mvar, stg_MVAR_DIRTY_info);
 #else
-       SET_INFO(mvar,stg_FULL_MVAR_info);
+       SET_INFO(mvar,stg_MVAR_DIRTY_info);
 #endif
     }
     
@@ -1869,7 +1918,7 @@ unpackClosurezh_fast
 // TODO: Consider the absence of ptrs or nonptrs as a special case ?
 
     W_ info, ptrs, nptrs, p, ptrs_arr, nptrs_arr;
-    info  = %GET_STD_INFO(R1);
+    info  = %GET_STD_INFO(UNTAG(R1));
 
     // Some closures have non-standard layout, so we omit those here.
     W_ type;
@@ -1899,6 +1948,9 @@ out:
 
     ALLOC_PRIM (ptrs_arr_sz + nptrs_arr_sz, R1_PTR, unpackClosurezh_fast);
 
+    W_ clos;
+    clos = UNTAG(R1);
+
     ptrs_arr  = Hp - nptrs_arr_sz - ptrs_arr_sz + WDS(1);
     nptrs_arr = Hp - nptrs_arr_sz + WDS(1);
 
@@ -1907,7 +1959,7 @@ out:
     p = 0;
 for:
     if(p < ptrs) {
-        W_[ptrs_arr + SIZEOF_StgMutArrPtrs + WDS(p)] = StgClosure_payload(R1,p);
+        W_[ptrs_arr + SIZEOF_StgMutArrPtrs + WDS(p)] = StgClosure_payload(clos,p);
         p = p + 1;
         goto for;
     }
@@ -1917,7 +1969,7 @@ for:
     p = 0;
 for2:
     if(p < nptrs) {
-        W_[BYTE_ARR_CTS(nptrs_arr) + WDS(p)] = StgClosure_payload(R1, p+ptrs);
+        W_[BYTE_ARR_CTS(nptrs_arr) + WDS(p)] = StgClosure_payload(clos, p+ptrs);
         p = p + 1;
         goto for2;
     }
@@ -1944,7 +1996,7 @@ waitReadzh_fast
 {
     /* args: R1 */
 #ifdef THREADED_RTS
-    foreign "C" barf("waitRead# on threaded RTS");
+    foreign "C" barf("waitRead# on threaded RTS") never returns;
 #else
 
     ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
@@ -1961,7 +2013,7 @@ waitWritezh_fast
 {
     /* args: R1 */
 #ifdef THREADED_RTS
-    foreign "C" barf("waitWrite# on threaded RTS");
+    foreign "C" barf("waitWrite# on threaded RTS") never returns;
 #else
 
     ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
@@ -1986,7 +2038,7 @@ delayzh_fast
 #endif
 
 #ifdef THREADED_RTS
-    foreign "C" barf("delay# on threaded RTS");
+    foreign "C" barf("delay# on threaded RTS") never returns;
 #else
 
     /* args: R1 (microsecond delay amount) */
@@ -1998,7 +2050,7 @@ delayzh_fast
     /* could probably allocate this on the heap instead */
     ("ptr" ares) = foreign "C" stgMallocBytes(SIZEOF_StgAsyncIOResult,
                                            stg_delayzh_malloc_str);
-    reqID = foreign "C" addDelayRequest(R1);
+    (reqID) = foreign "C" addDelayRequest(R1);
     StgAsyncIOResult_reqID(ares)   = reqID;
     StgAsyncIOResult_len(ares)     = 0;
     StgAsyncIOResult_errCode(ares) = 0;
@@ -2052,7 +2104,7 @@ asyncReadzh_fast
     CInt reqID;
 
 #ifdef THREADED_RTS
-    foreign "C" barf("asyncRead# on threaded RTS");
+    foreign "C" barf("asyncRead# on threaded RTS") never returns;
 #else
 
     /* args: R1 = fd, R2 = isSock, R3 = len, R4 = buf */
@@ -2060,10 +2112,10 @@ asyncReadzh_fast
     StgTSO_why_blocked(CurrentTSO) = BlockedOnRead::I16;
 
     /* could probably allocate this on the heap instead */
-    "ptr" ares = foreign "C" stgMallocBytes(SIZEOF_StgAsyncIOResult,
+    ("ptr" ares) = foreign "C" stgMallocBytes(SIZEOF_StgAsyncIOResult,
                                            stg_asyncReadzh_malloc_str)
                        [R1,R2,R3,R4];
-    reqID = foreign "C" addIORequest(R1, 0/*FALSE*/,R2,R3,R4 "ptr") [];
+    (reqID) = foreign "C" addIORequest(R1, 0/*FALSE*/,R2,R3,R4 "ptr") [];
     StgAsyncIOResult_reqID(ares)   = reqID;
     StgAsyncIOResult_len(ares)     = 0;
     StgAsyncIOResult_errCode(ares) = 0;
@@ -2080,17 +2132,17 @@ asyncWritezh_fast
     CInt reqID;
 
 #ifdef THREADED_RTS
-    foreign "C" barf("asyncWrite# on threaded RTS");
+    foreign "C" barf("asyncWrite# on threaded RTS") never returns;
 #else
 
     /* args: R1 = fd, R2 = isSock, R3 = len, R4 = buf */
     ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
     StgTSO_why_blocked(CurrentTSO) = BlockedOnWrite::I16;
 
-    "ptr" ares = foreign "C" stgMallocBytes(SIZEOF_StgAsyncIOResult,
+    ("ptr" ares) = foreign "C" stgMallocBytes(SIZEOF_StgAsyncIOResult,
                                            stg_asyncWritezh_malloc_str)
                        [R1,R2,R3,R4];
-    reqID = foreign "C" addIORequest(R1, 1/*TRUE*/,R2,R3,R4 "ptr") [];
+    (reqID) = foreign "C" addIORequest(R1, 1/*TRUE*/,R2,R3,R4 "ptr") [];
 
     StgAsyncIOResult_reqID(ares)   = reqID;
     StgAsyncIOResult_len(ares)     = 0;
@@ -2108,7 +2160,7 @@ asyncDoProczh_fast
     CInt reqID;
 
 #ifdef THREADED_RTS
-    foreign "C" barf("asyncDoProc# on threaded RTS");
+    foreign "C" barf("asyncDoProc# on threaded RTS") never returns;
 #else
 
     /* args: R1 = proc, R2 = param */
@@ -2116,10 +2168,10 @@ asyncDoProczh_fast
     StgTSO_why_blocked(CurrentTSO) = BlockedOnDoProc::I16;
 
     /* could probably allocate this on the heap instead */
-    "ptr" ares = foreign "C" stgMallocBytes(SIZEOF_StgAsyncIOResult,
+    ("ptr" ares) = foreign "C" stgMallocBytes(SIZEOF_StgAsyncIOResult,
                                            stg_asyncDoProczh_malloc_str) 
                                [R1,R2];
-    reqID = foreign "C" addDoProcRequest(R1 "ptr",R2 "ptr") [];
+    (reqID) = foreign "C" addDoProcRequest(R1 "ptr",R2 "ptr") [];
     StgAsyncIOResult_reqID(ares)   = reqID;
     StgAsyncIOResult_len(ares)     = 0;
     StgAsyncIOResult_errCode(ares) = 0;