Remove vectored returns.
[ghc-hetmet.git] / rts / PrimOps.cmm
index 5246f56..545aa48 100644 (file)
@@ -138,7 +138,7 @@ unsafeThawArrayzh_fast
   //
   if (StgHeader_info(R1) != stg_MUT_ARR_PTRS_FROZEN0_info) {
        SET_INFO(R1,stg_MUT_ARR_PTRS_DIRTY_info);
-       foreign "C" recordMutableLock(R1 "ptr") [R1];
+       recordMutable(R1, R1);
        // must be done after SET_INFO, because it ASSERTs closure_MUTABLE()
        RET_P(R1);
   } else {
@@ -418,12 +418,15 @@ int64ToIntegerzh_fast
    /* arguments: L1 = Int64# */
 
    L_ val;
-   W_ hi, s, neg, words_needed, p;
+   W_ hi, lo, s, neg, words_needed, p;
 
    val = L1;
    neg = 0;
 
-   if ( %ge(val,0x100000000::L_) || %le(val,-0x100000000::L_) )  { 
+   hi = TO_W_(val >> 32);   // word taken from bits 32 and up of val
+   lo = TO_W_(val);         // low word of val
+   // One word is enough only if hi == 0, or hi is all ones and lo != 0; -2^32 (hi all ones, lo == 0) negates to hi=1, lo=0.
+   if ( hi != 0 && (hi != 0xFFFFFFFF || lo == 0) )  { 
        words_needed = 2;
    } else { 
        // minimum is one word
@@ -437,21 +440,24 @@ int64ToIntegerzh_fast
    SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
    StgArrWords_words(p) = words_needed;
 
-   if ( %lt(val,0::L_) ) {
+   if ( %lt(hi,0) ) {
      neg = 1;
-     val = -val;
+     lo = -lo;
+     if(lo == 0) {
+       hi = -hi;
+     } else {
+       hi = -hi - 1;
+     }
    }
 
-   hi = TO_W_(val >> 32);
-
    if ( words_needed == 2 )  { 
       s = 2;
-      Hp(-1) = TO_W_(val);
+      Hp(-1) = lo;
       Hp(0) = hi;
    } else { 
-       if ( val != 0::L_ ) {
+       if ( lo != 0 ) {
           s = 1;
-          Hp(0) = TO_W_(val);
+          Hp(0) = lo;
        } else /* val==0 */  {
           s = 0;
        }
@@ -465,16 +471,18 @@ int64ToIntegerzh_fast
    */
    RET_NP(s,p);
 }
-
 word64ToIntegerzh_fast
 {
    /* arguments: L1 = Word64# */
 
    L_ val;
-   W_ hi, s, words_needed, p;
+   W_ hi, lo, s, words_needed, p;
 
    val = L1;
-   if ( val >= 0x100000000::L_ ) {
+   hi = TO_W_(val >> 32);
+   lo = TO_W_(val);
+
+   if ( hi != 0 ) {
       words_needed = 2;
    } else {
       words_needed = 1;
@@ -487,15 +495,14 @@ word64ToIntegerzh_fast
    SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
    StgArrWords_words(p) = words_needed;
 
-   hi = TO_W_(val >> 32);
-   if ( val >= 0x100000000::L_ ) { 
+   if ( hi != 0 ) { 
      s = 2;
-     Hp(-1) = TO_W_(val);
+     Hp(-1) = lo;
      Hp(0)  = hi;
    } else {
-      if ( val != 0::L_ ) {
+      if ( lo != 0 ) {
         s = 1;
-        Hp(0) = TO_W_(val);
+        Hp(0) = lo;
      } else /* val==0 */  {
       s = 0;
      }
@@ -508,6 +515,7 @@ word64ToIntegerzh_fast
 }
 
 
+
 #endif /* SUPPORT_LONG_LONGS */
 
 /* ToDo: this is shockingly inefficient */
@@ -567,7 +575,7 @@ name                                                                    \
   MP_INT__mp_size(mp_tmp2)  = (s2);                                     \
   MP_INT__mp_d(mp_tmp2)            = BYTE_ARR_CTS(d2);                         \
                                                                         \
-  foreign "C" mpz_init(mp_result1 "ptr") [];                            \
+  foreign "C" __gmpz_init(mp_result1 "ptr") [];                            \
                                                                         \
   /* Perform the operation */                                           \
   foreign "C" mp_fun(mp_result1 "ptr",mp_tmp1  "ptr",mp_tmp2  "ptr") []; \
@@ -594,7 +602,7 @@ name                                                                    \
   MP_INT__mp_size(mp_tmp1)     = (s1);                                 \
   MP_INT__mp_d(mp_tmp1)                = BYTE_ARR_CTS(d1);                     \
                                                                         \
-  foreign "C" mpz_init(mp_result1 "ptr") [];                            \
+  foreign "C" __gmpz_init(mp_result1 "ptr") [];                            \
                                                                         \
   /* Perform the operation */                                           \
   foreign "C" mp_fun(mp_result1 "ptr",mp_tmp1 "ptr") [];                \
@@ -628,8 +636,8 @@ name
   MP_INT__mp_size(mp_tmp2)     = (s2);                                                 \
   MP_INT__mp_d(mp_tmp2)                = BYTE_ARR_CTS(d2);                                     \
                                                                                         \
-  foreign "C" mpz_init(mp_result1 "ptr") [];                                               \
-  foreign "C" mpz_init(mp_result2 "ptr") [];                                               \
+  foreign "C" __gmpz_init(mp_result1 "ptr") [];                                               \
+  foreign "C" __gmpz_init(mp_result2 "ptr") [];                                               \
                                                                                         \
   /* Perform the operation */                                                           \
   foreign "C" mp_fun(mp_result1 "ptr",mp_result2 "ptr",mp_tmp1 "ptr",mp_tmp2 "ptr") [];    \
@@ -640,20 +648,20 @@ name
            MP_INT__mp_d(mp_result2) - SIZEOF_StgArrWords);                              \
 }
 
-GMP_TAKE2_RET1(plusIntegerzh_fast,     mpz_add)
-GMP_TAKE2_RET1(minusIntegerzh_fast,    mpz_sub)
-GMP_TAKE2_RET1(timesIntegerzh_fast,    mpz_mul)
-GMP_TAKE2_RET1(gcdIntegerzh_fast,      mpz_gcd)
-GMP_TAKE2_RET1(quotIntegerzh_fast,     mpz_tdiv_q)
-GMP_TAKE2_RET1(remIntegerzh_fast,      mpz_tdiv_r)
-GMP_TAKE2_RET1(divExactIntegerzh_fast, mpz_divexact)
-GMP_TAKE2_RET1(andIntegerzh_fast,      mpz_and)
-GMP_TAKE2_RET1(orIntegerzh_fast,       mpz_ior)
-GMP_TAKE2_RET1(xorIntegerzh_fast,      mpz_xor)
-GMP_TAKE1_RET1(complementIntegerzh_fast, mpz_com)
+GMP_TAKE2_RET1(plusIntegerzh_fast,     __gmpz_add)
+GMP_TAKE2_RET1(minusIntegerzh_fast,    __gmpz_sub)
+GMP_TAKE2_RET1(timesIntegerzh_fast,    __gmpz_mul)
+GMP_TAKE2_RET1(gcdIntegerzh_fast,      __gmpz_gcd)
+GMP_TAKE2_RET1(quotIntegerzh_fast,     __gmpz_tdiv_q)
+GMP_TAKE2_RET1(remIntegerzh_fast,      __gmpz_tdiv_r)
+GMP_TAKE2_RET1(divExactIntegerzh_fast, __gmpz_divexact)
+GMP_TAKE2_RET1(andIntegerzh_fast,      __gmpz_and)
+GMP_TAKE2_RET1(orIntegerzh_fast,       __gmpz_ior)
+GMP_TAKE2_RET1(xorIntegerzh_fast,      __gmpz_xor)
+GMP_TAKE1_RET1(complementIntegerzh_fast, __gmpz_com)
 
-GMP_TAKE2_RET2(quotRemIntegerzh_fast, mpz_tdiv_qr)
-GMP_TAKE2_RET2(divModIntegerzh_fast,  mpz_fdiv_qr)
+GMP_TAKE2_RET2(quotRemIntegerzh_fast, __gmpz_tdiv_qr)
+GMP_TAKE2_RET2(divModIntegerzh_fast,  __gmpz_fdiv_qr)
 
 #ifndef THREADED_RTS
 section "bss" {
@@ -668,7 +676,7 @@ gcdIntzh_fast
     FETCH_MP_TEMP(mp_tmp_w);
 
     W_[mp_tmp_w] = R1;
-    r = foreign "C" mpn_gcd_1(mp_tmp_w "ptr", 1, R2) [];
+    r = foreign "C" __gmpn_gcd_1(mp_tmp_w "ptr", 1, R2) [];
 
     R1 = r;
     /* Result parked in R1, return via info-pointer at TOS */
@@ -679,7 +687,7 @@ gcdIntzh_fast
 gcdIntegerIntzh_fast
 {
     /* R1 = s1; R2 = d1; R3 = the int */
-    R1 = foreign "C" mpn_gcd_1( BYTE_ARR_CTS(R2) "ptr", R1, R3) [];
+    R1 = foreign "C" __gmpn_gcd_1( BYTE_ARR_CTS(R2) "ptr", R1, R3) [];
     
     /* Result parked in R1, return via info-pointer at TOS */
     jump %ENTRY_CODE(Sp(0));
@@ -695,7 +703,7 @@ cmpIntegerIntzh_fast
     vsize = 0;
     v_digit = R3;
 
-    // paraphrased from mpz_cmp_si() in the GMP sources
+    // paraphrased from __gmpz_cmp_si() in the GMP sources
     if (%gt(v_digit,0)) {
        vsize = 1;
     } else { 
@@ -737,7 +745,7 @@ cmpIntegerzh_fast
     W_ usize, vsize, size, up, vp;
     CInt cmp;
 
-    // paraphrased from mpz_cmp() in the GMP sources
+    // paraphrased from __gmpz_cmp() in the GMP sources
     usize = R1;
     vsize = R3;
 
@@ -760,7 +768,7 @@ cmpIntegerzh_fast
     up = BYTE_ARR_CTS(R2);
     vp = BYTE_ARR_CTS(R4);
 
-    cmp = foreign "C" mpn_cmp(up "ptr", vp "ptr", size) [];
+    cmp = foreign "C" __gmpn_cmp(up "ptr", vp "ptr", size) [];
 
     if (cmp == 0 :: CInt) {
        R1 = 0; 
@@ -962,41 +970,17 @@ isCurrentThreadBoundzh_fast
 
 // Catch retry frame ------------------------------------------------------------
 
-#define CATCH_RETRY_FRAME_ERROR(label) \
-  label { foreign "C" barf("catch_retry_frame incorrectly entered!"); }
-
-CATCH_RETRY_FRAME_ERROR(stg_catch_retry_frame_0_ret)
-CATCH_RETRY_FRAME_ERROR(stg_catch_retry_frame_1_ret)
-CATCH_RETRY_FRAME_ERROR(stg_catch_retry_frame_2_ret)
-CATCH_RETRY_FRAME_ERROR(stg_catch_retry_frame_3_ret)
-CATCH_RETRY_FRAME_ERROR(stg_catch_retry_frame_4_ret)
-CATCH_RETRY_FRAME_ERROR(stg_catch_retry_frame_5_ret)
-CATCH_RETRY_FRAME_ERROR(stg_catch_retry_frame_6_ret)
-CATCH_RETRY_FRAME_ERROR(stg_catch_retry_frame_7_ret)
-
-#if MAX_VECTORED_RTN > 8
-#error MAX_VECTORED_RTN has changed: please modify stg_catch_retry_frame too.
-#endif
-
 #if defined(PROFILING)
 #define CATCH_RETRY_FRAME_BITMAP 7
-#define CATCH_RETRY_FRAME_WORDS  6
+#define CATCH_RETRY_FRAME_WORDS  5
 #else
 #define CATCH_RETRY_FRAME_BITMAP 1
-#define CATCH_RETRY_FRAME_WORDS  4
+#define CATCH_RETRY_FRAME_WORDS  3
 #endif
 
 INFO_TABLE_RET(stg_catch_retry_frame,
               CATCH_RETRY_FRAME_WORDS, CATCH_RETRY_FRAME_BITMAP,
-              CATCH_RETRY_FRAME,
-              stg_catch_retry_frame_0_ret,
-              stg_catch_retry_frame_1_ret,
-              stg_catch_retry_frame_2_ret,
-              stg_catch_retry_frame_3_ret,
-              stg_catch_retry_frame_4_ret,
-              stg_catch_retry_frame_5_ret,
-              stg_catch_retry_frame_6_ret,
-              stg_catch_retry_frame_7_ret)
+              CATCH_RETRY_FRAME)
 {
    W_ r, frame, trec, outer;
    IF_NOT_REG_R1(W_ rval;  rval = Sp(0);  Sp_adj(1); )
@@ -1012,7 +996,7 @@ INFO_TABLE_RET(stg_catch_retry_frame,
      IF_NOT_REG_R1(Sp_adj(-1); Sp(0) = rval;)
      jump %ENTRY_CODE(Sp(SP_OFF));
    } else {
-     /* Did not commit: retry */
+     /* Did not commit: re-execute */
      W_ new_trec;
      "ptr" new_trec = foreign "C" stmStartTransaction(MyCapability() "ptr", outer "ptr") [];
      StgTSO_trec(CurrentTSO) = new_trec;
@@ -1020,87 +1004,87 @@ INFO_TABLE_RET(stg_catch_retry_frame,
        R1 = StgCatchRetryFrame_alt_code(frame);
      } else {
        R1 = StgCatchRetryFrame_first_code(frame);
-       StgCatchRetryFrame_first_code_trec(frame) = new_trec;
      }
      jump stg_ap_v_fast;
    }
 }
 
 
-// Atomically frame -------------------------------------------------------------
-
-
-#define ATOMICALLY_FRAME_ERROR(label) \
-  label { foreign "C" barf("atomically_frame incorrectly entered!"); }
-
-ATOMICALLY_FRAME_ERROR(stg_atomically_frame_0_ret)
-ATOMICALLY_FRAME_ERROR(stg_atomically_frame_1_ret)
-ATOMICALLY_FRAME_ERROR(stg_atomically_frame_2_ret)
-ATOMICALLY_FRAME_ERROR(stg_atomically_frame_3_ret)
-ATOMICALLY_FRAME_ERROR(stg_atomically_frame_4_ret)
-ATOMICALLY_FRAME_ERROR(stg_atomically_frame_5_ret)
-ATOMICALLY_FRAME_ERROR(stg_atomically_frame_6_ret)
-ATOMICALLY_FRAME_ERROR(stg_atomically_frame_7_ret)
-
-#if MAX_VECTORED_RTN > 8
-#error MAX_VECTORED_RTN has changed: please modify stg_atomically_frame too.
-#endif
+// Atomically frame ------------------------------------------------------------
 
 #if defined(PROFILING)
 #define ATOMICALLY_FRAME_BITMAP 3
-#define ATOMICALLY_FRAME_WORDS  3
+#define ATOMICALLY_FRAME_WORDS  4
 #else
 #define ATOMICALLY_FRAME_BITMAP 0
-#define ATOMICALLY_FRAME_WORDS  1
+#define ATOMICALLY_FRAME_WORDS  2
 #endif
 
-
 INFO_TABLE_RET(stg_atomically_frame,
               ATOMICALLY_FRAME_WORDS, ATOMICALLY_FRAME_BITMAP,
-              ATOMICALLY_FRAME,
-              stg_atomically_frame_0_ret,
-              stg_atomically_frame_1_ret,
-              stg_atomically_frame_2_ret,
-              stg_atomically_frame_3_ret,
-              stg_atomically_frame_4_ret,
-              stg_atomically_frame_5_ret,
-              stg_atomically_frame_6_ret,
-              stg_atomically_frame_7_ret)
+              ATOMICALLY_FRAME)
 {
-  W_ frame, trec, valid;
+  W_ frame, trec, valid, next_invariant, q, outer;
   IF_NOT_REG_R1(W_ rval;  rval = Sp(0);  Sp_adj(1); )
 
   frame = Sp;
   trec = StgTSO_trec(CurrentTSO);
+  "ptr" outer = foreign "C" stmGetEnclosingTRec(trec "ptr") [];
+  /* outer == NO_TREC: trec has no enclosing transaction; otherwise trec is the nested transaction that was checking an invariant. */
+  if (outer == NO_TREC) {
+    /* First time back at the atomically frame -- pick up invariants and remember the head of the queue in the frame */
+    "ptr" q = foreign "C" stmGetInvariantsToCheck(MyCapability() "ptr", trec "ptr") [];
+    StgAtomicallyFrame_next_invariant_to_check(frame) = q;
 
-  /* The TSO is not currently waiting: try to commit the transaction */
-  valid = foreign "C" stmCommitTransaction(MyCapability() "ptr", trec "ptr") [];
-  if (valid != 0) {
-    /* Transaction was valid: commit succeeded */
-    StgTSO_trec(CurrentTSO) = NO_TREC;
-    Sp = Sp + SIZEOF_StgAtomicallyFrame;
-    IF_NOT_REG_R1(Sp_adj(-1); Sp(0) = rval;)
-    jump %ENTRY_CODE(Sp(SP_OFF));
   } else {
-    /* Transaction was not valid: try again */
-    "ptr" trec = foreign "C" stmStartTransaction(MyCapability() "ptr", NO_TREC "ptr") [];
+    /* Second/subsequent time back at the atomically frame -- abort the
+     * tx that's checking the invariant and move on to the next one */
+    StgTSO_trec(CurrentTSO) = outer;
+    q = StgAtomicallyFrame_next_invariant_to_check(frame);
+    StgInvariantCheckQueue_my_execution(q) = trec;
+    foreign "C" stmAbortTransaction(MyCapability() "ptr", trec "ptr") [];
+    /* Don't free trec -- it's linked from q and will be stashed in the
+     * invariant if we eventually commit. */
+    q = StgInvariantCheckQueue_next_queue_entry(q);
+    StgAtomicallyFrame_next_invariant_to_check(frame) = q;
+    trec = outer;
+  }
+  /* On both paths the frame now records the next queue entry to check; trec is the transaction with no enclosing trec. */
+  q = StgAtomicallyFrame_next_invariant_to_check(frame);
+
+  if (q != END_INVARIANT_CHECK_QUEUE) {
+    /* We can't commit yet: another invariant to check, run inside a fresh transaction nested under trec */
+    "ptr" trec = foreign "C" stmStartTransaction(MyCapability() "ptr", trec "ptr") [];
     StgTSO_trec(CurrentTSO) = trec;
-    R1 = StgAtomicallyFrame_code(frame);
+    /* Apply the invariant's code; this frame is left on the stack, so control comes back here afterwards. */
+    next_invariant = StgInvariantCheckQueue_invariant(q);
+    R1 = StgAtomicInvariant_code(next_invariant);
     jump stg_ap_v_fast;
+
+  } else {
+
+    /* We've got no more invariants to check, try to commit */
+    valid = foreign "C" stmCommitTransaction(MyCapability() "ptr", trec "ptr") [];
+    if (valid != 0) {
+      /* Transaction was valid: commit succeeded; pop this frame and return to the frame below */
+      StgTSO_trec(CurrentTSO) = NO_TREC;
+      Sp = Sp + SIZEOF_StgAtomicallyFrame;
+      IF_NOT_REG_R1(Sp_adj(-1); Sp(0) = rval;)
+      jump %ENTRY_CODE(Sp(SP_OFF));
+    } else {
+      /* Transaction was not valid: try again from the start, with a new top-level transaction and an empty invariant queue */
+      "ptr" trec = foreign "C" stmStartTransaction(MyCapability() "ptr", NO_TREC "ptr") [];
+      StgTSO_trec(CurrentTSO) = trec;
+      StgAtomicallyFrame_next_invariant_to_check(frame) = END_INVARIANT_CHECK_QUEUE;
+      R1 = StgAtomicallyFrame_code(frame);
+      jump stg_ap_v_fast;
+    }
   }
 }
 
 INFO_TABLE_RET(stg_atomically_waiting_frame,
               ATOMICALLY_FRAME_WORDS, ATOMICALLY_FRAME_BITMAP,
-              ATOMICALLY_FRAME,
-              stg_atomically_frame_0_ret,
-              stg_atomically_frame_1_ret,
-              stg_atomically_frame_2_ret,
-              stg_atomically_frame_3_ret,
-              stg_atomically_frame_4_ret,
-              stg_atomically_frame_5_ret,
-              stg_atomically_frame_6_ret,
-              stg_atomically_frame_7_ret)
+              ATOMICALLY_FRAME)
 {
   W_ frame, trec, valid;
   IF_NOT_REG_R1(W_ rval;  rval = Sp(0);  Sp_adj(1); )
@@ -1127,40 +1111,18 @@ INFO_TABLE_RET(stg_atomically_waiting_frame,
 
 // STM catch frame --------------------------------------------------------------
 
-#define CATCH_STM_FRAME_ENTRY_TEMPLATE(label,ret)          \
-   label                                                   \
-   {                                                       \
-      IF_NOT_REG_R1(W_ rval;  rval = Sp(0);  Sp_adj(1); )  \
-      Sp = Sp + SIZEOF_StgCatchSTMFrame;                   \
-      IF_NOT_REG_R1(Sp_adj(-1); Sp(0) = rval;)             \
-      jump ret;                                            \
-   }
-
 #ifdef REG_R1
 #define SP_OFF 0
 #else
 #define SP_OFF 1
 #endif
 
-CATCH_STM_FRAME_ENTRY_TEMPLATE(stg_catch_stm_frame_0_ret,%RET_VEC(Sp(SP_OFF),0))
-CATCH_STM_FRAME_ENTRY_TEMPLATE(stg_catch_stm_frame_1_ret,%RET_VEC(Sp(SP_OFF),1))
-CATCH_STM_FRAME_ENTRY_TEMPLATE(stg_catch_stm_frame_2_ret,%RET_VEC(Sp(SP_OFF),2))
-CATCH_STM_FRAME_ENTRY_TEMPLATE(stg_catch_stm_frame_3_ret,%RET_VEC(Sp(SP_OFF),3))
-CATCH_STM_FRAME_ENTRY_TEMPLATE(stg_catch_stm_frame_4_ret,%RET_VEC(Sp(SP_OFF),4))
-CATCH_STM_FRAME_ENTRY_TEMPLATE(stg_catch_stm_frame_5_ret,%RET_VEC(Sp(SP_OFF),5))
-CATCH_STM_FRAME_ENTRY_TEMPLATE(stg_catch_stm_frame_6_ret,%RET_VEC(Sp(SP_OFF),6))
-CATCH_STM_FRAME_ENTRY_TEMPLATE(stg_catch_stm_frame_7_ret,%RET_VEC(Sp(SP_OFF),7))
-
-#if MAX_VECTORED_RTN > 8
-#error MAX_VECTORED_RTN has changed: please modify stg_catch_stm_frame too.
-#endif
-
 #if defined(PROFILING)
 #define CATCH_STM_FRAME_BITMAP 3
-#define CATCH_STM_FRAME_WORDS  3
+#define CATCH_STM_FRAME_WORDS  4
 #else
 #define CATCH_STM_FRAME_BITMAP 0
-#define CATCH_STM_FRAME_WORDS  1
+#define CATCH_STM_FRAME_WORDS  2
 #endif
 
 /* Catch frames are very similar to update frames, but when entering
@@ -1170,16 +1132,29 @@ CATCH_STM_FRAME_ENTRY_TEMPLATE(stg_catch_stm_frame_7_ret,%RET_VEC(Sp(SP_OFF),7))
 
 INFO_TABLE_RET(stg_catch_stm_frame,
               CATCH_STM_FRAME_WORDS, CATCH_STM_FRAME_BITMAP,
-              CATCH_STM_FRAME,
-              stg_catch_stm_frame_0_ret,
-              stg_catch_stm_frame_1_ret,
-              stg_catch_stm_frame_2_ret,
-              stg_catch_stm_frame_3_ret,
-              stg_catch_stm_frame_4_ret,
-              stg_catch_stm_frame_5_ret,
-              stg_catch_stm_frame_6_ret,
-              stg_catch_stm_frame_7_ret)
-CATCH_STM_FRAME_ENTRY_TEMPLATE(,%ENTRY_CODE(Sp(SP_OFF)))
+              CATCH_STM_FRAME)
+   {
+      IF_NOT_REG_R1(W_ rval;  rval = Sp(0);  Sp_adj(1); )
+      W_ r, frame, trec, outer;
+      frame = Sp;
+      trec = StgTSO_trec(CurrentTSO);
+      "ptr" outer = foreign "C" stmGetEnclosingTRec(trec "ptr") [];
+      r = foreign "C" stmCommitNestedTransaction(MyCapability() "ptr", trec "ptr") [];
+      if (r != 0) {
+        /* Commit succeeded */
+        StgTSO_trec(CurrentTSO) = outer;
+        Sp = Sp + SIZEOF_StgCatchSTMFrame;
+        IF_NOT_REG_R1(Sp_adj(-1); Sp(0) = rval;)
+        jump Sp(SP_OFF);
+      } else {
+        /* Commit failed */
+        W_ new_trec;
+        "ptr" new_trec = foreign "C" stmStartTransaction(MyCapability() "ptr", outer "ptr") [];
+        StgTSO_trec(CurrentTSO) = new_trec;
+        R1 = StgCatchSTMFrame_code(frame);
+        jump stg_ap_v_fast;
+      }
+   }
 
 
 // Primop definition ------------------------------------------------------------
@@ -1200,7 +1175,7 @@ atomicallyzh_fast
 
   /* Nested transactions are not allowed; raise an exception */
   if (old_trec != NO_TREC) {
-     R1 = GHCziIOBase_NestedAtomically_closure;
+     R1 = base_GHCziIOBase_NestedAtomically_closure;
      jump raisezh_fast;
   }
 
@@ -1210,6 +1185,7 @@ atomicallyzh_fast
 
   SET_HDR(frame,stg_atomically_frame_info, W_[CCCS]);
   StgAtomicallyFrame_code(frame) = R1;
+  StgAtomicallyFrame_next_invariant_to_check(frame) = END_INVARIANT_CHECK_QUEUE;
 
   /* Start the memory transcation */
   "ptr" new_trec = foreign "C" stmStartTransaction(MyCapability() "ptr", old_trec "ptr") [R1];
@@ -1234,6 +1210,14 @@ catchSTMzh_fast
 
   SET_HDR(frame, stg_catch_stm_frame_info, W_[CCCS]);
   StgCatchSTMFrame_handler(frame) = R2;
+  StgCatchSTMFrame_code(frame) = R1;
+
+  /* Start a nested transaction to run the body of the try block in */
+  W_ cur_trec;  
+  W_ new_trec;
+  cur_trec = StgTSO_trec(CurrentTSO);
+  "ptr" new_trec = foreign "C" stmStartTransaction(MyCapability() "ptr", cur_trec "ptr");
+  StgTSO_trec(CurrentTSO) = new_trec;
 
   /* Apply R1 to the realworld token */
   jump stg_ap_v_fast;
@@ -1266,7 +1250,6 @@ catchRetryzh_fast
   StgCatchRetryFrame_running_alt_code(frame) = 0 :: CInt; // false;
   StgCatchRetryFrame_first_code(frame) = R1;
   StgCatchRetryFrame_alt_code(frame) = R2;
-  StgCatchRetryFrame_first_code_trec(frame) = new_trec;
 
   /* Apply R1 to the realworld token */
   jump stg_ap_v_fast;
@@ -1285,54 +1268,48 @@ retryzh_fast
 
   // Find the enclosing ATOMICALLY_FRAME or CATCH_RETRY_FRAME
 retry_pop_stack:
-  trec = StgTSO_trec(CurrentTSO);
-  "ptr" outer = foreign "C" stmGetEnclosingTRec(trec "ptr") [];
   StgTSO_sp(CurrentTSO) = Sp;
   frame_type = foreign "C" findRetryFrameHelper(CurrentTSO "ptr") [];
   Sp = StgTSO_sp(CurrentTSO);
   frame = Sp;
+  trec = StgTSO_trec(CurrentTSO);
+  "ptr" outer = foreign "C" stmGetEnclosingTRec(trec "ptr") [];
 
   if (frame_type == CATCH_RETRY_FRAME) {
     // The retry reaches a CATCH_RETRY_FRAME before the atomic frame
     ASSERT(outer != NO_TREC);
+    // Abort the transaction attempting the current branch
+    foreign "C" stmAbortTransaction(MyCapability() "ptr", trec "ptr") [];
+    foreign "C" stmFreeAbortedTRec(MyCapability() "ptr", trec "ptr") [];
     if (!StgCatchRetryFrame_running_alt_code(frame) != 0::I32) {
-      // Retry in the first code: try the alternative
+      // Retry in the first branch: try the alternative
       "ptr" trec = foreign "C" stmStartTransaction(MyCapability() "ptr", outer "ptr") [];
       StgTSO_trec(CurrentTSO) = trec;
       StgCatchRetryFrame_running_alt_code(frame) = 1 :: CInt; // true;
       R1 = StgCatchRetryFrame_alt_code(frame);
       jump stg_ap_v_fast;
     } else {
-      // Retry in the alternative code: propagate
-      W_ other_trec;
-      other_trec = StgCatchRetryFrame_first_code_trec(frame);
-      r = foreign "C" stmCommitNestedTransaction(MyCapability() "ptr", other_trec "ptr") [];
-      if (r != 0) {
-        r = foreign "C" stmCommitNestedTransaction(MyCapability() "ptr", trec "ptr") [];
-      } else {
-        foreign "C" stmAbortTransaction(MyCapability() "ptr", trec "ptr") [];
-      }
-      if (r != 0) {
-        // Merge between siblings succeeded: commit it back to enclosing transaction
-        // and then propagate the retry
-        StgTSO_trec(CurrentTSO) = outer;
-        Sp = Sp + SIZEOF_StgCatchRetryFrame;
-        goto retry_pop_stack;
-      } else {
-        // Merge failed: we musn't propagate the retry.  Try both paths again.
-        "ptr" trec = foreign "C" stmStartTransaction(MyCapability() "ptr", outer "ptr") [];
-        StgCatchRetryFrame_first_code_trec(frame) = trec;
-        StgCatchRetryFrame_running_alt_code(frame) = 0 :: CInt; // false;
-        StgTSO_trec(CurrentTSO) = trec;
-        R1 = StgCatchRetryFrame_first_code(frame);
-        jump stg_ap_v_fast;
-      }
+      // Retry in the alternative code: propagate the retry
+      StgTSO_trec(CurrentTSO) = outer;
+      Sp = Sp + SIZEOF_StgCatchRetryFrame;
+      goto retry_pop_stack;
     }
   }
 
   // We've reached the ATOMICALLY_FRAME: attempt to wait 
   ASSERT(frame_type == ATOMICALLY_FRAME);
+  if (outer != NO_TREC) {
+    // We called retry while checking invariants, so abort the current
+    // invariant check (merging its TVar accesses into the parents read
+    // set so we'll wait on them)
+    foreign "C" stmAbortTransaction(MyCapability() "ptr", trec "ptr") [];
+    foreign "C" stmFreeAbortedTRec(MyCapability() "ptr", trec "ptr") [];
+    trec = outer;
+     StgTSO_trec(CurrentTSO) = trec;
+    "ptr" outer = foreign "C" stmGetEnclosingTRec(trec "ptr") [];
+  }
   ASSERT(outer == NO_TREC);
+
   r = foreign "C" stmWait(MyCapability() "ptr", CurrentTSO "ptr", trec "ptr") [];
   if (r != 0) {
     // Transaction was valid: stmWait put us on the TVars' queues, we now block
@@ -1355,6 +1332,23 @@ retry_pop_stack:
 }
 
 
+checkzh_fast
+{
+  W_ trec, closure;
+  /* Passes the closure in R1 to stmAddInvariantToCheck together with the calling thread's current transaction record. */
+  /* Args: R1 = invariant closure */
+  MAYBE_GC (R1_PTR, checkzh_fast); 
+
+  trec = StgTSO_trec(CurrentTSO);
+  closure = R1;
+  foreign "C" stmAddInvariantToCheck(MyCapability() "ptr", 
+                                     trec "ptr",
+                                     closure "ptr") [];
+  /* Nothing is loaded into a result register here: return via the info pointer at the top of the stack. */
+  jump %ENTRY_CODE(Sp(0));
+}
+
+
 newTVarzh_fast
 {
   W_ tv;
@@ -1522,7 +1516,7 @@ takeMVarzh_fast
       /* actually perform the putMVar for the thread that we just woke up */
       tso = StgMVar_head(mvar);
       PerformPut(tso,StgMVar_value(mvar));
-      foreign "C" dirtyTSO(tso "ptr") [];
+      dirtyTSO(tso);
 
 #if defined(GRAN) || defined(PAR)
       /* ToDo: check 2nd arg (mvar) is right */
@@ -1596,7 +1590,7 @@ tryTakeMVarzh_fast
        /* actually perform the putMVar for the thread that we just woke up */
        tso = StgMVar_head(mvar);
        PerformPut(tso,StgMVar_value(mvar));
-        foreign "C" dirtyTSO(tso "ptr") [];
+        dirtyTSO(tso);
 
 #if defined(GRAN) || defined(PAR)
        /* ToDo: check 2nd arg (mvar) is right */
@@ -1666,7 +1660,7 @@ putMVarzh_fast
        /* actually perform the takeMVar */
        tso = StgMVar_head(mvar);
        PerformTake(tso, R2);
-        foreign "C" dirtyTSO(tso "ptr") [];
+        dirtyTSO(tso);
       
 #if defined(GRAN) || defined(PAR)
        /* ToDo: check 2nd arg (mvar) is right */
@@ -1732,7 +1726,7 @@ tryPutMVarzh_fast
        /* actually perform the takeMVar */
        tso = StgMVar_head(mvar);
        PerformTake(tso, R2);
-        foreign "C" dirtyTSO(tso "ptr") [];
+        dirtyTSO(tso);
       
 #if defined(GRAN) || defined(PAR)
        /* ToDo: check 2nd arg (mvar) is right */
@@ -1823,17 +1817,16 @@ newBCOzh_fast
     /* R1 = instrs
        R2 = literals
        R3 = ptrs
-       R4 = itbls
-       R5 = arity
-       R6 = bitmap array
+       R4 = arity
+       R5 = bitmap array
     */
     W_ bco, bitmap_arr, bytes, words;
     
-    bitmap_arr = R6;
+    bitmap_arr = R5;
     words = BYTES_TO_WDS(SIZEOF_StgBCO) + StgArrWords_words(bitmap_arr);
     bytes = WDS(words);
 
-    ALLOC_PRIM( bytes, R1_PTR&R2_PTR&R3_PTR&R4_PTR&R6_PTR, newBCOzh_fast );
+    ALLOC_PRIM( bytes, R1_PTR&R2_PTR&R3_PTR&R5_PTR, newBCOzh_fast );
 
     bco = Hp - bytes + WDS(1);
     SET_HDR(bco, stg_BCO_info, W_[CCCS]);
@@ -1841,8 +1834,7 @@ newBCOzh_fast
     StgBCO_instrs(bco)     = R1;
     StgBCO_literals(bco)   = R2;
     StgBCO_ptrs(bco)       = R3;
-    StgBCO_itbls(bco)      = R4;
-    StgBCO_arity(bco)      = HALF_W_(R5);
+    StgBCO_arity(bco)      = HALF_W_(R4);
     StgBCO_size(bco)       = HALF_W_(words);
     
     // Copy the arity/bitmap info into the BCO
@@ -1884,6 +1876,55 @@ mkApUpd0zh_fast
     RET_P(ap);
 }
 
+infoPtrzh_fast
+{
+/* args: R1 = closure to analyze */
+/* Returns, via RET_N, the value that %GET_STD_INFO yields for the closure in R1. */
+  MAYBE_GC(R1_PTR, infoPtrzh_fast);
+
+  W_ info;
+  info = %GET_STD_INFO(R1);
+  RET_N(info);
+}
+
+closurePayloadzh_fast
+{
+/* args: R1 = closure to analyze; returns (RET_PP) two fresh arrays holding copies of the closure's payload words */
+// TODO: Consider the absence of ptrs or nonptrs as a special case ?
+
+    MAYBE_GC(R1_PTR, closurePayloadzh_fast);
+    // ptrs / nptrs: payload field counts read from the closure's info table; p: index shared by both copy loops
+    W_ info, ptrs, nptrs, p, ptrs_arr, nptrs_arr;
+    info  = %GET_STD_INFO(R1);
+    ptrs  = TO_W_(%INFO_PTRS(info)); 
+    nptrs = TO_W_(%INFO_NPTRS(info));
+    p = 0;
+    // First result: an array with the MUT_ARR_PTRS_FROZEN header and room for ptrs words, carved from the top of the heap.
+    ALLOC_PRIM (SIZEOF_StgMutArrPtrs + WDS(ptrs), R1_PTR, closurePayloadzh_fast);
+    ptrs_arr = Hp - SIZEOF_StgMutArrPtrs - WDS(ptrs) + WDS(1);
+    SET_HDR(ptrs_arr, stg_MUT_ARR_PTRS_FROZEN_info, W_[CCCS]);
+    StgMutArrPtrs_ptrs(ptrs_arr) = ptrs;
+for:  // copy payload words 0 .. ptrs-1 into ptrs_arr
+    if(p < ptrs) {
+        W_[ptrs_arr + SIZEOF_StgMutArrPtrs + WDS(p)] = StgClosure_payload(R1,p);
+        p = p + 1;
+        goto for;
+    }
+    // Second result: an ARR_WORDS array with room for nptrs words. NOTE(review): this ALLOC_PRIM names the primop itself as its re-entry point -- confirm a retry here restarts from the top.
+    ALLOC_PRIM (SIZEOF_StgArrWords + WDS(nptrs), R1_PTR, closurePayloadzh_fast);
+    nptrs_arr = Hp - SIZEOF_StgArrWords - WDS(nptrs) + WDS(1);
+    SET_HDR(nptrs_arr, stg_ARR_WORDS_info, W_[CCCS]);
+    StgArrWords_words(nptrs_arr) = nptrs;
+    p = 0;
+for2:  // copy payload words ptrs .. ptrs+nptrs-1 into nptrs_arr
+    if(p < nptrs) {
+        W_[BYTE_ARR_CTS(nptrs_arr) + WDS(p)] = StgClosure_payload(R1, p+ptrs);
+        p = p + 1;
+        goto for2;
+    }
+    RET_PP(ptrs_arr, nptrs_arr);
+}
+
 /* -----------------------------------------------------------------------------
    Thread I/O blocking primitives
    -------------------------------------------------------------------------- */
@@ -1975,8 +2016,11 @@ delayzh_fast
 #else
 
     W_ time;
-    time = foreign "C" getourtimeofday();
-    target = (R1 / (TICK_MILLISECS*1000)) + time;
+    W_ divisor;
+    time = foreign "C" getourtimeofday() [R1];
+    divisor = TO_W_(RtsFlags_MiscFlags_tickInterval(RtsFlags))*1000;
+    target = ((R1 + divisor - 1) / divisor) /* divide rounding up */
+           + time + 1; /* Add 1 as getourtimeofday rounds down */
     StgTSO_block_info(CurrentTSO) = target;
 
     /* Insert the new thread in the sleeping queue. */