[project @ 2003-07-23 13:39:11 by simonmar]
[ghc-hetmet.git] / ghc / rts / PrimOps.hc
index 0d2e752..45717e5 100644 (file)
@@ -1,7 +1,7 @@
 /* -----------------------------------------------------------------------------
- * $Id: PrimOps.hc,v 1.98 2002/04/23 11:22:12 simonmar Exp $
+ * $Id: PrimOps.hc,v 1.110 2003/07/23 13:39:11 simonmar Exp $
  *
- * (c) The GHC Team, 1998-2000
+ * (c) The GHC Team, 1998-2002
  *
  * Primitive functions / data
  *
 #include "BlockAlloc.h" /* tmp */
 #include "StablePriv.h"
 #include "StgRun.h"
-#include "Itimer.h"
+#include "Timer.h"      /* TICK_MILLISECS */
 #include "Prelude.h"
+#ifndef mingw32_TARGET_OS
+#include "Itimer.h"    /* getourtimeofday() */
+#endif
+
+#ifdef HAVE_SYS_TYPES_H
+# include <sys/types.h>
+#endif
+
+#include <stdlib.h>
+
+#ifdef mingw32_TARGET_OS
+#include <windows.h>
+#include "win32/AsyncIO.h"
+#endif
 
 /* ** temporary **
 
@@ -52,10 +66,17 @@ StgWord GHC_ZCCReturnable_static_info[1];
  * We only define the cases actually used, to avoid having too much
  * garbage in this section.  Warning: any bugs in here will be hard to
  * track down.
+ *
+ * The return convention for an unboxed tuple is as follows:
+ *   - fit as many fields as possible in registers (as per the
+ *     function fast-entry point calling convention).
+ *   - sort the rest of the fields into pointers and non-pointers.
+ *     push the pointers on the stack, followed by the non-pointers.
+ *     (so the pointers have higher addresses).
  */
 
 /*------ All Regs available */
-#if defined(REG_R8)
+#if MAX_REAL_VANILLA_REG == 8
 # define RET_P(a)     R1.w = (W_)(a); JMP_(ENTRY_CODE(Sp[0]));
 # define RET_N(a)     RET_P(a)
 
@@ -75,17 +96,11 @@ StgWord GHC_ZCCReturnable_static_info[1];
         R1.w = (W_)(a); R2.w = (W_)(b); R3.w = (W_)(c); R4.w = (W_)(d); \
        JMP_(ENTRY_CODE(Sp[0]));
 
-# define RET_NNPNNP(a,b,c,d,e,f) \
-        R1.w = (W_)(a); R2.w = (W_)(b); R3.w = (W_)(c); \
-        R4.w = (W_)(d); R5.w = (W_)(e); R6.w = (W_)(f); \
-       JMP_(ENTRY_CODE(Sp[0]));
-
-#elif defined(REG_R7) || defined(REG_R6) || defined(REG_R5) || \
-      defined(REG_R4) || defined(REG_R3)
+#elif MAX_REAL_VANILLA_REG > 2 && MAX_REAL_VANILLA_REG < 8
 # error RET_n macros not defined for this setup.
 
 /*------ 2 Registers available */
-#elif defined(REG_R2)
+#elif MAX_REAL_VANILLA_REG == 2
 
 # define RET_P(a)     R1.w = (W_)(a); JMP_(ENTRY_CODE(Sp[0]));
 # define RET_N(a)     RET_P(a)
@@ -95,45 +110,38 @@ StgWord GHC_ZCCReturnable_static_info[1];
 # define RET_NN(a,b)   RET_PP(a,b)
 # define RET_NP(a,b)   RET_PP(a,b)
 
-# define RET_PPP(a,b,c) \
-       R1.w = (W_)(a); R2.w = (W_)(b); Sp[-1] = (W_)(c); Sp -= 1; \
+# define RET_PPP(a,b,c)                                \
+       R1.w = (W_)(a);                         \
+       R2.w = (W_)(b);                         \
+       Sp[-1] = (W_)(c);                       \
+       Sp -= 1;                                \
        JMP_(ENTRY_CODE(Sp[1]));
-# define RET_NNP(a,b,c) \
-       R1.w = (W_)(a); R2.w = (W_)(b); Sp[-1] = (W_)(c); Sp -= 1; \
+
+# define RET_NNP(a,b,c)                                \
+       R1.w = (W_)(a);                         \
+       R2.w = (W_)(b);                         \
+       Sp[-1] = (W_)(c);                       \
+       Sp -= 1;                                \
        JMP_(ENTRY_CODE(Sp[1]));
 
 # define RET_NNNP(a,b,c,d)                     \
        R1.w = (W_)(a);                         \
         R2.w = (W_)(b);                        \
-    /*  Sp[-3] = ARGTAG(1); */                 \
         Sp[-2] = (W_)(c);                      \
         Sp[-1] = (W_)(d);                      \
-        Sp -= 3;                               \
-        JMP_(ENTRY_CODE(Sp[3]));
+        Sp -= 2;                               \
+        JMP_(ENTRY_CODE(Sp[2]));
 
 # define RET_NPNP(a,b,c,d)                     \
        R1.w = (W_)(a);                         \
         R2.w = (W_)(b);                        \
-    /*  Sp[-3] = ARGTAG(1); */                 \
         Sp[-2] = (W_)(c);                      \
         Sp[-1] = (W_)(d);                      \
-        Sp -= 3;                               \
-        JMP_(ENTRY_CODE(Sp[3]));
-
-# define RET_NNPNNP(a,b,c,d,e,f)               \
-        R1.w = (W_)(a);                                \
-       R2.w = (W_)(b);                         \
-       Sp[-6] = (W_)(c);                       \
-       /* Sp[-5] = ARGTAG(1); */               \
-       Sp[-4] = (W_)(d);                       \
-       /* Sp[-3] = ARGTAG(1); */               \
-       Sp[-2] = (W_)(e);                       \
-       Sp[-1] = (W_)(f);                       \
-       Sp -= 6;                                \
-       JMP_(ENTRY_CODE(Sp[6]));
+        Sp -= 2;                               \
+        JMP_(ENTRY_CODE(Sp[2]));
 
 /*------ 1 Register available */
-#elif defined(REG_R1)
+#elif MAX_REAL_VANILLA_REG == 1
 # define RET_P(a)     R1.w = (W_)(a); JMP_(ENTRY_CODE(Sp[0]));
 # define RET_N(a)     RET_P(a)
 
@@ -143,88 +151,54 @@ StgWord GHC_ZCCReturnable_static_info[1];
                       JMP_(ENTRY_CODE(Sp[2]));
 # define RET_NP(a,b)   RET_PP(a,b)
 
-# define RET_PPP(a,b,c) \
-       R1.w = (W_)(a); Sp[-2] = (W_)(b); Sp[-1] = (W_)(c); Sp -= 2; \
+# define RET_PPP(a,b,c)                                \
+       R1.w = (W_)(a);                         \
+       Sp[-2] = (W_)(b);                       \
+       Sp[-1] = (W_)(c);                       \
+       Sp -= 2;                                \
+       JMP_(ENTRY_CODE(Sp[2]));
+
+# define RET_NNP(a,b,c)                                \
+       R1.w = (W_)(a);                         \
+       Sp[-2] = (W_)(b);                       \
+       Sp[-1] = (W_)(c);                       \
+       Sp -= 2;                                \
        JMP_(ENTRY_CODE(Sp[2]));
-# define RET_NNP(a,b,c) \
-       R1.w = (W_)(a); Sp[-2] = (W_)(b); Sp[-1] = (W_)(c); Sp -= 3; \
-       JMP_(ENTRY_CODE(Sp[3]));
 
 # define RET_NNNP(a,b,c,d)                     \
        R1.w = (W_)(a);                         \
-    /*  Sp[-5] = ARGTAG(1); */                 \
-        Sp[-4] = (W_)(b);                      \
-    /*  Sp[-3] = ARGTAG(1); */                 \
+        Sp[-3] = (W_)(b);                      \
         Sp[-2] = (W_)(c);                      \
         Sp[-1] = (W_)(d);                      \
-        Sp -= 5;                               \
-        JMP_(ENTRY_CODE(Sp[5]));
+        Sp -= 3;                               \
+        JMP_(ENTRY_CODE(Sp[3]));
 
 # define RET_NPNP(a,b,c,d)                     \
        R1.w = (W_)(a);                         \
-        Sp[-4] = (W_)(b);                      \
-    /*  Sp[-3] = ARGTAG(1); */                 \
-        Sp[-2] = (W_)(c);                      \
+        Sp[-3] = (W_)(c);                      \
+        Sp[-2] = (W_)(b);                      \
         Sp[-1] = (W_)(d);                      \
-        Sp -= 4;                               \
-        JMP_(ENTRY_CODE(Sp[4]));
-
-# define RET_NNPNNP(a,b,c,d,e,f)               \
-        R1.w = (W_)(a);                                \
-       Sp[-1] = (W_)(f);                       \
-       Sp[-2] = (W_)(e);                       \
-       /* Sp[-3] = ARGTAG(1); */               \
-       Sp[-4] = (W_)(d);                       \
-       /* Sp[-5] = ARGTAG(1); */               \
-       Sp[-6] = (W_)(c);                       \
-       Sp[-7] = (W_)(b);                       \
-       /* Sp[-8] = ARGTAG(1); */               \
-       Sp -= 8;                                \
-       JMP_(ENTRY_CODE(Sp[8]));
+        Sp -= 3;                               \
+        JMP_(ENTRY_CODE(Sp[3]));
 
 #else /* 0 Regs available */
 
-#define PUSH_P(o,x) Sp[-o] = (W_)(x)
-
-#ifdef DEBUG
-#define PUSH_N(o,x) Sp[1-o] = (W_)(x);  Sp[-o] = ARG_TAG(1);
-#else
-#define PUSH_N(o,x) Sp[1-o] = (W_)(x);
-#endif
+#define PUSH(o,x) Sp[-o] = (W_)(x)
 
 #define PUSHED(m)   Sp -= (m); JMP_(ENTRY_CODE(Sp[m]));
 
-/* Here's how to construct these macros:
- *
- *   N = number of N's in the name;
- *   P = number of P's in the name;
- *   s = N * 2 + P;
- *   while (nonNull(name)) {
- *     if (nextChar == 'P') {
- *       PUSH_P(s,_);
- *       s -= 1;
- *     } else {
- *       PUSH_N(s,_);
- *       s -= 2
- *     }
- *   }
- *   PUSHED(N * 2 + P);
- */
-
-# define RET_P(a)     PUSH_P(1,a); PUSHED(1)
-# define RET_N(a)     PUSH_N(2,a); PUSHED(2)
-
-# define RET_PP(a,b)   PUSH_P(2,a); PUSH_P(1,b); PUSHED(2)
-# define RET_NN(a,b)   PUSH_N(4,a); PUSH_N(2,b); PUSHED(4)
-# define RET_NP(a,b)   PUSH_N(3,a); PUSH_P(1,b); PUSHED(3)
+# define RET_P(a)     PUSH(1,a); PUSHED(1)
+# define RET_N(a)     PUSH(1,a); PUSHED(1)
 
-# define RET_PPP(a,b,c) PUSH_P(3,a); PUSH_P(2,b); PUSH_P(1,c); PUSHED(3)
-# define RET_NNP(a,b,c) PUSH_N(5,a); PUSH_N(3,b); PUSH_P(1,c); PUSHED(5)
+# define RET_PP(a,b)   PUSH(2,a); PUSH(1,b); PUSHED(2)
+# define RET_NN(a,b)   PUSH(2,a); PUSH(1,b); PUSHED(2)
+# define RET_NP(a,b)   PUSH(2,a); PUSH(1,b); PUSHED(2)
 
-# define RET_NNNP(a,b,c,d) PUSH_N(7,a); PUSH_N(5,b); PUSH_N(3,c); PUSH_P(1,d); PUSHED(7)       
-# define RET_NPNP(a,b,c,d) PUSH_N(6,a); PUSH_P(4,b); PUSH_N(3,c); PUSH_P(1,d); PUSHED(6)       
-# define RET_NNPNNP(a,b,c,d,e,f) PUSH_N(10,a); PUSH_N(8,b); PUSH_P(6,c); PUSH_N(5,d); PUSH_N(3,e); PUSH_P(1,f); PUSHED(10)
+# define RET_PPP(a,b,c) PUSH(3,a); PUSH(2,b); PUSH(1,c); PUSHED(3)
+# define RET_NNP(a,b,c) PUSH(3,a); PUSH(2,b); PUSH(1,c); PUSHED(3)
 
+# define RET_NNNP(a,b,c,d) PUSH(4,a); PUSH(3,b); PUSH(2,c); PUSH(1,d); PUSHED(4)       
+# define RET_NPNP(a,b,c,d) PUSH(4,a); PUSH(3,c); PUSH(2,b); PUSH(1,d); PUSHED(4)       
 #endif
 
 /*-----------------------------------------------------------------------------
@@ -331,7 +305,7 @@ FN_(newMutVarzh_fast)
   /* Args: R1.p = initialisation value */
   FB_
 
-  HP_CHK_GEN_TICKY(sizeofW(StgMutVar), R1_PTR, newMutVarzh_fast,);
+  HP_CHK_GEN_TICKY(sizeofW(StgMutVar), R1_PTR, newMutVarzh_fast);
   TICK_ALLOC_PRIM(sizeofW(StgHeader)+1,1, 0); /* hack, dependent on rep. */
   CCS_ALLOC(CCCS,sizeofW(StgMutVar));
 
@@ -344,6 +318,59 @@ FN_(newMutVarzh_fast)
   FE_
 }
 
+FN_(atomicModifyMutVarzh_fast)
+{
+   StgMutVar* mv;
+   StgClosure *z, *x, *y, *r;
+   FB_
+   /* Args: R1.p :: MutVar#,  R2.p :: a -> (a,b) */
+
+   /* If x is the current contents of the MutVar#, then
+      we want to make the new contents point to
+
+         (sel_0 (f x))
+
+      and the return value is
+
+         (sel_1 (f x))
+
+      obviously we can share (f x).  So we build three thunks, each
+      THUNK_SIZE(n) = HS + max(n, MIN_UPD_SIZE) words for n payload words:
+
+         z = [stg_ap_2 f x]   THUNK_SIZE(2)
+         y = [stg_sel_0 z]    THUNK_SIZE(1)
+         r = [stg_sel_1 z]    THUNK_SIZE(1)
+
+      z occupies the last THUNK_SIZE(2) words up to and including Hp,
+      with y and then r directly beneath it (so r == Hp + 1 - SIZE).
+      THUNK_SIZE counts words, so every address is computed on a word
+      pointer (Hp / StgPtr) and only then cast to StgClosure*; casting
+      first would scale the offset by sizeof(StgClosure) instead of by
+      one word.
+   */
+
+#define THUNK_SIZE(n) (sizeofW(StgHeader) + stg_max((n), MIN_UPD_SIZE))
+#define SIZE (THUNK_SIZE(2) + THUNK_SIZE(1) + THUNK_SIZE(1))
+
+   HP_CHK_GEN_TICKY(SIZE, R1_PTR|R2_PTR, atomicModifyMutVarzh_fast);
+   CCS_ALLOC(CCCS,SIZE);
+
+   /* the old contents are read here and overwritten once y exists */
+   mv = (StgMutVar *)R1.cl;
+   x = mv->var;
+
+   TICK_ALLOC_UP_THK(2,0); // XXX
+   z = (StgClosure *)(Hp - THUNK_SIZE(2) + 1);
+   SET_HDR(z, (StgInfoTable *)&stg_ap_2_upd_info, CCCS);
+   z->payload[0] = R2.cl;
+   z->payload[1] = x;
+
+   TICK_ALLOC_UP_THK(1,1); // XXX
+   y = (StgClosure *)((StgPtr)z - THUNK_SIZE(1));
+   SET_HDR(y, &stg_sel_0_upd_info, CCCS);
+   y->payload[0] = z;
+
+   mv->var = y;
+
+   TICK_ALLOC_UP_THK(1,1); // XXX
+   r = (StgClosure *)((StgPtr)y - THUNK_SIZE(1));
+   SET_HDR(r, &stg_sel_1_upd_info, CCCS);
+   r->payload[0] = z;
+
+   RET_P(r);
+   FE_
+}
+
 /* -----------------------------------------------------------------------------
    Foreign Object Primitives
    -------------------------------------------------------------------------- */
@@ -355,7 +382,7 @@ FN_(mkForeignObjzh_fast)
   StgForeignObj *result;
   FB_
 
-  HP_CHK_GEN_TICKY(sizeofW(StgForeignObj), NO_PTRS, mkForeignObjzh_fast,);
+  HP_CHK_GEN_TICKY(sizeofW(StgForeignObj), NO_PTRS, mkForeignObjzh_fast);
   TICK_ALLOC_PRIM(sizeofW(StgHeader),
                  sizeofW(StgForeignObj)-sizeofW(StgHeader), 0);
   CCS_ALLOC(CCCS,sizeofW(StgForeignObj)); /* ccs prof */
@@ -399,7 +426,7 @@ FN_(mkWeakzh_fast)
     R3.cl = &stg_NO_FINALIZER_closure;
   }
 
-  HP_CHK_GEN_TICKY(sizeofW(StgWeak),R1_PTR|R2_PTR|R3_PTR, mkWeakzh_fast,);
+  HP_CHK_GEN_TICKY(sizeofW(StgWeak),R1_PTR|R2_PTR|R3_PTR, mkWeakzh_fast);
   TICK_ALLOC_PRIM(sizeofW(StgHeader)+1,  // +1 is for the link field
                  sizeofW(StgWeak)-sizeofW(StgHeader)-1, 0);
   CCS_ALLOC(CCCS,sizeofW(StgWeak)); /* ccs prof */
@@ -499,7 +526,7 @@ FN_(int2Integerzh_fast)
    FB_
 
    val = R1.i;
-   HP_CHK_GEN_TICKY(sizeofW(StgArrWords)+1, NO_PTRS, int2Integerzh_fast,);
+   HP_CHK_GEN_TICKY(sizeofW(StgArrWords)+1, NO_PTRS, int2Integerzh_fast);
    TICK_ALLOC_PRIM(sizeofW(StgArrWords),1,0);
    CCS_ALLOC(CCCS,sizeofW(StgArrWords)+1); /* ccs prof */
 
@@ -536,7 +563,7 @@ FN_(word2Integerzh_fast)
    FB_
 
    val = R1.w;
-   HP_CHK_GEN_TICKY(sizeofW(StgArrWords)+1, NO_PTRS, word2Integerzh_fast,)
+   HP_CHK_GEN_TICKY(sizeofW(StgArrWords)+1, NO_PTRS, word2Integerzh_fast)
    TICK_ALLOC_PRIM(sizeofW(StgArrWords),1,0);
    CCS_ALLOC(CCCS,sizeofW(StgArrWords)+1); /* ccs prof */
 
@@ -585,7 +612,7 @@ FN_(int64ToIntegerzh_fast)
        /* minimum is one word */
        words_needed = 1;
    }
-   HP_CHK_GEN_TICKY(sizeofW(StgArrWords)+words_needed, NO_PTRS, int64ToIntegerzh_fast,)
+   HP_CHK_GEN_TICKY(sizeofW(StgArrWords)+words_needed, NO_PTRS, int64ToIntegerzh_fast)
    TICK_ALLOC_PRIM(sizeofW(StgArrWords),words_needed,0);
    CCS_ALLOC(CCCS,sizeofW(StgArrWords)+words_needed); /* ccs prof */
 
@@ -636,7 +663,7 @@ FN_(word64ToIntegerzh_fast)
    } else {
       words_needed = 1;
    }
-   HP_CHK_GEN_TICKY(sizeofW(StgArrWords)+words_needed, NO_PTRS, word64ToIntegerzh_fast,)
+   HP_CHK_GEN_TICKY(sizeofW(StgArrWords)+words_needed, NO_PTRS, word64ToIntegerzh_fast)
    TICK_ALLOC_PRIM(sizeofW(StgArrWords),words_needed,0);
    CCS_ALLOC(CCCS,sizeofW(StgArrWords)+words_needed); /* ccs prof */
 
@@ -952,7 +979,7 @@ FN_(decodeFloatzh_fast)
   /* arguments: F1 = Float# */
   arg = F1;
 
-  HP_CHK_GEN_TICKY(sizeofW(StgArrWords)+1, NO_PTRS, decodeFloatzh_fast,);
+  HP_CHK_GEN_TICKY(sizeofW(StgArrWords)+1, NO_PTRS, decodeFloatzh_fast);
   TICK_ALLOC_PRIM(sizeofW(StgArrWords),1,0);
   CCS_ALLOC(CCCS,sizeofW(StgArrWords)+1); /* ccs prof */
 
@@ -984,7 +1011,7 @@ FN_(decodeDoublezh_fast)
   /* arguments: D1 = Double# */
   arg = D1;
 
-  HP_CHK_GEN_TICKY(ARR_SIZE, NO_PTRS, decodeDoublezh_fast,);
+  HP_CHK_GEN_TICKY(ARR_SIZE, NO_PTRS, decodeDoublezh_fast);
   TICK_ALLOC_PRIM(sizeofW(StgArrWords),DOUBLE_MANTISSA_SIZE,0);
   CCS_ALLOC(CCCS,ARR_SIZE); /* ccs prof */
 
@@ -1036,8 +1063,7 @@ FN_(forkProcesszh_fast)
 
   R1.i = RET_STGCALL1(StgInt, forkProcess, CurrentTSO);
 
-  JMP_(ENTRY_CODE(Sp[0]));
-
+  RET_N(R1.i);
   FE_
 }
 
@@ -1059,9 +1085,11 @@ FN_(myThreadIdzh_fast)
 FN_(labelThreadzh_fast)
 {
   FB_
-  /* args: R1.p = Addr# */
+  /* args: 
+       R1.p = ThreadId#
+       R2.p = Addr# */
 #ifdef DEBUG
-  STGCALL2(labelThread,CurrentTSO,(char *)R1.p);
+  STGCALL2(labelThread,R1.p,(char *)R2.p);
 #endif
   JMP_(ENTRY_CODE(Sp[0]));
   FE_
@@ -1118,7 +1146,7 @@ FN_(newMVarzh_fast)
   FB_
   /* args: none */
 
-  HP_CHK_GEN_TICKY(sizeofW(StgMVar), NO_PTRS, newMVarzh_fast,);
+  HP_CHK_GEN_TICKY(sizeofW(StgMVar), NO_PTRS, newMVarzh_fast);
   TICK_ALLOC_PRIM(sizeofW(StgMutVar)-1, // consider head,tail,link as admin wds
                  1, 0);
   CCS_ALLOC(CCCS,sizeofW(StgMVar)); /* ccs prof */
@@ -1149,8 +1177,7 @@ FN_(newMVarzh_fast)
 
 #define PerformPut(tso) ({                             \
     StgClosure *val = (StgClosure *)(tso)->sp[2];      \
-    (tso)->sp[2] = (W_)&stg_gc_noregs_info;            \
-    (tso)->sp += 2;                                    \
+    (tso)->sp += 3;                                    \
     val;                                               \
   })
 
@@ -1441,7 +1468,7 @@ FN_(makeStableNamezh_fast)
   StgStableName *sn_obj;
   FB_
 
-  HP_CHK_GEN_TICKY(sizeofW(StgStableName), R1_PTR, makeStableNamezh_fast,);
+  HP_CHK_GEN_TICKY(sizeofW(StgStableName), R1_PTR, makeStableNamezh_fast);
   TICK_ALLOC_PRIM(sizeofW(StgHeader), 
                  sizeofW(StgStableName)-sizeofW(StgHeader), 0);
   CCS_ALLOC(CCCS,sizeofW(StgStableName)); /* ccs prof */
@@ -1496,20 +1523,36 @@ FN_(newBCOzh_fast)
      R2.p = literals
      R3.p = ptrs
      R4.p = itbls
+     R5.i = arity
+     R6.p = bitmap array
   */
   StgBCO *bco;
+  nat size;
+  StgArrWords *bitmap_arr;
   FB_
 
-  HP_CHK_GEN_TICKY(sizeofW(StgBCO),R1_PTR|R2_PTR|R3_PTR|R4_PTR, newBCOzh_fast,);
-  TICK_ALLOC_PRIM(sizeofW(StgHeader), sizeofW(StgBCO)-sizeofW(StgHeader), 0);
-  CCS_ALLOC(CCCS,sizeofW(StgBCO)); /* ccs prof */
-  bco = (StgBCO *) (Hp + 1 - sizeofW(StgBCO));
-  SET_HDR(bco, &stg_BCO_info, CCCS);
+  bitmap_arr = (StgArrWords *)R6.cl;
+  size = sizeofW(StgBCO) + bitmap_arr->words;
+  HP_CHK_GEN_TICKY(size,R1_PTR|R2_PTR|R3_PTR|R4_PTR|R6_PTR, newBCOzh_fast);
+  TICK_ALLOC_PRIM(size, size-sizeofW(StgHeader), 0);
+  CCS_ALLOC(CCCS,size); /* ccs prof */
+  bco = (StgBCO *) (Hp + 1 - size);
+  SET_HDR(bco, (const StgInfoTable *)&stg_BCO_info, CCCS);
 
   bco->instrs     = (StgArrWords*)R1.cl;
   bco->literals   = (StgArrWords*)R2.cl;
   bco->ptrs       = (StgMutArrPtrs*)R3.cl;
   bco->itbls      = (StgArrWords*)R4.cl;
+  bco->arity      = R5.w;
+  bco->size       = size;
+
+  // Copy the arity/bitmap info into the BCO
+  { 
+    int i;
+    for (i = 0; i < bitmap_arr->words; i++) {
+       bco->bitmap[i] = bitmap_arr->payload[i];
+    }
+  }
 
   TICK_RET_UNBOXED_TUP(1);
   RET_P(bco);
@@ -1518,15 +1561,21 @@ FN_(newBCOzh_fast)
 
 FN_(mkApUpd0zh_fast)
 {
-  /* R1.p = the fn for the AP_UPD
-  */
-  StgAP_UPD* ap;
+  // R1.p = the BCO# for the AP
+  //
+  StgPAP* ap;
   FB_
-  HP_CHK_GEN_TICKY(AP_sizeW(0), R1_PTR, mkApUpd0zh_fast,);
-  TICK_ALLOC_PRIM(sizeofW(StgHeader), AP_sizeW(0)-sizeofW(StgHeader), 0);
-  CCS_ALLOC(CCCS,AP_sizeW(0)); /* ccs prof */
-  ap = (StgAP_UPD *) (Hp + 1 - AP_sizeW(0));
-  SET_HDR(ap, &stg_AP_UPD_info, CCCS);
+
+  // This function is *only* used to wrap zero-arity BCOs in an
+  // updatable wrapper (see ByteCodeLink.lhs).  An AP thunk is always
+  // saturated and always points directly to a FUN or BCO.
+  ASSERT(get_itbl(R1.cl)->type == BCO && ((StgBCO *)R1.p)->arity == 0);
+
+  HP_CHK_GEN_TICKY(PAP_sizeW(0), R1_PTR, mkApUpd0zh_fast);
+  TICK_ALLOC_PRIM(sizeofW(StgHeader), PAP_sizeW(0)-sizeofW(StgHeader), 0);
+  CCS_ALLOC(CCCS,PAP_sizeW(0)); /* ccs prof */
+  ap = (StgPAP *) (Hp + 1 - PAP_sizeW(0));
+  SET_HDR(ap, &stg_AP_info, CCCS);
 
   ap->n_args = 0;
   ap->fun = R1.cl;
@@ -1570,15 +1619,33 @@ FN_(waitWritezh_fast)
 
 FN_(delayzh_fast)
 {
+#ifdef mingw32_TARGET_OS
+  StgAsyncIOResult* ares;
+  unsigned int reqID;
+#else
   StgTSO *t, *prev;
   nat target;
+#endif
   FB_
     /* args: R1.i */
     ASSERT(CurrentTSO->why_blocked == NotBlocked);
     CurrentTSO->why_blocked = BlockedOnDelay;
 
     ACQUIRE_LOCK(&sched_mutex);
-
+#ifdef mingw32_TARGET_OS
+    /* could probably allocate this on the heap instead */
+    ares = (StgAsyncIOResult*)RET_STGCALL2(P_,stgMallocBytes,sizeof(StgAsyncIOResult), "asyncWritezh_fast");
+    reqID = RET_STGCALL1(W_,addDelayRequest,R1.i);
+    ares->reqID   = reqID;
+    ares->len     = 0;
+    ares->errCode = 0;
+    CurrentTSO->block_info.async_result = ares;
+    /* Having all async-blocked threads reside on the blocked_queue simplifies matters, so
+     * change the status to OnDoProc & put the delayed thread on the blocked_queue.
+     */
+    CurrentTSO->why_blocked = BlockedOnDoProc;
+    APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
+#else
     target = (R1.i / (TICK_MILLISECS*1000)) + getourtimeofday();
     CurrentTSO->block_info.target = target;
 
@@ -1596,9 +1663,76 @@ FN_(delayzh_fast)
     } else {
        prev->link = CurrentTSO;
     }
-
+#endif
     RELEASE_LOCK(&sched_mutex);
     JMP_(stg_block_noregs);
   FE_
 }
 
+#ifdef mingw32_TARGET_OS
+FN_(asyncReadzh_fast)
+{
+  StgAsyncIOResult* ares;
+  unsigned int reqID;
+  FB_
+    /* Start an asynchronous read and block the current thread on it.  args: R1.i = fd, R2.i = isSock, R3.i = len, R4.p = buf */
+    ASSERT(CurrentTSO->why_blocked == NotBlocked);
+    CurrentTSO->why_blocked = BlockedOnRead;
+    ACQUIRE_LOCK(&sched_mutex);
+    /* per-request StgAsyncIOResult from stgMallocBytes, attached to the TSO below; could probably allocate this on the heap instead */
+    ares = (StgAsyncIOResult*)RET_STGCALL2(P_,stgMallocBytes,sizeof(StgAsyncIOResult), "asyncReadzh_fast");
+    reqID = RET_STGCALL5(W_,addIORequest,R1.i,FALSE,R2.i,R3.i,(char*)R4.p);
+    ares->reqID   = reqID;
+    ares->len     = 0;
+    ares->errCode = 0;
+    CurrentTSO->block_info.async_result = ares;
+    APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
+    RELEASE_LOCK(&sched_mutex);
+    JMP_(stg_block_async);
+  FE_
+}
+
+FN_(asyncWritezh_fast)
+{
+  StgAsyncIOResult* ares;
+  unsigned int reqID;
+  FB_
+    /* Start an asynchronous write (addIORequest is passed TRUE where asyncReadzh_fast passes FALSE) and block the current thread on it. */
+    /* args: R1.i = fd, R2.i = isSock, R3.i = len, R4.p = buf */
+    ASSERT(CurrentTSO->why_blocked == NotBlocked);
+    CurrentTSO->why_blocked = BlockedOnWrite;
+    ACQUIRE_LOCK(&sched_mutex);
+    ares = (StgAsyncIOResult*)RET_STGCALL2(P_,stgMallocBytes,sizeof(StgAsyncIOResult), "asyncWritezh_fast");
+    reqID = RET_STGCALL5(W_,addIORequest,R1.i,TRUE,R2.i,R3.i,(char*)R4.p);
+    ares->reqID   = reqID;
+    ares->len     = 0;
+    ares->errCode = 0;
+    CurrentTSO->block_info.async_result = ares;
+    APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
+    RELEASE_LOCK(&sched_mutex);
+    JMP_(stg_block_async);
+  FE_
+}
+
+FN_(asyncDoProczh_fast)
+{
+  StgAsyncIOResult* ares;
+  unsigned int reqID;
+  FB_
+    /* Submit proc/param via addDoProcRequest and block the current thread on it.  args: R1.p = proc, R2.p = param */
+    ASSERT(CurrentTSO->why_blocked == NotBlocked);
+    CurrentTSO->why_blocked = BlockedOnDoProc;
+    ACQUIRE_LOCK(&sched_mutex);
+    /* per-request StgAsyncIOResult from stgMallocBytes, attached to the TSO below; could probably allocate this on the heap instead */
+    ares = (StgAsyncIOResult*)RET_STGCALL2(P_,stgMallocBytes,sizeof(StgAsyncIOResult), "asyncDoProczh_fast");
+    reqID = RET_STGCALL2(W_,addDoProcRequest,R1.p,R2.p);
+    ares->reqID   = reqID;
+    ares->len     = 0;
+    ares->errCode = 0;
+    CurrentTSO->block_info.async_result = ares;
+    APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
+    RELEASE_LOCK(&sched_mutex);
+    JMP_(stg_block_async);
+  FE_
+}
+#endif