make the smp way RTS-only, normal libraries now work with -smp
[ghc-hetmet.git] / ghc / rts / Interpreter.c
index 0df0f99..56e9bb6 100644 (file)
@@ -4,12 +4,7 @@
  * Copyright (c) The GHC Team, 1994-2002.
  * ---------------------------------------------------------------------------*/
 
-#if !defined(SMP)
 #include "PosixSource.h"
-#else
-/* Hack and slash.. */
-#include "Stg.h"
-#endif
 #include "Rts.h"
 #include "RtsAPI.h"
 #include "RtsUtils.h"
 #include "Schedule.h"
 #include "RtsFlags.h"
 #include "Storage.h"
+#include "LdvProfile.h"
 #include "Updates.h"
 #include "Sanity.h"
+#include "Liveness.h"
 
 #include "Bytecodes.h"
 #include "Printer.h"
 #include "Disassembler.h"
 #include "Interpreter.h"
 
+#include <string.h>     /* for memcpy */
+#ifdef HAVE_ERRNO_H
+#include <errno.h>
+#endif
+
 
 /* --------------------------------------------------------------------------
  * The bytecode interpreter
@@ -41,7 +43,7 @@
 
 #define BCO_NEXT      instrs[bciPtr++]
 #define BCO_PTR(n)    (W_)ptrs[n]
-#define BCO_LIT(n)    (W_)literals[n]
+#define BCO_LIT(n)    literals[n]
 #define BCO_ITBL(n)   itbls[n]
 
 #define LOAD_STACK_POINTERS                                    \
     cap->r.rCurrentTSO->sp = Sp
 
 #define RETURN_TO_SCHEDULER(todo,retcode)      \
-   SAVE_STACK_POINTERS;                        \
-   cap->r.rCurrentTSO->what_next = (todo);      \
-   return (retcode);
+   SAVE_STACK_POINTERS;                                \
+   cap->r.rCurrentTSO->what_next = (todo);     \
+   threadPaused(cap,cap->r.rCurrentTSO);               \
+   cap->r.rRet = (retcode);                    \
+   return cap;
 
+#define RETURN_TO_SCHEDULER_NO_PAUSE(todo,retcode)     \
+   SAVE_STACK_POINTERS;                                        \
+   cap->r.rCurrentTSO->what_next = (todo);             \
+   cap->r.rRet = (retcode);                            \
+   return cap;
 
-static inline StgPtr
-allocate_UPD (int n_words)
-{
-   return allocate(stg_max(sizeofW(StgHeader)+MIN_UPD_SIZE, n_words));
-}
 
-static inline StgPtr
+STATIC_INLINE StgPtr
 allocate_NONUPD (int n_words)
 {
-    return allocate(stg_max(sizeofW(StgHeader)+MIN_NONUPD_SIZE, n_words));
+    return allocate(stg_max(sizeofW(StgHeader)+MIN_PAYLOAD_SIZE, n_words));
 }
 
 
@@ -110,22 +114,22 @@ void interp_startup ( void )
 void interp_shutdown ( void )
 {
    int i, j, k, o_max, i_max, j_max;
-   fprintf(stderr, "%d constrs entered -> (%d BCO, %d UPD, %d ??? )\n",
+   debugBelch("%d constrs entered -> (%d BCO, %d UPD, %d ??? )\n",
                    it_retto_BCO + it_retto_UPDATE + it_retto_other,
                    it_retto_BCO, it_retto_UPDATE, it_retto_other );
-   fprintf(stderr, "%d total entries, %d unknown entries \n", 
+   debugBelch("%d total entries, %d unknown entries \n", 
                    it_total_entries, it_total_unknown_entries);
    for (i = 0; i < N_CLOSURE_TYPES; i++) {
      if (it_unknown_entries[i] == 0) continue;
-     fprintf(stderr, "   type %2d: unknown entries (%4.1f%%) == %d\n",
+     debugBelch("   type %2d: unknown entries (%4.1f%%) == %d\n",
             i, 100.0 * ((double)it_unknown_entries[i]) / 
                         ((double)it_total_unknown_entries),
              it_unknown_entries[i]);
    }
-   fprintf(stderr, "%d insns, %d slides, %d BCO_entries\n", 
+   debugBelch("%d insns, %d slides, %d BCO_entries\n", 
                    it_insns, it_slides, it_BCO_entries);
    for (i = 0; i < 27; i++) 
-      fprintf(stderr, "opcode %2d got %d\n", i, it_ofreq[i] );
+      debugBelch("opcode %2d got %d\n", i, it_ofreq[i] );
 
    for (k = 1; k < 20; k++) {
       o_max = 0;
@@ -139,7 +143,7 @@ void interp_shutdown ( void )
         }
       }
       
-      fprintf ( stderr, "%d:  count (%4.1f%%) %6d   is %d then %d\n",
+      debugBelch("%d:  count (%4.1f%%) %6d   is %d then %d\n",
                 k, ((double)o_max) * 100.0 / ((double)it_insns), o_max,
                    i_max, j_max );
       it_oofreq[i_max][j_max] = 0;
@@ -160,10 +164,9 @@ static StgWord app_ptrs_itbl[] = {
     (W_)&stg_ap_pppp_info,
     (W_)&stg_ap_ppppp_info,
     (W_)&stg_ap_pppppp_info,
-    (W_)&stg_ap_ppppppp_info
 };
 
-StgThreadReturnCode
+Capability *
 interpretBCO (Capability* cap)
 {
     // Use of register here is primarily to make it clear to compilers
@@ -228,14 +231,14 @@ eval_obj:
     INTERP_TICK(it_total_evals);
 
     IF_DEBUG(interpreter,
-             fprintf(stderr, 
+             debugBelch(
              "\n---------------------------------------------------------------\n");
-             fprintf(stderr,"Evaluating: "); printObj(obj);
-             fprintf(stderr,"Sp = %p\n", Sp);
-             fprintf(stderr, "\n" );
+             debugBelch("Evaluating: "); printObj(obj);
+             debugBelch("Sp = %p\n", Sp);
+             debugBelch("\n" );
 
              printStackChunk(Sp,cap->r.rCurrentTSO->stack+cap->r.rCurrentTSO->stack_size);
-             fprintf(stderr, "\n\n");
+             debugBelch("\n\n");
             );
 
     IF_DEBUG(sanity,checkStackChunk(Sp, cap->r.rCurrentTSO->stack+cap->r.rCurrentTSO->stack_size));
@@ -274,7 +277,7 @@ eval_obj:
        break;
        
     case BCO:
-       ASSERT(BCO_ARITY(obj) > 0);
+       ASSERT(((StgBCO *)obj)->arity > 0);
        break;
 
     case AP:   /* Copied from stg_AP_entry. */
@@ -327,13 +330,13 @@ eval_obj:
     {
        // Can't handle this object; yield to scheduler
        IF_DEBUG(interpreter,
-                fprintf(stderr, "evaluating unknown closure -- yielding to sched\n"); 
+                debugBelch("evaluating unknown closure -- yielding to sched\n"); 
                 printObj(obj);
            );
        Sp -= 2;
        Sp[1] = (W_)obj;
        Sp[0] = (W_)&stg_enter_info;
-       RETURN_TO_SCHEDULER(ThreadRunGHC, ThreadYielding);
+       RETURN_TO_SCHEDULER_NO_PAUSE(ThreadRunGHC, ThreadYielding);
     }
     }
 
@@ -344,13 +347,13 @@ do_return:
     ASSERT(closure_HNF(obj));
 
     IF_DEBUG(interpreter,
-             fprintf(stderr, 
+             debugBelch(
              "\n---------------------------------------------------------------\n");
-             fprintf(stderr,"Returning: "); printObj(obj);
-             fprintf(stderr,"Sp = %p\n", Sp);
-             fprintf(stderr, "\n" );
+             debugBelch("Returning: "); printObj(obj);
+             debugBelch("Sp = %p\n", Sp);
+             debugBelch("\n" );
              printStackChunk(Sp,cap->r.rCurrentTSO->stack+cap->r.rCurrentTSO->stack_size);
-             fprintf(stderr, "\n\n");
+             debugBelch("\n\n");
             );
 
     IF_DEBUG(sanity,checkStackChunk(Sp, cap->r.rCurrentTSO->stack+cap->r.rCurrentTSO->stack_size));
@@ -395,9 +398,6 @@ do_return:
        if (info == (StgInfoTable *)&stg_ap_pppppp_info) {
            n = 6; m = 6; goto do_apply;
        }
-       if (info == (StgInfoTable *)&stg_ap_ppppppp_info) {
-           n = 7; m = 7; goto do_apply;
-       }
        goto do_return_unrecognised;
     }
 
@@ -425,13 +425,13 @@ do_return:
        // Can't handle this return address; yield to scheduler
        INTERP_TICK(it_retto_other);
        IF_DEBUG(interpreter,
-                fprintf(stderr, "returning to unknown frame -- yielding to sched\n"); 
+                debugBelch("returning to unknown frame -- yielding to sched\n"); 
                 printStackChunk(Sp,cap->r.rCurrentTSO->stack+cap->r.rCurrentTSO->stack_size);
            );
        Sp -= 2;
        Sp[1] = (W_)obj;
        Sp[0] = (W_)&stg_enter_info;
-       RETURN_TO_SCHEDULER(ThreadRunGHC, ThreadYielding);
+       RETURN_TO_SCHEDULER_NO_PAUSE(ThreadRunGHC, ThreadYielding);
     }
     }
 
@@ -488,10 +488,10 @@ do_return_unboxed:
            // Can't handle this return address; yield to scheduler
            INTERP_TICK(it_retto_other);
            IF_DEBUG(interpreter,
-                    fprintf(stderr, "returning to unknown frame -- yielding to sched\n"); 
+                    debugBelch("returning to unknown frame -- yielding to sched\n"); 
                     printStackChunk(Sp,cap->r.rCurrentTSO->stack+cap->r.rCurrentTSO->stack_size);
                );
-           RETURN_TO_SCHEDULER(ThreadRunGHC, ThreadYielding);
+           RETURN_TO_SCHEDULER_NO_PAUSE(ThreadRunGHC, ThreadYielding);
        }
        }
     }
@@ -510,7 +510,7 @@ do_apply:
 
        case PAP: {
            StgPAP *pap;
-           nat arity, i;
+           nat i, arity;
 
            pap = (StgPAP *)obj;
 
@@ -530,7 +530,8 @@ do_apply:
                // Shuffle the args for this function down, and put
                // the appropriate info table in the gap.
                for (i = 0; i < arity; i++) {
-                   Sp[i-1] = Sp[i];
+                   Sp[(int)i-1] = Sp[i];
+                   // ^^^^^ careful, i-1 might be negative, but i in unsigned
                }
                Sp[arity-1] = app_ptrs_itbl[n-arity-1];
                Sp--;
@@ -553,9 +554,7 @@ do_apply:
            else /* arity > n */ {
                // build a new PAP and return it.
                StgPAP *new_pap;
-               nat size;
-               size = PAP_sizeW(pap->n_args + m);
-               new_pap = (StgPAP *)allocate(size);
+               new_pap = (StgPAP *)allocate(PAP_sizeW(pap->n_args + m));
                SET_HDR(new_pap,&stg_PAP_info,CCCS);
                new_pap->arity = pap->arity - n;
                new_pap->n_args = pap->n_args + m;
@@ -576,7 +575,7 @@ do_apply:
            nat arity, i;
 
            Sp++;
-           arity = BCO_ARITY(obj);
+           arity = ((StgBCO *)obj)->arity;
            ASSERT(arity > 0);
            if (arity < n) {
                // n must be greater than 1, and the only kinds of
@@ -586,7 +585,8 @@ do_apply:
                // Shuffle the args for this function down, and put
                // the appropriate info table in the gap.
                for (i = 0; i < arity; i++) {
-                   Sp[i-1] = Sp[i];
+                   Sp[(int)i-1] = Sp[i];
+                   // ^^^^^ careful, i-1 might be negative, but i in unsigned
                }
                Sp[arity-1] = app_ptrs_itbl[n-arity-1];
                Sp--;
@@ -598,9 +598,8 @@ do_apply:
            else /* arity > n */ {
                // build a PAP and return it.
                StgPAP *pap;
-               nat size, i;
-               size = PAP_sizeW(m);
-               pap = (StgPAP *)allocate(size);
+               nat i;
+               pap = (StgPAP *)allocate(PAP_sizeW(m));
                SET_HDR(pap, &stg_PAP_info,CCCS);
                pap->arity = arity - n;
                pap->fun = obj;
@@ -620,7 +619,7 @@ do_apply:
            Sp -= 2;
            Sp[1] = (W_)obj;
            Sp[0] = (W_)&stg_enter_info;
-           RETURN_TO_SCHEDULER(ThreadRunGHC, ThreadYielding);
+           RETURN_TO_SCHEDULER_NO_PAUSE(ThreadRunGHC, ThreadYielding);
     }
 
     // ------------------------------------------------------------------------
@@ -718,7 +717,7 @@ run_BCO:
     {
        register int       bciPtr     = 1; /* instruction pointer */
        register StgBCO*   bco        = (StgBCO*)obj;
-       register StgWord16* instrs    = (StgWord16*)(BCO_INSTRS(bco));
+       register StgWord16* instrs    = (StgWord16*)(bco->instrs->payload);
        register StgWord*  literals   = (StgWord*)(&bco->literals->payload[0]);
        register StgPtr*   ptrs       = (StgPtr*)(&bco->ptrs->payload[0]);
        register StgInfoTable** itbls = (StgInfoTable**)
@@ -732,18 +731,18 @@ run_BCO:
        ASSERT(bciPtr <= instrs[0]);
        IF_DEBUG(interpreter,
                 //if (do_print_stack) {
-                //fprintf(stderr, "\n-- BEGIN stack\n");
+                //debugBelch("\n-- BEGIN stack\n");
                 //printStack(Sp,cap->r.rCurrentTSO->stack+cap->r.rCurrentTSO->stack_size,iSu);
-                //fprintf(stderr, "-- END stack\n\n");
+                //debugBelch("-- END stack\n\n");
                 //}
-                fprintf(stderr,"Sp = %p   pc = %d      ", Sp, bciPtr);
+                debugBelch("Sp = %p   pc = %d      ", Sp, bciPtr);
                 disInstr(bco,bciPtr);
                 if (0) { int i;
-                fprintf(stderr,"\n");
+                debugBelch("\n");
                 for (i = 8; i >= 0; i--) {
-                    fprintf(stderr, "%d  %p\n", i, (StgPtr)(*(Sp+i)));
+                    debugBelch("%d  %p\n", i, (StgPtr)(*(Sp+i)));
                 }
-                fprintf(stderr,"\n");
+                debugBelch("\n");
                 }
                 //if (do_print_stack) checkStack(Sp,cap->r.rCurrentTSO->stack+cap->r.rCurrentTSO->stack_size,iSu);
            );
@@ -810,7 +809,7 @@ run_BCO:
 
        case bci_PUSH_ALTS: {
            int o_bco  = BCO_NEXT;
-           Sp[-2] = (W_)&stg_ctoi_ret_R1p_info;
+           Sp[-2] = (W_)&stg_ctoi_R1p_info;
            Sp[-1] = BCO_PTR(o_bco);
            Sp -= 2;
            goto nextInsn;
@@ -818,7 +817,7 @@ run_BCO:
 
        case bci_PUSH_ALTS_P: {
            int o_bco  = BCO_NEXT;
-           Sp[-2] = (W_)&stg_ctoi_ret_R1unpt_info;
+           Sp[-2] = (W_)&stg_ctoi_R1unpt_info;
            Sp[-1] = BCO_PTR(o_bco);
            Sp -= 2;
            goto nextInsn;
@@ -826,7 +825,7 @@ run_BCO:
 
        case bci_PUSH_ALTS_N: {
            int o_bco  = BCO_NEXT;
-           Sp[-2] = (W_)&stg_ctoi_ret_R1n_info;
+           Sp[-2] = (W_)&stg_ctoi_R1n_info;
            Sp[-1] = BCO_PTR(o_bco);
            Sp -= 2;
            goto nextInsn;
@@ -834,7 +833,7 @@ run_BCO:
 
        case bci_PUSH_ALTS_F: {
            int o_bco  = BCO_NEXT;
-           Sp[-2] = (W_)&stg_ctoi_ret_F1_info;
+           Sp[-2] = (W_)&stg_ctoi_F1_info;
            Sp[-1] = BCO_PTR(o_bco);
            Sp -= 2;
            goto nextInsn;
@@ -842,7 +841,7 @@ run_BCO:
 
        case bci_PUSH_ALTS_D: {
            int o_bco  = BCO_NEXT;
-           Sp[-2] = (W_)&stg_ctoi_ret_D1_info;
+           Sp[-2] = (W_)&stg_ctoi_D1_info;
            Sp[-1] = BCO_PTR(o_bco);
            Sp -= 2;
            goto nextInsn;
@@ -850,7 +849,7 @@ run_BCO:
 
        case bci_PUSH_ALTS_L: {
            int o_bco  = BCO_NEXT;
-           Sp[-2] = (W_)&stg_ctoi_ret_L1_info;
+           Sp[-2] = (W_)&stg_ctoi_L1_info;
            Sp[-1] = BCO_PTR(o_bco);
            Sp -= 2;
            goto nextInsn;
@@ -858,7 +857,7 @@ run_BCO:
 
        case bci_PUSH_ALTS_V: {
            int o_bco  = BCO_NEXT;
-           Sp[-2] = (W_)&stg_ctoi_ret_V_info;
+           Sp[-2] = (W_)&stg_ctoi_V_info;
            Sp[-1] = BCO_PTR(o_bco);
            Sp -= 2;
            goto nextInsn;
@@ -897,9 +896,6 @@ run_BCO:
        case bci_PUSH_APPLY_PPPPPP:
            Sp--; Sp[0] = (W_)&stg_ap_pppppp_info;
            goto nextInsn;
-       case bci_PUSH_APPLY_PPPPPPP:
-           Sp--; Sp[0] = (W_)&stg_ap_ppppppp_info;
-           goto nextInsn;
            
        case bci_PUSH_UBX: {
            int i;
@@ -907,7 +903,7 @@ run_BCO:
            int n_words = BCO_NEXT;
            Sp -= n_words;
            for (i = 0; i < n_words; i++) {
-               Sp[i] = BCO_LIT(o_lits+i);
+               Sp[i] = (W_)BCO_LIT(o_lits+i);
            }
            goto nextInsn;
        }
@@ -926,9 +922,8 @@ run_BCO:
 
        case bci_ALLOC_AP: {
            StgAP* ap; 
-           int n_payload = BCO_NEXT - 1;
-           int request   = PAP_sizeW(n_payload);
-           ap = (StgAP*)allocate_UPD(request);
+           int n_payload = BCO_NEXT;
+           ap = (StgAP*)allocate(AP_sizeW(n_payload));
            Sp[-1] = (W_)ap;
            ap->n_args = n_payload;
            SET_HDR(ap, &stg_AP_info, CCS_SYSTEM/*ToDo*/)
@@ -939,9 +934,8 @@ run_BCO:
        case bci_ALLOC_PAP: {
            StgPAP* pap; 
            int arity = BCO_NEXT;
-           int n_payload = BCO_NEXT - 1;
-           int request   = PAP_sizeW(n_payload);
-           pap = (StgPAP*)allocate_NONUPD(request);
+           int n_payload = BCO_NEXT;
+           pap = (StgPAP*)allocate(PAP_sizeW(n_payload));
            Sp[-1] = (W_)pap;
            pap->n_args = n_payload;
            pap->arity = arity;
@@ -953,27 +947,47 @@ run_BCO:
        case bci_MKAP: {
            int i;
            int stkoff = BCO_NEXT;
-           int n_payload = BCO_NEXT - 1;
+           int n_payload = BCO_NEXT;
            StgAP* ap = (StgAP*)Sp[stkoff];
            ASSERT((int)ap->n_args == n_payload);
            ap->fun = (StgClosure*)Sp[0];
-
+           
            // The function should be a BCO, and its bitmap should
            // cover the payload of the AP correctly.
            ASSERT(get_itbl(ap->fun)->type == BCO
-                  && (get_itbl(ap)->type == PAP || 
-                      BCO_BITMAP_SIZE(ap->fun) == ap->n_args));
-
+                  && BCO_BITMAP_SIZE(ap->fun) == ap->n_args);
+           
            for (i = 0; i < n_payload; i++)
                ap->payload[i] = (StgClosure*)Sp[i+1];
            Sp += n_payload+1;
            IF_DEBUG(interpreter,
-                    fprintf(stderr,"\tBuilt "); 
+                    debugBelch("\tBuilt "); 
                     printObj((StgClosure*)ap);
                );
            goto nextInsn;
        }
 
+       case bci_MKPAP: {
+           int i;
+           int stkoff = BCO_NEXT;
+           int n_payload = BCO_NEXT;
+           StgPAP* pap = (StgPAP*)Sp[stkoff];
+           ASSERT((int)pap->n_args == n_payload);
+           pap->fun = (StgClosure*)Sp[0];
+           
+           // The function should be a BCO
+           ASSERT(get_itbl(pap->fun)->type == BCO);
+           
+           for (i = 0; i < n_payload; i++)
+               pap->payload[i] = (StgClosure*)Sp[i+1];
+           Sp += n_payload+1;
+           IF_DEBUG(interpreter,
+                    debugBelch("\tBuilt "); 
+                    printObj((StgClosure*)pap);
+               );
+           goto nextInsn;
+       }
+
        case bci_UNPACK: {
            /* Unpack N ptr words from t.o.s constructor */
            int i;
@@ -1003,27 +1017,27 @@ run_BCO:
            Sp --;
            Sp[0] = (W_)con;
            IF_DEBUG(interpreter,
-                    fprintf(stderr,"\tBuilt "); 
+                    debugBelch("\tBuilt "); 
                     printObj((StgClosure*)con);
                );
            goto nextInsn;
        }
 
        case bci_TESTLT_P: {
-           int discr  = BCO_NEXT;
+           unsigned int discr  = BCO_NEXT;
            int failto = BCO_NEXT;
            StgClosure* con = (StgClosure*)Sp[0];
-           if (constrTag(con) >= discr) {
+           if (GET_TAG(con) >= discr) {
                bciPtr = failto;
            }
            goto nextInsn;
        }
 
        case bci_TESTEQ_P: {
-           int discr  = BCO_NEXT;
+           unsigned int discr  = BCO_NEXT;
            int failto = BCO_NEXT;
            StgClosure* con = (StgClosure*)Sp[0];
-           if (constrTag(con) != discr) {
+           if (GET_TAG(con) != discr) {
                bciPtr = failto;
            }
            goto nextInsn;
@@ -1153,12 +1167,15 @@ run_BCO:
        }
 
        case bci_CCALL: {
-           StgInt tok;
+           void *tok;
            int stk_offset            = BCO_NEXT;
            int o_itbl                = BCO_NEXT;
            void(*marshall_fn)(void*) = (void (*)(void*))BCO_LIT(o_itbl);
+           int ret_dyn_size = 
+               RET_DYN_BITMAP_SIZE + RET_DYN_NONPTR_REGS_SIZE
+               + sizeofW(StgRetDyn);
 
-#ifdef RTS_SUPPORTS_THREADS
+#ifdef THREADED_RTS
            // Threaded RTS:
            // Arguments on the TSO stack are not good, because garbage
            // collection might move the TSO as soon as we call
@@ -1168,7 +1185,10 @@ run_BCO:
            
            memcpy(arguments, Sp, sizeof(W_) * stk_offset);
 #endif
-               
+
+           // Restore the Haskell thread's current value of errno
+           errno = cap->r.rCurrentTSO->saved_errno;
+
            // There are a bunch of non-ptr words on the stack (the
            // ccall args, the ccall fun address and space for the
            // result), which we need to cover with an info table
@@ -1179,34 +1199,36 @@ run_BCO:
            // CCALL instruction.   So we build a RET_DYN stack frame
            // on the stack frame to describe this chunk of stack.
            //
-           Sp -= RET_DYN_SIZE + sizeofW(StgRetDyn);
-           ((StgRetDyn *)Sp)->liveness = ALL_NON_PTRS | N_NONPTRS(stk_offset);
+           Sp -= ret_dyn_size;
+           ((StgRetDyn *)Sp)->liveness = NO_PTRS | N_NONPTRS(stk_offset);
            ((StgRetDyn *)Sp)->info = (StgInfoTable *)&stg_gc_gen_info;
 
            SAVE_STACK_POINTERS;
-           tok = suspendThread(&cap->r,rtsFalse);
+           tok = suspendThread(&cap->r);
 
-#ifndef RTS_SUPPORTS_THREADS
+#ifndef THREADED_RTS
            // Careful:
            // suspendThread might have shifted the stack
            // around (stack squeezing), so we have to grab the real
            // Sp out of the TSO to find the ccall args again.
 
-           marshall_fn ( (void*)(cap->r.rCurrentTSO->sp + RET_DYN_SIZE + sizeofW(StgRetDyn)) );
+           marshall_fn ( (void*)(cap->r.rCurrentTSO->sp + ret_dyn_size) );
 #else
            // Threaded RTS:
-           // We already made a malloced copy of the arguments above.
+           // We already made a copy of the arguments above.
 
            marshall_fn ( arguments );
 #endif
 
            // And restart the thread again, popping the RET_DYN frame.
-           cap = (Capability *)((void *)resumeThread(tok,rtsFalse) - sizeof(StgFunTable));
+           cap = (Capability *)((void *)((unsigned char*)resumeThread(tok) - sizeof(StgFunTable)));
            LOAD_STACK_POINTERS;
-           Sp += RET_DYN_SIZE + sizeofW(StgRetDyn);
-
+           Sp += ret_dyn_size;
            
-#ifdef RTS_SUPPORTS_THREADS
+           // Save the Haskell thread's current value of errno
+           cap->r.rCurrentTSO->saved_errno = errno;
+               
+#ifdef THREADED_RTS
            // Threaded RTS:
            // Copy the "arguments", which might include a return value,
            // back to the TSO stack. It would of course be enough to