X-Git-Url: http://git.megacz.com/?p=ghc-hetmet.git;a=blobdiff_plain;f=rts%2FInterpreter.c;h=da7ee2196aace39fc16e7335575ded68e69e6ce8;hp=0ca8ddf62378ecfb2ea4b442d5b624a50ffbf43f;hb=83d563cb9ede0ba792836e529b1e2929db926355;hpb=dfb079f3b16fb179e083d83280c56aa1ce5821a9

diff --git a/rts/Interpreter.c b/rts/Interpreter.c
index 0ca8ddf..da7ee21 100644
--- a/rts/Interpreter.c
+++ b/rts/Interpreter.c
@@ -7,27 +7,35 @@
 #include "PosixSource.h"
 #include "Rts.h"
 #include "RtsAPI.h"
+#include "rts/Bytecodes.h"
+
+// internal headers
+#include "sm/Storage.h"
+#include "sm/Sanity.h"
 #include "RtsUtils.h"
-#include "Closures.h"
-#include "TSO.h"
 #include "Schedule.h"
-#include "RtsFlags.h"
-#include "LdvProfile.h"
 #include "Updates.h"
-#include "Sanity.h"
-#include "Liveness.h"
 #include "Prelude.h"
-
-#include "Bytecodes.h"
+#include "Stable.h"
 #include "Printer.h"
 #include "Disassembler.h"
 #include "Interpreter.h"
+#include "ThreadPaused.h"
+#include "Threads.h"
 
 #include <string.h>     /* for memcpy */
 #ifdef HAVE_ERRNO_H
 #include <errno.h>
 #endif
 
+// When building the RTS in the non-dyn way on Windows, we don't
+//	want declspec(__dllimport__) on the front of function prototypes
+//	from libffi.
+#if defined(mingw32_HOST_OS) && !defined(__PIC__)
+# define LIBFFI_NOT_DLL
+#endif
+
+#include "ffi.h"
 
 /* --------------------------------------------------------------------------
  * The bytecode interpreter
@@ -62,6 +70,7 @@
     SpLim = cap->r.rCurrentTSO->stack + RESERVED_STACK_WORDS;
 
 #define SAVE_STACK_POINTERS			\
+    ASSERT(Sp > SpLim); \
     cap->r.rCurrentTSO->sp = Sp
 
 #define RETURN_TO_SCHEDULER(todo,retcode)	\
@@ -79,9 +88,9 @@
 
 
 STATIC_INLINE StgPtr
-allocate_NONUPD (int n_words)
+allocate_NONUPD (Capability *cap, int n_words)
 {
-    return allocate(stg_max(sizeofW(StgHeader)+MIN_PAYLOAD_SIZE, n_words));
+    return allocate(cap, stg_max(sizeofW(StgHeader)+MIN_PAYLOAD_SIZE, n_words));
 }
 
 int rts_stop_next_breakpoint = 0;
@@ -194,6 +203,9 @@ interpretBCO (Capability* cap)
 
     LOAD_STACK_POINTERS;
 
+    cap->r.rHpLim = (P_)1; // HpLim is the context-switch flag; when it
+                           // goes to zero we must return to the scheduler.
+
     // ------------------------------------------------------------------------
     // Case 1:
     // 
@@ -258,14 +270,13 @@ eval_obj:
              debugBelch("\n\n");
             );
 
-    IF_DEBUG(sanity,checkStackChunk(Sp, cap->r.rCurrentTSO->stack+cap->r.rCurrentTSO->stack_size));
+//    IF_DEBUG(sanity,checkStackChunk(Sp, cap->r.rCurrentTSO->stack+cap->r.rCurrentTSO->stack_size));
+    IF_DEBUG(sanity,checkStackFrame(Sp));
 
     switch ( get_itbl(obj)->type ) {
 
     case IND:
-    case IND_OLDGEN:
     case IND_PERM:
-    case IND_OLDGEN_PERM:
     case IND_STATIC:
     { 
 	tagged_obj = ((StgInd*)obj)->indirectee;
@@ -431,7 +442,8 @@ do_return:
         // to a PAP by the GC, violating the invariant that PAPs
         // always contain a tagged pointer to the function.
 	INTERP_TICK(it_retto_UPDATE);
-	UPD_IND(((StgUpdateFrame *)Sp)->updatee, tagged_obj); 
+        updateThunk(cap, cap->r.rCurrentTSO, 
+                    ((StgUpdateFrame *)Sp)->updatee, tagged_obj);
 	Sp += sizeofW(StgUpdateFrame);
 	goto do_return;
 
@@ -548,6 +560,16 @@ do_apply:
 		goto defer_apply_to_sched;
 	    }
 
+            // Stack check: we're about to unpack the PAP onto the
+            // stack.  The (+1) is for the (arity < n) case, where we
+            // also need space for an extra info pointer.
+            if (Sp - (pap->n_args + 1) < SpLim) {
+                Sp -= 2;
+                Sp[1] = (W_)tagged_obj;
+                Sp[0] = (W_)&stg_enter_info;
+                RETURN_TO_SCHEDULER(ThreadInterpret, StackOverflow);
+            }
+
 	    Sp++;
 	    arity = pap->arity;
 	    ASSERT(arity > 0);
@@ -583,7 +605,7 @@ do_apply:
 	    else /* arity > n */ {
 		// build a new PAP and return it.
 		StgPAP *new_pap;
-		new_pap = (StgPAP *)allocate(PAP_sizeW(pap->n_args + m));
+		new_pap = (StgPAP *)allocate(cap, PAP_sizeW(pap->n_args + m));
 		SET_HDR(new_pap,&stg_PAP_info,CCCS);
 		new_pap->arity = pap->arity - n;
 		new_pap->n_args = pap->n_args + m;
@@ -628,7 +650,7 @@ do_apply:
 		// build a PAP and return it.
 		StgPAP *pap;
 		nat i;
-		pap = (StgPAP *)allocate(PAP_sizeW(m));
+		pap = (StgPAP *)allocate(cap, PAP_sizeW(m));
 		SET_HDR(pap, &stg_PAP_info,CCCS);
 		pap->arity = arity - n;
 		pap->fun = obj;
@@ -697,7 +719,7 @@ do_apply:
 
 run_BCO_return:
     // Heap check
-    if (doYouWantToGC()) {
+    if (doYouWantToGC(cap)) {
 	Sp--; Sp[0] = (W_)&stg_enter_info;
 	RETURN_TO_SCHEDULER(ThreadInterpret, HeapOverflow);
     }
@@ -708,7 +730,7 @@ run_BCO_return:
     
 run_BCO_return_unboxed:
     // Heap check
-    if (doYouWantToGC()) {
+    if (doYouWantToGC(cap)) {
 	RETURN_TO_SCHEDULER(ThreadInterpret, HeapOverflow);
     }
     // Stack checks aren't necessary at return points, the stack use
@@ -726,7 +748,7 @@ run_BCO_fun:
 	);
 
     // Heap check
-    if (doYouWantToGC()) {
+    if (doYouWantToGC(cap)) {
 	Sp -= 2; 
 	Sp[1] = (W_)obj; 
 	Sp[0] = (W_)&stg_apply_interp_info; // placeholder, really
@@ -748,19 +770,22 @@ run_BCO_fun:
 run_BCO:
     INTERP_TICK(it_BCO_entries);
     {
-	register int       bciPtr     = 1; /* instruction pointer */
+	register int       bciPtr = 0; /* instruction pointer */
         register StgWord16 bci;
 	register StgBCO*   bco        = (StgBCO*)obj;
 	register StgWord16* instrs    = (StgWord16*)(bco->instrs->payload);
 	register StgWord*  literals   = (StgWord*)(&bco->literals->payload[0]);
 	register StgPtr*   ptrs       = (StgPtr*)(&bco->ptrs->payload[0]);
+	int bcoSize;
+    bcoSize = BCO_NEXT_WORD;
+	IF_DEBUG(interpreter,debugBelch("bcoSize = %d\n", bcoSize));
 
 #ifdef INTERP_STATS
 	it_lastopc = 0; /* no opcode */
 #endif
 
     nextInsn:
-	ASSERT(bciPtr <= instrs[0]);
+	ASSERT(bciPtr < bcoSize);
 	IF_DEBUG(interpreter,
 		 //if (do_print_stack) {
 		 //debugBelch("\n-- BEGIN stack\n");
@@ -839,7 +864,7 @@ run_BCO:
                   // stg_apply_interp_info pointer and a pointer to
                   // the BCO
                   size_words = BCO_BITMAP_SIZE(obj) + 2;
-                  new_aps = (StgAP_STACK *) allocate (AP_STACK_sizeW(size_words));
+                  new_aps = (StgAP_STACK *) allocate(cap, AP_STACK_sizeW(size_words));
                   SET_HDR(new_aps,&stg_AP_STACK_info,CCS_SYSTEM); 
                   new_aps->size = size_words;
                   new_aps->fun = &stg_dummy_ret_closure; 
@@ -859,21 +884,15 @@ run_BCO:
                   // in a reasonable state for the GC and so that
                   // execution of this BCO can continue when we resume
                   ioAction = (StgClosure *) deRefStablePtr (rts_breakpoint_io_action);
-                  Sp -= 9;
-                  Sp[8] = (W_)obj;   
-                  Sp[7] = (W_)&stg_apply_interp_info;
-                  Sp[6] = (W_)&stg_noforceIO_info;     // see [unreg] below
+                  Sp -= 8;
+                  Sp[7] = (W_)obj;
+                  Sp[6] = (W_)&stg_apply_interp_info;
                   Sp[5] = (W_)new_aps;                 // the AP_STACK
                   Sp[4] = (W_)BCO_PTR(arg3_freeVars);  // the info about local vars of the breakpoint
                   Sp[3] = (W_)False_closure;            // True <=> a breakpoint
                   Sp[2] = (W_)&stg_ap_pppv_info;
                   Sp[1] = (W_)ioAction;                // apply the IO action to its two arguments above
                   Sp[0] = (W_)&stg_enter_info;         // get ready to run the IO action
-                  // Note [unreg]: in unregisterised mode, the return
-                  // convention for IO is different.  The
-                  // stg_noForceIO_info stack frame is necessary to
-                  // account for this difference.
-
                   // set the flag in the TSO to say that we are now
                   // stopping at a breakpoint so that when we resume
                   // we don't stop on the same breakpoint that we
@@ -1058,7 +1077,7 @@ run_BCO:
 	case bci_ALLOC_AP: {
 	    StgAP* ap; 
 	    int n_payload = BCO_NEXT;
-	    ap = (StgAP*)allocate(AP_sizeW(n_payload));
+	    ap = (StgAP*)allocate(cap, AP_sizeW(n_payload));
 	    Sp[-1] = (W_)ap;
 	    ap->n_args = n_payload;
 	    SET_HDR(ap, &stg_AP_info, CCS_SYSTEM/*ToDo*/)
@@ -1069,7 +1088,7 @@ run_BCO:
 	case bci_ALLOC_AP_NOUPD: {
 	    StgAP* ap; 
 	    int n_payload = BCO_NEXT;
-	    ap = (StgAP*)allocate(AP_sizeW(n_payload));
+	    ap = (StgAP*)allocate(cap, AP_sizeW(n_payload));
 	    Sp[-1] = (W_)ap;
 	    ap->n_args = n_payload;
 	    SET_HDR(ap, &stg_AP_NOUPD_info, CCS_SYSTEM/*ToDo*/)
@@ -1081,7 +1100,7 @@ run_BCO:
 	    StgPAP* pap; 
 	    int arity = BCO_NEXT;
 	    int n_payload = BCO_NEXT;
-	    pap = (StgPAP*)allocate(PAP_sizeW(n_payload));
+	    pap = (StgPAP*)allocate(cap, PAP_sizeW(n_payload));
 	    Sp[-1] = (W_)pap;
 	    pap->n_args = n_payload;
 	    pap->arity = arity;
@@ -1153,7 +1172,7 @@ run_BCO:
 	    StgInfoTable* itbl = INFO_PTR_TO_STRUCT(BCO_LIT(o_itbl));
 	    int request        = CONSTR_sizeW( itbl->layout.payload.ptrs, 
 					       itbl->layout.payload.nptrs );
-	    StgClosure* con = (StgClosure*)allocate_NONUPD(request);
+	    StgClosure* con = (StgClosure*)allocate_NONUPD(cap,request);
 	    ASSERT( itbl->layout.payload.ptrs + itbl->layout.payload.nptrs > 0);
 	    SET_HDR(con, (StgInfoTable*)BCO_LIT(o_itbl), CCS_SYSTEM/*ToDo*/);
 	    for (i = 0; i < n_words; i++) {
@@ -1171,7 +1190,7 @@ run_BCO:
 
 	case bci_TESTLT_P: {
 	    unsigned int discr  = BCO_NEXT;
-	    int failto = BCO_NEXT;
+	    int failto = BCO_GET_LARGE_ARG;
 	    StgClosure* con = (StgClosure*)Sp[0];
 	    if (GET_TAG(con) >= discr) {
 		bciPtr = failto;
@@ -1181,7 +1200,7 @@ run_BCO:
 
 	case bci_TESTEQ_P: {
 	    unsigned int discr  = BCO_NEXT;
-	    int failto = BCO_NEXT;
+	    int failto = BCO_GET_LARGE_ARG;
 	    StgClosure* con = (StgClosure*)Sp[0];
 	    if (GET_TAG(con) != discr) {
 		bciPtr = failto;
@@ -1192,7 +1211,7 @@ run_BCO:
 	case bci_TESTLT_I: {
 	    // There should be an Int at Sp[1], and an info table at Sp[0].
 	    int discr   = BCO_NEXT;
-	    int failto  = BCO_NEXT;
+	    int failto  = BCO_GET_LARGE_ARG;
 	    I_ stackInt = (I_)Sp[1];
 	    if (stackInt >= (I_)BCO_LIT(discr))
 		bciPtr = failto;
@@ -1202,7 +1221,7 @@ run_BCO:
 	case bci_TESTEQ_I: {
 	    // There should be an Int at Sp[1], and an info table at Sp[0].
 	    int discr   = BCO_NEXT;
-	    int failto  = BCO_NEXT;
+	    int failto  = BCO_GET_LARGE_ARG;
 	    I_ stackInt = (I_)Sp[1];
 	    if (stackInt != (I_)BCO_LIT(discr)) {
 		bciPtr = failto;
@@ -1210,10 +1229,31 @@ run_BCO:
 	    goto nextInsn;
 	}
 
+	case bci_TESTLT_W: {
+	    // There should be an Int at Sp[1], and an info table at Sp[0].
+	    int discr   = BCO_NEXT;
+	    int failto  = BCO_GET_LARGE_ARG;
+	    W_ stackWord = (W_)Sp[1];
+	    if (stackWord >= (W_)BCO_LIT(discr))
+		bciPtr = failto;
+	    goto nextInsn;
+	}
+
+	case bci_TESTEQ_W: {
+	    // There should be an Int at Sp[1], and an info table at Sp[0].
+	    int discr   = BCO_NEXT;
+	    int failto  = BCO_GET_LARGE_ARG;
+	    W_ stackWord = (W_)Sp[1];
+	    if (stackWord != (W_)BCO_LIT(discr)) {
+		bciPtr = failto;
+	    }
+	    goto nextInsn;
+	}
+
 	case bci_TESTLT_D: {
 	    // There should be a Double at Sp[1], and an info table at Sp[0].
 	    int discr   = BCO_NEXT;
-	    int failto  = BCO_NEXT;
+	    int failto  = BCO_GET_LARGE_ARG;
 	    StgDouble stackDbl, discrDbl;
 	    stackDbl = PK_DBL( & Sp[1] );
 	    discrDbl = PK_DBL( & BCO_LIT(discr) );
@@ -1226,7 +1266,7 @@ run_BCO:
 	case bci_TESTEQ_D: {
 	    // There should be a Double at Sp[1], and an info table at Sp[0].
 	    int discr   = BCO_NEXT;
-	    int failto  = BCO_NEXT;
+	    int failto  = BCO_GET_LARGE_ARG;
 	    StgDouble stackDbl, discrDbl;
 	    stackDbl = PK_DBL( & Sp[1] );
 	    discrDbl = PK_DBL( & BCO_LIT(discr) );
@@ -1239,7 +1279,7 @@ run_BCO:
 	case bci_TESTLT_F: {
 	    // There should be a Float at Sp[1], and an info table at Sp[0].
 	    int discr   = BCO_NEXT;
-	    int failto  = BCO_NEXT;
+	    int failto  = BCO_GET_LARGE_ARG;
 	    StgFloat stackFlt, discrFlt;
 	    stackFlt = PK_FLT( & Sp[1] );
 	    discrFlt = PK_FLT( & BCO_LIT(discr) );
@@ -1252,7 +1292,7 @@ run_BCO:
 	case bci_TESTEQ_F: {
 	    // There should be a Float at Sp[1], and an info table at Sp[0].
 	    int discr   = BCO_NEXT;
-	    int failto  = BCO_NEXT;
+	    int failto  = BCO_GET_LARGE_ARG;
 	    StgFloat stackFlt, discrFlt;
 	    stackFlt = PK_FLT( & Sp[1] );
 	    discrFlt = PK_FLT( & BCO_LIT(discr) );
@@ -1269,7 +1309,7 @@ run_BCO:
 	    // context switching: sometimes the scheduler can invoke
 	    // the interpreter with context_switch == 1, particularly
 	    // if the -C0 flag has been given on the cmd line.
-	    if (context_switch) {
+	    if (cap->r.rHpLim == NULL) {
 		Sp--; Sp[0] = (W_)&stg_enter_info;
 		RETURN_TO_SCHEDULER(ThreadInterpret, ThreadYielding);
 	    }
@@ -1316,21 +1356,71 @@ run_BCO:
 	    void *tok;
 	    int stk_offset            = BCO_NEXT;
 	    int o_itbl                = BCO_NEXT;
+	    int interruptible         = BCO_NEXT;
 	    void(*marshall_fn)(void*) = (void (*)(void*))BCO_LIT(o_itbl);
 	    int ret_dyn_size = 
 		RET_DYN_BITMAP_SIZE + RET_DYN_NONPTR_REGS_SIZE
 		+ sizeofW(StgRetDyn);
 
-#ifdef THREADED_RTS
-	    // Threaded RTS:
-	    // Arguments on the TSO stack are not good, because garbage
-	    // collection might move the TSO as soon as we call
-	    // suspendThread below.
+            /* the stack looks like this:
+               
+               |             |  <- Sp + stk_offset
+               +-------------+  
+               |             |
+               |    args     |
+               |             |  <- Sp + ret_size + 1
+               +-------------+
+               |    C fun    |  <- Sp + ret_size
+               +-------------+
+               |     ret     |  <- Sp
+               +-------------+
+
+               ret is a placeholder for the return address, and may be
+               up to 2 words.
+
+               We need to copy the args out of the TSO, because when
+               we call suspendThread() we no longer own the TSO stack,
+               and it may move at any time - indeed suspendThread()
+               itself may do stack squeezing and move our args.
+               So we make a copy of the argument block.
+            */
+
+#define ROUND_UP_WDS(p)  ((((StgWord)(p)) + sizeof(W_)-1)/sizeof(W_))
+
+            ffi_cif *cif = (ffi_cif *)marshall_fn;
+            nat nargs = cif->nargs;
+            nat ret_size;
+            nat i;
+            StgPtr p;
+            W_ ret[2];                  // max needed
+	    W_ *arguments[stk_offset];  // max needed
+            void *argptrs[nargs];
+            void (*fn)(void);
+
+            if (cif->rtype->type == FFI_TYPE_VOID) {
+                // necessary because cif->rtype->size == 1 for void,
+                // but the bytecode generator has not pushed a
+                // placeholder in this case.
+                ret_size = 0;
+            } else {
+                ret_size = ROUND_UP_WDS(cif->rtype->size);
+            }
 
-	    W_ arguments[stk_offset];
-	    
-	    memcpy(arguments, Sp, sizeof(W_) * stk_offset);
-#endif
+	    memcpy(arguments, Sp+ret_size+1, 
+                   sizeof(W_) * (stk_offset-1-ret_size));
+            
+            // libffi expects the args as an array of pointers to
+            // values, so we have to construct this array before making
+            // the call.
+            p = (StgPtr)arguments;
+            for (i = 0; i < nargs; i++) {
+                argptrs[i] = (void *)p;
+                // get the size from the cif
+                p += ROUND_UP_WDS(cif->arg_types[i]->size);
+            }
+
+            // this is the function we're going to call
+            fn = (void(*)(void))Sp[ret_size];
 
 	    // Restore the Haskell thread's current value of errno
 	    errno = cap->r.rCurrentTSO->saved_errno;
@@ -1355,24 +1445,13 @@ run_BCO:
             ((StgRetDyn *)Sp)->payload[0] = (StgClosure *)obj;
 
 	    SAVE_STACK_POINTERS;
-	    tok = suspendThread(&cap->r);
-
-#ifndef THREADED_RTS
-	    // Careful:
-	    // suspendThread might have shifted the stack
-	    // around (stack squeezing), so we have to grab the real
-	    // Sp out of the TSO to find the ccall args again.
+	    tok = suspendThread(&cap->r, interruptible ? rtsTrue : rtsFalse);
 
-	    marshall_fn ( (void*)(cap->r.rCurrentTSO->sp + ret_dyn_size) );
-#else
-	    // Threaded RTS:
 	    // We already made a copy of the arguments above.
-
-	    marshall_fn ( arguments );
-#endif
+            ffi_call(cif, fn, ret, argptrs);
 
 	    // And restart the thread again, popping the RET_DYN frame.
-	    cap = (Capability *)((void *)((unsigned char*)resumeThread(tok) - sizeof(StgFunTable)));
+	    cap = (Capability *)((void *)((unsigned char*)resumeThread(tok) - STG_FIELD_OFFSET(Capability,r)));
 	    LOAD_STACK_POINTERS;
 
             // Re-load the pointer to the BCO from the RET_DYN frame,
@@ -1389,20 +1468,16 @@ run_BCO:
 	    // Save the Haskell thread's current value of errno
 	    cap->r.rCurrentTSO->saved_errno = errno;
 		
-#ifdef THREADED_RTS
-	    // Threaded RTS:
-	    // Copy the "arguments", which might include a return value,
-	    // back to the TSO stack. It would of course be enough to
-	    // just copy the return value, but we don't know the offset.
-	    memcpy(Sp, arguments, sizeof(W_) * stk_offset);
-#endif
+	    // Copy the return value back to the TSO stack.  It is at
+            // most 2 words large, and resides at arguments[0].
+            memcpy(Sp, ret, sizeof(W_) * stg_min(stk_offset,ret_size));
 
 	    goto nextInsn;
 	}
 
 	case bci_JMP: {
 	    /* BCO_NEXT modifies bciPtr, so be conservative. */
-	    int nextpc = BCO_NEXT;
+	    int nextpc = BCO_GET_LARGE_ARG;
 	    bciPtr     = nextpc;
 	    goto nextInsn;
 	}