X-Git-Url: http://git.megacz.com/?p=ghc-hetmet.git;a=blobdiff_plain;f=rts%2FInterpreter.c;h=da7ee2196aace39fc16e7335575ded68e69e6ce8;hp=0ca8ddf62378ecfb2ea4b442d5b624a50ffbf43f;hb=83d563cb9ede0ba792836e529b1e2929db926355;hpb=dfb079f3b16fb179e083d83280c56aa1ce5821a9 diff --git a/rts/Interpreter.c b/rts/Interpreter.c index 0ca8ddf..da7ee21 100644 --- a/rts/Interpreter.c +++ b/rts/Interpreter.c @@ -7,27 +7,35 @@ #include "PosixSource.h" #include "Rts.h" #include "RtsAPI.h" +#include "rts/Bytecodes.h" + +// internal headers +#include "sm/Storage.h" +#include "sm/Sanity.h" #include "RtsUtils.h" -#include "Closures.h" -#include "TSO.h" #include "Schedule.h" -#include "RtsFlags.h" -#include "LdvProfile.h" #include "Updates.h" -#include "Sanity.h" -#include "Liveness.h" #include "Prelude.h" - -#include "Bytecodes.h" +#include "Stable.h" #include "Printer.h" #include "Disassembler.h" #include "Interpreter.h" +#include "ThreadPaused.h" +#include "Threads.h" #include /* for memcpy */ #ifdef HAVE_ERRNO_H #include #endif +// When building the RTS in the non-dyn way on Windows, we don't +// want declspec(__dllimport__) on the front of function prototypes +// from libffi. +#if defined(mingw32_HOST_OS) && !defined(__PIC__) +# define LIBFFI_NOT_DLL +#endif + +#include "ffi.h" /* -------------------------------------------------------------------------- * The bytecode interpreter @@ -62,6 +70,7 @@ SpLim = cap->r.rCurrentTSO->stack + RESERVED_STACK_WORDS; #define SAVE_STACK_POINTERS \ + ASSERT(Sp > SpLim); \ cap->r.rCurrentTSO->sp = Sp #define RETURN_TO_SCHEDULER(todo,retcode) \ @@ -79,9 +88,9 @@ STATIC_INLINE StgPtr -allocate_NONUPD (int n_words) +allocate_NONUPD (Capability *cap, int n_words) { - return allocate(stg_max(sizeofW(StgHeader)+MIN_PAYLOAD_SIZE, n_words)); + return allocate(cap, stg_max(sizeofW(StgHeader)+MIN_PAYLOAD_SIZE, n_words)); } int rts_stop_next_breakpoint = 0; @@ -194,6 +203,9 @@ interpretBCO (Capability* cap) LOAD_STACK_POINTERS; + cap->r.rHpLim = (P_)1; // HpLim is the context-switch flag; when it + // goes to zero we must return to the scheduler. + // ------------------------------------------------------------------------ // Case 1: // @@ -258,14 +270,13 @@ eval_obj: debugBelch("\n\n"); ); - IF_DEBUG(sanity,checkStackChunk(Sp, cap->r.rCurrentTSO->stack+cap->r.rCurrentTSO->stack_size)); +// IF_DEBUG(sanity,checkStackChunk(Sp, cap->r.rCurrentTSO->stack+cap->r.rCurrentTSO->stack_size)); + IF_DEBUG(sanity,checkStackFrame(Sp)); switch ( get_itbl(obj)->type ) { case IND: - case IND_OLDGEN: case IND_PERM: - case IND_OLDGEN_PERM: case IND_STATIC: { tagged_obj = ((StgInd*)obj)->indirectee; @@ -431,7 +442,8 @@ do_return: // to a PAP by the GC, violating the invariant that PAPs // always contain a tagged pointer to the function. INTERP_TICK(it_retto_UPDATE); - UPD_IND(((StgUpdateFrame *)Sp)->updatee, tagged_obj); + updateThunk(cap, cap->r.rCurrentTSO, + ((StgUpdateFrame *)Sp)->updatee, tagged_obj); Sp += sizeofW(StgUpdateFrame); goto do_return; @@ -548,6 +560,16 @@ do_apply: goto defer_apply_to_sched; } + // Stack check: we're about to unpack the PAP onto the + // stack. The (+1) is for the (arity < n) case, where we + // also need space for an extra info pointer. + if (Sp - (pap->n_args + 1) < SpLim) { + Sp -= 2; + Sp[1] = (W_)tagged_obj; + Sp[0] = (W_)&stg_enter_info; + RETURN_TO_SCHEDULER(ThreadInterpret, StackOverflow); + } + Sp++; arity = pap->arity; ASSERT(arity > 0); @@ -583,7 +605,7 @@ do_apply: else /* arity > n */ { // build a new PAP and return it. StgPAP *new_pap; - new_pap = (StgPAP *)allocate(PAP_sizeW(pap->n_args + m)); + new_pap = (StgPAP *)allocate(cap, PAP_sizeW(pap->n_args + m)); SET_HDR(new_pap,&stg_PAP_info,CCCS); new_pap->arity = pap->arity - n; new_pap->n_args = pap->n_args + m; @@ -628,7 +650,7 @@ do_apply: // build a PAP and return it. StgPAP *pap; nat i; - pap = (StgPAP *)allocate(PAP_sizeW(m)); + pap = (StgPAP *)allocate(cap, PAP_sizeW(m)); SET_HDR(pap, &stg_PAP_info,CCCS); pap->arity = arity - n; pap->fun = obj; @@ -697,7 +719,7 @@ do_apply: run_BCO_return: // Heap check - if (doYouWantToGC()) { + if (doYouWantToGC(cap)) { Sp--; Sp[0] = (W_)&stg_enter_info; RETURN_TO_SCHEDULER(ThreadInterpret, HeapOverflow); } @@ -708,7 +730,7 @@ run_BCO_return: run_BCO_return_unboxed: // Heap check - if (doYouWantToGC()) { + if (doYouWantToGC(cap)) { RETURN_TO_SCHEDULER(ThreadInterpret, HeapOverflow); } // Stack checks aren't necessary at return points, the stack use @@ -726,7 +748,7 @@ run_BCO_fun: ); // Heap check - if (doYouWantToGC()) { + if (doYouWantToGC(cap)) { Sp -= 2; Sp[1] = (W_)obj; Sp[0] = (W_)&stg_apply_interp_info; // placeholder, really @@ -748,19 +770,22 @@ run_BCO_fun: run_BCO: INTERP_TICK(it_BCO_entries); { - register int bciPtr = 1; /* instruction pointer */ + register int bciPtr = 0; /* instruction pointer */ register StgWord16 bci; register StgBCO* bco = (StgBCO*)obj; register StgWord16* instrs = (StgWord16*)(bco->instrs->payload); register StgWord* literals = (StgWord*)(&bco->literals->payload[0]); register StgPtr* ptrs = (StgPtr*)(&bco->ptrs->payload[0]); + int bcoSize; + bcoSize = BCO_NEXT_WORD; + IF_DEBUG(interpreter,debugBelch("bcoSize = %d\n", bcoSize)); #ifdef INTERP_STATS it_lastopc = 0; /* no opcode */ #endif nextInsn: - ASSERT(bciPtr <= instrs[0]); + ASSERT(bciPtr < bcoSize); IF_DEBUG(interpreter, //if (do_print_stack) { //debugBelch("\n-- BEGIN stack\n"); @@ -839,7 +864,7 @@ run_BCO: // stg_apply_interp_info pointer and a pointer to // the BCO size_words = BCO_BITMAP_SIZE(obj) + 2; - new_aps = (StgAP_STACK *) allocate (AP_STACK_sizeW(size_words)); + new_aps = (StgAP_STACK *) allocate(cap, AP_STACK_sizeW(size_words)); SET_HDR(new_aps,&stg_AP_STACK_info,CCS_SYSTEM); new_aps->size = size_words; new_aps->fun = &stg_dummy_ret_closure; @@ -859,21 +884,15 @@ run_BCO: // in a reasonable state for the GC and so that // execution of this BCO can continue when we resume ioAction = (StgClosure *) deRefStablePtr (rts_breakpoint_io_action); - Sp -= 9; - Sp[8] = (W_)obj; - Sp[7] = (W_)&stg_apply_interp_info; - Sp[6] = (W_)&stg_noforceIO_info; // see [unreg] below + Sp -= 8; + Sp[7] = (W_)obj; + Sp[6] = (W_)&stg_apply_interp_info; Sp[5] = (W_)new_aps; // the AP_STACK Sp[4] = (W_)BCO_PTR(arg3_freeVars); // the info about local vars of the breakpoint Sp[3] = (W_)False_closure; // True <=> a breakpoint Sp[2] = (W_)&stg_ap_pppv_info; Sp[1] = (W_)ioAction; // apply the IO action to its two arguments above Sp[0] = (W_)&stg_enter_info; // get ready to run the IO action - // Note [unreg]: in unregisterised mode, the return - // convention for IO is different. The - // stg_noForceIO_info stack frame is necessary to - // account for this difference. - // set the flag in the TSO to say that we are now // stopping at a breakpoint so that when we resume // we don't stop on the same breakpoint that we @@ -1058,7 +1077,7 @@ run_BCO: case bci_ALLOC_AP: { StgAP* ap; int n_payload = BCO_NEXT; - ap = (StgAP*)allocate(AP_sizeW(n_payload)); + ap = (StgAP*)allocate(cap, AP_sizeW(n_payload)); Sp[-1] = (W_)ap; ap->n_args = n_payload; SET_HDR(ap, &stg_AP_info, CCS_SYSTEM/*ToDo*/) @@ -1069,7 +1088,7 @@ run_BCO: case bci_ALLOC_AP_NOUPD: { StgAP* ap; int n_payload = BCO_NEXT; - ap = (StgAP*)allocate(AP_sizeW(n_payload)); + ap = (StgAP*)allocate(cap, AP_sizeW(n_payload)); Sp[-1] = (W_)ap; ap->n_args = n_payload; SET_HDR(ap, &stg_AP_NOUPD_info, CCS_SYSTEM/*ToDo*/) @@ -1081,7 +1100,7 @@ run_BCO: StgPAP* pap; int arity = BCO_NEXT; int n_payload = BCO_NEXT; - pap = (StgPAP*)allocate(PAP_sizeW(n_payload)); + pap = (StgPAP*)allocate(cap, PAP_sizeW(n_payload)); Sp[-1] = (W_)pap; pap->n_args = n_payload; pap->arity = arity; @@ -1153,7 +1172,7 @@ run_BCO: StgInfoTable* itbl = INFO_PTR_TO_STRUCT(BCO_LIT(o_itbl)); int request = CONSTR_sizeW( itbl->layout.payload.ptrs, itbl->layout.payload.nptrs ); - StgClosure* con = (StgClosure*)allocate_NONUPD(request); + StgClosure* con = (StgClosure*)allocate_NONUPD(cap,request); ASSERT( itbl->layout.payload.ptrs + itbl->layout.payload.nptrs > 0); SET_HDR(con, (StgInfoTable*)BCO_LIT(o_itbl), CCS_SYSTEM/*ToDo*/); for (i = 0; i < n_words; i++) { @@ -1171,7 +1190,7 @@ run_BCO: case bci_TESTLT_P: { unsigned int discr = BCO_NEXT; - int failto = BCO_NEXT; + int failto = BCO_GET_LARGE_ARG; StgClosure* con = (StgClosure*)Sp[0]; if (GET_TAG(con) >= discr) { bciPtr = failto; @@ -1181,7 +1200,7 @@ run_BCO: case bci_TESTEQ_P: { unsigned int discr = BCO_NEXT; - int failto = BCO_NEXT; + int failto = BCO_GET_LARGE_ARG; StgClosure* con = (StgClosure*)Sp[0]; if (GET_TAG(con) != discr) { bciPtr = failto; @@ -1192,7 +1211,7 @@ run_BCO: case bci_TESTLT_I: { // There should be an Int at Sp[1], and an info table at Sp[0]. int discr = BCO_NEXT; - int failto = BCO_NEXT; + int failto = BCO_GET_LARGE_ARG; I_ stackInt = (I_)Sp[1]; if (stackInt >= (I_)BCO_LIT(discr)) bciPtr = failto; @@ -1202,7 +1221,7 @@ run_BCO: case bci_TESTEQ_I: { // There should be an Int at Sp[1], and an info table at Sp[0]. int discr = BCO_NEXT; - int failto = BCO_NEXT; + int failto = BCO_GET_LARGE_ARG; I_ stackInt = (I_)Sp[1]; if (stackInt != (I_)BCO_LIT(discr)) { bciPtr = failto; @@ -1210,10 +1229,31 @@ run_BCO: goto nextInsn; } + case bci_TESTLT_W: { + // There should be an Int at Sp[1], and an info table at Sp[0]. + int discr = BCO_NEXT; + int failto = BCO_GET_LARGE_ARG; + W_ stackWord = (W_)Sp[1]; + if (stackWord >= (W_)BCO_LIT(discr)) + bciPtr = failto; + goto nextInsn; + } + + case bci_TESTEQ_W: { + // There should be an Int at Sp[1], and an info table at Sp[0]. + int discr = BCO_NEXT; + int failto = BCO_GET_LARGE_ARG; + W_ stackWord = (W_)Sp[1]; + if (stackWord != (W_)BCO_LIT(discr)) { + bciPtr = failto; + } + goto nextInsn; + } + case bci_TESTLT_D: { // There should be a Double at Sp[1], and an info table at Sp[0]. int discr = BCO_NEXT; - int failto = BCO_NEXT; + int failto = BCO_GET_LARGE_ARG; StgDouble stackDbl, discrDbl; stackDbl = PK_DBL( & Sp[1] ); discrDbl = PK_DBL( & BCO_LIT(discr) ); @@ -1226,7 +1266,7 @@ run_BCO: case bci_TESTEQ_D: { // There should be a Double at Sp[1], and an info table at Sp[0]. int discr = BCO_NEXT; - int failto = BCO_NEXT; + int failto = BCO_GET_LARGE_ARG; StgDouble stackDbl, discrDbl; stackDbl = PK_DBL( & Sp[1] ); discrDbl = PK_DBL( & BCO_LIT(discr) ); @@ -1239,7 +1279,7 @@ run_BCO: case bci_TESTLT_F: { // There should be a Float at Sp[1], and an info table at Sp[0]. int discr = BCO_NEXT; - int failto = BCO_NEXT; + int failto = BCO_GET_LARGE_ARG; StgFloat stackFlt, discrFlt; stackFlt = PK_FLT( & Sp[1] ); discrFlt = PK_FLT( & BCO_LIT(discr) ); @@ -1252,7 +1292,7 @@ run_BCO: case bci_TESTEQ_F: { // There should be a Float at Sp[1], and an info table at Sp[0]. int discr = BCO_NEXT; - int failto = BCO_NEXT; + int failto = BCO_GET_LARGE_ARG; StgFloat stackFlt, discrFlt; stackFlt = PK_FLT( & Sp[1] ); discrFlt = PK_FLT( & BCO_LIT(discr) ); @@ -1269,7 +1309,7 @@ run_BCO: // context switching: sometimes the scheduler can invoke // the interpreter with context_switch == 1, particularly // if the -C0 flag has been given on the cmd line. - if (context_switch) { + if (cap->r.rHpLim == NULL) { Sp--; Sp[0] = (W_)&stg_enter_info; RETURN_TO_SCHEDULER(ThreadInterpret, ThreadYielding); } @@ -1316,21 +1356,71 @@ run_BCO: void *tok; int stk_offset = BCO_NEXT; int o_itbl = BCO_NEXT; + int interruptible = BCO_NEXT; void(*marshall_fn)(void*) = (void (*)(void*))BCO_LIT(o_itbl); int ret_dyn_size = RET_DYN_BITMAP_SIZE + RET_DYN_NONPTR_REGS_SIZE + sizeofW(StgRetDyn); -#ifdef THREADED_RTS - // Threaded RTS: - // Arguments on the TSO stack are not good, because garbage - // collection might move the TSO as soon as we call - // suspendThread below. + /* the stack looks like this: + + | | <- Sp + stk_offset + +-------------+ + | | + | args | + | | <- Sp + ret_size + 1 + +-------------+ + | C fun | <- Sp + ret_size + +-------------+ + | ret | <- Sp + +-------------+ + + ret is a placeholder for the return address, and may be + up to 2 words. + + We need to copy the args out of the TSO, because when + we call suspendThread() we no longer own the TSO stack, + and it may move at any time - indeed suspendThread() + itself may do stack squeezing and move our args. + So we make a copy of the argument block. + */ + +#define ROUND_UP_WDS(p) ((((StgWord)(p)) + sizeof(W_)-1)/sizeof(W_)) + + ffi_cif *cif = (ffi_cif *)marshall_fn; + nat nargs = cif->nargs; + nat ret_size; + nat i; + StgPtr p; + W_ ret[2]; // max needed + W_ *arguments[stk_offset]; // max needed + void *argptrs[nargs]; + void (*fn)(void); + + if (cif->rtype->type == FFI_TYPE_VOID) { + // necessary because cif->rtype->size == 1 for void, + // but the bytecode generator has not pushed a + // placeholder in this case. + ret_size = 0; + } else { + ret_size = ROUND_UP_WDS(cif->rtype->size); + } - W_ arguments[stk_offset]; - - memcpy(arguments, Sp, sizeof(W_) * stk_offset); -#endif + memcpy(arguments, Sp+ret_size+1, + sizeof(W_) * (stk_offset-1-ret_size)); + + // libffi expects the args as an array of pointers to + // values, so we have to construct this array before making + // the call. + p = (StgPtr)arguments; + for (i = 0; i < nargs; i++) { + argptrs[i] = (void *)p; + // get the size from the cif + p += ROUND_UP_WDS(cif->arg_types[i]->size); + } + + // this is the function we're going to call + fn = (void(*)(void))Sp[ret_size]; // Restore the Haskell thread's current value of errno errno = cap->r.rCurrentTSO->saved_errno; @@ -1355,24 +1445,13 @@ run_BCO: ((StgRetDyn *)Sp)->payload[0] = (StgClosure *)obj; SAVE_STACK_POINTERS; - tok = suspendThread(&cap->r); - -#ifndef THREADED_RTS - // Careful: - // suspendThread might have shifted the stack - // around (stack squeezing), so we have to grab the real - // Sp out of the TSO to find the ccall args again. + tok = suspendThread(&cap->r, interruptible ? rtsTrue : rtsFalse); - marshall_fn ( (void*)(cap->r.rCurrentTSO->sp + ret_dyn_size) ); -#else - // Threaded RTS: // We already made a copy of the arguments above. - - marshall_fn ( arguments ); -#endif + ffi_call(cif, fn, ret, argptrs); // And restart the thread again, popping the RET_DYN frame. - cap = (Capability *)((void *)((unsigned char*)resumeThread(tok) - sizeof(StgFunTable))); + cap = (Capability *)((void *)((unsigned char*)resumeThread(tok) - STG_FIELD_OFFSET(Capability,r))); LOAD_STACK_POINTERS; // Re-load the pointer to the BCO from the RET_DYN frame, @@ -1389,20 +1468,16 @@ run_BCO: // Save the Haskell thread's current value of errno cap->r.rCurrentTSO->saved_errno = errno; -#ifdef THREADED_RTS - // Threaded RTS: - // Copy the "arguments", which might include a return value, - // back to the TSO stack. It would of course be enough to - // just copy the return value, but we don't know the offset. - memcpy(Sp, arguments, sizeof(W_) * stk_offset); -#endif + // Copy the return value back to the TSO stack. It is at + // most 2 words large, and resides at arguments[0]. + memcpy(Sp, ret, sizeof(W_) * stg_min(stk_offset,ret_size)); goto nextInsn; } case bci_JMP: { /* BCO_NEXT modifies bciPtr, so be conservative. */ - int nextpc = BCO_NEXT; + int nextpc = BCO_GET_LARGE_ARG; bciPtr = nextpc; goto nextInsn; }