X-Git-Url: http://git.megacz.com/?p=ghc-hetmet.git;a=blobdiff_plain;f=rts%2FInterpreter.c;h=ab59533666eab0a381fa0be6dc5503c7efed1e83;hp=66634459959e4edaa8d4f58d0422aa97e02f7714;hb=b7fecf522f271305de84d0efe8af5cad28aa45f5;hpb=17f848e12faf8cf51aa58918522b6abe1e75dc51 diff --git a/rts/Interpreter.c b/rts/Interpreter.c index 6663445..ab59533 100644 --- a/rts/Interpreter.c +++ b/rts/Interpreter.c @@ -28,6 +28,7 @@ #include #endif +#include "ffi.h" /* -------------------------------------------------------------------------- * The bytecode interpreter @@ -189,7 +190,7 @@ interpretBCO (Capability* cap) // that these entities are non-aliasable. register StgPtr Sp; // local state -- stack pointer register StgPtr SpLim; // local state -- stack lim pointer - register StgClosure* obj; + register StgClosure *tagged_obj = 0, *obj; nat n, m; LOAD_STACK_POINTERS; @@ -224,7 +225,7 @@ interpretBCO (Capability* cap) // +---------------+ // else if (Sp[0] == (W_)&stg_apply_interp_info) { - obj = (StgClosure *)Sp[1]; + obj = UNTAG_CLOSURE((StgClosure *)Sp[1]); Sp += 2; goto run_BCO_fun; } @@ -241,9 +242,10 @@ interpretBCO (Capability* cap) // Evaluate the object on top of the stack. eval: - obj = (StgClosure*)Sp[0]; Sp++; + tagged_obj = (StgClosure*)Sp[0]; Sp++; eval_obj: + obj = UNTAG_CLOSURE(tagged_obj); INTERP_TICK(it_total_evals); IF_DEBUG(interpreter, @@ -267,7 +269,7 @@ eval_obj: case IND_OLDGEN_PERM: case IND_STATIC: { - obj = ((StgInd*)obj)->indirectee; + tagged_obj = ((StgInd*)obj)->indirectee; goto eval_obj; } @@ -307,7 +309,7 @@ eval_obj: // Stack check if (Sp - (words+sizeofW(StgUpdateFrame)) < SpLim) { Sp -= 2; - Sp[1] = (W_)obj; + Sp[1] = (W_)tagged_obj; Sp[0] = (W_)&stg_enter_info; RETURN_TO_SCHEDULER(ThreadInterpret, StackOverflow); } @@ -327,7 +329,7 @@ eval_obj: Sp[i] = (W_)ap->payload[i]; } - obj = (StgClosure*)ap->fun; + obj = UNTAG_CLOSURE((StgClosure*)ap->fun); ASSERT(get_itbl(obj)->type == BCO); goto run_BCO_fun; } @@ -350,16 +352,17 @@ eval_obj: printObj(obj); ); Sp -= 2; - Sp[1] = (W_)obj; + Sp[1] = (W_)tagged_obj; Sp[0] = (W_)&stg_enter_info; RETURN_TO_SCHEDULER_NO_PAUSE(ThreadRunGHC, ThreadYielding); } } // ------------------------------------------------------------------------ - // We now have an evaluated object (obj). The next thing to + // We now have an evaluated object (tagged_obj). The next thing to // do is return it to the stack frame on top of the stack. do_return: + obj = UNTAG_CLOSURE(tagged_obj); ASSERT(closure_HNF(obj)); IF_DEBUG(interpreter, @@ -420,8 +423,16 @@ do_return: case UPDATE_FRAME: // Returning to an update frame: do the update, pop the update // frame, and continue with the next stack frame. + // + // NB. we must update with the *tagged* pointer. Some tags + // are not optional, and if we omit the tag bits when updating + // then bad things can happen (albeit very rarely). See #1925. + // What happened was an indirection was created with an + // untagged pointer, and this untagged pointer was propagated + // to a PAP by the GC, violating the invariant that PAPs + // always contain a tagged pointer to the function. INTERP_TICK(it_retto_UPDATE); - UPD_IND(((StgUpdateFrame *)Sp)->updatee, obj); + UPD_IND(((StgUpdateFrame *)Sp)->updatee, tagged_obj); Sp += sizeofW(StgUpdateFrame); goto do_return; @@ -431,6 +442,8 @@ do_return: INTERP_TICK(it_retto_BCO); Sp--; Sp[0] = (W_)obj; + // NB. return the untagged object; the bytecode expects it to + // be untagged. XXX this doesn't seem right. obj = (StgClosure*)Sp[2]; ASSERT(get_itbl(obj)->type == BCO); goto run_BCO_return; @@ -445,7 +458,7 @@ do_return: printStackChunk(Sp,cap->r.rCurrentTSO->stack+cap->r.rCurrentTSO->stack_size); ); Sp -= 2; - Sp[1] = (W_)obj; + Sp[1] = (W_)tagged_obj; Sp[0] = (W_)&stg_enter_info; RETURN_TO_SCHEDULER_NO_PAUSE(ThreadRunGHC, ThreadYielding); } @@ -518,6 +531,7 @@ do_return_unboxed: // Application... do_apply: + ASSERT(obj == UNTAG_CLOSURE(tagged_obj)); // we have a function to apply (obj), and n arguments taking up m // words on the stack. The info table (stg_ap_pp_info or whatever) // is on top of the arguments on the stack. @@ -531,7 +545,7 @@ do_apply: pap = (StgPAP *)obj; // we only cope with PAPs whose function is a BCO - if (get_itbl(pap->fun)->type != BCO) { + if (get_itbl(UNTAG_CLOSURE(pap->fun))->type != BCO) { goto defer_apply_to_sched; } @@ -556,7 +570,7 @@ do_apply: for (i = 0; i < pap->n_args; i++) { Sp[i] = (W_)pap->payload[i]; } - obj = pap->fun; + obj = UNTAG_CLOSURE(pap->fun); goto run_BCO_fun; } else if (arity == n) { @@ -564,7 +578,7 @@ do_apply: for (i = 0; i < pap->n_args; i++) { Sp[i] = (W_)pap->payload[i]; } - obj = pap->fun; + obj = UNTAG_CLOSURE(pap->fun); goto run_BCO_fun; } else /* arity > n */ { @@ -581,7 +595,7 @@ do_apply: for (i = 0; i < m; i++) { new_pap->payload[pap->n_args + i] = (StgClosure *)Sp[i]; } - obj = (StgClosure *)new_pap; + tagged_obj = (StgClosure *)new_pap; Sp += m; goto do_return; } @@ -623,7 +637,7 @@ do_apply: for (i = 0; i < m; i++) { pap->payload[i] = (StgClosure *)Sp[i]; } - obj = (StgClosure *)pap; + tagged_obj = (StgClosure *)pap; Sp += m; goto do_return; } @@ -633,7 +647,7 @@ do_apply: default: defer_apply_to_sched: Sp -= 2; - Sp[1] = (W_)obj; + Sp[1] = (W_)tagged_obj; Sp[0] = (W_)&stg_enter_info; RETURN_TO_SCHEDULER_NO_PAUSE(ThreadRunGHC, ThreadYielding); } @@ -846,15 +860,20 @@ run_BCO: // in a reasonable state for the GC and so that // execution of this BCO can continue when we resume ioAction = (StgClosure *) deRefStablePtr (rts_breakpoint_io_action); - Sp -= 8; - Sp[7] = (W_)obj; - Sp[6] = (W_)&stg_apply_interp_info; + Sp -= 9; + Sp[8] = (W_)obj; + Sp[7] = (W_)&stg_apply_interp_info; + Sp[6] = (W_)&stg_noforceIO_info; // see [unreg] below Sp[5] = (W_)new_aps; // the AP_STACK Sp[4] = (W_)BCO_PTR(arg3_freeVars); // the info about local vars of the breakpoint Sp[3] = (W_)False_closure; // True <=> a breakpoint Sp[2] = (W_)&stg_ap_pppv_info; Sp[1] = (W_)ioAction; // apply the IO action to its two arguments above Sp[0] = (W_)&stg_enter_info; // get ready to run the IO action + // Note [unreg]: in unregisterised mode, the return + // convention for IO is different. The + // stg_noForceIO_info stack frame is necessary to + // account for this difference. // set the flag in the TSO to say that we are now // stopping at a breakpoint so that when we resume @@ -1048,6 +1067,17 @@ run_BCO: goto nextInsn; } + case bci_ALLOC_AP_NOUPD: { + StgAP* ap; + int n_payload = BCO_NEXT; + ap = (StgAP*)allocate(AP_sizeW(n_payload)); + Sp[-1] = (W_)ap; + ap->n_args = n_payload; + SET_HDR(ap, &stg_AP_NOUPD_info, CCS_SYSTEM/*ToDo*/) + Sp --; + goto nextInsn; + } + case bci_ALLOC_PAP: { StgPAP* pap; int arity = BCO_NEXT; @@ -1247,7 +1277,7 @@ run_BCO: goto eval; case bci_RETURN: - obj = (StgClosure *)Sp[0]; + tagged_obj = (StgClosure *)Sp[0]; Sp++; goto do_return; @@ -1292,16 +1322,65 @@ run_BCO: RET_DYN_BITMAP_SIZE + RET_DYN_NONPTR_REGS_SIZE + sizeofW(StgRetDyn); -#ifdef THREADED_RTS - // Threaded RTS: - // Arguments on the TSO stack are not good, because garbage - // collection might move the TSO as soon as we call - // suspendThread below. + /* the stack looks like this: + + | | <- Sp + stk_offset + +-------------+ + | | + | args | + | | <- Sp + ret_size + 1 + +-------------+ + | C fun | <- Sp + ret_size + +-------------+ + | ret | <- Sp + +-------------+ + + ret is a placeholder for the return address, and may be + up to 2 words. + + We need to copy the args out of the TSO, because when + we call suspendThread() we no longer own the TSO stack, + and it may move at any time - indeed suspendThread() + itself may do stack squeezing and move our args. + So we make a copy of the argument block. + */ + +#define ROUND_UP_WDS(p) ((((StgWord)(p)) + sizeof(W_)-1)/sizeof(W_)) + + ffi_cif *cif = (ffi_cif *)marshall_fn; + nat nargs = cif->nargs; + nat ret_size; + nat i; + StgPtr p; + W_ ret[2]; // max needed + W_ *arguments[stk_offset]; // max needed + void *argptrs[nargs]; + void (*fn)(void); + + if (cif->rtype->type == FFI_TYPE_VOID) { + // necessary because cif->rtype->size == 1 for void, + // but the bytecode generator has not pushed a + // placeholder in this case. + ret_size = 0; + } else { + ret_size = ROUND_UP_WDS(cif->rtype->size); + } - W_ arguments[stk_offset]; - - memcpy(arguments, Sp, sizeof(W_) * stk_offset); -#endif + memcpy(arguments, Sp+ret_size+1, + sizeof(W_) * (stk_offset-1-ret_size)); + + // libffi expects the args as an array of pointers to + // values, so we have to construct this array before making + // the call. + p = (StgPtr)arguments; + for (i = 0; i < nargs; i++) { + argptrs[i] = (void *)p; + // get the size from the cif + p += ROUND_UP_WDS(cif->arg_types[i]->size); + } + + // this is the function we're going to call + fn = (void(*)(void))Sp[ret_size]; // Restore the Haskell thread's current value of errno errno = cap->r.rCurrentTSO->saved_errno; @@ -1317,41 +1396,41 @@ run_BCO: // on the stack frame to describe this chunk of stack. // Sp -= ret_dyn_size; - ((StgRetDyn *)Sp)->liveness = NO_PTRS | N_NONPTRS(stk_offset); + ((StgRetDyn *)Sp)->liveness = R1_PTR | N_NONPTRS(stk_offset); ((StgRetDyn *)Sp)->info = (StgInfoTable *)&stg_gc_gen_info; + // save obj (pointer to the current BCO), since this + // might move during the call. We use the R1 slot in the + // RET_DYN frame for this, hence R1_PTR above. + ((StgRetDyn *)Sp)->payload[0] = (StgClosure *)obj; + SAVE_STACK_POINTERS; tok = suspendThread(&cap->r); -#ifndef THREADED_RTS - // Careful: - // suspendThread might have shifted the stack - // around (stack squeezing), so we have to grab the real - // Sp out of the TSO to find the ccall args again. - - marshall_fn ( (void*)(cap->r.rCurrentTSO->sp + ret_dyn_size) ); -#else - // Threaded RTS: // We already made a copy of the arguments above. - - marshall_fn ( arguments ); -#endif + ffi_call(cif, fn, ret, argptrs); // And restart the thread again, popping the RET_DYN frame. cap = (Capability *)((void *)((unsigned char*)resumeThread(tok) - sizeof(StgFunTable))); LOAD_STACK_POINTERS; + + // Re-load the pointer to the BCO from the RET_DYN frame, + // it might have moved during the call. Also reload the + // pointers to the components of the BCO. + obj = ((StgRetDyn *)Sp)->payload[0]; + bco = (StgBCO*)obj; + instrs = (StgWord16*)(bco->instrs->payload); + literals = (StgWord*)(&bco->literals->payload[0]); + ptrs = (StgPtr*)(&bco->ptrs->payload[0]); + Sp += ret_dyn_size; // Save the Haskell thread's current value of errno cap->r.rCurrentTSO->saved_errno = errno; -#ifdef THREADED_RTS - // Threaded RTS: - // Copy the "arguments", which might include a return value, - // back to the TSO stack. It would of course be enough to - // just copy the return value, but we don't know the offset. - memcpy(Sp, arguments, sizeof(W_) * stk_offset); -#endif + // Copy the return value back to the TSO stack. It is at + // most 2 words large, and resides at arguments[0]. + memcpy(Sp, ret, sizeof(W_) * stg_min(stk_offset,ret_size)); goto nextInsn; } @@ -1369,7 +1448,7 @@ run_BCO: // Errors default: barf("interpretBCO: unknown or unimplemented opcode %d", - (int)BCO_NEXT); + (int)(bci & 0xFF)); } /* switch on opcode */ }