X-Git-Url: http://git.megacz.com/?p=ghc-hetmet.git;a=blobdiff_plain;f=rts%2FInterpreter.c;h=ab59533666eab0a381fa0be6dc5503c7efed1e83;hp=77f30582bb1036e9c77f000cfbfe42e0bdcbc820;hb=842e9d6628a27cf1f420d53f6a5901935dc50c54;hpb=0daf69ba7856dc3dca65b7bccfd59a29ef632a6e diff --git a/rts/Interpreter.c b/rts/Interpreter.c index 77f3058..ab59533 100644 --- a/rts/Interpreter.c +++ b/rts/Interpreter.c @@ -28,6 +28,7 @@ #include #endif +#include "ffi.h" /* -------------------------------------------------------------------------- * The bytecode interpreter @@ -189,7 +190,7 @@ interpretBCO (Capability* cap) // that these entities are non-aliasable. register StgPtr Sp; // local state -- stack pointer register StgPtr SpLim; // local state -- stack lim pointer - register StgClosure* obj; + register StgClosure *tagged_obj = 0, *obj; nat n, m; LOAD_STACK_POINTERS; @@ -241,10 +242,10 @@ interpretBCO (Capability* cap) // Evaluate the object on top of the stack. eval: - obj = (StgClosure*)Sp[0]; Sp++; + tagged_obj = (StgClosure*)Sp[0]; Sp++; eval_obj: - obj = UNTAG_CLOSURE(obj); + obj = UNTAG_CLOSURE(tagged_obj); INTERP_TICK(it_total_evals); IF_DEBUG(interpreter, @@ -268,7 +269,7 @@ eval_obj: case IND_OLDGEN_PERM: case IND_STATIC: { - obj = ((StgInd*)obj)->indirectee; + tagged_obj = ((StgInd*)obj)->indirectee; goto eval_obj; } @@ -308,7 +309,7 @@ eval_obj: // Stack check if (Sp - (words+sizeofW(StgUpdateFrame)) < SpLim) { Sp -= 2; - Sp[1] = (W_)obj; + Sp[1] = (W_)tagged_obj; Sp[0] = (W_)&stg_enter_info; RETURN_TO_SCHEDULER(ThreadInterpret, StackOverflow); } @@ -351,16 +352,17 @@ eval_obj: printObj(obj); ); Sp -= 2; - Sp[1] = (W_)obj; + Sp[1] = (W_)tagged_obj; Sp[0] = (W_)&stg_enter_info; RETURN_TO_SCHEDULER_NO_PAUSE(ThreadRunGHC, ThreadYielding); } } // ------------------------------------------------------------------------ - // We now have an evaluated object (obj). The next thing to + // We now have an evaluated object (tagged_obj). The next thing to // do is return it to the stack frame on top of the stack. do_return: + obj = UNTAG_CLOSURE(tagged_obj); ASSERT(closure_HNF(obj)); IF_DEBUG(interpreter, @@ -421,8 +423,16 @@ do_return: case UPDATE_FRAME: // Returning to an update frame: do the update, pop the update // frame, and continue with the next stack frame. + // + // NB. we must update with the *tagged* pointer. Some tags + // are not optional, and if we omit the tag bits when updating + // then bad things can happen (albeit very rarely). See #1925. + // What happened was an indirection was created with an + // untagged pointer, and this untagged pointer was propagated + // to a PAP by the GC, violating the invariant that PAPs + // always contain a tagged pointer to the function. INTERP_TICK(it_retto_UPDATE); - UPD_IND(((StgUpdateFrame *)Sp)->updatee, obj); + UPD_IND(((StgUpdateFrame *)Sp)->updatee, tagged_obj); Sp += sizeofW(StgUpdateFrame); goto do_return; @@ -432,6 +442,8 @@ do_return: INTERP_TICK(it_retto_BCO); Sp--; Sp[0] = (W_)obj; + // NB. return the untagged object; the bytecode expects it to + // be untagged. XXX this doesn't seem right. obj = (StgClosure*)Sp[2]; ASSERT(get_itbl(obj)->type == BCO); goto run_BCO_return; @@ -446,7 +458,7 @@ do_return: printStackChunk(Sp,cap->r.rCurrentTSO->stack+cap->r.rCurrentTSO->stack_size); ); Sp -= 2; - Sp[1] = (W_)obj; + Sp[1] = (W_)tagged_obj; Sp[0] = (W_)&stg_enter_info; RETURN_TO_SCHEDULER_NO_PAUSE(ThreadRunGHC, ThreadYielding); } @@ -519,6 +531,7 @@ do_return_unboxed: // Application... do_apply: + ASSERT(obj == UNTAG_CLOSURE(tagged_obj)); // we have a function to apply (obj), and n arguments taking up m // words on the stack. The info table (stg_ap_pp_info or whatever) // is on top of the arguments on the stack. @@ -582,7 +595,7 @@ do_apply: for (i = 0; i < m; i++) { new_pap->payload[pap->n_args + i] = (StgClosure *)Sp[i]; } - obj = (StgClosure *)new_pap; + tagged_obj = (StgClosure *)new_pap; Sp += m; goto do_return; } @@ -624,7 +637,7 @@ do_apply: for (i = 0; i < m; i++) { pap->payload[i] = (StgClosure *)Sp[i]; } - obj = (StgClosure *)pap; + tagged_obj = (StgClosure *)pap; Sp += m; goto do_return; } @@ -634,7 +647,7 @@ do_apply: default: defer_apply_to_sched: Sp -= 2; - Sp[1] = (W_)obj; + Sp[1] = (W_)tagged_obj; Sp[0] = (W_)&stg_enter_info; RETURN_TO_SCHEDULER_NO_PAUSE(ThreadRunGHC, ThreadYielding); } @@ -1264,7 +1277,7 @@ run_BCO: goto eval; case bci_RETURN: - obj = (StgClosure *)Sp[0]; + tagged_obj = (StgClosure *)Sp[0]; Sp++; goto do_return; @@ -1309,16 +1322,65 @@ run_BCO: RET_DYN_BITMAP_SIZE + RET_DYN_NONPTR_REGS_SIZE + sizeofW(StgRetDyn); -#ifdef THREADED_RTS - // Threaded RTS: - // Arguments on the TSO stack are not good, because garbage - // collection might move the TSO as soon as we call - // suspendThread below. + /* the stack looks like this: + + | | <- Sp + stk_offset + +-------------+ + | | + | args | + | | <- Sp + ret_size + 1 + +-------------+ + | C fun | <- Sp + ret_size + +-------------+ + | ret | <- Sp + +-------------+ + + ret is a placeholder for the return address, and may be + up to 2 words. + + We need to copy the args out of the TSO, because when + we call suspendThread() we no longer own the TSO stack, + and it may move at any time - indeed suspendThread() + itself may do stack squeezing and move our args. + So we make a copy of the argument block. + */ + +#define ROUND_UP_WDS(p) ((((StgWord)(p)) + sizeof(W_)-1)/sizeof(W_)) + + ffi_cif *cif = (ffi_cif *)marshall_fn; + nat nargs = cif->nargs; + nat ret_size; + nat i; + StgPtr p; + W_ ret[2]; // max needed + W_ *arguments[stk_offset]; // max needed + void *argptrs[nargs]; + void (*fn)(void); + + if (cif->rtype->type == FFI_TYPE_VOID) { + // necessary because cif->rtype->size == 1 for void, + // but the bytecode generator has not pushed a + // placeholder in this case. + ret_size = 0; + } else { + ret_size = ROUND_UP_WDS(cif->rtype->size); + } - W_ arguments[stk_offset]; - - memcpy(arguments, Sp, sizeof(W_) * stk_offset); -#endif + memcpy(arguments, Sp+ret_size+1, + sizeof(W_) * (stk_offset-1-ret_size)); + + // libffi expects the args as an array of pointers to + // values, so we have to construct this array before making + // the call. + p = (StgPtr)arguments; + for (i = 0; i < nargs; i++) { + argptrs[i] = (void *)p; + // get the size from the cif + p += ROUND_UP_WDS(cif->arg_types[i]->size); + } + + // this is the function we're going to call + fn = (void(*)(void))Sp[ret_size]; // Restore the Haskell thread's current value of errno errno = cap->r.rCurrentTSO->saved_errno; @@ -1334,41 +1396,41 @@ run_BCO: // on the stack frame to describe this chunk of stack. // Sp -= ret_dyn_size; - ((StgRetDyn *)Sp)->liveness = NO_PTRS | N_NONPTRS(stk_offset); + ((StgRetDyn *)Sp)->liveness = R1_PTR | N_NONPTRS(stk_offset); ((StgRetDyn *)Sp)->info = (StgInfoTable *)&stg_gc_gen_info; + // save obj (pointer to the current BCO), since this + // might move during the call. We use the R1 slot in the + // RET_DYN frame for this, hence R1_PTR above. + ((StgRetDyn *)Sp)->payload[0] = (StgClosure *)obj; + SAVE_STACK_POINTERS; tok = suspendThread(&cap->r); -#ifndef THREADED_RTS - // Careful: - // suspendThread might have shifted the stack - // around (stack squeezing), so we have to grab the real - // Sp out of the TSO to find the ccall args again. - - marshall_fn ( (void*)(cap->r.rCurrentTSO->sp + ret_dyn_size) ); -#else - // Threaded RTS: // We already made a copy of the arguments above. - - marshall_fn ( arguments ); -#endif + ffi_call(cif, fn, ret, argptrs); // And restart the thread again, popping the RET_DYN frame. cap = (Capability *)((void *)((unsigned char*)resumeThread(tok) - sizeof(StgFunTable))); LOAD_STACK_POINTERS; + + // Re-load the pointer to the BCO from the RET_DYN frame, + // it might have moved during the call. Also reload the + // pointers to the components of the BCO. + obj = ((StgRetDyn *)Sp)->payload[0]; + bco = (StgBCO*)obj; + instrs = (StgWord16*)(bco->instrs->payload); + literals = (StgWord*)(&bco->literals->payload[0]); + ptrs = (StgPtr*)(&bco->ptrs->payload[0]); + Sp += ret_dyn_size; // Save the Haskell thread's current value of errno cap->r.rCurrentTSO->saved_errno = errno; -#ifdef THREADED_RTS - // Threaded RTS: - // Copy the "arguments", which might include a return value, - // back to the TSO stack. It would of course be enough to - // just copy the return value, but we don't know the offset. - memcpy(Sp, arguments, sizeof(W_) * stk_offset); -#endif + // Copy the return value back to the TSO stack. It is at + // most 2 words large, and resides at arguments[0]. + memcpy(Sp, ret, sizeof(W_) * stg_min(stk_offset,ret_size)); goto nextInsn; }