#include <errno.h>
#endif
+#include "ffi.h"
/* --------------------------------------------------------------------------
* The bytecode interpreter
SpLim = cap->r.rCurrentTSO->stack + RESERVED_STACK_WORDS;
#define SAVE_STACK_POINTERS \
+ ASSERT(Sp > SpLim); \
cap->r.rCurrentTSO->sp = Sp
#define RETURN_TO_SCHEDULER(todo,retcode) \
// that these entities are non-aliasable.
register StgPtr Sp; // local state -- stack pointer
register StgPtr SpLim; // local state -- stack lim pointer
- register StgClosure* obj;
+ register StgClosure *tagged_obj = 0, *obj;
nat n, m;
LOAD_STACK_POINTERS;
+ cap->r.rHpLim = (P_)1; // HpLim is the context-switch flag; when it
+ // goes to zero we must return to the scheduler.
+
// ------------------------------------------------------------------------
// Case 1:
//
// Evaluate the object on top of the stack.
eval:
- obj = (StgClosure*)Sp[0]; Sp++;
+ tagged_obj = (StgClosure*)Sp[0]; Sp++;
eval_obj:
- obj = UNTAG_CLOSURE(obj);
+ obj = UNTAG_CLOSURE(tagged_obj);
INTERP_TICK(it_total_evals);
IF_DEBUG(interpreter,
case IND_OLDGEN_PERM:
case IND_STATIC:
{
- obj = ((StgInd*)obj)->indirectee;
+ tagged_obj = ((StgInd*)obj)->indirectee;
goto eval_obj;
}
// Stack check
if (Sp - (words+sizeofW(StgUpdateFrame)) < SpLim) {
Sp -= 2;
- Sp[1] = (W_)obj;
+ Sp[1] = (W_)tagged_obj;
Sp[0] = (W_)&stg_enter_info;
RETURN_TO_SCHEDULER(ThreadInterpret, StackOverflow);
}
printObj(obj);
);
Sp -= 2;
- Sp[1] = (W_)obj;
+ Sp[1] = (W_)tagged_obj;
Sp[0] = (W_)&stg_enter_info;
RETURN_TO_SCHEDULER_NO_PAUSE(ThreadRunGHC, ThreadYielding);
}
}
// ------------------------------------------------------------------------
- // We now have an evaluated object (obj). The next thing to
+ // We now have an evaluated object (tagged_obj). The next thing to
// do is return it to the stack frame on top of the stack.
do_return:
+ obj = UNTAG_CLOSURE(tagged_obj);
ASSERT(closure_HNF(obj));
IF_DEBUG(interpreter,
case UPDATE_FRAME:
// Returning to an update frame: do the update, pop the update
// frame, and continue with the next stack frame.
+ //
+ // NB. we must update with the *tagged* pointer. Some tags
+ // are not optional, and if we omit the tag bits when updating
+ // then bad things can happen (albeit very rarely). See #1925.
+ // What happened was an indirection was created with an
+ // untagged pointer, and this untagged pointer was propagated
+ // to a PAP by the GC, violating the invariant that PAPs
+ // always contain a tagged pointer to the function.
INTERP_TICK(it_retto_UPDATE);
- UPD_IND(((StgUpdateFrame *)Sp)->updatee, obj);
+ UPD_IND(((StgUpdateFrame *)Sp)->updatee, tagged_obj);
Sp += sizeofW(StgUpdateFrame);
goto do_return;
INTERP_TICK(it_retto_BCO);
Sp--;
Sp[0] = (W_)obj;
+ // NB. return the untagged object; the bytecode expects it to
+ // be untagged. XXX this doesn't seem right.
obj = (StgClosure*)Sp[2];
ASSERT(get_itbl(obj)->type == BCO);
goto run_BCO_return;
printStackChunk(Sp,cap->r.rCurrentTSO->stack+cap->r.rCurrentTSO->stack_size);
);
Sp -= 2;
- Sp[1] = (W_)obj;
+ Sp[1] = (W_)tagged_obj;
Sp[0] = (W_)&stg_enter_info;
RETURN_TO_SCHEDULER_NO_PAUSE(ThreadRunGHC, ThreadYielding);
}
// Application...
do_apply:
+ ASSERT(obj == UNTAG_CLOSURE(tagged_obj));
// we have a function to apply (obj), and n arguments taking up m
// words on the stack. The info table (stg_ap_pp_info or whatever)
// is on top of the arguments on the stack.
goto defer_apply_to_sched;
}
+ // Stack check: we're about to unpack the PAP onto the
+ // stack. The (+1) is for the (arity < n) case, where we
+ // also need space for an extra info pointer.
+ if (Sp - (pap->n_args + 1) < SpLim) {
+ Sp -= 2;
+ Sp[1] = (W_)tagged_obj;
+ Sp[0] = (W_)&stg_enter_info;
+ RETURN_TO_SCHEDULER(ThreadInterpret, StackOverflow);
+ }
+
Sp++;
arity = pap->arity;
ASSERT(arity > 0);
for (i = 0; i < m; i++) {
new_pap->payload[pap->n_args + i] = (StgClosure *)Sp[i];
}
- obj = (StgClosure *)new_pap;
+ tagged_obj = (StgClosure *)new_pap;
Sp += m;
goto do_return;
}
for (i = 0; i < m; i++) {
pap->payload[i] = (StgClosure *)Sp[i];
}
- obj = (StgClosure *)pap;
+ tagged_obj = (StgClosure *)pap;
Sp += m;
goto do_return;
}
default:
defer_apply_to_sched:
Sp -= 2;
- Sp[1] = (W_)obj;
+ Sp[1] = (W_)tagged_obj;
Sp[0] = (W_)&stg_enter_info;
RETURN_TO_SCHEDULER_NO_PAUSE(ThreadRunGHC, ThreadYielding);
}
// context switching: sometimes the scheduler can invoke
// the interpreter with context_switch == 1, particularly
// if the -C0 flag has been given on the cmd line.
- if (context_switch) {
+ if (cap->r.rHpLim == NULL) {
Sp--; Sp[0] = (W_)&stg_enter_info;
RETURN_TO_SCHEDULER(ThreadInterpret, ThreadYielding);
}
goto eval;
case bci_RETURN:
- obj = (StgClosure *)Sp[0];
+ tagged_obj = (StgClosure *)Sp[0];
Sp++;
goto do_return;
RET_DYN_BITMAP_SIZE + RET_DYN_NONPTR_REGS_SIZE
+ sizeofW(StgRetDyn);
-#ifdef THREADED_RTS
- // Threaded RTS:
- // Arguments on the TSO stack are not good, because garbage
- // collection might move the TSO as soon as we call
- // suspendThread below.
+ /* the stack looks like this:
+
+ | | <- Sp + stk_offset
+ +-------------+
+ | |
+ | args |
+ | | <- Sp + ret_size + 1
+ +-------------+
+ | C fun | <- Sp + ret_size
+ +-------------+
+ | ret | <- Sp
+ +-------------+
+
+ ret is a placeholder for the return address, and may be
+ up to 2 words.
+
+ We need to copy the args out of the TSO, because when
+ we call suspendThread() we no longer own the TSO stack,
+ and it may move at any time - indeed suspendThread()
+ itself may do stack squeezing and move our args.
+ So we make a copy of the argument block.
+ */
+
+#define ROUND_UP_WDS(p) ((((StgWord)(p)) + sizeof(W_)-1)/sizeof(W_))
+
+ ffi_cif *cif = (ffi_cif *)marshall_fn;
+ nat nargs = cif->nargs;
+ nat ret_size;
+ nat i;
+ StgPtr p;
+ W_ ret[2]; // max needed
+ W_ *arguments[stk_offset]; // max needed
+ void *argptrs[nargs];
+ void (*fn)(void);
+
+ if (cif->rtype->type == FFI_TYPE_VOID) {
+ // necessary because cif->rtype->size == 1 for void,
+ // but the bytecode generator has not pushed a
+ // placeholder in this case.
+ ret_size = 0;
+ } else {
+ ret_size = ROUND_UP_WDS(cif->rtype->size);
+ }
+
+ memcpy(arguments, Sp+ret_size+1,
+ sizeof(W_) * (stk_offset-1-ret_size));
+
+ // libffi expects the args as an array of pointers to
+ // values, so we have to construct this array before making
+ // the call.
+ p = (StgPtr)arguments;
+ for (i = 0; i < nargs; i++) {
+ argptrs[i] = (void *)p;
+ // get the size from the cif
+ p += ROUND_UP_WDS(cif->arg_types[i]->size);
+ }
- W_ arguments[stk_offset];
-
- memcpy(arguments, Sp, sizeof(W_) * stk_offset);
-#endif
+ // this is the function we're going to call
+ fn = (void(*)(void))Sp[ret_size];
// Restore the Haskell thread's current value of errno
errno = cap->r.rCurrentTSO->saved_errno;
// on the stack frame to describe this chunk of stack.
//
Sp -= ret_dyn_size;
- ((StgRetDyn *)Sp)->liveness = NO_PTRS | N_NONPTRS(stk_offset);
+ ((StgRetDyn *)Sp)->liveness = R1_PTR | N_NONPTRS(stk_offset);
((StgRetDyn *)Sp)->info = (StgInfoTable *)&stg_gc_gen_info;
+ // save obj (pointer to the current BCO), since this
+ // might move during the call. We use the R1 slot in the
+ // RET_DYN frame for this, hence R1_PTR above.
+ ((StgRetDyn *)Sp)->payload[0] = (StgClosure *)obj;
+
SAVE_STACK_POINTERS;
tok = suspendThread(&cap->r);
-#ifndef THREADED_RTS
- // Careful:
- // suspendThread might have shifted the stack
- // around (stack squeezing), so we have to grab the real
- // Sp out of the TSO to find the ccall args again.
-
- marshall_fn ( (void*)(cap->r.rCurrentTSO->sp + ret_dyn_size) );
-#else
- // Threaded RTS:
// We already made a copy of the arguments above.
-
- marshall_fn ( arguments );
-#endif
+ ffi_call(cif, fn, ret, argptrs);
// And restart the thread again, popping the RET_DYN frame.
- cap = (Capability *)((void *)((unsigned char*)resumeThread(tok) - sizeof(StgFunTable)));
+ cap = (Capability *)((void *)((unsigned char*)resumeThread(tok) - FIELD_OFFSET(Capability,r)));
LOAD_STACK_POINTERS;
+
+ // Re-load the pointer to the BCO from the RET_DYN frame,
+ // it might have moved during the call. Also reload the
+ // pointers to the components of the BCO.
+ obj = ((StgRetDyn *)Sp)->payload[0];
+ bco = (StgBCO*)obj;
+ instrs = (StgWord16*)(bco->instrs->payload);
+ literals = (StgWord*)(&bco->literals->payload[0]);
+ ptrs = (StgPtr*)(&bco->ptrs->payload[0]);
+
Sp += ret_dyn_size;
// Save the Haskell thread's current value of errno
cap->r.rCurrentTSO->saved_errno = errno;
-#ifdef THREADED_RTS
- // Threaded RTS:
- // Copy the "arguments", which might include a return value,
- // back to the TSO stack. It would of course be enough to
- // just copy the return value, but we don't know the offset.
- memcpy(Sp, arguments, sizeof(W_) * stk_offset);
-#endif
+ // Copy the return value back to the TSO stack. It is at
+ // most 2 words large, and resides at arguments[0].
+ memcpy(Sp, ret, sizeof(W_) * stg_min(stk_offset,ret_size));
goto nextInsn;
}