#include "PosixSource.h"
#include "Rts.h"
#include "RtsAPI.h"
+#include "rts/Bytecodes.h"
+
+// internal headers
+#include "sm/Storage.h"
+#include "sm/Sanity.h"
#include "RtsUtils.h"
-#include "Closures.h"
-#include "TSO.h"
#include "Schedule.h"
-#include "RtsFlags.h"
-#include "LdvProfile.h"
#include "Updates.h"
-#include "Sanity.h"
-#include "Liveness.h"
#include "Prelude.h"
-
-#include "Bytecodes.h"
+#include "Stable.h"
#include "Printer.h"
#include "Disassembler.h"
#include "Interpreter.h"
+#include "ThreadPaused.h"
+#include "Threads.h"
#include <string.h> /* for memcpy */
#ifdef HAVE_ERRNO_H
#include <errno.h>
#endif
+// When building the RTS in the non-dyn way on Windows, we don't
+// want declspec(__dllimport__) on the front of function prototypes
+// from libffi.
+#if defined(mingw32_HOST_OS) && !defined(__PIC__)
+# define LIBFFI_NOT_DLL
+#endif
+
+#include "ffi.h"
/* --------------------------------------------------------------------------
* The bytecode interpreter
SpLim = cap->r.rCurrentTSO->stack + RESERVED_STACK_WORDS;
#define SAVE_STACK_POINTERS \
+ ASSERT(Sp > SpLim); \
cap->r.rCurrentTSO->sp = Sp
#define RETURN_TO_SCHEDULER(todo,retcode) \
STATIC_INLINE StgPtr
-allocate_NONUPD (int n_words)
+allocate_NONUPD (Capability *cap, int n_words)
{
- return allocate(stg_max(sizeofW(StgHeader)+MIN_PAYLOAD_SIZE, n_words));
+ return allocate(cap, stg_max(sizeofW(StgHeader)+MIN_PAYLOAD_SIZE, n_words));
}
int rts_stop_next_breakpoint = 0;
// that these entities are non-aliasable.
register StgPtr Sp; // local state -- stack pointer
register StgPtr SpLim; // local state -- stack lim pointer
- register StgClosure* obj;
+ register StgClosure *tagged_obj = 0, *obj;
nat n, m;
LOAD_STACK_POINTERS;
+ cap->r.rHpLim = (P_)1; // HpLim is the context-switch flag; when it
+ // goes to zero we must return to the scheduler.
+
// ------------------------------------------------------------------------
// Case 1:
//
// Evaluate the object on top of the stack.
eval:
- obj = (StgClosure*)Sp[0]; Sp++;
+ tagged_obj = (StgClosure*)Sp[0]; Sp++;
eval_obj:
- obj = UNTAG_CLOSURE(obj);
+ obj = UNTAG_CLOSURE(tagged_obj);
INTERP_TICK(it_total_evals);
IF_DEBUG(interpreter,
debugBelch("\n\n");
);
- IF_DEBUG(sanity,checkStackChunk(Sp, cap->r.rCurrentTSO->stack+cap->r.rCurrentTSO->stack_size));
+// IF_DEBUG(sanity,checkStackChunk(Sp, cap->r.rCurrentTSO->stack+cap->r.rCurrentTSO->stack_size));
+ IF_DEBUG(sanity,checkStackFrame(Sp));
switch ( get_itbl(obj)->type ) {
case IND:
- case IND_OLDGEN:
case IND_PERM:
- case IND_OLDGEN_PERM:
case IND_STATIC:
{
- obj = ((StgInd*)obj)->indirectee;
+ tagged_obj = ((StgInd*)obj)->indirectee;
goto eval_obj;
}
// Stack check
if (Sp - (words+sizeofW(StgUpdateFrame)) < SpLim) {
Sp -= 2;
- Sp[1] = (W_)obj;
+ Sp[1] = (W_)tagged_obj;
Sp[0] = (W_)&stg_enter_info;
RETURN_TO_SCHEDULER(ThreadInterpret, StackOverflow);
}
printObj(obj);
);
Sp -= 2;
- Sp[1] = (W_)obj;
+ Sp[1] = (W_)tagged_obj;
Sp[0] = (W_)&stg_enter_info;
RETURN_TO_SCHEDULER_NO_PAUSE(ThreadRunGHC, ThreadYielding);
}
}
// ------------------------------------------------------------------------
- // We now have an evaluated object (obj). The next thing to
+ // We now have an evaluated object (tagged_obj). The next thing to
// do is return it to the stack frame on top of the stack.
do_return:
+ obj = UNTAG_CLOSURE(tagged_obj);
ASSERT(closure_HNF(obj));
IF_DEBUG(interpreter,
case UPDATE_FRAME:
// Returning to an update frame: do the update, pop the update
// frame, and continue with the next stack frame.
+ //
+ // NB. we must update with the *tagged* pointer. Some tags
+ // are not optional, and if we omit the tag bits when updating
+ // then bad things can happen (albeit very rarely). See #1925.
+ // What happened was an indirection was created with an
+ // untagged pointer, and this untagged pointer was propagated
+ // to a PAP by the GC, violating the invariant that PAPs
+ // always contain a tagged pointer to the function.
INTERP_TICK(it_retto_UPDATE);
- UPD_IND(((StgUpdateFrame *)Sp)->updatee, obj);
+ updateThunk(cap, cap->r.rCurrentTSO,
+ ((StgUpdateFrame *)Sp)->updatee, tagged_obj);
Sp += sizeofW(StgUpdateFrame);
goto do_return;
INTERP_TICK(it_retto_BCO);
Sp--;
Sp[0] = (W_)obj;
+ // NB. return the untagged object; the bytecode expects it to
+ // be untagged. XXX this doesn't seem right.
obj = (StgClosure*)Sp[2];
ASSERT(get_itbl(obj)->type == BCO);
goto run_BCO_return;
printStackChunk(Sp,cap->r.rCurrentTSO->stack+cap->r.rCurrentTSO->stack_size);
);
Sp -= 2;
- Sp[1] = (W_)obj;
+ Sp[1] = (W_)tagged_obj;
Sp[0] = (W_)&stg_enter_info;
RETURN_TO_SCHEDULER_NO_PAUSE(ThreadRunGHC, ThreadYielding);
}
// Application...
do_apply:
+ ASSERT(obj == UNTAG_CLOSURE(tagged_obj));
// we have a function to apply (obj), and n arguments taking up m
// words on the stack. The info table (stg_ap_pp_info or whatever)
// is on top of the arguments on the stack.
goto defer_apply_to_sched;
}
+ // Stack check: we're about to unpack the PAP onto the
+ // stack. The (+1) is for the (arity < n) case, where we
+ // also need space for an extra info pointer.
+ if (Sp - (pap->n_args + 1) < SpLim) {
+ Sp -= 2;
+ Sp[1] = (W_)tagged_obj;
+ Sp[0] = (W_)&stg_enter_info;
+ RETURN_TO_SCHEDULER(ThreadInterpret, StackOverflow);
+ }
+
Sp++;
arity = pap->arity;
ASSERT(arity > 0);
else /* arity > n */ {
// build a new PAP and return it.
StgPAP *new_pap;
- new_pap = (StgPAP *)allocate(PAP_sizeW(pap->n_args + m));
+ new_pap = (StgPAP *)allocate(cap, PAP_sizeW(pap->n_args + m));
SET_HDR(new_pap,&stg_PAP_info,CCCS);
new_pap->arity = pap->arity - n;
new_pap->n_args = pap->n_args + m;
for (i = 0; i < m; i++) {
new_pap->payload[pap->n_args + i] = (StgClosure *)Sp[i];
}
- obj = (StgClosure *)new_pap;
+ tagged_obj = (StgClosure *)new_pap;
Sp += m;
goto do_return;
}
// build a PAP and return it.
StgPAP *pap;
nat i;
- pap = (StgPAP *)allocate(PAP_sizeW(m));
+ pap = (StgPAP *)allocate(cap, PAP_sizeW(m));
SET_HDR(pap, &stg_PAP_info,CCCS);
pap->arity = arity - n;
pap->fun = obj;
for (i = 0; i < m; i++) {
pap->payload[i] = (StgClosure *)Sp[i];
}
- obj = (StgClosure *)pap;
+ tagged_obj = (StgClosure *)pap;
Sp += m;
goto do_return;
}
default:
defer_apply_to_sched:
Sp -= 2;
- Sp[1] = (W_)obj;
+ Sp[1] = (W_)tagged_obj;
Sp[0] = (W_)&stg_enter_info;
RETURN_TO_SCHEDULER_NO_PAUSE(ThreadRunGHC, ThreadYielding);
}
run_BCO_return:
// Heap check
- if (doYouWantToGC()) {
+ if (doYouWantToGC(cap)) {
Sp--; Sp[0] = (W_)&stg_enter_info;
RETURN_TO_SCHEDULER(ThreadInterpret, HeapOverflow);
}
run_BCO_return_unboxed:
// Heap check
- if (doYouWantToGC()) {
+ if (doYouWantToGC(cap)) {
RETURN_TO_SCHEDULER(ThreadInterpret, HeapOverflow);
}
// Stack checks aren't necessary at return points, the stack use
);
// Heap check
- if (doYouWantToGC()) {
+ if (doYouWantToGC(cap)) {
Sp -= 2;
Sp[1] = (W_)obj;
Sp[0] = (W_)&stg_apply_interp_info; // placeholder, really
run_BCO:
INTERP_TICK(it_BCO_entries);
{
- register int bciPtr = 1; /* instruction pointer */
+ register int bciPtr = 0; /* instruction pointer */
register StgWord16 bci;
register StgBCO* bco = (StgBCO*)obj;
register StgWord16* instrs = (StgWord16*)(bco->instrs->payload);
register StgWord* literals = (StgWord*)(&bco->literals->payload[0]);
register StgPtr* ptrs = (StgPtr*)(&bco->ptrs->payload[0]);
+ int bcoSize;
+ bcoSize = BCO_NEXT_WORD;
+ IF_DEBUG(interpreter,debugBelch("bcoSize = %d\n", bcoSize));
#ifdef INTERP_STATS
it_lastopc = 0; /* no opcode */
#endif
nextInsn:
- ASSERT(bciPtr <= instrs[0]);
+ ASSERT(bciPtr < bcoSize);
IF_DEBUG(interpreter,
//if (do_print_stack) {
//debugBelch("\n-- BEGIN stack\n");
// stg_apply_interp_info pointer and a pointer to
// the BCO
size_words = BCO_BITMAP_SIZE(obj) + 2;
- new_aps = (StgAP_STACK *) allocate (AP_STACK_sizeW(size_words));
+ new_aps = (StgAP_STACK *) allocate(cap, AP_STACK_sizeW(size_words));
SET_HDR(new_aps,&stg_AP_STACK_info,CCS_SYSTEM);
new_aps->size = size_words;
new_aps->fun = &stg_dummy_ret_closure;
// in a reasonable state for the GC and so that
// execution of this BCO can continue when we resume
ioAction = (StgClosure *) deRefStablePtr (rts_breakpoint_io_action);
- Sp -= 8;
- Sp[7] = (W_)obj;
- Sp[6] = (W_)&stg_apply_interp_info;
+ Sp -= 9;
+ Sp[8] = (W_)obj;
+ Sp[7] = (W_)&stg_apply_interp_info;
+ Sp[6] = (W_)&stg_noforceIO_info; // see [unreg] below
Sp[5] = (W_)new_aps; // the AP_STACK
Sp[4] = (W_)BCO_PTR(arg3_freeVars); // the info about local vars of the breakpoint
Sp[3] = (W_)False_closure; // True <=> a breakpoint
Sp[2] = (W_)&stg_ap_pppv_info;
Sp[1] = (W_)ioAction; // apply the IO action to its two arguments above
Sp[0] = (W_)&stg_enter_info; // get ready to run the IO action
+ // Note [unreg]: in unregisterised mode, the return
+ // convention for IO is different. The
+ // stg_noForceIO_info stack frame is necessary to
+ // account for this difference.
// set the flag in the TSO to say that we are now
// stopping at a breakpoint so that when we resume
case bci_ALLOC_AP: {
StgAP* ap;
int n_payload = BCO_NEXT;
- ap = (StgAP*)allocate(AP_sizeW(n_payload));
+ ap = (StgAP*)allocate(cap, AP_sizeW(n_payload));
Sp[-1] = (W_)ap;
ap->n_args = n_payload;
SET_HDR(ap, &stg_AP_info, CCS_SYSTEM/*ToDo*/)
case bci_ALLOC_AP_NOUPD: {
StgAP* ap;
int n_payload = BCO_NEXT;
- ap = (StgAP*)allocate(AP_sizeW(n_payload));
+ ap = (StgAP*)allocate(cap, AP_sizeW(n_payload));
Sp[-1] = (W_)ap;
ap->n_args = n_payload;
SET_HDR(ap, &stg_AP_NOUPD_info, CCS_SYSTEM/*ToDo*/)
StgPAP* pap;
int arity = BCO_NEXT;
int n_payload = BCO_NEXT;
- pap = (StgPAP*)allocate(PAP_sizeW(n_payload));
+ pap = (StgPAP*)allocate(cap, PAP_sizeW(n_payload));
Sp[-1] = (W_)pap;
pap->n_args = n_payload;
pap->arity = arity;
StgInfoTable* itbl = INFO_PTR_TO_STRUCT(BCO_LIT(o_itbl));
int request = CONSTR_sizeW( itbl->layout.payload.ptrs,
itbl->layout.payload.nptrs );
- StgClosure* con = (StgClosure*)allocate_NONUPD(request);
+ StgClosure* con = (StgClosure*)allocate_NONUPD(cap,request);
ASSERT( itbl->layout.payload.ptrs + itbl->layout.payload.nptrs > 0);
SET_HDR(con, (StgInfoTable*)BCO_LIT(o_itbl), CCS_SYSTEM/*ToDo*/);
for (i = 0; i < n_words; i++) {
case bci_TESTLT_P: {
unsigned int discr = BCO_NEXT;
- int failto = BCO_NEXT;
+ int failto = BCO_GET_LARGE_ARG;
StgClosure* con = (StgClosure*)Sp[0];
if (GET_TAG(con) >= discr) {
bciPtr = failto;
case bci_TESTEQ_P: {
unsigned int discr = BCO_NEXT;
- int failto = BCO_NEXT;
+ int failto = BCO_GET_LARGE_ARG;
StgClosure* con = (StgClosure*)Sp[0];
if (GET_TAG(con) != discr) {
bciPtr = failto;
case bci_TESTLT_I: {
// There should be an Int at Sp[1], and an info table at Sp[0].
int discr = BCO_NEXT;
- int failto = BCO_NEXT;
+ int failto = BCO_GET_LARGE_ARG;
I_ stackInt = (I_)Sp[1];
if (stackInt >= (I_)BCO_LIT(discr))
bciPtr = failto;
case bci_TESTEQ_I: {
// There should be an Int at Sp[1], and an info table at Sp[0].
int discr = BCO_NEXT;
- int failto = BCO_NEXT;
+ int failto = BCO_GET_LARGE_ARG;
I_ stackInt = (I_)Sp[1];
if (stackInt != (I_)BCO_LIT(discr)) {
bciPtr = failto;
goto nextInsn;
}
+ case bci_TESTLT_W: {
+ // There should be an Int at Sp[1], and an info table at Sp[0].
+ int discr = BCO_NEXT;
+ int failto = BCO_GET_LARGE_ARG;
+ W_ stackWord = (W_)Sp[1];
+ if (stackWord >= (W_)BCO_LIT(discr))
+ bciPtr = failto;
+ goto nextInsn;
+ }
+
+ case bci_TESTEQ_W: {
+ // There should be an Int at Sp[1], and an info table at Sp[0].
+ int discr = BCO_NEXT;
+ int failto = BCO_GET_LARGE_ARG;
+ W_ stackWord = (W_)Sp[1];
+ if (stackWord != (W_)BCO_LIT(discr)) {
+ bciPtr = failto;
+ }
+ goto nextInsn;
+ }
+
case bci_TESTLT_D: {
// There should be a Double at Sp[1], and an info table at Sp[0].
int discr = BCO_NEXT;
- int failto = BCO_NEXT;
+ int failto = BCO_GET_LARGE_ARG;
StgDouble stackDbl, discrDbl;
stackDbl = PK_DBL( & Sp[1] );
discrDbl = PK_DBL( & BCO_LIT(discr) );
case bci_TESTEQ_D: {
// There should be a Double at Sp[1], and an info table at Sp[0].
int discr = BCO_NEXT;
- int failto = BCO_NEXT;
+ int failto = BCO_GET_LARGE_ARG;
StgDouble stackDbl, discrDbl;
stackDbl = PK_DBL( & Sp[1] );
discrDbl = PK_DBL( & BCO_LIT(discr) );
case bci_TESTLT_F: {
// There should be a Float at Sp[1], and an info table at Sp[0].
int discr = BCO_NEXT;
- int failto = BCO_NEXT;
+ int failto = BCO_GET_LARGE_ARG;
StgFloat stackFlt, discrFlt;
stackFlt = PK_FLT( & Sp[1] );
discrFlt = PK_FLT( & BCO_LIT(discr) );
case bci_TESTEQ_F: {
// There should be a Float at Sp[1], and an info table at Sp[0].
int discr = BCO_NEXT;
- int failto = BCO_NEXT;
+ int failto = BCO_GET_LARGE_ARG;
StgFloat stackFlt, discrFlt;
stackFlt = PK_FLT( & Sp[1] );
discrFlt = PK_FLT( & BCO_LIT(discr) );
// context switching: sometimes the scheduler can invoke
// the interpreter with context_switch == 1, particularly
// if the -C0 flag has been given on the cmd line.
- if (context_switch) {
+ if (cap->r.rHpLim == NULL) {
Sp--; Sp[0] = (W_)&stg_enter_info;
RETURN_TO_SCHEDULER(ThreadInterpret, ThreadYielding);
}
goto eval;
case bci_RETURN:
- obj = (StgClosure *)Sp[0];
+ tagged_obj = (StgClosure *)Sp[0];
Sp++;
goto do_return;
RET_DYN_BITMAP_SIZE + RET_DYN_NONPTR_REGS_SIZE
+ sizeofW(StgRetDyn);
-#ifdef THREADED_RTS
- // Threaded RTS:
- // Arguments on the TSO stack are not good, because garbage
- // collection might move the TSO as soon as we call
- // suspendThread below.
+ /* the stack looks like this:
+
+ | | <- Sp + stk_offset
+ +-------------+
+ | |
+ | args |
+ | | <- Sp + ret_size + 1
+ +-------------+
+ | C fun | <- Sp + ret_size
+ +-------------+
+ | ret | <- Sp
+ +-------------+
+
+ ret is a placeholder for the return address, and may be
+ up to 2 words.
+
+ We need to copy the args out of the TSO, because when
+ we call suspendThread() we no longer own the TSO stack,
+ and it may move at any time - indeed suspendThread()
+ itself may do stack squeezing and move our args.
+ So we make a copy of the argument block.
+ */
+
+#define ROUND_UP_WDS(p) ((((StgWord)(p)) + sizeof(W_)-1)/sizeof(W_))
+
+ ffi_cif *cif = (ffi_cif *)marshall_fn;
+ nat nargs = cif->nargs;
+ nat ret_size;
+ nat i;
+ StgPtr p;
+ W_ ret[2]; // max needed
+ W_ *arguments[stk_offset]; // max needed
+ void *argptrs[nargs];
+ void (*fn)(void);
+
+ if (cif->rtype->type == FFI_TYPE_VOID) {
+ // necessary because cif->rtype->size == 1 for void,
+ // but the bytecode generator has not pushed a
+ // placeholder in this case.
+ ret_size = 0;
+ } else {
+ ret_size = ROUND_UP_WDS(cif->rtype->size);
+ }
- W_ arguments[stk_offset];
-
- memcpy(arguments, Sp, sizeof(W_) * stk_offset);
-#endif
+ memcpy(arguments, Sp+ret_size+1,
+ sizeof(W_) * (stk_offset-1-ret_size));
+
+ // libffi expects the args as an array of pointers to
+ // values, so we have to construct this array before making
+ // the call.
+ p = (StgPtr)arguments;
+ for (i = 0; i < nargs; i++) {
+ argptrs[i] = (void *)p;
+ // get the size from the cif
+ p += ROUND_UP_WDS(cif->arg_types[i]->size);
+ }
+
+ // this is the function we're going to call
+ fn = (void(*)(void))Sp[ret_size];
// Restore the Haskell thread's current value of errno
errno = cap->r.rCurrentTSO->saved_errno;
// on the stack frame to describe this chunk of stack.
//
Sp -= ret_dyn_size;
- ((StgRetDyn *)Sp)->liveness = NO_PTRS | N_NONPTRS(stk_offset);
+ ((StgRetDyn *)Sp)->liveness = R1_PTR | N_NONPTRS(stk_offset);
((StgRetDyn *)Sp)->info = (StgInfoTable *)&stg_gc_gen_info;
+ // save obj (pointer to the current BCO), since this
+ // might move during the call. We use the R1 slot in the
+ // RET_DYN frame for this, hence R1_PTR above.
+ ((StgRetDyn *)Sp)->payload[0] = (StgClosure *)obj;
+
SAVE_STACK_POINTERS;
tok = suspendThread(&cap->r);
-#ifndef THREADED_RTS
- // Careful:
- // suspendThread might have shifted the stack
- // around (stack squeezing), so we have to grab the real
- // Sp out of the TSO to find the ccall args again.
-
- marshall_fn ( (void*)(cap->r.rCurrentTSO->sp + ret_dyn_size) );
-#else
- // Threaded RTS:
// We already made a copy of the arguments above.
-
- marshall_fn ( arguments );
-#endif
+ ffi_call(cif, fn, ret, argptrs);
// And restart the thread again, popping the RET_DYN frame.
- cap = (Capability *)((void *)((unsigned char*)resumeThread(tok) - sizeof(StgFunTable)));
+ cap = (Capability *)((void *)((unsigned char*)resumeThread(tok) - STG_FIELD_OFFSET(Capability,r)));
LOAD_STACK_POINTERS;
+
+ // Re-load the pointer to the BCO from the RET_DYN frame,
+ // it might have moved during the call. Also reload the
+ // pointers to the components of the BCO.
+ obj = ((StgRetDyn *)Sp)->payload[0];
+ bco = (StgBCO*)obj;
+ instrs = (StgWord16*)(bco->instrs->payload);
+ literals = (StgWord*)(&bco->literals->payload[0]);
+ ptrs = (StgPtr*)(&bco->ptrs->payload[0]);
+
Sp += ret_dyn_size;
// Save the Haskell thread's current value of errno
cap->r.rCurrentTSO->saved_errno = errno;
-#ifdef THREADED_RTS
- // Threaded RTS:
- // Copy the "arguments", which might include a return value,
- // back to the TSO stack. It would of course be enough to
- // just copy the return value, but we don't know the offset.
- memcpy(Sp, arguments, sizeof(W_) * stk_offset);
-#endif
+ // Copy the return value back to the TSO stack. It is at
+ // most 2 words large, and resides at arguments[0].
+ memcpy(Sp, ret, sizeof(W_) * stg_min(stk_offset,ret_size));
goto nextInsn;
}
case bci_JMP: {
/* BCO_NEXT modifies bciPtr, so be conservative. */
- int nextpc = BCO_NEXT;
+ int nextpc = BCO_GET_LARGE_ARG;
bciPtr = nextpc;
goto nextInsn;
}