/* -----------------------------------------------------------------------------
- * $Id: StgCRun.c,v 1.42 2004/08/13 13:10:46 simonmar Exp $
*
* (c) The GHC Team, 1998-2003
*
* in libc.a clobbers $s6.
*/
#include "ghcconfig.h"
-#ifdef alpha_TARGET_ARCH
+#ifdef alpha_HOST_ARCH
#define alpha_EXTRA_CAREFUL
register long fake_ra __asm__("$26");
register long fake_gp __asm__("$29");
#include "Rts.h"
#include "StgRun.h"
#include "RtsFlags.h"
+#include "OSThreads.h"
#include "Capability.h"
#ifdef DEBUG
any architecture (using miniinterpreter)
-------------------------------------------------------------------------- */
-StgThreadReturnCode StgRun(StgFunPtr f, StgRegTable *basereg STG_UNUSED)
+StgRegTable * StgRun(StgFunPtr f, StgRegTable *basereg STG_UNUSED) /* repeatedly calls f until it yields a null code pointer; basereg is unused in this variant */
{
while (f) { /* a null code pointer ends the loop */
- if (RtsFlags[0].DebugFlags.interpreter) {
- fprintf(stderr,"Jumping to ");
+ IF_DEBUG(interpreter,
+ debugBelch("Jumping to ");
printPtr((P_)f); fflush(stdout);
- fprintf(stderr,"\n");
- }
+ debugBelch("\n");
+ );
f = (StgFunPtr) (f)(); /* call the current code pointer; whatever it returns is the next one to call */
}
- return (StgThreadReturnCode)R1.i;
+ return (StgRegTable *)R1.p; /* the result is read from the pointer field of R1 */
}
StgFunPtr StgReturn(void)
x86 architecture
-------------------------------------------------------------------------- */
-#ifdef i386_TARGET_ARCH
+#ifdef i386_HOST_ARCH
-StgThreadReturnCode
+#ifdef darwin_TARGET_OS /* NOTE(review): other tests in this change move from *_TARGET_* to *_HOST_* (e.g. darwin_HOST_OS); confirm TARGET is intended here */
+#define STG_GLOBAL ".globl " /* directive text placed in front of STG_RETURN in the inline asm below */
+#else
+#define STG_GLOBAL ".global " /* same role, spelling used when darwin_TARGET_OS is not defined */
+#endif
+
+StgRegTable *
StgRun(StgFunPtr f, StgRegTable *basereg) {
unsigned char space[ RESERVED_C_STACK_BYTES + 4*sizeof(void *) ];
- StgThreadReturnCode r;
+ StgRegTable * r;
__asm__ volatile (
/*
*/
"movl %3,%%ebx\n\t"
/*
- * grab the function argument from the stack, and jump to it.
+ * grab the function argument from the stack
*/
"movl %2,%%eax\n\t"
+
+#if darwin_TARGET_OS
+ /*
+ * Darwin: keep the stack aligned
+ */
+ "subl $12,%%esp\n\t"
+#endif
+
+ /*
+ * jump to it
+ */
"jmp *%%eax\n\t"
- ".global " STG_RETURN "\n"
+ STG_GLOBAL STG_RETURN "\n"
STG_RETURN ":\n\t"
+#if darwin_TARGET_OS
+ /*
+ * Darwin: keep the stack aligned
+ */
+ "addl $12,%%esp\n\t"
+#endif
+
"movl %%esi, %%eax\n\t" /* Return value in R1 */
/*
------------------------------------------------------------------------- */
-#ifdef x86_64_TARGET_ARCH
+#ifdef x86_64_HOST_ARCH
-extern StgThreadReturnCode StgRun(StgFunPtr f, StgRegTable *basereg);
+extern StgRegTable * StgRun(StgFunPtr f, StgRegTable *basereg);
-static void StgRunIsImplementedInAssembler(void)
+void StgRunIsImplementedInAssembler(void);
+void StgRunIsImplementedInAssembler(void)
{
__asm__ volatile (
/*
"addq %0, %%rsp\n\t"
"retq"
- : : "i"(RESERVED_C_STACK_BYTES+48 /*stack frame size*/));
+ : : "i"(RESERVED_C_STACK_BYTES+48+8 /*stack frame size*/));
+ /*
+ HACK alert!
+
+ The x86_64 ABI specifies that on a procedure call, %rsp is
+ aligned on a 16-byte boundary + 8. That is, the first
+ argument on the stack after the return address will be
+ 16-byte aligned.
+
+ Which should be fine: RESERVED_C_STACK_BYTES+48 is a multiple
+ of 16 bytes.
+
+ BUT... when we do a C-call from STG land, gcc likes to put the
+ stack alignment adjustment in the prolog. For example, if we're calling
+ a function with arguments in regs, gcc will insert 'subq $8,%rsp'
+ in the prolog, to keep %rsp aligned (the return address is 8
+ bytes, remember). The mangler throws away the prolog, so we
+ lose the stack alignment.
+
+ The hack is to add this extra 8 bytes to our %rsp adjustment
+ here, so that throughout STG code, %rsp is 16-byte aligned,
+ ready for a C-call.
+
+ A quick way to see if this is wrong is to compile this code:
+
+ main = System.Exit.exitWith ExitSuccess
+
+ And run it with +RTS -sstderr. The stats code in the RTS, in
+ particular statsPrintf(), relies on the stack alignment because
+ it saves the %xmm regs on the stack, so it'll fall over if the
+ stack isn't aligned, and calling exitWith from Haskell invokes
+ shutdownHaskellAndExit using a C call.
+
+ Future gcc releases will almost certainly break this hack...
+ */
}
#endif /* x86-64 */
Updated info (GHC 4.08.2): not saving %i7 any more (see below).
-------------------------------------------------------------------------- */
-#ifdef sparc_TARGET_ARCH
+#ifdef sparc_HOST_ARCH
-StgThreadReturnCode
+StgRegTable *
StgRun(StgFunPtr f, StgRegTable *basereg) {
unsigned char space[RESERVED_C_STACK_BYTES];
__asm__ volatile ("ld %1,%0"
: "=r" (i7) : "m" (((void **)(space))[100]));
#endif
- return (StgThreadReturnCode)R1.i;
+ return (StgRegTable *)R1.i;
}
#endif
tru64unix.compaq.com/docs/base_doc/DOCUMENTATION/V51_PDF/ARH9MBTE.PDF
-------------------------------------------------------------------------- */
-#ifdef alpha_TARGET_ARCH
+#ifdef alpha_HOST_ARCH
-StgThreadReturnCode
+StgRegTable *
StgRun(StgFunPtr f, StgRegTable *basereg)
{
register long real_ra __asm__("$26"); volatile long save_ra;
register StgFunPtr real_pv __asm__("$27");
- StgThreadReturnCode ret;
+ StgRegTable * ret;
save_ra = real_ra;
save_gp = real_gp;
return ret;
}
-#endif /* alpha_TARGET_ARCH */
+#endif /* alpha_HOST_ARCH */
/* -----------------------------------------------------------------------------
HP-PA architecture
-------------------------------------------------------------------------- */
-#ifdef hppa1_1_TARGET_ARCH
+#ifdef hppa1_1_HOST_ARCH
-StgThreadReturnCode
+StgRegTable *
StgRun(StgFunPtr f, StgRegTable *basereg)
{
StgChar space[RESERVED_C_STACK_BYTES+16*sizeof(long)+10*sizeof(double)];
- StgThreadReturnCode ret;
+ StgRegTable * ret;
__asm__ volatile ("ldo %0(%%r30),%%r19\n"
"\tstw %%r3, 0(0,%%r19)\n"
return ret;
}
-#endif /* hppa1_1_TARGET_ARCH */
+#endif /* hppa1_1_HOST_ARCH */
/* -----------------------------------------------------------------------------
PowerPC architecture
-------------------------------------------------------------------------- */
-#ifdef powerpc_TARGET_ARCH
+#ifdef powerpc_HOST_ARCH
-extern StgThreadReturnCode StgRun(StgFunPtr f, StgRegTable *basereg);
+extern StgRegTable * StgRun(StgFunPtr f, StgRegTable *basereg);
-#ifdef darwin_TARGET_OS
-static void StgRunIsImplementedInAssembler(void)
+#ifdef darwin_HOST_OS
+void StgRunIsImplementedInAssembler(void)
{
+#if HAVE_SUBSECTIONS_VIA_SYMBOLS
+ // if the toolchain supports deadstripping, we have to
+ // prevent it here (it tends to get confused here).
+ __asm__ volatile (".no_dead_strip _StgRunIsImplementedInAssembler");
+#endif
__asm__ volatile (
"\n.globl _StgRun\n"
"_StgRun:\n"
"\tla r1,%0(r1)\n"
"\tlmw r13,-220(r1)\n"
"\tb restFP # f14\n"
- : : "i"(RESERVED_C_STACK_BYTES+288 /*stack frame size*/));
+ : : "i"(RESERVED_C_STACK_BYTES+224 /*stack frame size*/));
}
#else
"\tlwz 0,4(1)\n"
"\tmtlr 0\n"
"\tblr\n"
- : : "i"(RESERVED_C_STACK_BYTES+288 /*stack frame size*/));
+ : : "i"(RESERVED_C_STACK_BYTES+224 /*stack frame size*/));
}
#endif
#endif
/* -----------------------------------------------------------------------------
+ PowerPC 64 architecture
+
+ Everything is in assembler, so we don't have to deal with GCC...
+
+ -------------------------------------------------------------------------- */
+
+#ifdef powerpc64_HOST_ARCH
+
+#ifdef linux_HOST_OS
+extern StgRegTable * StgRun(StgFunPtr f, StgRegTable *basereg);
+
+// Holder for the hand-written powerpc64 StgRun/StgReturn code below.
+//
+// Deliberately NOT static: nothing in this section references this
+// function, and a compiler is free to discard an unreferenced static
+// function together with the asm it contains.  The x86_64 and
+// PowerPC/Darwin variants in this file are non-static as well; the
+// separate prototype mirrors the x86_64 variant.
+//
+// What the asm does:
+//   * The ".opd" section defines the descriptors StgRun and StgReturn,
+//     each of the form (entry point, TOC base, 0).
+//   * .StgRun stores LR at 16(r1), allocates a frame of
+//     RESERVED_C_STACK_BYTES+304 bytes with stdu, and saves r2,
+//     r14-r31 and f14-f31 in the 296 bytes just below the caller's
+//     stack pointer (kept in r5).  It then copies r4 into r27
+//     (BaseReg), loads the TOC (offset 8) and entry address (offset 0)
+//     through r3, and branches to that entry via CTR.
+//   * .StgReturn copies r14 into r3 as the result, recomputes r5 as
+//     r1 + frame size, restores everything .StgRun saved, resets r1,
+//     reloads LR from 16(r1) and returns.
+void StgRunIsImplementedInAssembler(void);
+void StgRunIsImplementedInAssembler(void)
+{
+ // r0 volatile
+ // r1 stack pointer
+ // r2 toc - needs to be saved
+ // r3-r10 argument passing, volatile
+ // r11, r12 very volatile (not saved across cross-module calls)
+ // r13 thread local state (never modified, don't need to save)
+ // r14-r31 callee-save
+ __asm__ volatile (
+ ".section \".opd\",\"aw\"\n"
+ ".align 3\n"
+ ".globl StgRun\n"
+ "StgRun:\n"
+ "\t.quad\t.StgRun,.TOC.@tocbase,0\n"
+ "\t.size StgRun,24\n"
+ ".globl StgReturn\n"
+ "StgReturn:\n"
+ "\t.quad\t.StgReturn,.TOC.@tocbase,0\n"
+ "\t.size StgReturn,24\n"
+ ".previous\n"
+ ".globl .StgRun\n"
+ ".type .StgRun,@function\n"
+ ".StgRun:\n"
+ "\tmflr 0\n"
+ "\tmr 5, 1\n"
+ "\tstd 0, 16(1)\n"
+ "\tstdu 1, -%0(1)\n"
+ "\tstd 2, -296(5)\n"
+ "\tstd 14, -288(5)\n"
+ "\tstd 15, -280(5)\n"
+ "\tstd 16, -272(5)\n"
+ "\tstd 17, -264(5)\n"
+ "\tstd 18, -256(5)\n"
+ "\tstd 19, -248(5)\n"
+ "\tstd 20, -240(5)\n"
+ "\tstd 21, -232(5)\n"
+ "\tstd 22, -224(5)\n"
+ "\tstd 23, -216(5)\n"
+ "\tstd 24, -208(5)\n"
+ "\tstd 25, -200(5)\n"
+ "\tstd 26, -192(5)\n"
+ "\tstd 27, -184(5)\n"
+ "\tstd 28, -176(5)\n"
+ "\tstd 29, -168(5)\n"
+ "\tstd 30, -160(5)\n"
+ "\tstd 31, -152(5)\n"
+ "\tstfd 14, -144(5)\n"
+ "\tstfd 15, -136(5)\n"
+ "\tstfd 16, -128(5)\n"
+ "\tstfd 17, -120(5)\n"
+ "\tstfd 18, -112(5)\n"
+ "\tstfd 19, -104(5)\n"
+ "\tstfd 20, -96(5)\n"
+ "\tstfd 21, -88(5)\n"
+ "\tstfd 22, -80(5)\n"
+ "\tstfd 23, -72(5)\n"
+ "\tstfd 24, -64(5)\n"
+ "\tstfd 25, -56(5)\n"
+ "\tstfd 26, -48(5)\n"
+ "\tstfd 27, -40(5)\n"
+ "\tstfd 28, -32(5)\n"
+ "\tstfd 29, -24(5)\n"
+ "\tstfd 30, -16(5)\n"
+ "\tstfd 31, -8(5)\n"
+ "\tmr 27, 4\n" // BaseReg == r27
+ "\tld 2, 8(3)\n"
+ "\tld 3, 0(3)\n"
+ "\tmtctr 3\n"
+ "\tbctr\n"
+ ".globl .StgReturn\n"
+ ".type .StgReturn,@function\n"
+ ".StgReturn:\n"
+ "\tmr 3,14\n"
+ "\tla 5, %0(1)\n" // load address == addi r5, r1, %0
+ "\tld 2, -296(5)\n"
+ "\tld 14, -288(5)\n"
+ "\tld 15, -280(5)\n"
+ "\tld 16, -272(5)\n"
+ "\tld 17, -264(5)\n"
+ "\tld 18, -256(5)\n"
+ "\tld 19, -248(5)\n"
+ "\tld 20, -240(5)\n"
+ "\tld 21, -232(5)\n"
+ "\tld 22, -224(5)\n"
+ "\tld 23, -216(5)\n"
+ "\tld 24, -208(5)\n"
+ "\tld 25, -200(5)\n"
+ "\tld 26, -192(5)\n"
+ "\tld 27, -184(5)\n"
+ "\tld 28, -176(5)\n"
+ "\tld 29, -168(5)\n"
+ "\tld 30, -160(5)\n"
+ "\tld 31, -152(5)\n"
+ "\tlfd 14, -144(5)\n"
+ "\tlfd 15, -136(5)\n"
+ "\tlfd 16, -128(5)\n"
+ "\tlfd 17, -120(5)\n"
+ "\tlfd 18, -112(5)\n"
+ "\tlfd 19, -104(5)\n"
+ "\tlfd 20, -96(5)\n"
+ "\tlfd 21, -88(5)\n"
+ "\tlfd 22, -80(5)\n"
+ "\tlfd 23, -72(5)\n"
+ "\tlfd 24, -64(5)\n"
+ "\tlfd 25, -56(5)\n"
+ "\tlfd 26, -48(5)\n"
+ "\tlfd 27, -40(5)\n"
+ "\tlfd 28, -32(5)\n"
+ "\tlfd 29, -24(5)\n"
+ "\tlfd 30, -16(5)\n"
+ "\tlfd 31, -8(5)\n"
+ "\tmr 1, 5\n"
+ "\tld 0, 16(1)\n"
+ "\tmtlr 0\n"
+ "\tblr\n"
+ : : "i"(RESERVED_C_STACK_BYTES+304 /*stack frame size*/));
+}
+#else // linux_HOST_OS
+#error Only linux support for power64 right now.
+#endif
+
+#endif
+
+/* -----------------------------------------------------------------------------
IA64 architecture
Again, in assembler - so we can fiddle with the register stack, and because
loc31: saved gp (gcc 3.3 uses this slot)
-------------------------------------------------------------------------- */
-#ifdef ia64_TARGET_ARCH
+#ifdef ia64_HOST_ARCH
/* the memory stack is rarely used, so 16K is excessive */
#undef RESERVED_C_STACK_BYTES
#endif
#endif /* !USE_MINIINTERPRETER */
-