X-Git-Url: http://git.megacz.com/?a=blobdiff_plain;f=ghc%2Frts%2FStgCRun.c;h=a5444f4159152561b9820c677f161b5e27617682;hb=b868ceeab9281a76e6b5095bc612c0b992c16418;hp=79104e7c2c41beeeee6ff790bad477e081680bf1;hpb=95ca6bff6fc9918203173b442192d9298ef9757a;p=ghc-hetmet.git diff --git a/ghc/rts/StgCRun.c b/ghc/rts/StgCRun.c index 79104e7..a5444f4 100644 --- a/ghc/rts/StgCRun.c +++ b/ghc/rts/StgCRun.c @@ -1,5 +1,4 @@ /* ----------------------------------------------------------------------------- - * $Id: StgCRun.c,v 1.44 2004/09/03 15:28:56 simonmar Exp $ * * (c) The GHC Team, 1998-2003 * @@ -51,7 +50,7 @@ * in libc.a clobbers $s6. */ #include "ghcconfig.h" -#ifdef alpha_TARGET_ARCH +#ifdef alpha_HOST_ARCH #define alpha_EXTRA_CAREFUL register long fake_ra __asm__("$26"); register long fake_gp __asm__("$29"); @@ -83,17 +82,17 @@ register double fake_f9 __asm__("$f9"); any architecture (using miniinterpreter) -------------------------------------------------------------------------- */ -StgThreadReturnCode StgRun(StgFunPtr f, StgRegTable *basereg STG_UNUSED) +StgRegTable * StgRun(StgFunPtr f, StgRegTable *basereg STG_UNUSED) { while (f) { - if (RtsFlags[0].DebugFlags.interpreter) { + IF_DEBUG(interpreter, debugBelch("Jumping to "); printPtr((P_)f); fflush(stdout); debugBelch("\n"); - } + ); f = (StgFunPtr) (f)(); } - return (StgThreadReturnCode)R1.i; + return (StgRegTable *)R1.p; } StgFunPtr StgReturn(void) @@ -113,13 +112,19 @@ StgFunPtr StgReturn(void) x86 architecture -------------------------------------------------------------------------- */ -#ifdef i386_TARGET_ARCH +#ifdef i386_HOST_ARCH -StgThreadReturnCode +#ifdef darwin_TARGET_OS +#define STG_GLOBAL ".globl " +#else +#define STG_GLOBAL ".global " +#endif + +StgRegTable * StgRun(StgFunPtr f, StgRegTable *basereg) { unsigned char space[ RESERVED_C_STACK_BYTES + 4*sizeof(void *) ]; - StgThreadReturnCode r; + StgRegTable * r; __asm__ volatile ( /* @@ -136,14 +141,32 @@ StgRun(StgFunPtr f, StgRegTable *basereg) { */ "movl %3,%%ebx\n\t" /* - * grab the function argument from the stack, and jump to it. + * grab the function argument from the stack */ "movl %2,%%eax\n\t" + +#if darwin_TARGET_OS + /* + * Darwin: keep the stack aligned + */ + "subl $12,%%esp\n\t" +#endif + + /* + * jump to it + */ "jmp *%%eax\n\t" - ".global " STG_RETURN "\n" + STG_GLOBAL STG_RETURN "\n" STG_RETURN ":\n\t" +#if darwin_TARGET_OS + /* + * Darwin: keep the stack aligned + */ + "addl $12,%%esp\n\t" +#endif + "movl %%esi, %%eax\n\t" /* Return value in R1 */ /* @@ -176,11 +199,12 @@ StgRun(StgFunPtr f, StgRegTable *basereg) { ------------------------------------------------------------------------- */ -#ifdef x86_64_TARGET_ARCH +#ifdef x86_64_HOST_ARCH -extern StgThreadReturnCode StgRun(StgFunPtr f, StgRegTable *basereg); +extern StgRegTable * StgRun(StgFunPtr f, StgRegTable *basereg); -static void StgRunIsImplementedInAssembler(void) +static void GNUC3_ATTRIBUTE(used) +StgRunIsImplementedInAssembler(void) { __asm__ volatile ( /* @@ -200,7 +224,7 @@ static void StgRunIsImplementedInAssembler(void) /* * Set BaseReg */ - "movq %%rsi,%%rbx\n\t" + "movq %%rsi,%%r13\n\t" /* * grab the function argument from the stack, and jump to it. */ @@ -210,7 +234,7 @@ static void StgRunIsImplementedInAssembler(void) ".global " STG_RETURN "\n" STG_RETURN ":\n\t" - "movq %%r13, %%rax\n\t" /* Return value in R1 */ + "movq %%rbx, %%rax\n\t" /* Return value in R1 */ /* * restore callee-saves registers. (Don't stomp on %%rax!) @@ -226,7 +250,41 @@ static void StgRunIsImplementedInAssembler(void) "addq %0, %%rsp\n\t" "retq" - : : "i"(RESERVED_C_STACK_BYTES+48 /*stack frame size*/)); + : : "i"(RESERVED_C_STACK_BYTES+48+8 /*stack frame size*/)); + /* + HACK alert! + + The x86_64 ABI specifies that on a procedure call, %rsp is + aligned on a 16-byte boundary + 8. That is, the first + argument on the stack after the return address will be + 16-byte aligned. + + Which should be fine: RESERVED_C_STACK_BYTES+48 is a multiple + of 16 bytes. + + BUT... when we do a C-call from STG land, gcc likes to put the + stack alignment adjustment in the prolog. eg. if we're calling + a function with arguments in regs, gcc will insert 'subq $8,%rsp' + in the prolog, to keep %rsp aligned (the return address is 8 + bytes, remember). The mangler throws away the prolog, so we + lose the stack alignment. + + The hack is to add this extra 8 bytes to our %rsp adjustment + here, so that throughout STG code, %rsp is 16-byte aligned, + ready for a C-call. + + A quick way to see if this is wrong is to compile this code: + + main = System.Exit.exitWith ExitSuccess + + And run it with +RTS -sstderr. The stats code in the RTS, in + particular statsPrintf(), relies on the stack alignment because + it saves the %xmm regs on the stack, so it'll fall over if the + stack isn't aligned, and calling exitWith from Haskell invokes + shutdownHaskellAndExit using a C call. + + Future gcc releases will almost certainly break this hack... + */ } #endif /* x86-64 */ @@ -262,9 +320,9 @@ static void StgRunIsImplementedInAssembler(void) Updated info (GHC 4.08.2): not saving %i7 any more (see below). -------------------------------------------------------------------------- */ -#ifdef sparc_TARGET_ARCH +#ifdef sparc_HOST_ARCH -StgThreadReturnCode +StgRegTable * StgRun(StgFunPtr f, StgRegTable *basereg) { unsigned char space[RESERVED_C_STACK_BYTES]; @@ -297,7 +355,7 @@ StgRun(StgFunPtr f, StgRegTable *basereg) { __asm__ volatile ("ld %1,%0" : "=r" (i7) : "m" (((void **)(space))[100])); #endif - return (StgThreadReturnCode)R1.i; + return (StgRegTable *)R1.i; } #endif @@ -330,9 +388,9 @@ StgRun(StgFunPtr f, StgRegTable *basereg) { tru64unix.compaq.com/docs/base_doc/DOCUMENTATION/V51_PDF/ARH9MBTE.PDF -------------------------------------------------------------------------- */ -#ifdef alpha_TARGET_ARCH +#ifdef alpha_HOST_ARCH -StgThreadReturnCode +StgRegTable * StgRun(StgFunPtr f, StgRegTable *basereg) { register long real_ra __asm__("$26"); volatile long save_ra; @@ -361,7 +419,7 @@ StgRun(StgFunPtr f, StgRegTable *basereg) register StgFunPtr real_pv __asm__("$27"); - StgThreadReturnCode ret; + StgRegTable * ret; save_ra = real_ra; save_gp = real_gp; @@ -426,19 +484,19 @@ StgRun(StgFunPtr f, StgRegTable *basereg) return ret; } -#endif /* alpha_TARGET_ARCH */ +#endif /* alpha_HOST_ARCH */ /* ----------------------------------------------------------------------------- HP-PA architecture -------------------------------------------------------------------------- */ -#ifdef hppa1_1_TARGET_ARCH +#ifdef hppa1_1_HOST_ARCH -StgThreadReturnCode +StgRegTable * StgRun(StgFunPtr f, StgRegTable *basereg) { StgChar space[RESERVED_C_STACK_BYTES+16*sizeof(long)+10*sizeof(double)]; - StgThreadReturnCode ret; + StgRegTable * ret; __asm__ volatile ("ldo %0(%%r30),%%r19\n" "\tstw %%r3, 0(0,%%r19)\n" @@ -519,7 +577,7 @@ StgRun(StgFunPtr f, StgRegTable *basereg) return ret; } -#endif /* hppa1_1_TARGET_ARCH */ +#endif /* hppa1_1_HOST_ARCH */ /* ----------------------------------------------------------------------------- PowerPC architecture @@ -528,13 +586,18 @@ StgRun(StgFunPtr f, StgRegTable *basereg) -------------------------------------------------------------------------- */ -#ifdef powerpc_TARGET_ARCH +#ifdef powerpc_HOST_ARCH -extern StgThreadReturnCode StgRun(StgFunPtr f, StgRegTable *basereg); +extern StgRegTable * StgRun(StgFunPtr f, StgRegTable *basereg); -#ifdef darwin_TARGET_OS -static void StgRunIsImplementedInAssembler(void) +#ifdef darwin_HOST_OS +void StgRunIsImplementedInAssembler(void) { +#if HAVE_SUBSECTIONS_VIA_SYMBOLS + // if the toolchain supports deadstripping, we have to + // prevent it here (it tends to get confused here). + __asm__ volatile (".no_dead_strip _StgRunIsImplementedInAssembler"); +#endif __asm__ volatile ( "\n.globl _StgRun\n" "_StgRun:\n" @@ -552,7 +615,7 @@ static void StgRunIsImplementedInAssembler(void) "\tla r1,%0(r1)\n" "\tlmw r13,-220(r1)\n" "\tb restFP # f14\n" - : : "i"(RESERVED_C_STACK_BYTES+288 /*stack frame size*/)); + : : "i"(RESERVED_C_STACK_BYTES+224 /*stack frame size*/)); } #else @@ -566,7 +629,8 @@ static void StgRunIsImplementedInAssembler(void) // *) The Link Register is saved to a different offset in the caller's stack frame // (Linux: 4(r1), Darwin 8(r1)) -static void StgRunIsImplementedInAssembler(void) +static void GNUC3_ATTRIBUTE(used) +StgRunIsImplementedInAssembler(void) { __asm__ volatile ( "\t.globl StgRun\n" @@ -627,13 +691,150 @@ static void StgRunIsImplementedInAssembler(void) "\tlwz 0,4(1)\n" "\tmtlr 0\n" "\tblr\n" - : : "i"(RESERVED_C_STACK_BYTES+288 /*stack frame size*/)); + : : "i"(RESERVED_C_STACK_BYTES+224 /*stack frame size*/)); } #endif #endif /* ----------------------------------------------------------------------------- + PowerPC 64 architecture + + Everything is in assembler, so we don't have to deal with GCC... + + -------------------------------------------------------------------------- */ + +#ifdef powerpc64_HOST_ARCH + +#ifdef linux_HOST_OS +extern StgRegTable * StgRun(StgFunPtr f, StgRegTable *basereg); + +static void GNUC3_ATTRIBUTE(used) +StgRunIsImplementedInAssembler(void) +{ + // r0 volatile + // r1 stack pointer + // r2 toc - needs to be saved + // r3-r10 argument passing, volatile + // r11, r12 very volatile (not saved across cross-module calls) + // r13 thread local state (never modified, don't need to save) + // r14-r31 callee-save + __asm__ volatile ( + ".section \".opd\",\"aw\"\n" + ".align 3\n" + ".globl StgRun\n" + "StgRun:\n" + "\t.quad\t.StgRun,.TOC.@tocbase,0\n" + "\t.size StgRun,24\n" + ".globl StgReturn\n" + "StgReturn:\n" + "\t.quad\t.StgReturn,.TOC.@tocbase,0\n" + "\t.size StgReturn,24\n" + ".previous\n" + ".globl .StgRun\n" + ".type .StgRun,@function\n" + ".StgRun:\n" + "\tmflr 0\n" + "\tmr 5, 1\n" + "\tstd 0, 16(1)\n" + "\tstdu 1, -%0(1)\n" + "\tstd 2, -296(5)\n" + "\tstd 14, -288(5)\n" + "\tstd 15, -280(5)\n" + "\tstd 16, -272(5)\n" + "\tstd 17, -264(5)\n" + "\tstd 18, -256(5)\n" + "\tstd 19, -248(5)\n" + "\tstd 20, -240(5)\n" + "\tstd 21, -232(5)\n" + "\tstd 22, -224(5)\n" + "\tstd 23, -216(5)\n" + "\tstd 24, -208(5)\n" + "\tstd 25, -200(5)\n" + "\tstd 26, -192(5)\n" + "\tstd 27, -184(5)\n" + "\tstd 28, -176(5)\n" + "\tstd 29, -168(5)\n" + "\tstd 30, -160(5)\n" + "\tstd 31, -152(5)\n" + "\tstfd 14, -144(5)\n" + "\tstfd 15, -136(5)\n" + "\tstfd 16, -128(5)\n" + "\tstfd 17, -120(5)\n" + "\tstfd 18, -112(5)\n" + "\tstfd 19, -104(5)\n" + "\tstfd 20, -96(5)\n" + "\tstfd 21, -88(5)\n" + "\tstfd 22, -80(5)\n" + "\tstfd 23, -72(5)\n" + "\tstfd 24, -64(5)\n" + "\tstfd 25, -56(5)\n" + "\tstfd 26, -48(5)\n" + "\tstfd 27, -40(5)\n" + "\tstfd 28, -32(5)\n" + "\tstfd 29, -24(5)\n" + "\tstfd 30, -16(5)\n" + "\tstfd 31, -8(5)\n" + "\tmr 27, 4\n" // BaseReg == r27 + "\tld 2, 8(3)\n" + "\tld 3, 0(3)\n" + "\tmtctr 3\n" + "\tbctr\n" + ".globl .StgReturn\n" + ".type .StgReturn,@function\n" + ".StgReturn:\n" + "\tmr 3,14\n" + "\tla 5, %0(1)\n" // load address == addi r5, r1, %0 + "\tld 2, -296(5)\n" + "\tld 14, -288(5)\n" + "\tld 15, -280(5)\n" + "\tld 16, -272(5)\n" + "\tld 17, -264(5)\n" + "\tld 18, -256(5)\n" + "\tld 19, -248(5)\n" + "\tld 20, -240(5)\n" + "\tld 21, -232(5)\n" + "\tld 22, -224(5)\n" + "\tld 23, -216(5)\n" + "\tld 24, -208(5)\n" + "\tld 25, -200(5)\n" + "\tld 26, -192(5)\n" + "\tld 27, -184(5)\n" + "\tld 28, -176(5)\n" + "\tld 29, -168(5)\n" + "\tld 30, -160(5)\n" + "\tld 31, -152(5)\n" + "\tlfd 14, -144(5)\n" + "\tlfd 15, -136(5)\n" + "\tlfd 16, -128(5)\n" + "\tlfd 17, -120(5)\n" + "\tlfd 18, -112(5)\n" + "\tlfd 19, -104(5)\n" + "\tlfd 20, -96(5)\n" + "\tlfd 21, -88(5)\n" + "\tlfd 22, -80(5)\n" + "\tlfd 23, -72(5)\n" + "\tlfd 24, -64(5)\n" + "\tlfd 25, -56(5)\n" + "\tlfd 26, -48(5)\n" + "\tlfd 27, -40(5)\n" + "\tlfd 28, -32(5)\n" + "\tlfd 29, -24(5)\n" + "\tlfd 30, -16(5)\n" + "\tlfd 31, -8(5)\n" + "\tmr 1, 5\n" + "\tld 0, 16(1)\n" + "\tmtlr 0\n" + "\tblr\n" + : : "i"(RESERVED_C_STACK_BYTES+304 /*stack frame size*/)); +} +#else // linux_HOST_OS +#error Only linux support for power64 right now. +#endif + +#endif + +/* ----------------------------------------------------------------------------- IA64 architecture Again, in assembler - so we can fiddle with the register stack, and because @@ -646,7 +847,7 @@ static void StgRunIsImplementedInAssembler(void) loc31: saved gp (gcc 3.3 uses this slot) -------------------------------------------------------------------------- */ -#ifdef ia64_TARGET_ARCH +#ifdef ia64_HOST_ARCH /* the memory stack is rarely used, so 16K is excessive */ #undef RESERVED_C_STACK_BYTES @@ -659,7 +860,8 @@ static void StgRunIsImplementedInAssembler(void) #define LOCALS 31 #endif -static void StgRunIsImplementedInAssembler(void) +static void GNUC3_ATTRIBUTE(used) +StgRunIsImplementedInAssembler(void) { __asm__ volatile( ".global StgRun\n" @@ -700,4 +902,3 @@ static void StgRunIsImplementedInAssembler(void) #endif #endif /* !USE_MINIINTERPRETER */ -