X-Git-Url: http://git.megacz.com/?a=blobdiff_plain;f=ghc%2Frts%2FStgCRun.c;h=d593db094323cbc8763f99f3efea12fc6bc68511;hb=5bcba9677f6d097ecad2034b7bd039d125eaabdc;hp=ac46b73949936d6a53e026aed75dd4b8ef1a03bf;hpb=53ddaa573e121f905728011e2c75c84548b6f976;p=ghc-hetmet.git diff --git a/ghc/rts/StgCRun.c b/ghc/rts/StgCRun.c index ac46b73..d593db0 100644 --- a/ghc/rts/StgCRun.c +++ b/ghc/rts/StgCRun.c @@ -1,7 +1,7 @@ /* ----------------------------------------------------------------------------- - * $Id: StgCRun.c,v 1.31 2002/03/26 10:35:20 simonmar Exp $ + * $Id: StgCRun.c,v 1.43 2004/08/13 13:57:08 simonmar Exp $ * - * (c) The GHC Team, 1998-2000 + * (c) The GHC Team, 1998-2003 * * STG-to-C glue. * @@ -50,10 +50,11 @@ * that we don't use but which are callee-save registers. The __divq() routine * in libc.a clobbers $s6. */ -#include "config.h" +#include "ghcconfig.h" #ifdef alpha_TARGET_ARCH #define alpha_EXTRA_CAREFUL register long fake_ra __asm__("$26"); +register long fake_gp __asm__("$29"); #ifdef alpha_EXTRA_CAREFUL register long fake_s6 __asm__("$15"); register double fake_f8 __asm__("$f8"); @@ -67,9 +68,11 @@ register double fake_f9 __asm__("$f9"); #include "Stg.h" #include "Rts.h" #include "StgRun.h" +#include "RtsFlags.h" +#include "OSThreads.h" +#include "Capability.h" #ifdef DEBUG -#include "RtsFlags.h" #include "RtsUtils.h" #include "Printer.h" #endif @@ -80,22 +83,22 @@ register double fake_f9 __asm__("$f9"); any architecture (using miniinterpreter) -------------------------------------------------------------------------- */ -extern StgThreadReturnCode StgRun(StgFunPtr f, StgRegTable *basereg STG_UNUSED) +StgThreadReturnCode StgRun(StgFunPtr f, StgRegTable *basereg STG_UNUSED) { - while (f) { - IF_DEBUG(evaluator, - fprintf(stderr,"Jumping to "); - printPtr((P_)f); fflush(stdout); - fprintf(stderr,"\n"); - ); - f = (StgFunPtr) (f)(); - } - return (StgThreadReturnCode)R1.i; + while (f) { + if (RtsFlags[0].DebugFlags.interpreter) { + fprintf(stderr,"Jumping to "); + printPtr((P_)f); fflush(stdout); + fprintf(stderr,"\n"); + } + f = (StgFunPtr) (f)(); + } + return (StgThreadReturnCode)R1.i; } -EXTFUN(StgReturn) +StgFunPtr StgReturn(void) { - return 0; + return 0; } #else /* !USE_MINIINTERPRETER */ @@ -163,6 +166,71 @@ StgRun(StgFunPtr f, StgRegTable *basereg) { #endif +/* ---------------------------------------------------------------------------- + x86-64 is almost the same as plain x86. + + I've done it using entirely inline assembler, because I couldn't + get gcc to generate the correct subtraction from %rsp by using + the local array variable trick. It didn't seem to reserve + enough space. Oh well, it's not much harder this way. + + ------------------------------------------------------------------------- */ + +#ifdef x86_64_TARGET_ARCH + +extern StgThreadReturnCode StgRun(StgFunPtr f, StgRegTable *basereg); + +static void StgRunIsImplementedInAssembler(void) +{ + __asm__ volatile ( + /* + * save callee-saves registers on behalf of the STG code. + */ + ".globl StgRun\n" + "StgRun:\n\t" + "subq %0, %%rsp\n\t" + "movq %%rsp, %%rax\n\t" + "addq %0-48, %%rax\n\t" + "movq %%rbx,0(%%rax)\n\t" + "movq %%rbp,8(%%rax)\n\t" + "movq %%r12,16(%%rax)\n\t" + "movq %%r13,24(%%rax)\n\t" + "movq %%r14,32(%%rax)\n\t" + "movq %%r15,40(%%rax)\n\t" + /* + * Set BaseReg + */ + "movq %%rsi,%%rbx\n\t" + /* + * grab the function argument from the stack, and jump to it. + */ + "movq %%rdi,%%rax\n\t" + "jmp *%%rax\n\t" + + ".global " STG_RETURN "\n" + STG_RETURN ":\n\t" + + "movq %%r13, %%rax\n\t" /* Return value in R1 */ + + /* + * restore callee-saves registers. (Don't stomp on %%rax!) + */ + "movq %%rsp, %%rdx\n\t" + "addq %0-48, %%rdx\n\t" + "movq 0(%%rdx),%%rbx\n\t" /* restore the registers saved above */ + "movq 8(%%rdx),%%rbp\n\t" + "movq 16(%%rdx),%%r12\n\t" + "movq 24(%%rdx),%%r13\n\t" + "movq 32(%%rdx),%%r14\n\t" + "movq 40(%%rdx),%%r15\n\t" + "addq %0, %%rsp\n\t" + "retq" + + : : "i"(RESERVED_C_STACK_BYTES+48 /*stack frame size*/)); +} + +#endif /* x86-64 */ + /* ----------------------------------------------------------------------------- Sparc architecture @@ -268,6 +336,7 @@ StgThreadReturnCode StgRun(StgFunPtr f, StgRegTable *basereg) { register long real_ra __asm__("$26"); volatile long save_ra; + register long real_gp __asm__("$29"); volatile long save_gp; register long real_s0 __asm__("$9" ); volatile long save_s0; register long real_s1 __asm__("$10"); volatile long save_s1; @@ -295,6 +364,7 @@ StgRun(StgFunPtr f, StgRegTable *basereg) StgThreadReturnCode ret; save_ra = real_ra; + save_gp = real_gp; save_s0 = real_s0; save_s1 = real_s1; @@ -351,6 +421,7 @@ StgRun(StgFunPtr f, StgRegTable *basereg) #endif real_ra = save_ra; + real_gp = save_gp; return ret; } @@ -453,33 +524,177 @@ StgRun(StgFunPtr f, StgRegTable *basereg) /* ----------------------------------------------------------------------------- PowerPC architecture - We can use a simple function call as a tail call (the bl instruction places - the return address in the Link Register, and we ignore it). - We make GCC do the register saving. GCC does a good job - and saves all general purpose registers with a single stmw - (store multiple words) instruction. + Everything is in assembler, so we don't have to deal with GCC... -------------------------------------------------------------------------- */ #ifdef powerpc_TARGET_ARCH -StgThreadReturnCode -StgRun(StgFunPtr f, StgRegTable *basereg) { +extern StgThreadReturnCode StgRun(StgFunPtr f, StgRegTable *basereg); - unsigned char space[RESERVED_C_STACK_BYTES]; +#ifdef darwin_TARGET_OS +static void StgRunIsImplementedInAssembler(void) +{ + __asm__ volatile ( + "\n.globl _StgRun\n" + "_StgRun:\n" + "\tmflr r0\n" + "\tbl saveFP # f14\n" + "\tstmw r13,-220(r1)\n" + "\tstwu r1,-%0(r1)\n" + "\tmr r27,r4\n" // BaseReg == r27 + "\tmtctr r3\n" + "\tmr r12,r3\n" + "\tbctr\n" + ".globl _StgReturn\n" + "_StgReturn:\n" + "\tmr r3,r14\n" + "\tla r1,%0(r1)\n" + "\tlmw r13,-220(r1)\n" + "\tb restFP # f14\n" + : : "i"(RESERVED_C_STACK_BYTES+288 /*stack frame size*/)); +} +#else - f(); - __asm__ volatile ( - ".align 4\n" - ".globl " STG_RETURN "\n" - STG_RETURN ":" - : : : - "r14","r15","r16","r17","r18","r19","r20","r21","r22","r23","r24","r25","r26", - "r27","r28","r29","r30","r31", - "fr14","fr15","fr16","fr17","fr18","fr19","fr20", - "fr21","fr22","fr23","fr24","fr25","fr26","fr27","fr28","fr29","fr30","fr31"); - - return (StgThreadReturnCode)R1.i; +// This version is for PowerPC Linux. + +// Differences from the Darwin/Mac OS X version: +// *) Different Assembler Syntax +// *) Doesn't use Register Saving Helper Functions (although they exist somewhere) +// *) We may not access positive stack offsets +// (no "Red Zone" as in the Darwin ABI) +// *) The Link Register is saved to a different offset in the caller's stack frame +// (Linux: 4(r1), Darwin 8(r1)) + +static void StgRunIsImplementedInAssembler(void) +{ + __asm__ volatile ( + "\t.globl StgRun\n" + "\t.type StgRun,@function\n" + "StgRun:\n" + "\tmflr 0\n" + "\tstw 0,4(1)\n" + "\tmr 5,1\n" + "\tstwu 1,-%0(1)\n" + "\tstmw 13,-220(5)\n" + "\tstfd 14,-144(5)\n" + "\tstfd 15,-136(5)\n" + "\tstfd 16,-128(5)\n" + "\tstfd 17,-120(5)\n" + "\tstfd 18,-112(5)\n" + "\tstfd 19,-104(5)\n" + "\tstfd 20,-96(5)\n" + "\tstfd 21,-88(5)\n" + "\tstfd 22,-80(5)\n" + "\tstfd 23,-72(5)\n" + "\tstfd 24,-64(5)\n" + "\tstfd 25,-56(5)\n" + "\tstfd 26,-48(5)\n" + "\tstfd 27,-40(5)\n" + "\tstfd 28,-32(5)\n" + "\tstfd 29,-24(5)\n" + "\tstfd 30,-16(5)\n" + "\tstfd 31,-8(5)\n" + "\tmr 27,4\n" // BaseReg == r27 + "\tmtctr 3\n" + "\tmr 12,3\n" + "\tbctr\n" + ".globl StgReturn\n" + "\t.type StgReturn,@function\n" + "StgReturn:\n" + "\tmr 3,14\n" + "\tla 5,%0(1)\n" + "\tlmw 13,-220(5)\n" + "\tlfd 14,-144(5)\n" + "\tlfd 15,-136(5)\n" + "\tlfd 16,-128(5)\n" + "\tlfd 17,-120(5)\n" + "\tlfd 18,-112(5)\n" + "\tlfd 19,-104(5)\n" + "\tlfd 20,-96(5)\n" + "\tlfd 21,-88(5)\n" + "\tlfd 22,-80(5)\n" + "\tlfd 23,-72(5)\n" + "\tlfd 24,-64(5)\n" + "\tlfd 25,-56(5)\n" + "\tlfd 26,-48(5)\n" + "\tlfd 27,-40(5)\n" + "\tlfd 28,-32(5)\n" + "\tlfd 29,-24(5)\n" + "\tlfd 30,-16(5)\n" + "\tlfd 31,-8(5)\n" + "\tmr 1,5\n" + "\tlwz 0,4(1)\n" + "\tmtlr 0\n" + "\tblr\n" + : : "i"(RESERVED_C_STACK_BYTES+288 /*stack frame size*/)); +} +#endif + +#endif + +/* ----------------------------------------------------------------------------- + IA64 architecture + + Again, in assembler - so we can fiddle with the register stack, and because + gcc doesn't handle asm-clobbered callee-saves correctly. + + loc0 - loc15: preserved locals + loc16 - loc28: STG registers + loc29: saved ar.pfs + loc30: saved b0 + loc31: saved gp (gcc 3.3 uses this slot) + -------------------------------------------------------------------------- */ + +#ifdef ia64_TARGET_ARCH + +/* the memory stack is rarely used, so 16K is excessive */ +#undef RESERVED_C_STACK_BYTES +#define RESERVED_C_STACK_BYTES 1024 + +#if ((__GNUC__ == 3) && (__GNUC_MINOR__ >= 3)) || (__GNUC__ > 3) +/* gcc 3.3+: leave an extra slot for gp saves */ +#define LOCALS 32 +#else +#define LOCALS 31 +#endif + +static void StgRunIsImplementedInAssembler(void) +{ + __asm__ volatile( + ".global StgRun\n" + "StgRun:\n" + "\talloc loc29 = ar.pfs, 0, %1, 8, 0\n" /* setup register frame */ + "\tld8 r18 = [r32],8\n" /* get procedure address */ + "\tadds sp = -%0, sp ;;\n" /* setup stack */ + "\tld8 gp = [r32]\n" /* get procedure GP */ + "\tadds r16 = %0-(6*16), sp\n" + "\tadds r17 = %0-(5*16), sp ;;\n" + "\tstf.spill [r16] = f16,32\n" /* spill callee-saved fp regs */ + "\tstf.spill [r17] = f17,32\n" + "\tmov b6 = r18 ;;\n" /* set target address */ + "\tstf.spill [r16] = f18,32\n" + "\tstf.spill [r17] = f19,32\n" + "\tmov loc30 = b0 ;;\n" /* save return address */ + "\tstf.spill [r16] = f20,32\n" + "\tstf.spill [r17] = f21,32\n" + "\tbr.few b6 ;;\n" /* branch to function */ + ".global StgReturn\n" + "StgReturn:\n" + "\tmov r8 = loc16\n" /* return value in r8 */ + "\tadds r16 = %0-(6*16), sp\n" + "\tadds r17 = %0-(5*16), sp ;;\n" + "\tldf.fill f16 = [r16],32\n" /* start restoring fp regs */ + "\tldf.fill f17 = [r17],32\n" + "\tmov ar.pfs = loc29 ;;\n" /* restore register frame */ + "\tldf.fill f18 = [r16],32\n" + "\tldf.fill f19 = [r17],32\n" + "\tmov b0 = loc30 ;;\n" /* restore return address */ + "\tldf.fill f20 = [r16],32\n" + "\tldf.fill f21 = [r17],32\n" + "\tadds sp = %0, sp\n" /* restore stack */ + "\tbr.ret.sptk.many b0 ;;\n" /* return */ + : : "i"(RESERVED_C_STACK_BYTES + 6*16), "i"(LOCALS)); } #endif