X-Git-Url: http://git.megacz.com/?p=ghc-hetmet.git;a=blobdiff_plain;f=rts%2FStgCRun.c;h=e28353c35332d72e558a03e9aaa007c4dc7bafd4;hp=302e9109397120c0279cff7c0c7a64cc969d37e7;hb=e5c3b478b3cd1707cf122833822f44b2ac09b8e9;hpb=2d0adf8f0729cf8a4d988eaac48a480378b191ce diff --git a/rts/StgCRun.c b/rts/StgCRun.c index 302e910..e28353c 100644 --- a/rts/StgCRun.c +++ b/rts/StgCRun.c @@ -34,7 +34,6 @@ #include "PosixSource.h" - /* * We define the following (unused) global register variables, because for * some reason gcc generates sub-optimal code for StgRun() on the Alpha @@ -50,6 +49,7 @@ * in libc.a clobbers $s6. */ #include "ghcconfig.h" +#ifndef USE_MINIINTERPRETER #ifdef alpha_HOST_ARCH #define alpha_EXTRA_CAREFUL register long fake_ra __asm__("$26"); @@ -60,15 +60,17 @@ register double fake_f8 __asm__("$f8"); register double fake_f9 __asm__("$f9"); #endif #endif +#endif /* include Stg.h first because we want real machine regs in here: we * have to get the value of R1 back from Stg land to C land intact. */ +// yeuch +#define IN_STGCRUN 1 #include "Stg.h" #include "Rts.h" + #include "StgRun.h" -#include "RtsFlags.h" -#include "OSThreads.h" #include "Capability.h" #ifdef DEBUG @@ -85,13 +87,11 @@ register double fake_f9 __asm__("$f9"); StgRegTable * StgRun(StgFunPtr f, StgRegTable *basereg STG_UNUSED) { while (f) { - /* XXX Disabled due to RtsFlags[]/RtsFlags mismatch IF_DEBUG(interpreter, debugBelch("Jumping to "); printPtr((P_)f); fflush(stdout); debugBelch("\n"); ); - */ f = (StgFunPtr) (f)(); } return (StgRegTable *)R1.p; @@ -105,6 +105,12 @@ StgFunPtr StgReturn(void) #else /* !USE_MINIINTERPRETER */ #ifdef LEADING_UNDERSCORE +#define STG_RUN "_StgRun" +#else +#define STG_RUN "StgRun" +#endif + +#ifdef LEADING_UNDERSCORE #define STG_RETURN "_StgReturn" #else #define STG_RETURN "StgReturn" @@ -116,7 +122,7 @@ StgFunPtr StgReturn(void) #ifdef i386_HOST_ARCH -#ifdef darwin_TARGET_OS +#ifdef darwin_HOST_OS #define STG_GLOBAL ".globl " #else #define STG_GLOBAL ".global " @@ -205,8 +211,8 @@ StgRunIsImplementedInAssembler(void) /* * save callee-saves registers on behalf of the STG code. */ - ".globl StgRun\n" - "StgRun:\n\t" + ".globl " STG_RUN "\n" + STG_RUN ":\n\t" "subq %0, %%rsp\n\t" "movq %%rsp, %%rax\n\t" "addq %0-48, %%rax\n\t" @@ -226,7 +232,7 @@ StgRunIsImplementedInAssembler(void) "movq %%rdi,%%rax\n\t" "jmp *%%rax\n\t" - ".global " STG_RETURN "\n" + ".globl " STG_RETURN "\n" STG_RETURN ":\n\t" "movq %%rbx, %%rax\n\t" /* Return value in R1 */ @@ -843,6 +849,10 @@ StgRunIsImplementedInAssembler(void) loc29: saved ar.pfs loc30: saved b0 loc31: saved gp (gcc 3.3 uses this slot) + loc32: saved ar.lc + loc33: saved pr + f2 - f5: preserved floating-point registers + f16 - f23: preserved floating-point registers -------------------------------------------------------------------------- */ #ifdef ia64_HOST_ARCH @@ -851,12 +861,13 @@ StgRunIsImplementedInAssembler(void) #undef RESERVED_C_STACK_BYTES #define RESERVED_C_STACK_BYTES 1024 -#if ((__GNUC__ == 3) && (__GNUC_MINOR__ >= 3)) || (__GNUC__ > 3) -/* gcc 3.3+: leave an extra slot for gp saves */ -#define LOCALS 32 -#else -#define LOCALS 31 -#endif +/* We don't spill all the callee-save FP registers, only the ones that + * gcc has been observed to use */ +#define PRESERVED_FP_REGISTERS 12 + +/* We always allocate 34 local and 8 output registers. As long as gcc used + * fewer than 32 locals, the mangler will adjust the stack frame accordingly. */ +#define LOCALS 34 static void GNUC3_ATTRIBUTE(used) StgRunIsImplementedInAssembler(void) @@ -868,8 +879,8 @@ StgRunIsImplementedInAssembler(void) "\tld8 r18 = [r32],8\n" /* get procedure address */ "\tadds sp = -%0, sp ;;\n" /* setup stack */ "\tld8 gp = [r32]\n" /* get procedure GP */ - "\tadds r16 = %0-(6*16), sp\n" - "\tadds r17 = %0-(5*16), sp ;;\n" + "\tadds r16 = %0-(%2*16), sp\n" + "\tadds r17 = %0-((%2-1)*16), sp ;;\n" "\tstf.spill [r16] = f16,32\n" /* spill callee-saved fp regs */ "\tstf.spill [r17] = f17,32\n" "\tmov b6 = r18 ;;\n" /* set target address */ @@ -877,13 +888,21 @@ StgRunIsImplementedInAssembler(void) "\tstf.spill [r17] = f19,32\n" "\tmov loc30 = b0 ;;\n" /* save return address */ "\tstf.spill [r16] = f20,32\n" - "\tstf.spill [r17] = f21,32\n" + "\tstf.spill [r17] = f21,32 ;;\n" + "\tstf.spill [r16] = f22,32\n" + "\tstf.spill [r17] = f23,32\n" + "\tmov loc32 = ar.lc ;;\n" /* save loop counter */ + "\tstf.spill [r16] = f2,32\n" + "\tstf.spill [r17] = f3,32\n" + "\tmov loc33 = pr ;;\n" /* save predicate registers */ + "\tstf.spill [r16] = f4,32\n" + "\tstf.spill [r17] = f5,32\n" "\tbr.few b6 ;;\n" /* branch to function */ ".global StgReturn\n" "StgReturn:\n" "\tmov r8 = loc16\n" /* return value in r8 */ - "\tadds r16 = %0-(6*16), sp\n" - "\tadds r17 = %0-(5*16), sp ;;\n" + "\tadds r16 = %0-(%2*16), sp\n" + "\tadds r17 = %0-((%2-1)*16), sp ;;\n" "\tldf.fill f16 = [r16],32\n" /* start restoring fp regs */ "\tldf.fill f17 = [r17],32\n" "\tmov ar.pfs = loc29 ;;\n" /* restore register frame */ @@ -892,9 +911,19 @@ StgRunIsImplementedInAssembler(void) "\tmov b0 = loc30 ;;\n" /* restore return address */ "\tldf.fill f20 = [r16],32\n" "\tldf.fill f21 = [r17],32\n" - "\tadds sp = %0, sp\n" /* restore stack */ + "\tmov ar.lc = loc32 ;;\n" /* restore loop counter */ + "\tldf.fill f22 = [r16],32\n" + "\tldf.fill f23 = [r17],32\n" + "\tmov pr = loc33 ;;\n" /* restore predicate registers */ + "\tldf.fill f2 = [r16],32\n" + "\tldf.fill f3 = [r17],32\n" + "\tadds sp = %0, sp ;;\n" /* restore stack */ + "\tldf.fill f4 = [r16],32\n" + "\tldf.fill f5 = [r17],32\n" "\tbr.ret.sptk.many b0 ;;\n" /* return */ - : : "i"(RESERVED_C_STACK_BYTES + 6*16), "i"(LOCALS)); + : : "i"(RESERVED_C_STACK_BYTES + PRESERVED_FP_REGISTERS*16), + "i"(LOCALS), + "i"(PRESERVED_FP_REGISTERS)); } #endif