X-Git-Url: http://git.megacz.com/?p=ghc-hetmet.git;a=blobdiff_plain;f=rts%2FStgCRun.c;h=e28353c35332d72e558a03e9aaa007c4dc7bafd4;hp=51388510c6333a618e1b4832cae9b6e3a1a95e66;hb=e5c3b478b3cd1707cf122833822f44b2ac09b8e9;hpb=ffdbb302967a147550113251e47b9d98a7affdc0 diff --git a/rts/StgCRun.c b/rts/StgCRun.c index 5138851..e28353c 100644 --- a/rts/StgCRun.c +++ b/rts/StgCRun.c @@ -34,7 +34,6 @@ #include "PosixSource.h" - /* * We define the following (unused) global register variables, because for * some reason gcc generates sub-optimal code for StgRun() on the Alpha @@ -66,11 +65,12 @@ register double fake_f9 __asm__("$f9"); /* include Stg.h first because we want real machine regs in here: we * have to get the value of R1 back from Stg land to C land intact. */ +// yeuch +#define IN_STGCRUN 1 #include "Stg.h" #include "Rts.h" + #include "StgRun.h" -#include "RtsFlags.h" -#include "OSThreads.h" #include "Capability.h" #ifdef DEBUG @@ -87,13 +87,11 @@ register double fake_f9 __asm__("$f9"); StgRegTable * StgRun(StgFunPtr f, StgRegTable *basereg STG_UNUSED) { while (f) { - /* XXX Disabled due to RtsFlags[]/RtsFlags mismatch IF_DEBUG(interpreter, debugBelch("Jumping to "); printPtr((P_)f); fflush(stdout); debugBelch("\n"); ); - */ f = (StgFunPtr) (f)(); } return (StgRegTable *)R1.p; @@ -124,7 +122,7 @@ StgFunPtr StgReturn(void) #ifdef i386_HOST_ARCH -#ifdef darwin_TARGET_OS +#ifdef darwin_HOST_OS #define STG_GLOBAL ".globl " #else #define STG_GLOBAL ".global " @@ -851,6 +849,10 @@ StgRunIsImplementedInAssembler(void) loc29: saved ar.pfs loc30: saved b0 loc31: saved gp (gcc 3.3 uses this slot) + loc32: saved ar.lc + loc33: saved pr + f2 - f5: preserved floating-point registers + f16 - f23: preserved floating-point registers -------------------------------------------------------------------------- */ #ifdef ia64_HOST_ARCH @@ -859,12 +861,13 @@ StgRunIsImplementedInAssembler(void) #undef RESERVED_C_STACK_BYTES #define RESERVED_C_STACK_BYTES 1024 -#if ((__GNUC__ == 3) && (__GNUC_MINOR__ >= 3)) || (__GNUC__ > 3) -/* gcc 3.3+: leave an extra slot for gp saves */ -#define LOCALS 32 -#else -#define LOCALS 31 -#endif +/* We don't spill all the callee-save FP registers, only the ones that + * gcc has been observed to use */ +#define PRESERVED_FP_REGISTERS 12 + +/* We always allocate 34 local and 8 output registers. As long as gcc used + * fewer than 32 locals, the mangler will adjust the stack frame accordingly. */ +#define LOCALS 34 static void GNUC3_ATTRIBUTE(used) StgRunIsImplementedInAssembler(void) @@ -876,8 +879,8 @@ StgRunIsImplementedInAssembler(void) "\tld8 r18 = [r32],8\n" /* get procedure address */ "\tadds sp = -%0, sp ;;\n" /* setup stack */ "\tld8 gp = [r32]\n" /* get procedure GP */ - "\tadds r16 = %0-(6*16), sp\n" - "\tadds r17 = %0-(5*16), sp ;;\n" + "\tadds r16 = %0-(%2*16), sp\n" + "\tadds r17 = %0-((%2-1)*16), sp ;;\n" "\tstf.spill [r16] = f16,32\n" /* spill callee-saved fp regs */ "\tstf.spill [r17] = f17,32\n" "\tmov b6 = r18 ;;\n" /* set target address */ @@ -885,13 +888,21 @@ StgRunIsImplementedInAssembler(void) "\tstf.spill [r17] = f19,32\n" "\tmov loc30 = b0 ;;\n" /* save return address */ "\tstf.spill [r16] = f20,32\n" - "\tstf.spill [r17] = f21,32\n" + "\tstf.spill [r17] = f21,32 ;;\n" + "\tstf.spill [r16] = f22,32\n" + "\tstf.spill [r17] = f23,32\n" + "\tmov loc32 = ar.lc ;;\n" /* save loop counter */ + "\tstf.spill [r16] = f2,32\n" + "\tstf.spill [r17] = f3,32\n" + "\tmov loc33 = pr ;;\n" /* save predicate registers */ + "\tstf.spill [r16] = f4,32\n" + "\tstf.spill [r17] = f5,32\n" "\tbr.few b6 ;;\n" /* branch to function */ ".global StgReturn\n" "StgReturn:\n" "\tmov r8 = loc16\n" /* return value in r8 */ - "\tadds r16 = %0-(6*16), sp\n" - "\tadds r17 = %0-(5*16), sp ;;\n" + "\tadds r16 = %0-(%2*16), sp\n" + "\tadds r17 = %0-((%2-1)*16), sp ;;\n" "\tldf.fill f16 = [r16],32\n" /* start restoring fp regs */ "\tldf.fill f17 = [r17],32\n" "\tmov ar.pfs = loc29 ;;\n" /* restore register frame */ @@ -900,9 +911,19 @@ StgRunIsImplementedInAssembler(void) "\tmov b0 = loc30 ;;\n" /* restore return address */ "\tldf.fill f20 = [r16],32\n" "\tldf.fill f21 = [r17],32\n" - "\tadds sp = %0, sp\n" /* restore stack */ + "\tmov ar.lc = loc32 ;;\n" /* restore loop counter */ + "\tldf.fill f22 = [r16],32\n" + "\tldf.fill f23 = [r17],32\n" + "\tmov pr = loc33 ;;\n" /* restore predicate registers */ + "\tldf.fill f2 = [r16],32\n" + "\tldf.fill f3 = [r17],32\n" + "\tadds sp = %0, sp ;;\n" /* restore stack */ + "\tldf.fill f4 = [r16],32\n" + "\tldf.fill f5 = [r17],32\n" "\tbr.ret.sptk.many b0 ;;\n" /* return */ - : : "i"(RESERVED_C_STACK_BYTES + 6*16), "i"(LOCALS)); + : : "i"(RESERVED_C_STACK_BYTES + PRESERVED_FP_REGISTERS*16), + "i"(LOCALS), + "i"(PRESERVED_FP_REGISTERS)); } #endif