X-Git-Url: http://git.megacz.com/?p=ghc-hetmet.git;a=blobdiff_plain;f=ghc%2Frts%2FStgCRun.c;h=29d4efe82ba90292409904c8bd83889a24b0ebb5;hp=129cd23c520922c1d4ffad736db7d308a0e8b785;hb=a0f46309637779ccc141ec531e9b128596a5bba0;hpb=09ce0083f1d378b21e0fbb97931267d8ff2dfc52 diff --git a/ghc/rts/StgCRun.c b/ghc/rts/StgCRun.c index 129cd23..29d4efe 100644 --- a/ghc/rts/StgCRun.c +++ b/ghc/rts/StgCRun.c @@ -1,7 +1,6 @@ /* ----------------------------------------------------------------------------- - * $Id: StgCRun.c,v 1.22 2000/11/13 14:53:27 simonmar Exp $ * - * (c) The GHC Team, 1998-2000 + * (c) The GHC Team, 1998-2003 * * STG-to-C glue. * @@ -19,29 +18,60 @@ * the whatever way C returns a value. * * NOTE: StgRun/StgReturn do *NOT* load or store Hp or any - * other registers (other than saving the C callee-saves + * other registers (other than saving the C callee-saves * registers). Instead, the called function "f" must do that * in STG land. - * + * * GCC will have assumed that pushing/popping of C-stack frames is * going on when it generated its code, and used stack space * accordingly. However, we actually {\em post-process away} all * such stack-framery (see \tr{ghc/driver/ghc-asm.lprl}). Things will * be OK however, if we initially make sure there are * @RESERVED_C_STACK_BYTES@ on the C-stack to begin with, for local - * variables. + * variables. * * -------------------------------------------------------------------------- */ +#include "PosixSource.h" + + +/* + * We define the following (unused) global register variables, because for + * some reason gcc generates sub-optimal code for StgRun() on the Alpha + * (unnecessarily saving extra registers on the stack) if we don't. + * + * Why do it at the top of this file, rather than near StgRun() below? Because + * gcc doesn't let us define global register variables after any function + * definition has been read. Any point after #include "Stg.h" would be too + * late. + * + * We define alpha_EXTRA_CAREFUL here to save $s6, $f8 and $f9 -- registers + * that we don't use but which are callee-save registers. The __divq() routine + * in libc.a clobbers $s6. + */ +#include "ghcconfig.h" +#ifdef alpha_HOST_ARCH +#define alpha_EXTRA_CAREFUL +register long fake_ra __asm__("$26"); +register long fake_gp __asm__("$29"); +#ifdef alpha_EXTRA_CAREFUL +register long fake_s6 __asm__("$15"); +register double fake_f8 __asm__("$f8"); +register double fake_f9 __asm__("$f9"); +#endif +#endif + /* include Stg.h first because we want real machine regs in here: we * have to get the value of R1 back from Stg land to C land intact. */ #include "Stg.h" #include "Rts.h" #include "StgRun.h" +#include "RtsFlags.h" +#include "OSThreads.h" +#include "Capability.h" #ifdef DEBUG -#include "RtsFlags.h" #include "RtsUtils.h" #include "Printer.h" #endif @@ -51,175 +81,24 @@ /* ----------------------------------------------------------------------------- any architecture (using miniinterpreter) -------------------------------------------------------------------------- */ - -/* The static @jmp_environment@ variable allows @miniInterpret@ to - * communicate with @StgReturn@. - * - * Because @StgRun@ may be used recursively, we carefully - * save and restore the whole of @jmp_environment@. - */ -#include -#include /* for memcpy */ - -static jmp_buf jmp_environment; - -#if 1 - -extern StgThreadReturnCode StgRun(StgFunPtr f, StgRegTable *basereg) -{ - while (f) { - IF_DEBUG(evaluator, - fprintf(stderr,"Jumping to "); - printPtr((P_)f); - fprintf(stderr,"\n"); - ); - f = (StgFunPtr) (f)(); - } - return (StgThreadReturnCode)R1.i; -} - -EXTFUN(StgReturn) -{ - return 0; -} - -#else - -#define CHECK_STACK 0 -#define STACK_DETAILS 0 - -static int enters = 0; - -static void scanStackSeg ( W_* ptr, int nwords ) -{ - W_ w; -#if CHECK_STACK - int nwords0 = nwords; -#if STACK_DETAILS - while (nwords > 0) { - w = *ptr; - if (IS_ARG_TAG(w)) { - fprintf ( stderr, "%d",w ); nwords--; ptr++; - while (w) { fprintf(stderr, "_"); w--; nwords--; ptr++; } - } - else { - fprintf(stderr, "p"); - nwords--; ptr++; - } - } - if (nwords < 0) fprintf(stderr, "erk: nwords < 0\n"); -#endif - checkStackChunk ( ptr, ptr-nwords0 ); -#endif -} -extern StgFunPtr stg_enterStackTop; -extern StgThreadReturnCode StgRun(StgFunPtr f, StgRegTable *basereg) +StgRegTable * StgRun(StgFunPtr f, StgRegTable *basereg STG_UNUSED) { - char* nm; - while (1) { - -#if CHECK_STACK - { - int i; - StgTSO* tso = basereg->rCurrentTSO; - StgWord* sb = tso->stack + tso->stack_size; - StgWord* sp; - StgWord* su; - int ws; - - if (f == &stg_enterStackTop) { - sp = tso->sp; - su = tso->su; - } else { - sp = basereg->rSp; - su = basereg->rSu; - } - -#if STACK_DETAILS - fprintf(stderr, - "== SB = %p SP = %p(%p) SU = %p SpLim = %p(%p)\n", - sb, sp, tso->sp, su, basereg->rSpLim, tso->stack + RESERVED_STACK_WORDS); -#endif - - if (su >= sb) goto postloop; - if (!sp || !su) goto postloop; - - printStack ( sp, sb, su); - - while (1) { - ws = su - sp; - switch (get_itbl((StgClosure*)su)->type) { - case STOP_FRAME: - scanStackSeg(sp,ws); -#if STACK_DETAILS - fprintf(stderr, "S%d ",ws); - fprintf(stderr, "\n"); -#endif - goto postloop; - case UPDATE_FRAME: - scanStackSeg(sp,ws); -#if STACK_DETAILS - fprintf(stderr,"U%d ",ws); -#endif - sp = su + sizeofW(StgUpdateFrame); - su = ((StgUpdateFrame*)su)->link; - break; - case SEQ_FRAME: - scanStackSeg(sp,ws); -#if STACK_DETAILS - fprintf(stderr,"Q%d ",ws); -#endif - sp = su + sizeofW(StgSeqFrame); - su = ((StgSeqFrame*)su)->link; - break; - case CATCH_FRAME: - scanStackSeg(sp,ws); -#if STACK_DETAILS - fprintf(stderr,"C%d ",ws); -#endif - sp = su + sizeofW(StgCatchFrame); - su = ((StgCatchFrame*)su)->link; - break; - default: - fprintf(stderr, "?\nweird record on stack\n"); - assert(0); - goto postloop; - } - } - postloop: - } -#endif -#if STACK_DETAILS - fprintf(stderr,"\n"); -#endif -#if 1 - fprintf(stderr,"-- enter %p ", f); - nm = nameFromOPtr ( f ); - if (nm) fprintf(stderr, "%s", nm); else - printPtr((P_)f); - fprintf ( stderr, "\n"); -#endif -#if STACK_DETAILS - fprintf(stderr,"\n"); -#endif - zzz: - if (enters % 1000 == 0) fprintf(stderr, "%d enters\n",enters); - enters++; - f = (StgFunPtr) (f)(); - if (!f) break; + while (f) { + IF_DEBUG(interpreter, + debugBelch("Jumping to "); + printPtr((P_)f); fflush(stdout); + debugBelch("\n"); + ); + f = (StgFunPtr) (f)(); } - fprintf (stderr, "miniInterpreter: bye!\n\n" ); - return (StgThreadReturnCode)R1.i; + return (StgRegTable *)R1.p; } -EXTFUN(StgReturn) +StgFunPtr StgReturn(void) { - return 0; + return 0; } -#endif - - #else /* !USE_MINIINTERPRETER */ @@ -232,17 +111,23 @@ EXTFUN(StgReturn) /* ----------------------------------------------------------------------------- x86 architecture -------------------------------------------------------------------------- */ - -#ifdef i386_TARGET_ARCH -StgThreadReturnCode +#ifdef i386_HOST_ARCH + +#ifdef darwin_TARGET_OS +#define STG_GLOBAL ".globl " +#else +#define STG_GLOBAL ".global " +#endif + +StgRegTable * StgRun(StgFunPtr f, StgRegTable *basereg) { unsigned char space[ RESERVED_C_STACK_BYTES + 4*sizeof(void *) ]; - StgThreadReturnCode r; + StgRegTable * r; __asm__ volatile ( - /* + /* * save callee-saves registers on behalf of the STG code. */ "movl %%esp, %%eax\n\t" @@ -256,14 +141,32 @@ StgRun(StgFunPtr f, StgRegTable *basereg) { */ "movl %3,%%ebx\n\t" /* - * grab the function argument from the stack, and jump to it. + * grab the function argument from the stack */ "movl %2,%%eax\n\t" + +#if darwin_TARGET_OS + /* + * Darwin: keep the stack aligned + */ + "subl $12,%%esp\n\t" +#endif + + /* + * jump to it + */ "jmp *%%eax\n\t" - ".global " STG_RETURN "\n" + STG_GLOBAL STG_RETURN "\n" STG_RETURN ":\n\t" +#if darwin_TARGET_OS + /* + * Darwin: keep the stack aligned + */ + "addl $12,%%esp\n\t" +#endif + "movl %%esi, %%eax\n\t" /* Return value in R1 */ /* @@ -286,10 +189,110 @@ StgRun(StgFunPtr f, StgRegTable *basereg) { #endif +/* ---------------------------------------------------------------------------- + x86-64 is almost the same as plain x86. + + I've done it using entirely inline assembler, because I couldn't + get gcc to generate the correct subtraction from %rsp by using + the local array variable trick. It didn't seem to reserve + enough space. Oh well, it's not much harder this way. + + ------------------------------------------------------------------------- */ + +#ifdef x86_64_HOST_ARCH + +extern StgRegTable * StgRun(StgFunPtr f, StgRegTable *basereg); + +static void GNUC3_ATTRIBUTE(used) +StgRunIsImplementedInAssembler(void) +{ + __asm__ volatile ( + /* + * save callee-saves registers on behalf of the STG code. + */ + ".globl StgRun\n" + "StgRun:\n\t" + "subq %0, %%rsp\n\t" + "movq %%rsp, %%rax\n\t" + "addq %0-48, %%rax\n\t" + "movq %%rbx,0(%%rax)\n\t" + "movq %%rbp,8(%%rax)\n\t" + "movq %%r12,16(%%rax)\n\t" + "movq %%r13,24(%%rax)\n\t" + "movq %%r14,32(%%rax)\n\t" + "movq %%r15,40(%%rax)\n\t" + /* + * Set BaseReg + */ + "movq %%rsi,%%r13\n\t" + /* + * grab the function argument from the stack, and jump to it. + */ + "movq %%rdi,%%rax\n\t" + "jmp *%%rax\n\t" + + ".global " STG_RETURN "\n" + STG_RETURN ":\n\t" + + "movq %%rbx, %%rax\n\t" /* Return value in R1 */ + + /* + * restore callee-saves registers. (Don't stomp on %%rax!) + */ + "movq %%rsp, %%rdx\n\t" + "addq %0-48, %%rdx\n\t" + "movq 0(%%rdx),%%rbx\n\t" /* restore the registers saved above */ + "movq 8(%%rdx),%%rbp\n\t" + "movq 16(%%rdx),%%r12\n\t" + "movq 24(%%rdx),%%r13\n\t" + "movq 32(%%rdx),%%r14\n\t" + "movq 40(%%rdx),%%r15\n\t" + "addq %0, %%rsp\n\t" + "retq" + + : : "i"(RESERVED_C_STACK_BYTES+48+8 /*stack frame size*/)); + /* + HACK alert! + + The x86_64 ABI specifies that on a procedure call, %rsp is + aligned on a 16-byte boundary + 8. That is, the first + argument on the stack after the return address will be + 16-byte aligned. + + Which should be fine: RESERVED_C_STACK_BYTES+48 is a multiple + of 16 bytes. + + BUT... when we do a C-call from STG land, gcc likes to put the + stack alignment adjustment in the prolog. eg. if we're calling + a function with arguments in regs, gcc will insert 'subq $8,%rsp' + in the prolog, to keep %rsp aligned (the return address is 8 + bytes, remember). The mangler throws away the prolog, so we + lose the stack alignment. + + The hack is to add this extra 8 bytes to our %rsp adjustment + here, so that throughout STG code, %rsp is 16-byte aligned, + ready for a C-call. + + A quick way to see if this is wrong is to compile this code: + + main = System.Exit.exitWith ExitSuccess + + And run it with +RTS -sstderr. The stats code in the RTS, in + particular statsPrintf(), relies on the stack alignment because + it saves the %xmm regs on the stack, so it'll fall over if the + stack isn't aligned, and calling exitWith from Haskell invokes + shutdownHaskellAndExit using a C call. + + Future gcc releases will almost certainly break this hack... + */ +} + +#endif /* x86-64 */ + /* ----------------------------------------------------------------------------- Sparc architecture - -- + -- OLD COMMENT from GHC-3.02: We want tailjumps to be calls, because `call xxx' is the only Sparc @@ -316,18 +319,22 @@ StgRun(StgFunPtr f, StgRegTable *basereg) { Updated info (GHC 4.08.2): not saving %i7 any more (see below). -------------------------------------------------------------------------- */ - -#ifdef sparc_TARGET_ARCH -StgThreadReturnCode +#ifdef sparc_HOST_ARCH + +StgRegTable * StgRun(StgFunPtr f, StgRegTable *basereg) { - StgChar space[RESERVED_C_STACK_BYTES]; + unsigned char space[RESERVED_C_STACK_BYTES]; +#if 0 + register void *i7 __asm__("%i7"); + ((void **)(space))[100] = i7; +#endif f(); __asm__ volatile ( - ".align 4\n" + ".align 4\n" ".global " STG_RETURN "\n" - STG_RETURN ":" + STG_RETURN ":" : : : "l0","l1","l2","l3","l4","l5","l6","l7"); /* we tell the C compiler that l0-l7 are clobbered on return to * StgReturn, otherwise it tries to use these to save eg. the @@ -345,86 +352,151 @@ StgRun(StgFunPtr f, StgRegTable *basereg) { * call to f(), this gets clobbered in STG land and we end up * dereferencing a bogus pointer in StgReturn. */ - __asm__ volatile ("ld %1,%0" + __asm__ volatile ("ld %1,%0" : "=r" (i7) : "m" (((void **)(space))[100])); #endif - return (StgThreadReturnCode)R1.i; + return (StgRegTable *)R1.i; } #endif /* ----------------------------------------------------------------------------- alpha architecture + + "The stack pointer (SP) must at all times denote an address that has octaword + alignment. (This restriction has the side effect that the in-memory portion + of the argument list, if any, will start on an octaword boundary.) Note that + the stack grows toward lower addresses. During a procedure invocation, SP + can never be set to a value that is higher than the value of SP at entry to + that procedure invocation. + + "The contents of the stack, located above the portion of the argument list + (if any) that is passed in memory, belong to the calling procedure. Because + they are part of the calling procedure, they should not be read or written + by the called procedure, except as specified by indirect arguments or + language-controlled up-level references. + + "The SP value might be used by the hardware when raising exceptions and + asynchronous interrupts. It must be assumed that the contents of the stack + below the current SP value and within the stack for the current thread are + continually and unpredictably modified, as specified in the _Alpha + Architecture Reference Manual_, and as a result of asynchronous software + actions." + + -- Compaq Computer Corporation, Houston. Tru64 UNIX Calling Standard for + Alpha Systems, 5.1 edition, August 2000, section 3.2.1. http://www. + tru64unix.compaq.com/docs/base_doc/DOCUMENTATION/V51_PDF/ARH9MBTE.PDF -------------------------------------------------------------------------- */ -#ifdef alpha_TARGET_ARCH +#ifdef alpha_HOST_ARCH -StgThreadReturnCode -StgRun(StgFunPtr f, StgRegTable *basereg) +StgRegTable * +StgRun(StgFunPtr f, StgRegTable *basereg) { - StgThreadReturnCode ret; - - __asm__ volatile ("stq $9,-8($30)\n\t" - "stq $10,-16($30)\n\t" - "stq $11,-24($30)\n\t" - "stq $12,-32($30)\n\t" - "stq $13,-40($30)\n\t" - "stq $14,-48($30)\n\t" - "stq $15,-56($30)\n\t" - "stt $f2,-64($30)\n\t" - "stt $f3,-72($30)\n\t" - "stt $f4,-80($30)\n\t" - "stt $f5,-88($30)\n\t" - "stt $f6,-96($30)\n\t" - "stt $f7,-104($30)\n\t" - "stt $f8,-112($30)\n\t" - "stt $f9,-120($30)\n\t" - "lda $30,-%0($30)" : : - "K" (RESERVED_C_STACK_BYTES+ - 8*sizeof(double)+8*sizeof(long))); + register long real_ra __asm__("$26"); volatile long save_ra; + register long real_gp __asm__("$29"); volatile long save_gp; + + register long real_s0 __asm__("$9" ); volatile long save_s0; + register long real_s1 __asm__("$10"); volatile long save_s1; + register long real_s2 __asm__("$11"); volatile long save_s2; + register long real_s3 __asm__("$12"); volatile long save_s3; + register long real_s4 __asm__("$13"); volatile long save_s4; + register long real_s5 __asm__("$14"); volatile long save_s5; +#ifdef alpha_EXTRA_CAREFUL + register long real_s6 __asm__("$15"); volatile long save_s6; +#endif - f(); + register double real_f2 __asm__("$f2"); volatile double save_f2; + register double real_f3 __asm__("$f3"); volatile double save_f3; + register double real_f4 __asm__("$f4"); volatile double save_f4; + register double real_f5 __asm__("$f5"); volatile double save_f5; + register double real_f6 __asm__("$f6"); volatile double save_f6; + register double real_f7 __asm__("$f7"); volatile double save_f7; +#ifdef alpha_EXTRA_CAREFUL + register double real_f8 __asm__("$f8"); volatile double save_f8; + register double real_f9 __asm__("$f9"); volatile double save_f9; +#endif + + register StgFunPtr real_pv __asm__("$27"); - __asm__ volatile (".align 3\n" - ".globl " STG_RETURN "\n" - STG_RETURN ":\n\t" - "lda %0,($14)\n\t" /* save R1 */ - "lda $30,%0($30)\n\t" - "ldq $9,-8($30)\n\t" - "ldq $10,-16($30)\n\t" - "ldq $11,-24($30)\n\t" - "ldq $12,-32($30)\n\t" - "ldq $13,-40($30)\n\t" - "ldq $14,-48($30)\n\t" - "ldq $15,-56($30)\n\t" - "ldt $f2,-64($30)\n\t" - "ldt $f3,-72($30)\n\t" - "ldt $f4,-80($30)\n\t" - "ldt $f5,-88($30)\n\t" - "ldt $f6,-96($30)\n\t" - "ldt $f7,-104($30)\n\t" - "ldt $f8,-112($30)\n\t" - "ldt $f9,-120($30)" - : "=r" (ret) - : "K" (RESERVED_C_STACK_BYTES+ - 8*sizeof(double)+8*sizeof(long))); + StgRegTable * ret; + + save_ra = real_ra; + save_gp = real_gp; + + save_s0 = real_s0; + save_s1 = real_s1; + save_s2 = real_s2; + save_s3 = real_s3; + save_s4 = real_s4; + save_s5 = real_s5; +#ifdef alpha_EXTRA_CAREFUL + save_s6 = real_s6; +#endif + + save_f2 = real_f2; + save_f3 = real_f3; + save_f4 = real_f4; + save_f5 = real_f5; + save_f6 = real_f6; + save_f7 = real_f7; +#ifdef alpha_EXTRA_CAREFUL + save_f8 = real_f8; + save_f9 = real_f9; +#endif + + real_pv = f; + + __asm__ volatile( "lda $30,-%0($30)" "\n" + "\t" "jmp ($27)" "\n" + "\t" ".align 3" "\n" + ".globl " STG_RETURN "\n" + STG_RETURN ":" "\n" + "\t" "lda $30,%0($30)" "\n" + : : "K" (RESERVED_C_STACK_BYTES)); + + ret = real_s5; + + real_s0 = save_s0; + real_s1 = save_s1; + real_s2 = save_s2; + real_s3 = save_s3; + real_s4 = save_s4; + real_s5 = save_s5; +#ifdef alpha_EXTRA_CAREFUL + real_s6 = save_s6; +#endif + + real_f2 = save_f2; + real_f3 = save_f3; + real_f4 = save_f4; + real_f5 = save_f5; + real_f6 = save_f6; + real_f7 = save_f7; +#ifdef alpha_EXTRA_CAREFUL + real_f8 = save_f8; + real_f9 = save_f9; +#endif + + real_ra = save_ra; + real_gp = save_gp; return ret; } -#endif /* alpha_TARGET_ARCH */ +#endif /* alpha_HOST_ARCH */ /* ----------------------------------------------------------------------------- HP-PA architecture -------------------------------------------------------------------------- */ -#ifdef hppa1_1_TARGET_ARCH +#ifdef hppa1_1_HOST_ARCH -StgThreadReturnCode -StgRun(StgFunPtr f, StgRegTable *basereg) +StgRegTable * +StgRun(StgFunPtr f, StgRegTable *basereg) { StgChar space[RESERVED_C_STACK_BYTES+16*sizeof(long)+10*sizeof(double)]; - StgThreadReturnCode ret; + StgRegTable * ret; __asm__ volatile ("ldo %0(%%r30),%%r19\n" "\tstw %%r3, 0(0,%%r19)\n" @@ -496,7 +568,7 @@ StgRun(StgFunPtr f, StgRegTable *basereg) "\tfldds 8(0,%%r19),%%fr19\n" "\tldo 32(%%r19),%%r19\n" "\tfldds -16(0,%%r19),%%fr20\n" - "\tfldds -8(0,%%r19),%%fr21\n" + "\tfldds -8(0,%%r19),%%fr21\n" : "=r" (ret) : "n" (-(116 * sizeof(long) + 10 * sizeof(double))) : "%r19" @@ -505,6 +577,329 @@ StgRun(StgFunPtr f, StgRegTable *basereg) return ret; } -#endif /* hppa1_1_TARGET_ARCH */ +#endif /* hppa1_1_HOST_ARCH */ + +/* ----------------------------------------------------------------------------- + PowerPC architecture + + Everything is in assembler, so we don't have to deal with GCC... + + -------------------------------------------------------------------------- */ + +#ifdef powerpc_HOST_ARCH + +extern StgRegTable * StgRun(StgFunPtr f, StgRegTable *basereg); + +#ifdef darwin_HOST_OS +static void GNUC3_ATTRIBUTE(used) +StgRunIsImplementedInAssembler(void) +{ +#if HAVE_SUBSECTIONS_VIA_SYMBOLS + // if the toolchain supports deadstripping, we have to + // prevent it here (it tends to get confused here). + __asm__ volatile (".no_dead_strip _StgRunIsImplementedInAssembler"); +#endif + __asm__ volatile ( + "\n.globl _StgRun\n" + "_StgRun:\n" + "\tmflr r0\n" + "\tbl saveFP # f14\n" + "\tstmw r13,-220(r1)\n" + "\tstwu r1,-%0(r1)\n" + "\tmr r27,r4\n" // BaseReg == r27 + "\tmtctr r3\n" + "\tmr r12,r3\n" + "\tbctr\n" + ".globl _StgReturn\n" + "_StgReturn:\n" + "\tmr r3,r14\n" + "\tla r1,%0(r1)\n" + "\tlmw r13,-220(r1)\n" + "\tb restFP # f14\n" + : : "i"(RESERVED_C_STACK_BYTES+224 /*stack frame size*/)); +} +#else + +// This version is for PowerPC Linux. + +// Differences from the Darwin/Mac OS X version: +// *) Different Assembler Syntax +// *) Doesn't use Register Saving Helper Functions (although they exist somewhere) +// *) We may not access positive stack offsets +// (no "Red Zone" as in the Darwin ABI) +// *) The Link Register is saved to a different offset in the caller's stack frame +// (Linux: 4(r1), Darwin 8(r1)) + +static void GNUC3_ATTRIBUTE(used) +StgRunIsImplementedInAssembler(void) +{ + __asm__ volatile ( + "\t.globl StgRun\n" + "\t.type StgRun,@function\n" + "StgRun:\n" + "\tmflr 0\n" + "\tstw 0,4(1)\n" + "\tmr 5,1\n" + "\tstwu 1,-%0(1)\n" + "\tstmw 13,-220(5)\n" + "\tstfd 14,-144(5)\n" + "\tstfd 15,-136(5)\n" + "\tstfd 16,-128(5)\n" + "\tstfd 17,-120(5)\n" + "\tstfd 18,-112(5)\n" + "\tstfd 19,-104(5)\n" + "\tstfd 20,-96(5)\n" + "\tstfd 21,-88(5)\n" + "\tstfd 22,-80(5)\n" + "\tstfd 23,-72(5)\n" + "\tstfd 24,-64(5)\n" + "\tstfd 25,-56(5)\n" + "\tstfd 26,-48(5)\n" + "\tstfd 27,-40(5)\n" + "\tstfd 28,-32(5)\n" + "\tstfd 29,-24(5)\n" + "\tstfd 30,-16(5)\n" + "\tstfd 31,-8(5)\n" + "\tmr 27,4\n" // BaseReg == r27 + "\tmtctr 3\n" + "\tmr 12,3\n" + "\tbctr\n" + ".globl StgReturn\n" + "\t.type StgReturn,@function\n" + "StgReturn:\n" + "\tmr 3,14\n" + "\tla 5,%0(1)\n" + "\tlmw 13,-220(5)\n" + "\tlfd 14,-144(5)\n" + "\tlfd 15,-136(5)\n" + "\tlfd 16,-128(5)\n" + "\tlfd 17,-120(5)\n" + "\tlfd 18,-112(5)\n" + "\tlfd 19,-104(5)\n" + "\tlfd 20,-96(5)\n" + "\tlfd 21,-88(5)\n" + "\tlfd 22,-80(5)\n" + "\tlfd 23,-72(5)\n" + "\tlfd 24,-64(5)\n" + "\tlfd 25,-56(5)\n" + "\tlfd 26,-48(5)\n" + "\tlfd 27,-40(5)\n" + "\tlfd 28,-32(5)\n" + "\tlfd 29,-24(5)\n" + "\tlfd 30,-16(5)\n" + "\tlfd 31,-8(5)\n" + "\tmr 1,5\n" + "\tlwz 0,4(1)\n" + "\tmtlr 0\n" + "\tblr\n" + : : "i"(RESERVED_C_STACK_BYTES+224 /*stack frame size*/)); +} +#endif + +#endif + +/* ----------------------------------------------------------------------------- + PowerPC 64 architecture + + Everything is in assembler, so we don't have to deal with GCC... + + -------------------------------------------------------------------------- */ + +#ifdef powerpc64_HOST_ARCH + +#ifdef linux_HOST_OS +extern StgRegTable * StgRun(StgFunPtr f, StgRegTable *basereg); + +static void GNUC3_ATTRIBUTE(used) +StgRunIsImplementedInAssembler(void) +{ + // r0 volatile + // r1 stack pointer + // r2 toc - needs to be saved + // r3-r10 argument passing, volatile + // r11, r12 very volatile (not saved across cross-module calls) + // r13 thread local state (never modified, don't need to save) + // r14-r31 callee-save + __asm__ volatile ( + ".section \".opd\",\"aw\"\n" + ".align 3\n" + ".globl StgRun\n" + "StgRun:\n" + "\t.quad\t.StgRun,.TOC.@tocbase,0\n" + "\t.size StgRun,24\n" + ".globl StgReturn\n" + "StgReturn:\n" + "\t.quad\t.StgReturn,.TOC.@tocbase,0\n" + "\t.size StgReturn,24\n" + ".previous\n" + ".globl .StgRun\n" + ".type .StgRun,@function\n" + ".StgRun:\n" + "\tmflr 0\n" + "\tmr 5, 1\n" + "\tstd 0, 16(1)\n" + "\tstdu 1, -%0(1)\n" + "\tstd 2, -296(5)\n" + "\tstd 14, -288(5)\n" + "\tstd 15, -280(5)\n" + "\tstd 16, -272(5)\n" + "\tstd 17, -264(5)\n" + "\tstd 18, -256(5)\n" + "\tstd 19, -248(5)\n" + "\tstd 20, -240(5)\n" + "\tstd 21, -232(5)\n" + "\tstd 22, -224(5)\n" + "\tstd 23, -216(5)\n" + "\tstd 24, -208(5)\n" + "\tstd 25, -200(5)\n" + "\tstd 26, -192(5)\n" + "\tstd 27, -184(5)\n" + "\tstd 28, -176(5)\n" + "\tstd 29, -168(5)\n" + "\tstd 30, -160(5)\n" + "\tstd 31, -152(5)\n" + "\tstfd 14, -144(5)\n" + "\tstfd 15, -136(5)\n" + "\tstfd 16, -128(5)\n" + "\tstfd 17, -120(5)\n" + "\tstfd 18, -112(5)\n" + "\tstfd 19, -104(5)\n" + "\tstfd 20, -96(5)\n" + "\tstfd 21, -88(5)\n" + "\tstfd 22, -80(5)\n" + "\tstfd 23, -72(5)\n" + "\tstfd 24, -64(5)\n" + "\tstfd 25, -56(5)\n" + "\tstfd 26, -48(5)\n" + "\tstfd 27, -40(5)\n" + "\tstfd 28, -32(5)\n" + "\tstfd 29, -24(5)\n" + "\tstfd 30, -16(5)\n" + "\tstfd 31, -8(5)\n" + "\tmr 27, 4\n" // BaseReg == r27 + "\tld 2, 8(3)\n" + "\tld 3, 0(3)\n" + "\tmtctr 3\n" + "\tbctr\n" + ".globl .StgReturn\n" + ".type .StgReturn,@function\n" + ".StgReturn:\n" + "\tmr 3,14\n" + "\tla 5, %0(1)\n" // load address == addi r5, r1, %0 + "\tld 2, -296(5)\n" + "\tld 14, -288(5)\n" + "\tld 15, -280(5)\n" + "\tld 16, -272(5)\n" + "\tld 17, -264(5)\n" + "\tld 18, -256(5)\n" + "\tld 19, -248(5)\n" + "\tld 20, -240(5)\n" + "\tld 21, -232(5)\n" + "\tld 22, -224(5)\n" + "\tld 23, -216(5)\n" + "\tld 24, -208(5)\n" + "\tld 25, -200(5)\n" + "\tld 26, -192(5)\n" + "\tld 27, -184(5)\n" + "\tld 28, -176(5)\n" + "\tld 29, -168(5)\n" + "\tld 30, -160(5)\n" + "\tld 31, -152(5)\n" + "\tlfd 14, -144(5)\n" + "\tlfd 15, -136(5)\n" + "\tlfd 16, -128(5)\n" + "\tlfd 17, -120(5)\n" + "\tlfd 18, -112(5)\n" + "\tlfd 19, -104(5)\n" + "\tlfd 20, -96(5)\n" + "\tlfd 21, -88(5)\n" + "\tlfd 22, -80(5)\n" + "\tlfd 23, -72(5)\n" + "\tlfd 24, -64(5)\n" + "\tlfd 25, -56(5)\n" + "\tlfd 26, -48(5)\n" + "\tlfd 27, -40(5)\n" + "\tlfd 28, -32(5)\n" + "\tlfd 29, -24(5)\n" + "\tlfd 30, -16(5)\n" + "\tlfd 31, -8(5)\n" + "\tmr 1, 5\n" + "\tld 0, 16(1)\n" + "\tmtlr 0\n" + "\tblr\n" + : : "i"(RESERVED_C_STACK_BYTES+304 /*stack frame size*/)); +} +#else // linux_HOST_OS +#error Only linux support for power64 right now. +#endif + +#endif + +/* ----------------------------------------------------------------------------- + IA64 architecture + + Again, in assembler - so we can fiddle with the register stack, and because + gcc doesn't handle asm-clobbered callee-saves correctly. + + loc0 - loc15: preserved locals + loc16 - loc28: STG registers + loc29: saved ar.pfs + loc30: saved b0 + loc31: saved gp (gcc 3.3 uses this slot) + -------------------------------------------------------------------------- */ + +#ifdef ia64_HOST_ARCH + +/* the memory stack is rarely used, so 16K is excessive */ +#undef RESERVED_C_STACK_BYTES +#define RESERVED_C_STACK_BYTES 1024 + +#if ((__GNUC__ == 3) && (__GNUC_MINOR__ >= 3)) || (__GNUC__ > 3) +/* gcc 3.3+: leave an extra slot for gp saves */ +#define LOCALS 32 +#else +#define LOCALS 31 +#endif + +static void GNUC3_ATTRIBUTE(used) +StgRunIsImplementedInAssembler(void) +{ + __asm__ volatile( + ".global StgRun\n" + "StgRun:\n" + "\talloc loc29 = ar.pfs, 0, %1, 8, 0\n" /* setup register frame */ + "\tld8 r18 = [r32],8\n" /* get procedure address */ + "\tadds sp = -%0, sp ;;\n" /* setup stack */ + "\tld8 gp = [r32]\n" /* get procedure GP */ + "\tadds r16 = %0-(6*16), sp\n" + "\tadds r17 = %0-(5*16), sp ;;\n" + "\tstf.spill [r16] = f16,32\n" /* spill callee-saved fp regs */ + "\tstf.spill [r17] = f17,32\n" + "\tmov b6 = r18 ;;\n" /* set target address */ + "\tstf.spill [r16] = f18,32\n" + "\tstf.spill [r17] = f19,32\n" + "\tmov loc30 = b0 ;;\n" /* save return address */ + "\tstf.spill [r16] = f20,32\n" + "\tstf.spill [r17] = f21,32\n" + "\tbr.few b6 ;;\n" /* branch to function */ + ".global StgReturn\n" + "StgReturn:\n" + "\tmov r8 = loc16\n" /* return value in r8 */ + "\tadds r16 = %0-(6*16), sp\n" + "\tadds r17 = %0-(5*16), sp ;;\n" + "\tldf.fill f16 = [r16],32\n" /* start restoring fp regs */ + "\tldf.fill f17 = [r17],32\n" + "\tmov ar.pfs = loc29 ;;\n" /* restore register frame */ + "\tldf.fill f18 = [r16],32\n" + "\tldf.fill f19 = [r17],32\n" + "\tmov b0 = loc30 ;;\n" /* restore return address */ + "\tldf.fill f20 = [r16],32\n" + "\tldf.fill f21 = [r17],32\n" + "\tadds sp = %0, sp\n" /* restore stack */ + "\tbr.ret.sptk.many b0 ;;\n" /* return */ + : : "i"(RESERVED_C_STACK_BYTES + 6*16), "i"(LOCALS)); +} + +#endif #endif /* !USE_MINIINTERPRETER */