fix for gcc 4.1.x
[ghc-hetmet.git] / ghc / rts / StgCRun.c
index 79104e7..29d4efe 100644 (file)
@@ -1,5 +1,4 @@
 /* -----------------------------------------------------------------------------
- * $Id: StgCRun.c,v 1.44 2004/09/03 15:28:56 simonmar Exp $
  *
  * (c) The GHC Team, 1998-2003
  *
@@ -51,7 +50,7 @@
  * in libc.a clobbers $s6.
  */
 #include "ghcconfig.h"
-#ifdef alpha_TARGET_ARCH
+#ifdef alpha_HOST_ARCH
 #define alpha_EXTRA_CAREFUL
 register long   fake_ra __asm__("$26");
 register long   fake_gp __asm__("$29");
@@ -83,17 +82,17 @@ register double fake_f9 __asm__("$f9");
    any architecture (using miniinterpreter)
    -------------------------------------------------------------------------- */
 
-StgThreadReturnCode StgRun(StgFunPtr f, StgRegTable *basereg STG_UNUSED)
+StgRegTable * StgRun(StgFunPtr f, StgRegTable *basereg STG_UNUSED)
 {
     while (f) {
-       if (RtsFlags[0].DebugFlags.interpreter) {
+       IF_DEBUG(interpreter,
            debugBelch("Jumping to ");
            printPtr((P_)f); fflush(stdout);
            debugBelch("\n");
-       }
+           );
        f = (StgFunPtr) (f)();
     }
-    return (StgThreadReturnCode)R1.i;
+    return (StgRegTable *)R1.p;
 }
 
 StgFunPtr StgReturn(void)
@@ -113,13 +112,19 @@ StgFunPtr StgReturn(void)
    x86 architecture
    -------------------------------------------------------------------------- */
 
-#ifdef i386_TARGET_ARCH
+#ifdef i386_HOST_ARCH
 
-StgThreadReturnCode
+#ifdef darwin_TARGET_OS
+#define STG_GLOBAL ".globl "
+#else
+#define STG_GLOBAL ".global "
+#endif
+
+StgRegTable *
 StgRun(StgFunPtr f, StgRegTable *basereg) {
 
     unsigned char space[ RESERVED_C_STACK_BYTES + 4*sizeof(void *) ];
-    StgThreadReturnCode r;
+    StgRegTable * r;
 
     __asm__ volatile (
        /*
@@ -136,14 +141,32 @@ StgRun(StgFunPtr f, StgRegTable *basereg) {
         */
        "movl %3,%%ebx\n\t"
        /*
-        * grab the function argument from the stack, and jump to it.
+        * grab the function argument from the stack
         */
         "movl %2,%%eax\n\t"
+        
+#if darwin_TARGET_OS
+       /*
+        * Darwin: keep the stack aligned
+        */
+        "subl $12,%%esp\n\t"
+#endif
+
+       /*
+        * jump to it
+        */
         "jmp *%%eax\n\t"
 
-       ".global " STG_RETURN "\n"
+       STG_GLOBAL STG_RETURN "\n"
                STG_RETURN ":\n\t"
 
+#if darwin_TARGET_OS
+       /*
+        * Darwin: keep the stack aligned
+        */
+        "addl $12,%%esp\n\t"
+#endif
+
        "movl %%esi, %%eax\n\t"   /* Return value in R1  */
 
        /*
@@ -176,11 +199,12 @@ StgRun(StgFunPtr f, StgRegTable *basereg) {
 
    ------------------------------------------------------------------------- */
 
-#ifdef x86_64_TARGET_ARCH
+#ifdef x86_64_HOST_ARCH
 
-extern StgThreadReturnCode StgRun(StgFunPtr f, StgRegTable *basereg);
+extern StgRegTable * StgRun(StgFunPtr f, StgRegTable *basereg);
 
-static void StgRunIsImplementedInAssembler(void)
+static void GNUC3_ATTRIBUTE(used)
+StgRunIsImplementedInAssembler(void)
 {
     __asm__ volatile (
        /*
@@ -200,7 +224,7 @@ static void StgRunIsImplementedInAssembler(void)
        /*
         * Set BaseReg
         */
-       "movq %%rsi,%%rbx\n\t"
+       "movq %%rsi,%%r13\n\t"
        /*
         * grab the function argument from the stack, and jump to it.
         */
@@ -210,7 +234,7 @@ static void StgRunIsImplementedInAssembler(void)
        ".global " STG_RETURN "\n"
                STG_RETURN ":\n\t"
 
-       "movq %%r13, %%rax\n\t"   /* Return value in R1  */
+       "movq %%rbx, %%rax\n\t"   /* Return value in R1  */
 
        /*
         * restore callee-saves registers.  (Don't stomp on %%rax!)
@@ -226,7 +250,41 @@ static void StgRunIsImplementedInAssembler(void)
        "addq %0, %%rsp\n\t"
        "retq"
 
-       : : "i"(RESERVED_C_STACK_BYTES+48 /*stack frame size*/));
+       : : "i"(RESERVED_C_STACK_BYTES+48+8 /*stack frame size*/));
+    /* 
+       HACK alert!
+
+       The x86_64 ABI specifies that on a procedure call, %rsp is
+       aligned on a 16-byte boundary + 8.  That is, the first
+       argument on the stack after the return address will be
+       16-byte aligned.  
+       
+       On its own that would be fine: RESERVED_C_STACK_BYTES+48 is a
+       multiple of 16 bytes.
+       
+       BUT... when we do a C-call from STG land, gcc likes to put the
+       stack alignment adjustment in the prologue.  E.g. if we're calling
+       a function with arguments in regs, gcc will insert 'subq $8,%rsp'
+       in the prologue, to keep %rsp aligned (the return address is 8
+       bytes, remember).  The mangler throws away the prologue, so we
+       lose the stack alignment.
+
+       The hack is to add this extra 8 bytes to our %rsp adjustment
+       here, so that throughout STG code, %rsp is 16-byte aligned,
+       ready for a C-call.  
+
+       A quick way to see if this is wrong is to compile this code:
+
+          main = System.Exit.exitWith ExitSuccess
+
+       And run it with +RTS -sstderr.  The stats code in the RTS, in
+       particular statsPrintf(), relies on the stack alignment because
+       it saves the %xmm regs on the stack, so it'll fall over if the
+       stack isn't aligned, and calling exitWith from Haskell invokes
+       shutdownHaskellAndExit using a C call.
+
+       Future gcc releases will almost certainly break this hack...
+    */
 }
 
 #endif /* x86-64 */
@@ -262,9 +320,9 @@ static void StgRunIsImplementedInAssembler(void)
    Updated info (GHC 4.08.2): not saving %i7 any more (see below).
    -------------------------------------------------------------------------- */
 
-#ifdef sparc_TARGET_ARCH
+#ifdef sparc_HOST_ARCH
 
-StgThreadReturnCode
+StgRegTable *
 StgRun(StgFunPtr f, StgRegTable *basereg) {
 
     unsigned char space[RESERVED_C_STACK_BYTES];
@@ -297,7 +355,7 @@ StgRun(StgFunPtr f, StgRegTable *basereg) {
     __asm__ volatile ("ld %1,%0"
                      : "=r" (i7) : "m" (((void **)(space))[100]));
 #endif
-    return (StgThreadReturnCode)R1.i;
+    return (StgRegTable *)R1.i;
 }
 
 #endif
@@ -330,9 +388,9 @@ StgRun(StgFunPtr f, StgRegTable *basereg) {
       tru64unix.compaq.com/docs/base_doc/DOCUMENTATION/V51_PDF/ARH9MBTE.PDF
    -------------------------------------------------------------------------- */
 
-#ifdef alpha_TARGET_ARCH
+#ifdef alpha_HOST_ARCH
 
-StgThreadReturnCode
+StgRegTable *
 StgRun(StgFunPtr f, StgRegTable *basereg)
 {
     register long   real_ra __asm__("$26"); volatile long   save_ra;
@@ -361,7 +419,7 @@ StgRun(StgFunPtr f, StgRegTable *basereg)
 
     register StgFunPtr real_pv __asm__("$27");
 
-    StgThreadReturnCode ret;
+    StgRegTable * ret;
 
     save_ra = real_ra;
     save_gp = real_gp;
@@ -426,19 +484,19 @@ StgRun(StgFunPtr f, StgRegTable *basereg)
     return ret;
 }
 
-#endif /* alpha_TARGET_ARCH */
+#endif /* alpha_HOST_ARCH */
 
 /* -----------------------------------------------------------------------------
    HP-PA architecture
    -------------------------------------------------------------------------- */
 
-#ifdef hppa1_1_TARGET_ARCH
+#ifdef hppa1_1_HOST_ARCH
 
-StgThreadReturnCode
+StgRegTable *
 StgRun(StgFunPtr f, StgRegTable *basereg)
 {
     StgChar space[RESERVED_C_STACK_BYTES+16*sizeof(long)+10*sizeof(double)];
-    StgThreadReturnCode ret;
+    StgRegTable * ret;
 
     __asm__ volatile ("ldo %0(%%r30),%%r19\n"
                      "\tstw %%r3, 0(0,%%r19)\n"
@@ -519,7 +577,7 @@ StgRun(StgFunPtr f, StgRegTable *basereg)
     return ret;
 }
 
-#endif /* hppa1_1_TARGET_ARCH */
+#endif /* hppa1_1_HOST_ARCH */
 
 /* -----------------------------------------------------------------------------
    PowerPC architecture
@@ -528,13 +586,19 @@ StgRun(StgFunPtr f, StgRegTable *basereg)
    
    -------------------------------------------------------------------------- */
 
-#ifdef powerpc_TARGET_ARCH
+#ifdef powerpc_HOST_ARCH
 
-extern StgThreadReturnCode StgRun(StgFunPtr f, StgRegTable *basereg);
+extern StgRegTable * StgRun(StgFunPtr f, StgRegTable *basereg);
 
-#ifdef darwin_TARGET_OS
-static void StgRunIsImplementedInAssembler(void)
+#ifdef darwin_HOST_OS
+static void GNUC3_ATTRIBUTE(used)
+StgRunIsImplementedInAssembler(void)
 {
+#if HAVE_SUBSECTIONS_VIA_SYMBOLS
+            // If the toolchain supports dead-stripping, we have to
+            // prevent it for this symbol (the toolchain tends to get confused here).
+        __asm__ volatile (".no_dead_strip _StgRunIsImplementedInAssembler");
+#endif
        __asm__ volatile (
                "\n.globl _StgRun\n"
                "_StgRun:\n"
@@ -552,7 +616,7 @@ static void StgRunIsImplementedInAssembler(void)
                "\tla r1,%0(r1)\n"
                "\tlmw r13,-220(r1)\n"
                "\tb restFP # f14\n"
-       : : "i"(RESERVED_C_STACK_BYTES+288 /*stack frame size*/));
+       : : "i"(RESERVED_C_STACK_BYTES+224 /*stack frame size*/));
 }
 #else
 
@@ -566,7 +630,8 @@ static void StgRunIsImplementedInAssembler(void)
 // *) The Link Register is saved to a different offset in the caller's stack frame
 //    (Linux: 4(r1), Darwin 8(r1))
 
-static void StgRunIsImplementedInAssembler(void)
+static void GNUC3_ATTRIBUTE(used)
+StgRunIsImplementedInAssembler(void)
 {
        __asm__ volatile (
                "\t.globl StgRun\n"
@@ -627,13 +692,150 @@ static void StgRunIsImplementedInAssembler(void)
                "\tlwz 0,4(1)\n"
                "\tmtlr 0\n"
                "\tblr\n"
-       : : "i"(RESERVED_C_STACK_BYTES+288 /*stack frame size*/));
+       : : "i"(RESERVED_C_STACK_BYTES+224 /*stack frame size*/));
 }
 #endif
 
 #endif
 
 /* -----------------------------------------------------------------------------
+   PowerPC 64 architecture
+
+   Everything is in assembler, so we don't have to deal with GCC...
+   
+   -------------------------------------------------------------------------- */
+
+#ifdef powerpc64_HOST_ARCH
+
+#ifdef linux_HOST_OS
+extern StgRegTable * StgRun(StgFunPtr f, StgRegTable *basereg);
+
+static void GNUC3_ATTRIBUTE(used)
+StgRunIsImplementedInAssembler(void)  // C wrapper whose body is the assembler text that defines StgRun and StgReturn
+{
+        // r0 volatile
+       // r1 stack pointer
+       // r2 toc - needs to be saved
+       // r3-r10 argument passing, volatile
+       // r11, r12 very volatile (not saved across cross-module calls)
+       // r13 thread local state (never modified, don't need to save)
+       // r14-r31 callee-save
+       __asm__ volatile (
+               ".section \".opd\",\"aw\"\n"  // descriptors for StgRun and StgReturn are emitted into .opd
+               ".align 3\n"
+               ".globl StgRun\n"
+               "StgRun:\n"
+                       "\t.quad\t.StgRun,.TOC.@tocbase,0\n"  // descriptor: code address, TOC base, 0
+                       "\t.size StgRun,24\n"
+               ".globl StgReturn\n"
+               "StgReturn:\n"
+                       "\t.quad\t.StgReturn,.TOC.@tocbase,0\n"  // descriptor: code address, TOC base, 0
+                       "\t.size StgReturn,24\n"
+               ".previous\n"  // switch back to the previous section for the code itself
+               ".globl .StgRun\n"
+               ".type .StgRun,@function\n"
+               ".StgRun:\n"
+                       "\tmflr 0\n"  // r0 = link register (return address)
+                       "\tmr 5, 1\n"  // r5 = incoming stack pointer; all saves below are relative to it
+                       "\tstd 0, 16(1)\n"  // store the return address at 16(r1), before the frame is pushed
+                       "\tstdu 1, -%0(1)\n"  // push a frame of %0 bytes: store old r1 at the new top and update r1
+                       "\tstd 2, -296(5)\n"  // save the TOC pointer (r2)
+                       "\tstd 14, -288(5)\n"  // save callee-save r14-r31, 8 bytes each
+                       "\tstd 15, -280(5)\n"
+                       "\tstd 16, -272(5)\n"
+                       "\tstd 17, -264(5)\n"
+                       "\tstd 18, -256(5)\n"
+                       "\tstd 19, -248(5)\n"
+                       "\tstd 20, -240(5)\n"
+                       "\tstd 21, -232(5)\n"
+                       "\tstd 22, -224(5)\n"
+                       "\tstd 23, -216(5)\n"
+                       "\tstd 24, -208(5)\n"
+                       "\tstd 25, -200(5)\n"
+                       "\tstd 26, -192(5)\n"
+                       "\tstd 27, -184(5)\n"
+                       "\tstd 28, -176(5)\n"
+                       "\tstd 29, -168(5)\n"
+                       "\tstd 30, -160(5)\n"
+                       "\tstd 31, -152(5)\n"
+                       "\tstfd 14, -144(5)\n"  // save floating-point registers f14-f31, 8 bytes each
+                       "\tstfd 15, -136(5)\n"
+                       "\tstfd 16, -128(5)\n"
+                       "\tstfd 17, -120(5)\n"
+                       "\tstfd 18, -112(5)\n"
+                       "\tstfd 19, -104(5)\n"
+                       "\tstfd 20, -96(5)\n"
+                       "\tstfd 21, -88(5)\n"
+                       "\tstfd 22, -80(5)\n"
+                       "\tstfd 23, -72(5)\n"
+                       "\tstfd 24, -64(5)\n"
+                       "\tstfd 25, -56(5)\n"
+                       "\tstfd 26, -48(5)\n"
+                       "\tstfd 27, -40(5)\n"
+                       "\tstfd 28, -32(5)\n"
+                       "\tstfd 29, -24(5)\n"
+                       "\tstfd 30, -16(5)\n"
+                       "\tstfd 31, -8(5)\n"
+                       "\tmr 27, 4\n"  // BaseReg == r27 (r4 carries the second argument, basereg)
+                       "\tld 2, 8(3)\n"  // r3 = f, read with the same layout as the descriptors above: TOC pointer at offset 8
+                       "\tld 3, 0(3)\n"  // ... and code address at offset 0
+                       "\tmtctr 3\n"
+                       "\tbctr\n"  // jump (no link) to f's code; control comes back via .StgReturn
+               ".globl .StgReturn\n"
+               ".type .StgReturn,@function\n"
+               ".StgReturn:\n"
+                       "\tmr 3,14\n"  // r3 = r14; presumably r14 holds STG R1 on this target - confirm against the register mapping
+                       "\tla 5, %0(1)\n" // load address == addi r5, r1, %0
+                       "\tld 2, -296(5)\n"  // restore the TOC pointer
+                       "\tld 14, -288(5)\n"  // restore r14-r31 from the slots written by .StgRun
+                       "\tld 15, -280(5)\n"
+                       "\tld 16, -272(5)\n"
+                       "\tld 17, -264(5)\n"
+                       "\tld 18, -256(5)\n"
+                       "\tld 19, -248(5)\n"
+                       "\tld 20, -240(5)\n"
+                       "\tld 21, -232(5)\n"
+                       "\tld 22, -224(5)\n"
+                       "\tld 23, -216(5)\n"
+                       "\tld 24, -208(5)\n"
+                       "\tld 25, -200(5)\n"
+                       "\tld 26, -192(5)\n"
+                       "\tld 27, -184(5)\n"
+                       "\tld 28, -176(5)\n"
+                       "\tld 29, -168(5)\n"
+                       "\tld 30, -160(5)\n"
+                       "\tld 31, -152(5)\n"
+                       "\tlfd 14, -144(5)\n"  // restore f14-f31
+                       "\tlfd 15, -136(5)\n"
+                       "\tlfd 16, -128(5)\n"
+                       "\tlfd 17, -120(5)\n"
+                       "\tlfd 18, -112(5)\n"
+                       "\tlfd 19, -104(5)\n"
+                       "\tlfd 20, -96(5)\n"
+                       "\tlfd 21, -88(5)\n"
+                       "\tlfd 22, -80(5)\n"
+                       "\tlfd 23, -72(5)\n"
+                       "\tlfd 24, -64(5)\n"
+                       "\tlfd 25, -56(5)\n"
+                       "\tlfd 26, -48(5)\n"
+                       "\tlfd 27, -40(5)\n"
+                       "\tlfd 28, -32(5)\n"
+                       "\tlfd 29, -24(5)\n"
+                       "\tlfd 30, -16(5)\n"
+                       "\tlfd 31, -8(5)\n"
+                       "\tmr 1, 5\n"  // pop the frame: r1 = r1 + %0
+                       "\tld 0, 16(1)\n"  // reload the return address stored by .StgRun
+                       "\tmtlr 0\n"
+                       "\tblr\n"  // return to whoever called StgRun
+       : : "i"(RESERVED_C_STACK_BYTES+304 /*stack frame size*/));
+}
+#else // linux_HOST_OS
+#error Only linux support for power64 right now.
+#endif
+
+#endif
+
+/* -----------------------------------------------------------------------------
    IA64 architecture
 
    Again, in assembler - so we can fiddle with the register stack, and because
@@ -646,7 +848,7 @@ static void StgRunIsImplementedInAssembler(void)
            loc31: saved gp (gcc 3.3 uses this slot)
    -------------------------------------------------------------------------- */
 
-#ifdef ia64_TARGET_ARCH
+#ifdef ia64_HOST_ARCH
 
 /* the memory stack is rarely used, so 16K is excessive */
 #undef RESERVED_C_STACK_BYTES
@@ -659,7 +861,8 @@ static void StgRunIsImplementedInAssembler(void)
 #define LOCALS 31
 #endif
 
-static void StgRunIsImplementedInAssembler(void)
+static void GNUC3_ATTRIBUTE(used)
+StgRunIsImplementedInAssembler(void)
 {
     __asm__ volatile(
                ".global StgRun\n"
@@ -700,4 +903,3 @@ static void StgRunIsImplementedInAssembler(void)
 #endif
 
 #endif /* !USE_MINIINTERPRETER */
-