fix for gcc 4.1.x
[ghc-hetmet.git] / ghc / rts / StgCRun.c
index fc4ca66..29d4efe 100644 (file)
@@ -1,7 +1,6 @@
 /* -----------------------------------------------------------------------------
- * $Id: StgCRun.c,v 1.25 2001/08/07 20:06:41 ken Exp $
  *
- * (c) The GHC Team, 1998-2000
+ * (c) The GHC Team, 1998-2003
  *
  * STG-to-C glue.
  *
  * the whatever way C returns a value.
  *
  * NOTE: StgRun/StgReturn do *NOT* load or store Hp or any
- * other registers (other than saving the C callee-saves 
+ * other registers (other than saving the C callee-saves
  * registers).  Instead, the called function "f" must do that
  * in STG land.
- * 
+ *
  * GCC will have assumed that pushing/popping of C-stack frames is
  * going on when it generated its code, and used stack space
  * accordingly.  However, we actually {\em post-process away} all
  * such stack-framery (see \tr{ghc/driver/ghc-asm.lprl}). Things will
  * be OK however, if we initially make sure there are
  * @RESERVED_C_STACK_BYTES@ on the C-stack to begin with, for local
- * variables.  
+ * variables.
  *
  * -------------------------------------------------------------------------- */
 
+#include "PosixSource.h"
+
 
 /*
  * We define the following (unused) global register variables, because for
  * definition has been read.  Any point after #include "Stg.h" would be too
  * late.
  *
- * You can define alpha_EXTRA_CAREFUL here to save $s6, $f8 and $f9 -- registers
- * that we don't use but which are callee-save registers.  It shouldn't be
- * necessary.
+ * We define alpha_EXTRA_CAREFUL here to save $s6, $f8 and $f9 -- registers
+ * that we don't use but which are callee-save registers.  This is needed
+ * because the __divq() routine in libc.a clobbers $s6.
  */
-#include "config.h"
-#ifdef alpha_TARGET_ARCH
-#undef alpha_EXTRA_CAREFUL
+#include "ghcconfig.h"
+#ifdef alpha_HOST_ARCH
+#define alpha_EXTRA_CAREFUL
 register long   fake_ra __asm__("$26");
+register long   fake_gp __asm__("$29");
 #ifdef alpha_EXTRA_CAREFUL
 register long   fake_s6 __asm__("$15");
 register double fake_f8 __asm__("$f8");
@@ -65,9 +67,11 @@ register double fake_f9 __asm__("$f9");
 #include "Stg.h"
 #include "Rts.h"
 #include "StgRun.h"
+#include "RtsFlags.h"
+#include "OSThreads.h"
+#include "Capability.h"
 
 #ifdef DEBUG
-#include "RtsFlags.h"
 #include "RtsUtils.h"
 #include "Printer.h"
 #endif
@@ -77,175 +81,24 @@ register double fake_f9 __asm__("$f9");
 /* -----------------------------------------------------------------------------
    any architecture (using miniinterpreter)
    -------------------------------------------------------------------------- */
-       
-/* The static @jmp_environment@ variable allows @miniInterpret@ to
- * communicate with @StgReturn@.
- * 
- * Because @StgRun@ may be used recursively, we carefully
- * save and restore the whole of @jmp_environment@.
- */
-#include <setjmp.h>
-#include <string.h> /* for memcpy */
-
-static jmp_buf jmp_environment;
-
-#if 1
-
-extern StgThreadReturnCode StgRun(StgFunPtr f, StgRegTable *basereg)
-{
-   while (f) {
-      IF_DEBUG(evaluator,
-              fprintf(stderr,"Jumping to ");
-              printPtr((P_)f);
-              fprintf(stderr,"\n");
-             );
-      f = (StgFunPtr) (f)();
-   }
-   return (StgThreadReturnCode)R1.i;
-}
-
-EXTFUN(StgReturn)
-{
-   return 0;
-}
-
-#else
-
-#define CHECK_STACK   0
-#define STACK_DETAILS 0
 
-static int enters = 0;
-
-static void scanStackSeg ( W_* ptr, int nwords )
-{
-   W_ w;
-#if CHECK_STACK
-   int nwords0 = nwords;
-#if STACK_DETAILS
-   while (nwords > 0) {
-      w = *ptr;
-      if (IS_ARG_TAG(w)) {
-         fprintf ( stderr, "%d",w ); nwords--; ptr++;
-        while (w) { fprintf(stderr, "_"); w--; nwords--; ptr++; }
-      }
-      else {
-         fprintf(stderr, "p"); 
-         nwords--; ptr++;
-      }
-   }
-   if (nwords < 0) fprintf(stderr, "erk: nwords < 0\n");
-#endif
-   checkStackChunk ( ptr, ptr-nwords0 );
-#endif
-}
-
-extern StgFunPtr stg_enterStackTop;
-extern StgThreadReturnCode StgRun(StgFunPtr f, StgRegTable *basereg)
+StgRegTable * StgRun(StgFunPtr f, StgRegTable *basereg STG_UNUSED)
 {
-    char* nm;
-    while (1) {
-
-#if CHECK_STACK
-   {
-   int i;
-   StgTSO*  tso = basereg->rCurrentTSO;
-   StgWord* sb  = tso->stack + tso->stack_size;
-   StgWord* sp;
-   StgWord* su;
-   int ws;
-
-   if (f == &stg_enterStackTop) {
-      sp = tso->sp;
-      su = tso->su;
-   } else {
-      sp  = basereg->rSp;
-      su  = basereg->rSu;
-   }
-
-#if STACK_DETAILS
-   fprintf(stderr, 
-           "== SB = %p   SP = %p(%p)   SU = %p   SpLim = %p(%p)\n", 
-           sb, sp, tso->sp,   su, basereg->rSpLim, tso->stack + RESERVED_STACK_WORDS);
-#endif
-
-   if (su >= sb) goto postloop;
-   if (!sp || !su) goto postloop;
-
-   printStack ( sp, sb, su);
-
-   while (1) {
-      ws = su - sp;
-      switch (get_itbl((StgClosure*)su)->type) {
-         case STOP_FRAME: 
-            scanStackSeg(sp,ws);
-#if STACK_DETAILS
-            fprintf(stderr, "S%d ",ws); 
-            fprintf(stderr, "\n");
-#endif
-            goto postloop;
-         case UPDATE_FRAME: 
-            scanStackSeg(sp,ws);
-#if STACK_DETAILS
-            fprintf(stderr,"U%d ",ws); 
-#endif
-            sp = su + sizeofW(StgUpdateFrame);
-            su = ((StgUpdateFrame*)su)->link;
-            break;
-         case SEQ_FRAME: 
-            scanStackSeg(sp,ws);
-#if STACK_DETAILS
-            fprintf(stderr,"Q%d ",ws); 
-#endif
-            sp = su + sizeofW(StgSeqFrame);
-            su = ((StgSeqFrame*)su)->link;
-            break;
-         case CATCH_FRAME: 
-            scanStackSeg(sp,ws);
-#if STACK_DETAILS
-            fprintf(stderr,"C%d ",ws); 
-#endif
-            sp = su + sizeofW(StgCatchFrame);
-            su = ((StgCatchFrame*)su)->link;
-            break;
-         default:
-            fprintf(stderr, "?\nweird record on stack\n");
-            assert(0);
-            goto postloop;
-      }
-   }
-   postloop:
-   }
-#endif
-#if STACK_DETAILS
-       fprintf(stderr,"\n");
-#endif
-#if 1
-       fprintf(stderr,"-- enter %p ", f);
-       nm = nameFromOPtr ( f );
-          if (nm) fprintf(stderr, "%s", nm); else
-          printPtr((P_)f);
-       fprintf ( stderr, "\n");
-#endif
-#if STACK_DETAILS
-       fprintf(stderr,"\n");
-#endif
-    zzz:
-       if (enters % 1000 == 0) fprintf(stderr, "%d enters\n",enters);
-       enters++;
-       f = (StgFunPtr) (f)();
-       if (!f) break;
+    while (f) {
+       IF_DEBUG(interpreter,
+           debugBelch("Jumping to ");
+           printPtr((P_)f); fflush(stdout);
+           debugBelch("\n");
+           );
+       f = (StgFunPtr) (f)();
     }
-    fprintf (stderr, "miniInterpreter: bye!\n\n" );
-    return (StgThreadReturnCode)R1.i;
+    return (StgRegTable *)R1.p;
 }
 
-EXTFUN(StgReturn)
+StgFunPtr StgReturn(void)
 {
-   return 0;
+    return 0;
 }
-#endif
-
-
 
 #else /* !USE_MINIINTERPRETER */
 
@@ -258,17 +111,23 @@ EXTFUN(StgReturn)
 /* -----------------------------------------------------------------------------
    x86 architecture
    -------------------------------------------------------------------------- */
-       
-#ifdef i386_TARGET_ARCH
 
-StgThreadReturnCode
+#ifdef i386_HOST_ARCH
+
+#ifdef darwin_TARGET_OS
+#define STG_GLOBAL ".globl "
+#else
+#define STG_GLOBAL ".global "
+#endif
+
+StgRegTable *
 StgRun(StgFunPtr f, StgRegTable *basereg) {
 
     unsigned char space[ RESERVED_C_STACK_BYTES + 4*sizeof(void *) ];
-    StgThreadReturnCode r;
+    StgRegTable * r;
 
     __asm__ volatile (
-       /* 
+       /*
         * save callee-saves registers on behalf of the STG code.
         */
        "movl %%esp, %%eax\n\t"
@@ -282,14 +141,32 @@ StgRun(StgFunPtr f, StgRegTable *basereg) {
         */
        "movl %3,%%ebx\n\t"
        /*
-        * grab the function argument from the stack, and jump to it.
+        * grab the function argument from the stack
         */
         "movl %2,%%eax\n\t"
+        
+#if darwin_TARGET_OS
+       /*
+        * Darwin: keep the stack aligned
+        */
+        "subl $12,%%esp\n\t"
+#endif
+
+       /*
+        * jump to it
+        */
         "jmp *%%eax\n\t"
 
-       ".global " STG_RETURN "\n"
+       STG_GLOBAL STG_RETURN "\n"
                STG_RETURN ":\n\t"
 
+#if darwin_TARGET_OS
+       /*
+        * Darwin: keep the stack aligned
+        */
+        "addl $12,%%esp\n\t"
+#endif
+
        "movl %%esi, %%eax\n\t"   /* Return value in R1  */
 
        /*
@@ -312,10 +189,110 @@ StgRun(StgFunPtr f, StgRegTable *basereg) {
 
 #endif
 
+/* ----------------------------------------------------------------------------
+   x86-64 is almost the same as plain x86.
+
+   I've done it using entirely inline assembler, because I couldn't
+   get gcc to generate the correct subtraction from %rsp by using
+   the local array variable trick.  It didn't seem to reserve
+   enough space.  Oh well, it's not much harder this way.
+
+   ------------------------------------------------------------------------- */
+
+#ifdef x86_64_HOST_ARCH
+
+extern StgRegTable * StgRun(StgFunPtr f, StgRegTable *basereg);
+
+static void GNUC3_ATTRIBUTE(used)
+StgRunIsImplementedInAssembler(void)
+{
+    __asm__ volatile (
+       /*
+        * StgRun entry point: reserve %0 bytes of C stack, then save the
+        * callee-saves registers (%rbx, %rbp, %r12-%r15) on behalf of the
+        * STG code in the top 48 bytes of the reserved area.
+        */
+       ".globl StgRun\n"
+       "StgRun:\n\t"
+       "subq %0, %%rsp\n\t"
+       "movq %%rsp, %%rax\n\t"
+       "addq %0-48, %%rax\n\t"
+        "movq %%rbx,0(%%rax)\n\t"
+        "movq %%rbp,8(%%rax)\n\t"
+        "movq %%r12,16(%%rax)\n\t"
+        "movq %%r13,24(%%rax)\n\t"
+        "movq %%r14,32(%%rax)\n\t"
+        "movq %%r15,40(%%rax)\n\t"
+       /*
+        * Set BaseReg (kept in %r13) from the basereg argument in %rsi
+        */
+       "movq %%rsi,%%r13\n\t"
+       /*
+        * grab the function argument f (it arrives in %rdi, not on the
+        * stack), and jump to it.
+        */
+        "movq %%rdi,%%rax\n\t"
+        "jmp *%%rax\n\t"
+
+       ".global " STG_RETURN "\n"
+               STG_RETURN ":\n\t"
+
+       "movq %%rbx, %%rax\n\t"   /* Return value in R1  */
+
+       /*
+        * restore callee-saves registers.  (Don't stomp on %%rax!)
+        */
+       "movq %%rsp, %%rdx\n\t"
+       "addq %0-48, %%rdx\n\t"
+        "movq 0(%%rdx),%%rbx\n\t"      /* restore the registers saved above */
+        "movq 8(%%rdx),%%rbp\n\t"
+        "movq 16(%%rdx),%%r12\n\t"
+        "movq 24(%%rdx),%%r13\n\t"
+        "movq 32(%%rdx),%%r14\n\t"
+        "movq 40(%%rdx),%%r15\n\t"
+       "addq %0, %%rsp\n\t"
+       "retq"
+
+       : : "i"(RESERVED_C_STACK_BYTES+48+8 /*stack frame size*/));
+    /*
+       HACK alert!
+
+       The x86_64 ABI specifies that on a procedure call, %rsp is
+       aligned on a 16-byte boundary + 8.  That is, the first
+       argument on the stack after the return address will be
+       16-byte aligned.
+
+       Which should be fine: RESERVED_C_STACK_BYTES+48 is a multiple
+       of 16 bytes.
+
+       BUT... when we do a C-call from STG land, gcc likes to put the
+       stack alignment adjustment in the prolog.  eg. if we're calling
+       a function with arguments in regs, gcc will insert 'subq $8,%rsp'
+       in the prolog, to keep %rsp aligned (the return address is 8
+       bytes, remember).  The mangler throws away the prolog, so we
+       lose the stack alignment.
+
+       The hack is to add this extra 8 bytes (the "+8" in the frame
+       size operand above) to our %rsp adjustment here, so that
+       throughout STG code, %rsp is 16-byte aligned, ready for a C-call.
+
+       A quick way to see if this is wrong is to compile this code:
+
+          main = System.Exit.exitWith ExitSuccess
+
+       And run it with +RTS -sstderr.  The stats code in the RTS, in
+       particular statsPrintf(), relies on the stack alignment because
+       it saves the %xmm regs on the stack, so it'll fall over if the
+       stack isn't aligned, and calling exitWith from Haskell invokes
+       shutdownHaskellAndExit using a C call.
+
+       Future gcc releases will almost certainly break this hack...
+    */
+}
+
+#endif /* x86-64 */
+
 /* -----------------------------------------------------------------------------
    Sparc architecture
 
-   -- 
+   --
    OLD COMMENT from GHC-3.02:
 
    We want tailjumps to be calls, because `call xxx' is the only Sparc
@@ -342,10 +319,10 @@ StgRun(StgFunPtr f, StgRegTable *basereg) {
 
    Updated info (GHC 4.08.2): not saving %i7 any more (see below).
    -------------------------------------------------------------------------- */
-       
-#ifdef sparc_TARGET_ARCH
 
-StgThreadReturnCode
+#ifdef sparc_HOST_ARCH
+
+StgRegTable *
 StgRun(StgFunPtr f, StgRegTable *basereg) {
 
     unsigned char space[RESERVED_C_STACK_BYTES];
@@ -355,9 +332,9 @@ StgRun(StgFunPtr f, StgRegTable *basereg) {
 #endif
     f();
     __asm__ volatile (
-           ".align 4\n"                
+           ".align 4\n"
             ".global " STG_RETURN "\n"
-                   STG_RETURN ":" 
+                   STG_RETURN ":"
            : : : "l0","l1","l2","l3","l4","l5","l6","l7");
     /* we tell the C compiler that l0-l7 are clobbered on return to
      * StgReturn, otherwise it tries to use these to save eg. the
@@ -375,10 +352,10 @@ StgRun(StgFunPtr f, StgRegTable *basereg) {
      * call to f(), this gets clobbered in STG land and we end up
      * dereferencing a bogus pointer in StgReturn.
      */
-    __asm__ volatile ("ld %1,%0" 
+    __asm__ volatile ("ld %1,%0"
                      : "=r" (i7) : "m" (((void **)(space))[100]));
 #endif
-    return (StgThreadReturnCode)R1.i;
+    return (StgRegTable *)R1.i;
 }
 
 #endif
@@ -406,17 +383,18 @@ StgRun(StgFunPtr f, StgRegTable *basereg) {
     Architecture Reference Manual_, and as a result of asynchronous software
     actions."
 
-   -- Compaq Computer Corporation, Houston. Tru64 UNIX Calling Standard for   
+   -- Compaq Computer Corporation, Houston. Tru64 UNIX Calling Standard for
       Alpha Systems, 5.1 edition, August 2000, section 3.2.1.  http://www.
       tru64unix.compaq.com/docs/base_doc/DOCUMENTATION/V51_PDF/ARH9MBTE.PDF
    -------------------------------------------------------------------------- */
 
-#ifdef alpha_TARGET_ARCH
+#ifdef alpha_HOST_ARCH
 
-StgThreadReturnCode
-StgRun(StgFunPtr f, StgRegTable *basereg) 
+StgRegTable *
+StgRun(StgFunPtr f, StgRegTable *basereg)
 {
     register long   real_ra __asm__("$26"); volatile long   save_ra;
+    register long   real_gp __asm__("$29"); volatile long   save_gp;
 
     register long   real_s0 __asm__("$9" ); volatile long   save_s0;
     register long   real_s1 __asm__("$10"); volatile long   save_s1;
@@ -427,7 +405,7 @@ StgRun(StgFunPtr f, StgRegTable *basereg)
 #ifdef alpha_EXTRA_CAREFUL
     register long   real_s6 __asm__("$15"); volatile long   save_s6;
 #endif
-                                                                                  
+
     register double real_f2 __asm__("$f2"); volatile double save_f2;
     register double real_f3 __asm__("$f3"); volatile double save_f3;
     register double real_f4 __asm__("$f4"); volatile double save_f4;
@@ -441,9 +419,10 @@ StgRun(StgFunPtr f, StgRegTable *basereg)
 
     register StgFunPtr real_pv __asm__("$27");
 
-    StgThreadReturnCode ret;
+    StgRegTable * ret;
 
     save_ra = real_ra;
+    save_gp = real_gp;
 
     save_s0 = real_s0;
     save_s1 = real_s1;
@@ -500,23 +479,24 @@ StgRun(StgFunPtr f, StgRegTable *basereg)
 #endif
 
     real_ra = save_ra;
+    real_gp = save_gp;
 
     return ret;
 }
 
-#endif /* alpha_TARGET_ARCH */
+#endif /* alpha_HOST_ARCH */
 
 /* -----------------------------------------------------------------------------
    HP-PA architecture
    -------------------------------------------------------------------------- */
 
-#ifdef hppa1_1_TARGET_ARCH
+#ifdef hppa1_1_HOST_ARCH
 
-StgThreadReturnCode
-StgRun(StgFunPtr f, StgRegTable *basereg) 
+StgRegTable *
+StgRun(StgFunPtr f, StgRegTable *basereg)
 {
     StgChar space[RESERVED_C_STACK_BYTES+16*sizeof(long)+10*sizeof(double)];
-    StgThreadReturnCode ret;
+    StgRegTable * ret;
 
     __asm__ volatile ("ldo %0(%%r30),%%r19\n"
                      "\tstw %%r3, 0(0,%%r19)\n"
@@ -588,7 +568,7 @@ StgRun(StgFunPtr f, StgRegTable *basereg)
                      "\tfldds   8(0,%%r19),%%fr19\n"
                      "\tldo 32(%%r19),%%r19\n"
                      "\tfldds -16(0,%%r19),%%fr20\n"
-                     "\tfldds  -8(0,%%r19),%%fr21\n" 
+                     "\tfldds  -8(0,%%r19),%%fr21\n"
                         : "=r" (ret)
                         : "n" (-(116 * sizeof(long) + 10 * sizeof(double)))
                         : "%r19"
@@ -597,6 +577,329 @@ StgRun(StgFunPtr f, StgRegTable *basereg)
     return ret;
 }
 
-#endif /* hppa1_1_TARGET_ARCH */
+#endif /* hppa1_1_HOST_ARCH */
+
+/* -----------------------------------------------------------------------------
+   PowerPC architecture
+
+   Everything is in assembler, so we don't have to deal with GCC...
+   
+   -------------------------------------------------------------------------- */
+
+#ifdef powerpc_HOST_ARCH
+
+extern StgRegTable * StgRun(StgFunPtr f, StgRegTable *basereg);
+
+#ifdef darwin_HOST_OS
+static void GNUC3_ATTRIBUTE(used)
+StgRunIsImplementedInAssembler(void)
+{
+#if HAVE_SUBSECTIONS_VIA_SYMBOLS
+            // if the toolchain supports dead-stripping, we have to
+            // prevent it here (it tends to get confused by this function).
+        __asm__ volatile (".no_dead_strip _StgRunIsImplementedInAssembler");
+#endif
+       __asm__ volatile (
+               "\n.globl _StgRun\n"
+               "_StgRun:\n"
+               "\tmflr r0\n"
+               "\tbl saveFP # f14\n" // register saving helper routine
+               "\tstmw r13,-220(r1)\n"
+               "\tstwu r1,-%0(r1)\n" // push our frame (%0 bytes)
+                "\tmr r27,r4\n" // BaseReg == r27
+               "\tmtctr r3\n" // f goes into CTR ...
+               "\tmr r12,r3\n"
+               "\tbctr\n" // ... and we jump to it
+               ".globl _StgReturn\n"
+               "_StgReturn:\n"
+               "\tmr r3,r14\n" // NOTE(review): presumably R1 lives in r14 -- confirm
+               "\tla r1,%0(r1)\n" // pop our frame
+               "\tlmw r13,-220(r1)\n"
+               "\tb restFP # f14\n" // register restoring helper routine
+       : : "i"(RESERVED_C_STACK_BYTES+224 /*stack frame size*/));
+}
+#else
+
+// This version is for PowerPC Linux.
+
+// Differences from the Darwin/Mac OS X version:
+// *) Different Assembler Syntax
+// *) Doesn't use Register Saving Helper Functions (although they exist somewhere)
+// *) We may not access positive stack offsets
+//    (no "Red Zone" as in the Darwin ABI)
+// *) The Link Register is saved to a different offset in the caller's stack frame
+//    (Linux: 4(r1), Darwin 8(r1))
+
+static void GNUC3_ATTRIBUTE(used)
+StgRunIsImplementedInAssembler(void)
+{
+       __asm__ volatile (
+               "\t.globl StgRun\n"
+               "\t.type StgRun,@function\n"
+               "StgRun:\n"
+               "\tmflr 0\n"
+               "\tstw 0,4(1)\n" // link register goes to 4(r1) in the caller's frame
+               "\tmr 5,1\n" // r5 = caller's stack pointer; the saves below are relative to it
+               "\tstwu 1,-%0(1)\n" // push our frame (%0 bytes)
+               "\tstmw 13,-220(5)\n" // integer registers r13..r31
+               "\tstfd 14,-144(5)\n" // floating-point registers f14..f31
+               "\tstfd 15,-136(5)\n"
+               "\tstfd 16,-128(5)\n"
+               "\tstfd 17,-120(5)\n"
+               "\tstfd 18,-112(5)\n"
+               "\tstfd 19,-104(5)\n"
+               "\tstfd 20,-96(5)\n"
+               "\tstfd 21,-88(5)\n"
+               "\tstfd 22,-80(5)\n"
+               "\tstfd 23,-72(5)\n"
+               "\tstfd 24,-64(5)\n"
+               "\tstfd 25,-56(5)\n"
+               "\tstfd 26,-48(5)\n"
+               "\tstfd 27,-40(5)\n"
+               "\tstfd 28,-32(5)\n"
+               "\tstfd 29,-24(5)\n"
+               "\tstfd 30,-16(5)\n"
+               "\tstfd 31,-8(5)\n"
+               "\tmr 27,4\n"  // BaseReg == r27
+               "\tmtctr 3\n" // f goes into CTR ...
+               "\tmr 12,3\n"
+               "\tbctr\n" // ... and we jump to it
+               ".globl StgReturn\n"
+               "\t.type StgReturn,@function\n"
+               "StgReturn:\n"
+               "\tmr 3,14\n" // NOTE(review): presumably R1 lives in r14 -- confirm
+               "\tla 5,%0(1)\n" // r5 = caller's stack pointer again
+               "\tlmw 13,-220(5)\n"
+               "\tlfd 14,-144(5)\n"
+               "\tlfd 15,-136(5)\n"
+               "\tlfd 16,-128(5)\n"
+               "\tlfd 17,-120(5)\n"
+               "\tlfd 18,-112(5)\n"
+               "\tlfd 19,-104(5)\n"
+               "\tlfd 20,-96(5)\n"
+               "\tlfd 21,-88(5)\n"
+               "\tlfd 22,-80(5)\n"
+               "\tlfd 23,-72(5)\n"
+               "\tlfd 24,-64(5)\n"
+               "\tlfd 25,-56(5)\n"
+               "\tlfd 26,-48(5)\n"
+               "\tlfd 27,-40(5)\n"
+               "\tlfd 28,-32(5)\n"
+               "\tlfd 29,-24(5)\n"
+               "\tlfd 30,-16(5)\n"
+               "\tlfd 31,-8(5)\n"
+               "\tmr 1,5\n" // pop our frame
+               "\tlwz 0,4(1)\n" // reload the link register saved on entry
+               "\tmtlr 0\n"
+               "\tblr\n"
+       : : "i"(RESERVED_C_STACK_BYTES+224 /*stack frame size*/));
+}
+#endif
+
+#endif
+
+/* -----------------------------------------------------------------------------
+   PowerPC 64 architecture
+
+   Everything is in assembler, so we don't have to deal with GCC...
+   
+   -------------------------------------------------------------------------- */
+
+#ifdef powerpc64_HOST_ARCH
+
+#ifdef linux_HOST_OS
+extern StgRegTable * StgRun(StgFunPtr f, StgRegTable *basereg);
+
+static void GNUC3_ATTRIBUTE(used)
+StgRunIsImplementedInAssembler(void)
+{
+        // r0 volatile
+       // r1 stack pointer
+       // r2 toc - needs to be saved
+       // r3-r10 argument passing, volatile
+       // r11, r12 very volatile (not saved across cross-module calls)
+       // r13 thread local state (never modified, don't need to save)
+       // r14-r31 callee-save
+       __asm__ volatile (
+               ".section \".opd\",\"aw\"\n"
+               ".align 3\n"
+               ".globl StgRun\n"
+               "StgRun:\n"
+                       "\t.quad\t.StgRun,.TOC.@tocbase,0\n" // 24-byte record: code address, toc base, 0
+                       "\t.size StgRun,24\n"
+               ".globl StgReturn\n"
+               "StgReturn:\n"
+                       "\t.quad\t.StgReturn,.TOC.@tocbase,0\n"
+                       "\t.size StgReturn,24\n"
+               ".previous\n"
+               ".globl .StgRun\n"
+               ".type .StgRun,@function\n"
+               ".StgRun:\n"
+                       "\tmflr 0\n"
+                       "\tmr 5, 1\n" // r5 = caller's stack pointer; the saves below are relative to it
+                       "\tstd 0, 16(1)\n" // link register goes to 16(r1) in the caller's frame
+                       "\tstdu 1, -%0(1)\n" // push our frame (%0 bytes)
+                       "\tstd 2, -296(5)\n" // save our toc pointer
+                       "\tstd 14, -288(5)\n"
+                       "\tstd 15, -280(5)\n"
+                       "\tstd 16, -272(5)\n"
+                       "\tstd 17, -264(5)\n"
+                       "\tstd 18, -256(5)\n"
+                       "\tstd 19, -248(5)\n"
+                       "\tstd 20, -240(5)\n"
+                       "\tstd 21, -232(5)\n"
+                       "\tstd 22, -224(5)\n"
+                       "\tstd 23, -216(5)\n"
+                       "\tstd 24, -208(5)\n"
+                       "\tstd 25, -200(5)\n"
+                       "\tstd 26, -192(5)\n"
+                       "\tstd 27, -184(5)\n"
+                       "\tstd 28, -176(5)\n"
+                       "\tstd 29, -168(5)\n"
+                       "\tstd 30, -160(5)\n"
+                       "\tstd 31, -152(5)\n"
+                       "\tstfd 14, -144(5)\n"
+                       "\tstfd 15, -136(5)\n"
+                       "\tstfd 16, -128(5)\n"
+                       "\tstfd 17, -120(5)\n"
+                       "\tstfd 18, -112(5)\n"
+                       "\tstfd 19, -104(5)\n"
+                       "\tstfd 20, -96(5)\n"
+                       "\tstfd 21, -88(5)\n"
+                       "\tstfd 22, -80(5)\n"
+                       "\tstfd 23, -72(5)\n"
+                       "\tstfd 24, -64(5)\n"
+                       "\tstfd 25, -56(5)\n"
+                       "\tstfd 26, -48(5)\n"
+                       "\tstfd 27, -40(5)\n"
+                       "\tstfd 28, -32(5)\n"
+                       "\tstfd 29, -24(5)\n"
+                       "\tstfd 30, -16(5)\n"
+                       "\tstfd 31, -8(5)\n"
+                       "\tmr 27, 4\n"  // BaseReg == r27
+                       "\tld 2, 8(3)\n" // f is read like the records above: 8(f) is its toc ...
+                       "\tld 3, 0(3)\n" // ... and 0(f) its code address
+                       "\tmtctr 3\n"
+                       "\tbctr\n"
+               ".globl .StgReturn\n"
+               ".type .StgReturn,@function\n"
+               ".StgReturn:\n"
+                       "\tmr 3,14\n" // NOTE(review): presumably R1 lives in r14 -- confirm
+                       "\tla 5, %0(1)\n" // load address == addi r5, r1, %0
+                       "\tld 2, -296(5)\n" // restore our toc pointer
+                       "\tld 14, -288(5)\n"
+                       "\tld 15, -280(5)\n"
+                       "\tld 16, -272(5)\n"
+                       "\tld 17, -264(5)\n"
+                       "\tld 18, -256(5)\n"
+                       "\tld 19, -248(5)\n"
+                       "\tld 20, -240(5)\n"
+                       "\tld 21, -232(5)\n"
+                       "\tld 22, -224(5)\n"
+                       "\tld 23, -216(5)\n"
+                       "\tld 24, -208(5)\n"
+                       "\tld 25, -200(5)\n"
+                       "\tld 26, -192(5)\n"
+                       "\tld 27, -184(5)\n"
+                       "\tld 28, -176(5)\n"
+                       "\tld 29, -168(5)\n"
+                       "\tld 30, -160(5)\n"
+                       "\tld 31, -152(5)\n"
+                       "\tlfd 14, -144(5)\n"
+                       "\tlfd 15, -136(5)\n"
+                       "\tlfd 16, -128(5)\n"
+                       "\tlfd 17, -120(5)\n"
+                       "\tlfd 18, -112(5)\n"
+                       "\tlfd 19, -104(5)\n"
+                       "\tlfd 20, -96(5)\n"
+                       "\tlfd 21, -88(5)\n"
+                       "\tlfd 22, -80(5)\n"
+                       "\tlfd 23, -72(5)\n"
+                       "\tlfd 24, -64(5)\n"
+                       "\tlfd 25, -56(5)\n"
+                       "\tlfd 26, -48(5)\n"
+                       "\tlfd 27, -40(5)\n"
+                       "\tlfd 28, -32(5)\n"
+                       "\tlfd 29, -24(5)\n"
+                       "\tlfd 30, -16(5)\n"
+                       "\tlfd 31, -8(5)\n"
+                       "\tmr 1, 5\n" // pop our frame
+                       "\tld 0, 16(1)\n" // reload the link register saved on entry
+                       "\tmtlr 0\n"
+                       "\tblr\n"
+       : : "i"(RESERVED_C_STACK_BYTES+304 /*stack frame size*/));
+}
+#else // linux_HOST_OS
+#error Only linux support for power64 right now.
+#endif
+
+#endif
+
+/* -----------------------------------------------------------------------------
+   IA64 architecture
+
+   Again, in assembler - so we can fiddle with the register stack, and because
+   gcc doesn't handle asm-clobbered callee-saves correctly.
+
+   loc0  - loc15: preserved locals
+   loc16 - loc28: STG registers
+           loc29: saved ar.pfs
+           loc30: saved b0
+           loc31: saved gp (gcc 3.3 uses this slot)
+   -------------------------------------------------------------------------- */
+
+#ifdef ia64_HOST_ARCH
+
+/* the memory stack is rarely used, so 16K is excessive */
+#undef RESERVED_C_STACK_BYTES
+#define RESERVED_C_STACK_BYTES 1024
+
+#if ((__GNUC__ == 3) && (__GNUC_MINOR__ >= 3)) || (__GNUC__ > 3)
+/* gcc 3.3+: leave an extra slot for gp saves */
+#define LOCALS 32
+#else
+#define LOCALS 31
+#endif
+
+static void GNUC3_ATTRIBUTE(used)
+StgRunIsImplementedInAssembler(void)
+{
+    __asm__ volatile(
+               ".global StgRun\n"
+               "StgRun:\n"
+               "\talloc loc29 = ar.pfs, 0, %1, 8, 0\n" /* setup register frame */
+               "\tld8 r18 = [r32],8\n"                 /* get procedure address */
+               "\tadds sp = -%0, sp ;;\n"              /* setup stack */
+               "\tld8 gp = [r32]\n"                    /* get procedure GP */
+               "\tadds r16 = %0-(6*16), sp\n"          /* r16 -> first 16-byte fp spill slot */
+               "\tadds r17 = %0-(5*16), sp ;;\n"       /* r17 -> second slot; both step by 32 */
+               "\tstf.spill [r16] = f16,32\n"          /* spill callee-saved fp regs */
+               "\tstf.spill [r17] = f17,32\n"
+               "\tmov b6 = r18 ;;\n"                   /* set target address */
+               "\tstf.spill [r16] = f18,32\n"
+               "\tstf.spill [r17] = f19,32\n"
+               "\tmov loc30 = b0 ;;\n"                 /* save return address */
+               "\tstf.spill [r16] = f20,32\n"
+               "\tstf.spill [r17] = f21,32\n"
+               "\tbr.few b6 ;;\n"                      /* branch to function */
+               ".global StgReturn\n"
+               "StgReturn:\n"
+               "\tmov r8 = loc16\n"            /* return value in r8 */
+               "\tadds r16 = %0-(6*16), sp\n"  /* same two spill-slot pointers as in StgRun */
+               "\tadds r17 = %0-(5*16), sp ;;\n"
+               "\tldf.fill f16 = [r16],32\n"   /* start restoring fp regs */
+               "\tldf.fill f17 = [r17],32\n"
+               "\tmov ar.pfs = loc29 ;;\n"     /* restore register frame */
+               "\tldf.fill f18 = [r16],32\n"
+               "\tldf.fill f19 = [r17],32\n"
+               "\tmov b0 = loc30 ;;\n"         /* restore return address */
+               "\tldf.fill f20 = [r16],32\n"
+               "\tldf.fill f21 = [r17],32\n"
+               "\tadds sp = %0, sp\n"          /* restore stack */
+               "\tbr.ret.sptk.many b0 ;;\n"    /* return */
+       : : "i"(RESERVED_C_STACK_BYTES + 6*16), "i"(LOCALS));
+}
+
+#endif
 
 #endif /* !USE_MINIINTERPRETER */