+#endif /* hppa1_1_HOST_ARCH */
+
+/* -----------------------------------------------------------------------------
+ PowerPC architecture
+
+ Everything is in assembler, so we don't have to deal with GCC...
+
+ -------------------------------------------------------------------------- */
+
+#ifdef powerpc_HOST_ARCH
+
+extern StgThreadReturnCode StgRun(StgFunPtr f, StgRegTable *basereg);
+
+#ifdef darwin_HOST_OS
+static void StgRunIsImplementedInAssembler(void) // C shell whose asm body defines the global labels _StgRun and _StgReturn
+{
+#if HAVE_SUBSECTIONS_VIA_SYMBOLS
+ // if the toolchain supports deadstripping, we have to
+ // prevent it here (it tends to get confused here).
+ __asm__ volatile (".no_dead_strip _StgRunIsImplementedInAssembler");
+#endif
+ __asm__ volatile (
+ "\n.globl _StgRun\n"
+ "_StgRun:\n"
+ "\tmflr r0\n" // r0 = link register (return address into StgRun's caller)
+ "\tbl saveFP # f14\n" // call the external saveFP helper; NOTE(review): presumably saves f14-f31 and the LR held in r0 - confirm
+ "\tstmw r13,-220(r1)\n" // store r13..r31 below the not-yet-moved stack pointer, starting at -220(r1)
+ "\tstwu r1,-%0(r1)\n" // push a frame of %0 bytes and store the old r1 at its base
+ "\tmr r27,r4\n" // BaseReg == r27
+ "\tmtctr r3\n" // CTR = f (first argument)
+ "\tmr r12,r3\n" // keep a copy of f in r12; NOTE(review): presumably for the callee's own addressing - confirm
+ "\tbctr\n" // jump to f without linking; control comes back via _StgReturn
+ ".globl _StgReturn\n"
+ "_StgReturn:\n"
+ "\tmr r3,r14\n" // result of StgRun is taken from r14
+ "\tla r1,%0(r1)\n" // pop the frame: r1 += %0
+ "\tlmw r13,-220(r1)\n" // reload r13..r31 from where _StgRun stored them
+ "\tb restFP # f14\n" // branch (no link) to the external restFP helper; NOTE(review): presumably restores the FP registers and returns to StgRun's caller - confirm
+ : : "i"(RESERVED_C_STACK_BYTES+224 /*stack frame size*/));
+}
+#else
+
+// This version is for PowerPC Linux.
+
+// Differences from the Darwin/Mac OS X version:
+// *) Different Assembler Syntax
+// *) Doesn't use Register Saving Helper Functions (although they exist somewhere)
+// *) We may not store below the stack pointer before moving it
+// (no "Red Zone" as in the Darwin ABI)
+// *) The Link Register is saved to a different offset in the caller's stack frame
+// (Linux: 4(r1), Darwin 8(r1))
+
+static void StgRunIsImplementedInAssembler(void) // C shell whose asm body defines the global functions StgRun and StgReturn
+{
+ __asm__ volatile (
+ "\t.globl StgRun\n"
+ "\t.type StgRun,@function\n"
+ "StgRun:\n"
+ "\tmflr 0\n" // r0 = link register (return address into StgRun's caller)
+ "\tstw 0,4(1)\n" // save it at 4(r1), i.e. in the caller's frame
+ "\tmr 5,1\n" // r5 = stack pointer on entry; all save slots are addressed from it
+ "\tstwu 1,-%0(1)\n" // push a frame of %0 bytes first, so the stores below land inside it
+ "\tstmw 13,-220(5)\n" // store r13..r31, starting 220 bytes below the entry stack pointer
+ "\tstfd 14,-144(5)\n" // store f14..f31, 8 bytes each, at -144..-8 from the entry stack pointer
+ "\tstfd 15,-136(5)\n"
+ "\tstfd 16,-128(5)\n"
+ "\tstfd 17,-120(5)\n"
+ "\tstfd 18,-112(5)\n"
+ "\tstfd 19,-104(5)\n"
+ "\tstfd 20,-96(5)\n"
+ "\tstfd 21,-88(5)\n"
+ "\tstfd 22,-80(5)\n"
+ "\tstfd 23,-72(5)\n"
+ "\tstfd 24,-64(5)\n"
+ "\tstfd 25,-56(5)\n"
+ "\tstfd 26,-48(5)\n"
+ "\tstfd 27,-40(5)\n"
+ "\tstfd 28,-32(5)\n"
+ "\tstfd 29,-24(5)\n"
+ "\tstfd 30,-16(5)\n"
+ "\tstfd 31,-8(5)\n"
+ "\tmr 27,4\n" // BaseReg == r27
+ "\tmtctr 3\n" // CTR = f (first argument)
+ "\tmr 12,3\n" // keep a copy of f in r12; NOTE(review): purpose not visible here - confirm
+ "\tbctr\n" // jump to f without linking; control comes back via StgReturn
+ ".globl StgReturn\n"
+ "\t.type StgReturn,@function\n"
+ "StgReturn:\n"
+ "\tmr 3,14\n" // result of StgRun is taken from r14
+ "\tla 5,%0(1)\n" // r5 = r1 + %0, the stack pointer StgRun was entered with
+ "\tlmw 13,-220(5)\n" // reload r13..r31
+ "\tlfd 14,-144(5)\n" // reload f14..f31 from the slots written by StgRun
+ "\tlfd 15,-136(5)\n"
+ "\tlfd 16,-128(5)\n"
+ "\tlfd 17,-120(5)\n"
+ "\tlfd 18,-112(5)\n"
+ "\tlfd 19,-104(5)\n"
+ "\tlfd 20,-96(5)\n"
+ "\tlfd 21,-88(5)\n"
+ "\tlfd 22,-80(5)\n"
+ "\tlfd 23,-72(5)\n"
+ "\tlfd 24,-64(5)\n"
+ "\tlfd 25,-56(5)\n"
+ "\tlfd 26,-48(5)\n"
+ "\tlfd 27,-40(5)\n"
+ "\tlfd 28,-32(5)\n"
+ "\tlfd 29,-24(5)\n"
+ "\tlfd 30,-16(5)\n"
+ "\tlfd 31,-8(5)\n"
+ "\tmr 1,5\n" // pop the frame
+ "\tlwz 0,4(1)\n" // fetch the return address saved by StgRun
+ "\tmtlr 0\n" // put it back in the link register
+ "\tblr\n" // return to StgRun's caller
+ : : "i"(RESERVED_C_STACK_BYTES+224 /*stack frame size*/));
+}
+#endif
+
+#endif
+
+/* -----------------------------------------------------------------------------
+ PowerPC 64 architecture
+
+ Everything is in assembler, so we don't have to deal with GCC...
+
+ -------------------------------------------------------------------------- */
+
+#ifdef powerpc64_HOST_ARCH
+
+#ifdef linux_HOST_OS
+extern StgThreadReturnCode StgRun(StgFunPtr f, StgRegTable *basereg);
+
+static void StgRunIsImplementedInAssembler(void) // C shell whose asm body defines StgRun/StgReturn (descriptors) and .StgRun/.StgReturn (code)
+{
+ // r0 volatile
+ // r1 stack pointer
+ // r2 toc - needs to be saved
+ // r3-r10 argument passing, volatile
+ // r11, r12 very volatile (not saved across cross-module calls)
+ // r13 thread local state (never modified, don't need to save)
+ // r14-r31 callee-save
+ __asm__ volatile (
+ ".section \".opd\",\"aw\"\n" // descriptors for both entry points are emitted into .opd
+ ".align 3\n"
+ ".globl StgRun\n"
+ "StgRun:\n"
+ "\t.quad\t.StgRun,.TOC.@tocbase,0\n" // descriptor: code address, TOC base, 0
+ "\t.size StgRun,24\n" // three 8-byte words
+ ".globl StgReturn\n"
+ "StgReturn:\n"
+ "\t.quad\t.StgReturn,.TOC.@tocbase,0\n" // same layout for StgReturn
+ "\t.size StgReturn,24\n"
+ ".previous\n" // back to the section that was active before .opd
+ ".globl .StgRun\n"
+ ".type .StgRun,@function\n"
+ ".StgRun:\n"
+ "\tmflr 0\n" // r0 = link register (return address into StgRun's caller)
+ "\tmr 5, 1\n" // r5 = stack pointer on entry; all save slots are addressed from it
+ "\tstd 0, 16(1)\n" // save the return address at 16(r1), i.e. in the caller's frame
+ "\tstdu 1, -%0(1)\n" // push a frame of %0 bytes
+ "\tstd 2, -296(5)\n" // save the TOC pointer
+ "\tstd 14, -288(5)\n" // save r14..r31, 8 bytes each, at -288..-152
+ "\tstd 15, -280(5)\n"
+ "\tstd 16, -272(5)\n"
+ "\tstd 17, -264(5)\n"
+ "\tstd 18, -256(5)\n"
+ "\tstd 19, -248(5)\n"
+ "\tstd 20, -240(5)\n"
+ "\tstd 21, -232(5)\n"
+ "\tstd 22, -224(5)\n"
+ "\tstd 23, -216(5)\n"
+ "\tstd 24, -208(5)\n"
+ "\tstd 25, -200(5)\n"
+ "\tstd 26, -192(5)\n"
+ "\tstd 27, -184(5)\n"
+ "\tstd 28, -176(5)\n"
+ "\tstd 29, -168(5)\n"
+ "\tstd 30, -160(5)\n"
+ "\tstd 31, -152(5)\n"
+ "\tstfd 14, -144(5)\n" // save f14..f31, 8 bytes each, at -144..-8
+ "\tstfd 15, -136(5)\n"
+ "\tstfd 16, -128(5)\n"
+ "\tstfd 17, -120(5)\n"
+ "\tstfd 18, -112(5)\n"
+ "\tstfd 19, -104(5)\n"
+ "\tstfd 20, -96(5)\n"
+ "\tstfd 21, -88(5)\n"
+ "\tstfd 22, -80(5)\n"
+ "\tstfd 23, -72(5)\n"
+ "\tstfd 24, -64(5)\n"
+ "\tstfd 25, -56(5)\n"
+ "\tstfd 26, -48(5)\n"
+ "\tstfd 27, -40(5)\n"
+ "\tstfd 28, -32(5)\n"
+ "\tstfd 29, -24(5)\n"
+ "\tstfd 30, -16(5)\n"
+ "\tstfd 31, -8(5)\n"
+ "\tmr 27, 4\n" // BaseReg == r27
+ "\tld 2, 8(3)\n" // f points at a descriptor: second word becomes the new TOC pointer
+ "\tld 3, 0(3)\n" // first word is the code address
+ "\tmtctr 3\n" // CTR = code address of f
+ "\tbctr\n" // jump to f without linking; control comes back via .StgReturn
+ ".globl .StgReturn\n"
+ ".type .StgReturn,@function\n"
+ ".StgReturn:\n"
+ "\tmr 3,14\n" // result of StgRun is taken from r14
+ "\tla 5, %0(1)\n" // load address == addi r5, r1, %0
+ "\tld 2, -296(5)\n" // restore the TOC pointer saved by .StgRun
+ "\tld 14, -288(5)\n" // restore r14..r31
+ "\tld 15, -280(5)\n"
+ "\tld 16, -272(5)\n"
+ "\tld 17, -264(5)\n"
+ "\tld 18, -256(5)\n"
+ "\tld 19, -248(5)\n"
+ "\tld 20, -240(5)\n"
+ "\tld 21, -232(5)\n"
+ "\tld 22, -224(5)\n"
+ "\tld 23, -216(5)\n"
+ "\tld 24, -208(5)\n"
+ "\tld 25, -200(5)\n"
+ "\tld 26, -192(5)\n"
+ "\tld 27, -184(5)\n"
+ "\tld 28, -176(5)\n"
+ "\tld 29, -168(5)\n"
+ "\tld 30, -160(5)\n"
+ "\tld 31, -152(5)\n"
+ "\tlfd 14, -144(5)\n" // restore f14..f31
+ "\tlfd 15, -136(5)\n"
+ "\tlfd 16, -128(5)\n"
+ "\tlfd 17, -120(5)\n"
+ "\tlfd 18, -112(5)\n"
+ "\tlfd 19, -104(5)\n"
+ "\tlfd 20, -96(5)\n"
+ "\tlfd 21, -88(5)\n"
+ "\tlfd 22, -80(5)\n"
+ "\tlfd 23, -72(5)\n"
+ "\tlfd 24, -64(5)\n"
+ "\tlfd 25, -56(5)\n"
+ "\tlfd 26, -48(5)\n"
+ "\tlfd 27, -40(5)\n"
+ "\tlfd 28, -32(5)\n"
+ "\tlfd 29, -24(5)\n"
+ "\tlfd 30, -16(5)\n"
+ "\tlfd 31, -8(5)\n"
+ "\tmr 1, 5\n" // pop the frame
+ "\tld 0, 16(1)\n" // fetch the return address saved by .StgRun
+ "\tmtlr 0\n" // put it back in the link register
+ "\tblr\n" // return to StgRun's caller
+ : : "i"(RESERVED_C_STACK_BYTES+304 /*stack frame size*/));
+}
+#else // linux_HOST_OS
+#error Only linux support for power64 right now.
+#endif
+
+#endif
+
+/* -----------------------------------------------------------------------------
+ IA64 architecture
+
+ Again, in assembler - so we can fiddle with the register stack, and because
+ gcc doesn't handle asm-clobbered callee-saves correctly.
+
+ loc0 - loc15: preserved locals
+ loc16 - loc28: STG registers
+ loc29: saved ar.pfs
+ loc30: saved b0
+ loc31: saved gp (gcc 3.3 uses this slot)
+ -------------------------------------------------------------------------- */
+
+#ifdef ia64_HOST_ARCH
+
+/* the memory stack is rarely used, so 16K is excessive */
+#undef RESERVED_C_STACK_BYTES
+#define RESERVED_C_STACK_BYTES 1024
+
+#if ((__GNUC__ == 3) && (__GNUC_MINOR__ >= 3)) || (__GNUC__ > 3)
+/* gcc 3.3+: leave an extra slot for gp saves */
+#define LOCALS 32
+#else
+#define LOCALS 31
+#endif
+
+static void StgRunIsImplementedInAssembler(void) /* C shell whose asm body defines the global labels StgRun and StgReturn */
+{
+ __asm__ volatile(
+ ".global StgRun\n"
+ "StgRun:\n"
+ "\talloc loc29 = ar.pfs, 0, %1, 8, 0\n" /* setup register frame */
+ "\tld8 r18 = [r32],8\n" /* get procedure address */
+ "\tadds sp = -%0, sp ;;\n" /* setup stack */
+ "\tld8 gp = [r32]\n" /* get procedure GP */
+ "\tadds r16 = %0-(6*16), sp\n" /* r16 = sp + RESERVED_C_STACK_BYTES: slots for f16, f18, f20 */
+ "\tadds r17 = %0-(5*16), sp ;;\n" /* r17 = 16 bytes above that: slots for f17, f19, f21 */
+ "\tstf.spill [r16] = f16,32\n" /* spill callee-saved fp regs */
+ "\tstf.spill [r17] = f17,32\n"
+ "\tmov b6 = r18 ;;\n" /* set target address */
+ "\tstf.spill [r16] = f18,32\n"
+ "\tstf.spill [r17] = f19,32\n"
+ "\tmov loc30 = b0 ;;\n" /* save return address */
+ "\tstf.spill [r16] = f20,32\n"
+ "\tstf.spill [r17] = f21,32\n"
+ "\tbr.few b6 ;;\n" /* branch to function */
+ ".global StgReturn\n"
+ "StgReturn:\n"
+ "\tmov r8 = loc16\n" /* return value in r8 */
+ "\tadds r16 = %0-(6*16), sp\n" /* same two slot addresses as computed in StgRun */
+ "\tadds r17 = %0-(5*16), sp ;;\n"
+ "\tldf.fill f16 = [r16],32\n" /* start restoring fp regs */
+ "\tldf.fill f17 = [r17],32\n"
+ "\tmov ar.pfs = loc29 ;;\n" /* restore register frame */
+ "\tldf.fill f18 = [r16],32\n"
+ "\tldf.fill f19 = [r17],32\n"
+ "\tmov b0 = loc30 ;;\n" /* restore return address */
+ "\tldf.fill f20 = [r16],32\n"
+ "\tldf.fill f21 = [r17],32\n"
+ "\tadds sp = %0, sp\n" /* restore stack */
+ "\tbr.ret.sptk.many b0 ;;\n" /* return */
+ : : "i"(RESERVED_C_STACK_BYTES + 6*16), "i"(LOCALS)); /* %0 = memory-stack frame size (reserve + six 16-byte spill slots), %1 = LOCALS */
+}
+
+#endif