/* -----------------------------------------------------------------------------
- * $Id: StgCRun.c,v 1.28 2002/02/06 15:48:56 sewardj Exp $
+ * $Id: StgCRun.c,v 1.41 2003/12/10 11:35:26 wolfgang Exp $
*
- * (c) The GHC Team, 1998-2000
+ * (c) The GHC Team, 1998-2003
*
* STG-to-C glue.
*
#ifdef alpha_TARGET_ARCH
#define alpha_EXTRA_CAREFUL
register long fake_ra __asm__("$26");
+register long fake_gp __asm__("$29");
#ifdef alpha_EXTRA_CAREFUL
register long fake_s6 __asm__("$15");
register double fake_f8 __asm__("$f8");
any architecture (using miniinterpreter)
-------------------------------------------------------------------------- */
-extern StgThreadReturnCode StgRun(StgFunPtr f, StgRegTable *basereg)
+extern StgThreadReturnCode StgRun(StgFunPtr f, StgRegTable *basereg STG_UNUSED)
{
while (f) {
- IF_DEBUG(evaluator,
+ IF_DEBUG(interpreter,
fprintf(stderr,"Jumping to ");
- printPtr((P_)f);
+ printPtr((P_)f); fflush(stdout);
fprintf(stderr,"\n");
);
f = (StgFunPtr) (f)();
StgThreadReturnCode
StgRun(StgFunPtr f, StgRegTable *basereg) {
- static volatile void* stg_esp_saved_before_stack_align;
-
unsigned char space[ RESERVED_C_STACK_BYTES + 4*sizeof(void *) ];
StgThreadReturnCode r;
* save callee-saves registers on behalf of the STG code.
*/
"movl %%esp, %%eax\n\t"
- "addl %5, %%eax\n\t"
+ "addl %4, %%eax\n\t"
"movl %%ebx,0(%%eax)\n\t"
"movl %%esi,4(%%eax)\n\t"
"movl %%edi,8(%%eax)\n\t"
/*
* Set BaseReg
*/
- "movl %4,%%ebx\n\t"
+ "movl %3,%%ebx\n\t"
/*
- * grab the function argument from the stack
- */
- "movl %3,%%eax\n\t"
- /*
- * Get %%esp 8-aligned, first saving the current value. This
- * moves it down another zero or four bytes, depending on
- * whether it was already aligned or not. This is a bit
- * subtle -- we must not have a swizzled %esp at any place
- * where we might refer to one of the %digit parameters to
- * this piece of assembly code, since gcc may generate
- * %esp-relative stack offsets which will then be wrong.
- */
- "movl %%esp, %6\n\t"
- "subl $4, %%esp\n\t"
- "andl $0xFFFFFFF8, %%esp\n\t"
- /*
- * And finally, jump to the function argument.
+ * grab the function argument from the stack, and jump to it.
*/
+ "movl %2,%%eax\n\t"
"jmp *%%eax\n\t"
".global " STG_RETURN "\n"
STG_RETURN ":\n\t"
- /*
- * Restore %%esp to pre-alignment state.
- */
- "movl %2, %%esp\n"
- /*
- * Move return value from R1 to %%eax
- */
- "movl %%esi, %%eax\n\t"
+
+ "movl %%esi, %%eax\n\t" /* Return value in R1 */
+
/*
* restore callee-saves registers. (Don't stomp on %%eax!)
*/
"movl %%esp, %%edx\n\t"
- "addl %5, %%edx\n\t"
+ "addl %4, %%edx\n\t"
"movl 0(%%edx),%%ebx\n\t" /* restore the registers saved above */
"movl 4(%%edx),%%esi\n\t"
"movl 8(%%edx),%%edi\n\t"
"movl 12(%%edx),%%ebp\n\t"
- : "=&a" (r), "=m" (space),
- "=m" (stg_esp_saved_before_stack_align)
- : "m" (f), "m" (basereg), "i" (RESERVED_C_STACK_BYTES),
- "m" (stg_esp_saved_before_stack_align)
-
+ : "=&a" (r), "=m" (space)
+ : "m" (f), "m" (basereg), "i" (RESERVED_C_STACK_BYTES)
: "edx" /* stomps on %edx */
);
#endif
+/* ----------------------------------------------------------------------------
+ x86-64 is almost the same as plain x86.
+
+ I've done it using entirely inline assembler, because I couldn't
+ get gcc to generate the correct subtraction from %rsp by using
+ the local array variable trick. It didn't seem to reserve
+ enough space. Oh well, it's not much harder this way.
+
+ ------------------------------------------------------------------------- */
+
+#ifdef x86_64_TARGET_ARCH
+
+extern StgThreadReturnCode StgRun(StgFunPtr f, StgRegTable *basereg);
+
+/* The real StgRun is the top-level asm emitted below; this C function is
+ * never called. %0 is the total frame size: RESERVED_C_STACK_BYTES of C
+ * stack for the STG code, plus 48 bytes (6 * 8) in which to save the
+ * callee-saved registers rbx, rbp, r12-r15. */
+static void StgRunIsImplementedInAssembler(void)
+{
+ __asm__ volatile (
+ /*
+ * save callee-saves registers on behalf of the STG code.
+ */
+ ".globl StgRun\n"
+ "StgRun:\n\t"
+ "subq %0, %%rsp\n\t"
+ "movq %%rsp, %%rax\n\t"
+ "addq %0-48, %%rax\n\t"
+ /* %rax now points at the 48-byte save area at the top of the frame */
+ "movq %%rbx,0(%%rax)\n\t"
+ "movq %%rbp,8(%%rax)\n\t"
+ "movq %%r12,16(%%rax)\n\t"
+ "movq %%r13,24(%%rax)\n\t"
+ "movq %%r14,32(%%rax)\n\t"
+ "movq %%r15,40(%%rax)\n\t"
+ /*
+ * Set BaseReg (the basereg argument arrives in %rsi)
+ */
+ "movq %%rsi,%%rbx\n\t"
+ /*
+ * grab the function argument (it arrives in %rdi), and jump to it.
+ */
+ "movq %%rdi,%%rax\n\t"
+ "jmp *%%rax\n\t"
+
+ ".global " STG_RETURN "\n"
+ STG_RETURN ":\n\t"
+
+ "movq %%r13, %%rax\n\t" /* Return value in R1 */
+
+ /*
+ * restore callee-saves registers. (Don't stomp on %%rax!)
+ */
+ "movq %%rsp, %%rdx\n\t"
+ "addq %0-48, %%rdx\n\t"
+ "movq 0(%%rdx),%%rbx\n\t" /* restore the registers saved above */
+ "movq 8(%%rdx),%%rbp\n\t"
+ "movq 16(%%rdx),%%r12\n\t"
+ "movq 24(%%rdx),%%r13\n\t"
+ "movq 32(%%rdx),%%r14\n\t"
+ "movq 40(%%rdx),%%r15\n\t"
+ "addq %0, %%rsp\n\t"
+ "retq"
+
+ : : "i"(RESERVED_C_STACK_BYTES+48 /*stack frame size*/));
+}
+
+#endif /* x86-64 */
+
/* -----------------------------------------------------------------------------
Sparc architecture
StgRun(StgFunPtr f, StgRegTable *basereg)
{
register long real_ra __asm__("$26"); volatile long save_ra;
+ register long real_gp __asm__("$29"); volatile long save_gp;
register long real_s0 __asm__("$9" ); volatile long save_s0;
register long real_s1 __asm__("$10"); volatile long save_s1;
StgThreadReturnCode ret;
save_ra = real_ra;
+ save_gp = real_gp;
save_s0 = real_s0;
save_s1 = real_s1;
#endif
real_ra = save_ra;
+ real_gp = save_gp;
return ret;
}
#endif /* hppa1_1_TARGET_ARCH */
+/* -----------------------------------------------------------------------------
+ PowerPC architecture
+
+ Everything is in assembler, so we don't have to deal with GCC...
+
+ -------------------------------------------------------------------------- */
+
+#ifdef powerpc_TARGET_ARCH
+
+extern StgThreadReturnCode StgRun(StgFunPtr f, StgRegTable *basereg);
+
+#ifdef darwin_TARGET_OS
+// %0 is the frame size: RESERVED_C_STACK_BYTES for the STG code plus 288
+// bytes of register save area / linkage. saveFP/restFP are the Darwin
+// system helpers, entered at the point that saves/restores from f14 up;
+// restFP also reloads LR from 8(r1) and returns (see the Linux notes below).
+static void StgRunIsImplementedInAssembler(void)
+{
+ __asm__ volatile (
+ "\n.globl _StgRun\n"
+ "_StgRun:\n"
+ "\tmflr r0\n"
+ "\tbl saveFP # f14\n"
+ "\tstmw r13,-220(r1)\n" // save GPRs r13-r31 below the old SP
+ "\tstwu r1,-%0(r1)\n" // push our frame
+ "\tmtctr r3\n" // f (first argument) -> CTR
+ "\tmr r12,r3\n"
+ "\tbctr\n"
+ ".globl _StgReturn\n"
+ "_StgReturn:\n"
+ "\tmr r3,r14\n" // return value: STG R1 (r14) -> r3
+ "\tla r1,%0(r1)\n" // pop our frame
+ "\tlmw r13,-220(r1)\n"
+ "\tb restFP # f14\n" // restores f14-f31, then returns for us
+ : : "i"(RESERVED_C_STACK_BYTES+288 /*stack frame size*/));
+}
+#else
+
+// This version is for PowerPC Linux.
+
+// Differences from the Darwin/Mac OS X version:
+// *) Different Assembler Syntax
+// *) Doesn't use Register Saving Helper Functions (although they exist somewhere)
+// *) We may not access positive stack offsets
+// (no "Red Zone" as in the Darwin ABI)
+// *) The Link Register is saved to a different offset in the caller's stack frame
+// (Linux: 4(r1), Darwin 8(r1))
+
+static void StgRunIsImplementedInAssembler(void)
+{
+ __asm__ volatile (
+ "\t.globl StgRun\n"
+ "\t.type StgRun,@function\n"
+ "StgRun:\n"
+ "\tmflr 0\n"
+ "\tstw 0,4(1)\n" // save LR at 4(r1), per the Linux ABI note above
+ "\tmr 5,1\n" // r5 := old SP; save-area offsets below are relative to it
+ "\tstwu 1,-%0(1)\n" // push frame of %0 bytes
+ "\tstmw 13,-220(5)\n" // save GPRs r13-r31 (inside the new frame)
+ "\tstfd 14,-144(5)\n" // save FPRs f14-f31, one at a time
+ "\tstfd 15,-136(5)\n"
+ "\tstfd 16,-128(5)\n"
+ "\tstfd 17,-120(5)\n"
+ "\tstfd 18,-112(5)\n"
+ "\tstfd 19,-104(5)\n"
+ "\tstfd 20,-96(5)\n"
+ "\tstfd 21,-88(5)\n"
+ "\tstfd 22,-80(5)\n"
+ "\tstfd 23,-72(5)\n"
+ "\tstfd 24,-64(5)\n"
+ "\tstfd 25,-56(5)\n"
+ "\tstfd 26,-48(5)\n"
+ "\tstfd 27,-40(5)\n"
+ "\tstfd 28,-32(5)\n"
+ "\tstfd 29,-24(5)\n"
+ "\tstfd 30,-16(5)\n"
+ "\tstfd 31,-8(5)\n"
+ "\tmtctr 3\n" // f (first argument) -> CTR
+ "\tmr 12,3\n"
+ "\tbctr\n"
+ ".globl StgReturn\n"
+ "\t.type StgReturn,@function\n"
+ "StgReturn:\n"
+ "\tmr 3,14\n" // return value: STG R1 (r14) -> r3
+ "\tla 5,%0(1)\n" // r5 := old SP again
+ "\tlmw 13,-220(5)\n" // restore GPRs r13-r31
+ "\tlfd 14,-144(5)\n" // restore FPRs f14-f31
+ "\tlfd 15,-136(5)\n"
+ "\tlfd 16,-128(5)\n"
+ "\tlfd 17,-120(5)\n"
+ "\tlfd 18,-112(5)\n"
+ "\tlfd 19,-104(5)\n"
+ "\tlfd 20,-96(5)\n"
+ "\tlfd 21,-88(5)\n"
+ "\tlfd 22,-80(5)\n"
+ "\tlfd 23,-72(5)\n"
+ "\tlfd 24,-64(5)\n"
+ "\tlfd 25,-56(5)\n"
+ "\tlfd 26,-48(5)\n"
+ "\tlfd 27,-40(5)\n"
+ "\tlfd 28,-32(5)\n"
+ "\tlfd 29,-24(5)\n"
+ "\tlfd 30,-16(5)\n"
+ "\tlfd 31,-8(5)\n"
+ "\tmr 1,5\n" // pop our frame
+ "\tlwz 0,4(1)\n" // reload saved LR
+ "\tmtlr 0\n"
+ "\tblr\n"
+ : : "i"(RESERVED_C_STACK_BYTES+288 /*stack frame size*/));
+}
+#endif
+
+#endif
+
+/* -----------------------------------------------------------------------------
+ IA64 architecture
+
+ Again, in assembler - so we can fiddle with the register stack, and because
+ gcc doesn't handle asm-clobbered callee-saves correctly.
+
+ loc0 - loc15: preserved locals
+ loc16 - loc28: STG registers
+ loc29: saved ar.pfs
+ loc30: saved b0
+ loc31: saved gp (gcc 3.3 uses this slot)
+ -------------------------------------------------------------------------- */
+
+#ifdef ia64_TARGET_ARCH
+
+/* the memory stack is rarely used, so 16K is excessive */
+#undef RESERVED_C_STACK_BYTES
+#define RESERVED_C_STACK_BYTES 1024
+
+#if ((__GNUC__ == 3) && (__GNUC_MINOR__ >= 3)) || (__GNUC__ > 3)
+/* gcc 3.3+: leave an extra slot for gp saves */
+#define LOCALS 32
+#else
+#define LOCALS 31
+#endif
+
+/* f arrives in r32 (the first argument register) as an IA64 function
+ * descriptor: the code address followed by the callee's gp. %0 is the
+ * memory-stack frame: RESERVED_C_STACK_BYTES plus 6*16 bytes of spill
+ * slots for the callee-saved FP regs f16-f21. */
+static void StgRunIsImplementedInAssembler(void)
+{
+ __asm__ volatile(
+ ".global StgRun\n"
+ "StgRun:\n"
+ "\talloc loc29 = ar.pfs, 0, %1, 8, 0\n" /* setup register frame */
+ "\tld8 r18 = [r32],8\n" /* get procedure address */
+ "\tadds sp = -%0, sp ;;\n" /* setup stack */
+ "\tld8 gp = [r32]\n" /* get procedure GP */
+ "\tadds r16 = %0-(6*16), sp\n" /* r16/r17: two interleaved */
+ "\tadds r17 = %0-(5*16), sp ;;\n" /* pointers into the spill area */
+ "\tstf.spill [r16] = f16,32\n" /* spill callee-saved fp regs */
+ "\tstf.spill [r17] = f17,32\n"
+ "\tmov b6 = r18 ;;\n" /* set target address */
+ "\tstf.spill [r16] = f18,32\n"
+ "\tstf.spill [r17] = f19,32\n"
+ "\tmov loc30 = b0 ;;\n" /* save return address */
+ "\tstf.spill [r16] = f20,32\n"
+ "\tstf.spill [r17] = f21,32\n"
+ "\tbr.few b6 ;;\n" /* branch to function */
+ ".global StgReturn\n"
+ "StgReturn:\n"
+ "\tmov r8 = loc16\n" /* return value in r8 */
+ "\tadds r16 = %0-(6*16), sp\n"
+ "\tadds r17 = %0-(5*16), sp ;;\n"
+ "\tldf.fill f16 = [r16],32\n" /* start restoring fp regs */
+ "\tldf.fill f17 = [r17],32\n"
+ "\tmov ar.pfs = loc29 ;;\n" /* restore register frame */
+ "\tldf.fill f18 = [r16],32\n"
+ "\tldf.fill f19 = [r17],32\n"
+ "\tmov b0 = loc30 ;;\n" /* restore return address */
+ "\tldf.fill f20 = [r16],32\n"
+ "\tldf.fill f21 = [r17],32\n"
+ "\tadds sp = %0, sp\n" /* restore stack */
+ "\tbr.ret.sptk.many b0 ;;\n" /* return */
+ : : "i"(RESERVED_C_STACK_BYTES + 6*16), "i"(LOCALS));
+}
+
+#endif
+
#endif /* !USE_MINIINTERPRETER */
+