[project @ 2003-08-18 09:27:54 by dons]
[ghc-hetmet.git] / ghc / rts / StgCRun.c
index 884f9c3..fb7179f 100644 (file)
@@ -1,5 +1,5 @@
 /* -----------------------------------------------------------------------------
- * $Id: StgCRun.c,v 1.32 2002/04/18 19:12:43 ken Exp $
+ * $Id: StgCRun.c,v 1.39 2003/06/09 13:17:41 matthewc Exp $
  *
  * (c) The GHC Team, 1998-2000
  *
@@ -84,7 +84,7 @@ register double fake_f9 __asm__("$f9");
 extern StgThreadReturnCode StgRun(StgFunPtr f, StgRegTable *basereg STG_UNUSED)
 {
    while (f) {
-      IF_DEBUG(evaluator,
+      IF_DEBUG(interpreter,
               fprintf(stderr,"Jumping to ");
               printPtr((P_)f); fflush(stdout);
               fprintf(stderr,"\n");
@@ -457,33 +457,99 @@ StgRun(StgFunPtr f, StgRegTable *basereg)
 /* -----------------------------------------------------------------------------
    PowerPC architecture
 
-   We can use a simple function call as a tail call (the bl instruction places
-   the return address in the Link Register, and we ignore it).
-   We make GCC do the register saving. GCC does a good job
-   and saves all general purpose registers with a single stmw
-   (store multiple words) instruction.
+   Everything is in assembler, so we don't have to deal with GCC...
    
    -------------------------------------------------------------------------- */
 
 #ifdef powerpc_TARGET_ARCH
 
-StgThreadReturnCode
-StgRun(StgFunPtr f, StgRegTable *basereg) {
+extern StgThreadReturnCode StgRun(StgFunPtr f, StgRegTable *basereg);
 
-    unsigned char space[RESERVED_C_STACK_BYTES];
+static void StgRunIsImplementedInAssembler(void)
+{ /* Container for the asm below, which defines the global symbols _StgRun and _StgReturn; presumably never called itself. */
+       __asm__ volatile (
+               "\n.globl _StgRun\n"           /* export the entry point declared above as StgRun */
+               "_StgRun:\n"
+               "\tmflr r0\n"                  /* copy the link register (return address) into r0 before bl overwrites it */
+               "\tbl saveFP # f14\n"          /* saveFP is not defined here; presumably saves the FP registers from f14 up - confirm */
+               "\tstmw r13,-220(r1)\n"        /* store r13..r31 starting at r1-220 */
+               "\tstwu r1,-%0(r1)\n"          /* store old r1 at r1-%0 and move r1 there: allocates a frame of %0 bytes */
+               "\tmtctr r3\n"                 /* CTR = r3; r3 presumably holds the argument f - confirm against the ABI */
+               "\tmr r12,r3\n"                /* also copy the target to r12; NOTE(review): presumably expected by the callee - confirm */
+               "\tbctr\n"                     /* jump to the address in CTR without setting the link register */
+               ".globl _StgReturn\n"          /* export the exit point */
+               "_StgReturn:\n"
+               "\tmr r3,r14\n"                /* copy r14 to r3; presumably STG's result into the C return register - confirm */
+               "\tla r1,%0(r1)\n"             /* r1 += %0: undoes the stwu above */
+               "\tlmw r13,-220(r1)\n"         /* reload r13..r31 from where stmw stored them */
+               "\tb restFP # f14\n"           /* branch (no link) to restFP; not defined here, presumably restores FP regs and returns - confirm */
+       : : "i"(RESERVED_C_STACK_BYTES+288 /*stack frame size*/)); /* %0 is an immediate substituted into the stwu/la above */
+}
 
-    f();
-    __asm__ volatile (
-           ".align 4\n"
-            ".globl " STG_RETURN "\n"
-                   STG_RETURN ":"
-           : : : 
-                       "r14","r15","r16","r17","r18","r19","r20","r21","r22","r23","r24","r25","r26",
-                       "r27","r28","r29","r30","r31",
-                       "fr14","fr15","fr16","fr17","fr18","fr19","fr20",
-                       "fr21","fr22","fr23","fr24","fr25","fr26","fr27","fr28","fr29","fr30","fr31");
-          
-    return (StgThreadReturnCode)R1.i;
+#endif
+
+/* -----------------------------------------------------------------------------
+   IA64 architecture
+
+   Again, in assembler - so we can fiddle with the register stack, and because
+   gcc doesn't handle asm-clobbered callee-saves correctly.
+
+   loc0  - loc15: preserved locals
+   loc16 - loc28: STG registers
+           loc29: saved ar.pfs
+           loc30: saved b0
+           loc31: saved gp (gcc 3.3 uses this slot)
+   -------------------------------------------------------------------------- */
+
+#ifdef ia64_TARGET_ARCH
+
+/* The memory stack is rarely used on this target, so 16K is excessive; reserve 1K instead. */
+#undef RESERVED_C_STACK_BYTES
+#define RESERVED_C_STACK_BYTES 1024
+
+#if ((__GNUC__ == 3) && (__GNUC_MINOR__ >= 3)) || (__GNUC__ > 3)
+/* gcc 3.3+: leave an extra slot (loc31) for gp saves */
+#define LOCALS 32 /* number of local registers requested by the alloc in StgRun */
+#else
+#define LOCALS 31 /* loc0..loc30 only: no gp save slot needed */
+#endif
+
+static void StgRunIsImplementedInAssembler(void)
+{ /* Container for the asm below, which defines the global symbols StgRun and StgReturn; %0 = frame size, %1 = LOCALS. */
+    __asm__ volatile(
+               ".global StgRun\n"                      /* export the entry point */
+               "StgRun:\n"
+               "\talloc loc29 = ar.pfs, 0, %1, 8, 0\n" /* setup register frame: 0 inputs, %1 locals, 8 outputs, 0 rotating; old ar.pfs kept in loc29 */
+               "\tld8 r18 = [r32],8\n"                 /* get procedure address; r32 is post-incremented by 8 */
+               "\tadds sp = -%0, sp ;;\n"              /* setup stack: grow it by %0 bytes */
+               "\tld8 gp = [r32]\n"                    /* get procedure GP (the word after the address) */
+               "\tadds r16 = %0-(6*16), sp\n"          /* r16 -> first 16-byte spill slot, in the top 6*16 bytes of the new frame */
+               "\tadds r17 = %0-(5*16), sp ;;\n"       /* r17 -> second spill slot, 16 bytes above r16 */
+               "\tstf.spill [r16] = f16,32\n"          /* spill callee-saved fp regs; post-increment of 32 makes r16 cover f16/f18/f20 */
+               "\tstf.spill [r17] = f17,32\n"          /* ... and r17 cover f17/f19/f21 */
+               "\tmov b6 = r18 ;;\n"                   /* set target address */
+               "\tstf.spill [r16] = f18,32\n"
+               "\tstf.spill [r17] = f19,32\n"
+               "\tmov loc30 = b0 ;;\n"                 /* save return address */
+               "\tstf.spill [r16] = f20,32\n"
+               "\tstf.spill [r17] = f21,32\n"
+               "\tbr.few b6 ;;\n"                      /* branch to function */
+               ".global StgReturn\n"                   /* export the exit point */
+               "StgReturn:\n"
+               "\tmov r8 = loc16\n"            /* return value in r8, taken from loc16 */
+               "\tadds r16 = %0-(6*16), sp\n"  /* recompute the same two spill-slot addresses used on entry */
+               "\tadds r17 = %0-(5*16), sp ;;\n"
+               "\tldf.fill f16 = [r16],32\n"   /* start restoring fp regs */
+               "\tldf.fill f17 = [r17],32\n"
+               "\tmov ar.pfs = loc29 ;;\n"     /* restore register frame */
+               "\tldf.fill f18 = [r16],32\n"
+               "\tldf.fill f19 = [r17],32\n"
+               "\tmov b0 = loc30 ;;\n"         /* restore return address */
+               "\tldf.fill f20 = [r16],32\n"
+               "\tldf.fill f21 = [r17],32\n"
+               "\tadds sp = %0, sp\n"          /* restore stack */
+               "\tbr.ret.sptk.many b0 ;;\n"    /* return */
+       : : "i"(RESERVED_C_STACK_BYTES + 6*16), "i"(LOCALS)); /* %0: frame size incl. six 16-byte fp spill slots; %1: local register count */
 }
 
 #endif