[project @ 1996-01-08 20:28:12 by partain]
[ghc-hetmet.git] / ghc / includes / COptJumps.lh
diff --git a/ghc/includes/COptJumps.lh b/ghc/includes/COptJumps.lh
new file mode 100644 (file)
index 0000000..f1053eb
--- /dev/null
@@ -0,0 +1,534 @@
+\section[COptJumps]{Macros for tail-jumping}
+
+% this file is part of the C-as-assembler document
+
+\begin{code}
+/* Include guard: prevents multiple inclusion of this header. */
+#ifndef COPTJUMPS_H
+#define COPTJUMPS_H
+\end{code}
+
+%************************************************************************
+%*                                                                     *
+\subsection[COptJumps-portable]{Tail-(non-)jumping in ``portable~C''}
+%*                                                                     *
+%************************************************************************
+
+\begin{code}
+#if ! (defined(__STG_TAILJUMPS__) && defined(__GNUC__))
+
+/* Portable route: a "tail jump" is faked by returning the continuation
+   (cast to F_) to the plain-C mini-interpreter loop, which calls it. */
+#define JMP_(target)   return((F_) (target))
+#define RESUME_(target)        JMP_(target)
+\end{code}
+
+Don't need to do anything magical for the mini-interpreter, because
+we're really going to use the plain old C one (and the debugging
+variant, too, for that matter).
+
+%************************************************************************
+%*                                                                     *
+\subsection[COptJumps-optimised]{Tail-jumping in ``optimised~C''}
+%*                                                                     *
+%************************************************************************
+
+\begin{code}
+#else /* __STG_TAILJUMPS__ && __GNUC__ */
+/* Optimised route: real tail-jumps, implemented per-platform below. */
+\end{code}
+
+GCC will have assumed that pushing/popping of C-stack frames is going
+on when it generated its code, and used stack space accordingly.
+However, we actually {\em post-process away} all such stack-framery
+(see \tr{ghc/driver/ghc-asm-*.lprl}).
+Things will be OK, however, if we initially make sure there are
+@RESERVED_C_STACK_BYTES@ on the C-stack to begin with, for local
+variables.
+
+\begin{code}
+/* Space kept free on the C stack for gcc's local variables, since the
+   asm mangler strips the compiler's own frame pushing/popping. */
+#define RESERVED_C_STACK_BYTES (512 * sizeof(I_))  /* MUST BE OF GENEROUS ALIGNMENT */
+\end{code}
+
+The platform-specific details are given in alphabetical order.
+
+%************************************************************************
+%*                                                                     *
+\subsubsection[COptJumps-alpha]{Tail-jumping on Alphas}
+%*                                                                     *
+%************************************************************************
+
+We have to set the procedure value register (\$27) before branching, so
+that the target function can load the gp (\$29) as appropriate.
+
+It seems that \tr{_procedure} can't be declared within the body of the
+\tr{JMP_} macro...at least, not if we want it to be \$27, which we do!
+
+\begin{code}
+#if alpha_dec_osf1_TARGET
+    /* ToDo: less specific? */
+
+/*
+   Jumping to a new block of code, we need to set up $27 to point
+   at the target, so that the callee can establish its gp (as an
+   offset from its own starting address).  For some reason, gcc
+   refuses to give us $27 for _procedure if it's declared as a 
+   local variable, so the workaround is to make it a global.
+
+   Note:  The local variable works in gcc 2.6.2, but fails in 2.5.8.
+ */
+
+/* MOVED: to COptRegs.lh -- very unsatisfactorily.
+   Otherwise, we can get a "global register variable follows a
+   function definition" error.
+
+   Once we can take gcc 2.6.x as std, then we can use
+   the local variant, and the problem goes away.  (WDP 95/02)
+
+register void *_procedure __asm__("$27");
+*/
+
+/* Tail-jump: load the continuation into $27, then computed-goto to it. */
+#define JMP_(cont)                             \
+    do { _procedure = (void *)(cont);          \
+         goto *_procedure;                     \
+       } while(0)
+
+/* 
+   When we resume at the point where a call was originally made,
+   we need to restore $26, so that gp can be reloaded appropriately.
+   However, sometimes we ``resume'' by entering a new function 
+   (typically EnterNodeCode), so we need to set up $27 as well.
+ */
+
+/* NOTE(review): the trailing ';' after "while(0)" below differs from
+   JMP_'s form and defeats the usual do/while(0) macro idiom inside
+   if/else chains -- confirm no call site depends on it. */
+#define RESUME_(cont)                          \
+    do { _procedure = (void *)(cont);          \
+        __asm__ volatile("mov $27,$26");       \
+         goto *_procedure;                     \
+       } while(0);
+
+/* Entering the threaded world: save the callee-saved integer regs
+   ($9-$15) and FP regs ($f2-$f9) below $sp, then drop $sp past the
+   reserved C-stack area plus that 16-slot save area. */
+#define MINI_INTERPRETER_SETUP                 \
+    __asm__ volatile ("stq $9,-8($30)\n"       \
+                      "stq $10,-16($30)\n"     \
+                      "stq $11,-24($30)\n"     \
+                      "stq $12,-32($30)\n"     \
+                      "stq $13,-40($30)\n"     \
+                      "stq $14,-48($30)\n"     \
+                      "stq $15,-56($30)\n"     \
+                      "stt $f2,-64($30)\n"     \
+                      "stt $f3,-72($30)\n"     \
+                      "stt $f4,-80($30)\n"     \
+                      "stt $f5,-88($30)\n"     \
+                      "stt $f6,-96($30)\n"     \
+                      "stt $f7,-104($30)\n"    \
+                      "stt $f8,-112($30)\n"    \
+                      "stt $f9,-120($30)\n"    \
+                     "lda $30,-%0($30)" : :    \
+                      "K" (RESERVED_C_STACK_BYTES+8*sizeof(double)+8*sizeof(long)));
+
+/* Leaving the threaded world at label miniInterpretEnd: pop the
+   reserved area and restore the registers saved above, in reverse. */
+#define MINI_INTERPRETER_END                   \
+    __asm__ volatile (".align 3\n"             \
+                             ".globl miniInterpretEnd\n" \
+                      "miniInterpretEnd:\n"            \
+                             "lda $30,%0($30)\n"       \
+                             "ldq $9,-8($30)\n"        \
+                             "ldq $10,-16($30)\n"      \
+                             "ldq $11,-24($30)\n"      \
+                             "ldq $12,-32($30)\n"      \
+                             "ldq $13,-40($30)\n"      \
+                             "ldq $14,-48($30)\n"      \
+                             "ldq $15,-56($30)\n"      \
+                             "ldt $f2,-64($30)\n"      \
+                             "ldt $f3,-72($30)\n"      \
+                             "ldt $f4,-80($30)\n"      \
+                             "ldt $f5,-88($30)\n"      \
+                             "ldt $f6,-96($30)\n"      \
+                     "ldt $f7,-104($30)\n"     \
+                     "ldt $f8,-112($30)\n"     \
+                     "ldt $f9,-120($30)" : :   \
+                      "K" (RESERVED_C_STACK_BYTES+8*sizeof(double)+8*sizeof(long)));
+
+#endif /* __alpha */
+\end{code}
+
+%************************************************************************
+%*                                                                     *
+\subsubsection[COptJumps-Hpux]{Tail-jumping on a HP-PA machine running HP-UX}
+%*                                                                     *
+%************************************************************************
+
+\begin{code}
+#if hppa1_1_hp_hpux_TARGET
+
+/* do FUNBEGIN/END the easy way */
+#define FUNBEGIN    __asm__ volatile ("--- BEGIN ---");
+#define FUNEND      __asm__ volatile ("--- END ---");
+
+/* The stack grows up!  Local variables are allocated just above the
+   frame pointer, and extra arguments are stashed just below the stack
+   pointer, so the safe space is again in the middle (cf. sparc).
+ */
+
+/* Tail-jump via gcc's computed goto; _procedure can be local here
+   (no $27-style procedure-value register to set up on PA-RISC). */
+#define JMP_(cont)                             \
+    do { void *_procedure = (void *)(cont);    \
+         goto *_procedure;                     \
+       } while(0)
+
+#define RESUME_(cont)  JMP_(cont)
+
+/* Entering the threaded world: save callee-saved integer regs
+   (%r3-%r18) and FP regs (%fr12-%fr21) into the reserved area
+   addressed off the frame pointer %r3, using %r19 as scratch.
+   NOTE(review): the constraint below uses 116*sizeof(long) while the
+   save area declared above holds 16 longs -- presumably the extra 100
+   slots mirror the outgoing-argument allowance described in the sparc
+   note further down; confirm before touching. */
+#define MINI_INTERPRETER_SETUP                 \
+    StgChar space[RESERVED_C_STACK_BYTES+16*sizeof(long)+10*sizeof(double)];   \
+    __asm__ volatile ("ldo %0(%%r3),%%r19\n"           \
+                     "\tstw %%r3, 0(0,%%r19)\n"        \
+                      "\tstw %%r4, 4(0,%%r19)\n"       \
+                      "\tstw %%r5, 8(0,%%r19)\n"       \
+                      "\tstw %%r6,12(0,%%r19)\n"       \
+                      "\tstw %%r7,16(0,%%r19)\n"       \
+                      "\tstw %%r8,20(0,%%r19)\n"       \
+                      "\tstw %%r9,24(0,%%r19)\n"       \
+                     "\tstw %%r10,28(0,%%r19)\n"       \
+                      "\tstw %%r11,32(0,%%r19)\n"      \
+                      "\tstw %%r12,36(0,%%r19)\n"      \
+                      "\tstw %%r13,40(0,%%r19)\n"      \
+                      "\tstw %%r14,44(0,%%r19)\n"      \
+                      "\tstw %%r15,48(0,%%r19)\n"      \
+                      "\tstw %%r16,52(0,%%r19)\n"      \
+                      "\tstw %%r17,56(0,%%r19)\n"      \
+                      "\tstw %%r18,60(0,%%r19)\n"      \
+                     "\tldo 80(%%r19),%%r19\n"         \
+                     "\tfstds %%fr12,-16(0,%%r19)\n"   \
+                     "\tfstds %%fr13, -8(0,%%r19)\n"   \
+                     "\tfstds %%fr14,  0(0,%%r19)\n"   \
+                     "\tfstds %%fr15,  8(0,%%r19)\n"   \
+                     "\tldo 32(%%r19),%%r19\n"         \
+                     "\tfstds %%fr16,-16(0,%%r19)\n"   \
+                     "\tfstds %%fr17, -8(0,%%r19)\n"   \
+                     "\tfstds %%fr18,  0(0,%%r19)\n"   \
+                     "\tfstds %%fr19,  8(0,%%r19)\n"   \
+                     "\tldo 32(%%r19),%%r19\n"         \
+                     "\tfstds %%fr20,-16(0,%%r19)\n"   \
+                     "\tfstds %%fr21, -8(0,%%r19)\n" : :   \
+                      "n" (RESERVED_C_STACK_BYTES - (116 * sizeof(long) + 10 * sizeof(double))) : "%r19" );
+
+/* Leaving the threaded world at exported label miniInterpretEnd:
+   reload all the registers saved by MINI_INTERPRETER_SETUP. */
+#define MINI_INTERPRETER_END                   \
+    __asm__ volatile (".align 4\n"             \
+                             "\t.EXPORT miniInterpretEnd,CODE\n" \
+                     "\t.EXPORT miniInterpretEnd,ENTRY,PRIV_LEV=3\n" \
+                      "miniInterpretEnd\n"             \
+                      "\tldo %0(%%r3),%%r19\n"         \
+                     "\tldw  0(0,%%r19),%%r3\n"        \
+                      "\tldw  4(0,%%r19),%%r4\n"       \
+                      "\tldw  8(0,%%r19),%%r5\n"       \
+                      "\tldw 12(0,%%r19),%%r6\n"       \
+                      "\tldw 16(0,%%r19),%%r7\n"       \
+                      "\tldw 20(0,%%r19),%%r8\n"       \
+                      "\tldw 24(0,%%r19),%%r9\n"       \
+                     "\tldw 28(0,%%r19),%%r10\n"       \
+                      "\tldw 32(0,%%r19),%%r11\n"      \
+                      "\tldw 36(0,%%r19),%%r12\n"      \
+                      "\tldw 40(0,%%r19),%%r13\n"      \
+                      "\tldw 44(0,%%r19),%%r14\n"      \
+                      "\tldw 48(0,%%r19),%%r15\n"      \
+                      "\tldw 52(0,%%r19),%%r16\n"      \
+                      "\tldw 56(0,%%r19),%%r17\n"      \
+                      "\tldw 60(0,%%r19),%%r18\n"      \
+                     "\tldo 80(%%r19),%%r19\n"         \
+                     "\tfldds -16(0,%%r19),%%fr12\n"   \
+                     "\tfldds  -8(0,%%r19),%%fr13\n"   \
+                     "\tfldds   0(0,%%r19),%%fr14\n"   \
+                     "\tfldds   8(0,%%r19),%%fr15\n"   \
+                     "\tldo 32(%%r19),%%r19\n"         \
+                     "\tfldds -16(0,%%r19),%%fr16\n"   \
+                     "\tfldds  -8(0,%%r19),%%fr17\n"   \
+                     "\tfldds   0(0,%%r19),%%fr18\n"   \
+                     "\tfldds   8(0,%%r19),%%fr19\n"   \
+                     "\tldo 32(%%r19),%%r19\n"         \
+                     "\tfldds -16(0,%%r19),%%fr20\n"   \
+                     "\tfldds  -8(0,%%r19),%%fr21\n" : :   \
+                      "n" (RESERVED_C_STACK_BYTES - (116 * sizeof(long) + 10 * sizeof(double))) : "%r19");
+
+#endif /* hppa1.1-hp-hpux* */
+\end{code}
+
+%************************************************************************
+%*                                                                     *
+\subsubsection[COptJumps-iX86]{Tail-jumping on a 386/486}
+%*                                                                     *
+%************************************************************************
+
+\begin{code}
+#if i386_TARGET_ARCH || i486_TARGET_ARCH
+
+/* do FUNBEGIN/END the easy way */
+#define FUNBEGIN    __asm__ volatile ("--- BEGIN ---");
+#define FUNEND      __asm__ volatile ("--- END ---");
+
+/* try "m68k-style" for now */
+extern void __DISCARD__(STG_NO_ARGS);
+
+/* Tail-jump: call __DISCARD__ first to force gcc to pop any pending
+   call arguments, then computed-goto to the continuation. */
+#define JMP_(cont)                     \
+    do { void *target;                 \
+        __DISCARD__();                 \
+        target = (void *)(cont);       \
+         goto *target;                         \
+       } while(0)
+
+#define RESUME_(target)        JMP_(target)
+
+/* The safe part of the stack frame is near the top */
+
+extern P_ SP_stack[];
+extern I_ SP_stack_ptr;
+
+/* Entering the threaded world: save the callee-saved regs
+   (%ebx/%esi/%edi/%ebp) into the reserved area, stash %esp in
+   _MainRegTable+100 (presumably the saved-SP slot of the STG register
+   table -- confirm against COptRegs) and push %esp onto SP_stack. */
+#define MINI_INTERPRETER_SETUP                                 \
+    StgChar space[RESERVED_C_STACK_BYTES+4*sizeof(long)];      \
+    __asm__ volatile ("leal %c0(%%esp),%%eax\n"                        \
+                     "\tmovl %%ebx,0(%%eax)\n"                 \
+                     "\tmovl %%esi,4(%%eax)\n"                 \
+                     "\tmovl %%edi,8(%%eax)\n"                 \
+                     "\tmovl %%ebp,12(%%eax)\n"                \
+                     "\tmovl %%esp,_MainRegTable+100"          \
+                       : : "n" (RESERVED_C_STACK_BYTES)        \
+                       : "%eax");                              \
+    __asm__ volatile ("movl %%esp,%0"                          \
+                       : "=r" (SP_stack[++SP_stack_ptr]));
+
+/* Leaving the threaded world at _miniInterpretEnd: pop the saved %esp
+   from SP_stack and restore the four callee-saved registers. */
+#define MINI_INTERPRETER_END                           \
+    __asm__ volatile (".align 4\n"                     \
+                     ".globl _miniInterpretEnd\n"      \
+                     "_miniInterpretEnd:\n"            \
+                     "\tnop"                           \
+                       : : : "memory" );               \
+    __asm__ volatile ("movl %0,%%esp\n"                        \
+                     "\tmovl %%esp,_MainRegTable+100"  \
+                       : : "m" (SP_stack[SP_stack_ptr--]) ); \
+    __asm__ volatile ("leal %c0(%%esp),%%eax\n"                \
+                     "\tmovl 0(%%eax),%%ebx\n"         \
+                     "\tmovl 4(%%eax),%%esi\n"         \
+                     "\tmovl 8(%%eax),%%edi\n"         \
+                     "\tmovl 12(%%eax),%%ebp"          \
+                       : : "n" (RESERVED_C_STACK_BYTES) : "%eax");
+
+#endif /* __i[34]86__ */
+\end{code}
+
+%************************************************************************
+%*                                                                     *
+\subsubsection[COptJumps-m68k]{Tail-jumping on m68k boxes}
+%*                                                                     *
+%************************************************************************
+
+For 680x0s, we use a quite-magic @JMP_@ macro, which includes
+beginning- and end-of-function markers.
+
+\begin{code}
+#if m68k_TARGET_ARCH
+
+/* Sentinel strings the asm mangler uses to delimit function bodies. */
+#define FUNBEGIN    __asm__ volatile ("--- BEGIN ---");
+#define FUNEND      __asm__ volatile ("--- END ---");
+\end{code}
+
+The call to \tr{__DISCARD__} in @JMP_@ is fodder for GCC, to force it
+to pop arguments to previous function calls before the end of the
+current function.  This is unnecessary if we can manage to compile
+with \tr{-fomit-frame-pointer} as well as \tr{-fno-defer-pop}.  (WDP
+95/02: Either false or dodgy.) At the moment, the asm mangler removes
+these calls to \tr{__DISCARD__}.
+
+
+\begin{code}
+extern void __DISCARD__(STG_NO_ARGS);
+
+/* Tail-jump: __DISCARD__ forces pending argument pops (see note
+   above), then computed-goto to the continuation. */
+#define JMP_(cont)                     \
+    do { void *target;                 \
+        __DISCARD__();                 \
+        target = (void *)(cont);       \
+        goto *target;                  \
+    } while(0)
+
+#define RESUME_(target)        JMP_(target)
+
+/* Save the 11 callee-saved regs (a2-a6/d2-d7) into the reserved area
+   and point a6 at it. */
+#define MINI_INTERPRETER_SETUP                                 \
+    StgChar space[RESERVED_C_STACK_BYTES+11*sizeof(long)];     \
+    __asm__ volatile ("moveml a2-a6/d2-d7,sp@(%c0)\n"          \
+                     "\tlea sp@(%c0),a6" : : "J" (RESERVED_C_STACK_BYTES));
+
+/* Label _miniInterpretEnd: drop the return-address word and restore
+   the saved registers. */
+#define MINI_INTERPRETER_END                           \
+    __asm__ volatile (".even\n"                                \
+                     ".globl _miniInterpretEnd\n"      \
+                     "_miniInterpretEnd:\n"            \
+                     "\taddqw #4,sp\n"                 \
+                     "\tmoveml sp@(%c0),a2-a6/d2-d7" : : "J" (RESERVED_C_STACK_BYTES));
+
+#endif /* __m68k__ */
+\end{code}
+
+%************************************************************************
+%*                                                                     *
+\subsubsection[COptJumps-mips]{Tail-jumping on a MIPS box}
+%*                                                                     *
+%************************************************************************
+
+\begin{code}
+#if mipseb_TARGET_ARCH || mipsel_TARGET_ARCH
+
+/* do FUNBEGIN/END the easy way */
+#define FUNBEGIN    __asm__ volatile ("--- BEGIN ---");
+#define FUNEND      __asm__ volatile ("--- END ---");
+
+/* try "m68k-style" for now */
+extern void __DISCARD__(STG_NO_ARGS);
+
+/* this is "alpha-style" */
+/* NOTE(review): _procedure is not declared in this section --
+   presumably it is the global register variable from COptRegs.lh,
+   as on alpha; confirm. */
+#define JMP_(cont)                             \
+    do { __DISCARD__();                                \
+        _procedure = (void *)(cont);           \
+         goto *_procedure;                     \
+       } while(0)
+
+#define RESUME_(target)        JMP_(target)
+
+/* _All_ callee-saved regs, whether we steal them or not, must be saved
+   (and restored).
+*/
+
+/* Save FP callee-saved regs ($f20-$f30, even) and integer callee-saved
+   regs ($16-$23, $fp) into the reserved area, via scratch reg $2. */
+#define MINI_INTERPRETER_SETUP                 \
+    StgChar space[RESERVED_C_STACK_BYTES+6*sizeof(double)+9*sizeof(long)]; \
+    __asm__ volatile ("addu $2,$sp,%0\n"       \
+                      "\ts.d $f20,0($2)\n"     \
+                      "\ts.d $f22,8($2)\n"     \
+                      "\ts.d $f24,16($2)\n"    \
+                      "\ts.d $f26,24($2)\n"    \
+                      "\ts.d $f28,32($2)\n"    \
+                      "\ts.d $f30,40($2)\n"    \
+                     "\tsw  $16,48($2)\n"      \
+                      "\tsw  $17,52($2)\n"     \
+                      "\tsw  $18,56($2)\n"     \
+                      "\tsw  $19,60($2)\n"     \
+                      "\tsw  $20,64($2)\n"     \
+                      "\tsw  $21,68($2)\n"     \
+                      "\tsw  $22,72($2)\n"     \
+                      "\tsw  $23,76($2)\n"     \
+                      "\tsw  $fp,80($2)\n"     \
+                      : : "I" (RESERVED_C_STACK_BYTES+16) : "$2" );
+
+    /* the 16 bytes is for the argument-register save-area above $sp */
+
+/* Label miniInterpretEnd: restore everything saved above. */
+#define MINI_INTERPRETER_END                   \
+    __asm__ volatile (".align 2\n"             \
+                             ".globl miniInterpretEnd\n" \
+                      "miniInterpretEnd:\n"            \
+                     "\taddu $2,$sp,%0\n"      \
+                      "\tl.d $f20,0($2)\n"     \
+                      "\tl.d $f22,8($2)\n"     \
+                      "\tl.d $f24,16($2)\n"    \
+                      "\tl.d $f26,24($2)\n"    \
+                      "\tl.d $f28,32($2)\n"    \
+                      "\tl.d $f30,40($2)\n"    \
+                     "\tlw  $16,48($2)\n"      \
+                      "\tlw  $17,52($2)\n"     \
+                      "\tlw  $18,56($2)\n"     \
+                      "\tlw  $19,60($2)\n"     \
+                      "\tlw  $20,64($2)\n"     \
+                      "\tlw  $21,68($2)\n"     \
+                      "\tlw  $22,72($2)\n"     \
+                      "\tlw  $23,76($2)\n"     \
+                      "\tlw  $fp,80($2)\n"     \
+                      : : "I" (RESERVED_C_STACK_BYTES+16) : "$2" );
+
+#endif /* mips */
+\end{code}
+
+%************************************************************************
+%*                                                                     *
+\subsubsection[COptJumps-RS6000]{Tail-jumping on an IBM RS6000 running AIX}
+%*                                                                     *
+%************************************************************************
+
+\begin{code}
+#if rs6000_ibm_aix_TARGET
+
+/* Tail-jump by calling the continuation directly (return ignored). */
+#define JMP_(cont)     ((F_) (cont))()
+/* partain: untested */
+/* NOTE(review): unlike every other platform section, no RESUME_ is
+   defined here -- confirm whether the AIX route was ever finished. */
+
+#endif /* rs6000-ibm-aix* */
+\end{code}
+
+%************************************************************************
+%*                                                                     *
+\subsubsection[COptJumps-sparc]{Tail-jumping on Sun4s}
+%*                                                                     *
+%************************************************************************
+
+We want tailjumps to be calls, because `call xxx' is the only Sparc branch
+that allows an arbitrary label as a target.  (Gcc's ``goto *target'' construct
+ends up loading the label into a register and then jumping, at the cost of
+two extra instructions for the 32-bit load.)
+
+When entering the threaded world, we stash our return address in a known
+location so that \tr{%i7} is available as an extra callee-saves register.
+Of course, we have to restore this when coming out of the threaded world.
+
+I hate this god-forsaken architecture.  Since the top of the reserved
+stack space is used for globals and the bottom is reserved for outgoing arguments,
+we have to stick our return address somewhere in the middle.  Currently, I'm
+allowing 100 extra outgoing arguments beyond the first 6.  --JSM
+
+\begin{code}
+#if sparc_TARGET_ARCH
+
+/* Solaris 2 needs no leading underscore on C symbols; other sparc
+   targets (a.out-style) do. */
+#ifdef solaris2_TARGET_OS
+#define MINI_INTERPRET_END   "miniInterpretEnd"
+#else
+#define MINI_INTERPRET_END   "_miniInterpretEnd"
+#endif
+
+#define JMP_(cont)     ((F_) (cont))()
+       /* Oh so happily, the above turns into a "call" instruction,
+          which, on a SPARC, is nothing but a "jmpl" with the
+          return address in %o7 [which we don't care about].
+       */
+#define RESUME_(target)        JMP_(target)
+
+/* Stash the return address (%i7) at word 100 of the reserved area --
+   the middle, past the 100-extra-outgoing-args allowance described
+   above -- freeing %i7 as an extra callee-saves register. */
+#define MINI_INTERPRETER_SETUP                 \
+    StgChar space[RESERVED_C_STACK_BYTES+sizeof(void *)];      \
+    register void *i7 __asm__("%i7");                  \
+    ((void **)(space))[100] = i7;
+
+/* Label miniInterpretEnd: reload %i7 from its stash slot. */
+#define MINI_INTERPRETER_END                   \
+    __asm__ volatile (".align 4\n"             \
+            ".global " MINI_INTERPRET_END "\n" \
+                   MINI_INTERPRET_END ":\n"            \
+           "\tld %1,%0" : "=r" (i7) : "m" (((void **)(space))[100]));
+
+#endif /* __sparc__ */
+\end{code}
+
+%************************************************************************
+%*                                                                     *
+\subsubsection[COptJumps-OOPS]{Someone screwed up here, too...}
+%*                                                                     *
+%************************************************************************
+
+If one of the above machine-dependent sections wasn't triggered,
+@JMP_@ won't be defined and you'll get link errors (if not
+C-compiler errors).
+
+\begin{code}
+/* Deliberate syntax error: if no platform section above defined JMP_,
+   fail loudly at compile time rather than obscurely at link time. */
+#if !defined(JMP_)
+*???????* No JMP_ macro???
+#endif
+
+#endif /* __STG_TAILJUMPS__ */
+\end{code}
+
+If @FUNBEGIN@ and @FUNEND@ weren't defined, give them the default
+(nothing).  Also, define @FB_@ and @FE_@ (short forms).
+\begin{code}
+/* Default the function-boundary markers to nothing on platforms that
+   did not define them above, and provide the short forms. */
+#if ! defined(FUNBEGIN)
+#define FUNBEGIN /* nothing */
+#endif
+#if ! defined(FUNEND)
+#define FUNEND  /* nothing */
+#endif
+
+#define FB_    FUNBEGIN        /* short forms */
+#define FE_    FUNEND
+
+#endif /* ! that's all of... COPTJUMPS_H */
+\end{code}