\section[COptJumps]{Macros for tail-jumping} % this file is part of the C-as-assembler document \begin{code} #ifndef COPTJUMPS_H #define COPTJUMPS_H \end{code} %************************************************************************ %* * \subsection[COptJumps-portable]{Tail-(non-)jumping in ``portable~C''} %* * %************************************************************************ \begin{code} #if ! (defined(__STG_TAILJUMPS__) && defined(__GNUC__)) #define JMP_(target) return((F_) (target)) #define RESUME_(target) JMP_(target) \end{code} Don't need to do anything magical for the mini-interpreter, because we're really going to use the plain old C one (and the debugging variant, too, for that matter). %************************************************************************ %* * \subsection[COptJumps-optimised]{Tail-jumping in ``optimised~C''} %* * %************************************************************************ \begin{code} #else /* __STG_TAILJUMPS__ && __GNUC__ */ \end{code} GCC will have assumed that pushing/popping of C-stack frames is going on when it generated its code, and used stack space accordingly. However, we actually {\em post-process away} all such stack-framery (see \tr{ghc/driver/ghc-asm-*.lprl}). Thing will be OK however, if we initially make sure there are @RESERVED_C_STACK_BYTES@ on the C-stack to begin with, for local variables. \begin{code} #define RESERVED_C_STACK_BYTES (512 * sizeof(I_)) /* MUST BE OF GENEROUS ALIGNMENT */ \end{code} The platform-specific details are given in alphabetical order. %************************************************************************ %* * \subsubsection[COptJumps-alpha]{Tail-jumping on Alphas} %* * %************************************************************************ We have to set the procedure value register (\$27) before branching, so that the target function can load the gp (\$29) as appropriate. 
It seems that \tr{_procedure} can't be declared within the body of the
\tr{JMP_} macro...at least, not if we want it to be \$27, which we do!

\begin{code}
#if alpha_dec_osf1_TARGET
    /* ToDo: less specific? */

/*
   Jumping to a new block of code, we need to set up $27 to point
   at the target, so that the callee can establish its gp (as an
   offset from its own starting address).  For some reason, gcc
   refuses to give us $27 for _procedure if it's declared as a
   local variable, so the workaround is to make it a global.

   Note:  The local variable works in gcc 2.6.2, but fails in 2.5.8.
 */

/* MOVED: to COptRegs.lh -- very unsatisfactorily.
   Otherwise, we can get a "global register variable follows a
   function definition" error.

   Once we can take gcc 2.6.x as std, then we can use
   the local variant, and the problem goes away.  (WDP 95/02)

register void *_procedure __asm__("$27");
*/

/* JMP_(cont): tail-jump to `cont'.  The target is stored in
   _procedure (not declared in this file; see the MOVED note above)
   and then reached through a computed goto.  Wrapped in
   do { ... } while(0) so that a use followed by `;' is one statement. */
#define JMP_(cont)				\
    do { _procedure = (void *)(cont);		\
         goto *_procedure;			\
       } while(0)

/*
   When we resume at the point where a call was originally made,
   we need to restore $26, so that gp can be reloaded appropriately.
   However, sometimes we ``resume'' by entering a new function
   (typically EnterNodeCode), so we need to set up $27 as well.
*/ #define RESUME_(cont) \ do { _procedure = (void *)(cont); \ __asm__ volatile("mov $27,$26"); \ goto *_procedure; \ } while(0); #define MINI_INTERPRETER_SETUP \ __asm__ volatile ("stq $9,-8($30)\n" \ "stq $10,-16($30)\n" \ "stq $11,-24($30)\n" \ "stq $12,-32($30)\n" \ "stq $13,-40($30)\n" \ "stq $14,-48($30)\n" \ "stq $15,-56($30)\n" \ "stt $f2,-64($30)\n" \ "stt $f3,-72($30)\n" \ "stt $f4,-80($30)\n" \ "stt $f5,-88($30)\n" \ "stt $f6,-96($30)\n" \ "stt $f7,-104($30)\n" \ "stt $f8,-112($30)\n" \ "stt $f9,-120($30)\n" \ "lda $30,-%0($30)" : : \ "K" (RESERVED_C_STACK_BYTES+8*sizeof(double)+8*sizeof(long))); #define MINI_INTERPRETER_END \ __asm__ volatile (".align 3\n" \ ".globl miniInterpretEnd\n" \ "miniInterpretEnd:\n" \ "lda $30,%0($30)\n" \ "ldq $9,-8($30)\n" \ "ldq $10,-16($30)\n" \ "ldq $11,-24($30)\n" \ "ldq $12,-32($30)\n" \ "ldq $13,-40($30)\n" \ "ldq $14,-48($30)\n" \ "ldq $15,-56($30)\n" \ "ldt $f2,-64($30)\n" \ "ldt $f3,-72($30)\n" \ "ldt $f4,-80($30)\n" \ "ldt $f5,-88($30)\n" \ "ldt $f6,-96($30)\n" \ "ldt $f7,-104($30)\n" \ "ldt $f8,-112($30)\n" \ "ldt $f9,-120($30)" : : \ "K" (RESERVED_C_STACK_BYTES+8*sizeof(double)+8*sizeof(long))); #endif /* __alpha */ \end{code} %************************************************************************ %* * \subsubsection[COptJumps-Hpux]{Tail-jumping on a HP-PA machine running HP-UX} %* * %************************************************************************ \begin{code} #if hppa1_1_hp_hpux_TARGET /* do FUNBEGIN/END the easy way */ #define FUNBEGIN __asm__ volatile ("--- BEGIN ---"); #define FUNEND __asm__ volatile ("--- END ---"); /* The stack grows up! Local variables are allocated just above the frame pointer, and extra arguments are stashed just below the stack pointer, so the safe space is again in the middle (cf. sparc). 
*/

/* JMP_(cont): tail-jump to `cont' via a computed goto through a
   block-local pointer. */
#define JMP_(cont)				\
    do { void *_procedure = (void *)(cont);	\
         goto *_procedure;			\
       } while(0)

/* Resuming is the same as jumping on this target. */
#define RESUME_(cont)	JMP_(cont)

/* MINI_INTERPRETER_SETUP: declare the local buffer `space', then store
   %r3-%r18 followed by %fr12-%fr21, starting at offset %0 from %r3.
   %r19 is used as the moving base pointer and is declared clobbered. */
#define MINI_INTERPRETER_SETUP					\
    StgChar space[RESERVED_C_STACK_BYTES+16*sizeof(long)+10*sizeof(double)];	\
    __asm__ volatile ("ldo %0(%%r3),%%r19\n"			\
                      "\tstw %%r3, 0(0,%%r19)\n"		\
                      "\tstw %%r4, 4(0,%%r19)\n"		\
                      "\tstw %%r5, 8(0,%%r19)\n"		\
                      "\tstw %%r6,12(0,%%r19)\n"		\
                      "\tstw %%r7,16(0,%%r19)\n"		\
                      "\tstw %%r8,20(0,%%r19)\n"		\
                      "\tstw %%r9,24(0,%%r19)\n"		\
                      "\tstw %%r10,28(0,%%r19)\n"		\
                      "\tstw %%r11,32(0,%%r19)\n"		\
                      "\tstw %%r12,36(0,%%r19)\n"		\
                      "\tstw %%r13,40(0,%%r19)\n"		\
                      "\tstw %%r14,44(0,%%r19)\n"		\
                      "\tstw %%r15,48(0,%%r19)\n"		\
                      "\tstw %%r16,52(0,%%r19)\n"		\
                      "\tstw %%r17,56(0,%%r19)\n"		\
                      "\tstw %%r18,60(0,%%r19)\n"		\
                      "\tldo 80(%%r19),%%r19\n"			\
                      "\tfstds %%fr12,-16(0,%%r19)\n"		\
                      "\tfstds %%fr13, -8(0,%%r19)\n"		\
                      "\tfstds %%fr14, 0(0,%%r19)\n"		\
                      "\tfstds %%fr15, 8(0,%%r19)\n"		\
                      "\tldo 32(%%r19),%%r19\n"			\
                      "\tfstds %%fr16,-16(0,%%r19)\n"		\
                      "\tfstds %%fr17, -8(0,%%r19)\n"		\
                      "\tfstds %%fr18, 0(0,%%r19)\n"		\
                      "\tfstds %%fr19, 8(0,%%r19)\n"		\
                      "\tldo 32(%%r19),%%r19\n"			\
                      "\tfstds %%fr20,-16(0,%%r19)\n"		\
                      "\tfstds %%fr21, -8(0,%%r19)\n" : :	\
                      "n" (RESERVED_C_STACK_BYTES - (116 * sizeof(long) + 10 * sizeof(double))) : "%r19" );

/* MINI_INTERPRETER_END: export and emit the label miniInterpretEnd,
   then reload the registers stored by MINI_INTERPRETER_SETUP from the
   same offsets (same %0 operand, same %r19 scratch register). */
#define MINI_INTERPRETER_END					\
    __asm__ volatile (".align 4\n"				\
                      "\t.EXPORT miniInterpretEnd,CODE\n"	\
                      "\t.EXPORT miniInterpretEnd,ENTRY,PRIV_LEV=3\n"	\
                      "miniInterpretEnd\n"			\
                      "\tldo %0(%%r3),%%r19\n"			\
                      "\tldw 0(0,%%r19),%%r3\n"			\
                      "\tldw 4(0,%%r19),%%r4\n"			\
                      "\tldw 8(0,%%r19),%%r5\n"			\
                      "\tldw 12(0,%%r19),%%r6\n"		\
                      "\tldw 16(0,%%r19),%%r7\n"		\
                      "\tldw 20(0,%%r19),%%r8\n"		\
                      "\tldw 24(0,%%r19),%%r9\n"		\
                      "\tldw 28(0,%%r19),%%r10\n"		\
                      "\tldw 32(0,%%r19),%%r11\n"		\
                      "\tldw 36(0,%%r19),%%r12\n"		\
                      "\tldw 40(0,%%r19),%%r13\n"		\
                      "\tldw 44(0,%%r19),%%r14\n"		\
                      "\tldw 48(0,%%r19),%%r15\n"		\
                      "\tldw 52(0,%%r19),%%r16\n"		\
                      "\tldw 56(0,%%r19),%%r17\n"		\
                      "\tldw 60(0,%%r19),%%r18\n"		\
                      "\tldo 80(%%r19),%%r19\n"			\
                      "\tfldds -16(0,%%r19),%%fr12\n"		\
                      "\tfldds -8(0,%%r19),%%fr13\n"		\
                      "\tfldds 0(0,%%r19),%%fr14\n"		\
                      "\tfldds 8(0,%%r19),%%fr15\n"		\
                      "\tldo 32(%%r19),%%r19\n"			\
                      "\tfldds -16(0,%%r19),%%fr16\n"		\
                      "\tfldds -8(0,%%r19),%%fr17\n"		\
                      "\tfldds 0(0,%%r19),%%fr18\n"		\
                      "\tfldds 8(0,%%r19),%%fr19\n"		\
                      "\tldo 32(%%r19),%%r19\n"			\
                      "\tfldds -16(0,%%r19),%%fr20\n"		\
                      "\tfldds -8(0,%%r19),%%fr21\n" : :	\
                      "n" (RESERVED_C_STACK_BYTES - (116 * sizeof(long) + 10 * sizeof(double))) : "%r19");

#endif /* hppa1.1-hp-hpux* */
\end{code}

%************************************************************************
%*                                                                      *
\subsubsection[COptJumps-iX86]{Tail-jumping on a 386/486}
%*                                                                      *
%************************************************************************

\begin{code}
#if i386_TARGET_ARCH

/* *not* a good way to do this (WDP 96/05) */
/* Assembler-level name of the mini-interpreter exit label: no leading
   underscore on Solaris 2 and Linux, a leading underscore elsewhere. */
#if defined(solaris2_TARGET_OS) || defined(linux_TARGET_OS)
#define MINI_INTERPRET_END   "miniInterpretEnd"
#else
#define MINI_INTERPRET_END   "_miniInterpretEnd"
#endif

/* do FUNBEGIN/END the easy way */
#define FUNBEGIN    __asm__ volatile ("--- BEGIN ---");
#define FUNEND      __asm__ volatile ("--- END ---");

/* try "m68k-style" for now */
extern void __DISCARD__(STG_NO_ARGS);

/* JMP_(cont): call __DISCARD__(), then tail-jump to `cont' via a
   computed goto through the block-local `target'.
   Caveat: `cont' is evaluated while that local is in scope, so an
   argument that itself mentions an identifier named `target' would
   refer to the local. */
#define JMP_(cont)			\
    do { void *target;			\
         __DISCARD__();			\
         target = (void *)(cont);	\
         goto *target;			\
       } while(0)

/* Resuming is the same as jumping on this target. */
#define RESUME_(target)	JMP_(target)

/* The safe part of the stack frame is near the top */

/* MINI_INTERPRETER_SETUP: declare the local buffer `space', then store
   %ebx, %esi, %edi and %ebp at RESERVED_C_STACK_BYTES above %esp,
   using %eax (declared clobbered) as the base pointer. */
#define MINI_INTERPRETER_SETUP					\
    StgChar space[RESERVED_C_STACK_BYTES+4*sizeof(long)];	\
    __asm__ volatile ("leal %c0(%%esp),%%eax\n"			\
                      "\tmovl %%ebx,0(%%eax)\n"			\
                      "\tmovl %%esi,4(%%eax)\n"			\
                      "\tmovl %%edi,8(%%eax)\n"			\
                      "\tmovl %%ebp,12(%%eax)\n"		\
                        : : "n" (RESERVED_C_STACK_BYTES)	\
                        : "%eax");

/* the initial "addl $4,%esp" in ..._END compensates for
   the "call" (rather than a jump) in miniInterpret.
*/

/* MINI_INTERPRETER_END: two asm statements.  The first emits the
   global label named by MINI_INTERPRET_END followed by a nop, and
   declares a "memory" clobber.  The second adds 4 to %esp and reloads
   %ebx, %esi, %edi and %ebp from RESERVED_C_STACK_BYTES above %esp,
   using %eax (declared clobbered) as the base pointer. */
#define MINI_INTERPRETER_END				\
    __asm__ volatile (".align 4\n"			\
                      ".globl " MINI_INTERPRET_END "\n"	\
                      MINI_INTERPRET_END ":\n"		\
                      "\tnop"				\
                        : : : "memory" );		\
    __asm__ volatile ("addl $4,%%esp\n"			\
                      "\tleal %c0(%%esp),%%eax\n"	\
                      "\tmovl 0(%%eax),%%ebx\n"		\
                      "\tmovl 4(%%eax),%%esi\n"		\
                      "\tmovl 8(%%eax),%%edi\n"		\
                      "\tmovl 12(%%eax),%%ebp"		\
                        : : "n" (RESERVED_C_STACK_BYTES) : "%eax");

#endif /* __i[34]86__ */
\end{code}

%************************************************************************
%*                                                                      *
\subsubsection[COptJumps-m68k]{Tail-jumping on m68k boxes}
%*                                                                      *
%************************************************************************

For 680x0s, we use a quite-magic @JMP_@ macro, which includes
beginning- and end-of-function markers.

\begin{code}
#if m68k_TARGET_ARCH

/* Begin/end-of-function markers; each carries its own trailing `;'. */
#define FUNBEGIN    __asm__ volatile ("--- BEGIN ---");
#define FUNEND      __asm__ volatile ("--- END ---");
\end{code}

The call to \tr{__DISCARD__} in @JMP_@ is fodder for GCC, to force it
to pop arguments to previous function calls before the end of the
current function.  This is unnecessary if we can manage to compile
with \tr{-fomit-frame-pointer} as well as \tr{-fno-defer-pop}.  (WDP
95/02: Either false or dodgy.) At the moment, the asm mangler removes
these calls to \tr{__DISCARD__}.
\begin{code}
extern void __DISCARD__(STG_NO_ARGS);

/* JMP_(cont): call __DISCARD__(), then tail-jump to `cont' via a
   computed goto through the block-local `target'. */
#define JMP_(cont)			\
    do { void *target;			\
         __DISCARD__();			\
         target = (void *)(cont);	\
         goto *target;			\
       } while(0)

/* Resuming is the same as jumping on this target. */
#define RESUME_(target)	JMP_(target)

/* MINI_INTERPRETER_SETUP: declare the local buffer `space', store
   a2-a6/d2-d7 (11 registers, matching the 11*sizeof(long) of extra
   space) at offset RESERVED_C_STACK_BYTES from sp, and load that
   address into a6. */
#define MINI_INTERPRETER_SETUP					\
    StgChar space[RESERVED_C_STACK_BYTES+11*sizeof(long)];	\
    __asm__ volatile ("moveml a2-a6/d2-d7,sp@(%c0)\n"		\
                      "\tlea sp@(%c0),a6" : : "J" (RESERVED_C_STACK_BYTES));

/* MINI_INTERPRETER_END: emit the global label _miniInterpretEnd, add 4
   to sp, and reload a2-a6/d2-d7 from offset RESERVED_C_STACK_BYTES. */
#define MINI_INTERPRETER_END					\
    __asm__ volatile (".even\n"					\
                      ".globl _miniInterpretEnd\n"		\
                      "_miniInterpretEnd:\n"			\
                      "\taddqw #4,sp\n"				\
                      "\tmoveml sp@(%c0),a2-a6/d2-d7" : : "J" (RESERVED_C_STACK_BYTES));

#endif /* __m68k__ */
\end{code}

%************************************************************************
%*                                                                      *
\subsubsection[COptJumps-mips]{Tail-jumping on a MIPS box}
%*                                                                      *
%************************************************************************

\begin{code}
#if mipseb_TARGET_ARCH || mipsel_TARGET_ARCH

/* do FUNBEGIN/END the easy way */
#define FUNBEGIN    __asm__ volatile ("--- BEGIN ---");
#define FUNEND      __asm__ volatile ("--- END ---");

/* try "m68k-style" for now */
extern void __DISCARD__(STG_NO_ARGS);

/* this is "alpha-style" */
/* JMP_(cont): call __DISCARD__(), store the target in _procedure and
   jump to it with a computed goto.  _procedure is not declared in this
   section.  NOTE(review): presumably it is the global register
   variable discussed in the Alpha section's comments -- confirm. */
#define JMP_(cont)				\
    do { __DISCARD__();				\
         _procedure = (void *)(cont);		\
         goto *_procedure;			\
       } while(0)

/* Resuming is the same as jumping on this target. */
#define RESUME_(target)	JMP_(target)

/* _All_ callee-saved regs, whether we steal them or not, must be saved
   (and restored).
*/

/* MINI_INTERPRETER_SETUP: declare the local buffer `space', compute
   $sp + RESERVED_C_STACK_BYTES + 16 into $2 (declared clobbered), and
   store $f20,$f22,...,$f30 as doubles, then $16-$23 and $fp as words,
   at increasing offsets from $2. */
#define MINI_INTERPRETER_SETUP					\
    StgChar space[RESERVED_C_STACK_BYTES+6*sizeof(double)+9*sizeof(long)];	\
    __asm__ volatile ("addu $2,$sp,%0\n"			\
                      "\ts.d $f20,0($2)\n"			\
                      "\ts.d $f22,8($2)\n"			\
                      "\ts.d $f24,16($2)\n"			\
                      "\ts.d $f26,24($2)\n"			\
                      "\ts.d $f28,32($2)\n"			\
                      "\ts.d $f30,40($2)\n"			\
                      "\tsw $16,48($2)\n"			\
                      "\tsw $17,52($2)\n"			\
                      "\tsw $18,56($2)\n"			\
                      "\tsw $19,60($2)\n"			\
                      "\tsw $20,64($2)\n"			\
                      "\tsw $21,68($2)\n"			\
                      "\tsw $22,72($2)\n"			\
                      "\tsw $23,76($2)\n"			\
                      "\tsw $fp,80($2)\n"			\
                      : : "I" (RESERVED_C_STACK_BYTES+16) : "$2" );

    /* the 16 bytes is for the argument-register save-area above $sp */

/* MINI_INTERPRETER_END: emit the global label miniInterpretEnd and
   reload the registers stored by MINI_INTERPRETER_SETUP from the same
   offsets. */
#define MINI_INTERPRETER_END					\
    __asm__ volatile (".align 2\n"				\
                      ".globl miniInterpretEnd\n"		\
                      "miniInterpretEnd:\n"			\
                      "\taddu $2,$sp,%0\n"			\
                      "\tl.d $f20,0($2)\n"			\
                      "\tl.d $f22,8($2)\n"			\
                      "\tl.d $f24,16($2)\n"			\
                      "\tl.d $f26,24($2)\n"			\
                      "\tl.d $f28,32($2)\n"			\
                      "\tl.d $f30,40($2)\n"			\
                      "\tlw $16,48($2)\n"			\
                      "\tlw $17,52($2)\n"			\
                      "\tlw $18,56($2)\n"			\
                      "\tlw $19,60($2)\n"			\
                      "\tlw $20,64($2)\n"			\
                      "\tlw $21,68($2)\n"			\
                      "\tlw $22,72($2)\n"			\
                      "\tlw $23,76($2)\n"			\
                      "\tlw $fp,80($2)\n"			\
                      : : "I" (RESERVED_C_STACK_BYTES+16) : "$2" );

#endif /* mips */
\end{code}

%************************************************************************
%*                                                                      *
\subsubsection[COptJumps-powerpc]{Tail-jumping on an IBM PowerPC running AIX}
%*                                                                      *
%************************************************************************

\begin{code}
#if powerpc_TARGET_ARCH

/* do FUNBEGIN/END the easy way */
#define FUNBEGIN    __asm__ volatile ("--- BEGIN ---");
#define FUNEND      __asm__ volatile ("--- END ---");

/* try "m68k-style" for now */
/* (Declared here, but the JMP_ below does not call it.) */
extern void __DISCARD__(STG_NO_ARGS);

/* this is "alpha-style" */
/* JMP_(cont): tail-jump to `cont' via a computed goto.  Unlike the
   Alpha and MIPS variants, _procedure is a block-local pointer here. */
#define JMP_(cont)				\
    do { void *_procedure = (void *)(cont);	\
         goto *_procedure;			\
       } while(0)

/* Resuming is the same as jumping on this target. */
#define RESUME_(target)	JMP_(target)

/* _All_ callee-saved regs, whether we steal them or not, must be saved
   (and restored).
*/

/* NOTE(review): the two macros below are textually identical to the
   MIPS versions in this file (addu, s.d/l.d, sw/lw, $sp, $fp, $2).
   These look like MIPS mnemonics and register names rather than
   PowerPC ones -- confirm whether this section was ever assembled for
   a PowerPC target before relying on it. */

/* MINI_INTERPRETER_SETUP: declare the local buffer `space', compute
   $sp + RESERVED_C_STACK_BYTES + 16 into $2 (declared clobbered), and
   store $f20,$f22,...,$f30 as doubles, then $16-$23 and $fp as words,
   at increasing offsets from $2. */
#define MINI_INTERPRETER_SETUP					\
    StgChar space[RESERVED_C_STACK_BYTES+6*sizeof(double)+9*sizeof(long)];	\
    __asm__ volatile ("addu $2,$sp,%0\n"			\
                      "\ts.d $f20,0($2)\n"			\
                      "\ts.d $f22,8($2)\n"			\
                      "\ts.d $f24,16($2)\n"			\
                      "\ts.d $f26,24($2)\n"			\
                      "\ts.d $f28,32($2)\n"			\
                      "\ts.d $f30,40($2)\n"			\
                      "\tsw $16,48($2)\n"			\
                      "\tsw $17,52($2)\n"			\
                      "\tsw $18,56($2)\n"			\
                      "\tsw $19,60($2)\n"			\
                      "\tsw $20,64($2)\n"			\
                      "\tsw $21,68($2)\n"			\
                      "\tsw $22,72($2)\n"			\
                      "\tsw $23,76($2)\n"			\
                      "\tsw $fp,80($2)\n"			\
                      : : "I" (RESERVED_C_STACK_BYTES+16) : "$2" );

    /* the 16 bytes is for the argument-register save-area above $sp */

/* MINI_INTERPRETER_END: emit the global label miniInterpretEnd and
   reload the registers stored by MINI_INTERPRETER_SETUP from the same
   offsets. */
#define MINI_INTERPRETER_END					\
    __asm__ volatile (".align 2\n"				\
                      ".globl miniInterpretEnd\n"		\
                      "miniInterpretEnd:\n"			\
                      "\taddu $2,$sp,%0\n"			\
                      "\tl.d $f20,0($2)\n"			\
                      "\tl.d $f22,8($2)\n"			\
                      "\tl.d $f24,16($2)\n"			\
                      "\tl.d $f26,24($2)\n"			\
                      "\tl.d $f28,32($2)\n"			\
                      "\tl.d $f30,40($2)\n"			\
                      "\tlw $16,48($2)\n"			\
                      "\tlw $17,52($2)\n"			\
                      "\tlw $18,56($2)\n"			\
                      "\tlw $19,60($2)\n"			\
                      "\tlw $20,64($2)\n"			\
                      "\tlw $21,68($2)\n"			\
                      "\tlw $22,72($2)\n"			\
                      "\tlw $23,76($2)\n"			\
                      "\tlw $fp,80($2)\n"			\
                      : : "I" (RESERVED_C_STACK_BYTES+16) : "$2" );

#endif /* powerpc */
\end{code}

%************************************************************************
%*                                                                      *
\subsubsection[COptJumps-sparc]{Tail-jumping on Sun4s}
%*                                                                      *
%************************************************************************

We want tailjumps to be calls, because `call xxx' is the only Sparc branch
that allows an arbitrary label as a target.  (Gcc's ``goto *target'' construct
ends up loading the label into a register and then jumping, at the cost of
two extra instructions for the 32-bit load.)

When entering the threaded world, we stash our return address in a known
location so that \tr{%i7} is available as an extra callee-saves register.
Of course, we have to restore this when coming out of the threaded world.

I hate this god-forsaken architecture.
Since the top of the reserved stack space is used for globals and the bottom is reserved for outgoing arguments, we have to stick our return address somewhere in the middle. Currently, I'm allowing 100 extra outgoing arguments beyond the first 6. --JSM \begin{code} #if sparc_TARGET_ARCH #ifdef solaris2_TARGET_OS #define MINI_INTERPRET_END "miniInterpretEnd" #else #define MINI_INTERPRET_END "_miniInterpretEnd" #endif #define JMP_(cont) ((F_) (cont))() /* Oh so happily, the above turns into a "call" instruction, which, on a SPARC, is nothing but a "jmpl" with the return address in %o7 [which we don't care about]. */ #define RESUME_(target) JMP_(target) #define MINI_INTERPRETER_SETUP \ StgChar space[RESERVED_C_STACK_BYTES+sizeof(void *)]; \ register void *i7 __asm__("%i7"); \ ((void **)(space))[100] = i7; #define MINI_INTERPRETER_END \ __asm__ volatile (".align 4\n" \ ".global " MINI_INTERPRET_END "\n" \ MINI_INTERPRET_END ":\n" \ "\tld %1,%0" : "=r" (i7) : "m" (((void **)(space))[100])); #endif /* __sparc__ */ \end{code} %************************************************************************ %* * \subsubsection[COptJumps-OOPS]{Someone screwed up here, too...} %* * %************************************************************************ If one of the above machine-dependent sections wasn't triggered, @JMP_@ won't be defined and you'll get link errors (if not C-compiler errors). \begin{code} #if !defined(JMP_) *???????* No JMP_ macro??? #endif #endif /* __STG_TAILJUMPS__ */ \end{code} If @FUNBEGIN@ and @FUNEND@ weren't defined, give them the default (nothing). Also, define @FB_@ and @FE_@ (short forms). \begin{code} #if ! defined(FUNBEGIN) #define FUNBEGIN /* nothing */ #endif #if ! defined(FUNEND) #define FUNEND /* nothing */ #endif #define FB_ FUNBEGIN /* short forms */ #define FE_ FUNEND #endif /* ! that's all of... COPTJUMPS_H */ \end{code}