1 \section[COptJumps]{Macros for tail-jumping}
3 % this file is part of the C-as-assembler document
10 %************************************************************************
12 \subsection[COptJumps-portable]{Tail-(non-)jumping in ``portable~C''}
14 %************************************************************************
17 #if ! (defined(__STG_TAILJUMPS__) && defined(__GNUC__))
19 #define JMP_(target) return((F_) (target)) /* no real jump: return the target, cast to F_, to the caller */
20 #define RESUME_(target) JMP_(target) /* resuming is the same operation as JMP_ in this configuration */
23 Don't need to do anything magical for the mini-interpreter, because
24 we're really going to use the plain old C one (and the debugging
25 variant, too, for that matter).
27 %************************************************************************
29 \subsection[COptJumps-optimised]{Tail-jumping in ``optimised~C''}
31 %************************************************************************
34 #else /* __STG_TAILJUMPS__ && __GNUC__ */
37 GCC will have assumed that pushing/popping of C-stack frames is going
38 on when it generated its code, and used stack space accordingly.
39 However, we actually {\em post-process away} all such stack-framery
40 (see \tr{ghc/driver/ghc-asm-*.lprl}).
41 Things will be OK, however, if we initially make sure there are
42 @RESERVED_C_STACK_BYTES@ on the C-stack to begin with, for local
46 #define RESERVED_C_STACK_BYTES (512 * sizeof(I_)) /* bytes kept free on the C stack: room for 512 I_-sized slots; MUST BE OF GENEROUS ALIGNMENT */
49 The platform-specific details are given in alphabetical order.
51 %************************************************************************
53 \subsubsection[COptJumps-alpha]{Tail-jumping on Alphas}
55 %************************************************************************
57 We have to set the procedure value register (\$27) before branching, so
58 that the target function can load the gp (\$29) as appropriate.
60 It seems that \tr{_procedure} can't be declared within the body of the
61 \tr{JMP_} macro...at least, not if we want it to be \$27, which we do!
64 #if alpha_dec_osf1_TARGET
65 /* ToDo: less specific? */
68 Jumping to a new block of code, we need to set up $27 to point
69 at the target, so that the callee can establish its gp (as an
70 offset from its own starting address). For some reason, gcc
71 refuses to give us $27 for _procedure if it's declared as a
72 local variable, so the workaround is to make it a global.
74 Note: The local variable works in gcc 2.6.2, but fails in 2.5.8.
77 /* MOVED: to COptRegs.lh -- very unsatisfactorily.
78 Otherwise, we can get a "global register variable follows a
79 function definition" error.
81 Once we can take gcc 2.6.x as std, then we can use
82 the local variant, and the problem goes away. (WDP 95/02)
84 register void *_procedure __asm__("$27");
88 do { _procedure = (void *)(cont); \
93 When we resume at the point where a call was originally made,
94 we need to restore $26, so that gp can be reloaded appropriately.
95 However, sometimes we ``resume'' by entering a new function
96 (typically EnterNodeCode), so we need to set up $27 as well.
99 #define RESUME_(cont) \
100 do { _procedure = (void *)(cont); \
101 __asm__ volatile("mov $27,$26"); \
105 #define MINI_INTERPRETER_SETUP /* Alpha: store registers below $30, then lower $30 to reserve C-stack space */ \
106 __asm__ volatile ("stq $9,-8($30)\n" /* $9..$15 are stored at -8..-56($30) */ \
107 "stq $10,-16($30)\n" \
108 "stq $11,-24($30)\n" \
109 "stq $12,-32($30)\n" \
110 "stq $13,-40($30)\n" \
111 "stq $14,-48($30)\n" \
112 "stq $15,-56($30)\n" \
113 "stt $f2,-64($30)\n" /* $f2..$f9 are stored at -64..-120($30) */ \
114 "stt $f3,-72($30)\n" \
115 "stt $f4,-80($30)\n" \
116 "stt $f5,-88($30)\n" \
117 "stt $f6,-96($30)\n" \
118 "stt $f7,-104($30)\n" \
119 "stt $f8,-112($30)\n" \
120 "stt $f9,-120($30)\n" \
121 "lda $30,-%0($30)" : : /* finally subtract operand %0 from $30 */ \
122 "K" (RESERVED_C_STACK_BYTES+8*sizeof(double)+8*sizeof(long))); /* %0: reserved bytes plus room for 8 doubles and 8 longs (15 save slots are written above) */
124 #define MINI_INTERPRETER_END /* Alpha: exported label miniInterpretEnd; undoes MINI_INTERPRETER_SETUP */ \
125 __asm__ volatile (".align 3\n" \
126 ".globl miniInterpretEnd\n" \
127 "miniInterpretEnd:\n" \
128 "lda $30,%0($30)\n" /* add back the amount SETUP subtracted from $30 */ \
129 "ldq $9,-8($30)\n" /* $9 is stored by SETUP at -8($30), so it must be reloaded with the rest */ \
130 "ldq $10,-16($30)\n" \
131 "ldq $11,-24($30)\n" \
132 "ldq $12,-32($30)\n" \
133 "ldq $13,-40($30)\n" \
134 "ldq $14,-48($30)\n" \
135 "ldq $15,-56($30)\n" \
136 "ldt $f2,-64($30)\n" /* $f2..$f9 are reloaded from -64..-120($30) */ \
137 "ldt $f3,-72($30)\n" \
138 "ldt $f4,-80($30)\n" \
139 "ldt $f5,-88($30)\n" \
140 "ldt $f6,-96($30)\n" \
141 "ldt $f7,-104($30)\n" \
142 "ldt $f8,-112($30)\n" \
143 "ldt $f9,-120($30)" : : \
144 "K" (RESERVED_C_STACK_BYTES+8*sizeof(double)+8*sizeof(long))); /* same operand expression as in MINI_INTERPRETER_SETUP */
149 %************************************************************************
151 \subsubsection[COptJumps-Hpux]{Tail-jumping on an HP-PA machine running HP-UX}
153 %************************************************************************
156 #if hppa1_1_hp_hpux_TARGET
158 /* do FUNBEGIN/END the easy way: each expands to an asm statement whose only
    content is a marker line (presumably picked up by the asm post-processor
    -- confirm against the driver) */
159 #define FUNBEGIN __asm__ volatile ("--- BEGIN ---");
160 #define FUNEND __asm__ volatile ("--- END ---");
162 /* The stack grows up! Local variables are allocated just above the
163 frame pointer, and extra arguments are stashed just below the stack
164 pointer, so the safe space is again in the middle (cf. sparc).
168 do { void *_procedure = (void *)(cont); \
172 #define RESUME_(cont) JMP_(cont) /* on HP-PA resuming is just a JMP_ */
174 #define MINI_INTERPRETER_SETUP /* HP-PA: declare the reserved C-stack buffer, then save registers through %r19 */ \
175 StgChar space[RESERVED_C_STACK_BYTES+16*sizeof(long)+10*sizeof(double)]; /* reserve + 16 longs + 10 doubles */ \
176 __asm__ volatile ("ldo %0(%%r3),%%r19\n" /* %r19 = %r3 + operand %0: base of the save area */ \
177 "\tstw %%r3, 0(0,%%r19)\n" /* %r3..%r18: sixteen word stores at offsets 0..60 */ \
178 "\tstw %%r4, 4(0,%%r19)\n" \
179 "\tstw %%r5, 8(0,%%r19)\n" \
180 "\tstw %%r6,12(0,%%r19)\n" \
181 "\tstw %%r7,16(0,%%r19)\n" \
182 "\tstw %%r8,20(0,%%r19)\n" \
183 "\tstw %%r9,24(0,%%r19)\n" \
184 "\tstw %%r10,28(0,%%r19)\n" \
185 "\tstw %%r11,32(0,%%r19)\n" \
186 "\tstw %%r12,36(0,%%r19)\n" \
187 "\tstw %%r13,40(0,%%r19)\n" \
188 "\tstw %%r14,44(0,%%r19)\n" \
189 "\tstw %%r15,48(0,%%r19)\n" \
190 "\tstw %%r16,52(0,%%r19)\n" \
191 "\tstw %%r17,56(0,%%r19)\n" \
192 "\tstw %%r18,60(0,%%r19)\n" \
193 "\tldo 80(%%r19),%%r19\n" /* step %r19 forward so the fstds offsets below stay within -16..8 */ \
194 "\tfstds %%fr12,-16(0,%%r19)\n" /* %fr12..%fr21: ten stores, up to four per 32-byte step */ \
195 "\tfstds %%fr13, -8(0,%%r19)\n" \
196 "\tfstds %%fr14, 0(0,%%r19)\n" \
197 "\tfstds %%fr15, 8(0,%%r19)\n" \
198 "\tldo 32(%%r19),%%r19\n" \
199 "\tfstds %%fr16,-16(0,%%r19)\n" \
200 "\tfstds %%fr17, -8(0,%%r19)\n" \
201 "\tfstds %%fr18, 0(0,%%r19)\n" \
202 "\tfstds %%fr19, 8(0,%%r19)\n" \
203 "\tldo 32(%%r19),%%r19\n" \
204 "\tfstds %%fr20,-16(0,%%r19)\n" \
205 "\tfstds %%fr21, -8(0,%%r19)\n" : : \
206 "n" (RESERVED_C_STACK_BYTES - (116 * sizeof(long) + 10 * sizeof(double))) : "%r19" ); /* %r19 is declared clobbered. NOTE(review): 116 longs here vs 16 longs in the size of space[] -- confirm this offset is intended */
208 #define MINI_INTERPRETER_END /* HP-PA: exported label miniInterpretEnd; reloads what MINI_INTERPRETER_SETUP stored */ \
209 __asm__ volatile (".align 4\n" \
210 "\t.EXPORT miniInterpretEnd,CODE\n" \
211 "\t.EXPORT miniInterpretEnd,ENTRY,PRIV_LEV=3\n" \
212 "miniInterpretEnd\n" \
213 "\tldo %0(%%r3),%%r19\n" /* %r19 = %r3 + operand %0: same save-area base as in SETUP */ \
214 "\tldw 0(0,%%r19),%%r3\n" /* NOTE(review): %r3 itself is reloaded first while %r19 keeps the base -- later loads do not depend on %r3 */ \
215 "\tldw 4(0,%%r19),%%r4\n" \
216 "\tldw 8(0,%%r19),%%r5\n" \
217 "\tldw 12(0,%%r19),%%r6\n" \
218 "\tldw 16(0,%%r19),%%r7\n" \
219 "\tldw 20(0,%%r19),%%r8\n" \
220 "\tldw 24(0,%%r19),%%r9\n" \
221 "\tldw 28(0,%%r19),%%r10\n" \
222 "\tldw 32(0,%%r19),%%r11\n" \
223 "\tldw 36(0,%%r19),%%r12\n" \
224 "\tldw 40(0,%%r19),%%r13\n" \
225 "\tldw 44(0,%%r19),%%r14\n" \
226 "\tldw 48(0,%%r19),%%r15\n" \
227 "\tldw 52(0,%%r19),%%r16\n" \
228 "\tldw 56(0,%%r19),%%r17\n" \
229 "\tldw 60(0,%%r19),%%r18\n" \
230 "\tldo 80(%%r19),%%r19\n" /* same 80/32/32 stepping of %r19 as in SETUP */ \
231 "\tfldds -16(0,%%r19),%%fr12\n" /* %fr12..%fr21 reloaded from the slots SETUP wrote */ \
232 "\tfldds -8(0,%%r19),%%fr13\n" \
233 "\tfldds 0(0,%%r19),%%fr14\n" \
234 "\tfldds 8(0,%%r19),%%fr15\n" \
235 "\tldo 32(%%r19),%%r19\n" \
236 "\tfldds -16(0,%%r19),%%fr16\n" \
237 "\tfldds -8(0,%%r19),%%fr17\n" \
238 "\tfldds 0(0,%%r19),%%fr18\n" \
239 "\tfldds 8(0,%%r19),%%fr19\n" \
240 "\tldo 32(%%r19),%%r19\n" \
241 "\tfldds -16(0,%%r19),%%fr20\n" \
242 "\tfldds -8(0,%%r19),%%fr21\n" : : \
243 "n" (RESERVED_C_STACK_BYTES - (116 * sizeof(long) + 10 * sizeof(double))) : "%r19"); /* operand expression identical to the one in MINI_INTERPRETER_SETUP; %r19 clobbered */
245 #endif /* hppa1.1-hp-hpux* */
248 %************************************************************************
250 \subsubsection[COptJumps-iX86]{Tail-jumping on a 386/486}
252 %************************************************************************
255 #if i386_TARGET_ARCH || i486_TARGET_ARCH
257 /* do FUNBEGIN/END the easy way: each expands to an asm statement whose only
    content is a marker line (presumably picked up by the asm post-processor
    -- confirm against the driver) */
258 #define FUNBEGIN __asm__ volatile ("--- BEGIN ---");
259 #define FUNEND __asm__ volatile ("--- END ---");
261 /* try "m68k-style" for now: declare the __DISCARD__ dummy whose call in JMP_
    is described in the m68k section of this file */
262 extern void __DISCARD__(STG_NO_ARGS);
267 target = (void *)(cont); \
271 #define RESUME_(target) JMP_(target) /* on i386/i486 resuming is just a JMP_ */
273 /* The safe part of the stack frame is near the top */
    /* SP_stack / SP_stack_ptr: array and index used by the mini-interpreter
       macros in this section; SETUP writes %esp to SP_stack[++SP_stack_ptr]
       and END reads it back from SP_stack[SP_stack_ptr--]. */
275 extern P_ SP_stack[];
276 extern I_ SP_stack_ptr;
278 #define MINI_INTERPRETER_SETUP \
279 StgChar space[RESERVED_C_STACK_BYTES+4*sizeof(long)]; \
280 __asm__ volatile ("leal %c0(%%esp),%%eax\n" \
281 "\tmovl %%ebx,0(%%eax)\n" \
282 "\tmovl %%esi,4(%%eax)\n" \
283 "\tmovl %%edi,8(%%eax)\n" \
284 "\tmovl %%ebp,12(%%eax)\n" \
285 "\tmovl %%esp,_MainRegTable+100" \
286 : : "n" (RESERVED_C_STACK_BYTES) \
288 __asm__ volatile ("movl %%esp,%0" \
289 : "=r" (SP_stack[++SP_stack_ptr]));
291 #define MINI_INTERPRETER_END \
292 __asm__ volatile (".align 4\n" \
293 ".globl _miniInterpretEnd\n" \
294 "_miniInterpretEnd:\n" \
297 __asm__ volatile ("movl %0,%%esp\n" \
298 "\tmovl %%esp,_MainRegTable+100" \
299 : : "m" (SP_stack[SP_stack_ptr--]) ); \
300 __asm__ volatile ("leal %c0(%%esp),%%eax\n" \
301 "\tmovl 0(%%eax),%%ebx\n" \
302 "\tmovl 4(%%eax),%%esi\n" \
303 "\tmovl 8(%%eax),%%edi\n" \
304 "\tmovl 12(%%eax),%%ebp" \
305 : : "n" (RESERVED_C_STACK_BYTES) : "%eax");
307 #endif /* __i[34]86__ */
310 %************************************************************************
312 \subsubsection[COptJumps-m68k]{Tail-jumping on m68k boxes}
314 %************************************************************************
316 For 680x0s, we use a quite-magic @JMP_@ macro, which includes
317 beginning- and end-of-function markers.
322 #define FUNBEGIN __asm__ volatile ("--- BEGIN ---"); /* beginning-of-function marker emitted as an asm line */
323 #define FUNEND __asm__ volatile ("--- END ---"); /* end-of-function marker emitted as an asm line */
326 The call to \tr{__DISCARD__} in @JMP_@ is fodder for GCC, to force it
327 to pop arguments to previous function calls before the end of the
328 current function. This is unnecessary if we can manage to compile
329 with \tr{-fomit-frame-pointer} as well as \tr{-fno-defer-pop}. (WDP
330 95/02: Either false or dodgy.) At the moment, the asm mangler removes
331 these calls to \tr{__DISCARD__}.
335 extern void __DISCARD__(STG_NO_ARGS); /* dummy function; only declared here, calls to it are inserted by JMP_ */
340 target = (void *)(cont); \
344 #define RESUME_(target) JMP_(target) /* on m68k resuming is just a JMP_ */
346 #define MINI_INTERPRETER_SETUP /* m68k: declare the reserved buffer and save a2-a6/d2-d7 into it */ \
347 StgChar space[RESERVED_C_STACK_BYTES+11*sizeof(long)]; /* reserve + 11 longs, one per register in the moveml list */ \
348 __asm__ volatile ("moveml a2-a6/d2-d7,sp@(%c0)\n" /* store the register list at sp + RESERVED_C_STACK_BYTES */ \
349 "\tlea sp@(%c0),a6" : : "J" (RESERVED_C_STACK_BYTES)); /* then point a6 at that same address */
351 #define MINI_INTERPRETER_END /* m68k: exported label _miniInterpretEnd; reloads the registers SETUP stored */ \
352 __asm__ volatile (".even\n" \
353 ".globl _miniInterpretEnd\n" \
354 "_miniInterpretEnd:\n" \
356 "\tmoveml sp@(%c0),a2-a6/d2-d7" : : "J" (RESERVED_C_STACK_BYTES)); /* same register list and offset as the moveml in MINI_INTERPRETER_SETUP */
358 #endif /* __m68k__ */
361 %************************************************************************
363 \subsubsection[COptJumps-mips]{Tail-jumping on a MIPS box}
365 %************************************************************************
368 #if mipseb_TARGET_ARCH || mipsel_TARGET_ARCH
370 /* do FUNBEGIN/END the easy way: each expands to an asm statement whose only
    content is a marker line (presumably picked up by the asm post-processor
    -- confirm against the driver) */
371 #define FUNBEGIN __asm__ volatile ("--- BEGIN ---");
372 #define FUNEND __asm__ volatile ("--- END ---");
374 /* try "m68k-style" for now: declare the __DISCARD__ dummy that JMP_ calls
    (its purpose is described in the m68k section of this file) */
375 extern void __DISCARD__(STG_NO_ARGS);
377 /* this is "alpha-style" */
379 do { __DISCARD__(); \
380 _procedure = (void *)(cont); \
384 #define RESUME_(target) JMP_(target) /* on MIPS resuming is just a JMP_ */
386 /* _All_ callee-saved regs, whether we steal them or not, must be saved
390 #define MINI_INTERPRETER_SETUP /* MIPS: declare the reserved buffer, then save registers through $2 */ \
391 StgChar space[RESERVED_C_STACK_BYTES+6*sizeof(double)+9*sizeof(long)]; /* reserve + 6 doubles + 9 longs, matching the 6 s.d and 9 sw below */ \
392 __asm__ volatile ("addu $2,$sp,%0\n" /* $2 = $sp + operand %0: base of the save area */ \
393 "\ts.d $f20,0($2)\n" /* even-numbered $f20..$f30 at offsets 0..40 */ \
394 "\ts.d $f22,8($2)\n" \
395 "\ts.d $f24,16($2)\n" \
396 "\ts.d $f26,24($2)\n" \
397 "\ts.d $f28,32($2)\n" \
398 "\ts.d $f30,40($2)\n" \
399 "\tsw $16,48($2)\n" /* $16..$23 and $fp at offsets 48..80 */ \
400 "\tsw $17,52($2)\n" \
401 "\tsw $18,56($2)\n" \
402 "\tsw $19,60($2)\n" \
403 "\tsw $20,64($2)\n" \
404 "\tsw $21,68($2)\n" \
405 "\tsw $22,72($2)\n" \
406 "\tsw $23,76($2)\n" \
407 "\tsw $fp,80($2)\n" \
408 : : "I" (RESERVED_C_STACK_BYTES+16) : "$2" ); /* $2 is declared clobbered */
410 /* the extra 16 bytes in the operand skip the argument-register save-area above $sp */
412 #define MINI_INTERPRETER_END /* MIPS: exported label miniInterpretEnd; reloads what MINI_INTERPRETER_SETUP stored */ \
413 __asm__ volatile (".align 2\n" \
414 ".globl miniInterpretEnd\n" \
415 "miniInterpretEnd:\n" \
416 "\taddu $2,$sp,%0\n" /* $2 = $sp + operand %0: same save-area base as in SETUP */ \
417 "\tl.d $f20,0($2)\n" /* even-numbered $f20..$f30 from offsets 0..40 */ \
418 "\tl.d $f22,8($2)\n" \
419 "\tl.d $f24,16($2)\n" \
420 "\tl.d $f26,24($2)\n" \
421 "\tl.d $f28,32($2)\n" \
422 "\tl.d $f30,40($2)\n" \
423 "\tlw $16,48($2)\n" /* $16..$23 and $fp from offsets 48..80 */ \
424 "\tlw $17,52($2)\n" \
425 "\tlw $18,56($2)\n" \
426 "\tlw $19,60($2)\n" \
427 "\tlw $20,64($2)\n" \
428 "\tlw $21,68($2)\n" \
429 "\tlw $22,72($2)\n" \
430 "\tlw $23,76($2)\n" \
431 "\tlw $fp,80($2)\n" \
432 : : "I" (RESERVED_C_STACK_BYTES+16) : "$2" ); /* same operand as in MINI_INTERPRETER_SETUP; $2 clobbered */
437 %************************************************************************
439 \subsubsection[COptJumps-RS6000]{Tail-jumping on an IBM RS6000 running AIX}
441 %************************************************************************
444 #if rs6000_ibm_aix_TARGET
446 #define JMP_(cont) ((F_) (cont))() /* RS6000: cast cont to F_ and call it with no arguments */
447 /* partain: untested */
449 #endif /* rs6000-ibm-aix* */
452 %************************************************************************
454 \subsubsection[COptJumps-sparc]{Tail-jumping on Sun4s}
456 %************************************************************************
458 We want tailjumps to be calls, because `call xxx' is the only Sparc branch
459 that allows an arbitrary label as a target. (Gcc's ``goto *target'' construct
460 ends up loading the label into a register and then jumping, at the cost of
461 two extra instructions for the 32-bit load.)
463 When entering the threaded world, we stash our return address in a known
464 location so that \tr{%i7} is available as an extra callee-saves register.
465 Of course, we have to restore this when coming out of the threaded world.
467 I hate this god-forsaken architecture. Since the top of the reserved
468 stack space is used for globals and the bottom is reserved for outgoing arguments,
469 we have to stick our return address somewhere in the middle. Currently, I'm
470 allowing 100 extra outgoing arguments beyond the first 6. --JSM
473 #if sparc_TARGET_ARCH
475 #ifdef solaris2_TARGET_OS
476 #define MINI_INTERPRET_END "miniInterpretEnd" /* label text without a leading underscore (solaris2_TARGET_OS case) */
478 #define MINI_INTERPRET_END "_miniInterpretEnd" /* label text with a leading underscore */
481 #define JMP_(cont) ((F_) (cont))() /* SPARC: cast cont to F_ and call it with no arguments */
482 /* Oh so happily, the above turns into a "call" instruction,
483 which, on a SPARC, is nothing but a "jmpl" with the
484 return address in %o7 [which we don't care about].
486 #define RESUME_(target) JMP_(target) /* on SPARC resuming is just a JMP_ */
488 #define MINI_INTERPRETER_SETUP /* SPARC: declare the reserved buffer and stash %i7 inside it */ \
489 StgChar space[RESERVED_C_STACK_BYTES+sizeof(void *)]; /* reserve + one pointer */ \
490 register void *i7 __asm__("%i7"); /* local variable bound to register %i7 */ \
491 ((void **)(space))[100] = i7; /* save it in pointer-sized slot 100 of space */
493 #define MINI_INTERPRETER_END /* SPARC: exported label named by MINI_INTERPRET_END; reloads %i7 */ \
494 __asm__ volatile (".align 4\n" \
495 ".global " MINI_INTERPRET_END "\n" /* string pasting builds the .global directive and the label */ \
496 MINI_INTERPRET_END ":\n" \
497 "\tld %1,%0" : "=r" (i7) : "m" (((void **)(space))[100])); /* load slot 100 of space (written by MINI_INTERPRETER_SETUP) back into i7 */
499 #endif /* __sparc__ */
502 %************************************************************************
504 \subsubsection[COptJumps-OOPS]{Someone screwed up here, too...}
506 %************************************************************************
508 If one of the above machine-dependent sections wasn't triggered,
509 @JMP_@ won't be defined and you'll get link errors (if not
514 *???????* No JMP_ macro???
517 #endif /* __STG_TAILJUMPS__ */
520 If @FUNBEGIN@ and @FUNEND@ weren't defined, give them the default
521 (nothing). Also, define @FB_@ and @FE_@ (short forms).
523 #if ! defined(FUNBEGIN)
524 #define FUNBEGIN /* default when no platform section defined it: expands to nothing */
526 #if ! defined(FUNEND)
527 #define FUNEND /* default when no platform section defined it: expands to nothing */
530 #define FB_ FUNBEGIN /* short forms */
531 #define FE_ FUNEND /* companion short form promised by the text above */
533 #endif /* ! that's all of... COPTJUMPS_H */