1 \section[COptJumps]{Macros for tail-jumping}
3 % this file is part of the C-as-assembler document
10 %************************************************************************
12 \subsection[COptJumps-portable]{Tail-(non-)jumping in ``portable~C''}
14 %************************************************************************
17 #if ! (defined(__STG_TAILJUMPS__) && defined(__GNUC__))
/*
 * Portable fallback, selected unless both __STG_TAILJUMPS__ and __GNUC__
 * are defined.  No real jump happens here: JMP_ makes the enclosing
 * function return the target, cast to F_, to its caller.  RESUME_ is the
 * same operation under a second name.
 */
19 #define JMP_(target) return((F_) (target))
20 #define RESUME_(target) JMP_(target)
23 Don't need to do anything magical for the mini-interpreter, because
24 we're really going to use the plain old C one (and the debugging
25 variant, too, for that matter).
27 %************************************************************************
29 \subsection[COptJumps-optimised]{Tail-jumping in ``optimised~C''}
31 %************************************************************************
34 #else /* __STG_TAILJUMPS__ && __GNUC__ */
37 GCC will have assumed that pushing/popping of C-stack frames is going
38 on when it generated its code, and used stack space accordingly.
39 However, we actually {\em post-process away} all such stack-framery
40 (see \tr{ghc/driver/ghc-asm-*.lprl}).
41 Things will be OK, however, if we initially make sure there are
42 @RESERVED_C_STACK_BYTES@ on the C-stack to begin with, for local
/*
 * Number of bytes of C stack set aside up front: room for 512 objects of
 * type I_.  The MINI_INTERPRETER_SETUP/END macros in this file use this
 * value to size their save areas and to compute their stack offsets.
 */
46 #define RESERVED_C_STACK_BYTES (512 * sizeof(I_)) /* MUST BE OF GENEROUS ALIGNMENT */
49 The platform-specific details are given in alphabetical order.
51 %************************************************************************
53 \subsubsection[COptJumps-alpha]{Tail-jumping on Alphas}
55 %************************************************************************
57 We have to set the procedure value register (\$27) before branching, so
58 that the target function can load the gp (\$29) as appropriate.
60 It seems that \tr{_procedure} can't be declared within the body of the
61 \tr{JMP_} macro...at least, not if we want it to be \$27, which we do!
64 #if alpha_dec_osf1_TARGET
65 /* ToDo: less specific? */
68 Jumping to a new block of code, we need to set up $27 to point
69 at the target, so that the callee can establish its gp (as an
70 offset from its own starting address). For some reason, gcc
71 refuses to give us $27 for _procedure if it's declared as a
72 local variable, so the workaround is to make it a global.
74 Note: The local variable works in gcc 2.6.2, but fails in 2.5.8.
77 /* MOVED: to COptRegs.lh -- very unsatisfactorily.
78 Otherwise, we can get a "global register variable follows a
79 function definition" error.
81 Once we can take gcc 2.6.x as std, then we can use
82 the local variant, and the problem goes away. (WDP 95/02)
84 register void *_procedure __asm__("$27");
88 do { _procedure = (void *)(cont); \
93 When we resume at the point where a call was originally made,
94 we need to restore $26, so that gp can be reloaded appropriately.
95 However, sometimes we ``resume'' by entering a new function
96 (typically EnterNodeCode), so we need to set up $27 as well.
99 #define RESUME_(cont) \
100 do { _procedure = (void *)(cont); \
101 __asm__ volatile("mov $27,$26"); \
/*
 * Alpha: save registers, then reserve C-stack space.
 *
 * Stores integer registers $9-$15 (stq) at offsets -8 .. -56 and
 * floating-point registers $f2-$f9 (stt) at offsets -64 .. -120 relative
 * to $30, the register used as the stack base throughout this macro.
 * The final "lda" then lowers $30 by the operand:
 *   RESERVED_C_STACK_BYTES + 8*sizeof(double) + 8*sizeof(long).
 * MINI_INTERPRETER_END must undo exactly this, using the same operand.
 */
105 #define MINI_INTERPRETER_SETUP \
106 __asm__ volatile ("stq $9,-8($30)\n" \
107 "stq $10,-16($30)\n" \
108 "stq $11,-24($30)\n" \
109 "stq $12,-32($30)\n" \
110 "stq $13,-40($30)\n" \
111 "stq $14,-48($30)\n" \
112 "stq $15,-56($30)\n" \
113 "stt $f2,-64($30)\n" \
114 "stt $f3,-72($30)\n" \
115 "stt $f4,-80($30)\n" \
116 "stt $f5,-88($30)\n" \
117 "stt $f6,-96($30)\n" \
118 "stt $f7,-104($30)\n" \
119 "stt $f8,-112($30)\n" \
120 "stt $f9,-120($30)\n" \
121 "lda $30,-%0($30)" : : \
122 "K" (RESERVED_C_STACK_BYTES+8*sizeof(double)+8*sizeof(long)));
/*
 * Alpha: exit point of the mini-interpreter; exact mirror image of
 * MINI_INTERPRETER_SETUP.
 *
 * Emits the global label miniInterpretEnd, raises $30 by the same amount
 * that SETUP lowered it (identical operand expression), and then reloads
 * every register SETUP stored: $9-$15 with ldq from offsets -8 .. -56 and
 * $f2-$f9 with ldt from offsets -64 .. -120.
 *
 * The reload of $9 from -8($30) is required: SETUP stores $9 in that
 * slot, so leaving it out would return to the caller with whatever value
 * $9 held inside the threaded code.
 */
124 #define MINI_INTERPRETER_END \
125 __asm__ volatile (".align 3\n" \
126 ".globl miniInterpretEnd\n" \
127 "miniInterpretEnd:\n" \
128 "lda $30,%0($30)\n" \
129 "ldq $9,-8($30)\n" \
130 "ldq $10,-16($30)\n" \
131 "ldq $11,-24($30)\n" \
132 "ldq $12,-32($30)\n" \
133 "ldq $13,-40($30)\n" \
134 "ldq $14,-48($30)\n" \
135 "ldq $15,-56($30)\n" \
136 "ldt $f2,-64($30)\n" \
137 "ldt $f3,-72($30)\n" \
138 "ldt $f4,-80($30)\n" \
139 "ldt $f5,-88($30)\n" \
140 "ldt $f6,-96($30)\n" \
141 "ldt $f7,-104($30)\n" \
142 "ldt $f8,-112($30)\n" \
143 "ldt $f9,-120($30)" : : \
144 "K" (RESERVED_C_STACK_BYTES+8*sizeof(double)+8*sizeof(long)));
149 %************************************************************************
151 \subsubsection[COptJumps-Hpux]{Tail-jumping on a HP-PA machine running HP-UX}
153 %************************************************************************
156 #if hppa1_1_hp_hpux_TARGET
158 /* do FUNBEGIN/END the easy way */
/*
 * Each macro is an asm statement whose text is just a marker line, so the
 * marker appears verbatim in the compiler's assembly output.  Presumably
 * the asm post-processing step mentioned earlier in this file looks for
 * these markers -- TODO confirm.  Note the trailing ';' is part of the
 * macro body.
 */
159 #define FUNBEGIN __asm__ volatile ("--- BEGIN ---");
160 #define FUNEND __asm__ volatile ("--- END ---");
162 /* The stack grows up! Local variables are allocated just above the
163 frame pointer, and extra arguments are stashed just below the stack
164 pointer, so the safe space is again in the middle (cf. sparc).
168 do { void *_procedure = (void *)(cont); \
172 #define RESUME_(cont) JMP_(cont)
/*
 * HP-PA: declare the reserved stack area and save registers into it.
 *
 * Declares the local buffer `space`, then uses %r19 as a scratch pointer
 * (it is listed as clobbered):
 *   - %r19 = %r3 + operand, where the operand is
 *     RESERVED_C_STACK_BYTES - (116*sizeof(long) + 10*sizeof(double));
 *   - %r3-%r18 are stored as words at offsets 0 .. 60 from that base;
 *   - %r19 is advanced by 80, 32 and 32 bytes in turn, and %fr12-%fr21
 *     are stored as doubles around each new position, which places them
 *     at offsets 64 .. 136 from the original base.
 * MINI_INTERPRETER_END reloads the same registers with the same layout.
 */
174 #define MINI_INTERPRETER_SETUP \
175 StgChar space[RESERVED_C_STACK_BYTES+16*sizeof(long)+10*sizeof(double)]; \
176 __asm__ volatile ("ldo %0(%%r3),%%r19\n" \
177 "\tstw %%r3, 0(0,%%r19)\n" \
178 "\tstw %%r4, 4(0,%%r19)\n" \
179 "\tstw %%r5, 8(0,%%r19)\n" \
180 "\tstw %%r6,12(0,%%r19)\n" \
181 "\tstw %%r7,16(0,%%r19)\n" \
182 "\tstw %%r8,20(0,%%r19)\n" \
183 "\tstw %%r9,24(0,%%r19)\n" \
184 "\tstw %%r10,28(0,%%r19)\n" \
185 "\tstw %%r11,32(0,%%r19)\n" \
186 "\tstw %%r12,36(0,%%r19)\n" \
187 "\tstw %%r13,40(0,%%r19)\n" \
188 "\tstw %%r14,44(0,%%r19)\n" \
189 "\tstw %%r15,48(0,%%r19)\n" \
190 "\tstw %%r16,52(0,%%r19)\n" \
191 "\tstw %%r17,56(0,%%r19)\n" \
192 "\tstw %%r18,60(0,%%r19)\n" \
193 "\tldo 80(%%r19),%%r19\n" \
194 "\tfstds %%fr12,-16(0,%%r19)\n" \
195 "\tfstds %%fr13, -8(0,%%r19)\n" \
196 "\tfstds %%fr14, 0(0,%%r19)\n" \
197 "\tfstds %%fr15, 8(0,%%r19)\n" \
198 "\tldo 32(%%r19),%%r19\n" \
199 "\tfstds %%fr16,-16(0,%%r19)\n" \
200 "\tfstds %%fr17, -8(0,%%r19)\n" \
201 "\tfstds %%fr18, 0(0,%%r19)\n" \
202 "\tfstds %%fr19, 8(0,%%r19)\n" \
203 "\tldo 32(%%r19),%%r19\n" \
204 "\tfstds %%fr20,-16(0,%%r19)\n" \
205 "\tfstds %%fr21, -8(0,%%r19)\n" : : \
206 "n" (RESERVED_C_STACK_BYTES - (116 * sizeof(long) + 10 * sizeof(double))) : "%r19" );
/*
 * HP-PA: exit point of the mini-interpreter; mirror image of
 * MINI_INTERPRETER_SETUP.
 *
 * Exports and defines the label miniInterpretEnd, recomputes the save-area
 * base in %r19 from %r3 with the same operand expression SETUP uses, and
 * reloads %r3-%r18 (ldw, offsets 0 .. 60) and %fr12-%fr21 (fldds, after
 * the same 80/32/32-byte pointer bumps).  %r19 is listed as clobbered.
 * Note that %r3 itself is reloaded by the first ldw, after it has been
 * used to form the base address.
 */
208 #define MINI_INTERPRETER_END \
209 __asm__ volatile (".align 4\n" \
210 "\t.EXPORT miniInterpretEnd,CODE\n" \
211 "\t.EXPORT miniInterpretEnd,ENTRY,PRIV_LEV=3\n" \
212 "miniInterpretEnd\n" \
213 "\tldo %0(%%r3),%%r19\n" \
214 "\tldw 0(0,%%r19),%%r3\n" \
215 "\tldw 4(0,%%r19),%%r4\n" \
216 "\tldw 8(0,%%r19),%%r5\n" \
217 "\tldw 12(0,%%r19),%%r6\n" \
218 "\tldw 16(0,%%r19),%%r7\n" \
219 "\tldw 20(0,%%r19),%%r8\n" \
220 "\tldw 24(0,%%r19),%%r9\n" \
221 "\tldw 28(0,%%r19),%%r10\n" \
222 "\tldw 32(0,%%r19),%%r11\n" \
223 "\tldw 36(0,%%r19),%%r12\n" \
224 "\tldw 40(0,%%r19),%%r13\n" \
225 "\tldw 44(0,%%r19),%%r14\n" \
226 "\tldw 48(0,%%r19),%%r15\n" \
227 "\tldw 52(0,%%r19),%%r16\n" \
228 "\tldw 56(0,%%r19),%%r17\n" \
229 "\tldw 60(0,%%r19),%%r18\n" \
230 "\tldo 80(%%r19),%%r19\n" \
231 "\tfldds -16(0,%%r19),%%fr12\n" \
232 "\tfldds -8(0,%%r19),%%fr13\n" \
233 "\tfldds 0(0,%%r19),%%fr14\n" \
234 "\tfldds 8(0,%%r19),%%fr15\n" \
235 "\tldo 32(%%r19),%%r19\n" \
236 "\tfldds -16(0,%%r19),%%fr16\n" \
237 "\tfldds -8(0,%%r19),%%fr17\n" \
238 "\tfldds 0(0,%%r19),%%fr18\n" \
239 "\tfldds 8(0,%%r19),%%fr19\n" \
240 "\tldo 32(%%r19),%%r19\n" \
241 "\tfldds -16(0,%%r19),%%fr20\n" \
242 "\tfldds -8(0,%%r19),%%fr21\n" : : \
243 "n" (RESERVED_C_STACK_BYTES - (116 * sizeof(long) + 10 * sizeof(double))) : "%r19");
245 #endif /* hppa1.1-hp-hpux* */
248 %************************************************************************
250 \subsubsection[COptJumps-iX86]{Tail-jumping on a 386/486}
252 %************************************************************************
/*
 * Assembler-level spelling of the miniInterpretEnd label, as a string that
 * MINI_INTERPRETER_END pastes into its asm text: the bare name when
 * solaris2_TARGET_OS is defined, the name with a leading underscore
 * otherwise.  The two definitions are alternatives, so they must sit in
 * the two arms of a terminated #ifdef/#else/#endif.
 */
257 #ifdef solaris2_TARGET_OS
258 #define MINI_INTERPRET_END "miniInterpretEnd"
259 #else
260 #define MINI_INTERPRET_END "_miniInterpretEnd"
261 #endif
263 /* do FUNBEGIN/END the easy way */
/*
 * Each macro is an asm statement whose text is just a marker line, so the
 * marker appears verbatim in the compiler's assembly output.  Presumably
 * the asm post-processing step looks for these markers -- TODO confirm.
 * Note the trailing ';' is part of the macro body.
 */
264 #define FUNBEGIN __asm__ volatile ("--- BEGIN ---");
265 #define FUNEND __asm__ volatile ("--- END ---");
267 /* try "m68k-style" for now */
/*
 * Declaration of the dummy function that the m68k section of this file
 * describes: JMP_ calls it so that GCC pops pending arguments, and the
 * asm mangler later removes the calls.
 */
268 extern void __DISCARD__(STG_NO_ARGS);
273 target = (void *)(cont); \
277 #define RESUME_(target) JMP_(target)
279 /* The safe part of the stack frame is near the top */
/*
 * i386: declare the reserved stack area and save registers into it.
 *
 * Declares the local buffer `space`, points %eax at
 * %esp + RESERVED_C_STACK_BYTES, and stores %ebx, %esi, %edi and %ebp at
 * offsets 0, 4, 8 and 12 from there.  MINI_INTERPRETER_END reloads them
 * from the same offsets.
 *
 * The asm overwrites %eax, so %eax is declared as clobbered (exactly as
 * MINI_INTERPRETER_END does) and the statement is terminated here, so the
 * macro body does not run on into whatever follows the definition.
 */
281 #define MINI_INTERPRETER_SETUP \
282 StgChar space[RESERVED_C_STACK_BYTES+4*sizeof(long)]; \
283 __asm__ volatile ("leal %c0(%%esp),%%eax\n" \
284 "\tmovl %%ebx,0(%%eax)\n" \
285 "\tmovl %%esi,4(%%eax)\n" \
286 "\tmovl %%edi,8(%%eax)\n" \
287 "\tmovl %%ebp,12(%%eax)\n" \
288 : : "n" (RESERVED_C_STACK_BYTES) \
289 : "%eax");
291 /* the initial "addl $f,%esp" in ..._END compensates for
292 the "call" (rather than a jump) in miniInterpret.
/*
 * i386: exit point of the mini-interpreter; mirror image of
 * MINI_INTERPRETER_SETUP.  It consists of two asm statements:
 *
 *  1. Emit the global label named by MINI_INTERPRET_END.  This statement
 *     is closed before the next one starts.  It carries a "memory" clobber
 *     because control arrives at the label from outside the normal flow of
 *     the enclosing function, so the compiler must not rely on memory
 *     values it cached before this point.
 *  2. Add 4 to %esp (compensating for the "call" used to get here), point
 *     %eax at %esp + RESERVED_C_STACK_BYTES, and reload %ebx, %esi, %edi
 *     and %ebp from offsets 0, 4, 8 and 12.  %eax is declared clobbered.
 */
295 #define MINI_INTERPRETER_END \
296 __asm__ volatile (".align 4\n" \
297 ".globl " MINI_INTERPRET_END "\n" \
298 MINI_INTERPRET_END ":\n" \
299 : : : "memory"); \
301 __asm__ volatile ("addl $4,%%esp\n" \
302 "\tleal %c0(%%esp),%%eax\n" \
303 "\tmovl 0(%%eax),%%ebx\n" \
304 "\tmovl 4(%%eax),%%esi\n" \
305 "\tmovl 8(%%eax),%%edi\n" \
306 "\tmovl 12(%%eax),%%ebp" \
307 : : "n" (RESERVED_C_STACK_BYTES) : "%eax");
309 #endif /* __i[34]86__ */
312 %************************************************************************
314 \subsubsection[COptJumps-m68k]{Tail-jumping on m68k boxes}
316 %************************************************************************
318 For 680x0s, we use a quite-magic @JMP_@ macro, which includes
319 beginning- and end-of-function markers.
/*
 * Beginning- and end-of-function markers.  Each macro is an asm statement
 * whose text is just a marker line, so the marker appears verbatim in the
 * compiler's assembly output.  Presumably the asm mangler looks for these
 * markers -- TODO confirm.  Note the trailing ';' is part of the macro
 * body.
 */
324 #define FUNBEGIN __asm__ volatile ("--- BEGIN ---");
325 #define FUNEND __asm__ volatile ("--- END ---");
328 The call to \tr{__DISCARD__} in @JMP_@ is fodder for GCC, to force it
329 to pop arguments to previous function calls before the end of the
330 current function. This is unnecessary if we can manage to compile
331 with \tr{-fomit-frame-pointer} as well as \tr{-fno-defer-pop}. (WDP
332 95/02: Either false or dodgy.) At the moment, the asm mangler removes
333 these calls to \tr{__DISCARD__}.
/* Dummy function called from JMP_ purely to influence GCC's argument
   popping; only the declaration is given here. */
337 extern void __DISCARD__(STG_NO_ARGS);
342 target = (void *)(cont); \
346 #define RESUME_(target) JMP_(target)
/*
 * m68k: declare the reserved stack area and save registers into it.
 *
 * Declares the local buffer `space` (RESERVED_C_STACK_BYTES plus room for
 * 11 longs), stores the 11 registers a2-a6 and d2-d7 with a single moveml
 * at sp + RESERVED_C_STACK_BYTES, and then loads that same address into
 * a6.
 */
348 #define MINI_INTERPRETER_SETUP \
349 StgChar space[RESERVED_C_STACK_BYTES+11*sizeof(long)]; \
350 __asm__ volatile ("moveml a2-a6/d2-d7,sp@(%c0)\n" \
351 "\tlea sp@(%c0),a6" : : "J" (RESERVED_C_STACK_BYTES));
/*
 * m68k: exit point of the mini-interpreter.  Emits the global label
 * _miniInterpretEnd and reloads a2-a6 and d2-d7 with a single moveml from
 * sp + RESERVED_C_STACK_BYTES, the address MINI_INTERPRETER_SETUP stored
 * them at.
 *
 * NOTE(review): the i386 variant in this file first adds 4 to its stack
 * pointer to compensate for being reached through a call.  No such
 * adjustment of sp is visible here before the moveml -- confirm whether
 * one is needed on this target.
 */
353 #define MINI_INTERPRETER_END \
354 __asm__ volatile (".even\n" \
355 ".globl _miniInterpretEnd\n" \
356 "_miniInterpretEnd:\n" \
358 "\tmoveml sp@(%c0),a2-a6/d2-d7" : : "J" (RESERVED_C_STACK_BYTES));
360 #endif /* __m68k__ */
363 %************************************************************************
365 \subsubsection[COptJumps-mips]{Tail-jumping on a MIPS box}
367 %************************************************************************
370 #if mipseb_TARGET_ARCH || mipsel_TARGET_ARCH
372 /* do FUNBEGIN/END the easy way */
/*
 * Each macro is an asm statement whose text is just a marker line, so the
 * marker appears verbatim in the compiler's assembly output.  Presumably
 * the asm post-processing step looks for these markers -- TODO confirm.
 * Note the trailing ';' is part of the macro body.
 */
373 #define FUNBEGIN __asm__ volatile ("--- BEGIN ---");
374 #define FUNEND __asm__ volatile ("--- END ---");
376 /* try "m68k-style" for now */
/*
 * Declaration of the dummy function that the m68k section of this file
 * describes; the JMP_ definition for this target calls it first.
 */
377 extern void __DISCARD__(STG_NO_ARGS);
379 /* this is "alpha-style" */
381 do { __DISCARD__(); \
382 _procedure = (void *)(cont); \
386 #define RESUME_(target) JMP_(target)
388 /* _All_ callee-saved regs, whether we steal them or not, must be saved
/*
 * MIPS: declare the reserved stack area and save registers into it.
 *
 * Declares the local buffer `space`, then uses $2 as a scratch pointer
 * (listed as clobbered): $2 = $sp + RESERVED_C_STACK_BYTES + 16.  From
 * that base it stores the six doubles $f20, $f22, ..., $f30 at offsets
 * 0 .. 40 and the nine words $16-$23 and $fp at offsets 48 .. 80.
 * MINI_INTERPRETER_END reloads them from the same offsets.
 */
392 #define MINI_INTERPRETER_SETUP \
393 StgChar space[RESERVED_C_STACK_BYTES+6*sizeof(double)+9*sizeof(long)]; \
394 __asm__ volatile ("addu $2,$sp,%0\n" \
395 "\ts.d $f20,0($2)\n" \
396 "\ts.d $f22,8($2)\n" \
397 "\ts.d $f24,16($2)\n" \
398 "\ts.d $f26,24($2)\n" \
399 "\ts.d $f28,32($2)\n" \
400 "\ts.d $f30,40($2)\n" \
401 "\tsw $16,48($2)\n" \
402 "\tsw $17,52($2)\n" \
403 "\tsw $18,56($2)\n" \
404 "\tsw $19,60($2)\n" \
405 "\tsw $20,64($2)\n" \
406 "\tsw $21,68($2)\n" \
407 "\tsw $22,72($2)\n" \
408 "\tsw $23,76($2)\n" \
409 "\tsw $fp,80($2)\n" \
410 : : "I" (RESERVED_C_STACK_BYTES+16) : "$2" );
412 /* the 16 bytes is for the argument-register save-area above $sp */
/*
 * MIPS: exit point of the mini-interpreter; mirror image of
 * MINI_INTERPRETER_SETUP.
 *
 * Emits the global label miniInterpretEnd, recomputes the save-area base
 * $2 = $sp + RESERVED_C_STACK_BYTES + 16 (same operand as SETUP; $2 is
 * listed as clobbered), and reloads $f20, $f22, ..., $f30 from offsets
 * 0 .. 40 and $16-$23 and $fp from offsets 48 .. 80.
 */
414 #define MINI_INTERPRETER_END \
415 __asm__ volatile (".align 2\n" \
416 ".globl miniInterpretEnd\n" \
417 "miniInterpretEnd:\n" \
418 "\taddu $2,$sp,%0\n" \
419 "\tl.d $f20,0($2)\n" \
420 "\tl.d $f22,8($2)\n" \
421 "\tl.d $f24,16($2)\n" \
422 "\tl.d $f26,24($2)\n" \
423 "\tl.d $f28,32($2)\n" \
424 "\tl.d $f30,40($2)\n" \
425 "\tlw $16,48($2)\n" \
426 "\tlw $17,52($2)\n" \
427 "\tlw $18,56($2)\n" \
428 "\tlw $19,60($2)\n" \
429 "\tlw $20,64($2)\n" \
430 "\tlw $21,68($2)\n" \
431 "\tlw $22,72($2)\n" \
432 "\tlw $23,76($2)\n" \
433 "\tlw $fp,80($2)\n" \
434 : : "I" (RESERVED_C_STACK_BYTES+16) : "$2" );
439 %************************************************************************
441 \subsubsection[COptJumps-powerpc]{Tail-jumping on an IBM PowerPC running AIX}
443 %************************************************************************
446 #if powerpc_TARGET_ARCH
448 /* do FUNBEGIN/END the easy way */
/*
 * Each macro is an asm statement whose text is just a marker line, so the
 * marker appears verbatim in the compiler's assembly output.  Presumably
 * the asm post-processing step looks for these markers -- TODO confirm.
 * Note the trailing ';' is part of the macro body.
 */
449 #define FUNBEGIN __asm__ volatile ("--- BEGIN ---");
450 #define FUNEND __asm__ volatile ("--- END ---");
452 /* try "m68k-style" for now */
/*
 * Declaration of the dummy function that the m68k section of this file
 * describes.  NOTE(review): no call to it is visible in the part of this
 * target's JMP_ shown here -- confirm whether the declaration is needed.
 */
453 extern void __DISCARD__(STG_NO_ARGS);
455 /* this is "alpha-style" */
457 do { void *_procedure = (void *)(cont); \
461 #define RESUME_(target) JMP_(target)
463 /* _All_ callee-saved regs, whether we steal them or not, must be saved
/*
 * PowerPC: declare the reserved stack area and save registers into it.
 *
 * Declares the local buffer `space`, computes a base address in $2 as
 * $sp + RESERVED_C_STACK_BYTES + 16, and stores six doubles at offsets
 * 0 .. 40 and nine words at offsets 48 .. 80 from it.
 *
 * NOTE(review): this body is identical, line for line, to the MIPS
 * MINI_INTERPRETER_SETUP earlier in this file.  The mnemonics and
 * register names used (addu, s.d, sw, $sp, $fp, $f20, $16, ...) look like
 * MIPS assembly, not PowerPC -- confirm whether a real PowerPC version
 * was ever written before relying on this section.
 */
467 #define MINI_INTERPRETER_SETUP \
468 StgChar space[RESERVED_C_STACK_BYTES+6*sizeof(double)+9*sizeof(long)]; \
469 __asm__ volatile ("addu $2,$sp,%0\n" \
470 "\ts.d $f20,0($2)\n" \
471 "\ts.d $f22,8($2)\n" \
472 "\ts.d $f24,16($2)\n" \
473 "\ts.d $f26,24($2)\n" \
474 "\ts.d $f28,32($2)\n" \
475 "\ts.d $f30,40($2)\n" \
476 "\tsw $16,48($2)\n" \
477 "\tsw $17,52($2)\n" \
478 "\tsw $18,56($2)\n" \
479 "\tsw $19,60($2)\n" \
480 "\tsw $20,64($2)\n" \
481 "\tsw $21,68($2)\n" \
482 "\tsw $22,72($2)\n" \
483 "\tsw $23,76($2)\n" \
484 "\tsw $fp,80($2)\n" \
485 : : "I" (RESERVED_C_STACK_BYTES+16) : "$2" );
487 /* the 16 bytes is for the argument-register save-area above $sp */
/*
 * PowerPC: exit point of the mini-interpreter; mirror image of this
 * section's MINI_INTERPRETER_SETUP.
 *
 * Emits the global label miniInterpretEnd, recomputes the base address
 * $2 = $sp + RESERVED_C_STACK_BYTES + 16, and reloads six doubles from
 * offsets 0 .. 40 and nine words from offsets 48 .. 80.
 *
 * NOTE(review): this body is identical, line for line, to the MIPS
 * MINI_INTERPRETER_END earlier in this file.  The mnemonics and register
 * names used (addu, l.d, lw, $sp, $fp, $f20, $16, ...) look like MIPS
 * assembly, not PowerPC -- confirm whether a real PowerPC version was
 * ever written before relying on this section.
 */
489 #define MINI_INTERPRETER_END \
490 __asm__ volatile (".align 2\n" \
491 ".globl miniInterpretEnd\n" \
492 "miniInterpretEnd:\n" \
493 "\taddu $2,$sp,%0\n" \
494 "\tl.d $f20,0($2)\n" \
495 "\tl.d $f22,8($2)\n" \
496 "\tl.d $f24,16($2)\n" \
497 "\tl.d $f26,24($2)\n" \
498 "\tl.d $f28,32($2)\n" \
499 "\tl.d $f30,40($2)\n" \
500 "\tlw $16,48($2)\n" \
501 "\tlw $17,52($2)\n" \
502 "\tlw $18,56($2)\n" \
503 "\tlw $19,60($2)\n" \
504 "\tlw $20,64($2)\n" \
505 "\tlw $21,68($2)\n" \
506 "\tlw $22,72($2)\n" \
507 "\tlw $23,76($2)\n" \
508 "\tlw $fp,80($2)\n" \
509 : : "I" (RESERVED_C_STACK_BYTES+16) : "$2" );
514 %************************************************************************
516 \subsubsection[COptJumps-sparc]{Tail-jumping on Sun4s}
518 %************************************************************************
520 We want tailjumps to be calls, because `call xxx' is the only Sparc branch
521 that allows an arbitrary label as a target. (Gcc's ``goto *target'' construct
522 ends up loading the label into a register and then jumping, at the cost of
523 two extra instructions for the 32-bit load.)
525 When entering the threaded world, we stash our return address in a known
526 location so that \tr{%i7} is available as an extra callee-saves register.
527 Of course, we have to restore this when coming out of the threaded world.
529 I hate this god-forsaken architecture. Since the top of the reserved
530 stack space is used for globals and the bottom is reserved for outgoing arguments,
531 we have to stick our return address somewhere in the middle. Currently, I'm
532 allowing 100 extra outgoing arguments beyond the first 6. --JSM
535 #if sparc_TARGET_ARCH
/*
 * Assembler-level spelling of the miniInterpretEnd label, as a string that
 * MINI_INTERPRETER_END pastes into its asm text: the bare name when
 * solaris2_TARGET_OS is defined, the name with a leading underscore
 * otherwise.  The two definitions are alternatives, so they must sit in
 * the two arms of a terminated #ifdef/#else/#endif.
 */
537 #ifdef solaris2_TARGET_OS
538 #define MINI_INTERPRET_END "miniInterpretEnd"
539 #else
540 #define MINI_INTERPRET_END "_miniInterpretEnd"
541 #endif
543 #define JMP_(cont) ((F_) (cont))()
544 /* Oh so happily, the above turns into a "call" instruction,
545 which, on a SPARC, is nothing but a "jmpl" with the
546 return address in %o7 [which we don't care about].
548 #define RESUME_(target) JMP_(target)
/*
 * SPARC: declare the reserved stack area and stash the return address.
 *
 * Declares the local buffer `space` and a local variable `i7` bound to
 * register %i7, then copies %i7 into element 100 of `space` viewed as an
 * array of pointers.  The text above explains the choice of a slot in the
 * middle of the reserved area (100 extra outgoing arguments are allowed
 * for).  MINI_INTERPRETER_END reloads %i7 from the same slot and relies
 * on both `space` and `i7` still being in scope.
 */
550 #define MINI_INTERPRETER_SETUP \
551 StgChar space[RESERVED_C_STACK_BYTES+sizeof(void *)]; \
552 register void *i7 __asm__("%i7"); \
553 ((void **)(space))[100] = i7;
/*
 * SPARC: exit point of the mini-interpreter.  Emits the global label named
 * by MINI_INTERPRET_END and then loads element 100 of `space` (the slot
 * MINI_INTERPRETER_SETUP wrote) back into `i7`, the variable bound to
 * register %i7.  Must be used in the same scope as MINI_INTERPRETER_SETUP,
 * since it refers to that macro's `space` and `i7`.
 */
555 #define MINI_INTERPRETER_END \
556 __asm__ volatile (".align 4\n" \
557 ".global " MINI_INTERPRET_END "\n" \
558 MINI_INTERPRET_END ":\n" \
559 "\tld %1,%0" : "=r" (i7) : "m" (((void **)(space))[100]));
561 #endif /* __sparc__ */
564 %************************************************************************
566 \subsubsection[COptJumps-OOPS]{Someone screwed up here, too...}
568 %************************************************************************
570 If one of the above machine-dependent sections wasn't triggered,
571 @JMP_@ won't be defined and you'll get link errors (if not
576 *???????* No JMP_ macro???
579 #endif /* __STG_TAILJUMPS__ */
582 If @FUNBEGIN@ and @FUNEND@ weren't defined, give them the default
583 (nothing). Also, define @FB_@ and @FE_@ (short forms).
/*
 * Defaults for targets that did not define the function markers: FUNBEGIN
 * and FUNEND each expand to nothing unless already defined.  The two
 * checks are independent and each is closed by its own #endif, so a
 * target that defines only one of the pair still gets the default for the
 * other.  FB_ and FE_ are the short forms, always defined.
 */
585 #if ! defined(FUNBEGIN)
586 #define FUNBEGIN /* nothing */
587 #endif
588 #if ! defined(FUNEND)
589 #define FUNEND /* nothing */
590 #endif
592 #define FB_ FUNBEGIN /* short forms */
593 #define FE_ FUNEND
595 #endif /* ! that's all of... COPTJUMPS_H */