1 \section[COptJumps]{Macros for tail-jumping}
3 % this file is part of the C-as-assembler document
10 %************************************************************************
12 \subsection[COptJumps-portable]{Tail-(non-)jumping in ``portable~C''}
14 %************************************************************************
17 #if ! (defined(__STG_TAILJUMPS__) && defined(__GNUC__))
/* Portable build: used unless both __STG_TAILJUMPS__ and __GNUC__ are
   defined.  JMP_ does not jump; it makes the current function return the
   target, cast to F_.  RESUME_ is the same operation.
   NOTE(review): presumably the caller then invokes the returned address
   (the mini-interpreter mentioned in the text) -- confirm. */
19 #define JMP_(target) return((F_) (target))
20 #define RESUME_(target) JMP_(target)
23 Don't need to do anything magical for the mini-interpreter, because
24 we're really going to use the plain old C one (and the debugging
25 variant, too, for that matter).
27 %************************************************************************
29 \subsection[COptJumps-optimised]{Tail-jumping in ``optimised~C''}
31 %************************************************************************
34 #else /* __STG_TAILJUMPS__ && __GNUC__ */
37 GCC will have assumed that pushing/popping of C-stack frames is going
38 on when it generated its code, and used stack space accordingly.
39 However, we actually {\em post-process away} all such stack-framery
40 (see \tr{ghc/driver/ghc-asm-*.lprl}).
41 Things will be OK, however, if we initially make sure there are
42 @RESERVED_C_STACK_BYTES@ on the C-stack to begin with, for local
46 #define RESERVED_C_STACK_BYTES (512 * sizeof(I_)) /* room for 512 I_-sized slots that must be available on the C stack up front; MUST BE OF GENEROUS ALIGNMENT */
49 The platform-specific details are given in alphabetical order.
51 %************************************************************************
53 \subsubsection[COptJumps-alpha]{Tail-jumping on Alphas}
55 %************************************************************************
57 We have to set the procedure value register (\$27) before branching, so
58 that the target function can load the gp (\$29) as appropriate.
60 It seems that \tr{_procedure} can't be declared within the body of the
61 \tr{JMP_} macro\ldots{} at least, not if we want it to be \$27, which we do!
64 #if alpha_dec_osf1_TARGET
65 /* ToDo: less specific? */
68 Jumping to a new block of code, we need to set up $27 to point
69 at the target, so that the callee can establish its gp (as an
70 offset from its own starting address). For some reason, gcc
71 refuses to give us $27 for _procedure if it's declared as a
72 local variable, so the workaround is to make it a global.
74 Note: The local variable works in gcc 2.6.2, but fails in 2.5.8.
77 /* MOVED: to COptRegs.lh -- very unsatisfactorily.
78 Otherwise, we can get a "global register variable follows a
79 function definition" error.
81 Once we can take gcc 2.6.x as std, then we can use
82 the local variant, and the problem goes away. (WDP 95/02)
84 register void *_procedure __asm__("$27");
88 do { _procedure = (void *)(cont); \
93 When we resume at the point where a call was originally made,
94 we need to restore $26, so that gp can be reloaded appropriately.
95 However, sometimes we ``resume'' by entering a new function
96 (typically EnterNodeCode), so we need to set up $27 as well.
99 #define RESUME_(cont) \
100 do { _procedure = (void *)(cont); \
101 __asm__ volatile("mov $27,$26"); \
/* Alpha: save registers, then move $30 down past the reserved area.
   - stq stores integer registers $9..$15 at -8..-56($30);
   - stt stores floating-point registers $f2..$f9 at -64..-120($30);
   - the final lda subtracts operand %0 from $30, where %0 is
     RESERVED_C_STACK_BYTES + 8*sizeof(double) + 8*sizeof(long).
   NOTE(review): eight long-sized slots are counted in %0 although only
   seven integer registers are stored -- presumably padding for
   alignment; confirm. */
105 #define MINI_INTERPRETER_SETUP \
106 __asm__ volatile ("stq $9,-8($30)\n" \
107 "stq $10,-16($30)\n" \
108 "stq $11,-24($30)\n" \
109 "stq $12,-32($30)\n" \
110 "stq $13,-40($30)\n" \
111 "stq $14,-48($30)\n" \
112 "stq $15,-56($30)\n" \
113 "stt $f2,-64($30)\n" \
114 "stt $f3,-72($30)\n" \
115 "stt $f4,-80($30)\n" \
116 "stt $f5,-88($30)\n" \
117 "stt $f6,-96($30)\n" \
118 "stt $f7,-104($30)\n" \
119 "stt $f8,-112($30)\n" \
120 "stt $f9,-120($30)\n" \
121 "lda $30,-%0($30)" : : \
122 "K" (RESERVED_C_STACK_BYTES+8*sizeof(double)+8*sizeof(long)));
/* Alpha: counterpart of MINI_INTERPRETER_SETUP.
   Emits the global label miniInterpretEnd (after .align 3), adds operand
   %0 back onto $30 (same expression as in SETUP), then reloads the
   integer registers with ldq and $f2..$f9 with ldt from the offsets at
   which SETUP stored them.
   NOTE(review): SETUP stores $9 at -8($30), but no matching
   "ldq $9,-8($30)" is visible in this macro -- verify that $9 is
   restored. */
124 #define MINI_INTERPRETER_END \
125 __asm__ volatile (".align 3\n" \
126 ".globl miniInterpretEnd\n" \
127 "miniInterpretEnd:\n" \
128 "lda $30,%0($30)\n" \
130 "ldq $10,-16($30)\n" \
131 "ldq $11,-24($30)\n" \
132 "ldq $12,-32($30)\n" \
133 "ldq $13,-40($30)\n" \
134 "ldq $14,-48($30)\n" \
135 "ldq $15,-56($30)\n" \
136 "ldt $f2,-64($30)\n" \
137 "ldt $f3,-72($30)\n" \
138 "ldt $f4,-80($30)\n" \
139 "ldt $f5,-88($30)\n" \
140 "ldt $f6,-96($30)\n" \
141 "ldt $f7,-104($30)\n" \
142 "ldt $f8,-112($30)\n" \
143 "ldt $f9,-120($30)" : : \
144 "K" (RESERVED_C_STACK_BYTES+8*sizeof(double)+8*sizeof(long)));
149 %************************************************************************
151 \subsubsection[COptJumps-Hpux]{Tail-jumping on an HP-PA machine running HP-UX}
153 %************************************************************************
156 #if hppa1_1_hp_hpux_TARGET
158 /* do FUNBEGIN/END the easy way */
159 #define FUNBEGIN __asm__ volatile ("--- BEGIN ---");
160 #define FUNEND __asm__ volatile ("--- END ---");
162 /* The stack grows up! Local variables are allocated just above the
163 frame pointer, and extra arguments are stashed just below the stack
164 pointer, so the safe space is again in the middle (cf. sparc).
168 do { void *_procedure = (void *)(cont); \
172 #define RESUME_(cont) JMP_(cont)
/* HP-PA: declare the reserved stack area and save registers into it.
   - `space` is a local array of RESERVED_C_STACK_BYTES plus room for
     16 longs and 10 doubles;
   - %r19 is set to %r3 + operand %0, and %r3..%r18 are stored with stw
     at 0..60(%r19);
   - %r19 is then advanced by 80, 32 and 32 bytes, and %fr12..%fr21 are
     stored with fstds at -16/-8/0/8 around each new %r19;
   - %r19 is listed as clobbered.
   NOTE(review): the save area is addressed from %r3, not through
   `space`; presumably `space` only makes the compiler reserve the room
   -- confirm.
   NOTE(review): operand %0 uses 116*sizeof(long) while the array size
   uses 16*sizeof(long); this may be deliberate (offset into the middle
   of the area) but should be confirmed. */
174 #define MINI_INTERPRETER_SETUP \
175 StgChar space[RESERVED_C_STACK_BYTES+16*sizeof(long)+10*sizeof(double)]; \
176 __asm__ volatile ("ldo %0(%%r3),%%r19\n" \
177 "\tstw %%r3, 0(0,%%r19)\n" \
178 "\tstw %%r4, 4(0,%%r19)\n" \
179 "\tstw %%r5, 8(0,%%r19)\n" \
180 "\tstw %%r6,12(0,%%r19)\n" \
181 "\tstw %%r7,16(0,%%r19)\n" \
182 "\tstw %%r8,20(0,%%r19)\n" \
183 "\tstw %%r9,24(0,%%r19)\n" \
184 "\tstw %%r10,28(0,%%r19)\n" \
185 "\tstw %%r11,32(0,%%r19)\n" \
186 "\tstw %%r12,36(0,%%r19)\n" \
187 "\tstw %%r13,40(0,%%r19)\n" \
188 "\tstw %%r14,44(0,%%r19)\n" \
189 "\tstw %%r15,48(0,%%r19)\n" \
190 "\tstw %%r16,52(0,%%r19)\n" \
191 "\tstw %%r17,56(0,%%r19)\n" \
192 "\tstw %%r18,60(0,%%r19)\n" \
193 "\tldo 80(%%r19),%%r19\n" \
194 "\tfstds %%fr12,-16(0,%%r19)\n" \
195 "\tfstds %%fr13, -8(0,%%r19)\n" \
196 "\tfstds %%fr14, 0(0,%%r19)\n" \
197 "\tfstds %%fr15, 8(0,%%r19)\n" \
198 "\tldo 32(%%r19),%%r19\n" \
199 "\tfstds %%fr16,-16(0,%%r19)\n" \
200 "\tfstds %%fr17, -8(0,%%r19)\n" \
201 "\tfstds %%fr18, 0(0,%%r19)\n" \
202 "\tfstds %%fr19, 8(0,%%r19)\n" \
203 "\tldo 32(%%r19),%%r19\n" \
204 "\tfstds %%fr20,-16(0,%%r19)\n" \
205 "\tfstds %%fr21, -8(0,%%r19)\n" : : \
206 "n" (RESERVED_C_STACK_BYTES - (116 * sizeof(long) + 10 * sizeof(double))) : "%r19" );
/* HP-PA: counterpart of MINI_INTERPRETER_SETUP.
   Exports and defines the label miniInterpretEnd, recomputes
   %r19 = %r3 + operand %0 (same expression as in SETUP), then reloads
   %r3..%r18 with ldw and %fr12..%fr21 with fldds from the offsets at
   which SETUP stored them.  %r19 is listed as clobbered. */
208 #define MINI_INTERPRETER_END \
209 __asm__ volatile (".align 4\n" \
210 "\t.EXPORT miniInterpretEnd,CODE\n" \
211 "\t.EXPORT miniInterpretEnd,ENTRY,PRIV_LEV=3\n" \
212 "miniInterpretEnd\n" \
213 "\tldo %0(%%r3),%%r19\n" \
214 "\tldw 0(0,%%r19),%%r3\n" \
215 "\tldw 4(0,%%r19),%%r4\n" \
216 "\tldw 8(0,%%r19),%%r5\n" \
217 "\tldw 12(0,%%r19),%%r6\n" \
218 "\tldw 16(0,%%r19),%%r7\n" \
219 "\tldw 20(0,%%r19),%%r8\n" \
220 "\tldw 24(0,%%r19),%%r9\n" \
221 "\tldw 28(0,%%r19),%%r10\n" \
222 "\tldw 32(0,%%r19),%%r11\n" \
223 "\tldw 36(0,%%r19),%%r12\n" \
224 "\tldw 40(0,%%r19),%%r13\n" \
225 "\tldw 44(0,%%r19),%%r14\n" \
226 "\tldw 48(0,%%r19),%%r15\n" \
227 "\tldw 52(0,%%r19),%%r16\n" \
228 "\tldw 56(0,%%r19),%%r17\n" \
229 "\tldw 60(0,%%r19),%%r18\n" \
230 "\tldo 80(%%r19),%%r19\n" \
231 "\tfldds -16(0,%%r19),%%fr12\n" \
232 "\tfldds -8(0,%%r19),%%fr13\n" \
233 "\tfldds 0(0,%%r19),%%fr14\n" \
234 "\tfldds 8(0,%%r19),%%fr15\n" \
235 "\tldo 32(%%r19),%%r19\n" \
236 "\tfldds -16(0,%%r19),%%fr16\n" \
237 "\tfldds -8(0,%%r19),%%fr17\n" \
238 "\tfldds 0(0,%%r19),%%fr18\n" \
239 "\tfldds 8(0,%%r19),%%fr19\n" \
240 "\tldo 32(%%r19),%%r19\n" \
241 "\tfldds -16(0,%%r19),%%fr20\n" \
242 "\tfldds -8(0,%%r19),%%fr21\n" : : \
243 "n" (RESERVED_C_STACK_BYTES - (116 * sizeof(long) + 10 * sizeof(double))) : "%r19");
245 #endif /* hppa1.1-hp-hpux* */
248 %************************************************************************
250 \subsubsection[COptJumps-iX86]{Tail-jumping on a 386/486}
252 %************************************************************************
257 /* *not* a good way to do this (WDP 96/05) */
258 #if defined(solaris2_TARGET_OS) || defined(linux_TARGET_OS)
259 #define MINI_INTERPRET_END "miniInterpretEnd"
261 #define MINI_INTERPRET_END "_miniInterpretEnd"
264 /* do FUNBEGIN/END the easy way */
265 #define FUNBEGIN __asm__ volatile ("--- BEGIN ---");
266 #define FUNEND __asm__ volatile ("--- END ---");
268 /* try "m68k-style" for now */
269 extern void __DISCARD__(STG_NO_ARGS);
274 target = (void *)(cont); \
278 #define RESUME_(target) JMP_(target)
280 /* The safe part of the stack frame is near the top */
282 #define MINI_INTERPRETER_SETUP \
283 StgChar space[RESERVED_C_STACK_BYTES+4*sizeof(long)]; \
284 __asm__ volatile ("leal %c0(%%esp),%%eax\n" \
285 "\tmovl %%ebx,0(%%eax)\n" \
286 "\tmovl %%esi,4(%%eax)\n" \
287 "\tmovl %%edi,8(%%eax)\n" \
288 "\tmovl %%ebp,12(%%eax)\n" \
289 : : "n" (RESERVED_C_STACK_BYTES) \
292 /* the initial "addl $4,%esp" in ..._END compensates for
293 the "call" (rather than a jump) in miniInterpret.
296 #define MINI_INTERPRETER_END \
297 __asm__ volatile (".align 4\n" \
298 ".globl " MINI_INTERPRET_END "\n" \
299 MINI_INTERPRET_END ":\n" \
302 __asm__ volatile ("addl $4,%%esp\n" \
303 "\tleal %c0(%%esp),%%eax\n" \
304 "\tmovl 0(%%eax),%%ebx\n" \
305 "\tmovl 4(%%eax),%%esi\n" \
306 "\tmovl 8(%%eax),%%edi\n" \
307 "\tmovl 12(%%eax),%%ebp" \
308 : : "n" (RESERVED_C_STACK_BYTES) : "%eax");
310 #endif /* __i[34]86__ */
313 %************************************************************************
315 \subsubsection[COptJumps-m68k]{Tail-jumping on m68k boxes}
317 %************************************************************************
319 For 680x0s, we use a quite-magic @JMP_@ macro, which includes
320 beginning- and end-of-function markers.
325 #define FUNBEGIN __asm__ volatile ("--- BEGIN ---");
326 #define FUNEND __asm__ volatile ("--- END ---");
329 The call to \tr{__DISCARD__} in @JMP_@ is fodder for GCC, to force it
330 to pop arguments to previous function calls before the end of the
331 current function. This is unnecessary if we can manage to compile
332 with \tr{-fomit-frame-pointer} as well as \tr{-fno-defer-pop}. (WDP
333 95/02: Either false or dodgy.) At the moment, the asm mangler removes
334 these calls to \tr{__DISCARD__}.
338 extern void __DISCARD__(STG_NO_ARGS);
343 target = (void *)(cont); \
347 #define RESUME_(target) JMP_(target)
/* m68k: declare the reserved stack area and save registers above it.
   - `space` is a local array of RESERVED_C_STACK_BYTES plus room for
     11 longs;
   - moveml stores the eleven registers a2-a6 and d2-d7 starting at
     sp + RESERVED_C_STACK_BYTES;
   - lea then loads that same address into a6. */
349 #define MINI_INTERPRETER_SETUP \
350 StgChar space[RESERVED_C_STACK_BYTES+11*sizeof(long)]; \
351 __asm__ volatile ("moveml a2-a6/d2-d7,sp@(%c0)\n" \
352 "\tlea sp@(%c0),a6" : : "J" (RESERVED_C_STACK_BYTES));
354 #define MINI_INTERPRETER_END \
355 __asm__ volatile (".even\n" \
356 ".globl _miniInterpretEnd\n" \
357 "_miniInterpretEnd:\n" \
359 "\tmoveml sp@(%c0),a2-a6/d2-d7" : : "J" (RESERVED_C_STACK_BYTES));
361 #endif /* __m68k__ */
364 %************************************************************************
366 \subsubsection[COptJumps-mips]{Tail-jumping on a MIPS box}
368 %************************************************************************
371 #if mipseb_TARGET_ARCH || mipsel_TARGET_ARCH
373 /* do FUNBEGIN/END the easy way */
374 #define FUNBEGIN __asm__ volatile ("--- BEGIN ---");
375 #define FUNEND __asm__ volatile ("--- END ---");
377 /* try "m68k-style" for now */
378 extern void __DISCARD__(STG_NO_ARGS);
380 /* this is "alpha-style" */
382 do { __DISCARD__(); \
383 _procedure = (void *)(cont); \
387 #define RESUME_(target) JMP_(target)
389 /* _All_ callee-saved regs, whether we steal them or not, must be saved
/* MIPS: declare the reserved stack area and save registers above it.
   - `space` is a local array of RESERVED_C_STACK_BYTES plus room for
     6 doubles and 9 longs;
   - $2 is set to $sp + (RESERVED_C_STACK_BYTES+16);
   - s.d stores $f20,$f22,...,$f30 at 0..40($2);
   - sw stores $16..$23 and $fp at 48..80($2);
   - $2 is listed as clobbered. */
393 #define MINI_INTERPRETER_SETUP \
394 StgChar space[RESERVED_C_STACK_BYTES+6*sizeof(double)+9*sizeof(long)]; \
395 __asm__ volatile ("addu $2,$sp,%0\n" \
396 "\ts.d $f20,0($2)\n" \
397 "\ts.d $f22,8($2)\n" \
398 "\ts.d $f24,16($2)\n" \
399 "\ts.d $f26,24($2)\n" \
400 "\ts.d $f28,32($2)\n" \
401 "\ts.d $f30,40($2)\n" \
402 "\tsw $16,48($2)\n" \
403 "\tsw $17,52($2)\n" \
404 "\tsw $18,56($2)\n" \
405 "\tsw $19,60($2)\n" \
406 "\tsw $20,64($2)\n" \
407 "\tsw $21,68($2)\n" \
408 "\tsw $22,72($2)\n" \
409 "\tsw $23,76($2)\n" \
410 "\tsw $fp,80($2)\n" \
411 : : "I" (RESERVED_C_STACK_BYTES+16) : "$2" );
413 /* the 16 bytes is for the argument-register save-area above $sp */
/* MIPS: counterpart of MINI_INTERPRETER_SETUP.
   Defines the global label miniInterpretEnd (after .align 2), recomputes
   $2 = $sp + (RESERVED_C_STACK_BYTES+16), then reloads $f20..$f30 with
   l.d and $16..$23 and $fp with lw from the offsets at which SETUP
   stored them.  $2 is listed as clobbered. */
415 #define MINI_INTERPRETER_END \
416 __asm__ volatile (".align 2\n" \
417 ".globl miniInterpretEnd\n" \
418 "miniInterpretEnd:\n" \
419 "\taddu $2,$sp,%0\n" \
420 "\tl.d $f20,0($2)\n" \
421 "\tl.d $f22,8($2)\n" \
422 "\tl.d $f24,16($2)\n" \
423 "\tl.d $f26,24($2)\n" \
424 "\tl.d $f28,32($2)\n" \
425 "\tl.d $f30,40($2)\n" \
426 "\tlw $16,48($2)\n" \
427 "\tlw $17,52($2)\n" \
428 "\tlw $18,56($2)\n" \
429 "\tlw $19,60($2)\n" \
430 "\tlw $20,64($2)\n" \
431 "\tlw $21,68($2)\n" \
432 "\tlw $22,72($2)\n" \
433 "\tlw $23,76($2)\n" \
434 "\tlw $fp,80($2)\n" \
435 : : "I" (RESERVED_C_STACK_BYTES+16) : "$2" );
440 %************************************************************************
442 \subsubsection[COptJumps-powerpc]{Tail-jumping on an IBM PowerPC running AIX}
444 %************************************************************************
447 #if powerpc_TARGET_ARCH
449 /* do FUNBEGIN/END the easy way */
450 #define FUNBEGIN __asm__ volatile ("--- BEGIN ---");
451 #define FUNEND __asm__ volatile ("--- END ---");
453 /* try "m68k-style" for now */
454 extern void __DISCARD__(STG_NO_ARGS);
456 /* this is "alpha-style" */
458 do { void *_procedure = (void *)(cont); \
462 #define RESUME_(target) JMP_(target)
464 /* _All_ callee-saved regs, whether we steal them or not, must be saved
/* PowerPC: declare the reserved stack area and save registers above it.
   As written: `space` is RESERVED_C_STACK_BYTES plus room for 6 doubles
   and 9 longs; $2 = $sp + (RESERVED_C_STACK_BYTES+16); six doubles and
   nine words are stored at 0..80($2); $2 is listed as clobbered.
   NOTE(review): this body is textually identical to the MIPS
   MINI_INTERPRETER_SETUP in this file (addu / s.d / sw, and the register
   names $sp, $fp, $f20.., $16..), although it is guarded by
   powerpc_TARGET_ARCH.  It looks like a placeholder copied from the MIPS
   section -- confirm before relying on it. */
468 #define MINI_INTERPRETER_SETUP \
469 StgChar space[RESERVED_C_STACK_BYTES+6*sizeof(double)+9*sizeof(long)]; \
470 __asm__ volatile ("addu $2,$sp,%0\n" \
471 "\ts.d $f20,0($2)\n" \
472 "\ts.d $f22,8($2)\n" \
473 "\ts.d $f24,16($2)\n" \
474 "\ts.d $f26,24($2)\n" \
475 "\ts.d $f28,32($2)\n" \
476 "\ts.d $f30,40($2)\n" \
477 "\tsw $16,48($2)\n" \
478 "\tsw $17,52($2)\n" \
479 "\tsw $18,56($2)\n" \
480 "\tsw $19,60($2)\n" \
481 "\tsw $20,64($2)\n" \
482 "\tsw $21,68($2)\n" \
483 "\tsw $22,72($2)\n" \
484 "\tsw $23,76($2)\n" \
485 "\tsw $fp,80($2)\n" \
486 : : "I" (RESERVED_C_STACK_BYTES+16) : "$2" );
488 /* the 16 bytes is for the argument-register save-area above $sp */
/* PowerPC: counterpart of MINI_INTERPRETER_SETUP.
   As written: defines the global label miniInterpretEnd, recomputes
   $2 = $sp + (RESERVED_C_STACK_BYTES+16), and reloads six doubles and
   nine words from 0..80($2).  $2 is listed as clobbered.
   NOTE(review): this body is textually identical to the MIPS
   MINI_INTERPRETER_END in this file (addu / l.d / lw, MIPS register
   names), although it is guarded by powerpc_TARGET_ARCH.  It looks like
   a placeholder copied from the MIPS section -- confirm before relying
   on it. */
490 #define MINI_INTERPRETER_END \
491 __asm__ volatile (".align 2\n" \
492 ".globl miniInterpretEnd\n" \
493 "miniInterpretEnd:\n" \
494 "\taddu $2,$sp,%0\n" \
495 "\tl.d $f20,0($2)\n" \
496 "\tl.d $f22,8($2)\n" \
497 "\tl.d $f24,16($2)\n" \
498 "\tl.d $f26,24($2)\n" \
499 "\tl.d $f28,32($2)\n" \
500 "\tl.d $f30,40($2)\n" \
501 "\tlw $16,48($2)\n" \
502 "\tlw $17,52($2)\n" \
503 "\tlw $18,56($2)\n" \
504 "\tlw $19,60($2)\n" \
505 "\tlw $20,64($2)\n" \
506 "\tlw $21,68($2)\n" \
507 "\tlw $22,72($2)\n" \
508 "\tlw $23,76($2)\n" \
509 "\tlw $fp,80($2)\n" \
510 : : "I" (RESERVED_C_STACK_BYTES+16) : "$2" );
515 %************************************************************************
517 \subsubsection[COptJumps-sparc]{Tail-jumping on Sun4s}
519 %************************************************************************
521 We want tailjumps to be calls, because `call xxx' is the only Sparc branch
522 that allows an arbitrary label as a target. (Gcc's ``goto *target'' construct
523 ends up loading the label into a register and then jumping, at the cost of
524 two extra instructions for the 32-bit load.)
526 When entering the threaded world, we stash our return address in a known
527 location so that \tr{%i7} is available as an extra callee-saves register.
528 Of course, we have to restore this when coming out of the threaded world.
530 I hate this god-forsaken architecture. Since the top of the reserved
531 stack space is used for globals and the bottom is reserved for outgoing arguments,
532 we have to stick our return address somewhere in the middle. Currently, I'm
533 allowing 100 extra outgoing arguments beyond the first 6. --JSM
536 #if sparc_TARGET_ARCH
538 #ifdef solaris2_TARGET_OS
539 #define MINI_INTERPRET_END "miniInterpretEnd"
541 #define MINI_INTERPRET_END "_miniInterpretEnd"
544 #define JMP_(cont) ((F_) (cont))()
545 /* Oh so happily, the above turns into a "call" instruction,
546 which, on a SPARC, is nothing but a "jmpl" with the
547 return address in %o7 [which we don't care about].
549 #define RESUME_(target) JMP_(target)
/* SPARC: declare the reserved stack area and stash %i7 inside it.
   - `space` is a local array of RESERVED_C_STACK_BYTES plus one pointer;
   - `i7` is a local register variable bound to %i7;
   - its value is written to pointer-sized slot 100 of `space`, i.e. in
     the middle of the area, as the surrounding text explains.
   Both `space` and `i7` are declared here and are referenced again by
   MINI_INTERPRETER_END. */
551 #define MINI_INTERPRETER_SETUP \
552 StgChar space[RESERVED_C_STACK_BYTES+sizeof(void *)]; \
553 register void *i7 __asm__("%i7"); \
554 ((void **)(space))[100] = i7;
/* SPARC: counterpart of MINI_INTERPRETER_SETUP.
   Defines a global label whose name is the MINI_INTERPRET_END string
   (after .align 4), then uses ld to load slot 100 of `space` back into
   `i7`.  It declares neither `space` nor `i7`, so it must be expanded
   where the declarations made by MINI_INTERPRETER_SETUP are in scope. */
556 #define MINI_INTERPRETER_END \
557 __asm__ volatile (".align 4\n" \
558 ".global " MINI_INTERPRET_END "\n" \
559 MINI_INTERPRET_END ":\n" \
560 "\tld %1,%0" : "=r" (i7) : "m" (((void **)(space))[100]));
562 #endif /* __sparc__ */
565 %************************************************************************
567 \subsubsection[COptJumps-OOPS]{Someone screwed up here, too...}
569 %************************************************************************
571 If one of the above machine-dependent sections wasn't triggered,
572 @JMP_@ won't be defined and you'll get link errors (if not
577 *???????* No JMP_ macro???
580 #endif /* __STG_TAILJUMPS__ */
583 If @FUNBEGIN@ and @FUNEND@ weren't defined, give them the default
584 (nothing). Also, define @FB_@ and @FE_@ (short forms).
586 #if ! defined(FUNBEGIN)
587 #define FUNBEGIN /* nothing */
589 #if ! defined(FUNEND)
590 #define FUNEND /* nothing */
593 #define FB_ FUNBEGIN /* short forms */
596 #endif /* ! that's all of... COPTJUMPS_H */