1 /* -----------------------------------------------------------------------------
2 * $Id: StgCRun.c,v 1.44 2004/09/03 15:28:56 simonmar Exp $
4 * (c) The GHC Team, 1998-2003
8 * To run an STG function from C land, call
10 * rv = StgRun(f,BaseReg);
12 * where "f" is the STG function to call, and BaseReg is the address of the
13 * RegTable for this run (we might have separate RegTables if we're running
14 * multiple threads on an SMP machine).
16 * In the end, "f" must JMP to StgReturn (defined below),
17 * passing the return-value "rv" in R1,
18 * to return to the caller of StgRun returning "rv" in
19 * whatever way C returns a value.
21 * NOTE: StgRun/StgReturn do *NOT* load or store Hp or any
22 * other registers (other than saving the C callee-saves
23 * registers). Instead, the called function "f" must do that
26 * GCC will have assumed that pushing/popping of C-stack frames is
27 * going on when it generated its code, and used stack space
28 * accordingly. However, we actually {\em post-process away} all
29 * such stack-framery (see \tr{ghc/driver/ghc-asm.lprl}). Things will
30 * be OK however, if we initially make sure there are
31 * @RESERVED_C_STACK_BYTES@ on the C-stack to begin with, for local
34 * -------------------------------------------------------------------------- */
36 #include "PosixSource.h"
40 * We define the following (unused) global register variables, because for
41 * some reason gcc generates sub-optimal code for StgRun() on the Alpha
42 * (unnecessarily saving extra registers on the stack) if we don't.
44 * Why do it at the top of this file, rather than near StgRun() below? Because
45 * gcc doesn't let us define global register variables after any function
46 * definition has been read. Any point after #include "Stg.h" would be too
49 * We define alpha_EXTRA_CAREFUL here to save $s6, $f8 and $f9 -- registers
50 * that we don't use but which are callee-save registers. The __divq() routine
51 * in libc.a clobbers $s6.
53 #include "ghcconfig.h"
54 #ifdef alpha_TARGET_ARCH
55 #define alpha_EXTRA_CAREFUL
56 register long fake_ra __asm__("$26");
57 register long fake_gp __asm__("$29");
58 #ifdef alpha_EXTRA_CAREFUL
59 register long fake_s6 __asm__("$15");
60 register double fake_f8 __asm__("$f8");
61 register double fake_f9 __asm__("$f9");
65 /* include Stg.h first because we want real machine regs in here: we
66 * have to get the value of R1 back from Stg land to C land intact.
72 #include "OSThreads.h"
73 #include "Capability.h"
80 #ifdef USE_MINIINTERPRETER
82 /* -----------------------------------------------------------------------------
83 any architecture (using miniinterpreter)
84 -------------------------------------------------------------------------- */
/*
 * Mini-interpreter flavour of StgRun (built when USE_MINIINTERPRETER is
 * defined).
 *
 * The visible code calls f and stores the pointer it returns back into f.
 * When the interpreter debug flag is set it first prints "Jumping to "
 * followed by the target pointer and flushes stdout.  The value handed
 * back to the caller is R1.i cast to StgThreadReturnCode.  basereg is
 * marked STG_UNUSED and is not referenced.
 *
 * NOTE(review): the braces and the loop around the call are not visible
 * in this view -- confirm the termination condition against the full file.
 */
86 StgThreadReturnCode StgRun(StgFunPtr f, StgRegTable *basereg STG_UNUSED)
89 if (RtsFlags[0].DebugFlags.interpreter) {
90 debugBelch("Jumping to ");
91 printPtr((P_)f); fflush(stdout);
94 f = (StgFunPtr) (f)();
96 return (StgThreadReturnCode)R1.i;
99 StgFunPtr StgReturn(void)
104 #else /* !USE_MINIINTERPRETER */
106 #ifdef LEADING_UNDERSCORE
107 #define STG_RETURN "_StgReturn"
109 #define STG_RETURN "StgReturn"
112 /* -----------------------------------------------------------------------------
114 -------------------------------------------------------------------------- */
116 #ifdef i386_TARGET_ARCH
/*
 * i386 StgRun.
 *
 * Declares a local buffer of RESERVED_C_STACK_BYTES plus four
 * pointer-sized slots.  The assembler copies %esp into %eax and stores
 * %ebx, %esi, %edi and %ebp at offsets 0, 4, 8 and 12 from %eax.
 * STG_RETURN is exported as a global symbol; after it, %esi is copied to
 * %eax as the result (R1) and the same four registers are reloaded from
 * offsets 0..12 relative to %edx, which starts as a copy of %esp.
 *
 * Operands: r is an early-clobber output bound to %eax, space is a memory
 * output, f and basereg are memory inputs, RESERVED_C_STACK_BYTES is an
 * immediate, and %edx is listed as clobbered.
 *
 * NOTE(review): the asm statement opener, any adjustment applied to
 * %eax/%edx after the copy from %esp, the jump to f, the STG_RETURN label
 * line and the final return are not visible in this view.
 */
119 StgRun(StgFunPtr f, StgRegTable *basereg) {
121 unsigned char space[ RESERVED_C_STACK_BYTES + 4*sizeof(void *) ];
122 StgThreadReturnCode r;
126 * save callee-saves registers on behalf of the STG code.
128 "movl %%esp, %%eax\n\t"
130 "movl %%ebx,0(%%eax)\n\t"
131 "movl %%esi,4(%%eax)\n\t"
132 "movl %%edi,8(%%eax)\n\t"
133 "movl %%ebp,12(%%eax)\n\t"
139 * grab the function argument from the stack, and jump to it.
144 ".global " STG_RETURN "\n"
147 "movl %%esi, %%eax\n\t" /* Return value in R1 */
150 * restore callee-saves registers. (Don't stomp on %%eax!)
152 "movl %%esp, %%edx\n\t"
154 "movl 0(%%edx),%%ebx\n\t" /* restore the registers saved above */
155 "movl 4(%%edx),%%esi\n\t"
156 "movl 8(%%edx),%%edi\n\t"
157 "movl 12(%%edx),%%ebp\n\t"
159 : "=&a" (r), "=m" (space)
160 : "m" (f), "m" (basereg), "i" (RESERVED_C_STACK_BYTES)
161 : "edx" /* stomps on %edx */
169 /* ----------------------------------------------------------------------------
170 x86-64 is almost the same as plain x86.
172 I've done it using entirely inline assembler, because I couldn't
173 get gcc to generate the correct subtraction from %rsp by using
174 the local array variable trick. It didn't seem to reserve
175 enough space. Oh well, it's not much harder this way.
177 ------------------------------------------------------------------------- */
179 #ifdef x86_64_TARGET_ARCH
181 extern StgThreadReturnCode StgRun(StgFunPtr f, StgRegTable *basereg);
/*
 * Container function for the x86-64 StgRun/StgReturn code, which is
 * written entirely as inline assembler.
 *
 * Operand %0 is the immediate RESERVED_C_STACK_BYTES+48.  The save
 * sequence computes %rsp + (%0 - 48) in %rax and stores %rbx, %rbp, %r12,
 * %r13, %r14 and %r15 at offsets 0..40 from it.  %rsi is then copied to
 * %rbx and %rdi to %rax.  STG_RETURN is exported as a global symbol; after
 * it, %r13 is copied to %rax as the result (R1) and the six registers are
 * reloaded from the same offsets relative to %rdx.
 *
 * NOTE(review): presumably %rdi/%rsi carry f and basereg per the calling
 * convention -- confirm.  The asm opener, the StgRun label, the stack
 * pointer adjustments, the jump and the return are not visible in this
 * view.
 */
183 static void StgRunIsImplementedInAssembler(void)
187 * save callee-saves registers on behalf of the STG code.
192 "movq %%rsp, %%rax\n\t"
193 "addq %0-48, %%rax\n\t"
194 "movq %%rbx,0(%%rax)\n\t"
195 "movq %%rbp,8(%%rax)\n\t"
196 "movq %%r12,16(%%rax)\n\t"
197 "movq %%r13,24(%%rax)\n\t"
198 "movq %%r14,32(%%rax)\n\t"
199 "movq %%r15,40(%%rax)\n\t"
203 "movq %%rsi,%%rbx\n\t"
205 * grab the function argument from the stack, and jump to it.
207 "movq %%rdi,%%rax\n\t"
210 ".global " STG_RETURN "\n"
213 "movq %%r13, %%rax\n\t" /* Return value in R1 */
216 * restore callee-saves registers. (Don't stomp on %%rax!)
218 "movq %%rsp, %%rdx\n\t"
219 "addq %0-48, %%rdx\n\t"
220 "movq 0(%%rdx),%%rbx\n\t" /* restore the registers saved above */
221 "movq 8(%%rdx),%%rbp\n\t"
222 "movq 16(%%rdx),%%r12\n\t"
223 "movq 24(%%rdx),%%r13\n\t"
224 "movq 32(%%rdx),%%r14\n\t"
225 "movq 40(%%rdx),%%r15\n\t"
229 : : "i"(RESERVED_C_STACK_BYTES+48 /*stack frame size*/));
234 /* -----------------------------------------------------------------------------
238 OLD COMMENT from GHC-3.02:
240 We want tailjumps to be calls, because `call xxx' is the only Sparc
241 branch that allows an arbitrary label as a target. (Gcc's ``goto
242 *target'' construct ends up loading the label into a register and
243 then jumping, at the cost of two extra instructions for the 32-bit
246 When entering the threaded world, we stash our return address in a
247 known location so that \tr{%i7} is available as an extra
248 callee-saves register. Of course, we have to restore this when
249 coming out of the threaded world.
251 I hate this god-forsaken architecture. Since the top of the
252 reserved stack space is used for globals and the bottom is reserved
253 for outgoing arguments, we have to stick our return address
254 somewhere in the middle. Currently, I'm allowing 100 extra
255 outgoing arguments beyond the first 6. --JSM
257 Updated info (GHC 4.06): we don't appear to use %i7 any more, so
258 I'm not sure whether we still need to save it. Incidentally, what
259 does the last paragraph above mean when it says "the top of the
260 stack is used for globals"? What globals? --SDM
262 Updated info (GHC 4.08.2): not saving %i7 any more (see below).
263 -------------------------------------------------------------------------- */
265 #ifdef sparc_TARGET_ARCH
/*
 * SPARC StgRun.
 *
 * Declares a local buffer of RESERVED_C_STACK_BYTES and a register
 * variable bound to %i7.  The visible statements store %i7 into slot 100
 * of the buffer (viewed as an array of void pointers), export STG_RETURN
 * as a global symbol with l0-l7 listed as clobbered, reload %i7 from
 * the same slot with an ld instruction, and return R1.i cast to
 * StgThreadReturnCode.
 *
 * NOTE(review): the comment inside the body says %i7 is no longer saved,
 * yet the save and reload statements appear here.  Any preprocessor guards
 * around them, the call to f and the STG_RETURN label line are not
 * visible in this view -- confirm against the full file.
 */
268 StgRun(StgFunPtr f, StgRegTable *basereg) {
270 unsigned char space[RESERVED_C_STACK_BYTES];
272 register void *i7 __asm__("%i7");
273 ((void **)(space))[100] = i7;
278 ".global " STG_RETURN "\n"
280 : : : "l0","l1","l2","l3","l4","l5","l6","l7");
281 /* we tell the C compiler that l0-l7 are clobbered on return to
282 * StgReturn, otherwise it tries to use these to save eg. the
283 * address of space[100] across the call. The correct thing
284 * to do would be to save all the callee-saves regs, but we
285 * can't be bothered to do that.
287 * The code that gcc generates for this little fragment is now
288 * terrible. We could do much better by coding it directly in
292 /* updated 4.08.2: we don't save %i7 in the middle of the reserved
293 * space any more, since gcc tries to save its address across the
294 * call to f(), this gets clobbered in STG land and we end up
295 * dereferencing a bogus pointer in StgReturn.
297 __asm__ volatile ("ld %1,%0"
298 : "=r" (i7) : "m" (((void **)(space))[100]));
300 return (StgThreadReturnCode)R1.i;
305 /* -----------------------------------------------------------------------------
308 "The stack pointer (SP) must at all times denote an address that has octaword
309 alignment. (This restriction has the side effect that the in-memory portion
310 of the argument list, if any, will start on an octaword boundary.) Note that
311 the stack grows toward lower addresses. During a procedure invocation, SP
312 can never be set to a value that is higher than the value of SP at entry to
313 that procedure invocation.
315 "The contents of the stack, located above the portion of the argument list
316 (if any) that is passed in memory, belong to the calling procedure. Because
317 they are part of the calling procedure, they should not be read or written
318 by the called procedure, except as specified by indirect arguments or
319 language-controlled up-level references.
321 "The SP value might be used by the hardware when raising exceptions and
322 asynchronous interrupts. It must be assumed that the contents of the stack
323 below the current SP value and within the stack for the current thread are
324 continually and unpredictably modified, as specified in the _Alpha
325 Architecture Reference Manual_, and as a result of asynchronous software
328 -- Compaq Computer Corporation, Houston. Tru64 UNIX Calling Standard for
329 Alpha Systems, 5.1 edition, August 2000, section 3.2.1. http://www.
330 tru64unix.compaq.com/docs/base_doc/DOCUMENTATION/V51_PDF/ARH9MBTE.PDF
331 -------------------------------------------------------------------------- */
333 #ifdef alpha_TARGET_ARCH
/*
 * Alpha StgRun.
 *
 * Declares register variables bound to $26, $29, $9-$14, $f2-$f7 and $27,
 * each (except $27) paired with a volatile save slot of matching type.
 * When alpha_EXTRA_CAREFUL is defined, $15, $f8 and $f9 get the same
 * treatment.  The visible assembler lowers $30 by RESERVED_C_STACK_BYTES,
 * jumps through $27, exports STG_RETURN as a global symbol, and raises
 * $30 by the same amount afterwards.
 *
 * NOTE(review): the statements that copy registers into and out of the
 * save slots, the closing #endif lines of the alpha_EXTRA_CAREFUL
 * sections, the STG_RETURN label line and the final return are not
 * visible in this view.
 */
336 StgRun(StgFunPtr f, StgRegTable *basereg)
338 register long real_ra __asm__("$26"); volatile long save_ra;
339 register long real_gp __asm__("$29"); volatile long save_gp;
341 register long real_s0 __asm__("$9" ); volatile long save_s0;
342 register long real_s1 __asm__("$10"); volatile long save_s1;
343 register long real_s2 __asm__("$11"); volatile long save_s2;
344 register long real_s3 __asm__("$12"); volatile long save_s3;
345 register long real_s4 __asm__("$13"); volatile long save_s4;
346 register long real_s5 __asm__("$14"); volatile long save_s5;
347 #ifdef alpha_EXTRA_CAREFUL
348 register long real_s6 __asm__("$15"); volatile long save_s6;
351 register double real_f2 __asm__("$f2"); volatile double save_f2;
352 register double real_f3 __asm__("$f3"); volatile double save_f3;
353 register double real_f4 __asm__("$f4"); volatile double save_f4;
354 register double real_f5 __asm__("$f5"); volatile double save_f5;
355 register double real_f6 __asm__("$f6"); volatile double save_f6;
356 register double real_f7 __asm__("$f7"); volatile double save_f7;
357 #ifdef alpha_EXTRA_CAREFUL
358 register double real_f8 __asm__("$f8"); volatile double save_f8;
359 register double real_f9 __asm__("$f9"); volatile double save_f9;
362 register StgFunPtr real_pv __asm__("$27");
364 StgThreadReturnCode ret;
375 #ifdef alpha_EXTRA_CAREFUL
385 #ifdef alpha_EXTRA_CAREFUL
392 __asm__ volatile( "lda $30,-%0($30)" "\n"
393 "\t" "jmp ($27)" "\n"
395 ".globl " STG_RETURN "\n"
397 "\t" "lda $30,%0($30)" "\n"
398 : : "K" (RESERVED_C_STACK_BYTES));
408 #ifdef alpha_EXTRA_CAREFUL
418 #ifdef alpha_EXTRA_CAREFUL
429 #endif /* alpha_TARGET_ARCH */
431 /* -----------------------------------------------------------------------------
433 -------------------------------------------------------------------------- */
435 #ifdef hppa1_1_TARGET_ARCH
/*
 * HP-PA 1.1 StgRun.
 *
 * Declares a local buffer of RESERVED_C_STACK_BYTES + 16*sizeof(long) +
 * 10*sizeof(double).
 *
 * First asm statement: sets %r19 to %r30 plus the immediate
 * -(116*sizeof(long) + 10*sizeof(double)), stores %r3..%r18 at offsets
 * 0..60 from %r19, then advances %r19 by 80, 32 and 32 while storing
 * %fr12..%fr21 as doubles around it.  %r19 is listed as clobbered.
 *
 * Second asm statement: aligns to 4, exports STG_RETURN, recomputes %r19
 * from %r30 with the same immediate (operand %1), copies %r11 into operand
 * %0 (commented as saving R1), and reloads %r3..%r18 and %fr12..%fr21 in
 * the same layout.
 *
 * NOTE(review): the displacement uses 116*sizeof(long) while the buffer
 * size adds 16*sizeof(long) -- confirm this difference is intended.  The
 * jump to f, the STG_RETURN label line, the output operand list of the
 * second statement and the final return are not visible in this view.
 */
438 StgRun(StgFunPtr f, StgRegTable *basereg)
440 StgChar space[RESERVED_C_STACK_BYTES+16*sizeof(long)+10*sizeof(double)];
441 StgThreadReturnCode ret;
443 __asm__ volatile ("ldo %0(%%r30),%%r19\n"
444 "\tstw %%r3, 0(0,%%r19)\n"
445 "\tstw %%r4, 4(0,%%r19)\n"
446 "\tstw %%r5, 8(0,%%r19)\n"
447 "\tstw %%r6,12(0,%%r19)\n"
448 "\tstw %%r7,16(0,%%r19)\n"
449 "\tstw %%r8,20(0,%%r19)\n"
450 "\tstw %%r9,24(0,%%r19)\n"
451 "\tstw %%r10,28(0,%%r19)\n"
452 "\tstw %%r11,32(0,%%r19)\n"
453 "\tstw %%r12,36(0,%%r19)\n"
454 "\tstw %%r13,40(0,%%r19)\n"
455 "\tstw %%r14,44(0,%%r19)\n"
456 "\tstw %%r15,48(0,%%r19)\n"
457 "\tstw %%r16,52(0,%%r19)\n"
458 "\tstw %%r17,56(0,%%r19)\n"
459 "\tstw %%r18,60(0,%%r19)\n"
460 "\tldo 80(%%r19),%%r19\n"
461 "\tfstds %%fr12,-16(0,%%r19)\n"
462 "\tfstds %%fr13, -8(0,%%r19)\n"
463 "\tfstds %%fr14, 0(0,%%r19)\n"
464 "\tfstds %%fr15, 8(0,%%r19)\n"
465 "\tldo 32(%%r19),%%r19\n"
466 "\tfstds %%fr16,-16(0,%%r19)\n"
467 "\tfstds %%fr17, -8(0,%%r19)\n"
468 "\tfstds %%fr18, 0(0,%%r19)\n"
469 "\tfstds %%fr19, 8(0,%%r19)\n"
470 "\tldo 32(%%r19),%%r19\n"
471 "\tfstds %%fr20,-16(0,%%r19)\n"
472 "\tfstds %%fr21, -8(0,%%r19)\n" : :
473 "n" (-(116 * sizeof(long) + 10 * sizeof(double))) : "%r19"
478 __asm__ volatile (".align 4\n"
479 "\t.EXPORT " STG_RETURN ",CODE\n"
480 "\t.EXPORT " STG_RETURN ",ENTRY,PRIV_LEV=3\n"
482 /* "\tldo %0(%%r3),%%r19\n" */
483 "\tldo %1(%%r30),%%r19\n"
484 "\tcopy %%r11, %0\n" /* save R1 */
485 "\tldw 0(0,%%r19),%%r3\n"
486 "\tldw 4(0,%%r19),%%r4\n"
487 "\tldw 8(0,%%r19),%%r5\n"
488 "\tldw 12(0,%%r19),%%r6\n"
489 "\tldw 16(0,%%r19),%%r7\n"
490 "\tldw 20(0,%%r19),%%r8\n"
491 "\tldw 24(0,%%r19),%%r9\n"
492 "\tldw 28(0,%%r19),%%r10\n"
493 "\tldw 32(0,%%r19),%%r11\n"
494 "\tldw 36(0,%%r19),%%r12\n"
495 "\tldw 40(0,%%r19),%%r13\n"
496 "\tldw 44(0,%%r19),%%r14\n"
497 "\tldw 48(0,%%r19),%%r15\n"
498 "\tldw 52(0,%%r19),%%r16\n"
499 "\tldw 56(0,%%r19),%%r17\n"
500 "\tldw 60(0,%%r19),%%r18\n"
501 "\tldo 80(%%r19),%%r19\n"
502 "\tfldds -16(0,%%r19),%%fr12\n"
503 "\tfldds -8(0,%%r19),%%fr13\n"
504 "\tfldds 0(0,%%r19),%%fr14\n"
505 "\tfldds 8(0,%%r19),%%fr15\n"
506 "\tldo 32(%%r19),%%r19\n"
507 "\tfldds -16(0,%%r19),%%fr16\n"
508 "\tfldds -8(0,%%r19),%%fr17\n"
509 "\tfldds 0(0,%%r19),%%fr18\n"
510 "\tfldds 8(0,%%r19),%%fr19\n"
511 "\tldo 32(%%r19),%%r19\n"
512 "\tfldds -16(0,%%r19),%%fr20\n"
513 "\tfldds -8(0,%%r19),%%fr21\n"
515 : "n" (-(116 * sizeof(long) + 10 * sizeof(double)))
522 #endif /* hppa1_1_TARGET_ARCH */
524 /* -----------------------------------------------------------------------------
527 Everything is in assembler, so we don't have to deal with GCC...
529 -------------------------------------------------------------------------- */
531 #ifdef powerpc_TARGET_ARCH
533 extern StgThreadReturnCode StgRun(StgFunPtr f, StgRegTable *basereg);
535 #ifdef darwin_TARGET_OS
/*
 * Container function for the PowerPC Darwin StgRun/StgReturn assembler.
 *
 * The visible instructions call saveFP (annotated "f14"), store r13 and
 * up with stmw at -220(r1), push a stack frame of %0 bytes with stwu,
 * and copy r4 into r27 (BaseReg).  _StgReturn is exported as a global
 * symbol, and the registers stored by stmw are reloaded with lmw from the
 * same offset.  Operand %0 is the immediate RESERVED_C_STACK_BYTES+288.
 *
 * NOTE(review): the asm opener, the StgRun label, link-register handling,
 * the branch to f, and the frame teardown/return are not visible in this
 * view.
 */
536 static void StgRunIsImplementedInAssembler(void)
542 "\tbl saveFP # f14\n"
543 "\tstmw r13,-220(r1)\n"
544 "\tstwu r1,-%0(r1)\n"
545 "\tmr r27,r4\n" // BaseReg == r27
549 ".globl _StgReturn\n"
553 "\tlmw r13,-220(r1)\n"
555 : : "i"(RESERVED_C_STACK_BYTES+288 /*stack frame size*/));
559 // This version is for PowerPC Linux.
561 // Differences from the Darwin/Mac OS X version:
562 // *) Different Assembler Syntax
563 // *) Doesn't use Register Saving Helper Functions (although they exist somewhere)
564 // *) We may not access positive stack offsets
565 // (no "Red Zone" as in the Darwin ABI)
566 // *) The Link Register is saved to a different offset in the caller's stack frame
567 // (Linux: 4(r1), Darwin 8(r1))
/*
 * Container function for the PowerPC Linux StgRun/StgReturn assembler.
 *
 * The visible instructions mark StgRun and StgReturn as function symbols,
 * store r13 and up with stmw at -220 from r5, store f14..f19 with stfd at
 * offsets -144..-104 from r5, and copy r4 into r27 (BaseReg).  Operand %0
 * is the immediate RESERVED_C_STACK_BYTES+288.
 *
 * NOTE(review): how r5 is set up, the remaining floating-point saves, the
 * stack frame push, the branch to f and the whole restore/return sequence
 * are not visible in this view.
 */
569 static void StgRunIsImplementedInAssembler(void)
573 "\t.type StgRun,@function\n"
579 "\tstmw 13,-220(5)\n"
580 "\tstfd 14,-144(5)\n"
581 "\tstfd 15,-136(5)\n"
582 "\tstfd 16,-128(5)\n"
583 "\tstfd 17,-120(5)\n"
584 "\tstfd 18,-112(5)\n"
585 "\tstfd 19,-104(5)\n"
598 "\tmr 27,4\n" // BaseReg == r27
603 "\t.type StgReturn,@function\n"
630 : : "i"(RESERVED_C_STACK_BYTES+288 /*stack frame size*/));
636 /* -----------------------------------------------------------------------------
639 Again, in assembler - so we can fiddle with the register stack, and because
640 gcc doesn't handle asm-clobbered callee-saves correctly.
642 loc0 - loc15: preserved locals
643 loc16 - loc28: STG registers
646 loc31: saved gp (gcc 3.3 uses this slot)
647 -------------------------------------------------------------------------- */
649 #ifdef ia64_TARGET_ARCH
651 /* the memory stack is rarely used, so 16K is excessive */
652 #undef RESERVED_C_STACK_BYTES
653 #define RESERVED_C_STACK_BYTES 1024
655 #if ((__GNUC__ == 3) && (__GNUC_MINOR__ >= 3)) || (__GNUC__ > 3)
656 /* gcc 3.3+: leave an extra slot for gp saves */
/*
 * Container function for the ia64 StgRun/StgReturn assembler.
 *
 * Operand %0 is the immediate RESERVED_C_STACK_BYTES + 6*16 and operand
 * %1 is LOCALS.  Entry sequence: alloc sets up the register frame with
 * %1 locals and keeps ar.pfs in loc29; the procedure address and gp are
 * loaded through r32; sp is lowered by %0; f16..f21 are spilled through
 * r16/r17, which start at sp + %0 - 6*16 and sp + %0 - 5*16 and step by
 * 32; b0 is kept in loc30; control branches through b6.
 *
 * After the exported StgReturn symbol: loc16 is copied to r8 as the return
 * value, f16..f21 are filled back from the same addresses, ar.pfs and b0
 * are restored from loc29/loc30, sp is raised by %0, and the code returns
 * through b0.
 *
 * NOTE(review): the asm opener, the StgRun/StgReturn label lines and the
 * definition of LOCALS are not visible in this view.
 */
662 static void StgRunIsImplementedInAssembler(void)
667 "\talloc loc29 = ar.pfs, 0, %1, 8, 0\n" /* setup register frame */
668 "\tld8 r18 = [r32],8\n" /* get procedure address */
669 "\tadds sp = -%0, sp ;;\n" /* setup stack */
670 "\tld8 gp = [r32]\n" /* get procedure GP */
671 "\tadds r16 = %0-(6*16), sp\n"
672 "\tadds r17 = %0-(5*16), sp ;;\n"
673 "\tstf.spill [r16] = f16,32\n" /* spill callee-saved fp regs */
674 "\tstf.spill [r17] = f17,32\n"
675 "\tmov b6 = r18 ;;\n" /* set target address */
676 "\tstf.spill [r16] = f18,32\n"
677 "\tstf.spill [r17] = f19,32\n"
678 "\tmov loc30 = b0 ;;\n" /* save return address */
679 "\tstf.spill [r16] = f20,32\n"
680 "\tstf.spill [r17] = f21,32\n"
681 "\tbr.few b6 ;;\n" /* branch to function */
682 ".global StgReturn\n"
684 "\tmov r8 = loc16\n" /* return value in r8 */
685 "\tadds r16 = %0-(6*16), sp\n"
686 "\tadds r17 = %0-(5*16), sp ;;\n"
687 "\tldf.fill f16 = [r16],32\n" /* start restoring fp regs */
688 "\tldf.fill f17 = [r17],32\n"
689 "\tmov ar.pfs = loc29 ;;\n" /* restore register frame */
690 "\tldf.fill f18 = [r16],32\n"
691 "\tldf.fill f19 = [r17],32\n"
692 "\tmov b0 = loc30 ;;\n" /* restore return address */
693 "\tldf.fill f20 = [r16],32\n"
694 "\tldf.fill f21 = [r17],32\n"
695 "\tadds sp = %0, sp\n" /* restore stack */
696 "\tbr.ret.sptk.many b0 ;;\n" /* return */
697 : : "i"(RESERVED_C_STACK_BYTES + 6*16), "i"(LOCALS));
702 #endif /* !USE_MINIINTERPRETER */