1 /* -----------------------------------------------------------------------------
3 * (c) The GHC Team, 1998-2003
7 * To run an STG function from C land, call
9 * rv = StgRun(f,BaseReg);
11 * where "f" is the STG function to call, and BaseReg is the address of the
12 * RegTable for this run (we might have separate RegTables if we're running
13 * multiple threads on an SMP machine).
15 * In the end, "f" must JMP to StgReturn (defined below),
16 * passing the return-value "rv" in R1,
17 * to return to the caller of StgRun returning "rv" in
18 * whatever way C returns a value.
20 * NOTE: StgRun/StgReturn do *NOT* load or store Hp or any
21 * other registers (other than saving the C callee-saves
22 * registers). Instead, the called function "f" must do that
25 * GCC will have assumed that pushing/popping of C-stack frames is
26 * going on when it generated its code, and used stack space
27 * accordingly. However, we actually post-process away all
28 * such stack-framery (see ghc/driver/ghc-asm.lprl). Things will
29 * be OK however, if we initially make sure there are
30 * @RESERVED_C_STACK_BYTES@ on the C-stack to begin with, for local
33 * -------------------------------------------------------------------------- */
35 #include "PosixSource.h"
38 * We define the following (unused) global register variables, because for
39 * some reason gcc generates sub-optimal code for StgRun() on the Alpha
40 * (unnecessarily saving extra registers on the stack) if we don't.
42 * Why do it at the top of this file, rather than near StgRun() below? Because
43 * gcc doesn't let us define global register variables after any function
44 * definition has been read. Any point after #include "Stg.h" would be too
47 * We define alpha_EXTRA_CAREFUL here to save $s6, $f8 and $f9 -- registers
48 * that we don't use but which are callee-save registers. The __divq() routine
49 * in libc.a clobbers $s6.
51 #include "ghcconfig.h"
52 #ifndef USE_MINIINTERPRETER
53 #ifdef alpha_HOST_ARCH
54 #define alpha_EXTRA_CAREFUL
55 register long fake_ra __asm__("$26");
56 register long fake_gp __asm__("$29");
57 #ifdef alpha_EXTRA_CAREFUL
58 register long fake_s6 __asm__("$15");
59 register double fake_f8 __asm__("$f8");
60 register double fake_f9 __asm__("$f9");
65 /* include Stg.h first because we want real machine regs in here: we
66 * have to get the value of R1 back from Stg land to C land intact.
74 #include "Capability.h"
81 #ifdef USE_MINIINTERPRETER
83 /* -----------------------------------------------------------------------------
84 any architecture (using miniinterpreter)
85 -------------------------------------------------------------------------- */
87 StgRegTable * StgRun(StgFunPtr f, StgRegTable *basereg STG_UNUSED)
91 debugBelch("Jumping to ");
92 printPtr((P_)f); fflush(stdout);
95 f = (StgFunPtr) (f)();
97 return (StgRegTable *)R1.p;
100 StgFunPtr StgReturn(void)
105 #else /* !USE_MINIINTERPRETER */
107 #ifdef LEADING_UNDERSCORE
108 #define STG_RUN "_StgRun"
110 #define STG_RUN "StgRun"
113 #ifdef LEADING_UNDERSCORE
114 #define STG_RETURN "_StgReturn"
116 #define STG_RETURN "StgReturn"
119 /* -----------------------------------------------------------------------------
121 -------------------------------------------------------------------------- */
123 #ifdef i386_HOST_ARCH
125 #ifdef darwin_TARGET_OS
126 #define STG_GLOBAL ".globl "
128 #define STG_GLOBAL ".global "
132 StgRun(StgFunPtr f, StgRegTable *basereg) {
134 unsigned char space[ RESERVED_C_STACK_BYTES + 4*sizeof(void *) ];
139 * save callee-saves registers on behalf of the STG code.
141 "movl %%esp, %%eax\n\t"
143 "movl %%ebx,0(%%eax)\n\t"
144 "movl %%esi,4(%%eax)\n\t"
145 "movl %%edi,8(%%eax)\n\t"
146 "movl %%ebp,12(%%eax)\n\t"
152 * grab the function argument from the stack
158 * The stack pointer has to be aligned to a multiple of 16 bytes at
159 * this point. This works out correctly with gcc 4.0.1, but it might
160 * break at any time in the future. TODO: Make this future-proof.
168 STG_GLOBAL STG_RETURN "\n"
171 "movl %%esi, %%eax\n\t" /* Return value in R1 */
174 * restore callee-saves registers. (Don't stomp on %%eax!)
176 "movl %%esp, %%edx\n\t"
178 "movl 0(%%edx),%%ebx\n\t" /* restore the registers saved above */
179 "movl 4(%%edx),%%esi\n\t"
180 "movl 8(%%edx),%%edi\n\t"
181 "movl 12(%%edx),%%ebp\n\t"
183 : "=&a" (r), "=m" (space)
184 : "m" (f), "m" (basereg), "i" (RESERVED_C_STACK_BYTES)
185 : "edx" /* stomps on %edx */
193 /* ----------------------------------------------------------------------------
194 x86-64 is almost the same as plain x86.
196 I've done it using entirely inline assembler, because I couldn't
197 get gcc to generate the correct subtraction from %rsp by using
198 the local array variable trick. It didn't seem to reserve
199 enough space. Oh well, it's not much harder this way.
201 ------------------------------------------------------------------------- */
203 #ifdef x86_64_HOST_ARCH
205 extern StgRegTable * StgRun(StgFunPtr f, StgRegTable *basereg);
207 static void GNUC3_ATTRIBUTE(used)
208 StgRunIsImplementedInAssembler(void)
212 * save callee-saves registers on behalf of the STG code.
214 ".globl " STG_RUN "\n"
217 "movq %%rsp, %%rax\n\t"
218 "addq %0-48, %%rax\n\t"
219 "movq %%rbx,0(%%rax)\n\t"
220 "movq %%rbp,8(%%rax)\n\t"
221 "movq %%r12,16(%%rax)\n\t"
222 "movq %%r13,24(%%rax)\n\t"
223 "movq %%r14,32(%%rax)\n\t"
224 "movq %%r15,40(%%rax)\n\t"
228 "movq %%rsi,%%r13\n\t"
230 * grab the function argument from %rdi, and jump to it.
232 "movq %%rdi,%%rax\n\t"
235 ".globl " STG_RETURN "\n"
238 "movq %%rbx, %%rax\n\t" /* Return value in R1 */
241 * restore callee-saves registers. (Don't stomp on %%rax!)
243 "movq %%rsp, %%rdx\n\t"
244 "addq %0-48, %%rdx\n\t"
245 "movq 0(%%rdx),%%rbx\n\t" /* restore the registers saved above */
246 "movq 8(%%rdx),%%rbp\n\t"
247 "movq 16(%%rdx),%%r12\n\t"
248 "movq 24(%%rdx),%%r13\n\t"
249 "movq 32(%%rdx),%%r14\n\t"
250 "movq 40(%%rdx),%%r15\n\t"
254 : : "i"(RESERVED_C_STACK_BYTES+48+8 /*stack frame size*/));
258 The x86_64 ABI specifies that on a procedure call, %rsp is
259 aligned on a 16-byte boundary + 8. That is, the first
260 argument on the stack after the return address will be
263 Which should be fine: RESERVED_C_STACK_BYTES+48 is a multiple
266 BUT... when we do a C-call from STG land, gcc likes to put the
267 stack alignment adjustment in the prolog. eg. if we're calling
268 a function with arguments in regs, gcc will insert 'subq $8,%rsp'
269 in the prolog, to keep %rsp aligned (the return address is 8
270 bytes, remember). The mangler throws away the prolog, so we
271 lose the stack alignment.
273 The hack is to add this extra 8 bytes to our %rsp adjustment
274 here, so that throughout STG code, %rsp is 16-byte aligned,
277 A quick way to see if this is wrong is to compile this code:
279 main = System.Exit.exitWith ExitSuccess
281 And run it with +RTS -sstderr. The stats code in the RTS, in
282 particular statsPrintf(), relies on the stack alignment because
283 it saves the %xmm regs on the stack, so it'll fall over if the
284 stack isn't aligned, and calling exitWith from Haskell invokes
285 shutdownHaskellAndExit using a C call.
287 Future gcc releases will almost certainly break this hack...
293 /* -----------------------------------------------------------------------------
297 OLD COMMENT from GHC-3.02:
299 We want tailjumps to be calls, because `call xxx' is the only Sparc
300 branch that allows an arbitrary label as a target. (Gcc's ``goto
301 *target'' construct ends up loading the label into a register and
302 then jumping, at the cost of two extra instructions for the 32-bit
305 When entering the threaded world, we stash our return address in a
306 known location so that %i7 is available as an extra
307 callee-saves register. Of course, we have to restore this when
308 coming out of the threaded world.
310 I hate this god-forsaken architecture. Since the top of the
311 reserved stack space is used for globals and the bottom is reserved
312 for outgoing arguments, we have to stick our return address
313 somewhere in the middle. Currently, I'm allowing 100 extra
314 outgoing arguments beyond the first 6. --JSM
316 Updated info (GHC 4.06): we don't appear to use %i7 any more, so
317 I'm not sure whether we still need to save it. Incidentally, what
318 does the last paragraph above mean when it says "the top of the
319 stack is used for globals"? What globals? --SDM
321 Updated info (GHC 4.08.2): not saving %i7 any more (see below).
322 -------------------------------------------------------------------------- */
324 #ifdef sparc_HOST_ARCH
327 StgRun(StgFunPtr f, StgRegTable *basereg) {
329 unsigned char space[RESERVED_C_STACK_BYTES];
331 register void *i7 __asm__("%i7");
332 ((void **)(space))[100] = i7;
337 ".global " STG_RETURN "\n"
339 : : "p" (space) : "l0","l1","l2","l3","l4","l5","l6","l7");
340 /* we tell the C compiler that l0-l7 are clobbered on return to
341 * StgReturn, otherwise it tries to use these to save eg. the
342 * address of space[100] across the call. The correct thing
343 * to do would be to save all the callee-saves regs, but we
344 * can't be bothered to do that.
346 * We also explicitly mark space as used since gcc eliminates it
349 * The code that gcc generates for this little fragment is now
350 * terrible. We could do much better by coding it directly in
354 /* updated 4.08.2: we don't save %i7 in the middle of the reserved
355 * space any more, since gcc tries to save its address across the
356 * call to f(), this gets clobbered in STG land and we end up
357 * dereferencing a bogus pointer in StgReturn.
359 __asm__ volatile ("ld %1,%0"
360 : "=r" (i7) : "m" (((void **)(space))[100]));
362 return (StgRegTable *)R1.i;
367 /* -----------------------------------------------------------------------------
370 "The stack pointer (SP) must at all times denote an address that has octaword
371 alignment. (This restriction has the side effect that the in-memory portion
372 of the argument list, if any, will start on an octaword boundary.) Note that
373 the stack grows toward lower addresses. During a procedure invocation, SP
374 can never be set to a value that is higher than the value of SP at entry to
375 that procedure invocation.
377 "The contents of the stack, located above the portion of the argument list
378 (if any) that is passed in memory, belong to the calling procedure. Because
379 they are part of the calling procedure, they should not be read or written
380 by the called procedure, except as specified by indirect arguments or
381 language-controlled up-level references.
383 "The SP value might be used by the hardware when raising exceptions and
384 asynchronous interrupts. It must be assumed that the contents of the stack
385 below the current SP value and within the stack for the current thread are
386 continually and unpredictably modified, as specified in the _Alpha
387 Architecture Reference Manual_, and as a result of asynchronous software
390 -- Compaq Computer Corporation, Houston. Tru64 UNIX Calling Standard for
391 Alpha Systems, 5.1 edition, August 2000, section 3.2.1. http://www.
392 tru64unix.compaq.com/docs/base_doc/DOCUMENTATION/V51_PDF/ARH9MBTE.PDF
393 -------------------------------------------------------------------------- */
395 #ifdef alpha_HOST_ARCH
398 StgRun(StgFunPtr f, StgRegTable *basereg)
400 register long real_ra __asm__("$26"); volatile long save_ra;
401 register long real_gp __asm__("$29"); volatile long save_gp;
403 register long real_s0 __asm__("$9" ); volatile long save_s0;
404 register long real_s1 __asm__("$10"); volatile long save_s1;
405 register long real_s2 __asm__("$11"); volatile long save_s2;
406 register long real_s3 __asm__("$12"); volatile long save_s3;
407 register long real_s4 __asm__("$13"); volatile long save_s4;
408 register long real_s5 __asm__("$14"); volatile long save_s5;
409 #ifdef alpha_EXTRA_CAREFUL
410 register long real_s6 __asm__("$15"); volatile long save_s6;
413 register double real_f2 __asm__("$f2"); volatile double save_f2;
414 register double real_f3 __asm__("$f3"); volatile double save_f3;
415 register double real_f4 __asm__("$f4"); volatile double save_f4;
416 register double real_f5 __asm__("$f5"); volatile double save_f5;
417 register double real_f6 __asm__("$f6"); volatile double save_f6;
418 register double real_f7 __asm__("$f7"); volatile double save_f7;
419 #ifdef alpha_EXTRA_CAREFUL
420 register double real_f8 __asm__("$f8"); volatile double save_f8;
421 register double real_f9 __asm__("$f9"); volatile double save_f9;
424 register StgFunPtr real_pv __asm__("$27");
437 #ifdef alpha_EXTRA_CAREFUL
447 #ifdef alpha_EXTRA_CAREFUL
454 __asm__ volatile( "lda $30,-%0($30)" "\n"
455 "\t" "jmp ($27)" "\n"
457 ".globl " STG_RETURN "\n"
459 "\t" "lda $30,%0($30)" "\n"
460 : : "K" (RESERVED_C_STACK_BYTES));
470 #ifdef alpha_EXTRA_CAREFUL
480 #ifdef alpha_EXTRA_CAREFUL
491 #endif /* alpha_HOST_ARCH */
493 /* -----------------------------------------------------------------------------
495 -------------------------------------------------------------------------- */
497 #ifdef hppa1_1_HOST_ARCH
500 StgRun(StgFunPtr f, StgRegTable *basereg)
502 StgChar space[RESERVED_C_STACK_BYTES+16*sizeof(long)+10*sizeof(double)];
505 __asm__ volatile ("ldo %0(%%r30),%%r19\n"
506 "\tstw %%r3, 0(0,%%r19)\n"
507 "\tstw %%r4, 4(0,%%r19)\n"
508 "\tstw %%r5, 8(0,%%r19)\n"
509 "\tstw %%r6,12(0,%%r19)\n"
510 "\tstw %%r7,16(0,%%r19)\n"
511 "\tstw %%r8,20(0,%%r19)\n"
512 "\tstw %%r9,24(0,%%r19)\n"
513 "\tstw %%r10,28(0,%%r19)\n"
514 "\tstw %%r11,32(0,%%r19)\n"
515 "\tstw %%r12,36(0,%%r19)\n"
516 "\tstw %%r13,40(0,%%r19)\n"
517 "\tstw %%r14,44(0,%%r19)\n"
518 "\tstw %%r15,48(0,%%r19)\n"
519 "\tstw %%r16,52(0,%%r19)\n"
520 "\tstw %%r17,56(0,%%r19)\n"
521 "\tstw %%r18,60(0,%%r19)\n"
522 "\tldo 80(%%r19),%%r19\n"
523 "\tfstds %%fr12,-16(0,%%r19)\n"
524 "\tfstds %%fr13, -8(0,%%r19)\n"
525 "\tfstds %%fr14, 0(0,%%r19)\n"
526 "\tfstds %%fr15, 8(0,%%r19)\n"
527 "\tldo 32(%%r19),%%r19\n"
528 "\tfstds %%fr16,-16(0,%%r19)\n"
529 "\tfstds %%fr17, -8(0,%%r19)\n"
530 "\tfstds %%fr18, 0(0,%%r19)\n"
531 "\tfstds %%fr19, 8(0,%%r19)\n"
532 "\tldo 32(%%r19),%%r19\n"
533 "\tfstds %%fr20,-16(0,%%r19)\n"
534 "\tfstds %%fr21, -8(0,%%r19)\n" : :
535 "n" (-(116 * sizeof(long) + 10 * sizeof(double))) : "%r19"
540 __asm__ volatile (".align 4\n"
541 "\t.EXPORT " STG_RETURN ",CODE\n"
542 "\t.EXPORT " STG_RETURN ",ENTRY,PRIV_LEV=3\n"
544 /* "\tldo %0(%%r3),%%r19\n" */
545 "\tldo %1(%%r30),%%r19\n"
546 "\tcopy %%r11, %0\n" /* save R1 */
547 "\tldw 0(0,%%r19),%%r3\n"
548 "\tldw 4(0,%%r19),%%r4\n"
549 "\tldw 8(0,%%r19),%%r5\n"
550 "\tldw 12(0,%%r19),%%r6\n"
551 "\tldw 16(0,%%r19),%%r7\n"
552 "\tldw 20(0,%%r19),%%r8\n"
553 "\tldw 24(0,%%r19),%%r9\n"
554 "\tldw 28(0,%%r19),%%r10\n"
555 "\tldw 32(0,%%r19),%%r11\n"
556 "\tldw 36(0,%%r19),%%r12\n"
557 "\tldw 40(0,%%r19),%%r13\n"
558 "\tldw 44(0,%%r19),%%r14\n"
559 "\tldw 48(0,%%r19),%%r15\n"
560 "\tldw 52(0,%%r19),%%r16\n"
561 "\tldw 56(0,%%r19),%%r17\n"
562 "\tldw 60(0,%%r19),%%r18\n"
563 "\tldo 80(%%r19),%%r19\n"
564 "\tfldds -16(0,%%r19),%%fr12\n"
565 "\tfldds -8(0,%%r19),%%fr13\n"
566 "\tfldds 0(0,%%r19),%%fr14\n"
567 "\tfldds 8(0,%%r19),%%fr15\n"
568 "\tldo 32(%%r19),%%r19\n"
569 "\tfldds -16(0,%%r19),%%fr16\n"
570 "\tfldds -8(0,%%r19),%%fr17\n"
571 "\tfldds 0(0,%%r19),%%fr18\n"
572 "\tfldds 8(0,%%r19),%%fr19\n"
573 "\tldo 32(%%r19),%%r19\n"
574 "\tfldds -16(0,%%r19),%%fr20\n"
575 "\tfldds -8(0,%%r19),%%fr21\n"
577 : "n" (-(116 * sizeof(long) + 10 * sizeof(double)))
584 #endif /* hppa1_1_HOST_ARCH */
586 /* -----------------------------------------------------------------------------
589 Everything is in assembler, so we don't have to deal with GCC...
591 -------------------------------------------------------------------------- */
593 #ifdef powerpc_HOST_ARCH
595 extern StgRegTable * StgRun(StgFunPtr f, StgRegTable *basereg);
597 #ifdef darwin_HOST_OS
598 void StgRunIsImplementedInAssembler(void)
600 #if HAVE_SUBSECTIONS_VIA_SYMBOLS
601 // if the toolchain supports deadstripping, we have to
602 // prevent it here (it tends to get confused here).
603 __asm__ volatile (".no_dead_strip _StgRunIsImplementedInAssembler");
609 "\tbl saveFP # f14\n"
610 "\tstmw r13,-220(r1)\n"
611 "\tstwu r1,-%0(r1)\n"
612 "\tmr r27,r4\n" // BaseReg == r27
616 ".globl _StgReturn\n"
620 "\tlmw r13,-220(r1)\n"
622 : : "i"(RESERVED_C_STACK_BYTES+224 /*stack frame size*/));
626 // This version is for PowerPC Linux.
628 // Differences from the Darwin/Mac OS X version:
629 // *) Different Assembler Syntax
630 // *) Doesn't use Register Saving Helper Functions (although they exist somewhere)
631 // *) We may not access positive stack offsets
632 // (no "Red Zone" as in the Darwin ABI)
633 // *) The Link Register is saved to a different offset in the caller's stack frame
634 // (Linux: 4(r1), Darwin 8(r1))
636 static void GNUC3_ATTRIBUTE(used)
637 StgRunIsImplementedInAssembler(void)
641 "\t.type StgRun,@function\n"
647 "\tstmw 13,-220(5)\n"
648 "\tstfd 14,-144(5)\n"
649 "\tstfd 15,-136(5)\n"
650 "\tstfd 16,-128(5)\n"
651 "\tstfd 17,-120(5)\n"
652 "\tstfd 18,-112(5)\n"
653 "\tstfd 19,-104(5)\n"
666 "\tmr 27,4\n" // BaseReg == r27
671 "\t.type StgReturn,@function\n"
698 : : "i"(RESERVED_C_STACK_BYTES+224 /*stack frame size*/));
704 /* -----------------------------------------------------------------------------
705 PowerPC 64 architecture
707 Everything is in assembler, so we don't have to deal with GCC...
709 -------------------------------------------------------------------------- */
711 #ifdef powerpc64_HOST_ARCH
714 extern StgRegTable * StgRun(StgFunPtr f, StgRegTable *basereg);
716 static void GNUC3_ATTRIBUTE(used)
717 StgRunIsImplementedInAssembler(void)
721 // r2 toc - needs to be saved
722 // r3-r10 argument passing, volatile
723 // r11, r12 very volatile (not saved across cross-module calls)
724 // r13 thread local state (never modified, don't need to save)
725 // r14-r31 callee-save
727 ".section \".opd\",\"aw\"\n"
731 "\t.quad\t.StgRun,.TOC.@tocbase,0\n"
732 "\t.size StgRun,24\n"
735 "\t.quad\t.StgReturn,.TOC.@tocbase,0\n"
736 "\t.size StgReturn,24\n"
739 ".type .StgRun,@function\n"
746 "\tstd 14, -288(5)\n"
747 "\tstd 15, -280(5)\n"
748 "\tstd 16, -272(5)\n"
749 "\tstd 17, -264(5)\n"
750 "\tstd 18, -256(5)\n"
751 "\tstd 19, -248(5)\n"
752 "\tstd 20, -240(5)\n"
753 "\tstd 21, -232(5)\n"
754 "\tstd 22, -224(5)\n"
755 "\tstd 23, -216(5)\n"
756 "\tstd 24, -208(5)\n"
757 "\tstd 25, -200(5)\n"
758 "\tstd 26, -192(5)\n"
759 "\tstd 27, -184(5)\n"
760 "\tstd 28, -176(5)\n"
761 "\tstd 29, -168(5)\n"
762 "\tstd 30, -160(5)\n"
763 "\tstd 31, -152(5)\n"
764 "\tstfd 14, -144(5)\n"
765 "\tstfd 15, -136(5)\n"
766 "\tstfd 16, -128(5)\n"
767 "\tstfd 17, -120(5)\n"
768 "\tstfd 18, -112(5)\n"
769 "\tstfd 19, -104(5)\n"
770 "\tstfd 20, -96(5)\n"
771 "\tstfd 21, -88(5)\n"
772 "\tstfd 22, -80(5)\n"
773 "\tstfd 23, -72(5)\n"
774 "\tstfd 24, -64(5)\n"
775 "\tstfd 25, -56(5)\n"
776 "\tstfd 26, -48(5)\n"
777 "\tstfd 27, -40(5)\n"
778 "\tstfd 28, -32(5)\n"
779 "\tstfd 29, -24(5)\n"
780 "\tstfd 30, -16(5)\n"
782 "\tmr 27, 4\n" // BaseReg == r27
787 ".globl .StgReturn\n"
788 ".type .StgReturn,@function\n"
791 "\tla 5, %0(1)\n" // load address == addi r5, r1, %0
811 "\tlfd 14, -144(5)\n"
812 "\tlfd 15, -136(5)\n"
813 "\tlfd 16, -128(5)\n"
814 "\tlfd 17, -120(5)\n"
815 "\tlfd 18, -112(5)\n"
816 "\tlfd 19, -104(5)\n"
833 : : "i"(RESERVED_C_STACK_BYTES+304 /*stack frame size*/));
835 #else // linux_HOST_OS
836 #error Only linux support for power64 right now.
841 /* -----------------------------------------------------------------------------
844 Again, in assembler - so we can fiddle with the register stack, and because
845 gcc doesn't handle asm-clobbered callee-saves correctly.
847 loc0 - loc15: preserved locals
848 loc16 - loc28: STG registers
851 loc31: saved gp (gcc 3.3 uses this slot)
854 f2 - f5: preserved floating-point registers
855 f16 - f23: preserved floating-point registers
856 -------------------------------------------------------------------------- */
858 #ifdef ia64_HOST_ARCH
860 /* the memory stack is rarely used, so 16K is excessive */
861 #undef RESERVED_C_STACK_BYTES
862 #define RESERVED_C_STACK_BYTES 1024
864 /* We don't spill all the callee-save FP registers, only the ones that
865 * gcc has been observed to use */
866 #define PRESERVED_FP_REGISTERS 12
868 /* We always allocate 34 local and 8 output registers. As long as gcc uses
869 * fewer than 32 locals, the mangler will adjust the stack frame accordingly. */
872 static void GNUC3_ATTRIBUTE(used)
873 StgRunIsImplementedInAssembler(void)
878 "\talloc loc29 = ar.pfs, 0, %1, 8, 0\n" /* setup register frame */
879 "\tld8 r18 = [r32],8\n" /* get procedure address */
880 "\tadds sp = -%0, sp ;;\n" /* setup stack */
881 "\tld8 gp = [r32]\n" /* get procedure GP */
882 "\tadds r16 = %0-(%2*16), sp\n"
883 "\tadds r17 = %0-((%2-1)*16), sp ;;\n"
884 "\tstf.spill [r16] = f16,32\n" /* spill callee-saved fp regs */
885 "\tstf.spill [r17] = f17,32\n"
886 "\tmov b6 = r18 ;;\n" /* set target address */
887 "\tstf.spill [r16] = f18,32\n"
888 "\tstf.spill [r17] = f19,32\n"
889 "\tmov loc30 = b0 ;;\n" /* save return address */
890 "\tstf.spill [r16] = f20,32\n"
891 "\tstf.spill [r17] = f21,32 ;;\n"
892 "\tstf.spill [r16] = f22,32\n"
893 "\tstf.spill [r17] = f23,32\n"
894 "\tmov loc32 = ar.lc ;;\n" /* save loop counter */
895 "\tstf.spill [r16] = f2,32\n"
896 "\tstf.spill [r17] = f3,32\n"
897 "\tmov loc33 = pr ;;\n" /* save predicate registers */
898 "\tstf.spill [r16] = f4,32\n"
899 "\tstf.spill [r17] = f5,32\n"
900 "\tbr.few b6 ;;\n" /* branch to function */
901 ".global StgReturn\n"
903 "\tmov r8 = loc16\n" /* return value in r8 */
904 "\tadds r16 = %0-(%2*16), sp\n"
905 "\tadds r17 = %0-((%2-1)*16), sp ;;\n"
906 "\tldf.fill f16 = [r16],32\n" /* start restoring fp regs */
907 "\tldf.fill f17 = [r17],32\n"
908 "\tmov ar.pfs = loc29 ;;\n" /* restore register frame */
909 "\tldf.fill f18 = [r16],32\n"
910 "\tldf.fill f19 = [r17],32\n"
911 "\tmov b0 = loc30 ;;\n" /* restore return address */
912 "\tldf.fill f20 = [r16],32\n"
913 "\tldf.fill f21 = [r17],32\n"
914 "\tmov ar.lc = loc32 ;;\n" /* restore loop counter */
915 "\tldf.fill f22 = [r16],32\n"
916 "\tldf.fill f23 = [r17],32\n"
917 "\tmov pr = loc33 ;;\n" /* restore predicate registers */
918 "\tldf.fill f2 = [r16],32\n"
919 "\tldf.fill f3 = [r17],32\n"
920 "\tadds sp = %0, sp ;;\n" /* restore stack */
921 "\tldf.fill f4 = [r16],32\n"
922 "\tldf.fill f5 = [r17],32\n"
923 "\tbr.ret.sptk.many b0 ;;\n" /* return */
924 : : "i"(RESERVED_C_STACK_BYTES + PRESERVED_FP_REGISTERS*16),
926 "i"(PRESERVED_FP_REGISTERS));
931 /* -----------------------------------------------------------------------------
933 -------------------------------------------------------------------------- */
935 #ifdef mips_HOST_ARCH
938 StgRun(StgFunPtr f, StgRegTable *basereg)
940 register StgThreadReturnCode __v0 __asm__("$2");
942 __asm__ __volatile__(
947 " .globl " STG_RETURN " \n"
948 " .aent " STG_RETURN " \n"
953 : "r" (f), "r" (basereg)
954 "$16", "$17", "$18", "$19", "$20", "$21", "$22", "$23",
956 "$f20", "$f22", "$f24", "$f26", "$f28", "$f30",
962 #endif /* mips_HOST_ARCH */
964 #endif /* !USE_MINIINTERPRETER */