[project @ 2005-01-23 06:10:15 by wolfgang]
[ghc-hetmet.git] / ghc / rts / StgCRun.c
1 /* -----------------------------------------------------------------------------
2  *
3  * (c) The GHC Team, 1998-2003
4  *
5  * STG-to-C glue.
6  *
7  * To run an STG function from C land, call
8  *
9  *              rv = StgRun(f,BaseReg);
10  *
11  * where "f" is the STG function to call, and BaseReg is the address of the
12  * RegTable for this run (we might have separate RegTables if we're running
13  * multiple threads on an SMP machine).
14  *
15  * In the end, "f" must JMP to StgReturn (defined below),
16  * passing the return-value "rv" in R1,
17  * to return to the caller of StgRun returning "rv" in
18  * whatever way C returns a value.
19  *
20  * NOTE: StgRun/StgReturn do *NOT* load or store Hp or any
21  * other registers (other than saving the C callee-saves
22  * registers).  Instead, the called function "f" must do that
23  * in STG land.
24  *
25  * GCC will have assumed that pushing/popping of C-stack frames is
26  * going on when it generated its code, and used stack space
27  * accordingly.  However, we actually {\em post-process away} all
28  * such stack-framery (see \tr{ghc/driver/ghc-asm.lprl}). Things will
29  * be OK however, if we initially make sure there are
30  * @RESERVED_C_STACK_BYTES@ on the C-stack to begin with, for local
31  * variables.
32  *
33  * -------------------------------------------------------------------------- */
34
35 #include "PosixSource.h"
36
37
38 /*
39  * We define the following (unused) global register variables, because for
40  * some reason gcc generates sub-optimal code for StgRun() on the Alpha
41  * (unnecessarily saving extra registers on the stack) if we don't.
42  *
43  * Why do it at the top of this file, rather than near StgRun() below?  Because
44  * gcc doesn't let us define global register variables after any function
45  * definition has been read.  Any point after #include "Stg.h" would be too
46  * late.
47  *
48  * We define alpha_EXTRA_CAREFUL here to save $s6, $f8 and $f9 -- registers
49  * that we don't use but which are callee-save registers.  The __divq() routine
50  * in libc.a clobbers $s6.
51  */
52 #include "ghcconfig.h"
53 #ifdef alpha_TARGET_ARCH
54 #define alpha_EXTRA_CAREFUL
55 register long   fake_ra __asm__("$26");
56 register long   fake_gp __asm__("$29");
57 #ifdef alpha_EXTRA_CAREFUL
58 register long   fake_s6 __asm__("$15");
59 register double fake_f8 __asm__("$f8");
60 register double fake_f9 __asm__("$f9");
61 #endif
62 #endif
63
64 /* include Stg.h first because we want real machine regs in here: we
65  * have to get the value of R1 back from Stg land to C land intact.
66  */
67 #include "Stg.h"
68 #include "Rts.h"
69 #include "StgRun.h"
70 #include "RtsFlags.h"
71 #include "OSThreads.h"
72 #include "Capability.h"
73
74 #ifdef DEBUG
75 #include "RtsUtils.h"
76 #include "Printer.h"
77 #endif
78
79 #ifdef USE_MINIINTERPRETER
80
81 /* -----------------------------------------------------------------------------
82    any architecture (using miniinterpreter)
83    -------------------------------------------------------------------------- */
84
/*
 * Mini-interpreter StgRun: a trampoline.  Starting with "f", each function is
 * called and whatever it returns is called next; the loop stops when a null
 * function pointer comes back.  The result is then read from R1.i.
 * "basereg" is unused in this variant (hence STG_UNUSED).
 *
 * NOTE(review): debugBelch() and printPtr() are called unconditionally here,
 * while RtsUtils.h and Printer.h are only included under DEBUG at the top of
 * this file -- confirm they are declared for non-DEBUG builds.
 */
85 StgThreadReturnCode StgRun(StgFunPtr f, StgRegTable *basereg STG_UNUSED)
86 {
87     while (f) {
        /* optional trace of every jump target */
88         if (RtsFlags[0].DebugFlags.interpreter) {
89             debugBelch("Jumping to ");
90             printPtr((P_)f); fflush(stdout);
91             debugBelch("\n");
92         }
        /* run one step; its return value is the next function to run */
93         f = (StgFunPtr) (f)();
94     }
95     return (StgThreadReturnCode)R1.i;
96 }
97
/*
 * Mini-interpreter StgReturn: returns a null function pointer, which is the
 * value that makes the while (f) loop in the mini-interpreter StgRun stop.
 */
98 StgFunPtr StgReturn(void)
99 {
100     return 0;
101 }
102
103 #else /* !USE_MINIINTERPRETER */
104
105 #ifdef LEADING_UNDERSCORE
106 #define STG_RETURN "_StgReturn"
107 #else
108 #define STG_RETURN "StgReturn"
109 #endif
110
111 /* -----------------------------------------------------------------------------
112    x86 architecture
113    -------------------------------------------------------------------------- */
114
115 #ifdef i386_TARGET_ARCH
116
/*
 * x86 StgRun: save %ebx, %esi, %edi and %ebp, load BaseReg into %ebx and
 * jump to "f".  STG code finishes by jumping to the StgReturn label defined
 * inside the asm below; there %esi is copied to %eax as the result and the
 * four saved registers are reloaded.
 *
 * asm operands: %0 = r (in %eax, early-clobber), %1 = space, %2 = f,
 *               %3 = basereg, %4 = RESERVED_C_STACK_BYTES (immediate).
 *
 * The save area is addressed as %esp + RESERVED_C_STACK_BYTES rather than
 * through "space" itself; the four extra pointer-sized slots at the end of
 * "space" are sized for it.
 * NOTE(review): this presumably relies on "space" sitting at the very bottom
 * of the frame (i.e. at %esp) -- confirm against the generated code.
 */
117 StgThreadReturnCode
118 StgRun(StgFunPtr f, StgRegTable *basereg) {
119
120     unsigned char space[ RESERVED_C_STACK_BYTES + 4*sizeof(void *) ];
121     StgThreadReturnCode r;
122
123     __asm__ volatile (
124         /*
125          * save callee-saves registers on behalf of the STG code.
126          */
127         "movl %%esp, %%eax\n\t"
128         "addl %4, %%eax\n\t"
129         "movl %%ebx,0(%%eax)\n\t"
130         "movl %%esi,4(%%eax)\n\t"
131         "movl %%edi,8(%%eax)\n\t"
132         "movl %%ebp,12(%%eax)\n\t"
133         /*
134          * Set BaseReg
135          */
136         "movl %3,%%ebx\n\t"
137         /*
138          * grab the function argument from the stack, and jump to it.
139          */
140         "movl %2,%%eax\n\t"
141         "jmp *%%eax\n\t"
142
        /* STG code re-enters C land here by jumping to this global label */
143         ".global " STG_RETURN "\n"
144         STG_RETURN ":\n\t"
145
146         "movl %%esi, %%eax\n\t"   /* Return value in R1  */
147
148         /*
149          * restore callee-saves registers.  (Don't stomp on %%eax!)
150          */
151         "movl %%esp, %%edx\n\t"
152         "addl %4, %%edx\n\t"
153         "movl 0(%%edx),%%ebx\n\t"       /* restore the registers saved above */
154         "movl 4(%%edx),%%esi\n\t"
155         "movl 8(%%edx),%%edi\n\t"
156         "movl 12(%%edx),%%ebp\n\t"
157
158       : "=&a" (r), "=m" (space)
159       : "m" (f), "m" (basereg), "i" (RESERVED_C_STACK_BYTES)
160       : "edx" /* stomps on %edx */
161     );
162
163     return r;
164 }
165
166 #endif
167
168 /* ----------------------------------------------------------------------------
169    x86-64 is almost the same as plain x86.
170
171    I've done it using entirely inline assembler, because I couldn't
172    get gcc to generate the correct subtraction from %rsp by using
173    the local array variable trick.  It didn't seem to reserve
174    enough space.  Oh well, it's not much harder this way.
175
176    ------------------------------------------------------------------------- */
177
178 #ifdef x86_64_TARGET_ARCH
179
180 extern StgThreadReturnCode StgRun(StgFunPtr f, StgRegTable *basereg);
181
/*
 * Container for the x86-64 StgRun and StgReturn entry points, which are
 * global labels defined inside the asm statement below.  This C function is
 * never called; it only exists so that the asm is emitted.
 *
 * It is static and unreferenced, so it is marked "used": otherwise a
 * compiler that discards unused static functions would drop the StgRun and
 * StgReturn symbols together with it.
 *
 * StgRun(f, basereg): f arrives in %rdi and basereg in %rsi.
 * Frame: RESERVED_C_STACK_BYTES + 48 bytes are taken from %rsp; the top
 * 48 bytes hold %rbx, %rbp and %r12-%r15, addressed from
 * %rsp + RESERVED_C_STACK_BYTES (written as "%0-48" below).
 * StgReturn: copies %r13 (R1) to %rax as the result, reloads the six saved
 * registers, releases the frame and returns to StgRun's caller.
 */
182 static void __attribute__((used)) StgRunIsImplementedInAssembler(void)
183 {
184     __asm__ volatile (
185         /*
186          * save callee-saves registers on behalf of the STG code.
187          */
188         ".globl StgRun\n"
189         "StgRun:\n\t"
190         "subq %0, %%rsp\n\t"
191         "movq %%rsp, %%rax\n\t"
192         "addq %0-48, %%rax\n\t"
193         "movq %%rbx,0(%%rax)\n\t"
194         "movq %%rbp,8(%%rax)\n\t"
195         "movq %%r12,16(%%rax)\n\t"
196         "movq %%r13,24(%%rax)\n\t"
197         "movq %%r14,32(%%rax)\n\t"
198         "movq %%r15,40(%%rax)\n\t"
199         /*
200          * Set BaseReg (%rbx) from the second argument (%rsi)
201          */
202         "movq %%rsi,%%rbx\n\t"
203         /*
204          * the function to run is the first argument (%rdi); jump to it.
205          */
206         "movq %%rdi,%%rax\n\t"
207         "jmp *%%rax\n\t"
208
209         ".global " STG_RETURN "\n"
210         STG_RETURN ":\n\t"
211
212         "movq %%r13, %%rax\n\t"   /* Return value in R1  */
213
214         /*
215          * restore callee-saves registers.  (Don't stomp on %%rax!)
216          */
217         "movq %%rsp, %%rdx\n\t"
218         "addq %0-48, %%rdx\n\t"
219         "movq 0(%%rdx),%%rbx\n\t"       /* restore the registers saved above */
220         "movq 8(%%rdx),%%rbp\n\t"
221         "movq 16(%%rdx),%%r12\n\t"
222         "movq 24(%%rdx),%%r13\n\t"
223         "movq 32(%%rdx),%%r14\n\t"
224         "movq 40(%%rdx),%%r15\n\t"
225         "addq %0, %%rsp\n\t"
226         "retq"
227
228         : : "i"(RESERVED_C_STACK_BYTES+48 /*stack frame size*/));
229 }
230
231 #endif /* x86-64 */
232
233 /* -----------------------------------------------------------------------------
234    Sparc architecture
235
236    --
237    OLD COMMENT from GHC-3.02:
238
239    We want tailjumps to be calls, because `call xxx' is the only Sparc
240    branch that allows an arbitrary label as a target.  (Gcc's ``goto
241    *target'' construct ends up loading the label into a register and
242    then jumping, at the cost of two extra instructions for the 32-bit
243    load.)
244
245    When entering the threaded world, we stash our return address in a
246    known location so that \tr{%i7} is available as an extra
247    callee-saves register.  Of course, we have to restore this when
248    coming out of the threaded world.
249
250    I hate this god-forsaken architecture.  Since the top of the
251    reserved stack space is used for globals and the bottom is reserved
252    for outgoing arguments, we have to stick our return address
253    somewhere in the middle.  Currently, I'm allowing 100 extra
254    outgoing arguments beyond the first 6.  --JSM
255
256    Updated info (GHC 4.06): we don't appear to use %i7 any more, so
257    I'm not sure whether we still need to save it.  Incidentally, what
258    does the last paragraph above mean when it says "the top of the
259    stack is used for globals"?  What globals?  --SDM
260
261    Updated info (GHC 4.08.2): not saving %i7 any more (see below).
262    -------------------------------------------------------------------------- */
263
264 #ifdef sparc_TARGET_ARCH
265
/*
 * Sparc StgRun: reserve RESERVED_C_STACK_BYTES of C stack in "space", call
 * "f" as an ordinary C call, and define the global StgReturn label right
 * after the call.  The result is read from R1.i.
 *
 * "basereg" is not referenced in this body.  Both "#if 0" sections (saving
 * and reloading %i7 through space[100]) are disabled; see the comments below.
 */
266 StgThreadReturnCode
267 StgRun(StgFunPtr f, StgRegTable *basereg) {
268
269     unsigned char space[RESERVED_C_STACK_BYTES];
270 #if 0
271     register void *i7 __asm__("%i7");
272     ((void **)(space))[100] = i7;
273 #endif
274     f();
275     __asm__ volatile (
276             ".align 4\n"
277             ".global " STG_RETURN "\n"
278             STG_RETURN ":"
279             : : : "l0","l1","l2","l3","l4","l5","l6","l7");
280     /* we tell the C compiler that l0-l7 are clobbered on return to
281      * StgReturn, otherwise it tries to use these to save eg. the
282      * address of space[100] across the call.  The correct thing
283      * to do would be to save all the callee-saves regs, but we
284      * can't be bothered to do that.
285      *
286      * The code that gcc generates for this little fragment is now
287      * terrible.  We could do much better by coding it directly in
288      * assembler.
289      */
290 #if 0
291     /* updated 4.08.2: we don't save %i7 in the middle of the reserved
292      * space any more, since gcc tries to save its address across the
293      * call to f(), this gets clobbered in STG land and we end up
294      * dereferencing a bogus pointer in StgReturn.
295      */
296     __asm__ volatile ("ld %1,%0"
297                       : "=r" (i7) : "m" (((void **)(space))[100]));
298 #endif
299     return (StgThreadReturnCode)R1.i;
300 }
301
302 #endif
303
304 /* -----------------------------------------------------------------------------
305    alpha architecture
306
307    "The stack pointer (SP) must at all times denote an address that has octaword
308     alignment. (This restriction has the side effect that the in-memory portion
309     of the argument list, if any, will start on an octaword boundary.) Note that
310     the stack grows toward lower addresses. During a procedure invocation, SP
311     can never be set to a value that is higher than the value of SP at entry to
312     that procedure invocation.
313
314    "The contents of the stack, located above the portion of the argument list
315     (if any) that is passed in memory, belong to the calling procedure. Because
316     they are part of the calling procedure, they should not be read or written
317     by the called procedure, except as specified by indirect arguments or
318     language-controlled up-level references.
319
320    "The SP value might be used by the hardware when raising exceptions and
321     asynchronous interrupts. It must be assumed that the contents of the stack
322     below the current SP value and within the stack for the current thread are
323     continually and unpredictably modified, as specified in the _Alpha
324     Architecture Reference Manual_, and as a result of asynchronous software
325     actions."
326
327    -- Compaq Computer Corporation, Houston. Tru64 UNIX Calling Standard for
328       Alpha Systems, 5.1 edition, August 2000, section 3.2.1.  http://www.
329       tru64unix.compaq.com/docs/base_doc/DOCUMENTATION/V51_PDF/ARH9MBTE.PDF
330    -------------------------------------------------------------------------- */
331
332 #ifdef alpha_TARGET_ARCH
333
/*
 * Alpha StgRun.  Each register to be preserved is bound to a local register
 * variable (real_*) and copied into a volatile local (save_*) before the
 * jump, then copied back afterwards:
 *
 *   $26 (ra), $29 (gp), $9-$14 (s0-s5), $f2-$f7,
 *   plus $15 (s6), $f8 and $f9 when alpha_EXTRA_CAREFUL is defined.
 *
 * "f" is placed in $27 and jumped to after the stack pointer ($30) has been
 * lowered by RESERVED_C_STACK_BYTES.  STG code comes back by jumping to the
 * global StgReturn label, where $30 is raised again.  The return value is
 * read from $14 (real_s5) before that register is restored.
 *
 * "basereg" is not referenced in this body.
 */
334 StgThreadReturnCode
335 StgRun(StgFunPtr f, StgRegTable *basereg)
336 {
337     register long   real_ra __asm__("$26"); volatile long   save_ra;
338     register long   real_gp __asm__("$29"); volatile long   save_gp;
339
340     register long   real_s0 __asm__("$9" ); volatile long   save_s0;
341     register long   real_s1 __asm__("$10"); volatile long   save_s1;
342     register long   real_s2 __asm__("$11"); volatile long   save_s2;
343     register long   real_s3 __asm__("$12"); volatile long   save_s3;
344     register long   real_s4 __asm__("$13"); volatile long   save_s4;
345     register long   real_s5 __asm__("$14"); volatile long   save_s5;
346 #ifdef alpha_EXTRA_CAREFUL
347     register long   real_s6 __asm__("$15"); volatile long   save_s6;
348 #endif
349
350     register double real_f2 __asm__("$f2"); volatile double save_f2;
351     register double real_f3 __asm__("$f3"); volatile double save_f3;
352     register double real_f4 __asm__("$f4"); volatile double save_f4;
353     register double real_f5 __asm__("$f5"); volatile double save_f5;
354     register double real_f6 __asm__("$f6"); volatile double save_f6;
355     register double real_f7 __asm__("$f7"); volatile double save_f7;
356 #ifdef alpha_EXTRA_CAREFUL
357     register double real_f8 __asm__("$f8"); volatile double save_f8;
358     register double real_f9 __asm__("$f9"); volatile double save_f9;
359 #endif
360
361     register StgFunPtr real_pv __asm__("$27");
362
363     StgThreadReturnCode ret;
364
    /* --- save phase: copy every preserved register into its volatile slot */
365     save_ra = real_ra;
366     save_gp = real_gp;
367
368     save_s0 = real_s0;
369     save_s1 = real_s1;
370     save_s2 = real_s2;
371     save_s3 = real_s3;
372     save_s4 = real_s4;
373     save_s5 = real_s5;
374 #ifdef alpha_EXTRA_CAREFUL
375     save_s6 = real_s6;
376 #endif
377
378     save_f2 = real_f2;
379     save_f3 = real_f3;
380     save_f4 = real_f4;
381     save_f5 = real_f5;
382     save_f6 = real_f6;
383     save_f7 = real_f7;
384 #ifdef alpha_EXTRA_CAREFUL
385     save_f8 = real_f8;
386     save_f9 = real_f9;
387 #endif
388
    /* the jump below goes through $27, so load the target there */
389     real_pv = f;
390
    /* lower $30 by RESERVED_C_STACK_BYTES, jump to f; StgReturn undoes the
     * stack adjustment when STG code jumps back */
391     __asm__ volatile(   "lda $30,-%0($30)"      "\n"
392                 "\t"    "jmp ($27)"             "\n"
393                 "\t"    ".align 3"              "\n"
394                 ".globl " STG_RETURN            "\n"
395                 STG_RETURN ":"                  "\n"
396                 "\t"    "lda $30,%0($30)"       "\n"
397                 : : "K" (RESERVED_C_STACK_BYTES));
398
    /* pick up the result from $14 before $14 is restored below */
399     ret = real_s5;
400
    /* --- restore phase: mirror image of the save phase */
401     real_s0 = save_s0;
402     real_s1 = save_s1;
403     real_s2 = save_s2;
404     real_s3 = save_s3;
405     real_s4 = save_s4;
406     real_s5 = save_s5;
407 #ifdef alpha_EXTRA_CAREFUL
408     real_s6 = save_s6;
409 #endif
410
411     real_f2 = save_f2;
412     real_f3 = save_f3;
413     real_f4 = save_f4;
414     real_f5 = save_f5;
415     real_f6 = save_f6;
416     real_f7 = save_f7;
417 #ifdef alpha_EXTRA_CAREFUL
418     real_f8 = save_f8;
419     real_f9 = save_f9;
420 #endif
421
422     real_ra = save_ra;
423     real_gp = save_gp;
424
425     return ret;
426 }
427
428 #endif /* alpha_TARGET_ARCH */
429
430 /* -----------------------------------------------------------------------------
431    HP-PA architecture
432    -------------------------------------------------------------------------- */
433
434 #ifdef hppa1_1_TARGET_ARCH
435
/*
 * HP-PA StgRun.  The first asm stores %r3-%r18 (16 words) and %fr12-%fr21
 * (10 doubles) into a save area whose base is computed into %r19 as
 * %r30 + operand; "f" is then called as an ordinary C call.  The second asm
 * exports the StgReturn label, copies %r11 (R1) into "ret" and reloads the
 * same registers from the same area.
 *
 * Layout of the save area relative to its base: words at 0..60, doubles at
 * 64..136 (reached by stepping %r19 by 80, 32 and 32 and using offsets
 * -16/-8/0/8).
 *
 * "basereg" is not referenced in this body.
 *
 * NOTE(review): "space" is sized with 16*sizeof(long) but both asm operands
 * use 116 * sizeof(long) -- confirm whether the extra 100 words are intended.
 * NOTE(review): "ret" is a plain "=r" output written before the ldw
 * instructions that reload %r3-%r18, and only %r19 is listed as clobbered --
 * confirm the compiler cannot place "ret" in one of the reloaded registers.
 */
436 StgThreadReturnCode
437 StgRun(StgFunPtr f, StgRegTable *basereg)
438 {
439     StgChar space[RESERVED_C_STACK_BYTES+16*sizeof(long)+10*sizeof(double)];
440     StgThreadReturnCode ret;
441
    /* save %r3-%r18 and %fr12-%fr21 */
442     __asm__ volatile ("ldo %0(%%r30),%%r19\n"
443                       "\tstw %%r3, 0(0,%%r19)\n"
444                       "\tstw %%r4, 4(0,%%r19)\n"
445                       "\tstw %%r5, 8(0,%%r19)\n"
446                       "\tstw %%r6,12(0,%%r19)\n"
447                       "\tstw %%r7,16(0,%%r19)\n"
448                       "\tstw %%r8,20(0,%%r19)\n"
449                       "\tstw %%r9,24(0,%%r19)\n"
450                       "\tstw %%r10,28(0,%%r19)\n"
451                       "\tstw %%r11,32(0,%%r19)\n"
452                       "\tstw %%r12,36(0,%%r19)\n"
453                       "\tstw %%r13,40(0,%%r19)\n"
454                       "\tstw %%r14,44(0,%%r19)\n"
455                       "\tstw %%r15,48(0,%%r19)\n"
456                       "\tstw %%r16,52(0,%%r19)\n"
457                       "\tstw %%r17,56(0,%%r19)\n"
458                       "\tstw %%r18,60(0,%%r19)\n"
459                       "\tldo 80(%%r19),%%r19\n"
460                       "\tfstds %%fr12,-16(0,%%r19)\n"
461                       "\tfstds %%fr13, -8(0,%%r19)\n"
462                       "\tfstds %%fr14,  0(0,%%r19)\n"
463                       "\tfstds %%fr15,  8(0,%%r19)\n"
464                       "\tldo 32(%%r19),%%r19\n"
465                       "\tfstds %%fr16,-16(0,%%r19)\n"
466                       "\tfstds %%fr17, -8(0,%%r19)\n"
467                       "\tfstds %%fr18,  0(0,%%r19)\n"
468                       "\tfstds %%fr19,  8(0,%%r19)\n"
469                       "\tldo 32(%%r19),%%r19\n"
470                       "\tfstds %%fr20,-16(0,%%r19)\n"
471                       "\tfstds %%fr21, -8(0,%%r19)\n" : :
472                       "n" (-(116 * sizeof(long) + 10 * sizeof(double))) : "%r19"
473                       );
474
475     f();
476
    /* StgReturn: fetch the result, then reload everything saved above.
     * Here %0 is "ret" and %1 is the save-area offset. */
477     __asm__ volatile (".align 4\n"
478                       "\t.EXPORT " STG_RETURN ",CODE\n"
479                       "\t.EXPORT " STG_RETURN ",ENTRY,PRIV_LEV=3\n"
480                       STG_RETURN "\n"
481                       /* "\tldo %0(%%r3),%%r19\n" */
482                       "\tldo %1(%%r30),%%r19\n"
483                       "\tcopy %%r11, %0\n"  /* save R1 */
484                       "\tldw  0(0,%%r19),%%r3\n"
485                       "\tldw  4(0,%%r19),%%r4\n"
486                       "\tldw  8(0,%%r19),%%r5\n"
487                       "\tldw 12(0,%%r19),%%r6\n"
488                       "\tldw 16(0,%%r19),%%r7\n"
489                       "\tldw 20(0,%%r19),%%r8\n"
490                       "\tldw 24(0,%%r19),%%r9\n"
491                       "\tldw 28(0,%%r19),%%r10\n"
492                       "\tldw 32(0,%%r19),%%r11\n"
493                       "\tldw 36(0,%%r19),%%r12\n"
494                       "\tldw 40(0,%%r19),%%r13\n"
495                       "\tldw 44(0,%%r19),%%r14\n"
496                       "\tldw 48(0,%%r19),%%r15\n"
497                       "\tldw 52(0,%%r19),%%r16\n"
498                       "\tldw 56(0,%%r19),%%r17\n"
499                       "\tldw 60(0,%%r19),%%r18\n"
500                       "\tldo 80(%%r19),%%r19\n"
501                       "\tfldds -16(0,%%r19),%%fr12\n"
502                       "\tfldds  -8(0,%%r19),%%fr13\n"
503                       "\tfldds   0(0,%%r19),%%fr14\n"
504                       "\tfldds   8(0,%%r19),%%fr15\n"
505                       "\tldo 32(%%r19),%%r19\n"
506                       "\tfldds -16(0,%%r19),%%fr16\n"
507                       "\tfldds  -8(0,%%r19),%%fr17\n"
508                       "\tfldds   0(0,%%r19),%%fr18\n"
509                       "\tfldds   8(0,%%r19),%%fr19\n"
510                       "\tldo 32(%%r19),%%r19\n"
511                       "\tfldds -16(0,%%r19),%%fr20\n"
512                       "\tfldds  -8(0,%%r19),%%fr21\n"
513                          : "=r" (ret)
514                          : "n" (-(116 * sizeof(long) + 10 * sizeof(double)))
515                          : "%r19"
516                       );
517
518     return ret;
519 }
520
521 #endif /* hppa1_1_TARGET_ARCH */
522
523 /* -----------------------------------------------------------------------------
524    PowerPC architecture
525
526    Everything is in assembler, so we don't have to deal with GCC...
527    
528    -------------------------------------------------------------------------- */
529
530 #ifdef powerpc_TARGET_ARCH
531
532 extern StgThreadReturnCode StgRun(StgFunPtr f, StgRegTable *basereg);
533
534 #ifdef darwin_TARGET_OS
/*
 * Container for the PowerPC/Darwin _StgRun and _StgReturn entry points,
 * which are global labels defined inside the asm below.  This C function is
 * not called from anywhere in this file.
 *
 * _StgRun:    r3 = f, r4 = basereg.  Copies LR to r0, branches to saveFP,
 *             stores r13-r31 at -220(r1), then allocates the frame
 *             (RESERVED_C_STACK_BYTES+224 bytes) with stwu, puts basereg in
 *             r27, and jumps to f via CTR (f is also copied to r12).
 * _StgReturn: result = r14 (moved to r3); pops the frame, reloads r13-r31
 *             and branches to restFP.
 *
 * NOTE(review): saveFP/restFP are defined outside this file; presumably they
 * save/restore f14-f31 and the link register -- confirm.
 * NOTE(review): being static and unreferenced, this function may be dropped
 * by a compiler that removes unused statics; consider __attribute__((used)).
 */
535 static void StgRunIsImplementedInAssembler(void)
536 {
537 #if HAVE_SUBSECTIONS_VIA_SYMBOLS
538             // if the toolchain supports deadstripping, we have to
539             // prevent it here (it tends to get confused here).
540         __asm__ volatile (".no_dead_strip _StgRunIsImplementedInAssembler");
541 #endif
542         __asm__ volatile (
543                 "\n.globl _StgRun\n"
544                 "_StgRun:\n"
545                 "\tmflr r0\n"
546                 "\tbl saveFP # f14\n"
547                 "\tstmw r13,-220(r1)\n"
548                 "\tstwu r1,-%0(r1)\n"
549                 "\tmr r27,r4\n" // BaseReg == r27
550                 "\tmtctr r3\n"
551                 "\tmr r12,r3\n"
552                 "\tbctr\n"
553                 ".globl _StgReturn\n"
554                 "_StgReturn:\n"
555                 "\tmr r3,r14\n"
556                 "\tla r1,%0(r1)\n"
557                 "\tlmw r13,-220(r1)\n"
558                 "\tb restFP # f14\n"
559         : : "i"(RESERVED_C_STACK_BYTES+224 /*stack frame size*/));
560 }
561 #else
562
563 // This version is for PowerPC Linux.
564
565 // Differences from the Darwin/Mac OS X version:
566 // *) Different Assembler Syntax
567 // *) Doesn't use Register Saving Helper Functions (although they exist somewhere)
568 // *) We may not store below the stack pointer, i.e. at negative offsets
569 //    from r1 (no "Red Zone" as in the Darwin ABI)
570 // *) The Link Register is saved to a different offset in the caller's stack frame
571 //    (Linux: 4(r1), Darwin 8(r1))
572
/*
 * Container for the PowerPC/Linux StgRun and StgReturn entry points, which
 * are global labels defined inside the asm below.  This C function is not
 * called from anywhere in this file.
 *
 * StgRun:    r3 = f, r4 = basereg.  Stores LR at 4(r1), keeps the incoming
 *            stack pointer in r5, allocates the frame
 *            (RESERVED_C_STACK_BYTES+224 bytes) with stwu, and only then
 *            saves r13-r31 at -220(r5) and f14-f31 at -144..-8(r5).
 *            basereg goes to r27; f is jumped to via CTR (and copied to r12).
 * StgReturn: result = r14 (moved to r3); r5 = r1 + frame size, i.e. the
 *            stack pointer StgRun was entered with; reloads the same
 *            registers, resets r1, reloads LR from 4(r1) and returns.
 *
 * NOTE(review): being static and unreferenced, this function may be dropped
 * by a compiler that removes unused statics; consider __attribute__((used)).
 */
573 static void StgRunIsImplementedInAssembler(void)
574 {
575         __asm__ volatile (
576                 "\t.globl StgRun\n"
577                 "\t.type StgRun,@function\n"
578                 "StgRun:\n"
579                 "\tmflr 0\n"
580                 "\tstw 0,4(1)\n"
581                 "\tmr 5,1\n"
582                 "\tstwu 1,-%0(1)\n"
583                 "\tstmw 13,-220(5)\n"
584                 "\tstfd 14,-144(5)\n"
585                 "\tstfd 15,-136(5)\n"
586                 "\tstfd 16,-128(5)\n"
587                 "\tstfd 17,-120(5)\n"
588                 "\tstfd 18,-112(5)\n"
589                 "\tstfd 19,-104(5)\n"
590                 "\tstfd 20,-96(5)\n"
591                 "\tstfd 21,-88(5)\n"
592                 "\tstfd 22,-80(5)\n"
593                 "\tstfd 23,-72(5)\n"
594                 "\tstfd 24,-64(5)\n"
595                 "\tstfd 25,-56(5)\n"
596                 "\tstfd 26,-48(5)\n"
597                 "\tstfd 27,-40(5)\n"
598                 "\tstfd 28,-32(5)\n"
599                 "\tstfd 29,-24(5)\n"
600                 "\tstfd 30,-16(5)\n"
601                 "\tstfd 31,-8(5)\n"
602                 "\tmr 27,4\n"  // BaseReg == r27
603                 "\tmtctr 3\n"
604                 "\tmr 12,3\n"
605                 "\tbctr\n"
606                 ".globl StgReturn\n"
607                 "\t.type StgReturn,@function\n"
608                 "StgReturn:\n"
609                 "\tmr 3,14\n"
610                 "\tla 5,%0(1)\n"
611                 "\tlmw 13,-220(5)\n"
612                 "\tlfd 14,-144(5)\n"
613                 "\tlfd 15,-136(5)\n"
614                 "\tlfd 16,-128(5)\n"
615                 "\tlfd 17,-120(5)\n"
616                 "\tlfd 18,-112(5)\n"
617                 "\tlfd 19,-104(5)\n"
618                 "\tlfd 20,-96(5)\n"
619                 "\tlfd 21,-88(5)\n"
620                 "\tlfd 22,-80(5)\n"
621                 "\tlfd 23,-72(5)\n"
622                 "\tlfd 24,-64(5)\n"
623                 "\tlfd 25,-56(5)\n"
624                 "\tlfd 26,-48(5)\n"
625                 "\tlfd 27,-40(5)\n"
626                 "\tlfd 28,-32(5)\n"
627                 "\tlfd 29,-24(5)\n"
628                 "\tlfd 30,-16(5)\n"
629                 "\tlfd 31,-8(5)\n"
630                 "\tmr 1,5\n"
631                 "\tlwz 0,4(1)\n"
632                 "\tmtlr 0\n"
633                 "\tblr\n"
634         : : "i"(RESERVED_C_STACK_BYTES+224 /*stack frame size*/));
635 }
636 #endif
637
638 #endif
639
640 /* -----------------------------------------------------------------------------
641    PowerPC 64 architecture
642
643    Everything is in assembler, so we don't have to deal with GCC...
644    
645    -------------------------------------------------------------------------- */
646
647 #ifdef powerpc64_TARGET_ARCH
648
649 #ifdef linux_TARGET_OS
650 extern StgThreadReturnCode StgRun(StgFunPtr f, StgRegTable *basereg);
651
/*
 * Container for the PowerPC64/Linux StgRun and StgReturn entry points.  The
 * asm first emits two 24-byte entries into the ".opd" section (StgRun and
 * StgReturn, each pointing at the dot-prefixed code label and the TOC base),
 * then the code itself.  This C function is not called from anywhere in
 * this file.
 *
 * .StgRun:    r3 = pointer to f's descriptor, r4 = basereg.  Stores LR at
 *             16(r1), keeps the incoming stack pointer in r5, allocates the
 *             frame (RESERVED_C_STACK_BYTES+304 bytes) with stdu, then saves
 *             r2 at -296(r5), r14-r31 at -288..-152(r5) and f14-f31 at
 *             -144..-8(r5).  basereg goes to r27; r2 is loaded from 8(r3)
 *             and the jump target from 0(r3).
 * .StgReturn: result = r14 (moved to r3); r5 = r1 + frame size; reloads the
 *             same registers, resets r1, reloads LR from 16(r1) and returns.
 *
 * NOTE(review): being static and unreferenced, this function may be dropped
 * by a compiler that removes unused statics; consider __attribute__((used)).
 */
652 static void StgRunIsImplementedInAssembler(void)
653 {
654         // r0 volatile
655         // r1 stack pointer
656         // r2 toc - needs to be saved
657         // r3-r10 argument passing, volatile
658         // r11, r12 very volatile (not saved across cross-module calls)
659         // r13 thread local state (never modified, don't need to save)
660         // r14-r31 callee-save
661         __asm__ volatile (
662                 ".section \".opd\",\"aw\"\n"
663                 ".align 3\n"
664                 ".globl StgRun\n"
665                 "StgRun:\n"
666                         "\t.quad\t.StgRun,.TOC.@tocbase,0\n"
667                         "\t.size StgRun,24\n"
668                 ".globl StgReturn\n"
669                 "StgReturn:\n"
670                         "\t.quad\t.StgReturn,.TOC.@tocbase,0\n"
671                         "\t.size StgReturn,24\n"
672                 ".previous\n"
673                 ".globl .StgRun\n"
674                 ".type .StgRun,@function\n"
675                 ".StgRun:\n"
676                         "\tmflr 0\n"
677                         "\tmr 5, 1\n"
678                         "\tstd 0, 16(1)\n"
679                         "\tstdu 1, -%0(1)\n"
680                         "\tstd 2, -296(5)\n"
681                         "\tstd 14, -288(5)\n"
682                         "\tstd 15, -280(5)\n"
683                         "\tstd 16, -272(5)\n"
684                         "\tstd 17, -264(5)\n"
685                         "\tstd 18, -256(5)\n"
686                         "\tstd 19, -248(5)\n"
687                         "\tstd 20, -240(5)\n"
688                         "\tstd 21, -232(5)\n"
689                         "\tstd 22, -224(5)\n"
690                         "\tstd 23, -216(5)\n"
691                         "\tstd 24, -208(5)\n"
692                         "\tstd 25, -200(5)\n"
693                         "\tstd 26, -192(5)\n"
694                         "\tstd 27, -184(5)\n"
695                         "\tstd 28, -176(5)\n"
696                         "\tstd 29, -168(5)\n"
697                         "\tstd 30, -160(5)\n"
698                         "\tstd 31, -152(5)\n"
699                         "\tstfd 14, -144(5)\n"
700                         "\tstfd 15, -136(5)\n"
701                         "\tstfd 16, -128(5)\n"
702                         "\tstfd 17, -120(5)\n"
703                         "\tstfd 18, -112(5)\n"
704                         "\tstfd 19, -104(5)\n"
705                         "\tstfd 20, -96(5)\n"
706                         "\tstfd 21, -88(5)\n"
707                         "\tstfd 22, -80(5)\n"
708                         "\tstfd 23, -72(5)\n"
709                         "\tstfd 24, -64(5)\n"
710                         "\tstfd 25, -56(5)\n"
711                         "\tstfd 26, -48(5)\n"
712                         "\tstfd 27, -40(5)\n"
713                         "\tstfd 28, -32(5)\n"
714                         "\tstfd 29, -24(5)\n"
715                         "\tstfd 30, -16(5)\n"
716                         "\tstfd 31, -8(5)\n"
717                         "\tmr 27, 4\n"  // BaseReg == r27
718                         "\tld 2, 8(3)\n"
719                         "\tld 3, 0(3)\n"
720                         "\tmtctr 3\n"
721                         "\tbctr\n"
722                 ".globl .StgReturn\n"
723                 ".type .StgReturn,@function\n"
724                 ".StgReturn:\n"
725                         "\tmr 3,14\n"
726                         "\tla 5, %0(1)\n" // load address == addi r5, r1, %0
727                         "\tld 2, -296(5)\n"
728                         "\tld 14, -288(5)\n"
729                         "\tld 15, -280(5)\n"
730                         "\tld 16, -272(5)\n"
731                         "\tld 17, -264(5)\n"
732                         "\tld 18, -256(5)\n"
733                         "\tld 19, -248(5)\n"
734                         "\tld 20, -240(5)\n"
735                         "\tld 21, -232(5)\n"
736                         "\tld 22, -224(5)\n"
737                         "\tld 23, -216(5)\n"
738                         "\tld 24, -208(5)\n"
739                         "\tld 25, -200(5)\n"
740                         "\tld 26, -192(5)\n"
741                         "\tld 27, -184(5)\n"
742                         "\tld 28, -176(5)\n"
743                         "\tld 29, -168(5)\n"
744                         "\tld 30, -160(5)\n"
745                         "\tld 31, -152(5)\n"
746                         "\tlfd 14, -144(5)\n"
747                         "\tlfd 15, -136(5)\n"
748                         "\tlfd 16, -128(5)\n"
749                         "\tlfd 17, -120(5)\n"
750                         "\tlfd 18, -112(5)\n"
751                         "\tlfd 19, -104(5)\n"
752                         "\tlfd 20, -96(5)\n"
753                         "\tlfd 21, -88(5)\n"
754                         "\tlfd 22, -80(5)\n"
755                         "\tlfd 23, -72(5)\n"
756                         "\tlfd 24, -64(5)\n"
757                         "\tlfd 25, -56(5)\n"
758                         "\tlfd 26, -48(5)\n"
759                         "\tlfd 27, -40(5)\n"
760                         "\tlfd 28, -32(5)\n"
761                         "\tlfd 29, -24(5)\n"
762                         "\tlfd 30, -16(5)\n"
763                         "\tlfd 31, -8(5)\n"
764                         "\tmr 1, 5\n"
765                         "\tld 0, 16(1)\n"
766                         "\tmtlr 0\n"
767                         "\tblr\n"
768         : : "i"(RESERVED_C_STACK_BYTES+304 /*stack frame size*/));
769 }
770 #else // linux_TARGET_OS
771 #error Only linux support for power64 right now.
772 #endif
773
774 #endif
775
776 /* -----------------------------------------------------------------------------
777    IA64 architecture
778
779    Again, in assembler - so we can fiddle with the register stack, and because
780    gcc doesn't handle asm-clobbered callee-saves correctly.
781
782    loc0  - loc15: preserved locals
783    loc16 - loc28: STG registers
784            loc29: saved ar.pfs
785            loc30: saved b0
786            loc31: saved gp (gcc 3.3 uses this slot)
787    -------------------------------------------------------------------------- */
788
789 #ifdef ia64_TARGET_ARCH
790
791 /* the memory stack is rarely used, so 16K is excessive */
792 #undef RESERVED_C_STACK_BYTES
793 #define RESERVED_C_STACK_BYTES 1024
794
795 #if ((__GNUC__ == 3) && (__GNUC_MINOR__ >= 3)) || (__GNUC__ > 3)
796 /* gcc 3.3+: leave an extra slot for gp saves */
797 #define LOCALS 32
798 #else
799 #define LOCALS 31
800 #endif
801
/*
 * Container for the IA64 StgRun and StgReturn entry points, which are global
 * labels defined inside the asm below.  This C function is not called from
 * anywhere in this file.
 *
 * asm operands: %0 = RESERVED_C_STACK_BYTES + 6*16 (memory stack frame
 *               size), %1 = LOCALS (number of local stacked registers).
 *
 * StgRun:    allocates a register frame with LOCALS locals and 8 outputs
 *            (previous ar.pfs kept in loc29), reads the entry address and
 *            then gp through the pointer in r32, lowers sp by %0, spills
 *            f16-f21 into the top 6*16 bytes of that area, keeps b0 in
 *            loc30 and branches to the entry address.
 * StgReturn: result = loc16 (moved to r8); refills f16-f21, restores ar.pfs
 *            and b0, raises sp by %0 and returns.
 *
 * NOTE(review): being static and unreferenced, this function may be dropped
 * by a compiler that removes unused statics; consider __attribute__((used)).
 */
802 static void StgRunIsImplementedInAssembler(void)
803 {
804     __asm__ volatile(
805                 ".global StgRun\n"
806                 "StgRun:\n"
807                 "\talloc loc29 = ar.pfs, 0, %1, 8, 0\n" /* setup register frame */
808                 "\tld8 r18 = [r32],8\n"                 /* get procedure address */
809                 "\tadds sp = -%0, sp ;;\n"              /* setup stack */
810                 "\tld8 gp = [r32]\n"                    /* get procedure GP */
811                 "\tadds r16 = %0-(6*16), sp\n"
812                 "\tadds r17 = %0-(5*16), sp ;;\n"
813                 "\tstf.spill [r16] = f16,32\n"          /* spill callee-saved fp regs */
814                 "\tstf.spill [r17] = f17,32\n"
815                 "\tmov b6 = r18 ;;\n"                   /* set target address */
816                 "\tstf.spill [r16] = f18,32\n"
817                 "\tstf.spill [r17] = f19,32\n"
818                 "\tmov loc30 = b0 ;;\n"                 /* save return address */
819                 "\tstf.spill [r16] = f20,32\n"
820                 "\tstf.spill [r17] = f21,32\n"
821                 "\tbr.few b6 ;;\n"                      /* branch to function */
822                 ".global StgReturn\n"
823                 "StgReturn:\n"
824                 "\tmov r8 = loc16\n"            /* return value in r8 */
825                 "\tadds r16 = %0-(6*16), sp\n"
826                 "\tadds r17 = %0-(5*16), sp ;;\n"
827                 "\tldf.fill f16 = [r16],32\n"   /* start restoring fp regs */
828                 "\tldf.fill f17 = [r17],32\n"
829                 "\tmov ar.pfs = loc29 ;;\n"     /* restore register frame */
830                 "\tldf.fill f18 = [r16],32\n"
831                 "\tldf.fill f19 = [r17],32\n"
832                 "\tmov b0 = loc30 ;;\n"         /* restore return address */
833                 "\tldf.fill f20 = [r16],32\n"
834                 "\tldf.fill f21 = [r17],32\n"
835                 "\tadds sp = %0, sp\n"          /* restore stack */
836                 "\tbr.ret.sptk.many b0 ;;\n"    /* return */
837         : : "i"(RESERVED_C_STACK_BYTES + 6*16), "i"(LOCALS));
838 }
839
840 #endif
841
842 #endif /* !USE_MINIINTERPRETER */