Adding TcGadt.lhs
[ghc-hetmet.git] / rts / StgCRun.c
1 /* -----------------------------------------------------------------------------
2  *
3  * (c) The GHC Team, 1998-2003
4  *
5  * STG-to-C glue.
6  *
7  * To run an STG function from C land, call
8  *
9  *              rv = StgRun(f,BaseReg);
10  *
11  * where "f" is the STG function to call, and BaseReg is the address of the
12  * RegTable for this run (we might have separate RegTables if we're running
13  * multiple threads on an SMP machine).
14  *
15  * In the end, "f" must JMP to StgReturn (defined below),
16  * passing the return-value "rv" in R1,
17  * to return to the caller of StgRun, returning "rv" in
18  * whatever way C returns a value.
19  *
20  * NOTE: StgRun/StgReturn do *NOT* load or store Hp or any
21  * other registers (other than saving the C callee-saves
22  * registers).  Instead, the called function "f" must do that
23  * in STG land.
24  *
25  * GCC will have assumed that pushing/popping of C-stack frames is
26  * going on when it generated its code, and used stack space
27  * accordingly.  However, we actually {\em post-process away} all
28  * such stack-framery (see \tr{ghc/driver/ghc-asm.lprl}). Things will
29  * be OK however, if we initially make sure there are
30  * @RESERVED_C_STACK_BYTES@ on the C-stack to begin with, for local
31  * variables.
32  *
33  * -------------------------------------------------------------------------- */
34
35 #include "PosixSource.h"
36
37
38 /*
39  * We define the following (unused) global register variables, because for
40  * some reason gcc generates sub-optimal code for StgRun() on the Alpha
41  * (unnecessarily saving extra registers on the stack) if we don't.
42  *
43  * Why do it at the top of this file, rather than near StgRun() below?  Because
44  * gcc doesn't let us define global register variables after any function
45  * definition has been read.  Any point after #include "Stg.h" would be too
46  * late.
47  *
48  * We define alpha_EXTRA_CAREFUL here to save $s6, $f8 and $f9 -- registers
49  * that we don't use but which are callee-save registers.  The __divq() routine
50  * in libc.a clobbers $s6.
51  */
52 #include "ghcconfig.h"
53 #ifdef alpha_HOST_ARCH
54 #define alpha_EXTRA_CAREFUL
55 register long   fake_ra __asm__("$26");
56 register long   fake_gp __asm__("$29");
57 #ifdef alpha_EXTRA_CAREFUL
58 register long   fake_s6 __asm__("$15");
59 register double fake_f8 __asm__("$f8");
60 register double fake_f9 __asm__("$f9");
61 #endif
62 #endif
63
64 /* include Stg.h first because we want real machine regs in here: we
65  * have to get the value of R1 back from Stg land to C land intact.
66  */
67 #include "Stg.h"
68 #include "Rts.h"
69 #include "StgRun.h"
70 #include "RtsFlags.h"
71 #include "OSThreads.h"
72 #include "Capability.h"
73
74 #ifdef DEBUG
75 #include "RtsUtils.h"
76 #include "Printer.h"
77 #endif
78
79 #ifdef USE_MINIINTERPRETER
80
81 /* -----------------------------------------------------------------------------
82    any architecture (using miniinterpreter)
83    -------------------------------------------------------------------------- */
84
85 StgRegTable * StgRun(StgFunPtr f, StgRegTable *basereg STG_UNUSED)
86 {
87     while (f) {
88         /* XXX Disabled due to RtsFlags[]/RtsFlags mismatch
89         IF_DEBUG(interpreter,
90             debugBelch("Jumping to ");
91             printPtr((P_)f); fflush(stdout);
92             debugBelch("\n");
93             );
94         */
95         f = (StgFunPtr) (f)();
96     }
97     return (StgRegTable *)R1.p;
98 }
99
100 StgFunPtr StgReturn(void)
101 {
102     return 0;
103 }
104
105 #else /* !USE_MINIINTERPRETER */
106
107 #ifdef LEADING_UNDERSCORE
108 #define STG_RETURN "_StgReturn"
109 #else
110 #define STG_RETURN "StgReturn"
111 #endif
112
113 /* -----------------------------------------------------------------------------
114    x86 architecture
115    -------------------------------------------------------------------------- */
116
117 #ifdef i386_HOST_ARCH
118
119 #ifdef darwin_TARGET_OS
120 #define STG_GLOBAL ".globl "
121 #else
122 #define STG_GLOBAL ".global "
123 #endif
124
/*
 * StgRun, x86 version.
 *
 * Stashes %ebx, %esi, %edi and %ebp in four consecutive words starting at
 * %esp + RESERVED_C_STACK_BYTES, loads BaseReg (%ebx) from "basereg" and
 * jumps to "f".  The StgReturn label is defined in the middle of the asm
 * statement: code arriving there has %esi copied into %eax as the result
 * and the four saved registers reloaded before the function returns "r".
 *
 * NOTE(review): the save area is addressed relative to %esp rather than via
 * the address of "space", so this presumably relies on the compiler placing
 * "space" at the bottom of the frame -- confirm.
 */
125 StgRegTable *
126 StgRun(StgFunPtr f, StgRegTable *basereg) {
127
    /* reserved C stack plus four pointer-sized slots for the saved registers */
128     unsigned char space[ RESERVED_C_STACK_BYTES + 4*sizeof(void *) ];
129     StgRegTable * r;
130
131     __asm__ volatile (
132         /*
133          * save callee-saves registers on behalf of the STG code.
134          */
135         "movl %%esp, %%eax\n\t"
136         "addl %4, %%eax\n\t"
137         "movl %%ebx,0(%%eax)\n\t"
138         "movl %%esi,4(%%eax)\n\t"
139         "movl %%edi,8(%%eax)\n\t"
140         "movl %%ebp,12(%%eax)\n\t"
141         /*
142          * Set BaseReg
143          */
144         "movl %3,%%ebx\n\t"
145         /*
146          * grab the function argument from the stack
147          */
148         "movl %2,%%eax\n\t"
149         
150         /*
151          * Darwin note:
152          * The stack pointer has to be aligned to a multiple of 16 bytes at
153          * this point. This works out correctly with gcc 4.0.1, but it might
154          * break at any time in the future. TODO: Make this future-proof.
155          */
156
157         /*
158          * jump to it
159          */
160         "jmp *%%eax\n\t"
161
        /* define the global StgReturn label; execution resumes here */
162         STG_GLOBAL STG_RETURN "\n"
163         STG_RETURN ":\n\t"
164
165         "movl %%esi, %%eax\n\t"   /* Return value in R1  */
166
167         /*
168          * restore callee-saves registers.  (Don't stomp on %%eax!)
169          */
170         "movl %%esp, %%edx\n\t"
171         "addl %4, %%edx\n\t"
172         "movl 0(%%edx),%%ebx\n\t"       /* restore the registers saved above */
173         "movl 4(%%edx),%%esi\n\t"
174         "movl 8(%%edx),%%edi\n\t"
175         "movl 12(%%edx),%%ebp\n\t"
176
        /*
         * Operands: %0 = r (in %eax), %1 = space, %2 = f, %3 = basereg,
         *           %4 = RESERVED_C_STACK_BYTES as an immediate.
         */
177       : "=&a" (r), "=m" (space)
178       : "m" (f), "m" (basereg), "i" (RESERVED_C_STACK_BYTES)
179       : "edx" /* stomps on %edx */
180     );
181
182     return r;
183 }
184
185 #endif
186
187 /* ----------------------------------------------------------------------------
188    x86-64 is almost the same as plain x86.
189
190    I've done it using entirely inline assembler, because I couldn't
191    get gcc to generate the correct subtraction from %rsp by using
192    the local array variable trick.  It didn't seem to reserve
193    enough space.  Oh well, it's not much harder this way.
194
195    ------------------------------------------------------------------------- */
196
197 #ifdef x86_64_HOST_ARCH
198
199 extern StgRegTable * StgRun(StgFunPtr f, StgRegTable *basereg);
200
/*
 * Container for the x86-64 assembler definitions of StgRun and StgReturn.
 * The body is a single asm statement that emits both global labels.
 *
 * StgRun:    drops %rsp by the frame size (operand %0), stores %rbx, %rbp
 *            and %r12-%r15 in the top 48 bytes of that frame, copies %rsi
 *            into %r13 (BaseReg) and jumps to the function held in %rdi.
 * StgReturn: copies %rbx (R1) into %rax, reloads the six saved registers,
 *            pops the frame and executes retq.
 *
 * NOTE(review): presumably this C function itself is never called and the
 * "used" attribute only keeps the compiler from discarding it -- confirm.
 */
201 static void GNUC3_ATTRIBUTE(used)
202 StgRunIsImplementedInAssembler(void)
203 {
204     __asm__ volatile (
205         /*
206          * save callee-saves registers on behalf of the STG code.
207          */
208         ".globl StgRun\n"
209         "StgRun:\n\t"
210         "subq %0, %%rsp\n\t"
211         "movq %%rsp, %%rax\n\t"
212         "addq %0-48, %%rax\n\t"
213         "movq %%rbx,0(%%rax)\n\t"
214         "movq %%rbp,8(%%rax)\n\t"
215         "movq %%r12,16(%%rax)\n\t"
216         "movq %%r13,24(%%rax)\n\t"
217         "movq %%r14,32(%%rax)\n\t"
218         "movq %%r15,40(%%rax)\n\t"
219         /*
220          * Set BaseReg
221          */
222         "movq %%rsi,%%r13\n\t"
223         /*
224          * the function to run arrives in %rdi; copy it to %rax and jump to it.
225          */
226         "movq %%rdi,%%rax\n\t"
227         "jmp *%%rax\n\t"
228
229         ".global " STG_RETURN "\n"
230         STG_RETURN ":\n\t"
231
232         "movq %%rbx, %%rax\n\t"   /* Return value in R1  */
233
234         /*
235          * restore callee-saves registers.  (Don't stomp on %%rax!)
236          */
237         "movq %%rsp, %%rdx\n\t"
238         "addq %0-48, %%rdx\n\t"
239         "movq 0(%%rdx),%%rbx\n\t"       /* restore the registers saved above */
240         "movq 8(%%rdx),%%rbp\n\t"
241         "movq 16(%%rdx),%%r12\n\t"
242         "movq 24(%%rdx),%%r13\n\t"
243         "movq 32(%%rdx),%%r14\n\t"
244         "movq 40(%%rdx),%%r15\n\t"
245         "addq %0, %%rsp\n\t"
246         "retq"
247
        /* %0: reserved C stack + 48 bytes of register saves + 8 bytes of
         * alignment padding (explained in the HACK note below) */
248         : : "i"(RESERVED_C_STACK_BYTES+48+8 /*stack frame size*/));
249     /* 
250        HACK alert!
251
252        The x86_64 ABI specifies that on a procedure call, %rsp is
253        aligned on a 16-byte boundary + 8.  That is, the first
254        argument on the stack after the return address will be
255        16-byte aligned.  
256        
257        Which should be fine: RESERVED_C_STACK_BYTES+48 is a multiple
258        of 16 bytes.  
259        
260        BUT... when we do a C-call from STG land, gcc likes to put the
261        stack alignment adjustment in the prolog.  eg. if we're calling
262        a function with arguments in regs, gcc will insert 'subq $8,%rsp'
263        in the prolog, to keep %rsp aligned (the return address is 8
264        bytes, remember).  The mangler throws away the prolog, so we
265        lose the stack alignment.
266
267        The hack is to add this extra 8 bytes to our %rsp adjustment
268        here, so that throughout STG code, %rsp is 16-byte aligned,
269        ready for a C-call.  
270
271        A quick way to see if this is wrong is to compile this code:
272
273           main = System.Exit.exitWith ExitSuccess
274
275        And run it with +RTS -sstderr.  The stats code in the RTS, in
276        particular statsPrintf(), relies on the stack alignment because
277        it saves the %xmm regs on the stack, so it'll fall over if the
278        stack isn't aligned, and calling exitWith from Haskell invokes
279        shutdownHaskellAndExit using a C call.
280
281        Future gcc releases will almost certainly break this hack...
282     */
283 }
284
285 #endif /* x86-64 */
286
287 /* -----------------------------------------------------------------------------
288    Sparc architecture
289
290    --
291    OLD COMMENT from GHC-3.02:
292
293    We want tailjumps to be calls, because `call xxx' is the only Sparc
294    branch that allows an arbitrary label as a target.  (Gcc's ``goto
295    *target'' construct ends up loading the label into a register and
296    then jumping, at the cost of two extra instructions for the 32-bit
297    load.)
298
299    When entering the threaded world, we stash our return address in a
300    known location so that \tr{%i7} is available as an extra
301    callee-saves register.  Of course, we have to restore this when
302    coming out of the threaded world.
303
304    I hate this god-forsaken architecture.  Since the top of the
305    reserved stack space is used for globals and the bottom is reserved
306    for outgoing arguments, we have to stick our return address
307    somewhere in the middle.  Currently, I'm allowing 100 extra
308    outgoing arguments beyond the first 6.  --JSM
309
310    Updated info (GHC 4.06): we don't appear to use %i7 any more, so
311    I'm not sure whether we still need to save it.  Incidentally, what
312    does the last paragraph above mean when it says "the top of the
313    stack is used for globals"?  What globals?  --SDM
314
315    Updated info (GHC 4.08.2): not saving %i7 any more (see below).
316    -------------------------------------------------------------------------- */
317
318 #ifdef sparc_HOST_ARCH
319
/*
 * StgRun, Sparc version.
 *
 * Reserves RESERVED_C_STACK_BYTES of C stack in "space", calls "f" as an
 * ordinary C function, and places the global StgReturn label immediately
 * after that call.  The value returned is R1.i cast to a StgRegTable
 * pointer.  "basereg" is not referenced in this body.  The two "#if 0"
 * regions are the disabled %i7 save/restore described in the comments.
 */
320 StgRegTable *
321 StgRun(StgFunPtr f, StgRegTable *basereg) {
322
323     unsigned char space[RESERVED_C_STACK_BYTES];
324 #if 0
325     register void *i7 __asm__("%i7");
326     ((void **)(space))[100] = i7;
327 #endif
328     f();
    /* emit the StgReturn label; "space" is passed as an input only so that
     * gcc treats it as used */
329     __asm__ volatile (
330             ".align 4\n"
331             ".global " STG_RETURN "\n"
332             STG_RETURN ":"
333             : : "p" (space) : "l0","l1","l2","l3","l4","l5","l6","l7");
334     /* we tell the C compiler that l0-l7 are clobbered on return to
335      * StgReturn, otherwise it tries to use these to save eg. the
336      * address of space[100] across the call.  The correct thing
337      * to do would be to save all the callee-saves regs, but we
338      * can't be bothered to do that.
339      *
340      * We also explicitly mark space as used since gcc eliminates it
341      * otherwise.
342      *
343      * The code that gcc generates for this little fragment is now
344      * terrible.  We could do much better by coding it directly in
345      * assembler.
346      */
347 #if 0
348     /* updated 4.08.2: we don't save %i7 in the middle of the reserved
349      * space any more, since gcc tries to save its address across the
350      * call to f(), this gets clobbered in STG land and we end up
351      * dereferencing a bogus pointer in StgReturn.
352      */
353     __asm__ volatile ("ld %1,%0"
354                       : "=r" (i7) : "m" (((void **)(space))[100]));
355 #endif
356     return (StgRegTable *)R1.i;
357 }
358
359 #endif
360
361 /* -----------------------------------------------------------------------------
362    alpha architecture
363
364    "The stack pointer (SP) must at all times denote an address that has octaword
365     alignment. (This restriction has the side effect that the in-memory portion
366     of the argument list, if any, will start on an octaword boundary.) Note that
367     the stack grows toward lower addresses. During a procedure invocation, SP
368     can never be set to a value that is higher than the value of SP at entry to
369     that procedure invocation.
370
371    "The contents of the stack, located above the portion of the argument list
372     (if any) that is passed in memory, belong to the calling procedure. Because
373     they are part of the calling procedure, they should not be read or written
374     by the called procedure, except as specified by indirect arguments or
375     language-controlled up-level references.
376
377    "The SP value might be used by the hardware when raising exceptions and
378     asynchronous interrupts. It must be assumed that the contents of the stack
379     below the current SP value and within the stack for the current thread are
380     continually and unpredictably modified, as specified in the _Alpha
381     Architecture Reference Manual_, and as a result of asynchronous software
382     actions."
383
384    -- Compaq Computer Corporation, Houston. Tru64 UNIX Calling Standard for
385       Alpha Systems, 5.1 edition, August 2000, section 3.2.1.  http://www.
386       tru64unix.compaq.com/docs/base_doc/DOCUMENTATION/V51_PDF/ARH9MBTE.PDF
387    -------------------------------------------------------------------------- */
388
389 #ifdef alpha_HOST_ARCH
390
/*
 * StgRun, Alpha version.
 *
 * Each register of interest is bound to a "real_*" register variable and
 * paired with a volatile "save_*" local.  The function:
 *   1. copies $26 (ra), $29 (gp), $9-$14 and $f2-$f7 into the save_* locals
 *      (plus $15, $f8 and $f9 when alpha_EXTRA_CAREFUL is defined);
 *   2. puts "f" in $27, lowers $30 by RESERVED_C_STACK_BYTES and jumps to
 *      ($27);
 *   3. at the global StgReturn label, raises $30 again, takes the result
 *      from $14 (real_s5) and copies every save_* value back into its
 *      register before returning.
 * "basereg" is not referenced in this body.
 */
391 StgRegTable *
392 StgRun(StgFunPtr f, StgRegTable *basereg)
393 {
394     register long   real_ra __asm__("$26"); volatile long   save_ra;
395     register long   real_gp __asm__("$29"); volatile long   save_gp;
396
397     register long   real_s0 __asm__("$9" ); volatile long   save_s0;
398     register long   real_s1 __asm__("$10"); volatile long   save_s1;
399     register long   real_s2 __asm__("$11"); volatile long   save_s2;
400     register long   real_s3 __asm__("$12"); volatile long   save_s3;
401     register long   real_s4 __asm__("$13"); volatile long   save_s4;
402     register long   real_s5 __asm__("$14"); volatile long   save_s5;
403 #ifdef alpha_EXTRA_CAREFUL
404     register long   real_s6 __asm__("$15"); volatile long   save_s6;
405 #endif
406
407     register double real_f2 __asm__("$f2"); volatile double save_f2;
408     register double real_f3 __asm__("$f3"); volatile double save_f3;
409     register double real_f4 __asm__("$f4"); volatile double save_f4;
410     register double real_f5 __asm__("$f5"); volatile double save_f5;
411     register double real_f6 __asm__("$f6"); volatile double save_f6;
412     register double real_f7 __asm__("$f7"); volatile double save_f7;
413 #ifdef alpha_EXTRA_CAREFUL
414     register double real_f8 __asm__("$f8"); volatile double save_f8;
415     register double real_f9 __asm__("$f9"); volatile double save_f9;
416 #endif
417
418     register StgFunPtr real_pv __asm__("$27");
419
420     StgRegTable * ret;
421
    /* --- save phase: registers -> volatile locals --- */
422     save_ra = real_ra;
423     save_gp = real_gp;
424
425     save_s0 = real_s0;
426     save_s1 = real_s1;
427     save_s2 = real_s2;
428     save_s3 = real_s3;
429     save_s4 = real_s4;
430     save_s5 = real_s5;
431 #ifdef alpha_EXTRA_CAREFUL
432     save_s6 = real_s6;
433 #endif
434
435     save_f2 = real_f2;
436     save_f3 = real_f3;
437     save_f4 = real_f4;
438     save_f5 = real_f5;
439     save_f6 = real_f6;
440     save_f7 = real_f7;
441 #ifdef alpha_EXTRA_CAREFUL
442     save_f8 = real_f8;
443     save_f9 = real_f9;
444 #endif
445
    /* the jump below goes through $27, so load the target there */
446     real_pv = f;
447
    /* reserve the C stack, jump to f, and define StgReturn, which gives the
     * reserved stack back */
448     __asm__ volatile(   "lda $30,-%0($30)"      "\n"
449                 "\t"    "jmp ($27)"             "\n"
450                 "\t"    ".align 3"              "\n"
451                 ".globl " STG_RETURN            "\n"
452                 STG_RETURN ":"                  "\n"
453                 "\t"    "lda $30,%0($30)"       "\n"
454                 : : "K" (RESERVED_C_STACK_BYTES));
455
    /* pick up the result from $14 before that register is restored below */
456     ret = real_s5;
457
    /* --- restore phase: volatile locals -> registers --- */
458     real_s0 = save_s0;
459     real_s1 = save_s1;
460     real_s2 = save_s2;
461     real_s3 = save_s3;
462     real_s4 = save_s4;
463     real_s5 = save_s5;
464 #ifdef alpha_EXTRA_CAREFUL
465     real_s6 = save_s6;
466 #endif
467
468     real_f2 = save_f2;
469     real_f3 = save_f3;
470     real_f4 = save_f4;
471     real_f5 = save_f5;
472     real_f6 = save_f6;
473     real_f7 = save_f7;
474 #ifdef alpha_EXTRA_CAREFUL
475     real_f8 = save_f8;
476     real_f9 = save_f9;
477 #endif
478
479     real_ra = save_ra;
480     real_gp = save_gp;
481
482     return ret;
483 }
484
485 #endif /* alpha_HOST_ARCH */
486
487 /* -----------------------------------------------------------------------------
488    HP-PA architecture
489    -------------------------------------------------------------------------- */
490
491 #ifdef hppa1_1_HOST_ARCH
492
/*
 * StgRun, HP-PA 1.1 version.
 *
 * The first asm statement stores %r3-%r18 (16 words, byte offsets 0-60) and
 * %fr12-%fr21 (10 doubles, byte offsets 64-143) in a save area whose base
 * is %r30 plus the negative constant passed as the asm operand; %r19 is the
 * scratch pointer.  "f" is then called as an ordinary C function.  The
 * second asm statement defines and exports the StgReturn label, copies %r11
 * (R1) into "ret" and reloads every register from the same save area.
 * "basereg" is not referenced in this body.
 *
 * NOTE(review): the relation between the 116 in the offset constant and the
 * 16 in the size of "space" is not visible in this file -- confirm that the
 * save area lies inside "space".
 */
493 StgRegTable *
494 StgRun(StgFunPtr f, StgRegTable *basereg)
495 {
496     StgChar space[RESERVED_C_STACK_BYTES+16*sizeof(long)+10*sizeof(double)];
497     StgRegTable * ret;
498
    /* save phase; each "ldo N(%r19),%r19" advances the scratch pointer so
     * that the following fstds displacements stay within -16..8 */
499     __asm__ volatile ("ldo %0(%%r30),%%r19\n"
500                       "\tstw %%r3, 0(0,%%r19)\n"
501                       "\tstw %%r4, 4(0,%%r19)\n"
502                       "\tstw %%r5, 8(0,%%r19)\n"
503                       "\tstw %%r6,12(0,%%r19)\n"
504                       "\tstw %%r7,16(0,%%r19)\n"
505                       "\tstw %%r8,20(0,%%r19)\n"
506                       "\tstw %%r9,24(0,%%r19)\n"
507                       "\tstw %%r10,28(0,%%r19)\n"
508                       "\tstw %%r11,32(0,%%r19)\n"
509                       "\tstw %%r12,36(0,%%r19)\n"
510                       "\tstw %%r13,40(0,%%r19)\n"
511                       "\tstw %%r14,44(0,%%r19)\n"
512                       "\tstw %%r15,48(0,%%r19)\n"
513                       "\tstw %%r16,52(0,%%r19)\n"
514                       "\tstw %%r17,56(0,%%r19)\n"
515                       "\tstw %%r18,60(0,%%r19)\n"
516                       "\tldo 80(%%r19),%%r19\n"
517                       "\tfstds %%fr12,-16(0,%%r19)\n"
518                       "\tfstds %%fr13, -8(0,%%r19)\n"
519                       "\tfstds %%fr14,  0(0,%%r19)\n"
520                       "\tfstds %%fr15,  8(0,%%r19)\n"
521                       "\tldo 32(%%r19),%%r19\n"
522                       "\tfstds %%fr16,-16(0,%%r19)\n"
523                       "\tfstds %%fr17, -8(0,%%r19)\n"
524                       "\tfstds %%fr18,  0(0,%%r19)\n"
525                       "\tfstds %%fr19,  8(0,%%r19)\n"
526                       "\tldo 32(%%r19),%%r19\n"
527                       "\tfstds %%fr20,-16(0,%%r19)\n"
528                       "\tfstds %%fr21, -8(0,%%r19)\n" : :
529                       "n" (-(116 * sizeof(long) + 10 * sizeof(double))) : "%r19"
530                       );
531
532     f();
533
    /* restore phase, entered at the StgReturn label; mirrors the save
     * sequence above with the same offset constant (operand %1 here) */
534     __asm__ volatile (".align 4\n"
535                       "\t.EXPORT " STG_RETURN ",CODE\n"
536                       "\t.EXPORT " STG_RETURN ",ENTRY,PRIV_LEV=3\n"
537                       STG_RETURN "\n"
538                       /* "\tldo %0(%%r3),%%r19\n" */
539                       "\tldo %1(%%r30),%%r19\n"
540                       "\tcopy %%r11, %0\n"  /* save R1 */
541                       "\tldw  0(0,%%r19),%%r3\n"
542                       "\tldw  4(0,%%r19),%%r4\n"
543                       "\tldw  8(0,%%r19),%%r5\n"
544                       "\tldw 12(0,%%r19),%%r6\n"
545                       "\tldw 16(0,%%r19),%%r7\n"
546                       "\tldw 20(0,%%r19),%%r8\n"
547                       "\tldw 24(0,%%r19),%%r9\n"
548                       "\tldw 28(0,%%r19),%%r10\n"
549                       "\tldw 32(0,%%r19),%%r11\n"
550                       "\tldw 36(0,%%r19),%%r12\n"
551                       "\tldw 40(0,%%r19),%%r13\n"
552                       "\tldw 44(0,%%r19),%%r14\n"
553                       "\tldw 48(0,%%r19),%%r15\n"
554                       "\tldw 52(0,%%r19),%%r16\n"
555                       "\tldw 56(0,%%r19),%%r17\n"
556                       "\tldw 60(0,%%r19),%%r18\n"
557                       "\tldo 80(%%r19),%%r19\n"
558                       "\tfldds -16(0,%%r19),%%fr12\n"
559                       "\tfldds  -8(0,%%r19),%%fr13\n"
560                       "\tfldds   0(0,%%r19),%%fr14\n"
561                       "\tfldds   8(0,%%r19),%%fr15\n"
562                       "\tldo 32(%%r19),%%r19\n"
563                       "\tfldds -16(0,%%r19),%%fr16\n"
564                       "\tfldds  -8(0,%%r19),%%fr17\n"
565                       "\tfldds   0(0,%%r19),%%fr18\n"
566                       "\tfldds   8(0,%%r19),%%fr19\n"
567                       "\tldo 32(%%r19),%%r19\n"
568                       "\tfldds -16(0,%%r19),%%fr20\n"
569                       "\tfldds  -8(0,%%r19),%%fr21\n"
570                          : "=r" (ret)
571                          : "n" (-(116 * sizeof(long) + 10 * sizeof(double)))
572                          : "%r19"
573                       );
574
575     return ret;
576 }
577
578 #endif /* hppa1_1_HOST_ARCH */
579
580 /* -----------------------------------------------------------------------------
581    PowerPC architecture
582
583    Everything is in assembler, so we don't have to deal with GCC...
584    
585    -------------------------------------------------------------------------- */
586
587 #ifdef powerpc_HOST_ARCH
588
589 extern StgRegTable * StgRun(StgFunPtr f, StgRegTable *basereg);
590
591 #ifdef darwin_HOST_OS
// Container for the PowerPC/Darwin assembler definitions of _StgRun and
// _StgReturn; the body consists only of asm statements.
//
// _StgRun:    moves LR to r0, calls the saveFP helper, stores r13-r31 at
//             -220(r1), pushes a frame of RESERVED_C_STACK_BYTES+224 bytes,
//             copies r4 into r27 (BaseReg) and branches to the function in
//             r3 via CTR (r3 is also copied to r12).
// _StgReturn: copies r14 into r3 as the result, pops the frame, reloads
//             r13-r31 and branches to the restFP helper.
//
// NOTE(review): saveFP/restFP are not defined in this file; presumably
// they save/restore f14-f31 and LR, and restFP performs the final return
// -- confirm.
592 void StgRunIsImplementedInAssembler(void)
593 {
594 #if HAVE_SUBSECTIONS_VIA_SYMBOLS
595             // if the toolchain supports deadstripping, we have to
596             // prevent it here (it tends to get confused here).
597         __asm__ volatile (".no_dead_strip _StgRunIsImplementedInAssembler");
598 #endif
599         __asm__ volatile (
600                 "\n.globl _StgRun\n"
601                 "_StgRun:\n"
602                 "\tmflr r0\n"
603                 "\tbl saveFP # f14\n"
604                 "\tstmw r13,-220(r1)\n"
605                 "\tstwu r1,-%0(r1)\n"
606                 "\tmr r27,r4\n" // BaseReg == r27
607                 "\tmtctr r3\n"
608                 "\tmr r12,r3\n"
609                 "\tbctr\n"
610                 ".globl _StgReturn\n"
611                 "_StgReturn:\n"
612                 "\tmr r3,r14\n"
613                 "\tla r1,%0(r1)\n"
614                 "\tlmw r13,-220(r1)\n"
615                 "\tb restFP # f14\n"
616         : : "i"(RESERVED_C_STACK_BYTES+224 /*stack frame size*/));
617 }
618 #else
619
620 // This version is for PowerPC Linux.
621
622 // Differences from the Darwin/Mac OS X version:
623 // *) Different Assembler Syntax
624 // *) Doesn't use Register Saving Helper Functions (although they exist somewhere)
625 // *) We may not access positive stack offsets
626 //    (no "Red Zone" as in the Darwin ABI)
627 // *) The Link Register is saved to a different offset in the caller's stack frame
628 //    (Linux: 4(r1), Darwin 8(r1))
629
// Container for the PowerPC/Linux assembler definitions of StgRun and
// StgReturn; the body is a single asm statement.
//
// StgRun:    saves LR at 4(r1), remembers the incoming stack pointer in r5,
//            pushes a frame of RESERVED_C_STACK_BYTES+224 bytes, then
//            stores r13-r31 at -220(r5) and f14-f31 at -144(r5)..-8(r5).
//            Finally it copies r4 into r27 (BaseReg) and branches to the
//            function in r3 via CTR (r3 is also copied to r12).
// StgReturn: copies r14 into r3 as the result, recomputes the old stack
//            pointer in r5, reloads everything saved above, pops the frame,
//            reloads LR from 4(r1) and returns with blr.
630 static void GNUC3_ATTRIBUTE(used)
631 StgRunIsImplementedInAssembler(void)
632 {
633         __asm__ volatile (
634                 "\t.globl StgRun\n"
635                 "\t.type StgRun,@function\n"
636                 "StgRun:\n"
637                 "\tmflr 0\n"
638                 "\tstw 0,4(1)\n"
639                 "\tmr 5,1\n"
640                 "\tstwu 1,-%0(1)\n"
641                 "\tstmw 13,-220(5)\n"
642                 "\tstfd 14,-144(5)\n"
643                 "\tstfd 15,-136(5)\n"
644                 "\tstfd 16,-128(5)\n"
645                 "\tstfd 17,-120(5)\n"
646                 "\tstfd 18,-112(5)\n"
647                 "\tstfd 19,-104(5)\n"
648                 "\tstfd 20,-96(5)\n"
649                 "\tstfd 21,-88(5)\n"
650                 "\tstfd 22,-80(5)\n"
651                 "\tstfd 23,-72(5)\n"
652                 "\tstfd 24,-64(5)\n"
653                 "\tstfd 25,-56(5)\n"
654                 "\tstfd 26,-48(5)\n"
655                 "\tstfd 27,-40(5)\n"
656                 "\tstfd 28,-32(5)\n"
657                 "\tstfd 29,-24(5)\n"
658                 "\tstfd 30,-16(5)\n"
659                 "\tstfd 31,-8(5)\n"
660                 "\tmr 27,4\n"  // BaseReg == r27
661                 "\tmtctr 3\n"
662                 "\tmr 12,3\n"
663                 "\tbctr\n"
664                 ".globl StgReturn\n"
665                 "\t.type StgReturn,@function\n"
666                 "StgReturn:\n"
667                 "\tmr 3,14\n"
668                 "\tla 5,%0(1)\n"
669                 "\tlmw 13,-220(5)\n"
670                 "\tlfd 14,-144(5)\n"
671                 "\tlfd 15,-136(5)\n"
672                 "\tlfd 16,-128(5)\n"
673                 "\tlfd 17,-120(5)\n"
674                 "\tlfd 18,-112(5)\n"
675                 "\tlfd 19,-104(5)\n"
676                 "\tlfd 20,-96(5)\n"
677                 "\tlfd 21,-88(5)\n"
678                 "\tlfd 22,-80(5)\n"
679                 "\tlfd 23,-72(5)\n"
680                 "\tlfd 24,-64(5)\n"
681                 "\tlfd 25,-56(5)\n"
682                 "\tlfd 26,-48(5)\n"
683                 "\tlfd 27,-40(5)\n"
684                 "\tlfd 28,-32(5)\n"
685                 "\tlfd 29,-24(5)\n"
686                 "\tlfd 30,-16(5)\n"
687                 "\tlfd 31,-8(5)\n"
688                 "\tmr 1,5\n"
689                 "\tlwz 0,4(1)\n"
690                 "\tmtlr 0\n"
691                 "\tblr\n"
692         : : "i"(RESERVED_C_STACK_BYTES+224 /*stack frame size*/));
693 }
694 #endif
695
696 #endif
697
698 /* -----------------------------------------------------------------------------
699    PowerPC 64 architecture
700
701    Everything is in assembler, so we don't have to deal with GCC...
702    
703    -------------------------------------------------------------------------- */
704
705 #ifdef powerpc64_HOST_ARCH
706
707 #ifdef linux_HOST_OS
708 extern StgRegTable * StgRun(StgFunPtr f, StgRegTable *basereg);
709
// Container for the PowerPC64/Linux assembler definitions of StgRun and
// StgReturn; the body is a single asm statement.
//
// The asm first emits two 24-byte entries in the ".opd" section, named
// StgRun and StgReturn, each holding the code address (.StgRun or
// .StgReturn), the TOC base and a zero quad.  The code itself follows:
//
// .StgRun:    saves LR at 16(r1), remembers the incoming stack pointer in
//             r5, pushes a frame of RESERVED_C_STACK_BYTES+304 bytes, then
//             stores r2 at -296(r5), r14-r31 at -288(r5)..-152(r5) and
//             f14-f31 at -144(r5)..-8(r5).  It copies r4 into r27
//             (BaseReg), loads the new r2 from offset 8 and the entry
//             address from offset 0 of the descriptor that r3 points to,
//             and branches via CTR.
// .StgReturn: copies r14 into r3 as the result, recomputes the old stack
//             pointer in r5, reloads everything saved above, pops the
//             frame, reloads LR from 16(r1) and returns with blr.
710 static void GNUC3_ATTRIBUTE(used)
711 StgRunIsImplementedInAssembler(void)
712 {
713         // r0 volatile
714         // r1 stack pointer
715         // r2 toc - needs to be saved
716         // r3-r10 argument passing, volatile
717         // r11, r12 very volatile (not saved across cross-module calls)
718         // r13 thread local state (never modified, don't need to save)
719         // r14-r31 callee-save
720         __asm__ volatile (
721                 ".section \".opd\",\"aw\"\n"
722                 ".align 3\n"
723                 ".globl StgRun\n"
724                 "StgRun:\n"
725                         "\t.quad\t.StgRun,.TOC.@tocbase,0\n"
726                         "\t.size StgRun,24\n"
727                 ".globl StgReturn\n"
728                 "StgReturn:\n"
729                         "\t.quad\t.StgReturn,.TOC.@tocbase,0\n"
730                         "\t.size StgReturn,24\n"
731                 ".previous\n"
732                 ".globl .StgRun\n"
733                 ".type .StgRun,@function\n"
734                 ".StgRun:\n"
735                         "\tmflr 0\n"
736                         "\tmr 5, 1\n"
737                         "\tstd 0, 16(1)\n"
738                         "\tstdu 1, -%0(1)\n"
739                         "\tstd 2, -296(5)\n"
740                         "\tstd 14, -288(5)\n"
741                         "\tstd 15, -280(5)\n"
742                         "\tstd 16, -272(5)\n"
743                         "\tstd 17, -264(5)\n"
744                         "\tstd 18, -256(5)\n"
745                         "\tstd 19, -248(5)\n"
746                         "\tstd 20, -240(5)\n"
747                         "\tstd 21, -232(5)\n"
748                         "\tstd 22, -224(5)\n"
749                         "\tstd 23, -216(5)\n"
750                         "\tstd 24, -208(5)\n"
751                         "\tstd 25, -200(5)\n"
752                         "\tstd 26, -192(5)\n"
753                         "\tstd 27, -184(5)\n"
754                         "\tstd 28, -176(5)\n"
755                         "\tstd 29, -168(5)\n"
756                         "\tstd 30, -160(5)\n"
757                         "\tstd 31, -152(5)\n"
758                         "\tstfd 14, -144(5)\n"
759                         "\tstfd 15, -136(5)\n"
760                         "\tstfd 16, -128(5)\n"
761                         "\tstfd 17, -120(5)\n"
762                         "\tstfd 18, -112(5)\n"
763                         "\tstfd 19, -104(5)\n"
764                         "\tstfd 20, -96(5)\n"
765                         "\tstfd 21, -88(5)\n"
766                         "\tstfd 22, -80(5)\n"
767                         "\tstfd 23, -72(5)\n"
768                         "\tstfd 24, -64(5)\n"
769                         "\tstfd 25, -56(5)\n"
770                         "\tstfd 26, -48(5)\n"
771                         "\tstfd 27, -40(5)\n"
772                         "\tstfd 28, -32(5)\n"
773                         "\tstfd 29, -24(5)\n"
774                         "\tstfd 30, -16(5)\n"
775                         "\tstfd 31, -8(5)\n"
776                         "\tmr 27, 4\n"  // BaseReg == r27
777                         "\tld 2, 8(3)\n"
778                         "\tld 3, 0(3)\n"
779                         "\tmtctr 3\n"
780                         "\tbctr\n"
781                 ".globl .StgReturn\n"
782                 ".type .StgReturn,@function\n"
783                 ".StgReturn:\n"
784                         "\tmr 3,14\n"
785                         "\tla 5, %0(1)\n" // load address == addi r5, r1, %0
786                         "\tld 2, -296(5)\n"
787                         "\tld 14, -288(5)\n"
788                         "\tld 15, -280(5)\n"
789                         "\tld 16, -272(5)\n"
790                         "\tld 17, -264(5)\n"
791                         "\tld 18, -256(5)\n"
792                         "\tld 19, -248(5)\n"
793                         "\tld 20, -240(5)\n"
794                         "\tld 21, -232(5)\n"
795                         "\tld 22, -224(5)\n"
796                         "\tld 23, -216(5)\n"
797                         "\tld 24, -208(5)\n"
798                         "\tld 25, -200(5)\n"
799                         "\tld 26, -192(5)\n"
800                         "\tld 27, -184(5)\n"
801                         "\tld 28, -176(5)\n"
802                         "\tld 29, -168(5)\n"
803                         "\tld 30, -160(5)\n"
804                         "\tld 31, -152(5)\n"
805                         "\tlfd 14, -144(5)\n"
806                         "\tlfd 15, -136(5)\n"
807                         "\tlfd 16, -128(5)\n"
808                         "\tlfd 17, -120(5)\n"
809                         "\tlfd 18, -112(5)\n"
810                         "\tlfd 19, -104(5)\n"
811                         "\tlfd 20, -96(5)\n"
812                         "\tlfd 21, -88(5)\n"
813                         "\tlfd 22, -80(5)\n"
814                         "\tlfd 23, -72(5)\n"
815                         "\tlfd 24, -64(5)\n"
816                         "\tlfd 25, -56(5)\n"
817                         "\tlfd 26, -48(5)\n"
818                         "\tlfd 27, -40(5)\n"
819                         "\tlfd 28, -32(5)\n"
820                         "\tlfd 29, -24(5)\n"
821                         "\tlfd 30, -16(5)\n"
822                         "\tlfd 31, -8(5)\n"
823                         "\tmr 1, 5\n"
824                         "\tld 0, 16(1)\n"
825                         "\tmtlr 0\n"
826                         "\tblr\n"
827         : : "i"(RESERVED_C_STACK_BYTES+304 /*stack frame size*/));
828 }
829 #else // linux_HOST_OS
830 #error Only linux support for power64 right now.
831 #endif
832
833 #endif
834
835 /* -----------------------------------------------------------------------------
836    IA64 architecture
837
838    Again, in assembler - so we can fiddle with the register stack, and because
839    gcc doesn't handle asm-clobbered callee-saves correctly.
840
841    loc0  - loc15: preserved locals
842    loc16 - loc28: STG registers
843            loc29: saved ar.pfs
844            loc30: saved b0
845            loc31: saved gp (gcc 3.3 uses this slot)
846    -------------------------------------------------------------------------- */
847
848 #ifdef ia64_HOST_ARCH
849
/*
 * IA64 override of the reserved C-stack size: the memory stack is rarely
 * used on this architecture, so 16K is excessive.
 */
#undef RESERVED_C_STACK_BYTES
#define RESERVED_C_STACK_BYTES 1024

/*
 * LOCALS is the local-register count handed to the StgRun assembly.
 * Compilers older than gcc 3.3 get 31; gcc 3.3 and later get one extra
 * slot for gp saves.
 */
#if (__GNUC__ < 3) || ((__GNUC__ == 3) && (__GNUC_MINOR__ < 3))
#define LOCALS 31
#else
#define LOCALS 32
#endif
860
861 static void GNUC3_ATTRIBUTE(used)
862 StgRunIsImplementedInAssembler(void)
863 {
864     __asm__ volatile(
865                 ".global StgRun\n"
866                 "StgRun:\n"
867                 "\talloc loc29 = ar.pfs, 0, %1, 8, 0\n" /* setup register frame */
868                 "\tld8 r18 = [r32],8\n"                 /* get procedure address */
869                 "\tadds sp = -%0, sp ;;\n"              /* setup stack */
870                 "\tld8 gp = [r32]\n"                    /* get procedure GP */
871                 "\tadds r16 = %0-(6*16), sp\n"
872                 "\tadds r17 = %0-(5*16), sp ;;\n"
873                 "\tstf.spill [r16] = f16,32\n"          /* spill callee-saved fp regs */
874                 "\tstf.spill [r17] = f17,32\n"
875                 "\tmov b6 = r18 ;;\n"                   /* set target address */
876                 "\tstf.spill [r16] = f18,32\n"
877                 "\tstf.spill [r17] = f19,32\n"
878                 "\tmov loc30 = b0 ;;\n"                 /* save return address */
879                 "\tstf.spill [r16] = f20,32\n"
880                 "\tstf.spill [r17] = f21,32\n"
881                 "\tbr.few b6 ;;\n"                      /* branch to function */
882                 ".global StgReturn\n"
883                 "StgReturn:\n"
884                 "\tmov r8 = loc16\n"            /* return value in r8 */
885                 "\tadds r16 = %0-(6*16), sp\n"
886                 "\tadds r17 = %0-(5*16), sp ;;\n"
887                 "\tldf.fill f16 = [r16],32\n"   /* start restoring fp regs */
888                 "\tldf.fill f17 = [r17],32\n"
889                 "\tmov ar.pfs = loc29 ;;\n"     /* restore register frame */
890                 "\tldf.fill f18 = [r16],32\n"
891                 "\tldf.fill f19 = [r17],32\n"
892                 "\tmov b0 = loc30 ;;\n"         /* restore return address */
893                 "\tldf.fill f20 = [r16],32\n"
894                 "\tldf.fill f21 = [r17],32\n"
895                 "\tadds sp = %0, sp\n"          /* restore stack */
896                 "\tbr.ret.sptk.many b0 ;;\n"    /* return */
897         : : "i"(RESERVED_C_STACK_BYTES + 6*16), "i"(LOCALS));
898 }
899
900 #endif
901
902 /* -----------------------------------------------------------------------------
903    MIPS architecture
904    -------------------------------------------------------------------------- */
905
906 #ifdef mips_HOST_ARCH
907
908 StgThreadReturnCode
909 StgRun(StgFunPtr f, StgRegTable *basereg)
910 {
911     register StgThreadReturnCode __v0 __asm__("$2");
912
913     __asm__ __volatile__(
914         "       la      $25, %1                 \n"
915         "       move    $30, %2                 \n"
916         "       jr      %1                      \n"
917         "       .align 3                        \n"
918         "       .globl " STG_RETURN "           \n"
919         "       .aent " STG_RETURN "            \n"
920         STG_RETURN ":                           \n"
921         "       move    %0, $16                 \n"
922         "       move    $3, $17                 \n"
923         : "=r" (__v0),
924         : "r" (f), "r" (basereg)
925         "$16", "$17", "$18", "$19", "$20", "$21", "$22", "$23",
926         "$25", "$28", "$30",
927         "$f20", "$f22", "$f24", "$f26", "$f28", "$f30",
928         "memory");
929
930     return __v0;
931 }
932
933 #endif /* mips_HOST_ARCH */
934
935 #endif /* !USE_MINIINTERPRETER */