[project @ 2005-06-09 05:55:40 by wolfgang]
[ghc-hetmet.git] / ghc / rts / StgCRun.c
1 /* -----------------------------------------------------------------------------
2  *
3  * (c) The GHC Team, 1998-2003
4  *
5  * STG-to-C glue.
6  *
7  * To run an STG function from C land, call
8  *
9  *              rv = StgRun(f,BaseReg);
10  *
11  * where "f" is the STG function to call, and BaseReg is the address of the
12  * RegTable for this run (we might have separate RegTables if we're running
13  * multiple threads on an SMP machine).
14  *
15  * In the end, "f" must JMP to StgReturn (defined below),
16  * passing the return-value "rv" in R1,
17  * to return to the caller of StgRun returning "rv" in
18  * whatever way C returns a value.
19  *
20  * NOTE: StgRun/StgReturn do *NOT* load or store Hp or any
21  * other registers (other than saving the C callee-saves
22  * registers).  Instead, the called function "f" must do that
23  * in STG land.
24  *
25  * GCC will have assumed that pushing/popping of C-stack frames is
26  * going on when it generated its code, and used stack space
27  * accordingly.  However, we actually {\em post-process away} all
28  * such stack-framery (see \tr{ghc/driver/ghc-asm.lprl}). Things will
29  * be OK however, if we initially make sure there are
30  * @RESERVED_C_STACK_BYTES@ on the C-stack to begin with, for local
31  * variables.
32  *
33  * -------------------------------------------------------------------------- */
34
35 #include "PosixSource.h"
36
37
38 /*
39  * We define the following (unused) global register variables, because for
40  * some reason gcc generates sub-optimal code for StgRun() on the Alpha
41  * (unnecessarily saving extra registers on the stack) if we don't.
42  *
43  * Why do it at the top of this file, rather than near StgRun() below?  Because
44  * gcc doesn't let us define global register variables after any function
45  * definition has been read.  Any point after #include "Stg.h" would be too
46  * late.
47  *
48  * We define alpha_EXTRA_CAREFUL here to save $s6, $f8 and $f9 -- registers
49  * that we don't use but which are callee-save registers.  The __divq() routine
50  * in libc.a clobbers $s6.
51  */
52 #include "ghcconfig.h"
/* Alpha only: dummy global register variables bound to $26 and $29, plus
 * (because alpha_EXTRA_CAREFUL is defined right here) $15, $f8 and $f9.
 * They are declared for their effect on gcc's code generation only; nothing
 * in the code visible here refers to them by name. */
53 #ifdef alpha_HOST_ARCH
54 #define alpha_EXTRA_CAREFUL
55 register long   fake_ra __asm__("$26");
56 register long   fake_gp __asm__("$29");
57 #ifdef alpha_EXTRA_CAREFUL
58 register long   fake_s6 __asm__("$15");
59 register double fake_f8 __asm__("$f8");
60 register double fake_f9 __asm__("$f9");
61 #endif
62 #endif
63
64 /* include Stg.h first because we want real machine regs in here: we
65  * have to get the value of R1 back from Stg land to C land intact.
66  */
67 #include "Stg.h"
68 #include "Rts.h"
69 #include "StgRun.h"
70 #include "RtsFlags.h"
71 #include "OSThreads.h"
72 #include "Capability.h"
73
74 #ifdef DEBUG
75 #include "RtsUtils.h"
76 #include "Printer.h"
77 #endif
78
79 #ifdef USE_MINIINTERPRETER
80
81 /* -----------------------------------------------------------------------------
82    any architecture (using miniinterpreter)
83    -------------------------------------------------------------------------- */
84
/*
 * Mini-interpreter version of StgRun.
 *
 * Starting with f, repeatedly call the current code pointer and use the
 * value it returns as the next code pointer, until a null pointer comes
 * back.  The value then held in R1.i is returned to the caller.
 * basereg is not used by this version.
 *
 * When RtsFlags[0].DebugFlags.interpreter is set, each code pointer is
 * printed before it is called.
 */
85 StgThreadReturnCode StgRun(StgFunPtr f, StgRegTable *basereg STG_UNUSED)
86 {
87     StgFunPtr next = f;
88     for (; next != 0; next = (StgFunPtr) (next)()) {
89         if (!RtsFlags[0].DebugFlags.interpreter)
90             continue;           /* tracing off: go straight to the call */
91         debugBelch("Jumping to ");
92         printPtr((P_)next); fflush(stdout);
93         debugBelch("\n");
94     }
95     return (StgThreadReturnCode)R1.i;
96 }
97
/*
 * Mini-interpreter version of StgReturn: hands back a null code pointer,
 * which is the value on which the StgRun loop stops.
 */
98 StgFunPtr StgReturn(void)
99 {
100     return (StgFunPtr) 0;
101 }
102
103 #else /* !USE_MINIINTERPRETER */
104
/* Assembler-level spelling of the StgReturn label, as a string literal that
 * is pasted into the asm templates below: "_StgReturn" when
 * LEADING_UNDERSCORE is defined, "StgReturn" otherwise. */
105 #ifdef LEADING_UNDERSCORE
106 #define STG_RETURN "_StgReturn"
107 #else
108 #define STG_RETURN "StgReturn"
109 #endif
110
111 /* -----------------------------------------------------------------------------
112    x86 architecture
113    -------------------------------------------------------------------------- */
114
115 #ifdef i386_HOST_ARCH
116
/* Assembler directive (including its trailing space) used to export the
 * STG_RETURN label: ".globl " when darwin_TARGET_OS is defined, ".global "
 * otherwise. */
117 #ifdef darwin_TARGET_OS
118 #define STG_GLOBAL ".globl "
119 #else
120 #define STG_GLOBAL ".global "
121 #endif
122
/*
 * x86 StgRun.
 *
 * f       - STG code to jump to.
 * basereg - register table pointer; loaded into %ebx before the jump.
 * Returns the value found in %esi when control reaches the StgReturn label.
 *
 * The single asm statement contains both halves: the entry sequence that
 * ends in "jmp *%eax", and the StgReturn label that STG code jumps back to.
 *
 * asm operands: %0 = r (bound to %eax by "=&a"), %1 = space, %2 = f,
 *               %3 = basereg, %4 = RESERVED_C_STACK_BYTES (immediate).
 */
123 StgThreadReturnCode
124 StgRun(StgFunPtr f, StgRegTable *basereg) {
125
    /* RESERVED_C_STACK_BYTES of scratch stack plus four pointer-sized slots
     * for the saved %ebx/%esi/%edi/%ebp. */
126     unsigned char space[ RESERVED_C_STACK_BYTES + 4*sizeof(void *) ];
127     StgThreadReturnCode r;
128
129     __asm__ volatile (
130         /*
131          * save callee-saves registers on behalf of the STG code.
132          */
        /* The save area is addressed as %esp + %4; operand %1 (space) is
         * never referenced in the template.
         * NOTE(review): this presumably relies on gcc placing `space` at
         * %esp -- confirm. */
133         "movl %%esp, %%eax\n\t"
134         "addl %4, %%eax\n\t"
135         "movl %%ebx,0(%%eax)\n\t"
136         "movl %%esi,4(%%eax)\n\t"
137         "movl %%edi,8(%%eax)\n\t"
138         "movl %%ebp,12(%%eax)\n\t"
139         /*
140          * Set BaseReg
141          */
142         "movl %3,%%ebx\n\t"
143         /*
144          * grab the function argument from the stack, and jump to it.
145          */
146         "movl %2,%%eax\n\t"
147         "jmp *%%eax\n\t"
148
        /* Exported label: the point STG code jumps to in order to return. */
149         STG_GLOBAL STG_RETURN "\n"
150         STG_RETURN ":\n\t"
151
152         "movl %%esi, %%eax\n\t"   /* Return value in R1  */
153
154         /*
155          * restore callee-saves registers.  (Don't stomp on %%eax!)
156          */
157         "movl %%esp, %%edx\n\t"
158         "addl %4, %%edx\n\t"
159         "movl 0(%%edx),%%ebx\n\t"       /* restore the registers saved above */
160         "movl 4(%%edx),%%esi\n\t"
161         "movl 8(%%edx),%%edi\n\t"
162         "movl 12(%%edx),%%ebp\n\t"
163
164       : "=&a" (r), "=m" (space)
165       : "m" (f), "m" (basereg), "i" (RESERVED_C_STACK_BYTES)
166       : "edx" /* stomps on %edx */
167     );
168
169     return r;
170 }
171
172 #endif
173
174 /* ----------------------------------------------------------------------------
175    x86-64 is almost the same as plain x86.
176
177    I've done it using entirely inline assembler, because I couldn't
178    get gcc to generate the correct subtraction from %rsp by using
179    the local array variable trick.  It didn't seem to reserve
180    enough space.  Oh well, it's not much harder this way.
181
182    ------------------------------------------------------------------------- */
183
184 #ifdef x86_64_HOST_ARCH
185
/* StgRun itself is defined by the ".globl StgRun" label inside the asm
 * statement below; this is only its C prototype. */
186 extern StgThreadReturnCode StgRun(StgFunPtr f, StgRegTable *basereg);
187
/*
 * Container for the x86-64 StgRun/StgReturn assembly.  The function body is
 * nothing but one asm statement that defines both global labels; the C
 * function is not called anywhere in the code visible here.
 *
 * asm operand %0 is the frame size, RESERVED_C_STACK_BYTES+48+8:
 *   - 48 bytes hold the six saved registers (%rbx, %rbp, %r12-%r15), stored
 *     at (%rsp + %0 - 48) after the frame has been allocated;
 *   - the extra 8 bytes are the alignment hack explained in the long
 *     comment at the end of the function.
 */
188 static void StgRunIsImplementedInAssembler(void)
189 {
190     __asm__ volatile (
191         /*
192          * save callee-saves registers on behalf of the STG code.
193          */
194         ".globl StgRun\n"
195         "StgRun:\n\t"
196         "subq %0, %%rsp\n\t"
197         "movq %%rsp, %%rax\n\t"
198         "addq %0-48, %%rax\n\t"
199         "movq %%rbx,0(%%rax)\n\t"
200         "movq %%rbp,8(%%rax)\n\t"
201         "movq %%r12,16(%%rax)\n\t"
202         "movq %%r13,24(%%rax)\n\t"
203         "movq %%r14,32(%%rax)\n\t"
204         "movq %%r15,40(%%rax)\n\t"
205         /*
206          * Set BaseReg (taken from %rsi)
207          */
208         "movq %%rsi,%%rbx\n\t"
209         /*
210          * the function to call is taken from %rdi; jump to it.
211          */
212         "movq %%rdi,%%rax\n\t"
213         "jmp *%%rax\n\t"
214
        /* Exported label: the point STG code jumps to in order to return. */
215         ".global " STG_RETURN "\n"
216         STG_RETURN ":\n\t"
217
218         "movq %%r13, %%rax\n\t"   /* Return value in R1  */
219
220         /*
221          * restore callee-saves registers.  (Don't stomp on %%rax!)
222          */
223         "movq %%rsp, %%rdx\n\t"
224         "addq %0-48, %%rdx\n\t"
225         "movq 0(%%rdx),%%rbx\n\t"       /* restore the registers saved above */
226         "movq 8(%%rdx),%%rbp\n\t"
227         "movq 16(%%rdx),%%r12\n\t"
228         "movq 24(%%rdx),%%r13\n\t"
229         "movq 32(%%rdx),%%r14\n\t"
230         "movq 40(%%rdx),%%r15\n\t"
        /* Release the frame and return to StgRun's caller. */
231         "addq %0, %%rsp\n\t"
232         "retq"
233
234         : : "i"(RESERVED_C_STACK_BYTES+48+8 /*stack frame size*/));
235     /* 
236        HACK alert!
237
238        The x86_64 ABI specifies that on a procedure call, %rsp is
239        aligned on a 16-byte boundary + 8.  That is, the first
240        argument on the stack after the return address will be
241        16-byte aligned.  
242        
243        Which should be fine: RESERVED_C_STACK_BYTES+48 is a multiple
244        of 16 bytes.  
245        
246        BUT... when we do a C-call from STG land, gcc likes to put the
247        stack alignment adjustment in the prolog.  eg. if we're calling
248        a function with arguments in regs, gcc will insert 'subq $8,%rsp'
249        in the prolog, to keep %rsp aligned (the return address is 8
250        bytes, remember).  The mangler throws away the prolog, so we
251        lose the stack alignment.
252
253        The hack is to add this extra 8 bytes to our %rsp adjustment
254        here, so that throughout STG code, %rsp is 16-byte aligned,
255        ready for a C-call.  
256
257        A quick way to see if this is wrong is to compile this code:
258
259           main = System.Exit.exitWith ExitSuccess
260
261        And run it with +RTS -sstderr.  The stats code in the RTS, in
262        particular statsPrintf(), relies on the stack alignment because
263        it saves the %xmm regs on the stack, so it'll fall over if the
264        stack isn't aligned, and calling exitWith from Haskell invokes
265        shutdownHaskellAndExit using a C call.
266
267        Future gcc releases will almost certainly break this hack...
268     */
269 }
270
271 #endif /* x86-64 */
272
273 /* -----------------------------------------------------------------------------
274    Sparc architecture
275
276    --
277    OLD COMMENT from GHC-3.02:
278
279    We want tailjumps to be calls, because `call xxx' is the only Sparc
280    branch that allows an arbitrary label as a target.  (Gcc's ``goto
281    *target'' construct ends up loading the label into a register and
282    then jumping, at the cost of two extra instructions for the 32-bit
283    load.)
284
285    When entering the threaded world, we stash our return address in a
286    known location so that \tr{%i7} is available as an extra
287    callee-saves register.  Of course, we have to restore this when
288    coming out of the threaded world.
289
290    I hate this god-forsaken architecture.  Since the top of the
291    reserved stack space is used for globals and the bottom is reserved
292    for outgoing arguments, we have to stick our return address
293    somewhere in the middle.  Currently, I'm allowing 100 extra
294    outgoing arguments beyond the first 6.  --JSM
295
296    Updated info (GHC 4.06): we don't appear to use %i7 any more, so
297    I'm not sure whether we still need to save it.  Incidentally, what
298    does the last paragraph above mean when it says "the top of the
299    stack is used for globals"?  What globals?  --SDM
300
301    Updated info (GHC 4.08.2): not saving %i7 any more (see below).
302    -------------------------------------------------------------------------- */
303
304 #ifdef sparc_HOST_ARCH
305
/*
 * Sparc StgRun.
 *
 * f       - STG code to run; it is invoked with an ordinary C call.
 * basereg - not referenced in this version.
 * Returns R1.i as read after control arrives at the StgReturn label.
 *
 * The StgReturn label is emitted by the asm statement placed directly after
 * the call to f(), so jumping to StgReturn resumes this function at the
 * point just after that call.
 */
306 StgThreadReturnCode
307 StgRun(StgFunPtr f, StgRegTable *basereg) {
308
    /* Reserves RESERVED_C_STACK_BYTES in this frame; the array is only
     * touched by the disabled (#if 0) code below. */
309     unsigned char space[RESERVED_C_STACK_BYTES];
310 #if 0
311     register void *i7 __asm__("%i7");
312     ((void **)(space))[100] = i7;
313 #endif
314     f();
315     __asm__ volatile (
316             ".align 4\n"
317             ".global " STG_RETURN "\n"
318             STG_RETURN ":"
319             : : : "l0","l1","l2","l3","l4","l5","l6","l7");
320     /* we tell the C compiler that l0-l7 are clobbered on return to
321      * StgReturn, otherwise it tries to use these to save eg. the
322      * address of space[100] across the call.  The correct thing
323      * to do would be to save all the callee-saves regs, but we
324      * can't be bothered to do that.
325      *
326      * The code that gcc generates for this little fragment is now
327      * terrible.  We could do much better by coding it directly in
328      * assembler.
329      */
330 #if 0
331     /* updated 4.08.2: we don't save %i7 in the middle of the reserved
332      * space any more, since gcc tries to save its address across the
333      * call to f(), this gets clobbered in STG land and we end up
334      * dereferencing a bogus pointer in StgReturn.
335      */
336     __asm__ volatile ("ld %1,%0"
337                       : "=r" (i7) : "m" (((void **)(space))[100]));
338 #endif
339     return (StgThreadReturnCode)R1.i;
340 }
341
342 #endif
343
344 /* -----------------------------------------------------------------------------
345    alpha architecture
346
347    "The stack pointer (SP) must at all times denote an address that has octaword
348     alignment. (This restriction has the side effect that the in-memory portion
349     of the argument list, if any, will start on an octaword boundary.) Note that
350     the stack grows toward lower addresses. During a procedure invocation, SP
351     can never be set to a value that is higher than the value of SP at entry to
352     that procedure invocation.
353
354    "The contents of the stack, located above the portion of the argument list
355     (if any) that is passed in memory, belong to the calling procedure. Because
356     they are part of the calling procedure, they should not be read or written
357     by the called procedure, except as specified by indirect arguments or
358     language-controlled up-level references.
359
360    "The SP value might be used by the hardware when raising exceptions and
361     asynchronous interrupts. It must be assumed that the contents of the stack
362     below the current SP value and within the stack for the current thread are
363     continually and unpredictably modified, as specified in the _Alpha
364     Architecture Reference Manual_, and as a result of asynchronous software
365     actions."
366
367    -- Compaq Computer Corporation, Houston. Tru64 UNIX Calling Standard for
368       Alpha Systems, 5.1 edition, August 2000, section 3.2.1.  http://www.
369       tru64unix.compaq.com/docs/base_doc/DOCUMENTATION/V51_PDF/ARH9MBTE.PDF
370    -------------------------------------------------------------------------- */
371
372 #ifdef alpha_HOST_ARCH
373
/*
 * Alpha StgRun.
 *
 * f       - STG code to jump to; it is placed in $27 before the jump.
 * basereg - not referenced in this version.
 * Returns the value found in $14 (real_s5) once control reaches StgReturn.
 *
 * Each real_* local is bound to a specific machine register and has a
 * matching volatile save_* local.  The function copies every real_* into
 * its save_*, lowers $30 by RESERVED_C_STACK_BYTES and jumps to f.  At the
 * StgReturn label it raises $30 again, picks up the result, and copies the
 * save_* values back into the registers.
 */
374 StgThreadReturnCode
375 StgRun(StgFunPtr f, StgRegTable *basereg)
376 {
377     register long   real_ra __asm__("$26"); volatile long   save_ra;
378     register long   real_gp __asm__("$29"); volatile long   save_gp;
379
380     register long   real_s0 __asm__("$9" ); volatile long   save_s0;
381     register long   real_s1 __asm__("$10"); volatile long   save_s1;
382     register long   real_s2 __asm__("$11"); volatile long   save_s2;
383     register long   real_s3 __asm__("$12"); volatile long   save_s3;
384     register long   real_s4 __asm__("$13"); volatile long   save_s4;
385     register long   real_s5 __asm__("$14"); volatile long   save_s5;
386 #ifdef alpha_EXTRA_CAREFUL
387     register long   real_s6 __asm__("$15"); volatile long   save_s6;
388 #endif
389
390     register double real_f2 __asm__("$f2"); volatile double save_f2;
391     register double real_f3 __asm__("$f3"); volatile double save_f3;
392     register double real_f4 __asm__("$f4"); volatile double save_f4;
393     register double real_f5 __asm__("$f5"); volatile double save_f5;
394     register double real_f6 __asm__("$f6"); volatile double save_f6;
395     register double real_f7 __asm__("$f7"); volatile double save_f7;
396 #ifdef alpha_EXTRA_CAREFUL
397     register double real_f8 __asm__("$f8"); volatile double save_f8;
398     register double real_f9 __asm__("$f9"); volatile double save_f9;
399 #endif
400
401     register StgFunPtr real_pv __asm__("$27");
402
403     StgThreadReturnCode ret;
404
    /* Save phase: copy each register-bound local into its volatile slot. */
405     save_ra = real_ra;
406     save_gp = real_gp;
407
408     save_s0 = real_s0;
409     save_s1 = real_s1;
410     save_s2 = real_s2;
411     save_s3 = real_s3;
412     save_s4 = real_s4;
413     save_s5 = real_s5;
414 #ifdef alpha_EXTRA_CAREFUL
415     save_s6 = real_s6;
416 #endif
417
418     save_f2 = real_f2;
419     save_f3 = real_f3;
420     save_f4 = real_f4;
421     save_f5 = real_f5;
422     save_f6 = real_f6;
423     save_f7 = real_f7;
424 #ifdef alpha_EXTRA_CAREFUL
425     save_f8 = real_f8;
426     save_f9 = real_f9;
427 #endif
428
    /* Put the target in $27, which the "jmp ($27)" below jumps through. */
429     real_pv = f;
430
    /* Lower $30 by RESERVED_C_STACK_BYTES and jump to f; the StgReturn
     * label follows, where $30 is raised by the same amount. */
431     __asm__ volatile(   "lda $30,-%0($30)"      "\n"
432                 "\t"    "jmp ($27)"             "\n"
433                 "\t"    ".align 3"              "\n"
434                 ".globl " STG_RETURN            "\n"
435                 STG_RETURN ":"                  "\n"
436                 "\t"    "lda $30,%0($30)"       "\n"
437                 : : "K" (RESERVED_C_STACK_BYTES));
438
    /* The result is read from $14 before that register is restored below. */
439     ret = real_s5;
440
    /* Restore phase: mirror image of the save phase. */
441     real_s0 = save_s0;
442     real_s1 = save_s1;
443     real_s2 = save_s2;
444     real_s3 = save_s3;
445     real_s4 = save_s4;
446     real_s5 = save_s5;
447 #ifdef alpha_EXTRA_CAREFUL
448     real_s6 = save_s6;
449 #endif
450
451     real_f2 = save_f2;
452     real_f3 = save_f3;
453     real_f4 = save_f4;
454     real_f5 = save_f5;
455     real_f6 = save_f6;
456     real_f7 = save_f7;
457 #ifdef alpha_EXTRA_CAREFUL
458     real_f8 = save_f8;
459     real_f9 = save_f9;
460 #endif
461
462     real_ra = save_ra;
463     real_gp = save_gp;
464
465     return ret;
466 }
467
468 #endif /* alpha_HOST_ARCH */
469
470 /* -----------------------------------------------------------------------------
471    HP-PA architecture
472    -------------------------------------------------------------------------- */
473
474 #ifdef hppa1_1_HOST_ARCH
475
/*
 * HP-PA StgRun.
 *
 * f       - STG code to run; it is invoked with an ordinary C call.
 * basereg - not referenced in this version.
 * Returns the value copied out of %r11 at the StgReturn label.
 *
 * The first asm statement stores %r3-%r18 (16 words) followed by
 * %fr12-%fr21 (10 doubles) into a save area addressed relative to %r30.
 * The second asm statement defines the StgReturn label, copies %r11 into
 * ret, and reloads the same registers from the same offsets.
 *
 * NOTE(review): the save-area offset is -(116*sizeof(long) +
 * 10*sizeof(double)), while `space` is declared with 16*sizeof(long); the
 * relationship between 116 and RESERVED_C_STACK_BYTES is not visible
 * here -- confirm the two agree.
 */
476 StgThreadReturnCode
477 StgRun(StgFunPtr f, StgRegTable *basereg)
478 {
479     StgChar space[RESERVED_C_STACK_BYTES+16*sizeof(long)+10*sizeof(double)];
480     StgThreadReturnCode ret;
481
    /* %r19 is the moving base pointer for the save area (listed as a
     * clobber).  After the 16 word stores it is advanced by 80, then by
     * 32 twice, so each group of fstds uses offsets -16, -8, 0 and 8. */
482     __asm__ volatile ("ldo %0(%%r30),%%r19\n"
483                       "\tstw %%r3, 0(0,%%r19)\n"
484                       "\tstw %%r4, 4(0,%%r19)\n"
485                       "\tstw %%r5, 8(0,%%r19)\n"
486                       "\tstw %%r6,12(0,%%r19)\n"
487                       "\tstw %%r7,16(0,%%r19)\n"
488                       "\tstw %%r8,20(0,%%r19)\n"
489                       "\tstw %%r9,24(0,%%r19)\n"
490                       "\tstw %%r10,28(0,%%r19)\n"
491                       "\tstw %%r11,32(0,%%r19)\n"
492                       "\tstw %%r12,36(0,%%r19)\n"
493                       "\tstw %%r13,40(0,%%r19)\n"
494                       "\tstw %%r14,44(0,%%r19)\n"
495                       "\tstw %%r15,48(0,%%r19)\n"
496                       "\tstw %%r16,52(0,%%r19)\n"
497                       "\tstw %%r17,56(0,%%r19)\n"
498                       "\tstw %%r18,60(0,%%r19)\n"
499                       "\tldo 80(%%r19),%%r19\n"
500                       "\tfstds %%fr12,-16(0,%%r19)\n"
501                       "\tfstds %%fr13, -8(0,%%r19)\n"
502                       "\tfstds %%fr14,  0(0,%%r19)\n"
503                       "\tfstds %%fr15,  8(0,%%r19)\n"
504                       "\tldo 32(%%r19),%%r19\n"
505                       "\tfstds %%fr16,-16(0,%%r19)\n"
506                       "\tfstds %%fr17, -8(0,%%r19)\n"
507                       "\tfstds %%fr18,  0(0,%%r19)\n"
508                       "\tfstds %%fr19,  8(0,%%r19)\n"
509                       "\tldo 32(%%r19),%%r19\n"
510                       "\tfstds %%fr20,-16(0,%%r19)\n"
511                       "\tfstds %%fr21, -8(0,%%r19)\n" : :
512                       "n" (-(116 * sizeof(long) + 10 * sizeof(double))) : "%r19"
513                       );
514
515     f();
516
    /* In this statement %0 is the output `ret` and %1 is the save-area
     * offset (the same expression as operand %0 of the statement above). */
517     __asm__ volatile (".align 4\n"
518                       "\t.EXPORT " STG_RETURN ",CODE\n"
519                       "\t.EXPORT " STG_RETURN ",ENTRY,PRIV_LEV=3\n"
520                       STG_RETURN "\n"
521                       /* "\tldo %0(%%r3),%%r19\n" */
522                       "\tldo %1(%%r30),%%r19\n"
523                       "\tcopy %%r11, %0\n"  /* save R1 */
524                       "\tldw  0(0,%%r19),%%r3\n"
525                       "\tldw  4(0,%%r19),%%r4\n"
526                       "\tldw  8(0,%%r19),%%r5\n"
527                       "\tldw 12(0,%%r19),%%r6\n"
528                       "\tldw 16(0,%%r19),%%r7\n"
529                       "\tldw 20(0,%%r19),%%r8\n"
530                       "\tldw 24(0,%%r19),%%r9\n"
531                       "\tldw 28(0,%%r19),%%r10\n"
532                       "\tldw 32(0,%%r19),%%r11\n"
533                       "\tldw 36(0,%%r19),%%r12\n"
534                       "\tldw 40(0,%%r19),%%r13\n"
535                       "\tldw 44(0,%%r19),%%r14\n"
536                       "\tldw 48(0,%%r19),%%r15\n"
537                       "\tldw 52(0,%%r19),%%r16\n"
538                       "\tldw 56(0,%%r19),%%r17\n"
539                       "\tldw 60(0,%%r19),%%r18\n"
540                       "\tldo 80(%%r19),%%r19\n"
541                       "\tfldds -16(0,%%r19),%%fr12\n"
542                       "\tfldds  -8(0,%%r19),%%fr13\n"
543                       "\tfldds   0(0,%%r19),%%fr14\n"
544                       "\tfldds   8(0,%%r19),%%fr15\n"
545                       "\tldo 32(%%r19),%%r19\n"
546                       "\tfldds -16(0,%%r19),%%fr16\n"
547                       "\tfldds  -8(0,%%r19),%%fr17\n"
548                       "\tfldds   0(0,%%r19),%%fr18\n"
549                       "\tfldds   8(0,%%r19),%%fr19\n"
550                       "\tldo 32(%%r19),%%r19\n"
551                       "\tfldds -16(0,%%r19),%%fr20\n"
552                       "\tfldds  -8(0,%%r19),%%fr21\n"
553                          : "=r" (ret)
554                          : "n" (-(116 * sizeof(long) + 10 * sizeof(double)))
555                          : "%r19"
556                       );
557
558     return ret;
559 }
560
561 #endif /* hppa1_1_HOST_ARCH */
562
563 /* -----------------------------------------------------------------------------
564    PowerPC architecture
565
566    Everything is in assembler, so we don't have to deal with GCC...
567    
568    -------------------------------------------------------------------------- */
569
570 #ifdef powerpc_HOST_ARCH
571
// C prototype only: on PowerPC, StgRun is defined by a label inside the asm
// statements below (one variant for Darwin, one for the #else case).
572 extern StgThreadReturnCode StgRun(StgFunPtr f, StgRegTable *basereg);
573
574 #ifdef darwin_HOST_OS
// Darwin variant.  Container for the _StgRun/_StgReturn assembly; the C
// function is not called anywhere in the code visible here.
//
// asm operand %0 is the frame size, RESERVED_C_STACK_BYTES+224.
//
// _StgRun:    r0 = LR; call saveFP; store r13-r31 at -220(r1); allocate the
//             frame with stwu; r27 = r4 (basereg); CTR = r12 = r3 (f); bctr.
// _StgReturn: r3 = r14 (result); pop the frame; reload r13-r31; branch to
//             restFP.
//
// saveFP/restFP are external helpers not defined here.
// NOTE(review): presumably they save/restore f14-f31 and handle LR (the
// "# f14" asm comments suggest the starting register) -- confirm.
575 static void StgRunIsImplementedInAssembler(void)
576 {
577 #if HAVE_SUBSECTIONS_VIA_SYMBOLS
578             // if the toolchain supports deadstripping, we have to
579             // prevent it here (it tends to get confused here).
580         __asm__ volatile (".no_dead_strip _StgRunIsImplementedInAssembler");
581 #endif
582         __asm__ volatile (
583                 "\n.globl _StgRun\n"
584                 "_StgRun:\n"
585                 "\tmflr r0\n"
586                 "\tbl saveFP # f14\n"
587                 "\tstmw r13,-220(r1)\n"
588                 "\tstwu r1,-%0(r1)\n"
589                 "\tmr r27,r4\n" // BaseReg == r27
590                 "\tmtctr r3\n"
591                 "\tmr r12,r3\n"
592                 "\tbctr\n"
593                 ".globl _StgReturn\n"
594                 "_StgReturn:\n"
595                 "\tmr r3,r14\n"
596                 "\tla r1,%0(r1)\n"
597                 "\tlmw r13,-220(r1)\n"
598                 "\tb restFP # f14\n"
599         : : "i"(RESERVED_C_STACK_BYTES+224 /*stack frame size*/));
600 }
601 #else
602
603 // This version is for PowerPC Linux.
604
605 // Differences from the Darwin/Mac OS X version:
606 // *) Different Assembler Syntax
607 // *) Doesn't use Register Saving Helper Functions (although they exist somewhere)
608 // *) We may not access positive stack offsets
609 //    (no "Red Zone" as in the Darwin ABI)
610 // *) The Link Register is saved to a different offset in the caller's stack frame
611 //    (Linux: 4(r1), Darwin 8(r1))
612
// Container for the PowerPC Linux StgRun/StgReturn assembly; the C function
// is not called anywhere in the code visible here.
//
// asm operand %0 is the frame size, RESERVED_C_STACK_BYTES+224.
//
// StgRun:    save LR at 4(r1); keep the incoming stack pointer in r5;
//            allocate the frame with stwu; store r13-r31 at -220(r5) and
//            f14-f31 at -144(r5) .. -8(r5); r27 = r4 (basereg);
//            CTR = r12 = r3 (f); bctr.
// StgReturn: r3 = r14 (result); r5 = r1 + %0 (the pre-StgRun stack
//            pointer); reload r13-r31 and f14-f31 from the same offsets;
//            r1 = r5; reload LR from 4(r1); blr.
613 static void StgRunIsImplementedInAssembler(void)
614 {
615         __asm__ volatile (
616                 "\t.globl StgRun\n"
617                 "\t.type StgRun,@function\n"
618                 "StgRun:\n"
619                 "\tmflr 0\n"
620                 "\tstw 0,4(1)\n"
621                 "\tmr 5,1\n"
622                 "\tstwu 1,-%0(1)\n"
623                 "\tstmw 13,-220(5)\n"
624                 "\tstfd 14,-144(5)\n"
625                 "\tstfd 15,-136(5)\n"
626                 "\tstfd 16,-128(5)\n"
627                 "\tstfd 17,-120(5)\n"
628                 "\tstfd 18,-112(5)\n"
629                 "\tstfd 19,-104(5)\n"
630                 "\tstfd 20,-96(5)\n"
631                 "\tstfd 21,-88(5)\n"
632                 "\tstfd 22,-80(5)\n"
633                 "\tstfd 23,-72(5)\n"
634                 "\tstfd 24,-64(5)\n"
635                 "\tstfd 25,-56(5)\n"
636                 "\tstfd 26,-48(5)\n"
637                 "\tstfd 27,-40(5)\n"
638                 "\tstfd 28,-32(5)\n"
639                 "\tstfd 29,-24(5)\n"
640                 "\tstfd 30,-16(5)\n"
641                 "\tstfd 31,-8(5)\n"
642                 "\tmr 27,4\n"  // BaseReg == r27
643                 "\tmtctr 3\n"
644                 "\tmr 12,3\n"
645                 "\tbctr\n"
646                 ".globl StgReturn\n"
647                 "\t.type StgReturn,@function\n"
648                 "StgReturn:\n"
649                 "\tmr 3,14\n"
650                 "\tla 5,%0(1)\n"
651                 "\tlmw 13,-220(5)\n"
652                 "\tlfd 14,-144(5)\n"
653                 "\tlfd 15,-136(5)\n"
654                 "\tlfd 16,-128(5)\n"
655                 "\tlfd 17,-120(5)\n"
656                 "\tlfd 18,-112(5)\n"
657                 "\tlfd 19,-104(5)\n"
658                 "\tlfd 20,-96(5)\n"
659                 "\tlfd 21,-88(5)\n"
660                 "\tlfd 22,-80(5)\n"
661                 "\tlfd 23,-72(5)\n"
662                 "\tlfd 24,-64(5)\n"
663                 "\tlfd 25,-56(5)\n"
664                 "\tlfd 26,-48(5)\n"
665                 "\tlfd 27,-40(5)\n"
666                 "\tlfd 28,-32(5)\n"
667                 "\tlfd 29,-24(5)\n"
668                 "\tlfd 30,-16(5)\n"
669                 "\tlfd 31,-8(5)\n"
670                 "\tmr 1,5\n"
671                 "\tlwz 0,4(1)\n"
672                 "\tmtlr 0\n"
673                 "\tblr\n"
674         : : "i"(RESERVED_C_STACK_BYTES+224 /*stack frame size*/));
675 }
676 #endif
677
678 #endif
679
680 /* -----------------------------------------------------------------------------
681    PowerPC 64 architecture
682
683    Everything is in assembler, so we don't have to deal with GCC...
684    
685    -------------------------------------------------------------------------- */
686
687 #ifdef powerpc64_HOST_ARCH
688
689 #ifdef linux_HOST_OS
// C prototype only: StgRun is defined by the asm statement below.
690 extern StgThreadReturnCode StgRun(StgFunPtr f, StgRegTable *basereg);
691
// Container for the PowerPC64 Linux StgRun/StgReturn assembly; the C
// function is not called anywhere in the code visible here.
//
// asm operand %0 is the frame size, RESERVED_C_STACK_BYTES+304.
//
// The ".opd" section part emits a three-quadword record for each of StgRun
// and StgReturn (address of .StgRun/.StgReturn, .TOC.@tocbase, 0); the
// code itself lives at the dot-prefixed labels.
//
// .StgRun:    save LR at 16(r1); keep the incoming stack pointer in r5;
//             allocate the frame with stdu; store r2 at -296(r5), r14-r31
//             at -288(r5) .. -152(r5) and f14-f31 at -144(r5) .. -8(r5);
//             r27 = r4 (basereg); then load r2 from 8(r3) and the branch
//             target from 0(r3), i.e. f is read as a pointer to such a
//             record rather than jumped to directly.
// .StgReturn: r3 = r14 (result); r5 = r1 + %0; reload everything from the
//             same offsets; r1 = r5; reload LR from 16(r1); blr.
692 static void StgRunIsImplementedInAssembler(void)
693 {
694         // r0 volatile
695         // r1 stack pointer
696         // r2 toc - needs to be saved
697         // r3-r10 argument passing, volatile
698         // r11, r12 very volatile (not saved across cross-module calls)
699         // r13 thread local state (never modified, don't need to save)
700         // r14-r31 callee-save
701         __asm__ volatile (
702                 ".section \".opd\",\"aw\"\n"
703                 ".align 3\n"
704                 ".globl StgRun\n"
705                 "StgRun:\n"
706                         "\t.quad\t.StgRun,.TOC.@tocbase,0\n"
707                         "\t.size StgRun,24\n"
708                 ".globl StgReturn\n"
709                 "StgReturn:\n"
710                         "\t.quad\t.StgReturn,.TOC.@tocbase,0\n"
711                         "\t.size StgReturn,24\n"
712                 ".previous\n"
713                 ".globl .StgRun\n"
714                 ".type .StgRun,@function\n"
715                 ".StgRun:\n"
716                         "\tmflr 0\n"
717                         "\tmr 5, 1\n"
718                         "\tstd 0, 16(1)\n"
719                         "\tstdu 1, -%0(1)\n"
720                         "\tstd 2, -296(5)\n"
721                         "\tstd 14, -288(5)\n"
722                         "\tstd 15, -280(5)\n"
723                         "\tstd 16, -272(5)\n"
724                         "\tstd 17, -264(5)\n"
725                         "\tstd 18, -256(5)\n"
726                         "\tstd 19, -248(5)\n"
727                         "\tstd 20, -240(5)\n"
728                         "\tstd 21, -232(5)\n"
729                         "\tstd 22, -224(5)\n"
730                         "\tstd 23, -216(5)\n"
731                         "\tstd 24, -208(5)\n"
732                         "\tstd 25, -200(5)\n"
733                         "\tstd 26, -192(5)\n"
734                         "\tstd 27, -184(5)\n"
735                         "\tstd 28, -176(5)\n"
736                         "\tstd 29, -168(5)\n"
737                         "\tstd 30, -160(5)\n"
738                         "\tstd 31, -152(5)\n"
739                         "\tstfd 14, -144(5)\n"
740                         "\tstfd 15, -136(5)\n"
741                         "\tstfd 16, -128(5)\n"
742                         "\tstfd 17, -120(5)\n"
743                         "\tstfd 18, -112(5)\n"
744                         "\tstfd 19, -104(5)\n"
745                         "\tstfd 20, -96(5)\n"
746                         "\tstfd 21, -88(5)\n"
747                         "\tstfd 22, -80(5)\n"
748                         "\tstfd 23, -72(5)\n"
749                         "\tstfd 24, -64(5)\n"
750                         "\tstfd 25, -56(5)\n"
751                         "\tstfd 26, -48(5)\n"
752                         "\tstfd 27, -40(5)\n"
753                         "\tstfd 28, -32(5)\n"
754                         "\tstfd 29, -24(5)\n"
755                         "\tstfd 30, -16(5)\n"
756                         "\tstfd 31, -8(5)\n"
757                         "\tmr 27, 4\n"  // BaseReg == r27
758                         "\tld 2, 8(3)\n"
759                         "\tld 3, 0(3)\n"
760                         "\tmtctr 3\n"
761                         "\tbctr\n"
762                 ".globl .StgReturn\n"
763                 ".type .StgReturn,@function\n"
764                 ".StgReturn:\n"
765                         "\tmr 3,14\n"
766                         "\tla 5, %0(1)\n" // load address == addi r5, r1, %0
767                         "\tld 2, -296(5)\n"
768                         "\tld 14, -288(5)\n"
769                         "\tld 15, -280(5)\n"
770                         "\tld 16, -272(5)\n"
771                         "\tld 17, -264(5)\n"
772                         "\tld 18, -256(5)\n"
773                         "\tld 19, -248(5)\n"
774                         "\tld 20, -240(5)\n"
775                         "\tld 21, -232(5)\n"
776                         "\tld 22, -224(5)\n"
777                         "\tld 23, -216(5)\n"
778                         "\tld 24, -208(5)\n"
779                         "\tld 25, -200(5)\n"
780                         "\tld 26, -192(5)\n"
781                         "\tld 27, -184(5)\n"
782                         "\tld 28, -176(5)\n"
783                         "\tld 29, -168(5)\n"
784                         "\tld 30, -160(5)\n"
785                         "\tld 31, -152(5)\n"
786                         "\tlfd 14, -144(5)\n"
787                         "\tlfd 15, -136(5)\n"
788                         "\tlfd 16, -128(5)\n"
789                         "\tlfd 17, -120(5)\n"
790                         "\tlfd 18, -112(5)\n"
791                         "\tlfd 19, -104(5)\n"
792                         "\tlfd 20, -96(5)\n"
793                         "\tlfd 21, -88(5)\n"
794                         "\tlfd 22, -80(5)\n"
795                         "\tlfd 23, -72(5)\n"
796                         "\tlfd 24, -64(5)\n"
797                         "\tlfd 25, -56(5)\n"
798                         "\tlfd 26, -48(5)\n"
799                         "\tlfd 27, -40(5)\n"
800                         "\tlfd 28, -32(5)\n"
801                         "\tlfd 29, -24(5)\n"
802                         "\tlfd 30, -16(5)\n"
803                         "\tlfd 31, -8(5)\n"
804                         "\tmr 1, 5\n"
805                         "\tld 0, 16(1)\n"
806                         "\tmtlr 0\n"
807                         "\tblr\n"
808         : : "i"(RESERVED_C_STACK_BYTES+304 /*stack frame size*/));
809 }
810 #else // linux_HOST_OS
811 #error Only linux support for power64 right now.
812 #endif
813
814 #endif
815
816 /* -----------------------------------------------------------------------------
817    IA64 architecture
818
819    Again, in assembler - so we can fiddle with the register stack, and because
820    gcc doesn't handle asm-clobbered callee-saves correctly.
821
822    loc0  - loc15: preserved locals
823    loc16 - loc28: STG registers
824            loc29: saved ar.pfs
825            loc30: saved b0
826            loc31: saved gp (gcc 3.3 uses this slot)
827    -------------------------------------------------------------------------- */
828
829 #ifdef ia64_HOST_ARCH
830
831 /* the memory stack is rarely used, so 16K is excessive */
/* IA64 overrides the reserved C stack size for the rest of this file. */
832 #undef RESERVED_C_STACK_BYTES
833 #define RESERVED_C_STACK_BYTES 1024
834
/* LOCALS is the number of local registers requested by the `alloc`
 * instruction below (asm operand %1): 32 for gcc 3.3 and later, else 31. */
835 #if ((__GNUC__ == 3) && (__GNUC_MINOR__ >= 3)) || (__GNUC__ > 3)
836 /* gcc 3.3+: leave an extra slot for gp saves */
837 #define LOCALS 32
838 #else
839 #define LOCALS 31
840 #endif
841
/*
 * Container for the IA64 StgRun/StgReturn assembly; the C function is not
 * called anywhere in the code visible here.
 *
 * asm operands: %0 = RESERVED_C_STACK_BYTES + 6*16 (memory stack frame
 *               size; the 6*16 bytes hold the spilled f16-f21),
 *               %1 = LOCALS.
 *
 * StgRun:    allocate the register frame (previous ar.pfs kept in loc29);
 *            load the procedure address and then gp through r32; lower
 *            sp by %0; spill f16-f21; keep b0 in loc30; branch via b6.
 * StgReturn: r8 = loc16 (result); refill f16-f21; restore ar.pfs and b0;
 *            raise sp by %0; return through b0.
 */
842 static void StgRunIsImplementedInAssembler(void)
843 {
844     __asm__ volatile(
845                 ".global StgRun\n"
846                 "StgRun:\n"
847                 "\talloc loc29 = ar.pfs, 0, %1, 8, 0\n" /* setup register frame */
848                 "\tld8 r18 = [r32],8\n"                 /* get procedure address */
849                 "\tadds sp = -%0, sp ;;\n"              /* setup stack */
850                 "\tld8 gp = [r32]\n"                    /* get procedure GP */
851                 "\tadds r16 = %0-(6*16), sp\n"
852                 "\tadds r17 = %0-(5*16), sp ;;\n"
853                 "\tstf.spill [r16] = f16,32\n"          /* spill callee-saved fp regs */
854                 "\tstf.spill [r17] = f17,32\n"
855                 "\tmov b6 = r18 ;;\n"                   /* set target address */
856                 "\tstf.spill [r16] = f18,32\n"
857                 "\tstf.spill [r17] = f19,32\n"
858                 "\tmov loc30 = b0 ;;\n"                 /* save return address */
859                 "\tstf.spill [r16] = f20,32\n"
860                 "\tstf.spill [r17] = f21,32\n"
861                 "\tbr.few b6 ;;\n"                      /* branch to function */
862                 ".global StgReturn\n"
863                 "StgReturn:\n"
864                 "\tmov r8 = loc16\n"            /* return value in r8 */
865                 "\tadds r16 = %0-(6*16), sp\n"
866                 "\tadds r17 = %0-(5*16), sp ;;\n"
867                 "\tldf.fill f16 = [r16],32\n"   /* start restoring fp regs */
868                 "\tldf.fill f17 = [r17],32\n"
869                 "\tmov ar.pfs = loc29 ;;\n"     /* restore register frame */
870                 "\tldf.fill f18 = [r16],32\n"
871                 "\tldf.fill f19 = [r17],32\n"
872                 "\tmov b0 = loc30 ;;\n"         /* restore return address */
873                 "\tldf.fill f20 = [r16],32\n"
874                 "\tldf.fill f21 = [r17],32\n"
875                 "\tadds sp = %0, sp\n"          /* restore stack */
876                 "\tbr.ret.sptk.many b0 ;;\n"    /* return */
877         : : "i"(RESERVED_C_STACK_BYTES + 6*16), "i"(LOCALS));
878 }
879
880 #endif
881
882 #endif /* !USE_MINIINTERPRETER */