1 /* -----------------------------------------------------------------------------
2 * Foreign export adjustor thunks
6 * ---------------------------------------------------------------------------*/
8 /* A little bit of background...
10 An adjustor thunk is a dynamically allocated code snippet that allows
11 Haskell closures to be viewed as C function pointers.
13 Stable pointers provide a way for the outside world to get access to,
14 and evaluate, Haskell heap objects, with the RTS providing a small
15 range of ops for doing so. So, assuming we've got a stable pointer in
16 our hand in C, we can jump into the Haskell world and evaluate a callback
17 procedure, say. This works OK in some cases where callbacks are used, but
18 does require the external code to know about stable pointers and how to deal
19 with them. We'd like to hide the Haskell-nature of a callback and have it
20 be invoked just like any other C function pointer.
22 Enter adjustor thunks. An adjustor thunk is a little piece of code
23 that's generated on-the-fly (one per Haskell closure being exported)
24 that, when entered using some 'universal' calling convention (e.g., the
25 C calling convention on platform X), pushes an implicit stable pointer
26 (to the Haskell callback) before calling another (static) C function stub
27 which takes care of entering the Haskell code via its stable pointer.
29 An adjustor thunk is allocated on the C heap, and is called from within
30 Haskell just before handing out the function pointer to the Haskell (IO)
31 action. User code should never have to invoke it explicitly.
33 An adjustor thunk differs from a C function pointer in one respect: when
34 the code is through with it, it has to be freed in order to release Haskell
35 and C resources. Failure to do so results in memory leaks on both the C and
39 #include "PosixSource.h"
41 #include "RtsExternal.h"
49 #if defined(powerpc_HOST_ARCH) && defined(linux_HOST_OS)
53 #ifdef LEADING_UNDERSCORE
54 #define UNDERSCORE "_"
58 #if defined(i386_HOST_ARCH) && !defined(darwin_HOST_OS)
60   Now here's something obscure for you:
62   When generating an adjustor thunk that uses the C calling
63   convention, we have to make sure that the thunk kicks off
64   the process of jumping into Haskell with a tail jump. Why?
65   Because as a result of jumping into Haskell we may end
66   up freeing the very adjustor thunk we came from using
67   freeHaskellFunctionPtr(). Hence, we better not return to
68   the adjustor code on our way out, since it could by then
71   The fix is readily at hand, just include the opcodes
72   for the C stack fixup code that we need to perform when
73   returning in some static piece of memory and arrange
74   to return to it before tail jumping from the adjustor thunk.
/* Static stub holding obscure_ccall_ret_code: the C-stack fixup code that
   ccall adjustors tail-return through (see comment above).
   NOTE(review): the inline-asm body of this wrapper is not fully visible in
   this extraction — the actual ret-code instructions are elided here. */
76 static void  GNUC3_ATTRIBUTE(used) obscure_ccall_wrapper(void)
79    ".globl " UNDERSCORE "obscure_ccall_ret_code\n"
80 UNDERSCORE "obscure_ccall_ret_code:\n\t"
/* Exported label of the asm snippet above, usable as a C function pointer. */
85 extern void obscure_ccall_ret_code(void);
87 #if defined(openbsd_HOST_OS)
/* OpenBSD (W^X): a writable+executable copy of obscure_ccall_ret_code,
   filled in by initAdjustor() below. */
88 static unsigned char *obscure_ccall_ret_code_dyn;
93 #if defined(x86_64_HOST_ARCH)
/* x86_64 version of the static ret-code stub; same rationale as the i386
   version above. NOTE(review): the inline-asm body is elided in this
   extraction. */
94 static void GNUC3_ATTRIBUTE(used) obscure_ccall_wrapper(void)
97    ".globl " UNDERSCORE "obscure_ccall_ret_code\n"
98 UNDERSCORE "obscure_ccall_ret_code:\n\t"
103 extern void obscure_ccall_ret_code(void);
106 #if defined(alpha_HOST_ARCH)
107 /* To get the definition of PAL_imb (instruction-memory barrier PALcode op): */
108 # if defined(linux_HOST_OS)
109 #  include <asm/pal.h>
111 #  include <machine/pal.h>
115 #if defined(ia64_HOST_ARCH)
118 /* Layout of a function descriptor */
/* ia64 function pointers point at a descriptor (entry ip + gp), not at code. */
119 typedef struct _IA64FunDesc {
/* Allocate size_in_bytes of executable memory on the Haskell heap as an
   StgArrWords, pin it with a stable pointer (written to *stable) so the GC
   keeps it alive, and return a pointer to the payload.  Used on platforms
   (ia64) where malloc'd memory is not executable. */
125 stgAllocStable(size_t size_in_bytes, StgStablePtr *stable)
128   nat data_size_in_words, total_size_in_words;
130   /* round up to a whole number of words */
/* NOTE(review): the usual round-up idiom is (size + sizeof(W_) - 1) / sizeof(W_);
   the "+ sizeof(W_) + 1" below over-allocates one extra word for exact
   multiples of the word size. Harmless but likely unintended — confirm. */
131   data_size_in_words  = (size_in_bytes + sizeof(W_) + 1) / sizeof(W_);
132   total_size_in_words = sizeofW(StgArrWords) + data_size_in_words;
134   /* allocate and fill it in */
135   arr = (StgArrWords *)allocate(total_size_in_words);
136   SET_ARR_HDR(arr, &stg_ARR_WORDS_info, CCCS, data_size_in_words);
138   /* obtain a stable ptr */
139   *stable = getStablePtr((StgPtr)arr);
141   /* and return a ptr to the goods inside the array */
142   return(&(arr->payload));
146 #if defined(powerpc_HOST_ARCH) && defined(linux_HOST_OS)
/* ppc-linux static ret-code: tears down the stack frame built by the
   generated adjustor (see createAdjustor's ppc-linux case). Body elided
   in this extraction. */
147 __asm__("obscure_ccall_ret_code:\n\t"
152 extern void obscure_ccall_ret_code(void);
155 #if defined(powerpc_HOST_ARCH) || defined(powerpc64_HOST_ARCH)
156 #if !(defined(powerpc_HOST_ARCH) && defined(linux_HOST_OS))
158 /* !!! !!! WARNING: !!! !!!
159  * This structure is accessed from AdjustorAsm.s
160  * Any changes here have to be mirrored in the offsets there.
/* Per-adjustor descriptor handed to the out-of-line adjustorCode routine.
   Field layout varies by ABI (Darwin stub code vs. AIX-style fundescs). */
163 typedef struct AdjustorStub {
164 #if defined(powerpc_HOST_ARCH) && defined(darwin_HOST_OS)
171 #elif defined(powerpc64_HOST_ARCH) && defined(darwin_HOST_OS)
172     /* powerpc64-darwin: just guessing that it won't use fundescs. */
183     /* fundesc-based ABIs */
192     StgInt negative_framesize;
193     StgInt extrawords_plus_one;
199 #if defined(i386_HOST_ARCH) && defined(darwin_HOST_OS)
201 /* !!! !!! WARNING: !!! !!!
202  * This structure is accessed from AdjustorAsm.s
203  * Any changes here have to be mirrored in the offsets there.
206 typedef struct AdjustorStub {
/* 8 bytes of machine code: a call into adjustorCode (filled in at runtime). */
207     unsigned char call[8];
211     StgInt argument_size;
215 #if defined(darwin_HOST_OS) || defined(powerpc_HOST_ARCH) || defined(powerpc64_HOST_ARCH)
/* Sum the argument words described by typeString ('d'/'l' count double on
   32-bit platforms, everything else one word). */
216 static int totalArgumentSize(char *typeString)
221         char t = *typeString++;
225             // on 32-bit platforms, Double and Int64 occupy two words.
228         if(sizeof(void*) == 4)
233         // everything else is one word.
/* createAdjustor: build a fresh native-code thunk that, when called with the
   given calling convention (cconv: 0 = stdcall, 1 = ccall), injects hptr as
   an implicit first argument and tail-jumps to the static stub wptr.
   NOTE(review): this extraction elides many lines of the function (signature
   tail, switch scaffolding, #else/#endif lines). */
243 createAdjustor(int cconv, StgStablePtr hptr,
246 #if !defined(powerpc_HOST_ARCH) && !defined(powerpc64_HOST_ARCH) && !defined(x86_64_HOST_ARCH)
251   void *adjustor = NULL;
255     case 0: /* _stdcall */
256 #if defined(i386_HOST_ARCH) && !defined(darwin_HOST_OS)
257     /* Magic constant computed by inspecting the code length of
258        the following assembly language snippet
259        (offset and machine code prefixed):
261   <0>:	58	          popl   %eax              # temp. remove ret addr..
262   <1>:	68 fd fc fe fa    pushl  0xfafefcfd  	   # constant is large enough to hold a StgStablePtr
263                                                    # hold a StgStablePtr
264   <6>:	50	          pushl  %eax		   # put back ret. addr
265   <7>:	b8 fa ef ff 00	  movl   $0x00ffeffa, %eax # load up wptr
266   <c>: 	ff e0             jmp    %eax        	   # and jump to it.
267 		# the callee cleans up the stack
/* 14 = total byte length of the snippet above. */
269     adjustor = stgMallocBytesRWX(14);
271 	unsigned char *const adj_code = (unsigned char *)adjustor;
272 	adj_code[0x00] = (unsigned char)0x58;  /* popl %eax  */
274 	adj_code[0x01] = (unsigned char)0x68;  /* pushl hptr (which is a dword immediate ) */
275 	*((StgStablePtr*)(adj_code + 0x02)) = (StgStablePtr)hptr;
277 	adj_code[0x06] = (unsigned char)0x50; /* pushl %eax */
279 	adj_code[0x07] = (unsigned char)0xb8; /* movl  $wptr, %eax */
280 	*((StgFunPtr*)(adj_code + 0x08)) = (StgFunPtr)wptr;
282 	adj_code[0x0c] = (unsigned char)0xff; /* jmp %eax */
283 	adj_code[0x0d] = (unsigned char)0xe0;
289 #if defined(i386_HOST_ARCH) && !defined(darwin_HOST_OS)
290     /* Magic constant computed by inspecting the code length of
291        the following assembly language snippet
292        (offset and machine code prefixed):
294   <00>: 68 ef be ad de     pushl $0xdeadbeef  	   # constant is large enough to hold a StgStablePtr
295                                                    # hold a StgStablePtr
296   <05>:	b8 fa ef ff 00	   movl  $0x00ffeffa, %eax # load up wptr
297   <0a>: 68 ef be ad de     pushl $obscure_ccall_ret_code # push the return address
298   <0f>: ff e0              jmp   *%eax            # jump to wptr
300     The ccall'ing version is a tad different, passing in the return
301     address of the caller to the auto-generated C stub (which enters
302     via the stable pointer.) (The auto-generated C stub is in on this
303     game, don't worry :-)
305     See the comment next to obscure_ccall_ret_code why we need to
306     perform a tail jump instead of a call, followed by some C stack
309     Note: The adjustor makes the assumption that any return value
310     coming back from the C stub is not stored on the stack.
311     That's (thankfully) the case here with the restricted set of
312     return types that we support.
/* 17 = total byte length of the ccall snippet above. */
314     adjustor = stgMallocBytesRWX(17);
316 	unsigned char *const adj_code = (unsigned char *)adjustor;
318 	adj_code[0x00] = (unsigned char)0x68;  /* pushl hptr (which is a dword immediate ) */
319 	*((StgStablePtr*)(adj_code+0x01)) = (StgStablePtr)hptr;
321 	adj_code[0x05] = (unsigned char)0xb8;  /* movl  $wptr, %eax */
322 	*((StgFunPtr*)(adj_code + 0x06)) = (StgFunPtr)wptr;
324 	adj_code[0x0a] = (unsigned char)0x68;  /* pushl obscure_ccall_ret_code */
325 	*((StgFunPtr*)(adj_code + 0x0b)) =
326 #if !defined(openbsd_HOST_OS)
327 	        (StgFunPtr)obscure_ccall_ret_code;
/* OpenBSD W^X: return through the dynamically-copied (executable) version. */
329 	        (StgFunPtr)obscure_ccall_ret_code_dyn;
332 	adj_code[0x0f] = (unsigned char)0xff; /* jmp *%eax */
333 	adj_code[0x10] = (unsigned char)0xe0;
335 #elif defined(i386_HOST_ARCH) && defined(darwin_HOST_OS)
338         What's special about Darwin/Mac OS X on i386?
339         It wants the stack to stay 16-byte aligned.
341         We offload most of the work to AdjustorAsm.S.
343         AdjustorStub *adjustorStub = stgMallocBytesRWX(sizeof(AdjustorStub));
344         adjustor = adjustorStub;
346         extern void adjustorCode(void);
347         int sz = totalArgumentSize(typeString);
/* 0xe8 = relative CALL opcode; the displacement (next 4 bytes) targets
   adjustorCode, computed relative to the instruction end (stub + 5). */
349         adjustorStub->call[0] = 0xe8;
350         *(long*)&adjustorStub->call[1] = ((char*)&adjustorCode) - ((char*)adjustorStub + 5);
351         adjustorStub->hptr = hptr;
352         adjustorStub->wptr = wptr;
353         adjustorStub->frame_size = sz * 4 + 12 /* ebp save + extra args */;
354         adjustorStub->frame_size = (adjustorStub->frame_size + 15) & ~15; // align to 16 bytes
355         adjustorStub->frame_size -= 12; // we push the extra args separately
356         adjustorStub->argument_size = sz;
359 #elif defined(x86_64_HOST_ARCH)
/* x86_64 SysV: first six integer args arrive in registers; shuffle them
   up by one and load hptr into %rdi (the new first argument). */
366 	  %rdi,%rsi,%rdx,%rcx,%r8,%r9 = arg0..arg6
368 	  if there are <6 integer args, then we can just push the
369 	  StablePtr into %edi and shuffle the other args up.
371 	  If there are >=6 integer args, then we have to flush one arg
372 	  to the stack, and arrange to adjust the stack ptr on return.
373 	  The stack will be rearranged to this:
378 	  return address *** <-- dummy arg in stub fn.
380 	  obscure_ccall_ret_code
382 	  This unfortunately means that the type of the stub function
383 	  must have a dummy argument for the original return address
384 	  pointer inserted just after the 6th integer argument.
386 	  Code for the simple case:
388    0:   4d 89 c1                mov    %r8,%r9
389    3:   49 89 c8                mov    %rcx,%r8
390    6:   48 89 d1                mov    %rdx,%rcx
391    9:   48 89 f2                mov    %rsi,%rdx
392    c:   48 89 fe                mov    %rdi,%rsi
393    f:   48 8b 3d 0a 00 00 00    mov    10(%rip),%rdi
394   16:   e9 00 00 00 00          jmpq   stub_function
396   20: .quad 0  # aligned on 8-byte boundary
399   And the version for >=6 integer arguments:
402    2:   68 00 00 00 00          pushq  $obscure_ccall_ret_code
403    7:   4d 89 c1                mov    %r8,%r9
404    a:   49 89 c8                mov    %rcx,%r8
405    d:   48 89 d1                mov    %rdx,%rcx
406   10:   48 89 f2                mov    %rsi,%rdx
407   13:   48 89 fe                mov    %rdi,%rsi
408   16:   48 8b 3d 0b 00 00 00    mov    11(%rip),%rdi
409   1d:   e9 00 00 00 00          jmpq   stub_function
411   28: .quad 0  # aligned on 8-byte boundary
414     /* we assume the small code model (gcc -mcmodel=small) where
415      * all symbols are <2^32, so hence wptr should fit into 32 bits.
417     ASSERT(((long)wptr >> 32) == 0);
423 	// determine whether we have 6 or more integer arguments,
424 	// and therefore need to flush one to the stack.
425 	for (c = typeString; *c != '\0'; c++) {
426 	    if (*c == 'i' || *c == 'l') i++;
/* Simple (<6 int args) case: 40 bytes = code + padding + hptr quadword. */
431 	    adjustor = stgMallocBytesRWX(40);
433 	    *(StgInt32 *)adjustor        = 0x49c1894d;
434 	    *(StgInt32 *)(adjustor+4)    = 0x8948c889;
435 	    *(StgInt32 *)(adjustor+8)    = 0xf28948d1;
436 	    *(StgInt32 *)(adjustor+12)   = 0x48fe8948;
437 	    *(StgInt32 *)(adjustor+16)   = 0x000a3d8b;
438 	    *(StgInt32 *)(adjustor+20)   = 0x00e90000;
/* Patch the jmpq displacement (rel32 at offset 0x17, relative to offset 0x1b). */
440 	    *(StgInt32 *)(adjustor+23) =
441 		(StgInt32)((StgInt64)wptr - (StgInt64)adjustor - 27);
442 	    *(StgInt64 *)(adjustor+32) = (StgInt64)hptr;
/* >=6 int args case: 48 bytes, with the extra pushq of the ret-code. */
446 	    adjustor = stgMallocBytesRWX(48);
448 	    *(StgInt32 *)adjustor        = 0x00685141;
449 	    *(StgInt32 *)(adjustor+4)    = 0x4d000000;
450 	    *(StgInt32 *)(adjustor+8)    = 0x8949c189;
451 	    *(StgInt32 *)(adjustor+12)   = 0xd18948c8;
452 	    *(StgInt32 *)(adjustor+16)   = 0x48f28948;
453 	    *(StgInt32 *)(adjustor+20)   = 0x8b48fe89;
454 	    *(StgInt32 *)(adjustor+24)   = 0x00000b3d;
455 	    *(StgInt32 *)(adjustor+28)   = 0x0000e900;
/* Patch the pushq immediate (abs32 of the ret-code; fits per small model). */
457 	    *(StgInt32 *)(adjustor+3) =
458 		(StgInt32)(StgInt64)obscure_ccall_ret_code;
459 	    *(StgInt32 *)(adjustor+30) =
460 		(StgInt32)((StgInt64)wptr - (StgInt64)adjustor - 34);
461 	    *(StgInt64 *)(adjustor+40) = (StgInt64)hptr;
464 #elif defined(sparc_HOST_ARCH)
465   /* Magic constant computed by inspecting the code length of the following
466      assembly language snippet (offset and machine code prefixed):
468      <00>: 9C23A008   sub   %sp, 8, %sp         ! make room for %o4/%o5 in caller's frame
469      <04>: DA23A060   st    %o5, [%sp + 96]     ! shift registers by 2 positions
470      <08>: D823A05C   st    %o4, [%sp + 92]
471      <0C>: 9A10000B   mov   %o3, %o5
472      <10>: 9810000A   mov   %o2, %o4
473      <14>: 96100009   mov   %o1, %o3
474      <18>: 94100008   mov   %o0, %o2
475      <1C>: 13000000   sethi %hi(wptr), %o1      ! load up wptr (1 of 2)
476      <20>: 11000000   sethi %hi(hptr), %o0      ! load up hptr (1 of 2)
477      <24>: 81C26000   jmp   %o1 + %lo(wptr)     ! jump to wptr (load 2 of 2)
478      <28>: 90122000   or    %o0, %lo(hptr), %o0 ! load up hptr (2 of 2, delay slot)
479      <2C>  00000000                             ! place for getting hptr back easily
481      ccall'ing on SPARC is easy, because we are quite lucky to push a
482      multiple of 8 bytes (1 word hptr + 1 word dummy arg) in front of the
483      existing arguments (note that %sp must stay double-word aligned at
484      all times, see ABI spec at http://www.sparc.org/standards/psABI3rd.pdf).
485      To do this, we extend the *caller's* stack frame by 2 words and shift
486      the output registers used for argument passing (%o0 - %o5, we are a *leaf*
487      procedure because of the tail-jump) by 2 positions. This makes room in
488      %o0 and %o1 for the additional arguments, namely  hptr and a dummy (used
489      for destination addr of jump on SPARC, return address on x86, ...). This
490      shouldn't cause any problems for a C-like caller: alloca is implemented
491      similarly, and local variables should be accessed via %fp, not %sp. In a
492      nutshell: This should work! (Famous last words! :-)
/* 11 instruction words + 1 word holding hptr (for freeHaskellFunctionPtr). */
494     adjustor = stgMallocBytesRWX(4*(11+1));
496         unsigned long *const adj_code = (unsigned long *)adjustor;
498         adj_code[ 0]  = 0x9C23A008UL;   /* sub   %sp, 8, %sp         */
499         adj_code[ 1]  = 0xDA23A060UL;   /* st    %o5, [%sp + 96]     */
500         adj_code[ 2]  = 0xD823A05CUL;   /* st    %o4, [%sp + 92]     */
501         adj_code[ 3]  = 0x9A10000BUL;   /* mov   %o3, %o5            */
502         adj_code[ 4]  = 0x9810000AUL;   /* mov   %o2, %o4            */
503         adj_code[ 5]  = 0x96100009UL;   /* mov   %o1, %o3            */
504         adj_code[ 6]  = 0x94100008UL;   /* mov   %o0, %o2            */
505         adj_code[ 7]  = 0x13000000UL;   /* sethi %hi(wptr), %o1      */
506         adj_code[ 7] |= ((unsigned long)wptr) >> 10;
507         adj_code[ 8]  = 0x11000000UL;   /* sethi %hi(hptr), %o0      */
508         adj_code[ 8] |= ((unsigned long)hptr) >> 10;
509         adj_code[ 9]  = 0x81C26000UL;   /* jmp   %o1 + %lo(wptr)     */
510         adj_code[ 9] |= ((unsigned long)wptr) & 0x000003FFUL;
511         adj_code[10]  = 0x90122000UL;   /* or    %o0, %lo(hptr), %o0 */
512         adj_code[10] |= ((unsigned long)hptr) & 0x000003FFUL;
514         adj_code[11]  = (unsigned long)hptr;
/* Flush the freshly-written instructions out of the I-cache. */
517         asm("flush %0" : : "r" (adj_code     ));
518         asm("flush %0" : : "r" (adj_code +  2));
519         asm("flush %0" : : "r" (adj_code +  4));
520         asm("flush %0" : : "r" (adj_code +  6));
521         asm("flush %0" : : "r" (adj_code + 10));
523         /* max. 5 instructions latency, and we need at >= 1 for returning */
529 #elif defined(alpha_HOST_ARCH)
530   /* Magic constant computed by inspecting the code length of
531      the following assembly language snippet
532      (offset and machine code prefixed; note that the machine code
533      shown is longwords stored in little-endian order):
535   <00>: 46520414        mov     a2, a4
536   <04>: 46100412        mov     a0, a2
537   <08>: a61b0020        ldq     a0, 0x20(pv)    # load up hptr
538   <0c>: 46730415        mov     a3, a5
539   <10>: a77b0028        ldq     pv, 0x28(pv)    # load up wptr
540   <14>: 46310413        mov     a1, a3
541   <18>: 6bfb----        jmp     (pv), <hint>    # jump to wptr (with hint)
542   <1c>: 00000000                                # padding for alignment
543   <20>: [8 bytes for hptr quadword]
544   <28>: [8 bytes for wptr quadword]
546      The "computed" jump at <08> above is really a jump to a fixed
547      location.  Accordingly, we place an always-correct hint in the
548      jump instruction, namely the address offset from <0c> to wptr,
549      divided by 4, taking the lowest 14 bits.
551      We only support passing 4 or fewer argument words, for the same
552      reason described under sparc_HOST_ARCH above by JRS, 21 Aug 01.
553      On the Alpha the first 6 integer arguments are in a0 through a5,
554      and the rest on the stack.  Hence we want to shuffle the original
555      caller's arguments by two.
557      On the Alpha the calling convention is so complex and dependent
558      on the callee's signature -- for example, the stack pointer has
559      to be a multiple of 16 -- that it seems impossible to me [ccshan]
560      to handle the general case correctly without changing how the
561      adjustor is called from C.  For now, our solution of shuffling
562      registers only and ignoring the stack only works if the original
563      caller passed 4 or fewer argument words.
565 TODO: Depending on how much allocation overhead stgMallocBytes uses for
566       header information (more precisely, if the overhead is no more than
567       4 bytes), we should move the first three instructions above down by
568       4 bytes (getting rid of the nop), hence saving memory. [ccshan]
/* wptr must be longword-aligned for the jump hint computation below. */
570     ASSERT(((StgWord64)wptr & 3) == 0);
571     adjustor = stgMallocBytesRWX(48);
573 	StgWord64 *const code = (StgWord64 *)adjustor;
575 	code[0] = 0x4610041246520414L;
576 	code[1] = 0x46730415a61b0020L;
577 	code[2] = 0x46310413a77b0028L;
/* jmp (pv) with the 14-bit branch-prediction hint described above. */
578 	code[3] = 0x000000006bfb0000L
579 		    | (((StgWord32*)(wptr) - (StgWord32*)(code) - 3) & 0x3fff);
581 	code[4] = (StgWord64)hptr;
582 	code[5] = (StgWord64)wptr;
584 	/* Ensure that instruction cache is consistent with our new code */
585 	__asm__ volatile("call_pal %0" : : "i" (PAL_imb));
587 #elif defined(powerpc_HOST_ARCH) && defined(linux_HOST_OS)
/* Helpers to splice a 16-bit low/high half of a constant into an opcode word. */
589 #define OP_LO(op,lo)  ((((unsigned)(op)) << 16) | (((unsigned)(lo)) & 0xFFFF))
590 #define OP_HI(op,hi)  ((((unsigned)(op)) << 16) | (((unsigned)(hi)) >> 16))
592     /* The PowerPC Linux (32-bit) calling convention is annoyingly complex.
593        We need to calculate all the details of the stack frame layout,
594        taking into account the types of all the arguments, and then
595        generate code on the fly. */
597     int src_gpr = 3, dst_gpr = 5;
599     int src_offset = 0, dst_offset = 0;
600     int n = strlen(typeString),i;
601     int src_locs[n], dst_locs[n];
606         Calculate where the arguments should go.
607         src_locs[] will contain the locations of the arguments in the
608         original stack frame passed to the adjustor.
609         dst_locs[] will contain the locations of the arguments after the
610         adjustor runs, on entry to the wrapper proc pointed to by wptr.
612         This algorithm is based on the one described on page 3-19 of the
613         System V ABI PowerPC Processor Supplement.
/* Location encoding (apparent from the uses below): -32-k = FPR k,
   -k (k in 3..10) = GPR k, non-negative = stack offset. */
615     for(i=0;typeString[i];i++)
617         char t = typeString[i];
618         if((t == 'f' || t == 'd') && fpr <= 8)
619             src_locs[i] = dst_locs[i] = -32-(fpr++);
/* 'l' (64-bit) needs an even/odd aligned GPR pair on 32-bit PPC. */
622             if(t == 'l' && src_gpr <= 9)
624                 if((src_gpr & 1) == 0)
626                 src_locs[i] = -src_gpr;
629             else if(t == 'i' && src_gpr <= 10)
631                 src_locs[i] = -(src_gpr++);
635                 if(t == 'l' || t == 'd')
640                 src_locs[i] = src_offset;
641                 src_offset += (t == 'l' || t == 'd') ? 8 : 4;
644             if(t == 'l' && dst_gpr <= 9)
646                 if((dst_gpr & 1) == 0)
648                 dst_locs[i] = -dst_gpr;
651             else if(t == 'i' && dst_gpr <= 10)
653                 dst_locs[i] = -(dst_gpr++);
657                 if(t == 'l' || t == 'd')
662                 dst_locs[i] = dst_offset;
663                 dst_offset += (t == 'l' || t == 'd') ? 8 : 4;
/* 8 bytes = back chain + saved LR slot; round the frame to 16 bytes. */
668     frameSize = dst_offset + 8;
669     frameSize = (frameSize+15) & ~0xF;
674     // allocate space for at most 4 insns per parameter
675     // plus 14 more instructions.
676     adjustor = stgMallocBytesRWX(4 * (4*n + 14));
677     code = (unsigned*)adjustor;
679     *code++ = 0x48000008;   // b *+8
680         // * Put the hptr in a place where freeHaskellFunctionPtr
682     *code++ = (unsigned) hptr;
684         // * save the link register
685     *code++ = 0x7c0802a6;   // mflr r0;
686     *code++ = 0x90010004;   // stw r0, 4(r1);
687         // * and build a new stack frame
688     *code++ = OP_LO(0x9421, -frameSize); // stwu r1, -frameSize(r1)
690         // * now generate instructions to copy arguments
691         //   from the old stack frame into the new stack frame.
694         if(src_locs[i] < -32)
695             ASSERT(dst_locs[i] == src_locs[i]);
696         else if(src_locs[i] < 0)
699             ASSERT(typeString[i] != 'f' && typeString[i] != 'd');
702                 ASSERT(dst_locs[i] > -32);
703                     // dst is in GPR, too.
705                 if(typeString[i] == 'l')
709                         | ((-dst_locs[i]+1) << 16)
710                         | ((-src_locs[i]+1) << 11)
711                         | ((-src_locs[i]+1) << 21);
715                     | ((-dst_locs[i]) << 16)
716                     | ((-src_locs[i]) << 11)
717                     | ((-src_locs[i]) << 21);
721                 if(typeString[i] == 'l')
723                         // stw src+1, dst_offset+4(r1)
725                         | ((-src_locs[i]+1) << 21)
729                     // stw src, dst_offset(r1)
731                     | ((-src_locs[i]) << 21)
737             ASSERT(dst_locs[i] >= 0);
738             ASSERT(typeString[i] != 'f' && typeString[i] != 'd');
740             if(typeString[i] == 'l')
742                 // lwz r0, src_offset(r1)
744                     | (src_locs[i] + frameSize + 8 + 4);
745                 // stw r0, dst_offset(r1)
747                     | (dst_locs[i] + 8 + 4);
749             // lwz r0, src_offset(r1)
751                 | (src_locs[i] + frameSize + 8);
752             // stw r0, dst_offset(r1)
758         // * hptr will be the new first argument.
760     *code++ = OP_HI(0x3c60, hptr);
761         // ori r3,r3,lo(hptr)
762     *code++ = OP_LO(0x6063, hptr);
764         // * we need to return to a piece of code
765         //   which will tear down the stack frame.
766         // lis r11,hi(obscure_ccall_ret_code)
767     *code++ = OP_HI(0x3d60, obscure_ccall_ret_code);
768         // ori r11,r11,lo(obscure_ccall_ret_code)
769     *code++ = OP_LO(0x616b, obscure_ccall_ret_code);
/* mtlr r11 — the ret-code becomes our return address. */
771     *code++ = 0x7d6803a6;
775     *code++ = OP_HI(0x3d60, wptr);
776         // ori r11,r11,lo(wptr)
777     *code++ = OP_LO(0x616b, wptr);
/* mtctr r11, then bctr: tail-jump to wptr. */
779     *code++ = 0x7d6903a6;
781     *code++ = 0x4e800420;
783     // Flush the Instruction cache:
785         unsigned *p = adjustor;
788             __asm__ volatile ("dcbf 0,%0\n\tsync\n\ticbi 0,%0"
792         __asm__ volatile ("sync\n\tisync");
796 #elif defined(powerpc_HOST_ARCH) || defined(powerpc64_HOST_ARCH)
798 #define OP_LO(op,lo)  ((((unsigned)(op)) << 16) | (((unsigned)(lo)) & 0xFFFF))
799 #define OP_HI(op,hi)  ((((unsigned)(op)) << 16) | (((unsigned)(hi)) >> 16))
801     /* The following code applies to all PowerPC and PowerPC64 platforms
802        whose stack layout is based on the AIX ABI.
804        Besides (obviously) AIX, this includes
805         Mac OS 9 and BeOS/PPC (may they rest in peace),
806                 which use the 32-bit AIX ABI
808                 which uses the 64-bit AIX ABI
809         and Darwin (Mac OS X),
810                 which uses the same stack layout as AIX,
811                 but no function descriptors.
813        The actual stack-frame shuffling is implemented out-of-line
814        in the function adjustorCode, in AdjustorAsm.S.
815        Here, we set up an AdjustorStub structure, which
816        is a function descriptor (on platforms that have function
817        descriptors) or a short piece of stub code (on Darwin) to call
818        adjustorCode with a pointer to the AdjustorStub struct loaded
821        One nice thing about this is that there is _no_ code generated at
822        runtime on the platforms that have function descriptors.
824     AdjustorStub *adjustorStub;
825     int sz = 0, extra_sz, total_sz;
827     // from AdjustorAsm.s
828     // not declared as a function so that AIX-style
829     // fundescs can never get in the way.
830     extern void *adjustorCode;
/* Fundesc ABIs: plain malloc is fine (no code is executed from the stub);
   Darwin needs RWX memory because the stub contains real instructions. */
833     adjustorStub = stgMallocBytes(sizeof(AdjustorStub), "createAdjustor");
835     adjustorStub = stgMallocBytesRWX(sizeof(AdjustorStub));
837     adjustor = adjustorStub;
839     adjustorStub->code = (void*) &adjustorCode;
842     // function descriptors are a cool idea.
843     // We don't need to generate any code at runtime.
844     adjustorStub->toc = adjustorStub;
847     // no function descriptors :-(
848     // We need to do things "by hand".
849 #if defined(powerpc_HOST_ARCH)
850     // lis  r2, hi(adjustorStub)
851     adjustorStub->lis = OP_HI(0x3c40, adjustorStub);
852     // ori  r2, r2, lo(adjustorStub)
853     adjustorStub->ori = OP_LO(0x6042, adjustorStub);
855     adjustorStub->lwz = OP_LO(0x8002, (char*)(&adjustorStub->code)
856                     - (char*)adjustorStub);
858     adjustorStub->mtctr = 0x7c0903a6;
860     adjustorStub->bctr = 0x4e800420;
862     barf("adjustor creation not supported on this platform");
865     // Flush the Instruction cache:
867         int n = sizeof(AdjustorStub)/sizeof(unsigned);
868         unsigned *p = (unsigned*)adjustor;
871             __asm__ volatile ("dcbf 0,%0\n\tsync\n\ticbi 0,%0"
875         __asm__ volatile ("sync\n\tisync");
879     // Calculate the size of the stack frame, in words.
880     sz = totalArgumentSize(typeString);
882     // The first eight words of the parameter area
883     // are just "backing store" for the parameters passed in
884     // the GPRs. extra_sz is the number of words beyond those first
890     // Calculate the total size of the stack frame.
891     total_sz = (6 /* linkage area */
892               + 8 /* minimum parameter area */
893               + 2 /* two extra arguments */
894               + extra_sz)*sizeof(StgWord);
896        // align to 16 bytes.
897        // AIX only requires 8 bytes, but who cares?
898     total_sz = (total_sz+15) & ~0xF;
900     // Fill in the information that adjustorCode in AdjustorAsm.S
901     // will use to create a new stack frame with the additional args.
902     adjustorStub->hptr = hptr;
903     adjustorStub->wptr = wptr;
904     adjustorStub->negative_framesize = -total_sz;
905     adjustorStub->extrawords_plus_one = extra_sz + 1;
908 #elif defined(ia64_HOST_ARCH)
910    Up to 8 inputs are passed in registers.  We flush the last two inputs to
911    the stack, initially into the 16-byte scratch region left by the caller.
912    We then shuffle the others along by 4 (taking 2 registers for ourselves
913    to save return address and previous function state - we need to come back
914    here on the way out to restore the stack, so this is a real function
915    rather than just a trampoline).
917    The function descriptor we create contains the gp of the target function
918    so gp is already loaded correctly.
920 	[MLX]       alloc r16=ar.pfs,10,2,0
922 	[MII]       st8.spill [r12]=r38,8		// spill in6 (out4)
923 		    mov r41=r37				// out7 = in5 (out3)
924 		    mov r40=r36;;			// out6 = in4 (out2)
925 	[MII]       st8.spill [r12]=r39			// spill in7 (out5)
927 		    mov r38=r34;;			// out4 = in2 (out0)
928 	[MII]       mov r39=r35				// out5 = in3 (out1)
929 		    mov r37=r33				// out3 = in1 (loc1)
930 		    mov r36=r32				// out2 = in0 (loc0)
931 	[MLX]       adds r12=-24,r12			// update sp
932 		    movl r34=hptr;;			// out0 = hptr
933 	[MIB]       mov r33=r16				// loc1 = ar.pfs
934 		    mov r32=b0				// loc0 = retaddr
935 		    br.call.sptk.many b0=b6;;
937 	[MII]       adds r12=-16,r12
942 		    br.ret.sptk.many b0;;
945 /* These macros distribute a long constant into the two words of an MLX bundle */
946 #define BITS(val,start,count)	(((val) >> (start)) & ((1 << (count))-1))
947 #define MOVL_LOWORD(val)	(BITS(val,22,18) << 46)
948 #define MOVL_HIWORD(val)	(BITS(val,40,23) | (BITS(val,0,7) << 36) | (BITS(val,7,9) << 50) \
949 				| (BITS(val,16,5) << 55) | (BITS(val,21,1) << 44) | BITS(val,63,1) << 59)
/* wptr is itself a fundesc; extract the target's real entry point. */
953     IA64FunDesc *wdesc = (IA64FunDesc *)wptr;
954     StgWord64 wcode = wdesc->ip;
958     /* we allocate on the Haskell heap since malloc'd memory isn't executable - argh */
959     adjustor = stgAllocStable(sizeof(IA64FunDesc)+18*8, &stable);
961     fdesc = (IA64FunDesc *)adjustor;
962     code = (StgWord64 *)(fdesc + 1);
963     fdesc->ip = (StgWord64)code;
964     fdesc->gp = wdesc->gp;
/* Pre-assembled bundles for the asm listing above, with wptr and hptr
   spliced into the movl immediates via the MOVL_* macros. */
966     code[0]  = 0x0000058004288004 | MOVL_LOWORD(wcode);
967     code[1]  = 0x6000000220000000 | MOVL_HIWORD(wcode);
968     code[2]  = 0x029015d818984001;
969     code[3]  = 0x8401200500420094;
970     code[4]  = 0x886011d8189c0001;
971     code[5]  = 0x84011004c00380c0;
972     code[6]  = 0x0250210046013800;
973     code[7]  = 0x8401000480420084;
974     code[8]  = 0x0000233f19a06005 | MOVL_LOWORD((StgWord64)hptr);
975     code[9]  = 0x6000000440000000 | MOVL_HIWORD((StgWord64)hptr);
976     code[10] = 0x0200210020010811;
977     code[11] = 0x1080006800006200;
978     code[12] = 0x0000210018406000;
979     code[13] = 0x00aa021000038005;
980     code[14] = 0x000000010000001d;
981     code[15] = 0x0084000880000200;
983     /* save stable pointers in convenient form */
984     code[16] = (StgWord64)hptr;
985     code[17] = (StgWord64)stable;
988     barf("adjustor creation not supported on this platform");
/* Free an adjustor created by createAdjustor: release its stable pointer
   (so the Haskell closure can be GC'd) and mark/free the code memory.
   Each architecture branch first sanity-checks the leading opcode bytes so
   a bogus pointer is rejected with a warning instead of corrupting memory. */
1003 freeHaskellFunctionPtr(void* ptr)
1005 #if defined(i386_HOST_ARCH)
/* Valid i386 adjustors start with pushl (0x68, ccall) or popl %eax (0x58, stdcall). */
1006  if ( *(unsigned char*)ptr != 0x68 &&
1007       *(unsigned char*)ptr != 0x58 ) {
1008    errorBelch("freeHaskellFunctionPtr: not for me, guv! %p\n", ptr);
1012  /* Free the stable pointer first..*/
1013  if (*(unsigned char*)ptr == 0x68) { /* Aha, a ccall adjustor! */
1014    freeStablePtr(*((StgStablePtr*)((unsigned char*)ptr + 0x01)));
1016    freeStablePtr(*((StgStablePtr*)((unsigned char*)ptr + 0x02)));
1018 #elif defined(x86_64_HOST_ARCH)
/* 0x894d = first two bytes of "mov %r8,%r9" (simple case);
   0x5141 = "push %r9" prefix (>=6-arg case). hptr offsets match the writer. */
1019  if ( *(StgWord16 *)ptr == 0x894d ) {
1020    freeStablePtr(*(StgStablePtr*)(ptr+32));
1021  } else if ( *(StgWord16 *)ptr == 0x5141 ) {
1022    freeStablePtr(*(StgStablePtr*)(ptr+40));
1024    errorBelch("freeHaskellFunctionPtr: not for me, guv! %p\n", ptr);
1027 #elif defined(sparc_HOST_ARCH)
1028  if ( *(unsigned long*)ptr != 0x9C23A008UL ) {
1029    errorBelch("freeHaskellFunctionPtr: not for me, guv! %p\n", ptr);
1033  /* Free the stable pointer first..*/
1034  freeStablePtr(*((StgStablePtr*)((unsigned long*)ptr + 11)));
1035 #elif defined(alpha_HOST_ARCH)
/* NOTE(review): createAdjustor's alpha case writes code[0] =
   0x4610041246520414L and stores hptr at byte offset 0x20, but this branch
   compares against 0xa77b0018a61b0010L and reads the stable ptr at offset
   0x10 — these appear inconsistent with the generator above (so every free
   would hit the errorBelch path). Verify before relying on alpha support. */
1036  if ( *(StgWord64*)ptr != 0xa77b0018a61b0010L ) {
1037    errorBelch("freeHaskellFunctionPtr: not for me, guv! %p\n", ptr);
1041  /* Free the stable pointer first..*/
1042  freeStablePtr(*((StgStablePtr*)((unsigned char*)ptr + 0x10)));
1043 #elif defined(powerpc_HOST_ARCH) && defined(linux_HOST_OS)
/* 0x48000008 = "b *+8", the first instruction the ppc-linux writer emits;
   hptr sits in the following word (see the "b *+8; .long hptr" prologue). */
1044  if ( *(StgWord*)ptr != 0x48000008 ) {
1045    errorBelch("freeHaskellFunctionPtr: not for me, guv! %p\n", ptr);
1048  freeStablePtr(((StgStablePtr*)ptr)[1]);
1049 #elif defined(powerpc_HOST_ARCH) || defined(powerpc64_HOST_ARCH)
1050  extern void* adjustorCode;
1051  if ( ((AdjustorStub*)ptr)->code != (StgFunPtr) &adjustorCode ) {
1052    errorBelch("freeHaskellFunctionPtr: not for me, guv! %p\n", ptr);
1055  freeStablePtr(((AdjustorStub*)ptr)->hptr);
1056 #elif defined(ia64_HOST_ARCH)
1057  IA64FunDesc *fdesc = (IA64FunDesc *)ptr;
1058  StgWord64 *code = (StgWord64 *)(fdesc+1);
1060  if (fdesc->ip != (StgWord64)code) {
1061    errorBelch("freeHaskellFunctionPtr: not for me, guv! %p\n", ptr);
/* code[16]/code[17] hold hptr and the array's own stable ptr (see writer). */
1064  freeStablePtr((StgStablePtr)code[16]);
1065  freeStablePtr((StgStablePtr)code[17]);
/* Scrub the first byte so a dangling caller fails the opcode checks above. */
1070  *((unsigned char*)ptr) = '\0';
1077  * Function: initAdjustor()
1079  * Perform initialisation of adjustor thunk layer (if needed.)
1084 #if defined(i386_HOST_ARCH) && defined(openbsd_HOST_OS)
/* OpenBSD enforces W^X, so the statically-assembled obscure_ccall_ret_code
   may not be executable; copy it into freshly mapped RWX memory.
   NOTE(review): only 4 bytes are copied — this assumes the i386 ret code is
   exactly "addl $0x4,%esp; ret" (4 bytes). Confirm against the asm stub. */
1085   obscure_ccall_ret_code_dyn = stgMallocBytesRWX(4);
1086   obscure_ccall_ret_code_dyn[0] = ((unsigned char *)obscure_ccall_ret_code)[0];
1087   obscure_ccall_ret_code_dyn[1] = ((unsigned char *)obscure_ccall_ret_code)[1];
1088   obscure_ccall_ret_code_dyn[2] = ((unsigned char *)obscure_ccall_ret_code)[2];
1089   obscure_ccall_ret_code_dyn[3] = ((unsigned char *)obscure_ccall_ret_code)[3];