1 /* -----------------------------------------------------------------------------
2 * Foreign export adjustor thunks
6 * ---------------------------------------------------------------------------*/
8 /* A little bit of background...
10 An adjustor thunk is a dynamically allocated code snippet that allows
11 Haskell closures to be viewed as C function pointers.
13 Stable pointers provide a way for the outside world to get access to,
14 and evaluate, Haskell heap objects, with the RTS providing a small
15 range of ops for doing so. So, assuming we've got a stable pointer in
16 our hand in C, we can jump into the Haskell world and evaluate a callback
17 procedure, say. This works OK in some cases where callbacks are used, but
18 does require the external code to know about stable pointers and how to deal
19 with them. We'd like to hide the Haskell-nature of a callback and have it
20 be invoked just like any other C function pointer.
22 Enter adjustor thunks. An adjustor thunk is a little piece of code
23 that's generated on-the-fly (one per Haskell closure being exported)
24 that, when entered using some 'universal' calling convention (e.g., the
25 C calling convention on platform X), pushes an implicit stable pointer
26 (to the Haskell callback) before calling another (static) C function stub
27 which takes care of entering the Haskell code via its stable pointer.
29 An adjustor thunk is allocated on the C heap, and is called from within
30 Haskell just before handing out the function pointer to the Haskell (IO)
31 action. User code should never have to invoke it explicitly.
33 An adjustor thunk differs from a C function pointer in one respect: when
34 the code is through with it, it has to be freed in order to release Haskell
35 and C resources. Failure to do so results in memory leaks on both the C and
39 #include "PosixSource.h"
41 #include "RtsExternal.h"
49 #if defined(openbsd_HOST_OS) || defined(linux_HOST_OS)
51 #include <sys/types.h>
54 /* no C99 header stdint.h on OpenBSD? */
55 #if defined(openbsd_HOST_OS)
56 typedef unsigned long my_uintptr_t;
59 typedef uintptr_t my_uintptr_t;
63 #if defined(powerpc_HOST_ARCH) && defined(linux_HOST_OS)
67 /* Heavily arch-specific, I'm afraid.. */
70 * Allocate len bytes which are readable, writable, and executable.
72 * ToDo: If this turns out to be a performance bottleneck, one could
73 * e.g. cache the last VirtualProtect/mprotect-ed region and do
74 * nothing in case of a cache hit.
77 mallocBytesRWX(int len)
79 void *addr = stgMallocBytes(len, "mallocBytesRWX");
80 #if defined(i386_HOST_ARCH) && defined(_WIN32)
81 /* This could be necessary for processors which distinguish between READ and
82 EXECUTE memory accesses, e.g. Itaniums. */
83 DWORD dwOldProtect = 0;
84 if (VirtualProtect (addr, len, PAGE_EXECUTE_READWRITE, &dwOldProtect) == 0) {
85 barf("mallocBytesRWX: failed to protect 0x%p; error=%lu; old protection: %lu\n",
86 addr, (unsigned long)GetLastError(), (unsigned long)dwOldProtect);
88 #elif defined(openbsd_HOST_OS) || defined(linux_HOST_OS)
89 /* malloced memory isn't executable by default on OpenBSD */
90 my_uintptr_t pageSize = sysconf(_SC_PAGESIZE);
91 my_uintptr_t mask = ~(pageSize - 1);
92 my_uintptr_t startOfFirstPage = ((my_uintptr_t)addr ) & mask;
93 my_uintptr_t startOfLastPage = ((my_uintptr_t)addr + len - 1) & mask;
94 my_uintptr_t size = startOfLastPage - startOfFirstPage + pageSize;
95 if (mprotect((void*)startOfFirstPage, (size_t)size, PROT_EXEC | PROT_READ | PROT_WRITE) != 0) {
96 barf("mallocBytesRWX: failed to protect 0x%p\n", addr);
102 #if defined(i386_HOST_ARCH)
104 Now here's something obscure for you:
106 When generating an adjustor thunk that uses the C calling
107 convention, we have to make sure that the thunk kicks off
108 the process of jumping into Haskell with a tail jump. Why?
109 Because as a result of jumping in into Haskell we may end
110 up freeing the very adjustor thunk we came from using
111 freeHaskellFunctionPtr(). Hence, we better not return to
112 the adjustor code on our way out, since it could by then
115 The fix is readily at hand, just include the opcodes
116 for the C stack fixup code that we need to perform when
117 returning in some static piece of memory and arrange
118 to return to it before tail jumping from the adjustor thunk.
121 ".globl obscure_ccall_ret_code\n"
122 "obscure_ccall_ret_code:\n\t"
123 "addl $0x4, %esp\n\t"
126 extern void obscure_ccall_ret_code(void);
129 #if defined(x86_64_TARGET_ARCH)
131 ".globl obscure_ccall_ret_code\n"
132 "obscure_ccall_ret_code:\n\t"
133 "addq $0x8, %rsp\n\t"
136 extern void obscure_ccall_ret_code(void);
139 #if defined(alpha_HOST_ARCH)
140 /* To get the definition of PAL_imb: */
141 # if defined(linux_HOST_OS)
142 # include <asm/pal.h>
144 # include <machine/pal.h>
148 #if defined(ia64_HOST_ARCH)
151 /* Layout of a function descriptor */
152 typedef struct _IA64FunDesc {
158 stgAllocStable(size_t size_in_bytes, StgStablePtr *stable)
161 nat data_size_in_words, total_size_in_words;
163 /* round up to a whole number of words */
164 data_size_in_words = (size_in_bytes + sizeof(W_) + 1) / sizeof(W_);
165 total_size_in_words = sizeofW(StgArrWords) + data_size_in_words;
167 /* allocate and fill it in */
168 arr = (StgArrWords *)allocate(total_size_in_words);
169 SET_ARR_HDR(arr, &stg_ARR_WORDS_info, CCCS, data_size_in_words);
171 /* obtain a stable ptr */
172 *stable = getStablePtr((StgPtr)arr);
174 /* and return a ptr to the goods inside the array */
175 return(BYTE_ARR_CTS(arr));
179 #if defined(powerpc_HOST_ARCH) && defined(linux_HOST_OS)
180 __asm__("obscure_ccall_ret_code:\n\t"
185 extern void obscure_ccall_ret_code(void);
188 #if defined(powerpc_HOST_ARCH) || defined(powerpc64_HOST_ARCH)
189 #if !(defined(powerpc_HOST_ARCH) && defined(linux_HOST_OS))
191 /* !!! !!! WARNING: !!! !!!
192 * This structure is accessed from AdjustorAsm.s
193 * Any changes here have to be mirrored in the offsets there.
196 typedef struct AdjustorStub {
197 #if defined(powerpc_HOST_ARCH) && defined(darwin_HOST_OS)
204 #elif defined(powerpc64_HOST_ARCH) && defined(darwin_HOST_OS)
205 /* powerpc64-darwin: just guessing that it won't use fundescs. */
216 /* fundesc-based ABIs */
225 StgInt negative_framesize;
226 StgInt extrawords_plus_one;
233 createAdjustor(int cconv, StgStablePtr hptr,
236 #if !defined(powerpc_HOST_ARCH) && !defined(powerpc64_HOST_ARCH) && !defined(x86_64_TARGET_ARCH)
241 void *adjustor = NULL;
245 case 0: /* _stdcall */
246 #if defined(i386_HOST_ARCH)
247 /* Magic constant computed by inspecting the code length of
248 the following assembly language snippet
249 (offset and machine code prefixed):
251 <0>: 58 popl %eax # temp. remove ret addr..
252 <1>: 68 fd fc fe fa pushl 0xfafefcfd # constant is large enough to
253 # hold a StgStablePtr
254 <6>: 50 pushl %eax # put back ret. addr
255 <7>: b8 fa ef ff 00 movl $0x00ffeffa, %eax # load up wptr
256 <c>: ff e0 jmp %eax # and jump to it.
257 # the callee cleans up the stack
259 adjustor = mallocBytesRWX(14);
261 unsigned char *const adj_code = (unsigned char *)adjustor;
262 adj_code[0x00] = (unsigned char)0x58; /* popl %eax */
264 adj_code[0x01] = (unsigned char)0x68; /* pushl hptr (which is a dword immediate ) */
265 *((StgStablePtr*)(adj_code + 0x02)) = (StgStablePtr)hptr;
267 adj_code[0x06] = (unsigned char)0x50; /* pushl %eax */
269 adj_code[0x07] = (unsigned char)0xb8; /* movl $wptr, %eax */
270 *((StgFunPtr*)(adj_code + 0x08)) = (StgFunPtr)wptr;
272 adj_code[0x0c] = (unsigned char)0xff; /* jmp %eax */
273 adj_code[0x0d] = (unsigned char)0xe0;
279 #if defined(i386_HOST_ARCH)
280 /* Magic constant computed by inspecting the code length of
281 the following assembly language snippet
282 (offset and machine code prefixed):
284 <00>: 68 ef be ad de pushl $0xdeadbeef # constant is large enough to
285 # hold a StgStablePtr
286 <05>: b8 fa ef ff 00 movl $0x00ffeffa, %eax # load up wptr
287 <0a>: 68 ef be ad de pushl $obscure_ccall_ret_code # push the return address
288 <0f>: ff e0 jmp *%eax # jump to wptr
290 The ccall'ing version is a tad different, passing in the return
291 address of the caller to the auto-generated C stub (which enters
292 via the stable pointer.) (The auto-generated C stub is in on this
293 game, don't worry :-)
295 See the comment next to obscure_ccall_ret_code why we need to
296 perform a tail jump instead of a call, followed by some C stack
299 Note: The adjustor makes the assumption that any return value
300 coming back from the C stub is not stored on the stack.
301 That's (thankfully) the case here with the restricted set of
302 return types that we support.
304 adjustor = mallocBytesRWX(17);
306 unsigned char *const adj_code = (unsigned char *)adjustor;
308 adj_code[0x00] = (unsigned char)0x68; /* pushl hptr (which is a dword immediate ) */
309 *((StgStablePtr*)(adj_code+0x01)) = (StgStablePtr)hptr;
311 adj_code[0x05] = (unsigned char)0xb8; /* movl $wptr, %eax */
312 *((StgFunPtr*)(adj_code + 0x06)) = (StgFunPtr)wptr;
314 adj_code[0x0a] = (unsigned char)0x68; /* pushl obscure_ccall_ret_code */
315 *((StgFunPtr*)(adj_code + 0x0b)) = (StgFunPtr)obscure_ccall_ret_code;
317 adj_code[0x0f] = (unsigned char)0xff; /* jmp *%eax */
318 adj_code[0x10] = (unsigned char)0xe0;
320 #elif defined(x86_64_HOST_ARCH)
327 %rdi,%rsi,%rdx,%rcx,%r8,%r9 = arg0..arg6
329 if there are <6 integer args, then we can just push the
330 StablePtr into %edi and shuffle the other args up.
332 If there are >=6 integer args, then we have to flush one arg
333 to the stack, and arrange to adjust the stack ptr on return.
334 The stack will be rearranged to this:
339 return address *** <-- dummy arg in stub fn.
341 obscure_ccall_ret_code
343 This unfortunately means that the type of the stub function
344 must have a dummy argument for the original return address
345 pointer inserted just after the 6th integer argument.
347 Code for the simple case:
349 0: 4d 89 c1 mov %r8,%r9
350 3: 49 89 c8 mov %rcx,%r8
351 6: 48 89 d1 mov %rdx,%rcx
352 9: 48 89 f2 mov %rsi,%rdx
353 c: 48 89 fe mov %rdi,%rsi
354 f: 48 8b 3d 0a 00 00 00 mov 10(%rip),%rdi
355 16: e9 00 00 00 00 jmpq stub_function
357 20: .quad 0 # aligned on 8-byte boundary
360 And the version for >=6 integer arguments:
363 2: 68 00 00 00 00 pushq $obscure_ccall_ret_code
364 7: 4d 89 c1 mov %r8,%r9
365 a: 49 89 c8 mov %rcx,%r8
366 d: 48 89 d1 mov %rdx,%rcx
367 10: 48 89 f2 mov %rsi,%rdx
368 13: 48 89 fe mov %rdi,%rsi
369 16: 48 8b 3d 0b 00 00 00 mov 11(%rip),%rdi
370 1d: e9 00 00 00 00 jmpq stub_function
372 28: .quad 0 # aligned on 8-byte boundary
375 /* we assume the small code model (gcc -mcmmodel=small) where
376 * all symbols are <2^32, so hence wptr should fit into 32 bits.
378 ASSERT(((long)wptr >> 32) == 0);
384 // determine whether we have 6 or more integer arguments,
385 // and therefore need to flush one to the stack.
386 for (c = typeString; *c != '\0'; c++) {
387 if (*c == 'i' || *c == 'l') i++;
392 adjustor = mallocBytesRWX(40);
394 *(StgInt32 *)adjustor = 0x49c1894d;
395 *(StgInt32 *)(adjustor+4) = 0x8948c889;
396 *(StgInt32 *)(adjustor+8) = 0xf28948d1;
397 *(StgInt32 *)(adjustor+12) = 0x48fe8948;
398 *(StgInt32 *)(adjustor+16) = 0x000a3d8b;
399 *(StgInt32 *)(adjustor+20) = 0x00e90000;
401 *(StgInt32 *)(adjustor+23) =
402 (StgInt32)((StgInt64)wptr - (StgInt64)adjustor - 27);
403 *(StgInt64 *)(adjustor+32) = (StgInt64)hptr;
407 adjustor = mallocBytesRWX(48);
409 *(StgInt32 *)adjustor = 0x00685141;
410 *(StgInt32 *)(adjustor+4) = 0x4d000000;
411 *(StgInt32 *)(adjustor+8) = 0x8949c189;
412 *(StgInt32 *)(adjustor+12) = 0xd18948c8;
413 *(StgInt32 *)(adjustor+16) = 0x48f28948;
414 *(StgInt32 *)(adjustor+20) = 0x8b48fe89;
415 *(StgInt32 *)(adjustor+24) = 0x00000b3d;
416 *(StgInt32 *)(adjustor+28) = 0x0000e900;
418 *(StgInt32 *)(adjustor+3) =
419 (StgInt32)(StgInt64)obscure_ccall_ret_code;
420 *(StgInt32 *)(adjustor+30) =
421 (StgInt32)((StgInt64)wptr - (StgInt64)adjustor - 34);
422 *(StgInt64 *)(adjustor+40) = (StgInt64)hptr;
425 #elif defined(sparc_HOST_ARCH)
426 /* Magic constant computed by inspecting the code length of the following
427 assembly language snippet (offset and machine code prefixed):
429 <00>: 9C23A008 sub %sp, 8, %sp ! make room for %o4/%o5 in caller's frame
430 <04>: DA23A060 st %o5, [%sp + 96] ! shift registers by 2 positions
431 <08>: D823A05C st %o4, [%sp + 92]
432 <0C>: 9A10000B mov %o3, %o5
433 <10>: 9810000A mov %o2, %o4
434 <14>: 96100009 mov %o1, %o3
435 <18>: 94100008 mov %o0, %o2
436 <1C>: 13000000 sethi %hi(wptr), %o1 ! load up wptr (1 of 2)
437 <20>: 11000000 sethi %hi(hptr), %o0 ! load up hptr (1 of 2)
438 <24>: 81C26000 jmp %o1 + %lo(wptr) ! jump to wptr (load 2 of 2)
439 <28>: 90122000 or %o0, %lo(hptr), %o0 ! load up hptr (2 of 2, delay slot)
440 <2C> 00000000 ! place for getting hptr back easily
442 ccall'ing on SPARC is easy, because we are quite lucky to push a
443 multiple of 8 bytes (1 word hptr + 1 word dummy arg) in front of the
444 existing arguments (note that %sp must stay double-word aligned at
445 all times, see ABI spec at http://www.sparc.org/standards/psABI3rd.pdf).
446 To do this, we extend the *caller's* stack frame by 2 words and shift
447 the output registers used for argument passing (%o0 - %o5, we are a *leaf*
448 procedure because of the tail-jump) by 2 positions. This makes room in
449 %o0 and %o1 for the additinal arguments, namely hptr and a dummy (used
450 for destination addr of jump on SPARC, return address on x86, ...). This
451 shouldn't cause any problems for a C-like caller: alloca is implemented
452 similarly, and local variables should be accessed via %fp, not %sp. In a
453 nutshell: This should work! (Famous last words! :-)
455 adjustor = mallocBytesRWX(4*(11+1));
457 unsigned long *const adj_code = (unsigned long *)adjustor;
459 adj_code[ 0] = 0x9C23A008UL; /* sub %sp, 8, %sp */
460 adj_code[ 1] = 0xDA23A060UL; /* st %o5, [%sp + 96] */
461 adj_code[ 2] = 0xD823A05CUL; /* st %o4, [%sp + 92] */
462 adj_code[ 3] = 0x9A10000BUL; /* mov %o3, %o5 */
463 adj_code[ 4] = 0x9810000AUL; /* mov %o2, %o4 */
464 adj_code[ 5] = 0x96100009UL; /* mov %o1, %o3 */
465 adj_code[ 6] = 0x94100008UL; /* mov %o0, %o2 */
466 adj_code[ 7] = 0x13000000UL; /* sethi %hi(wptr), %o1 */
467 adj_code[ 7] |= ((unsigned long)wptr) >> 10;
468 adj_code[ 8] = 0x11000000UL; /* sethi %hi(hptr), %o0 */
469 adj_code[ 8] |= ((unsigned long)hptr) >> 10;
470 adj_code[ 9] = 0x81C26000UL; /* jmp %o1 + %lo(wptr) */
471 adj_code[ 9] |= ((unsigned long)wptr) & 0x000003FFUL;
472 adj_code[10] = 0x90122000UL; /* or %o0, %lo(hptr), %o0 */
473 adj_code[10] |= ((unsigned long)hptr) & 0x000003FFUL;
475 adj_code[11] = (unsigned long)hptr;
478 asm("flush %0" : : "r" (adj_code ));
479 asm("flush %0" : : "r" (adj_code + 2));
480 asm("flush %0" : : "r" (adj_code + 4));
481 asm("flush %0" : : "r" (adj_code + 6));
482 asm("flush %0" : : "r" (adj_code + 10));
484 /* max. 5 instructions latency, and we need at >= 1 for returning */
490 #elif defined(alpha_HOST_ARCH)
491 /* Magic constant computed by inspecting the code length of
492 the following assembly language snippet
493 (offset and machine code prefixed; note that the machine code
494 shown is longwords stored in little-endian order):
496 <00>: 46520414 mov a2, a4
497 <04>: 46100412 mov a0, a2
498 <08>: a61b0020 ldq a0, 0x20(pv) # load up hptr
499 <0c>: 46730415 mov a3, a5
500 <10>: a77b0028 ldq pv, 0x28(pv) # load up wptr
501 <14>: 46310413 mov a1, a3
502 <18>: 6bfb---- jmp (pv), <hint> # jump to wptr (with hint)
503 <1c>: 00000000 # padding for alignment
504 <20>: [8 bytes for hptr quadword]
505 <28>: [8 bytes for wptr quadword]
507 The "computed" jump at <08> above is really a jump to a fixed
508 location. Accordingly, we place an always-correct hint in the
509 jump instruction, namely the address offset from <0c> to wptr,
510 divided by 4, taking the lowest 14 bits.
512 We only support passing 4 or fewer argument words, for the same
513 reason described under sparc_HOST_ARCH above by JRS, 21 Aug 01.
514 On the Alpha the first 6 integer arguments are in a0 through a5,
515 and the rest on the stack. Hence we want to shuffle the original
516 caller's arguments by two.
518 On the Alpha the calling convention is so complex and dependent
519 on the callee's signature -- for example, the stack pointer has
520 to be a multiple of 16 -- that it seems impossible to me [ccshan]
521 to handle the general case correctly without changing how the
522 adjustor is called from C. For now, our solution of shuffling
523 registers only and ignoring the stack only works if the original
524 caller passed 4 or fewer argument words.
526 TODO: Depending on how much allocation overhead stgMallocBytes uses for
527 header information (more precisely, if the overhead is no more than
528 4 bytes), we should move the first three instructions above down by
529 4 bytes (getting rid of the nop), hence saving memory. [ccshan]
531 ASSERT(((StgWord64)wptr & 3) == 0);
532 adjustor = mallocBytesRWX(48);
534 StgWord64 *const code = (StgWord64 *)adjustor;
536 code[0] = 0x4610041246520414L;
537 code[1] = 0x46730415a61b0020L;
538 code[2] = 0x46310413a77b0028L;
539 code[3] = 0x000000006bfb0000L
540 | (((StgWord32*)(wptr) - (StgWord32*)(code) - 3) & 0x3fff);
542 code[4] = (StgWord64)hptr;
543 code[5] = (StgWord64)wptr;
545 /* Ensure that instruction cache is consistent with our new code */
546 __asm__ volatile("call_pal %0" : : "i" (PAL_imb));
548 #elif defined(powerpc_HOST_ARCH) && defined(linux_HOST_OS)
550 #define OP_LO(op,lo) ((((unsigned)(op)) << 16) | (((unsigned)(lo)) & 0xFFFF))
551 #define OP_HI(op,hi) ((((unsigned)(op)) << 16) | (((unsigned)(hi)) >> 16))
553 /* The PowerPC Linux (32-bit) calling convention is annoyingly complex.
554 We need to calculate all the details of the stack frame layout,
555 taking into account the types of all the arguments, and then
556 generate code on the fly. */
558 int src_gpr = 3, dst_gpr = 5;
560 int src_offset = 0, dst_offset = 0;
561 int n = strlen(typeString),i;
562 int src_locs[n], dst_locs[n];
567 Calculate where the arguments should go.
568 src_locs[] will contain the locations of the arguments in the
569 original stack frame passed to the adjustor.
570 dst_locs[] will contain the locations of the arguments after the
571 adjustor runs, on entry to the wrapper proc pointed to by wptr.
573 This algorithm is based on the one described on page 3-19 of the
574 System V ABI PowerPC Processor Supplement.
576 for(i=0;typeString[i];i++)
578 char t = typeString[i];
579 if((t == 'f' || t == 'd') && fpr <= 8)
580 src_locs[i] = dst_locs[i] = -32-(fpr++);
583 if(t == 'l' && src_gpr <= 9)
585 if((src_gpr & 1) == 0)
587 src_locs[i] = -src_gpr;
590 else if(t == 'i' && src_gpr <= 10)
592 src_locs[i] = -(src_gpr++);
596 if(t == 'l' || t == 'd')
601 src_locs[i] = src_offset;
602 src_offset += (t == 'l' || t == 'd') ? 8 : 4;
605 if(t == 'l' && dst_gpr <= 9)
607 if((dst_gpr & 1) == 0)
609 dst_locs[i] = -dst_gpr;
612 else if(t == 'i' && dst_gpr <= 10)
614 dst_locs[i] = -(dst_gpr++);
618 if(t == 'l' || t == 'd')
623 dst_locs[i] = dst_offset;
624 dst_offset += (t == 'l' || t == 'd') ? 8 : 4;
629 frameSize = dst_offset + 8;
630 frameSize = (frameSize+15) & ~0xF;
635 // allocate space for at most 4 insns per parameter
636 // plus 14 more instructions.
637 adjustor = mallocBytesRWX(4 * (4*n + 14));
638 code = (unsigned*)adjustor;
640 *code++ = 0x48000008; // b *+8
641 // * Put the hptr in a place where freeHaskellFunctionPtr
643 *code++ = (unsigned) hptr;
645 // * save the link register
646 *code++ = 0x7c0802a6; // mflr r0;
647 *code++ = 0x90010004; // stw r0, 4(r1);
648 // * and build a new stack frame
649 *code++ = OP_LO(0x9421, -frameSize); // stwu r1, -frameSize(r1)
651 // * now generate instructions to copy arguments
652 // from the old stack frame into the new stack frame.
655 if(src_locs[i] < -32)
656 ASSERT(dst_locs[i] == src_locs[i]);
657 else if(src_locs[i] < 0)
660 ASSERT(typeString[i] != 'f' && typeString[i] != 'd');
663 ASSERT(dst_locs[i] > -32);
664 // dst is in GPR, too.
666 if(typeString[i] == 'l')
670 | ((-dst_locs[i]+1) << 16)
671 | ((-src_locs[i]+1) << 11)
672 | ((-src_locs[i]+1) << 21);
676 | ((-dst_locs[i]) << 16)
677 | ((-src_locs[i]) << 11)
678 | ((-src_locs[i]) << 21);
682 if(typeString[i] == 'l')
684 // stw src+1, dst_offset+4(r1)
686 | ((-src_locs[i]+1) << 21)
690 // stw src, dst_offset(r1)
692 | ((-src_locs[i]) << 21)
698 ASSERT(dst_locs[i] >= 0);
699 ASSERT(typeString[i] != 'f' && typeString[i] != 'd');
701 if(typeString[i] == 'l')
703 // lwz r0, src_offset(r1)
705 | (src_locs[i] + frameSize + 8 + 4);
706 // stw r0, dst_offset(r1)
708 | (dst_locs[i] + 8 + 4);
710 // lwz r0, src_offset(r1)
712 | (src_locs[i] + frameSize + 8);
713 // stw r0, dst_offset(r1)
719 // * hptr will be the new first argument.
721 *code++ = OP_HI(0x3c60, hptr);
722 // ori r3,r3,lo(hptr)
723 *code++ = OP_LO(0x6063, hptr);
725 // * we need to return to a piece of code
726 // which will tear down the stack frame.
727 // lis r11,hi(obscure_ccall_ret_code)
728 *code++ = OP_HI(0x3d60, obscure_ccall_ret_code);
729 // ori r11,r11,lo(obscure_ccall_ret_code)
730 *code++ = OP_LO(0x616b, obscure_ccall_ret_code);
732 *code++ = 0x7d6803a6;
736 *code++ = OP_HI(0x3d60, wptr);
737 // ori r11,r11,lo(wptr)
738 *code++ = OP_LO(0x616b, wptr);
740 *code++ = 0x7d6903a6;
742 *code++ = 0x4e800420;
744 // Flush the Instruction cache:
746 unsigned *p = adjustor;
749 __asm__ volatile ("dcbf 0,%0\n\tsync\n\ticbi 0,%0"
753 __asm__ volatile ("sync\n\tisync");
757 #elif defined(powerpc_HOST_ARCH) || defined(powerpc64_HOST_ARCH)
759 #define OP_LO(op,lo) ((((unsigned)(op)) << 16) | (((unsigned)(lo)) & 0xFFFF))
760 #define OP_HI(op,hi) ((((unsigned)(op)) << 16) | (((unsigned)(hi)) >> 16))
762 /* The following code applies to all PowerPC and PowerPC64 platforms
763 whose stack layout is based on the AIX ABI.
765 Besides (obviously) AIX, this includes
766 Mac OS 9 and BeOS/PPC (may they rest in peace),
767 which use the 32-bit AIX ABI
769 which uses the 64-bit AIX ABI
770 and Darwin (Mac OS X),
771 which uses the same stack layout as AIX,
772 but no function descriptors.
774 The actual stack-frame shuffling is implemented out-of-line
775 in the function adjustorCode, in AdjustorAsm.S.
776 Here, we set up an AdjustorStub structure, which
777 is a function descriptor (on platforms that have function
778 descriptors) or a short piece of stub code (on Darwin) to call
779 adjustorCode with a pointer to the AdjustorStub struct loaded
782 One nice thing about this is that there is _no_ code generated at
783 runtime on the platforms that have function descriptors.
785 AdjustorStub *adjustorStub;
786 int sz = 0, extra_sz, total_sz;
788 // from AdjustorAsm.s
789 // not declared as a function so that AIX-style
790 // fundescs can never get in the way.
791 extern void *adjustorCode;
794 adjustorStub = stgMallocBytes(sizeof(AdjustorStub), "createAdjustor");
796 adjustorStub = mallocBytesRWX(sizeof(AdjustorStub));
798 adjustor = adjustorStub;
800 adjustorStub->code = (void*) &adjustorCode;
803 // function descriptors are a cool idea.
804 // We don't need to generate any code at runtime.
805 adjustorStub->toc = adjustorStub;
808 // no function descriptors :-(
809 // We need to do things "by hand".
810 #if defined(powerpc_HOST_ARCH)
811 // lis r2, hi(adjustorStub)
812 adjustorStub->lis = OP_HI(0x3c40, adjustorStub);
813 // ori r2, r2, lo(adjustorStub)
814 adjustorStub->ori = OP_LO(0x6042, adjustorStub);
816 adjustorStub->lwz = OP_LO(0x8002, (char*)(&adjustorStub->code)
817 - (char*)adjustorStub);
819 adjustorStub->mtctr = 0x7c0903a6;
821 adjustorStub->bctr = 0x4e800420;
823 barf("adjustor creation not supported on this platform");
826 // Flush the Instruction cache:
828 int n = sizeof(AdjustorStub)/sizeof(unsigned);
829 unsigned *p = (unsigned*)adjustor;
832 __asm__ volatile ("dcbf 0,%0\n\tsync\n\ticbi 0,%0"
836 __asm__ volatile ("sync\n\tisync");
840 // Calculate the size of the stack frame, in words.
843 char t = *typeString++;
847 #if defined(powerpc_HOST_ARCH)
848 // on 32-bit platforms, Double and Int64 occupy two words.
854 // everything else is one word.
859 // The first eight words of the parameter area
860 // are just "backing store" for the parameters passed in
861 // the GPRs. extra_sz is the number of words beyond those first
867 // Calculate the total size of the stack frame.
868 total_sz = (6 /* linkage area */
869 + 8 /* minimum parameter area */
870 + 2 /* two extra arguments */
871 + extra_sz)*sizeof(StgWord);
873 // align to 16 bytes.
874 // AIX only requires 8 bytes, but who cares?
875 total_sz = (total_sz+15) & ~0xF;
877 // Fill in the information that adjustorCode in AdjustorAsm.S
878 // will use to create a new stack frame with the additional args.
879 adjustorStub->hptr = hptr;
880 adjustorStub->wptr = wptr;
881 adjustorStub->negative_framesize = -total_sz;
882 adjustorStub->extrawords_plus_one = extra_sz + 1;
885 #elif defined(ia64_HOST_ARCH)
887 Up to 8 inputs are passed in registers. We flush the last two inputs to
888 the stack, initially into the 16-byte scratch region left by the caller.
889 We then shuffle the others along by 4 (taking 2 registers for ourselves
890 to save return address and previous function state - we need to come back
891 here on the way out to restore the stack, so this is a real function
892 rather than just a trampoline).
894 The function descriptor we create contains the gp of the target function
895 so gp is already loaded correctly.
897 [MLX] alloc r16=ar.pfs,10,2,0
899 [MII] st8.spill [r12]=r38,8 // spill in6 (out4)
900 mov r41=r37 // out7 = in5 (out3)
901 mov r40=r36;; // out6 = in4 (out2)
902 [MII] st8.spill [r12]=r39 // spill in7 (out5)
904 mov r38=r34;; // out4 = in2 (out0)
905 [MII] mov r39=r35 // out5 = in3 (out1)
906 mov r37=r33 // out3 = in1 (loc1)
907 mov r36=r32 // out2 = in0 (loc0)
908 [MLX] adds r12=-24,r12 // update sp
909 movl r34=hptr;; // out0 = hptr
910 [MIB] mov r33=r16 // loc1 = ar.pfs
911 mov r32=b0 // loc0 = retaddr
912 br.call.sptk.many b0=b6;;
914 [MII] adds r12=-16,r12
919 br.ret.sptk.many b0;;
922 /* These macros distribute a long constant into the two words of an MLX bundle */
923 #define BITS(val,start,count) (((val) >> (start)) & ((1 << (count))-1))
924 #define MOVL_LOWORD(val) (BITS(val,22,18) << 46)
925 #define MOVL_HIWORD(val) (BITS(val,40,23) | (BITS(val,0,7) << 36) | (BITS(val,7,9) << 50) \
926 | (BITS(val,16,5) << 55) | (BITS(val,21,1) << 44) | BITS(val,63,1) << 59)
930 IA64FunDesc *wdesc = (IA64FunDesc *)wptr;
931 StgWord64 wcode = wdesc->ip;
935 /* we allocate on the Haskell heap since malloc'd memory isn't executable - argh */
936 adjustor = stgAllocStable(sizeof(IA64FunDesc)+18*8, &stable);
938 fdesc = (IA64FunDesc *)adjustor;
939 code = (StgWord64 *)(fdesc + 1);
940 fdesc->ip = (StgWord64)code;
941 fdesc->gp = wdesc->gp;
943 code[0] = 0x0000058004288004 | MOVL_LOWORD(wcode);
944 code[1] = 0x6000000220000000 | MOVL_HIWORD(wcode);
945 code[2] = 0x029015d818984001;
946 code[3] = 0x8401200500420094;
947 code[4] = 0x886011d8189c0001;
948 code[5] = 0x84011004c00380c0;
949 code[6] = 0x0250210046013800;
950 code[7] = 0x8401000480420084;
951 code[8] = 0x0000233f19a06005 | MOVL_LOWORD((StgWord64)hptr);
952 code[9] = 0x6000000440000000 | MOVL_HIWORD((StgWord64)hptr);
953 code[10] = 0x0200210020010811;
954 code[11] = 0x1080006800006200;
955 code[12] = 0x0000210018406000;
956 code[13] = 0x00aa021000038005;
957 code[14] = 0x000000010000001d;
958 code[15] = 0x0084000880000200;
960 /* save stable pointers in convenient form */
961 code[16] = (StgWord64)hptr;
962 code[17] = (StgWord64)stable;
965 barf("adjustor creation not supported on this platform");
980 freeHaskellFunctionPtr(void* ptr)
982 #if defined(i386_HOST_ARCH)
983 if ( *(unsigned char*)ptr != 0x68 &&
984 *(unsigned char*)ptr != 0x58 ) {
985 errorBelch("freeHaskellFunctionPtr: not for me, guv! %p\n", ptr);
989 /* Free the stable pointer first..*/
990 if (*(unsigned char*)ptr == 0x68) { /* Aha, a ccall adjustor! */
991 freeStablePtr(*((StgStablePtr*)((unsigned char*)ptr + 0x01)));
993 freeStablePtr(*((StgStablePtr*)((unsigned char*)ptr + 0x02)));
995 #elif defined(x86_64_HOST_ARCH)
996 if ( *(StgWord16 *)ptr == 0x894d ) {
997 freeStablePtr(*(StgStablePtr*)(ptr+32));
998 } else if ( *(StgWord16 *)ptr == 0x5141 ) {
999 freeStablePtr(*(StgStablePtr*)(ptr+40));
1001 errorBelch("freeHaskellFunctionPtr: not for me, guv! %p\n", ptr);
1004 #elif defined(sparc_HOST_ARCH)
1005 if ( *(unsigned long*)ptr != 0x9C23A008UL ) {
1006 errorBelch("freeHaskellFunctionPtr: not for me, guv! %p\n", ptr);
1010 /* Free the stable pointer first..*/
1011 freeStablePtr(*((StgStablePtr*)((unsigned long*)ptr + 11)));
1012 #elif defined(alpha_HOST_ARCH)
1013 if ( *(StgWord64*)ptr != 0xa77b0018a61b0010L ) {
1014 errorBelch("freeHaskellFunctionPtr: not for me, guv! %p\n", ptr);
1018 /* Free the stable pointer first..*/
1019 freeStablePtr(*((StgStablePtr*)((unsigned char*)ptr + 0x10)));
1020 #elif defined(powerpc_HOST_ARCH) && defined(linux_HOST_OS)
1021 if ( *(StgWord*)ptr != 0x48000008 ) {
1022 errorBelch("freeHaskellFunctionPtr: not for me, guv! %p\n", ptr);
1025 freeStablePtr(((StgStablePtr*)ptr)[1]);
1026 #elif defined(powerpc_HOST_ARCH) || defined(powerpc64_HOST_ARCH)
1027 extern void* adjustorCode;
1028 if ( ((AdjustorStub*)ptr)->code != (StgFunPtr) &adjustorCode ) {
1029 errorBelch("freeHaskellFunctionPtr: not for me, guv! %p\n", ptr);
1032 freeStablePtr(((AdjustorStub*)ptr)->hptr);
1033 #elif defined(ia64_HOST_ARCH)
1034 IA64FunDesc *fdesc = (IA64FunDesc *)ptr;
1035 StgWord64 *code = (StgWord64 *)(fdesc+1);
1037 if (fdesc->ip != (StgWord64)code) {
1038 errorBelch("freeHaskellFunctionPtr: not for me, guv! %p\n", ptr);
1041 freeStablePtr((StgStablePtr)code[16]);
1042 freeStablePtr((StgStablePtr)code[17]);
1047 *((unsigned char*)ptr) = '\0';
1054 * Function: initAdjustor()
1056 * Perform initialisation of adjustor thunk layer (if needed.)