1 /* -----------------------------------------------------------------------------
2 * Foreign export adjustor thunks
6 * ---------------------------------------------------------------------------*/
8 /* A little bit of background...
10 An adjustor thunk is a dynamically allocated code snippet that allows
11 Haskell closures to be viewed as C function pointers.
13 Stable pointers provide a way for the outside world to get access to,
14 and evaluate, Haskell heap objects, with the RTS providing a small
15 range of ops for doing so. So, assuming we've got a stable pointer in
16 our hand in C, we can jump into the Haskell world and evaluate a callback
17 procedure, say. This works OK in some cases where callbacks are used, but
18 does require the external code to know about stable pointers and how to deal
19 with them. We'd like to hide the Haskell-nature of a callback and have it
20 be invoked just like any other C function pointer.
22 Enter adjustor thunks. An adjustor thunk is a little piece of code
23 that's generated on-the-fly (one per Haskell closure being exported)
24 that, when entered using some 'universal' calling convention (e.g., the
25 C calling convention on platform X), pushes an implicit stable pointer
26 (to the Haskell callback) before calling another (static) C function stub
27 which takes care of entering the Haskell code via its stable pointer.
29 An adjustor thunk is allocated on the C heap, and is called from within
30 Haskell just before handing out the function pointer to the Haskell (IO)
31 action. User code should never have to invoke it explicitly.
33 An adjustor thunk differs from a C function pointer in one respect: when
34 the code is through with it, it has to be freed in order to release Haskell
35 and C resources. Failure to do so results in memory leaks on both the C and
39 #include "PosixSource.h"
41 #include "RtsExternal.h"
49 #if defined(openbsd_HOST_OS) || defined(linux_HOST_OS)
51 #include <sys/types.h>
54 /* no C99 header stdint.h on OpenBSD? */
55 #if defined(openbsd_HOST_OS)
56 typedef unsigned long my_uintptr_t;
59 typedef uintptr_t my_uintptr_t;
63 #if defined(powerpc_HOST_ARCH) && defined(linux_HOST_OS)
67 /* Heavily arch-specific, I'm afraid.. */
70 * Allocate len bytes which are readable, writable, and executable.
72 * ToDo: If this turns out to be a performance bottleneck, one could
73 * e.g. cache the last VirtualProtect/mprotect-ed region and do
74 * nothing in case of a cache hit.
/*
 * mallocBytesRWX: allocate `len` bytes of memory that is readable,
 * writable and executable, for holding adjustor code generated at run
 * time.  The buffer comes from stgMallocBytes and is then re-protected
 * per platform.
 * NOTE(review): this excerpt elides the return type line, the braces
 * and the final `return addr;` — confirm against the full file.
 */
77 mallocBytesRWX(int len)
79 void *addr = stgMallocBytes(len, "mallocBytesRWX");
80 #if defined(i386_HOST_ARCH) && defined(_WIN32)
81 /* This could be necessary for processors which distinguish between READ and
82 EXECUTE memory accesses, e.g. Itaniums. */
/* Win32: flip the whole allocation to PAGE_EXECUTE_READWRITE. */
83 DWORD dwOldProtect = 0;
84 if (VirtualProtect (addr, len, PAGE_EXECUTE_READWRITE, &dwOldProtect) == 0) {
85 barf("mallocBytesRWX: failed to protect 0x%p; error=%lu; old protection: %lu\n",
86 addr, (unsigned long)GetLastError(), (unsigned long)dwOldProtect);
88 #elif defined(openbsd_HOST_OS) || defined(linux_HOST_OS)
89 /* malloced memory isn't executable by default on OpenBSD */
/* mprotect() operates on page-aligned ranges only, so round the
   allocation out to whole pages and mark that range RWX. */
90 my_uintptr_t pageSize = sysconf(_SC_PAGESIZE);
91 my_uintptr_t mask = ~(pageSize - 1);
92 my_uintptr_t startOfFirstPage = ((my_uintptr_t)addr ) & mask;
93 my_uintptr_t startOfLastPage = ((my_uintptr_t)addr + len - 1) & mask;
94 my_uintptr_t size = startOfLastPage - startOfFirstPage + pageSize;
95 if (mprotect((void*)startOfFirstPage, (size_t)size, PROT_EXEC | PROT_READ | PROT_WRITE) != 0) {
96 barf("mallocBytesRWX: failed to protect 0x%p\n", addr);
102 #ifdef LEADING_UNDERSCORE
103 #define UNDERSCORE "_"
105 #define UNDERSCORE ""
107 #if defined(i386_HOST_ARCH)
109 Now here's something obscure for you:
111 When generating an adjustor thunk that uses the C calling
112 convention, we have to make sure that the thunk kicks off
113 the process of jumping into Haskell with a tail jump. Why?
114 Because as a result of jumping in into Haskell we may end
115 up freeing the very adjustor thunk we came from using
116 freeHaskellFunctionPtr(). Hence, we better not return to
117 the adjustor code on our way out, since it could by then
120 The fix is readily at hand, just include the opcodes
121 for the C stack fixup code that we need to perform when
122 returning in some static piece of memory and arrange
123 to return to it before tail jumping from the adjustor thunk.
/*
 * obscure_ccall_wrapper (i386): carrier for the static snippet
 * `obscure_ccall_ret_code`, which a ccall adjustor pushes as the return
 * address before tail-jumping into the C stub.  On return here,
 * `addl $0x4, %esp` discards the hptr word the adjustor pushed; the
 * remainder of the snippet (elided from this excerpt) returns to the
 * original caller.  Keeping the fixup code out of the adjustor itself
 * lets the adjustor be freed while logically "live" — see the
 * commentary above.
 */
125 static void GNUC3_ATTRIBUTE(used) obscure_ccall_wrapper(void)
128 ".globl " UNDERSCORE "obscure_ccall_ret_code\n"
129 UNDERSCORE "obscure_ccall_ret_code:\n\t"
130 "addl $0x4, %esp\n\t"
/* Label defined by the asm above; used when emitting ccall adjustors. */
134 extern void obscure_ccall_ret_code(void);
136 #if defined(openbsd_HOST_OS)
137 static unsigned char *obscure_ccall_ret_code_dyn;
142 #if defined(x86_64_HOST_ARCH)
/*
 * obscure_ccall_wrapper (x86_64): same idea as the i386 version, but a
 * stack slot is 8 bytes, hence `addq $0x8, %rsp` to drop the word the
 * adjustor flushed to the stack before returning to the original
 * caller (tail of the snippet elided from this excerpt).
 */
143 static void GNUC3_ATTRIBUTE(used) obscure_ccall_wrapper(void)
146 ".globl " UNDERSCORE "obscure_ccall_ret_code\n"
147 UNDERSCORE "obscure_ccall_ret_code:\n\t"
148 "addq $0x8, %rsp\n\t"
/* Label defined by the asm above; used when emitting ccall adjustors. */
152 extern void obscure_ccall_ret_code(void);
155 #if defined(alpha_HOST_ARCH)
156 /* To get the definition of PAL_imb: */
157 # if defined(linux_HOST_OS)
158 # include <asm/pal.h>
160 # include <machine/pal.h>
164 #if defined(ia64_HOST_ARCH)
167 /* Layout of a function descriptor */
168 typedef struct _IA64FunDesc {
/*
 * stgAllocStable: allocate `size_in_bytes` of GC-visible storage as an
 * StgArrWords heap object, register a stable pointer to it in *stable
 * (so the collector cannot move or reclaim it behind our back), and
 * return a pointer to the payload bytes.
 * NOTE(review): the `{`, the declaration of `arr` and the closing `}`
 * are elided from this excerpt.
 */
174 stgAllocStable(size_t size_in_bytes, StgStablePtr *stable)
177 nat data_size_in_words, total_size_in_words;
179 /* round up to a whole number of words */
/* NOTE(review): the conventional round-up is
   (size_in_bytes + sizeof(W_) - 1) / sizeof(W_); the `+ 1` here
   over-allocates by one word for some sizes (e.g. exact word
   multiples).  Harmless but worth confirming it is intentional. */
180 data_size_in_words = (size_in_bytes + sizeof(W_) + 1) / sizeof(W_);
181 total_size_in_words = sizeofW(StgArrWords) + data_size_in_words;
183 /* allocate and fill it in */
184 arr = (StgArrWords *)allocate(total_size_in_words);
185 SET_ARR_HDR(arr, &stg_ARR_WORDS_info, CCCS, data_size_in_words);
187 /* obtain a stable ptr */
188 *stable = getStablePtr((StgPtr)arr);
190 /* and return a ptr to the goods inside the array */
191 return(BYTE_ARR_CTS(arr));
195 #if defined(powerpc_HOST_ARCH) && defined(linux_HOST_OS)
196 __asm__("obscure_ccall_ret_code:\n\t"
201 extern void obscure_ccall_ret_code(void);
204 #if defined(powerpc_HOST_ARCH) || defined(powerpc64_HOST_ARCH)
205 #if !(defined(powerpc_HOST_ARCH) && defined(linux_HOST_OS))
207 /* !!! !!! WARNING: !!! !!!
208 * This structure is accessed from AdjustorAsm.s
209 * Any changes here have to be mirrored in the offsets there.
212 typedef struct AdjustorStub {
213 #if defined(powerpc_HOST_ARCH) && defined(darwin_HOST_OS)
220 #elif defined(powerpc64_HOST_ARCH) && defined(darwin_HOST_OS)
221 /* powerpc64-darwin: just guessing that it won't use fundescs. */
232 /* fundesc-based ABIs */
241 StgInt negative_framesize;
242 StgInt extrawords_plus_one;
249 createAdjustor(int cconv, StgStablePtr hptr,
252 #if !defined(powerpc_HOST_ARCH) && !defined(powerpc64_HOST_ARCH) && !defined(x86_64_HOST_ARCH)
257 void *adjustor = NULL;
261 case 0: /* _stdcall */
262 #if defined(i386_HOST_ARCH)
263 /* Magic constant computed by inspecting the code length of
264 the following assembly language snippet
265 (offset and machine code prefixed):
267 <0>: 58 popl %eax # temp. remove ret addr..
268 <1>: 68 fd fc fe fa pushl 0xfafefcfd # constant is large enough to
269 # hold a StgStablePtr
270 <6>: 50 pushl %eax # put back ret. addr
271 <7>: b8 fa ef ff 00 movl $0x00ffeffa, %eax # load up wptr
272 <c>: ff e0 jmp %eax # and jump to it.
273 # the callee cleans up the stack
275 adjustor = mallocBytesRWX(14);
277 unsigned char *const adj_code = (unsigned char *)adjustor;
278 adj_code[0x00] = (unsigned char)0x58; /* popl %eax */
280 adj_code[0x01] = (unsigned char)0x68; /* pushl hptr (which is a dword immediate ) */
281 *((StgStablePtr*)(adj_code + 0x02)) = (StgStablePtr)hptr;
283 adj_code[0x06] = (unsigned char)0x50; /* pushl %eax */
285 adj_code[0x07] = (unsigned char)0xb8; /* movl $wptr, %eax */
286 *((StgFunPtr*)(adj_code + 0x08)) = (StgFunPtr)wptr;
288 adj_code[0x0c] = (unsigned char)0xff; /* jmp %eax */
289 adj_code[0x0d] = (unsigned char)0xe0;
295 #if defined(i386_HOST_ARCH)
296 /* Magic constant computed by inspecting the code length of
297 the following assembly language snippet
298 (offset and machine code prefixed):
300 <00>: 68 ef be ad de pushl $0xdeadbeef # constant is large enough to
301 # hold a StgStablePtr
302 <05>: b8 fa ef ff 00 movl $0x00ffeffa, %eax # load up wptr
303 <0a>: 68 ef be ad de pushl $obscure_ccall_ret_code # push the return address
304 <0f>: ff e0 jmp *%eax # jump to wptr
306 The ccall'ing version is a tad different, passing in the return
307 address of the caller to the auto-generated C stub (which enters
308 via the stable pointer.) (The auto-generated C stub is in on this
309 game, don't worry :-)
311 See the comment next to obscure_ccall_ret_code why we need to
312 perform a tail jump instead of a call, followed by some C stack
315 Note: The adjustor makes the assumption that any return value
316 coming back from the C stub is not stored on the stack.
317 That's (thankfully) the case here with the restricted set of
318 return types that we support.
320 adjustor = mallocBytesRWX(17);
322 unsigned char *const adj_code = (unsigned char *)adjustor;
324 adj_code[0x00] = (unsigned char)0x68; /* pushl hptr (which is a dword immediate ) */
325 *((StgStablePtr*)(adj_code+0x01)) = (StgStablePtr)hptr;
327 adj_code[0x05] = (unsigned char)0xb8; /* movl $wptr, %eax */
328 *((StgFunPtr*)(adj_code + 0x06)) = (StgFunPtr)wptr;
330 adj_code[0x0a] = (unsigned char)0x68; /* pushl obscure_ccall_ret_code */
331 *((StgFunPtr*)(adj_code + 0x0b)) =
332 #if !defined(openbsd_HOST_OS)
333 (StgFunPtr)obscure_ccall_ret_code;
335 (StgFunPtr)obscure_ccall_ret_code_dyn;
338 adj_code[0x0f] = (unsigned char)0xff; /* jmp *%eax */
339 adj_code[0x10] = (unsigned char)0xe0;
341 #elif defined(x86_64_HOST_ARCH)
348 %rdi,%rsi,%rdx,%rcx,%r8,%r9 = arg0..arg6
350 if there are <6 integer args, then we can just push the
351 StablePtr into %edi and shuffle the other args up.
353 If there are >=6 integer args, then we have to flush one arg
354 to the stack, and arrange to adjust the stack ptr on return.
355 The stack will be rearranged to this:
360 return address *** <-- dummy arg in stub fn.
362 obscure_ccall_ret_code
364 This unfortunately means that the type of the stub function
365 must have a dummy argument for the original return address
366 pointer inserted just after the 6th integer argument.
368 Code for the simple case:
370 0: 4d 89 c1 mov %r8,%r9
371 3: 49 89 c8 mov %rcx,%r8
372 6: 48 89 d1 mov %rdx,%rcx
373 9: 48 89 f2 mov %rsi,%rdx
374 c: 48 89 fe mov %rdi,%rsi
375 f: 48 8b 3d 0a 00 00 00 mov 10(%rip),%rdi
376 16: e9 00 00 00 00 jmpq stub_function
378 20: .quad 0 # aligned on 8-byte boundary
381 And the version for >=6 integer arguments:
384 2: 68 00 00 00 00 pushq $obscure_ccall_ret_code
385 7: 4d 89 c1 mov %r8,%r9
386 a: 49 89 c8 mov %rcx,%r8
387 d: 48 89 d1 mov %rdx,%rcx
388 10: 48 89 f2 mov %rsi,%rdx
389 13: 48 89 fe mov %rdi,%rsi
390 16: 48 8b 3d 0b 00 00 00 mov 11(%rip),%rdi
391 1d: e9 00 00 00 00 jmpq stub_function
393 28: .quad 0 # aligned on 8-byte boundary
396 /* we assume the small code model (gcc -mcmodel=small) where
397 * all symbols are <2^32, so hence wptr should fit into 32 bits.
399 ASSERT(((long)wptr >> 32) == 0);
405 // determine whether we have 6 or more integer arguments,
406 // and therefore need to flush one to the stack.
407 for (c = typeString; *c != '\0'; c++) {
408 if (*c == 'i' || *c == 'l') i++;
413 adjustor = mallocBytesRWX(40);
415 *(StgInt32 *)adjustor = 0x49c1894d;
416 *(StgInt32 *)(adjustor+4) = 0x8948c889;
417 *(StgInt32 *)(adjustor+8) = 0xf28948d1;
418 *(StgInt32 *)(adjustor+12) = 0x48fe8948;
419 *(StgInt32 *)(adjustor+16) = 0x000a3d8b;
420 *(StgInt32 *)(adjustor+20) = 0x00e90000;
422 *(StgInt32 *)(adjustor+23) =
423 (StgInt32)((StgInt64)wptr - (StgInt64)adjustor - 27);
424 *(StgInt64 *)(adjustor+32) = (StgInt64)hptr;
428 adjustor = mallocBytesRWX(48);
430 *(StgInt32 *)adjustor = 0x00685141;
431 *(StgInt32 *)(adjustor+4) = 0x4d000000;
432 *(StgInt32 *)(adjustor+8) = 0x8949c189;
433 *(StgInt32 *)(adjustor+12) = 0xd18948c8;
434 *(StgInt32 *)(adjustor+16) = 0x48f28948;
435 *(StgInt32 *)(adjustor+20) = 0x8b48fe89;
436 *(StgInt32 *)(adjustor+24) = 0x00000b3d;
437 *(StgInt32 *)(adjustor+28) = 0x0000e900;
439 *(StgInt32 *)(adjustor+3) =
440 (StgInt32)(StgInt64)obscure_ccall_ret_code;
441 *(StgInt32 *)(adjustor+30) =
442 (StgInt32)((StgInt64)wptr - (StgInt64)adjustor - 34);
443 *(StgInt64 *)(adjustor+40) = (StgInt64)hptr;
446 #elif defined(sparc_HOST_ARCH)
447 /* Magic constant computed by inspecting the code length of the following
448 assembly language snippet (offset and machine code prefixed):
450 <00>: 9C23A008 sub %sp, 8, %sp ! make room for %o4/%o5 in caller's frame
451 <04>: DA23A060 st %o5, [%sp + 96] ! shift registers by 2 positions
452 <08>: D823A05C st %o4, [%sp + 92]
453 <0C>: 9A10000B mov %o3, %o5
454 <10>: 9810000A mov %o2, %o4
455 <14>: 96100009 mov %o1, %o3
456 <18>: 94100008 mov %o0, %o2
457 <1C>: 13000000 sethi %hi(wptr), %o1 ! load up wptr (1 of 2)
458 <20>: 11000000 sethi %hi(hptr), %o0 ! load up hptr (1 of 2)
459 <24>: 81C26000 jmp %o1 + %lo(wptr) ! jump to wptr (load 2 of 2)
460 <28>: 90122000 or %o0, %lo(hptr), %o0 ! load up hptr (2 of 2, delay slot)
461 <2C> 00000000 ! place for getting hptr back easily
463 ccall'ing on SPARC is easy, because we are quite lucky to push a
464 multiple of 8 bytes (1 word hptr + 1 word dummy arg) in front of the
465 existing arguments (note that %sp must stay double-word aligned at
466 all times, see ABI spec at http://www.sparc.org/standards/psABI3rd.pdf).
467 To do this, we extend the *caller's* stack frame by 2 words and shift
468 the output registers used for argument passing (%o0 - %o5, we are a *leaf*
469 procedure because of the tail-jump) by 2 positions. This makes room in
470 %o0 and %o1 for the additional arguments, namely hptr and a dummy (used
471 for destination addr of jump on SPARC, return address on x86, ...). This
472 shouldn't cause any problems for a C-like caller: alloca is implemented
473 similarly, and local variables should be accessed via %fp, not %sp. In a
474 nutshell: This should work! (Famous last words! :-)
476 adjustor = mallocBytesRWX(4*(11+1));
478 unsigned long *const adj_code = (unsigned long *)adjustor;
480 adj_code[ 0] = 0x9C23A008UL; /* sub %sp, 8, %sp */
481 adj_code[ 1] = 0xDA23A060UL; /* st %o5, [%sp + 96] */
482 adj_code[ 2] = 0xD823A05CUL; /* st %o4, [%sp + 92] */
483 adj_code[ 3] = 0x9A10000BUL; /* mov %o3, %o5 */
484 adj_code[ 4] = 0x9810000AUL; /* mov %o2, %o4 */
485 adj_code[ 5] = 0x96100009UL; /* mov %o1, %o3 */
486 adj_code[ 6] = 0x94100008UL; /* mov %o0, %o2 */
487 adj_code[ 7] = 0x13000000UL; /* sethi %hi(wptr), %o1 */
488 adj_code[ 7] |= ((unsigned long)wptr) >> 10;
489 adj_code[ 8] = 0x11000000UL; /* sethi %hi(hptr), %o0 */
490 adj_code[ 8] |= ((unsigned long)hptr) >> 10;
491 adj_code[ 9] = 0x81C26000UL; /* jmp %o1 + %lo(wptr) */
492 adj_code[ 9] |= ((unsigned long)wptr) & 0x000003FFUL;
493 adj_code[10] = 0x90122000UL; /* or %o0, %lo(hptr), %o0 */
494 adj_code[10] |= ((unsigned long)hptr) & 0x000003FFUL;
496 adj_code[11] = (unsigned long)hptr;
499 asm("flush %0" : : "r" (adj_code ));
500 asm("flush %0" : : "r" (adj_code + 2));
501 asm("flush %0" : : "r" (adj_code + 4));
502 asm("flush %0" : : "r" (adj_code + 6));
503 asm("flush %0" : : "r" (adj_code + 10));
505 /* max. 5 instructions latency, and we need at least 1 for returning */
511 #elif defined(alpha_HOST_ARCH)
512 /* Magic constant computed by inspecting the code length of
513 the following assembly language snippet
514 (offset and machine code prefixed; note that the machine code
515 shown is longwords stored in little-endian order):
517 <00>: 46520414 mov a2, a4
518 <04>: 46100412 mov a0, a2
519 <08>: a61b0020 ldq a0, 0x20(pv) # load up hptr
520 <0c>: 46730415 mov a3, a5
521 <10>: a77b0028 ldq pv, 0x28(pv) # load up wptr
522 <14>: 46310413 mov a1, a3
523 <18>: 6bfb---- jmp (pv), <hint> # jump to wptr (with hint)
524 <1c>: 00000000 # padding for alignment
525 <20>: [8 bytes for hptr quadword]
526 <28>: [8 bytes for wptr quadword]
528 The "computed" jump at <08> above is really a jump to a fixed
529 location. Accordingly, we place an always-correct hint in the
530 jump instruction, namely the address offset from <0c> to wptr,
531 divided by 4, taking the lowest 14 bits.
533 We only support passing 4 or fewer argument words, for the same
534 reason described under sparc_HOST_ARCH above by JRS, 21 Aug 01.
535 On the Alpha the first 6 integer arguments are in a0 through a5,
536 and the rest on the stack. Hence we want to shuffle the original
537 caller's arguments by two.
539 On the Alpha the calling convention is so complex and dependent
540 on the callee's signature -- for example, the stack pointer has
541 to be a multiple of 16 -- that it seems impossible to me [ccshan]
542 to handle the general case correctly without changing how the
543 adjustor is called from C. For now, our solution of shuffling
544 registers only and ignoring the stack only works if the original
545 caller passed 4 or fewer argument words.
547 TODO: Depending on how much allocation overhead stgMallocBytes uses for
548 header information (more precisely, if the overhead is no more than
549 4 bytes), we should move the first three instructions above down by
550 4 bytes (getting rid of the nop), hence saving memory. [ccshan]
552 ASSERT(((StgWord64)wptr & 3) == 0);
553 adjustor = mallocBytesRWX(48);
555 StgWord64 *const code = (StgWord64 *)adjustor;
557 code[0] = 0x4610041246520414L;
558 code[1] = 0x46730415a61b0020L;
559 code[2] = 0x46310413a77b0028L;
560 code[3] = 0x000000006bfb0000L
561 | (((StgWord32*)(wptr) - (StgWord32*)(code) - 3) & 0x3fff);
563 code[4] = (StgWord64)hptr;
564 code[5] = (StgWord64)wptr;
566 /* Ensure that instruction cache is consistent with our new code */
567 __asm__ volatile("call_pal %0" : : "i" (PAL_imb));
569 #elif defined(powerpc_HOST_ARCH) && defined(linux_HOST_OS)
571 #define OP_LO(op,lo) ((((unsigned)(op)) << 16) | (((unsigned)(lo)) & 0xFFFF))
572 #define OP_HI(op,hi) ((((unsigned)(op)) << 16) | (((unsigned)(hi)) >> 16))
574 /* The PowerPC Linux (32-bit) calling convention is annoyingly complex.
575 We need to calculate all the details of the stack frame layout,
576 taking into account the types of all the arguments, and then
577 generate code on the fly. */
579 int src_gpr = 3, dst_gpr = 5;
581 int src_offset = 0, dst_offset = 0;
582 int n = strlen(typeString),i;
583 int src_locs[n], dst_locs[n];
588 Calculate where the arguments should go.
589 src_locs[] will contain the locations of the arguments in the
590 original stack frame passed to the adjustor.
591 dst_locs[] will contain the locations of the arguments after the
592 adjustor runs, on entry to the wrapper proc pointed to by wptr.
594 This algorithm is based on the one described on page 3-19 of the
595 System V ABI PowerPC Processor Supplement.
597 for(i=0;typeString[i];i++)
599 char t = typeString[i];
600 if((t == 'f' || t == 'd') && fpr <= 8)
601 src_locs[i] = dst_locs[i] = -32-(fpr++);
604 if(t == 'l' && src_gpr <= 9)
606 if((src_gpr & 1) == 0)
608 src_locs[i] = -src_gpr;
611 else if(t == 'i' && src_gpr <= 10)
613 src_locs[i] = -(src_gpr++);
617 if(t == 'l' || t == 'd')
622 src_locs[i] = src_offset;
623 src_offset += (t == 'l' || t == 'd') ? 8 : 4;
626 if(t == 'l' && dst_gpr <= 9)
628 if((dst_gpr & 1) == 0)
630 dst_locs[i] = -dst_gpr;
633 else if(t == 'i' && dst_gpr <= 10)
635 dst_locs[i] = -(dst_gpr++);
639 if(t == 'l' || t == 'd')
644 dst_locs[i] = dst_offset;
645 dst_offset += (t == 'l' || t == 'd') ? 8 : 4;
650 frameSize = dst_offset + 8;
651 frameSize = (frameSize+15) & ~0xF;
656 // allocate space for at most 4 insns per parameter
657 // plus 14 more instructions.
658 adjustor = mallocBytesRWX(4 * (4*n + 14));
659 code = (unsigned*)adjustor;
661 *code++ = 0x48000008; // b *+8
662 // * Put the hptr in a place where freeHaskellFunctionPtr
664 *code++ = (unsigned) hptr;
666 // * save the link register
667 *code++ = 0x7c0802a6; // mflr r0;
668 *code++ = 0x90010004; // stw r0, 4(r1);
669 // * and build a new stack frame
670 *code++ = OP_LO(0x9421, -frameSize); // stwu r1, -frameSize(r1)
672 // * now generate instructions to copy arguments
673 // from the old stack frame into the new stack frame.
676 if(src_locs[i] < -32)
677 ASSERT(dst_locs[i] == src_locs[i]);
678 else if(src_locs[i] < 0)
681 ASSERT(typeString[i] != 'f' && typeString[i] != 'd');
684 ASSERT(dst_locs[i] > -32);
685 // dst is in GPR, too.
687 if(typeString[i] == 'l')
691 | ((-dst_locs[i]+1) << 16)
692 | ((-src_locs[i]+1) << 11)
693 | ((-src_locs[i]+1) << 21);
697 | ((-dst_locs[i]) << 16)
698 | ((-src_locs[i]) << 11)
699 | ((-src_locs[i]) << 21);
703 if(typeString[i] == 'l')
705 // stw src+1, dst_offset+4(r1)
707 | ((-src_locs[i]+1) << 21)
711 // stw src, dst_offset(r1)
713 | ((-src_locs[i]) << 21)
719 ASSERT(dst_locs[i] >= 0);
720 ASSERT(typeString[i] != 'f' && typeString[i] != 'd');
722 if(typeString[i] == 'l')
724 // lwz r0, src_offset(r1)
726 | (src_locs[i] + frameSize + 8 + 4);
727 // stw r0, dst_offset(r1)
729 | (dst_locs[i] + 8 + 4);
731 // lwz r0, src_offset(r1)
733 | (src_locs[i] + frameSize + 8);
734 // stw r0, dst_offset(r1)
740 // * hptr will be the new first argument.
742 *code++ = OP_HI(0x3c60, hptr);
743 // ori r3,r3,lo(hptr)
744 *code++ = OP_LO(0x6063, hptr);
746 // * we need to return to a piece of code
747 // which will tear down the stack frame.
748 // lis r11,hi(obscure_ccall_ret_code)
749 *code++ = OP_HI(0x3d60, obscure_ccall_ret_code);
750 // ori r11,r11,lo(obscure_ccall_ret_code)
751 *code++ = OP_LO(0x616b, obscure_ccall_ret_code);
753 *code++ = 0x7d6803a6;
757 *code++ = OP_HI(0x3d60, wptr);
758 // ori r11,r11,lo(wptr)
759 *code++ = OP_LO(0x616b, wptr);
761 *code++ = 0x7d6903a6;
763 *code++ = 0x4e800420;
765 // Flush the Instruction cache:
767 unsigned *p = adjustor;
770 __asm__ volatile ("dcbf 0,%0\n\tsync\n\ticbi 0,%0"
774 __asm__ volatile ("sync\n\tisync");
778 #elif defined(powerpc_HOST_ARCH) || defined(powerpc64_HOST_ARCH)
780 #define OP_LO(op,lo) ((((unsigned)(op)) << 16) | (((unsigned)(lo)) & 0xFFFF))
781 #define OP_HI(op,hi) ((((unsigned)(op)) << 16) | (((unsigned)(hi)) >> 16))
783 /* The following code applies to all PowerPC and PowerPC64 platforms
784 whose stack layout is based on the AIX ABI.
786 Besides (obviously) AIX, this includes
787 Mac OS 9 and BeOS/PPC (may they rest in peace),
788 which use the 32-bit AIX ABI
790 which uses the 64-bit AIX ABI
791 and Darwin (Mac OS X),
792 which uses the same stack layout as AIX,
793 but no function descriptors.
795 The actual stack-frame shuffling is implemented out-of-line
796 in the function adjustorCode, in AdjustorAsm.S.
797 Here, we set up an AdjustorStub structure, which
798 is a function descriptor (on platforms that have function
799 descriptors) or a short piece of stub code (on Darwin) to call
800 adjustorCode with a pointer to the AdjustorStub struct loaded
803 One nice thing about this is that there is _no_ code generated at
804 runtime on the platforms that have function descriptors.
806 AdjustorStub *adjustorStub;
807 int sz = 0, extra_sz, total_sz;
809 // from AdjustorAsm.s
810 // not declared as a function so that AIX-style
811 // fundescs can never get in the way.
812 extern void *adjustorCode;
815 adjustorStub = stgMallocBytes(sizeof(AdjustorStub), "createAdjustor");
817 adjustorStub = mallocBytesRWX(sizeof(AdjustorStub));
819 adjustor = adjustorStub;
821 adjustorStub->code = (void*) &adjustorCode;
824 // function descriptors are a cool idea.
825 // We don't need to generate any code at runtime.
826 adjustorStub->toc = adjustorStub;
829 // no function descriptors :-(
830 // We need to do things "by hand".
831 #if defined(powerpc_HOST_ARCH)
832 // lis r2, hi(adjustorStub)
833 adjustorStub->lis = OP_HI(0x3c40, adjustorStub);
834 // ori r2, r2, lo(adjustorStub)
835 adjustorStub->ori = OP_LO(0x6042, adjustorStub);
837 adjustorStub->lwz = OP_LO(0x8002, (char*)(&adjustorStub->code)
838 - (char*)adjustorStub);
840 adjustorStub->mtctr = 0x7c0903a6;
842 adjustorStub->bctr = 0x4e800420;
844 barf("adjustor creation not supported on this platform");
847 // Flush the Instruction cache:
849 int n = sizeof(AdjustorStub)/sizeof(unsigned);
850 unsigned *p = (unsigned*)adjustor;
853 __asm__ volatile ("dcbf 0,%0\n\tsync\n\ticbi 0,%0"
857 __asm__ volatile ("sync\n\tisync");
861 // Calculate the size of the stack frame, in words.
864 char t = *typeString++;
868 #if defined(powerpc_HOST_ARCH)
869 // on 32-bit platforms, Double and Int64 occupy two words.
875 // everything else is one word.
880 // The first eight words of the parameter area
881 // are just "backing store" for the parameters passed in
882 // the GPRs. extra_sz is the number of words beyond those first
888 // Calculate the total size of the stack frame.
889 total_sz = (6 /* linkage area */
890 + 8 /* minimum parameter area */
891 + 2 /* two extra arguments */
892 + extra_sz)*sizeof(StgWord);
894 // align to 16 bytes.
895 // AIX only requires 8 bytes, but who cares?
896 total_sz = (total_sz+15) & ~0xF;
898 // Fill in the information that adjustorCode in AdjustorAsm.S
899 // will use to create a new stack frame with the additional args.
900 adjustorStub->hptr = hptr;
901 adjustorStub->wptr = wptr;
902 adjustorStub->negative_framesize = -total_sz;
903 adjustorStub->extrawords_plus_one = extra_sz + 1;
906 #elif defined(ia64_HOST_ARCH)
908 Up to 8 inputs are passed in registers. We flush the last two inputs to
909 the stack, initially into the 16-byte scratch region left by the caller.
910 We then shuffle the others along by 4 (taking 2 registers for ourselves
911 to save return address and previous function state - we need to come back
912 here on the way out to restore the stack, so this is a real function
913 rather than just a trampoline).
915 The function descriptor we create contains the gp of the target function
916 so gp is already loaded correctly.
918 [MLX] alloc r16=ar.pfs,10,2,0
920 [MII] st8.spill [r12]=r38,8 // spill in6 (out4)
921 mov r41=r37 // out7 = in5 (out3)
922 mov r40=r36;; // out6 = in4 (out2)
923 [MII] st8.spill [r12]=r39 // spill in7 (out5)
925 mov r38=r34;; // out4 = in2 (out0)
926 [MII] mov r39=r35 // out5 = in3 (out1)
927 mov r37=r33 // out3 = in1 (loc1)
928 mov r36=r32 // out2 = in0 (loc0)
929 [MLX] adds r12=-24,r12 // update sp
930 movl r34=hptr;; // out0 = hptr
931 [MIB] mov r33=r16 // loc1 = ar.pfs
932 mov r32=b0 // loc0 = retaddr
933 br.call.sptk.many b0=b6;;
935 [MII] adds r12=-16,r12
940 br.ret.sptk.many b0;;
943 /* These macros distribute a long constant into the two words of an MLX bundle */
944 #define BITS(val,start,count) (((val) >> (start)) & ((1 << (count))-1))
945 #define MOVL_LOWORD(val) (BITS(val,22,18) << 46)
946 #define MOVL_HIWORD(val) (BITS(val,40,23) | (BITS(val,0,7) << 36) | (BITS(val,7,9) << 50) \
947 | (BITS(val,16,5) << 55) | (BITS(val,21,1) << 44) | BITS(val,63,1) << 59)
951 IA64FunDesc *wdesc = (IA64FunDesc *)wptr;
952 StgWord64 wcode = wdesc->ip;
956 /* we allocate on the Haskell heap since malloc'd memory isn't executable - argh */
957 adjustor = stgAllocStable(sizeof(IA64FunDesc)+18*8, &stable);
959 fdesc = (IA64FunDesc *)adjustor;
960 code = (StgWord64 *)(fdesc + 1);
961 fdesc->ip = (StgWord64)code;
962 fdesc->gp = wdesc->gp;
964 code[0] = 0x0000058004288004 | MOVL_LOWORD(wcode);
965 code[1] = 0x6000000220000000 | MOVL_HIWORD(wcode);
966 code[2] = 0x029015d818984001;
967 code[3] = 0x8401200500420094;
968 code[4] = 0x886011d8189c0001;
969 code[5] = 0x84011004c00380c0;
970 code[6] = 0x0250210046013800;
971 code[7] = 0x8401000480420084;
972 code[8] = 0x0000233f19a06005 | MOVL_LOWORD((StgWord64)hptr);
973 code[9] = 0x6000000440000000 | MOVL_HIWORD((StgWord64)hptr);
974 code[10] = 0x0200210020010811;
975 code[11] = 0x1080006800006200;
976 code[12] = 0x0000210018406000;
977 code[13] = 0x00aa021000038005;
978 code[14] = 0x000000010000001d;
979 code[15] = 0x0084000880000200;
981 /* save stable pointers in convenient form */
982 code[16] = (StgWord64)hptr;
983 code[17] = (StgWord64)stable;
986 barf("adjustor creation not supported on this platform");
/*
 * freeHaskellFunctionPtr: release an adjustor previously handed out by
 * createAdjustor.  Each architecture branch first sanity-checks that
 * `ptr` really points at code this module generated (by comparing the
 * leading machine-code bytes against the constants written in
 * createAdjustor), then frees the embedded StgStablePtr so the Haskell
 * closure can be garbage collected.
 * NOTE(review): the return type line, braces and the final release of
 * the code block itself are elided from this excerpt.
 */
1001 freeHaskellFunctionPtr(void* ptr)
1003 #if defined(i386_HOST_ARCH)
/* First byte is 0x68 (pushl — ccall adjustor) or 0x58 (popl — stdcall
   adjustor); anything else was not produced by createAdjustor. */
1004 if ( *(unsigned char*)ptr != 0x68 &&
1005 *(unsigned char*)ptr != 0x58 ) {
1006 errorBelch("freeHaskellFunctionPtr: not for me, guv! %p\n", ptr);
1010 /* Free the stable pointer first..*/
1011 if (*(unsigned char*)ptr == 0x68) { /* Aha, a ccall adjustor! */
1012 freeStablePtr(*((StgStablePtr*)((unsigned char*)ptr + 0x01)));
/* stdcall adjustor: hptr immediate sits after popl+pushl, at +0x02. */
1014 freeStablePtr(*((StgStablePtr*)((unsigned char*)ptr + 0x02)));
1016 #elif defined(x86_64_HOST_ARCH)
/* 0x894d = first two bytes of "mov %r8,%r9" (<6 int args; hptr quad at
   +32); 0x5141 = "push %r9" prefix (>=6 int args; hptr quad at +40). */
1017 if ( *(StgWord16 *)ptr == 0x894d ) {
1018 freeStablePtr(*(StgStablePtr*)(ptr+32));
1019 } else if ( *(StgWord16 *)ptr == 0x5141 ) {
1020 freeStablePtr(*(StgStablePtr*)(ptr+40));
1022 errorBelch("freeHaskellFunctionPtr: not for me, guv! %p\n", ptr);
1025 #elif defined(sparc_HOST_ARCH)
/* 0x9C23A008 = "sub %sp, 8, %sp", the adjustor's first instruction;
   the hptr copy lives in word 11 (see createAdjustor). */
1026 if ( *(unsigned long*)ptr != 0x9C23A008UL ) {
1027 errorBelch("freeHaskellFunctionPtr: not for me, guv! %p\n", ptr);
1031 /* Free the stable pointer first..*/
1032 freeStablePtr(*((StgStablePtr*)((unsigned long*)ptr + 11)));
1033 #elif defined(alpha_HOST_ARCH)
/* NOTE(review): the creation side visible above writes
   code[0] = 0x4610041246520414L and stores hptr at byte offset 0x20,
   yet this check compares against 0xa77b0018a61b0010L and frees the
   stable ptr at +0x10.  That looks inconsistent with createAdjustor —
   confirm against the full file before relying on this branch. */
1034 if ( *(StgWord64*)ptr != 0xa77b0018a61b0010L ) {
1035 errorBelch("freeHaskellFunctionPtr: not for me, guv! %p\n", ptr);
1039 /* Free the stable pointer first..*/
1040 freeStablePtr(*((StgStablePtr*)((unsigned char*)ptr + 0x10)));
1041 #elif defined(powerpc_HOST_ARCH) && defined(linux_HOST_OS)
/* 0x48000008 = "b *+8", the first instruction emitted; hptr is the
   word immediately after it. */
1042 if ( *(StgWord*)ptr != 0x48000008 ) {
1043 errorBelch("freeHaskellFunctionPtr: not for me, guv! %p\n", ptr);
1046 freeStablePtr(((StgStablePtr*)ptr)[1]);
1047 #elif defined(powerpc_HOST_ARCH) || defined(powerpc64_HOST_ARCH)
/* AIX/Darwin ABI: ptr is an AdjustorStub whose code field must point
   at the shared adjustorCode routine from AdjustorAsm.S. */
1048 extern void* adjustorCode;
1049 if ( ((AdjustorStub*)ptr)->code != (StgFunPtr) &adjustorCode ) {
1050 errorBelch("freeHaskellFunctionPtr: not for me, guv! %p\n", ptr);
1053 freeStablePtr(((AdjustorStub*)ptr)->hptr);
1054 #elif defined(ia64_HOST_ARCH)
/* Our adjustors are self-referential function descriptors (ip points
   just past the descriptor); createAdjustor saved hptr's stable ptr
   and the array's own stable ptr in code[16] and code[17]. */
1055 IA64FunDesc *fdesc = (IA64FunDesc *)ptr;
1056 StgWord64 *code = (StgWord64 *)(fdesc+1);
1058 if (fdesc->ip != (StgWord64)code) {
1059 errorBelch("freeHaskellFunctionPtr: not for me, guv! %p\n", ptr);
1062 freeStablePtr((StgStablePtr)code[16]);
1063 freeStablePtr((StgStablePtr)code[17]);
/* Scrub the first byte — presumably so a stale/double free fails the
   magic-byte checks above; confirm intent against the full file. */
1068 *((unsigned char*)ptr) = '\0';
1075 * Function: initAdjustor()
1077 * Perform initialisation of adjustor thunk layer (if needed.)
1082 #if defined(i386_HOST_ARCH) && defined(openbsd_HOST_OS)
1083 obscure_ccall_ret_code_dyn = mallocBytesRWX(4);
1084 obscure_ccall_ret_code_dyn[0] = ((unsigned char *)obscure_ccall_ret_code)[0];
1085 obscure_ccall_ret_code_dyn[1] = ((unsigned char *)obscure_ccall_ret_code)[1];
1086 obscure_ccall_ret_code_dyn[2] = ((unsigned char *)obscure_ccall_ret_code)[2];
1087 obscure_ccall_ret_code_dyn[3] = ((unsigned char *)obscure_ccall_ret_code)[3];