1 /* -----------------------------------------------------------------------------
2 * Foreign export adjustor thunks
6 * ---------------------------------------------------------------------------*/
8 /* A little bit of background...
10 An adjustor thunk is a dynamically allocated code snippet that allows
11 Haskell closures to be viewed as C function pointers.
13 Stable pointers provide a way for the outside world to get access to,
14 and evaluate, Haskell heap objects, with the RTS providing a small
15 range of ops for doing so. So, assuming we've got a stable pointer in
16 our hand in C, we can jump into the Haskell world and evaluate a callback
17 procedure, say. This works OK in some cases where callbacks are used, but
18 does require the external code to know about stable pointers and how to deal
19 with them. We'd like to hide the Haskell-nature of a callback and have it
20 be invoked just like any other C function pointer.
22 Enter adjustor thunks. An adjustor thunk is a little piece of code
23 that's generated on-the-fly (one per Haskell closure being exported)
24 that, when entered using some 'universal' calling convention (e.g., the
25 C calling convention on platform X), pushes an implicit stable pointer
26 (to the Haskell callback) before calling another (static) C function stub
27 which takes care of entering the Haskell code via its stable pointer.
29 An adjustor thunk is allocated on the C heap, and is called from within
30 Haskell just before handing out the function pointer to the Haskell (IO)
31 action. User code should never have to invoke it explicitly.
33 An adjustor thunk differs from a C function pointer in one respect: when
34 the code is through with it, it has to be freed in order to release Haskell
35 and C resources. Failure to do so result in memory leaks on both the C and
39 #include "PosixSource.h"
41 #include "RtsExternal.h"
/* NOTE(review): this listing is elided -- the embedded original line numbers
   skip ranges, so matching #else/#endif branches and the asm string bodies
   of the wrapper functions below are not visible here. */
49 #if defined(powerpc_HOST_ARCH) && defined(linux_HOST_OS)
53 #ifdef LEADING_UNDERSCORE
54 #define UNDERSCORE "_"
58 #if defined(i386_HOST_ARCH)
/* Why the i386 ccall adjustor must TAIL-JUMP into the C stub: the Haskell
   code it enters may free this very adjustor via freeHaskellFunctionPtr(),
   so we must never return through the (possibly freed) thunk.  The stack
   fixup therefore lives in static memory: obscure_ccall_ret_code. */
60 Now here's something obscure for you:
62 When generating an adjustor thunk that uses the C calling
63 convention, we have to make sure that the thunk kicks off
64 the process of jumping into Haskell with a tail jump. Why?
65 Because as a result of jumping in into Haskell we may end
66 up freeing the very adjustor thunk we came from using
67 freeHaskellFunctionPtr(). Hence, we better not return to
68 the adjustor code on our way out, since it could by then
71 The fix is readily at hand, just include the opcodes
72 for the C stack fixup code that we need to perform when
73 returning in some static piece of memory and arrange
74 to return to it before tail jumping from the adjustor thunk.
/* Dummy host function: its asm body (elided in this listing) defines the
   global label obscure_ccall_ret_code, declared extern just below. */
76 static void GNUC3_ATTRIBUTE(used) obscure_ccall_wrapper(void)
79 ".globl " UNDERSCORE "obscure_ccall_ret_code\n"
80 UNDERSCORE "obscure_ccall_ret_code:\n\t"
85 extern void obscure_ccall_ret_code(void);
/* OpenBSD: a runtime copy of the return code is kept in RWX memory
   (filled in by initAdjustor at the bottom of the file) -- presumably to
   cope with W^X page protections; confirm against upstream. */
87 #if defined(openbsd_HOST_OS)
88 static unsigned char *obscure_ccall_ret_code_dyn;
/* x86_64: same static-return-path trick as i386, asm body elided. */
93 #if defined(x86_64_HOST_ARCH)
94 static void GNUC3_ATTRIBUTE(used) obscure_ccall_wrapper(void)
97 ".globl " UNDERSCORE "obscure_ccall_ret_code\n"
98 UNDERSCORE "obscure_ccall_ret_code:\n\t"
103 extern void obscure_ccall_ret_code(void);
106 #if defined(alpha_HOST_ARCH)
107 /* To get the definition of PAL_imb: */
108 # if defined(linux_HOST_OS)
109 # include <asm/pal.h>
111 # include <machine/pal.h>
115 #if defined(ia64_HOST_ARCH)
/* ia64 function pointers point at a descriptor (code address + gp), not at
   code directly; struct fields are elided in this listing. */
118 /* Layout of a function descriptor */
119 typedef struct _IA64FunDesc {
/* stgAllocStable: allocate size_in_bytes of storage on the Haskell heap as
   an StgArrWords closure, pin it via a stable pointer (returned through
   *stable) so the GC keeps it alive, and return a pointer to the payload.
   Used below on ia64, where malloc'd memory is not executable.
   (Return-type line and braces are elided in this listing.) */
125 stgAllocStable(size_t size_in_bytes, StgStablePtr *stable)
128 nat data_size_in_words, total_size_in_words;
130 /* round up to a whole number of words */
/* NOTE(review): the usual round-up idiom is (x + sizeof(W_) - 1)/sizeof(W_);
   the "+ sizeof(W_) + 1" here over-allocates by roughly one word.  It never
   under-allocates, so it is harmless, but worth confirming against the
   upstream source. */
131 data_size_in_words = (size_in_bytes + sizeof(W_) + 1) / sizeof(W_);
132 total_size_in_words = sizeofW(StgArrWords) + data_size_in_words;
134 /* allocate and fill it in */
135 arr = (StgArrWords *)allocate(total_size_in_words);
136 SET_ARR_HDR(arr, &stg_ARR_WORDS_info, CCCS, data_size_in_words);
138 /* obtain a stable ptr */
139 *stable = getStablePtr((StgPtr)arr);
141 /* and return a ptr to the goods inside the array */
142 return(&(arr->payload));
/* PowerPC Linux: static return-path code defined in file-scope asm
   (asm body elided in this listing). */
146 #if defined(powerpc_HOST_ARCH) && defined(linux_HOST_OS)
147 __asm__("obscure_ccall_ret_code:\n\t"
152 extern void obscure_ccall_ret_code(void);
155 #if defined(powerpc_HOST_ARCH) || defined(powerpc64_HOST_ARCH)
156 #if !(defined(powerpc_HOST_ARCH) && defined(linux_HOST_OS))
158 /* !!! !!! WARNING: !!! !!!
159 * This structure is accessed from AdjustorAsm.s
160 * Any changes here have to be mirrored in the offsets there.
/* AdjustorStub doubles as a function descriptor (fundesc-based ABIs) or a
   short code stub (Darwin); the alternative field lists chosen by the #if
   branches below are partially elided in this listing. */
163 typedef struct AdjustorStub {
164 #if defined(powerpc_HOST_ARCH) && defined(darwin_HOST_OS)
171 #elif defined(powerpc64_HOST_ARCH) && defined(darwin_HOST_OS)
172 /* powerpc64-darwin: just guessing that it won't use fundescs. */
183 /* fundesc-based ABIs */
/* Filled in by createAdjustor and consumed by adjustorCode (AdjustorAsm.S):
   negative_framesize = -(total stack frame size in bytes);
   extrawords_plus_one = words beyond the 8-word parameter backing store,
   plus one. */
192 StgInt negative_framesize;
193 StgInt extrawords_plus_one;
/* createAdjustor: build a fresh machine-code thunk that, when invoked with
   the platform's C calling convention, inserts hptr (the StablePtr to the
   exported Haskell callback) as an extra leading argument and tail-jumps to
   the static C stub wptr.  cconv selects the convention (case 0 is labelled
   _stdcall below; the ccall case label itself is elided in this listing).
   NOTE(review): this listing is elided -- the embedded original line numbers
   skip ranges (missing case labels, braces and #else lines), so offsets and
   control flow below should be checked against the upstream source. */
200 createAdjustor(int cconv, StgStablePtr hptr,
203 #if !defined(powerpc_HOST_ARCH) && !defined(powerpc64_HOST_ARCH) && !defined(x86_64_HOST_ARCH)
208 void *adjustor = NULL;
212 case 0: /* _stdcall */
213 #if defined(i386_HOST_ARCH)
214 /* Magic constant computed by inspecting the code length of
215 the following assembly language snippet
216 (offset and machine code prefixed):
218 <0>: 58 popl %eax # temp. remove ret addr..
219 <1>: 68 fd fc fe fa pushl 0xfafefcfd # constant is large enough to
220 # hold a StgStablePtr
221 <6>: 50 pushl %eax # put back ret. addr
222 <7>: b8 fa ef ff 00 movl $0x00ffeffa, %eax # load up wptr
223 <c>: ff e0 jmp %eax # and jump to it.
224 # the callee cleans up the stack
/* stdcall: callee pops the arguments, so no return-path fixup is needed
   and we can simply re-push the caller's return address. */
226 adjustor = stgMallocBytesRWX(14);
228 unsigned char *const adj_code = (unsigned char *)adjustor;
229 adj_code[0x00] = (unsigned char)0x58; /* popl %eax */
231 adj_code[0x01] = (unsigned char)0x68; /* pushl hptr (which is a dword immediate ) */
232 *((StgStablePtr*)(adj_code + 0x02)) = (StgStablePtr)hptr;
234 adj_code[0x06] = (unsigned char)0x50; /* pushl %eax */
236 adj_code[0x07] = (unsigned char)0xb8; /* movl $wptr, %eax */
237 *((StgFunPtr*)(adj_code + 0x08)) = (StgFunPtr)wptr;
239 adj_code[0x0c] = (unsigned char)0xff; /* jmp %eax */
240 adj_code[0x0d] = (unsigned char)0xe0;
246 #if defined(i386_HOST_ARCH)
247 /* Magic constant computed by inspecting the code length of
248 the following assembly language snippet
249 (offset and machine code prefixed):
251 <00>: 68 ef be ad de pushl $0xdeadbeef # constant is large enough to
252 # hold a StgStablePtr
253 <05>: b8 fa ef ff 00 movl $0x00ffeffa, %eax # load up wptr
254 <0a>: 68 ef be ad de pushl $obscure_ccall_ret_code # push the return address
255 <0f>: ff e0 jmp *%eax # jump to wptr
257 The ccall'ing version is a tad different, passing in the return
258 address of the caller to the auto-generated C stub (which enters
259 via the stable pointer.) (The auto-generated C stub is in on this
260 game, don't worry :-)
262 See the comment next to obscure_ccall_ret_code why we need to
263 perform a tail jump instead of a call, followed by some C stack
266 Note: The adjustor makes the assumption that any return value
267 coming back from the C stub is not stored on the stack.
268 That's (thankfully) the case here with the restricted set of
269 return types that we support.
271 adjustor = stgMallocBytesRWX(17);
273 unsigned char *const adj_code = (unsigned char *)adjustor;
275 adj_code[0x00] = (unsigned char)0x68; /* pushl hptr (which is a dword immediate ) */
276 *((StgStablePtr*)(adj_code+0x01)) = (StgStablePtr)hptr;
278 adj_code[0x05] = (unsigned char)0xb8; /* movl $wptr, %eax */
279 *((StgFunPtr*)(adj_code + 0x06)) = (StgFunPtr)wptr;
281 adj_code[0x0a] = (unsigned char)0x68; /* pushl obscure_ccall_ret_code */
/* On OpenBSD the pushed "return address" is the RWX copy made by
   initAdjustor; everywhere else it is the static code itself. */
282 *((StgFunPtr*)(adj_code + 0x0b)) =
283 #if !defined(openbsd_HOST_OS)
284 (StgFunPtr)obscure_ccall_ret_code;
286 (StgFunPtr)obscure_ccall_ret_code_dyn;
289 adj_code[0x0f] = (unsigned char)0xff; /* jmp *%eax */
290 adj_code[0x10] = (unsigned char)0xe0;
292 #elif defined(x86_64_HOST_ARCH)
299 %rdi,%rsi,%rdx,%rcx,%r8,%r9 = arg0..arg6
301 if there are <6 integer args, then we can just push the
302 StablePtr into %edi and shuffle the other args up.
304 If there are >=6 integer args, then we have to flush one arg
305 to the stack, and arrange to adjust the stack ptr on return.
306 The stack will be rearranged to this:
311 return address *** <-- dummy arg in stub fn.
313 obscure_ccall_ret_code
315 This unfortunately means that the type of the stub function
316 must have a dummy argument for the original return address
317 pointer inserted just after the 6th integer argument.
319 Code for the simple case:
321 0: 4d 89 c1 mov %r8,%r9
322 3: 49 89 c8 mov %rcx,%r8
323 6: 48 89 d1 mov %rdx,%rcx
324 9: 48 89 f2 mov %rsi,%rdx
325 c: 48 89 fe mov %rdi,%rsi
326 f: 48 8b 3d 0a 00 00 00 mov 10(%rip),%rdi
327 16: e9 00 00 00 00 jmpq stub_function
329 20: .quad 0 # aligned on 8-byte boundary
332 And the version for >=6 integer arguments:
335 2: 68 00 00 00 00 pushq $obscure_ccall_ret_code
336 7: 4d 89 c1 mov %r8,%r9
337 a: 49 89 c8 mov %rcx,%r8
338 d: 48 89 d1 mov %rdx,%rcx
339 10: 48 89 f2 mov %rsi,%rdx
340 13: 48 89 fe mov %rdi,%rsi
341 16: 48 8b 3d 0b 00 00 00 mov 11(%rip),%rdi
342 1d: e9 00 00 00 00 jmpq stub_function
344 28: .quad 0 # aligned on 8-byte boundary
347 /* we assume the small code model (gcc -mcmmodel=small) where
348 * all symbols are <2^32, so hence wptr should fit into 32 bits.
350 ASSERT(((long)wptr >> 32) == 0);
356 // determine whether we have 6 or more integer arguments,
357 // and therefore need to flush one to the stack.
358 for (c = typeString; *c != '\0'; c++) {
359 if (*c == 'i' || *c == 'l') i++;
/* Simple (<6 integer args) case: emit the code shown at offsets 0..1a
   above as little-endian 32-bit stores; hptr quadword lives at +32. */
364 adjustor = stgMallocBytesRWX(40);
366 *(StgInt32 *)adjustor = 0x49c1894d;
367 *(StgInt32 *)(adjustor+4) = 0x8948c889;
368 *(StgInt32 *)(adjustor+8) = 0xf28948d1;
369 *(StgInt32 *)(adjustor+12) = 0x48fe8948;
370 *(StgInt32 *)(adjustor+16) = 0x000a3d8b;
371 *(StgInt32 *)(adjustor+20) = 0x00e90000;
/* Patch the jmpq's rel32 at byte offset 23 (an unaligned store -- fine on
   x86): wptr relative to the end of the jmpq instruction at offset 27. */
373 *(StgInt32 *)(adjustor+23) =
374 (StgInt32)((StgInt64)wptr - (StgInt64)adjustor - 27);
375 *(StgInt64 *)(adjustor+32) = (StgInt64)hptr;
/* >=6 integer args: emit the pushq variant; hptr quadword at +40. */
379 adjustor = stgMallocBytesRWX(48);
381 *(StgInt32 *)adjustor = 0x00685141;
382 *(StgInt32 *)(adjustor+4) = 0x4d000000;
383 *(StgInt32 *)(adjustor+8) = 0x8949c189;
384 *(StgInt32 *)(adjustor+12) = 0xd18948c8;
385 *(StgInt32 *)(adjustor+16) = 0x48f28948;
386 *(StgInt32 *)(adjustor+20) = 0x8b48fe89;
387 *(StgInt32 *)(adjustor+24) = 0x00000b3d;
388 *(StgInt32 *)(adjustor+28) = 0x0000e900;
/* Patch the pushq immediate at byte offset 3 with the 32-bit address of
   obscure_ccall_ret_code (small code model assumed, see ASSERT above),
   and the jmpq rel32 at offset 30. */
390 *(StgInt32 *)(adjustor+3) =
391 (StgInt32)(StgInt64)obscure_ccall_ret_code;
392 *(StgInt32 *)(adjustor+30) =
393 (StgInt32)((StgInt64)wptr - (StgInt64)adjustor - 34);
394 *(StgInt64 *)(adjustor+40) = (StgInt64)hptr;
397 #elif defined(sparc_HOST_ARCH)
398 /* Magic constant computed by inspecting the code length of the following
399 assembly language snippet (offset and machine code prefixed):
401 <00>: 9C23A008 sub %sp, 8, %sp ! make room for %o4/%o5 in caller's frame
402 <04>: DA23A060 st %o5, [%sp + 96] ! shift registers by 2 positions
403 <08>: D823A05C st %o4, [%sp + 92]
404 <0C>: 9A10000B mov %o3, %o5
405 <10>: 9810000A mov %o2, %o4
406 <14>: 96100009 mov %o1, %o3
407 <18>: 94100008 mov %o0, %o2
408 <1C>: 13000000 sethi %hi(wptr), %o1 ! load up wptr (1 of 2)
409 <20>: 11000000 sethi %hi(hptr), %o0 ! load up hptr (1 of 2)
410 <24>: 81C26000 jmp %o1 + %lo(wptr) ! jump to wptr (load 2 of 2)
411 <28>: 90122000 or %o0, %lo(hptr), %o0 ! load up hptr (2 of 2, delay slot)
412 <2C> 00000000 ! place for getting hptr back easily
414 ccall'ing on SPARC is easy, because we are quite lucky to push a
415 multiple of 8 bytes (1 word hptr + 1 word dummy arg) in front of the
416 existing arguments (note that %sp must stay double-word aligned at
417 all times, see ABI spec at http://www.sparc.org/standards/psABI3rd.pdf).
418 To do this, we extend the *caller's* stack frame by 2 words and shift
419 the output registers used for argument passing (%o0 - %o5, we are a *leaf*
420 procedure because of the tail-jump) by 2 positions. This makes room in
421 %o0 and %o1 for the additinal arguments, namely hptr and a dummy (used
422 for destination addr of jump on SPARC, return address on x86, ...). This
423 shouldn't cause any problems for a C-like caller: alloca is implemented
424 similarly, and local variables should be accessed via %fp, not %sp. In a
425 nutshell: This should work! (Famous last words! :-)
427 adjustor = stgMallocBytesRWX(4*(11+1));
429 unsigned long *const adj_code = (unsigned long *)adjustor;
431 adj_code[ 0] = 0x9C23A008UL; /* sub %sp, 8, %sp */
432 adj_code[ 1] = 0xDA23A060UL; /* st %o5, [%sp + 96] */
433 adj_code[ 2] = 0xD823A05CUL; /* st %o4, [%sp + 92] */
434 adj_code[ 3] = 0x9A10000BUL; /* mov %o3, %o5 */
435 adj_code[ 4] = 0x9810000AUL; /* mov %o2, %o4 */
436 adj_code[ 5] = 0x96100009UL; /* mov %o1, %o3 */
437 adj_code[ 6] = 0x94100008UL; /* mov %o0, %o2 */
438 adj_code[ 7] = 0x13000000UL; /* sethi %hi(wptr), %o1 */
439 adj_code[ 7] |= ((unsigned long)wptr) >> 10;
440 adj_code[ 8] = 0x11000000UL; /* sethi %hi(hptr), %o0 */
441 adj_code[ 8] |= ((unsigned long)hptr) >> 10;
442 adj_code[ 9] = 0x81C26000UL; /* jmp %o1 + %lo(wptr) */
443 adj_code[ 9] |= ((unsigned long)wptr) & 0x000003FFUL;
444 adj_code[10] = 0x90122000UL; /* or %o0, %lo(hptr), %o0 */
445 adj_code[10] |= ((unsigned long)hptr) & 0x000003FFUL;
/* Word 11 holds the raw hptr so freeHaskellFunctionPtr can recover it. */
447 adj_code[11] = (unsigned long)hptr;
/* Flush the instruction cache over the freshly written thunk. */
450 asm("flush %0" : : "r" (adj_code ));
451 asm("flush %0" : : "r" (adj_code + 2));
452 asm("flush %0" : : "r" (adj_code + 4));
453 asm("flush %0" : : "r" (adj_code + 6));
454 asm("flush %0" : : "r" (adj_code + 10));
456 /* max. 5 instructions latency, and we need at >= 1 for returning */
462 #elif defined(alpha_HOST_ARCH)
463 /* Magic constant computed by inspecting the code length of
464 the following assembly language snippet
465 (offset and machine code prefixed; note that the machine code
466 shown is longwords stored in little-endian order):
468 <00>: 46520414 mov a2, a4
469 <04>: 46100412 mov a0, a2
470 <08>: a61b0020 ldq a0, 0x20(pv) # load up hptr
471 <0c>: 46730415 mov a3, a5
472 <10>: a77b0028 ldq pv, 0x28(pv) # load up wptr
473 <14>: 46310413 mov a1, a3
474 <18>: 6bfb---- jmp (pv), <hint> # jump to wptr (with hint)
475 <1c>: 00000000 # padding for alignment
476 <20>: [8 bytes for hptr quadword]
477 <28>: [8 bytes for wptr quadword]
479 The "computed" jump at <08> above is really a jump to a fixed
480 location. Accordingly, we place an always-correct hint in the
481 jump instruction, namely the address offset from <0c> to wptr,
482 divided by 4, taking the lowest 14 bits.
484 We only support passing 4 or fewer argument words, for the same
485 reason described under sparc_HOST_ARCH above by JRS, 21 Aug 01.
486 On the Alpha the first 6 integer arguments are in a0 through a5,
487 and the rest on the stack. Hence we want to shuffle the original
488 caller's arguments by two.
490 On the Alpha the calling convention is so complex and dependent
491 on the callee's signature -- for example, the stack pointer has
492 to be a multiple of 16 -- that it seems impossible to me [ccshan]
493 to handle the general case correctly without changing how the
494 adjustor is called from C. For now, our solution of shuffling
495 registers only and ignoring the stack only works if the original
496 caller passed 4 or fewer argument words.
498 TODO: Depending on how much allocation overhead stgMallocBytes uses for
499 header information (more precisely, if the overhead is no more than
500 4 bytes), we should move the first three instructions above down by
501 4 bytes (getting rid of the nop), hence saving memory. [ccshan]
503 ASSERT(((StgWord64)wptr & 3) == 0);
504 adjustor = stgMallocBytesRWX(48);
506 StgWord64 *const code = (StgWord64 *)adjustor;
/* Two instructions per quadword, little-endian (low longword first). */
508 code[0] = 0x4610041246520414L;
509 code[1] = 0x46730415a61b0020L;
510 code[2] = 0x46310413a77b0028L;
511 code[3] = 0x000000006bfb0000L
512 | (((StgWord32*)(wptr) - (StgWord32*)(code) - 3) & 0x3fff);
514 code[4] = (StgWord64)hptr;
515 code[5] = (StgWord64)wptr;
517 /* Ensure that instruction cache is consistent with our new code */
518 __asm__ volatile("call_pal %0" : : "i" (PAL_imb));
520 #elif defined(powerpc_HOST_ARCH) && defined(linux_HOST_OS)
522 #define OP_LO(op,lo) ((((unsigned)(op)) << 16) | (((unsigned)(lo)) & 0xFFFF))
523 #define OP_HI(op,hi) ((((unsigned)(op)) << 16) | (((unsigned)(hi)) >> 16))
525 /* The PowerPC Linux (32-bit) calling convention is annoyingly complex.
526 We need to calculate all the details of the stack frame layout,
527 taking into account the types of all the arguments, and then
528 generate code on the fly. */
530 int src_gpr = 3, dst_gpr = 5;
532 int src_offset = 0, dst_offset = 0;
533 int n = strlen(typeString),i;
534 int src_locs[n], dst_locs[n];
/* Location encoding used below: <= -32 means an FPR (-32 - fpr number),
   other negative values mean a GPR (negated register number), and
   non-negative values are byte offsets into the parameter area. */
539 Calculate where the arguments should go.
540 src_locs[] will contain the locations of the arguments in the
541 original stack frame passed to the adjustor.
542 dst_locs[] will contain the locations of the arguments after the
543 adjustor runs, on entry to the wrapper proc pointed to by wptr.
545 This algorithm is based on the one described on page 3-19 of the
546 System V ABI PowerPC Processor Supplement.
548 for(i=0;typeString[i];i++)
550 char t = typeString[i];
551 if((t == 'f' || t == 'd') && fpr <= 8)
552 src_locs[i] = dst_locs[i] = -32-(fpr++);
555 if(t == 'l' && src_gpr <= 9)
/* 64-bit args in GPRs must start in an odd-numbered register pair. */
557 if((src_gpr & 1) == 0)
559 src_locs[i] = -src_gpr;
562 else if(t == 'i' && src_gpr <= 10)
564 src_locs[i] = -(src_gpr++);
568 if(t == 'l' || t == 'd')
573 src_locs[i] = src_offset;
574 src_offset += (t == 'l' || t == 'd') ? 8 : 4;
/* The destination side starts at r5 because r3/r4 are taken by hptr
   and the dummy argument (see "hptr will be the new first argument"). */
577 if(t == 'l' && dst_gpr <= 9)
579 if((dst_gpr & 1) == 0)
581 dst_locs[i] = -dst_gpr;
584 else if(t == 'i' && dst_gpr <= 10)
586 dst_locs[i] = -(dst_gpr++);
590 if(t == 'l' || t == 'd')
595 dst_locs[i] = dst_offset;
596 dst_offset += (t == 'l' || t == 'd') ? 8 : 4;
/* Frame = parameter area + 8 bytes (back chain + LR save), rounded up
   to the 16-byte alignment the ABI requires. */
601 frameSize = dst_offset + 8;
602 frameSize = (frameSize+15) & ~0xF;
607 // allocate space for at most 4 insns per parameter
608 // plus 14 more instructions.
609 adjustor = stgMallocBytesRWX(4 * (4*n + 14));
610 code = (unsigned*)adjustor;
/* Thunk layout: "b *+8" skips the embedded hptr word that follows, so
   freeHaskellFunctionPtr can find hptr at word index 1. */
612 *code++ = 0x48000008; // b *+8
613 // * Put the hptr in a place where freeHaskellFunctionPtr
615 *code++ = (unsigned) hptr;
617 // * save the link register
618 *code++ = 0x7c0802a6; // mflr r0;
619 *code++ = 0x90010004; // stw r0, 4(r1);
620 // * and build a new stack frame
621 *code++ = OP_LO(0x9421, -frameSize); // stwu r1, -frameSize(r1)
623 // * now generate instructions to copy arguments
624 // from the old stack frame into the new stack frame.
/* Per-argument cases (some branch lines elided): FPR args stay put;
   GPR args are moved register-to-register; stack args are copied via r0. */
627 if(src_locs[i] < -32)
628 ASSERT(dst_locs[i] == src_locs[i]);
629 else if(src_locs[i] < 0)
632 ASSERT(typeString[i] != 'f' && typeString[i] != 'd');
635 ASSERT(dst_locs[i] > -32);
636 // dst is in GPR, too.
638 if(typeString[i] == 'l')
642 | ((-dst_locs[i]+1) << 16)
643 | ((-src_locs[i]+1) << 11)
644 | ((-src_locs[i]+1) << 21);
648 | ((-dst_locs[i]) << 16)
649 | ((-src_locs[i]) << 11)
650 | ((-src_locs[i]) << 21);
654 if(typeString[i] == 'l')
656 // stw src+1, dst_offset+4(r1)
658 | ((-src_locs[i]+1) << 21)
662 // stw src, dst_offset(r1)
664 | ((-src_locs[i]) << 21)
670 ASSERT(dst_locs[i] >= 0);
671 ASSERT(typeString[i] != 'f' && typeString[i] != 'd');
673 if(typeString[i] == 'l')
675 // lwz r0, src_offset(r1)
677 | (src_locs[i] + frameSize + 8 + 4);
678 // stw r0, dst_offset(r1)
680 | (dst_locs[i] + 8 + 4);
682 // lwz r0, src_offset(r1)
684 | (src_locs[i] + frameSize + 8);
685 // stw r0, dst_offset(r1)
691 // * hptr will be the new first argument.
693 *code++ = OP_HI(0x3c60, hptr);
694 // ori r3,r3,lo(hptr)
695 *code++ = OP_LO(0x6063, hptr);
697 // * we need to return to a piece of code
698 // which will tear down the stack frame.
699 // lis r11,hi(obscure_ccall_ret_code)
700 *code++ = OP_HI(0x3d60, obscure_ccall_ret_code);
701 // ori r11,r11,lo(obscure_ccall_ret_code)
702 *code++ = OP_LO(0x616b, obscure_ccall_ret_code);
704 *code++ = 0x7d6803a6;
708 *code++ = OP_HI(0x3d60, wptr);
709 // ori r11,r11,lo(wptr)
710 *code++ = OP_LO(0x616b, wptr);
712 *code++ = 0x7d6903a6;
714 *code++ = 0x4e800420;
716 // Flush the Instruction cache:
718 unsigned *p = adjustor;
721 __asm__ volatile ("dcbf 0,%0\n\tsync\n\ticbi 0,%0"
725 __asm__ volatile ("sync\n\tisync");
729 #elif defined(powerpc_HOST_ARCH) || defined(powerpc64_HOST_ARCH)
731 #define OP_LO(op,lo) ((((unsigned)(op)) << 16) | (((unsigned)(lo)) & 0xFFFF))
732 #define OP_HI(op,hi) ((((unsigned)(op)) << 16) | (((unsigned)(hi)) >> 16))
734 /* The following code applies to all PowerPC and PowerPC64 platforms
735 whose stack layout is based on the AIX ABI.
737 Besides (obviously) AIX, this includes
738 Mac OS 9 and BeOS/PPC (may they rest in peace),
739 which use the 32-bit AIX ABI
741 which uses the 64-bit AIX ABI
742 and Darwin (Mac OS X),
743 which uses the same stack layout as AIX,
744 but no function descriptors.
746 The actual stack-frame shuffling is implemented out-of-line
747 in the function adjustorCode, in AdjustorAsm.S.
748 Here, we set up an AdjustorStub structure, which
749 is a function descriptor (on platforms that have function
750 descriptors) or a short piece of stub code (on Darwin) to call
751 adjustorCode with a pointer to the AdjustorStub struct loaded
754 One nice thing about this is that there is _no_ code generated at
755 runtime on the platforms that have function descriptors.
757 AdjustorStub *adjustorStub;
758 int sz = 0, extra_sz, total_sz;
760 // from AdjustorAsm.s
761 // not declared as a function so that AIX-style
762 // fundescs can never get in the way.
763 extern void *adjustorCode;
/* NOTE(review): the two allocation calls below belong to alternative
   (elided) #if branches -- plain malloc'd memory on fundesc platforms,
   RWX memory where stub code must be executable.  Only one runs. */
766 adjustorStub = stgMallocBytes(sizeof(AdjustorStub), "createAdjustor");
768 adjustorStub = stgMallocBytesRWX(sizeof(AdjustorStub));
770 adjustor = adjustorStub;
772 adjustorStub->code = (void*) &adjustorCode;
775 // function descriptors are a cool idea.
776 // We don't need to generate any code at runtime.
777 adjustorStub->toc = adjustorStub;
780 // no function descriptors :-(
781 // We need to do things "by hand".
782 #if defined(powerpc_HOST_ARCH)
783 // lis r2, hi(adjustorStub)
784 adjustorStub->lis = OP_HI(0x3c40, adjustorStub);
785 // ori r2, r2, lo(adjustorStub)
786 adjustorStub->ori = OP_LO(0x6042, adjustorStub);
788 adjustorStub->lwz = OP_LO(0x8002, (char*)(&adjustorStub->code)
789 - (char*)adjustorStub);
791 adjustorStub->mtctr = 0x7c0903a6;
793 adjustorStub->bctr = 0x4e800420;
795 barf("adjustor creation not supported on this platform");
798 // Flush the Instruction cache:
800 int n = sizeof(AdjustorStub)/sizeof(unsigned);
801 unsigned *p = (unsigned*)adjustor;
804 __asm__ volatile ("dcbf 0,%0\n\tsync\n\ticbi 0,%0"
808 __asm__ volatile ("sync\n\tisync");
812 // Calculate the size of the stack frame, in words.
815 char t = *typeString++;
819 #if defined(powerpc_HOST_ARCH)
820 // on 32-bit platforms, Double and Int64 occupy two words.
826 // everything else is one word.
831 // The first eight words of the parameter area
832 // are just "backing store" for the parameters passed in
833 // the GPRs. extra_sz is the number of words beyond those first
839 // Calculate the total size of the stack frame.
840 total_sz = (6 /* linkage area */
841 + 8 /* minimum parameter area */
842 + 2 /* two extra arguments */
843 + extra_sz)*sizeof(StgWord);
845 // align to 16 bytes.
846 // AIX only requires 8 bytes, but who cares?
847 total_sz = (total_sz+15) & ~0xF;
849 // Fill in the information that adjustorCode in AdjustorAsm.S
850 // will use to create a new stack frame with the additional args.
851 adjustorStub->hptr = hptr;
852 adjustorStub->wptr = wptr;
853 adjustorStub->negative_framesize = -total_sz;
854 adjustorStub->extrawords_plus_one = extra_sz + 1;
857 #elif defined(ia64_HOST_ARCH)
859 Up to 8 inputs are passed in registers. We flush the last two inputs to
860 the stack, initially into the 16-byte scratch region left by the caller.
861 We then shuffle the others along by 4 (taking 2 registers for ourselves
862 to save return address and previous function state - we need to come back
863 here on the way out to restore the stack, so this is a real function
864 rather than just a trampoline).
866 The function descriptor we create contains the gp of the target function
867 so gp is already loaded correctly.
869 [MLX] alloc r16=ar.pfs,10,2,0
871 [MII] st8.spill [r12]=r38,8 // spill in6 (out4)
872 mov r41=r37 // out7 = in5 (out3)
873 mov r40=r36;; // out6 = in4 (out2)
874 [MII] st8.spill [r12]=r39 // spill in7 (out5)
876 mov r38=r34;; // out4 = in2 (out0)
877 [MII] mov r39=r35 // out5 = in3 (out1)
878 mov r37=r33 // out3 = in1 (loc1)
879 mov r36=r32 // out2 = in0 (loc0)
880 [MLX] adds r12=-24,r12 // update sp
881 movl r34=hptr;; // out0 = hptr
882 [MIB] mov r33=r16 // loc1 = ar.pfs
883 mov r32=b0 // loc0 = retaddr
884 br.call.sptk.many b0=b6;;
886 [MII] adds r12=-16,r12
891 br.ret.sptk.many b0;;
894 /* These macros distribute a long constant into the two words of an MLX bundle */
895 #define BITS(val,start,count) (((val) >> (start)) & ((1 << (count))-1))
896 #define MOVL_LOWORD(val) (BITS(val,22,18) << 46)
897 #define MOVL_HIWORD(val) (BITS(val,40,23) | (BITS(val,0,7) << 36) | (BITS(val,7,9) << 50) \
898 | (BITS(val,16,5) << 55) | (BITS(val,21,1) << 44) | BITS(val,63,1) << 59)
902 IA64FunDesc *wdesc = (IA64FunDesc *)wptr;
903 StgWord64 wcode = wdesc->ip;
907 /* we allocate on the Haskell heap since malloc'd memory isn't executable - argh */
908 adjustor = stgAllocStable(sizeof(IA64FunDesc)+18*8, &stable);
/* Build a fresh function descriptor pointing at the code that follows it,
   reusing the target's gp so globals resolve in the target's context. */
910 fdesc = (IA64FunDesc *)adjustor;
911 code = (StgWord64 *)(fdesc + 1);
912 fdesc->ip = (StgWord64)code;
913 fdesc->gp = wdesc->gp;
/* Pre-encoded bundles for the asm shown above; the movl immediates for
   wcode and hptr are spliced in via MOVL_LOWORD/MOVL_HIWORD. */
915 code[0] = 0x0000058004288004 | MOVL_LOWORD(wcode);
916 code[1] = 0x6000000220000000 | MOVL_HIWORD(wcode);
917 code[2] = 0x029015d818984001;
918 code[3] = 0x8401200500420094;
919 code[4] = 0x886011d8189c0001;
920 code[5] = 0x84011004c00380c0;
921 code[6] = 0x0250210046013800;
922 code[7] = 0x8401000480420084;
923 code[8] = 0x0000233f19a06005 | MOVL_LOWORD((StgWord64)hptr);
924 code[9] = 0x6000000440000000 | MOVL_HIWORD((StgWord64)hptr);
925 code[10] = 0x0200210020010811;
926 code[11] = 0x1080006800006200;
927 code[12] = 0x0000210018406000;
928 code[13] = 0x00aa021000038005;
929 code[14] = 0x000000010000001d;
930 code[15] = 0x0084000880000200;
932 /* save stable pointers in convenient form */
933 code[16] = (StgWord64)hptr;
934 code[17] = (StgWord64)stable;
937 barf("adjustor creation not supported on this platform");
/* freeHaskellFunctionPtr: release an adjustor made by createAdjustor.
   Each platform branch sanity-checks the thunk's leading machine code,
   then frees the embedded StablePtr(s); the storage-release lines are
   elided in this listing. */
952 freeHaskellFunctionPtr(void* ptr)
954 #if defined(i386_HOST_ARCH)
/* First byte 0x68 = pushl (ccall adjustor), 0x58 = popl (stdcall);
   matches the code emitted in createAdjustor above. */
955 if ( *(unsigned char*)ptr != 0x68 &&
956 *(unsigned char*)ptr != 0x58 ) {
957 errorBelch("freeHaskellFunctionPtr: not for me, guv! %p\n", ptr);
961 /* Free the stable pointer first..*/
962 if (*(unsigned char*)ptr == 0x68) { /* Aha, a ccall adjustor! */
963 freeStablePtr(*((StgStablePtr*)((unsigned char*)ptr + 0x01)));
965 freeStablePtr(*((StgStablePtr*)((unsigned char*)ptr + 0x02)));
967 #elif defined(x86_64_HOST_ARCH)
/* 0x894d = first two bytes of the <6-arg thunk (hptr at +32);
   0x5141 = first two bytes of the >=6-arg thunk (hptr at +40). */
968 if ( *(StgWord16 *)ptr == 0x894d ) {
969 freeStablePtr(*(StgStablePtr*)(ptr+32));
970 } else if ( *(StgWord16 *)ptr == 0x5141 ) {
971 freeStablePtr(*(StgStablePtr*)(ptr+40));
973 errorBelch("freeHaskellFunctionPtr: not for me, guv! %p\n", ptr);
976 #elif defined(sparc_HOST_ARCH)
/* 0x9C23A008 = "sub %sp, 8, %sp", the thunk's first instruction;
   word 11 holds the raw hptr (see createAdjustor). */
977 if ( *(unsigned long*)ptr != 0x9C23A008UL ) {
978 errorBelch("freeHaskellFunctionPtr: not for me, guv! %p\n", ptr);
982 /* Free the stable pointer first..*/
983 freeStablePtr(*((StgStablePtr*)((unsigned long*)ptr + 11)));
984 #elif defined(alpha_HOST_ARCH)
/* NOTE(review): createAdjustor above writes code[0] = 0x4610041246520414
   and stores hptr at offset 0x20, yet this branch expects
   0xa77b0018a61b0010 and reads the StablePtr from offset 0x10 -- as shown,
   the two sides disagree.  The listing is elided, but this looks like a
   free path for an older thunk layout; confirm against upstream. */
985 if ( *(StgWord64*)ptr != 0xa77b0018a61b0010L ) {
986 errorBelch("freeHaskellFunctionPtr: not for me, guv! %p\n", ptr);
990 /* Free the stable pointer first..*/
991 freeStablePtr(*((StgStablePtr*)((unsigned char*)ptr + 0x10)));
992 #elif defined(powerpc_HOST_ARCH) && defined(linux_HOST_OS)
/* Thunk starts with "b *+8" (0x48000008); hptr is the word after it. */
993 if ( *(StgWord*)ptr != 0x48000008 ) {
994 errorBelch("freeHaskellFunctionPtr: not for me, guv! %p\n", ptr);
997 freeStablePtr(((StgStablePtr*)ptr)[1]);
998 #elif defined(powerpc_HOST_ARCH) || defined(powerpc64_HOST_ARCH)
999 extern void* adjustorCode;
1000 if ( ((AdjustorStub*)ptr)->code != (StgFunPtr) &adjustorCode ) {
1001 errorBelch("freeHaskellFunctionPtr: not for me, guv! %p\n", ptr);
1004 freeStablePtr(((AdjustorStub*)ptr)->hptr);
1005 #elif defined(ia64_HOST_ARCH)
1006 IA64FunDesc *fdesc = (IA64FunDesc *)ptr;
1007 StgWord64 *code = (StgWord64 *)(fdesc+1);
1009 if (fdesc->ip != (StgWord64)code) {
1010 errorBelch("freeHaskellFunctionPtr: not for me, guv! %p\n", ptr);
/* code[16] = the hptr StablePtr; code[17] = the StablePtr pinning the
   descriptor block itself (see stgAllocStable above). */
1013 freeStablePtr((StgStablePtr)code[16]);
1014 freeStablePtr((StgStablePtr)code[17]);
/* Scribble on the first byte -- presumably so that a call through a
   dangling pointer fails fast; TODO confirm intent. */
1019 *((unsigned char*)ptr) = '\0';
1026 * Function: initAdjustor()
1028 * Perform initialisation of adjustor thunk layer (if needed.)
1033 #if defined(i386_HOST_ARCH) && defined(openbsd_HOST_OS)
1034 obscure_ccall_ret_code_dyn = stgMallocBytesRWX(4);
1035 obscure_ccall_ret_code_dyn[0] = ((unsigned char *)obscure_ccall_ret_code)[0];
1036 obscure_ccall_ret_code_dyn[1] = ((unsigned char *)obscure_ccall_ret_code)[1];
1037 obscure_ccall_ret_code_dyn[2] = ((unsigned char *)obscure_ccall_ret_code)[2];
1038 obscure_ccall_ret_code_dyn[3] = ((unsigned char *)obscure_ccall_ret_code)[3];