1 /* -----------------------------------------------------------------------------
2 * Foreign export adjustor thunks
6 * ---------------------------------------------------------------------------*/
8 /* A little bit of background...
10 An adjustor thunk is a dynamically allocated code snippet that allows
11 Haskell closures to be viewed as C function pointers.
13 Stable pointers provide a way for the outside world to get access to,
14 and evaluate, Haskell heap objects, with the RTS providing a small
15 range of ops for doing so. So, assuming we've got a stable pointer in
16 our hand in C, we can jump into the Haskell world and evaluate a callback
17 procedure, say. This works OK in some cases where callbacks are used, but
18 does require the external code to know about stable pointers and how to deal
19 with them. We'd like to hide the Haskell-nature of a callback and have it
20 be invoked just like any other C function pointer.
22 Enter adjustor thunks. An adjustor thunk is a little piece of code
23 that's generated on-the-fly (one per Haskell closure being exported)
24 that, when entered using some 'universal' calling convention (e.g., the
25 C calling convention on platform X), pushes an implicit stable pointer
26 (to the Haskell callback) before calling another (static) C function stub
27 which takes care of entering the Haskell code via its stable pointer.
29 An adjustor thunk is allocated on the C heap, and is called from within
30 Haskell just before handing out the function pointer to the Haskell (IO)
31 action. User code should never have to invoke it explicitly.
33 An adjustor thunk differs from a C function pointer in one respect: when
34 the code is through with it, it has to be freed in order to release Haskell
35 and C resources. Failure to do so will result in memory leaks on both the C and
   the Haskell side.
39 #include "PosixSource.h"
45 #if defined(USE_LIBFFI_FOR_ADJUSTORS)
50 #if defined(i386_HOST_ARCH) && defined(darwin_HOST_OS)
51 extern void adjustorCode(void);
52 #elif defined(powerpc_HOST_ARCH) || defined(powerpc64_HOST_ARCH)
54 // not declared as a function so that AIX-style
55 // fundescs can never get in the way.
56 extern void *adjustorCode;
59 #if defined(USE_LIBFFI_FOR_ADJUSTORS)
/* Free an adjustor created by the libffi-based createAdjustor: release the
 * Haskell-side stable pointer and the argument-type array allocated there.
 * NOTE(review): the visible lines free cl->cif->arg_types but not the cif
 * struct itself nor the closure's executable memory -- confirm the elided
 * lines stgFree the cif and release the ffi_closure allocation. */
61 freeHaskellFunctionPtr(void* ptr)
65 cl = (ffi_closure*)ptr;
66 freeStablePtr(cl->user_data);
67 stgFree(cl->cif->arg_types);
72 static ffi_type * char_to_ffi_type(char c)
75 case 'v': return &ffi_type_void;
76 case 'f': return &ffi_type_float;
77 case 'd': return &ffi_type_double;
78 case 'L': return &ffi_type_sint64;
79 case 'l': return &ffi_type_uint64;
80 case 'W': return &ffi_type_sint32;
81 case 'w': return &ffi_type_uint32;
82 case 'S': return &ffi_type_sint16;
83 case 's': return &ffi_type_uint16;
84 case 'B': return &ffi_type_sint8;
85 case 'b': return &ffi_type_uint8;
86 case 'p': return &ffi_type_pointer;
87 default: barf("char_to_ffi_type: unknown type '%c'", c);
/* libffi-based adjustor creation: build an ffi_cif describing the foreign
 * signature (typeString[0] is the result, the rest are arguments), then wrap
 * wptr in an ffi_closure with hptr (the stable pointer) as user data.
 * NOTE(review): ffi_prep_closure is the old pre-ffi_prep_closure_loc API;
 * modern libffi deprecates it in favour of ffi_closure_alloc +
 * ffi_prep_closure_loc -- confirm which libffi version is targeted. */
92 createAdjustor (int cconv,
100 ffi_type *result_type;
/* n_args excludes the leading result-type character. */
105 n_args = strlen(typeString) - 1;
106 cif = stgMallocBytes(sizeof(ffi_cif), "createAdjustor");
107 arg_types = stgMallocBytes(n_args * sizeof(ffi_type*), "createAdjustor");
109 result_type = char_to_ffi_type(typeString[0]);
110 for (i=0; i < n_args; i++) {
111 arg_types[i] = char_to_ffi_type(typeString[i+1]);
/* Calling-convention selection: only Windows distinguishes stdcall. */
114 #ifdef mingw32_TARGET_OS
115 case 0: /* stdcall */
120 abi = FFI_DEFAULT_ABI;
123 barf("createAdjustor: convention %d not supported on this platform", cconv);
126 r = ffi_prep_cif(cif, abi, n_args, result_type, arg_types);
127 if (r != FFI_OK) barf("ffi_prep_cif failed: %d", r);
/* The closure must live in executable memory; allocateExec also hands back
 * the callable code address in 'code'. */
129 cl = allocateExec(sizeof(ffi_closure), &code);
131 barf("createAdjustor: failed to allocate memory");
134 r = ffi_prep_closure(cl, cif, (void*)wptr, hptr/*userdata*/);
135 if (r != FFI_OK) barf("ffi_prep_closure failed: %d", r);
140 #else // To end of file...
146 #if defined(powerpc_HOST_ARCH) && defined(linux_HOST_OS)
150 #ifdef LEADING_UNDERSCORE
151 #define UNDERSCORE "_"
153 #define UNDERSCORE ""
155 #if defined(i386_HOST_ARCH) && !defined(darwin_HOST_OS)
157 Now here's something obscure for you:
159 When generating an adjustor thunk that uses the C calling
160 convention, we have to make sure that the thunk kicks off
161 the process of jumping into Haskell with a tail jump. Why?
162 Because as a result of jumping into Haskell we may end
163 up freeing the very adjustor thunk we came from using
164 freeHaskellFunctionPtr(). Hence, we better not return to
165 the adjustor code on our way out, since it could by then
168 The fix is readily at hand, just include the opcodes
169 for the C stack fixup code that we need to perform when
170 returning in some static piece of memory and arrange
171 to return to it before tail jumping from the adjustor thunk.
/* Static (never-freed) landing pad the adjustor tail-returns through:
 * pops the pushed stable pointer off the stack before returning to the
 * original C caller.  Emitted via inline asm so it has a stable label. */
173 static void GNUC3_ATTRIBUTE(used) obscure_ccall_wrapper(void)
176 ".globl " UNDERSCORE "obscure_ccall_ret_code\n"
177 UNDERSCORE "obscure_ccall_ret_code:\n\t"
178 "addl $0x4, %esp\n\t"
182 extern void obscure_ccall_ret_code(void);
186 #if defined(x86_64_HOST_ARCH)
/* x86_64 variant of the static return-code stub: pops the one argument
 * word the adjustor flushed to the stack (8 bytes here, vs 4 on i386)
 * before returning to the original caller. */
187 static void GNUC3_ATTRIBUTE(used) obscure_ccall_wrapper(void)
190 ".globl " UNDERSCORE "obscure_ccall_ret_code\n"
191 UNDERSCORE "obscure_ccall_ret_code:\n\t"
192 "addq $0x8, %rsp\n\t"
196 extern void obscure_ccall_ret_code(void);
199 #if defined(alpha_HOST_ARCH)
200 /* To get the definition of PAL_imb: */
201 # if defined(linux_HOST_OS)
202 # include <asm/pal.h>
204 # include <machine/pal.h>
208 #if defined(ia64_HOST_ARCH)
210 /* Layout of a function descriptor */
211 typedef struct _IA64FunDesc {
/* Allocate size_in_bytes of storage on the Haskell heap (as an ARR_WORDS
 * closure), pin it with a stable pointer written through *stable, and
 * return a pointer to the payload.  Used on platforms (ia64) where
 * malloc'd memory is not executable. */
217 stgAllocStable(size_t size_in_bytes, StgStablePtr *stable)
220 nat data_size_in_words, total_size_in_words;
222 /* round up to a whole number of words */
/* NOTE(review): the conventional round-up is
 * (size_in_bytes + sizeof(W_) - 1) / sizeof(W_); the "+ sizeof(W_) + 1"
 * form over-allocates (by up to one extra word).  Harmless, but confirm
 * whether the extra slack is intentional. */
223 data_size_in_words = (size_in_bytes + sizeof(W_) + 1) / sizeof(W_);
224 total_size_in_words = sizeofW(StgArrWords) + data_size_in_words;
226 /* allocate and fill it in */
227 arr = (StgArrWords *)allocate(total_size_in_words);
228 SET_ARR_HDR(arr, &stg_ARR_WORDS_info, CCCS, data_size_in_words);
230 /* obtain a stable ptr */
231 *stable = getStablePtr((StgPtr)arr);
233 /* and return a ptr to the goods inside the array */
234 return(&(arr->payload));
238 #if defined(powerpc_HOST_ARCH) && defined(linux_HOST_OS)
/* PowerPC/Linux static return stub: tears down the stack frame that the
 * generated adjustor built before returning to the original caller.
 * (Body of the asm is elided in this listing.) */
239 __asm__("obscure_ccall_ret_code:\n\t"
244 extern void obscure_ccall_ret_code(void);
247 #if defined(powerpc_HOST_ARCH) || defined(powerpc64_HOST_ARCH)
248 #if !(defined(powerpc_HOST_ARCH) && defined(linux_HOST_OS))
250 /* !!! !!! WARNING: !!! !!!
251 * This structure is accessed from AdjustorAsm.s
252 * Any changes here have to be mirrored in the offsets there.
/* Per-closure stub record for AIX-ABI PowerPC platforms: either a function
 * descriptor (fundesc ABIs) or a short code sequence (Darwin), plus the
 * frame-layout parameters consumed by adjustorCode in AdjustorAsm.S. */
255 typedef struct AdjustorStub {
256 #if defined(powerpc_HOST_ARCH) && defined(darwin_HOST_OS)
263 #elif defined(powerpc64_HOST_ARCH) && defined(darwin_HOST_OS)
264 /* powerpc64-darwin: just guessing that it won't use fundescs. */
275 /* fundesc-based ABIs */
/* Precomputed by createAdjustor; read by the assembly stub. */
284 StgInt negative_framesize;
285 StgInt extrawords_plus_one;
291 #if defined(i386_HOST_ARCH) && defined(darwin_HOST_OS)
293 /* !!! !!! WARNING: !!! !!!
294 * This structure is accessed from AdjustorAsm.s
295 * Any changes here have to be mirrored in the offsets there.
/* i386-Darwin stub record: 'call' holds a relative call into adjustorCode
 * (patched at creation time); the remaining fields parameterize the
 * 16-byte-aligned frame the assembly stub builds. */
298 typedef struct AdjustorStub {
299 unsigned char call[8];
303 StgInt argument_size;
307 #if (defined(i386_HOST_ARCH) && defined(darwin_HOST_OS)) || defined(powerpc_HOST_ARCH) || defined(powerpc64_HOST_ARCH)
/* Walk an adjustor type string and return the argument area size in words,
 * per the platform word size. */
308 static int totalArgumentSize(char *typeString)
313 char t = *typeString++;
317 // on 32-bit platforms, Double and Int64 occupy two words.
321 if(sizeof(void*) == 4)
326 // everything else is one word.
/* Native (non-libffi) createAdjustor: emits a per-closure machine-code
 * thunk for the selected cconv/architecture.  cconv 0 = stdcall,
 * otherwise ccall; hptr is the stable pointer to push, wptr the C stub
 * to tail-jump to. */
336 createAdjustor(int cconv, StgStablePtr hptr,
339 #if !defined(powerpc_HOST_ARCH) && !defined(powerpc64_HOST_ARCH) && !defined(x86_64_HOST_ARCH)
344 void *adjustor = NULL;
349 case 0: /* _stdcall */
350 #if defined(i386_HOST_ARCH) && !defined(darwin_HOST_OS)
351 /* Magic constant computed by inspecting the code length of
352 the following assembly language snippet
353 (offset and machine code prefixed):
355 <0>: 58 popl %eax # temp. remove ret addr..
356 <1>: 68 fd fc fe fa pushl 0xfafefcfd # constant is large enough to
357 # hold a StgStablePtr
358 <6>: 50 pushl %eax # put back ret. addr
359 <7>: b8 fa ef ff 00 movl $0x00ffeffa, %eax # load up wptr
360 <c>: ff e0 jmp %eax # and jump to it.
361 # the callee cleans up the stack
/* 14 bytes = total length of the snippet above. */
363 adjustor = allocateExec(14,&code);
365 unsigned char *const adj_code = (unsigned char *)adjustor;
366 adj_code[0x00] = (unsigned char)0x58; /* popl %eax */
368 adj_code[0x01] = (unsigned char)0x68; /* pushl hptr (which is a dword immediate ) */
369 *((StgStablePtr*)(adj_code + 0x02)) = (StgStablePtr)hptr;
371 adj_code[0x06] = (unsigned char)0x50; /* pushl %eax */
373 adj_code[0x07] = (unsigned char)0xb8; /* movl $wptr, %eax */
374 *((StgFunPtr*)(adj_code + 0x08)) = (StgFunPtr)wptr;
376 adj_code[0x0c] = (unsigned char)0xff; /* jmp %eax */
377 adj_code[0x0d] = (unsigned char)0xe0;
/* ccall/i386 (non-Darwin): push hptr, push the static return stub as the
 * fake return address, and tail-jump to wptr via %eax. */
383 #if defined(i386_HOST_ARCH) && !defined(darwin_HOST_OS)
384 /* Magic constant computed by inspecting the code length of
385 the following assembly language snippet
386 (offset and machine code prefixed):
388 <00>: 68 ef be ad de pushl $0xdeadbeef # constant is large enough to
389 # hold a StgStablePtr
390 <05>: b8 fa ef ff 00 movl $0x00ffeffa, %eax # load up wptr
391 <0a>: 68 ef be ad de pushl $obscure_ccall_ret_code # push the return address
392 <0f>: ff e0 jmp *%eax # jump to wptr
394 The ccall'ing version is a tad different, passing in the return
395 address of the caller to the auto-generated C stub (which enters
396 via the stable pointer.) (The auto-generated C stub is in on this
397 game, don't worry :-)
399 See the comment next to obscure_ccall_ret_code why we need to
400 perform a tail jump instead of a call, followed by some C stack
403 Note: The adjustor makes the assumption that any return value
404 coming back from the C stub is not stored on the stack.
405 That's (thankfully) the case here with the restricted set of
406 return types that we support.
408 adjustor = allocateExec(17,&code);
410 unsigned char *const adj_code = (unsigned char *)adjustor;
412 adj_code[0x00] = (unsigned char)0x68; /* pushl hptr (which is a dword immediate ) */
413 *((StgStablePtr*)(adj_code+0x01)) = (StgStablePtr)hptr;
415 adj_code[0x05] = (unsigned char)0xb8; /* movl $wptr, %eax */
416 *((StgFunPtr*)(adj_code + 0x06)) = (StgFunPtr)wptr;
418 adj_code[0x0a] = (unsigned char)0x68; /* pushl obscure_ccall_ret_code */
419 *((StgFunPtr*)(adj_code + 0x0b)) =
420 (StgFunPtr)obscure_ccall_ret_code;
422 adj_code[0x0f] = (unsigned char)0xff; /* jmp *%eax */
423 adj_code[0x10] = (unsigned char)0xe0;
/* ccall/i386-Darwin: the heavy lifting is in AdjustorAsm.S; here we only
 * fill in an AdjustorStub with a relative call and frame-layout numbers
 * that keep the stack 16-byte aligned. */
425 #elif defined(i386_HOST_ARCH) && defined(darwin_HOST_OS)
428 What's special about Darwin/Mac OS X on i386?
429 It wants the stack to stay 16-byte aligned.
431 We offload most of the work to AdjustorAsm.S.
433 AdjustorStub *adjustorStub = allocateExec(sizeof(AdjustorStub),&code);
434 adjustor = adjustorStub;
436 int sz = totalArgumentSize(typeString);
/* 0xe8 = call rel32; the displacement is relative to the instruction end
 * (adjustorStub + 5). */
438 adjustorStub->call[0] = 0xe8;
439 *(long*)&adjustorStub->call[1] = ((char*)&adjustorCode) - ((char*)adjustorStub + 5);
440 adjustorStub->hptr = hptr;
441 adjustorStub->wptr = wptr;
443 // The adjustor puts the following things on the stack:
445 // 2.) padding and (a copy of) the arguments
446 // 3.) a dummy argument
448 // 5.) return address (for returning to the adjustor)
449 // All these have to add up to a multiple of 16.
451 // first, include everything in frame_size
452 adjustorStub->frame_size = sz * 4 + 16;
/* round up to a multiple of 16 bytes */
454 adjustorStub->frame_size = (adjustorStub->frame_size + 15) & ~15;
455 // only count 2.) and 3.) as part of frame_size
456 adjustorStub->frame_size -= 12;
457 adjustorStub->argument_size = sz;
/* ccall/x86_64: shuffle the six integer argument registers up by one to
 * make room for hptr in %rdi.  With >= 6 integer args the sixth is flushed
 * to the stack and control returns through obscure_ccall_ret_code to pop
 * it again.  hptr/wptr live in 8-byte literal pools after the code and are
 * loaded RIP-relative. */
460 #elif defined(x86_64_HOST_ARCH)
467 %rdi,%rsi,%rdx,%rcx,%r8,%r9 = arg0..arg6
469 if there are <6 integer args, then we can just push the
470 StablePtr into %edi and shuffle the other args up.
472 If there are >=6 integer args, then we have to flush one arg
473 to the stack, and arrange to adjust the stack ptr on return.
474 The stack will be rearranged to this:
479 return address *** <-- dummy arg in stub fn.
481 obscure_ccall_ret_code
483 This unfortunately means that the type of the stub function
484 must have a dummy argument for the original return address
485 pointer inserted just after the 6th integer argument.
487 Code for the simple case:
489 0: 4d 89 c1 mov %r8,%r9
490 3: 49 89 c8 mov %rcx,%r8
491 6: 48 89 d1 mov %rdx,%rcx
492 9: 48 89 f2 mov %rsi,%rdx
493 c: 48 89 fe mov %rdi,%rsi
494 f: 48 8b 3d 0a 00 00 00 mov 10(%rip),%rdi
495 16: ff 25 0c 00 00 00 jmpq *12(%rip)
497 20: .quad 0 # aligned on 8-byte boundary
498 28: .quad 0 # aligned on 8-byte boundary
501 And the version for >=6 integer arguments:
504 2: ff 35 20 00 00 00 pushq 32(%rip) # 28 <ccall_adjustor+0x28>
505 8: 4d 89 c1 mov %r8,%r9
506 b: 49 89 c8 mov %rcx,%r8
507 e: 48 89 d1 mov %rdx,%rcx
508 11: 48 89 f2 mov %rsi,%rdx
509 14: 48 89 fe mov %rdi,%rsi
510 17: 48 8b 3d 12 00 00 00 mov 18(%rip),%rdi # 30 <ccall_adjustor+0x30>
511 1e: ff 25 14 00 00 00 jmpq *20(%rip) # 38 <ccall_adjustor+0x38>
513 28: .quad 0 # aligned on 8-byte boundary
514 30: .quad 0 # aligned on 8-byte boundary
515 38: .quad 0 # aligned on 8-byte boundary
523 // determine whether we have 6 or more integer arguments,
524 // and therefore need to flush one to the stack.
/* floats/doubles go in SSE registers, so only non-'f'/'d' args count. */
525 for (c = typeString; *c != '\0'; c++) {
526 if (*c != 'f' && *c != 'd') i++;
/* Simple (<6 integer args) case: 0x30 bytes = code + two literal quads. */
531 adjustor = allocateExec(0x30,&code);
532 adj_code = (StgWord8*)adjustor;
/* The 32-bit stores below spell out the instruction bytes shown above. */
534 *(StgInt32 *)adj_code = 0x49c1894d;
535 *(StgInt32 *)(adj_code+0x4) = 0x8948c889;
536 *(StgInt32 *)(adj_code+0x8) = 0xf28948d1;
537 *(StgInt32 *)(adj_code+0xc) = 0x48fe8948;
538 *(StgInt32 *)(adj_code+0x10) = 0x000a3d8b;
539 *(StgInt32 *)(adj_code+0x14) = 0x25ff0000;
540 *(StgInt32 *)(adj_code+0x18) = 0x0000000c;
541 *(StgInt64 *)(adj_code+0x20) = (StgInt64)hptr;
542 *(StgInt64 *)(adj_code+0x28) = (StgInt64)wptr;
/* >=6 integer args case: 0x40 bytes = longer code + three literal quads. */
546 adjustor = allocateExec(0x40,&code);
547 adj_code = (StgWord8*)adjustor;
549 *(StgInt32 *)adj_code = 0x35ff5141;
550 *(StgInt32 *)(adj_code+0x4) = 0x00000020;
551 *(StgInt32 *)(adj_code+0x8) = 0x49c1894d;
552 *(StgInt32 *)(adj_code+0xc) = 0x8948c889;
553 *(StgInt32 *)(adj_code+0x10) = 0xf28948d1;
554 *(StgInt32 *)(adj_code+0x14) = 0x48fe8948;
555 *(StgInt32 *)(adj_code+0x18) = 0x00123d8b;
556 *(StgInt32 *)(adj_code+0x1c) = 0x25ff0000;
557 *(StgInt32 *)(adj_code+0x20) = 0x00000014;
559 *(StgInt64 *)(adj_code+0x28) = (StgInt64)obscure_ccall_ret_code;
560 *(StgInt64 *)(adj_code+0x30) = (StgInt64)hptr;
561 *(StgInt64 *)(adj_code+0x38) = (StgInt64)wptr;
/* ccall/SPARC: extend the caller's frame by 2 words, shift %o0-%o5 up by
 * two positions, load hptr into %o0 and tail-jump to wptr.  hptr is also
 * stored as a trailing data word so freeHaskellFunctionPtr can find it. */
564 #elif defined(sparc_HOST_ARCH)
565 /* Magic constant computed by inspecting the code length of the following
566 assembly language snippet (offset and machine code prefixed):
568 <00>: 9C23A008 sub %sp, 8, %sp ! make room for %o4/%o5 in caller's frame
569 <04>: DA23A060 st %o5, [%sp + 96] ! shift registers by 2 positions
570 <08>: D823A05C st %o4, [%sp + 92]
571 <0C>: 9A10000B mov %o3, %o5
572 <10>: 9810000A mov %o2, %o4
573 <14>: 96100009 mov %o1, %o3
574 <18>: 94100008 mov %o0, %o2
575 <1C>: 13000000 sethi %hi(wptr), %o1 ! load up wptr (1 of 2)
576 <20>: 11000000 sethi %hi(hptr), %o0 ! load up hptr (1 of 2)
577 <24>: 81C26000 jmp %o1 + %lo(wptr) ! jump to wptr (load 2 of 2)
578 <28>: 90122000 or %o0, %lo(hptr), %o0 ! load up hptr (2 of 2, delay slot)
579 <2C> 00000000 ! place for getting hptr back easily
581 ccall'ing on SPARC is easy, because we are quite lucky to push a
582 multiple of 8 bytes (1 word hptr + 1 word dummy arg) in front of the
583 existing arguments (note that %sp must stay double-word aligned at
584 all times, see ABI spec at http://www.sparc.org/standards/psABI3rd.pdf).
585 To do this, we extend the *caller's* stack frame by 2 words and shift
586 the output registers used for argument passing (%o0 - %o5, we are a *leaf*
587 procedure because of the tail-jump) by 2 positions. This makes room in
588 %o0 and %o1 for the additional arguments, namely hptr and a dummy (used
589 for destination addr of jump on SPARC, return address on x86, ...). This
590 shouldn't cause any problems for a C-like caller: alloca is implemented
591 similarly, and local variables should be accessed via %fp, not %sp. In a
592 nutshell: This should work! (Famous last words! :-)
/* 11 instructions + 1 data word, 4 bytes each. */
594 adjustor = allocateExec(4*(11+1),&code);
596 unsigned long *const adj_code = (unsigned long *)adjustor;
598 adj_code[ 0] = 0x9C23A008UL; /* sub %sp, 8, %sp */
599 adj_code[ 1] = 0xDA23A060UL; /* st %o5, [%sp + 96] */
600 adj_code[ 2] = 0xD823A05CUL; /* st %o4, [%sp + 92] */
601 adj_code[ 3] = 0x9A10000BUL; /* mov %o3, %o5 */
602 adj_code[ 4] = 0x9810000AUL; /* mov %o2, %o4 */
603 adj_code[ 5] = 0x96100009UL; /* mov %o1, %o3 */
604 adj_code[ 6] = 0x94100008UL; /* mov %o0, %o2 */
605 adj_code[ 7] = 0x13000000UL; /* sethi %hi(wptr), %o1 */
606 adj_code[ 7] |= ((unsigned long)wptr) >> 10;
607 adj_code[ 8] = 0x11000000UL; /* sethi %hi(hptr), %o0 */
608 adj_code[ 8] |= ((unsigned long)hptr) >> 10;
609 adj_code[ 9] = 0x81C26000UL; /* jmp %o1 + %lo(wptr) */
610 adj_code[ 9] |= ((unsigned long)wptr) & 0x000003FFUL;
611 adj_code[10] = 0x90122000UL; /* or %o0, %lo(hptr), %o0 */
612 adj_code[10] |= ((unsigned long)hptr) & 0x000003FFUL;
614 adj_code[11] = (unsigned long)hptr;
/* Flush the instruction cache for the freshly written code. */
617 asm("flush %0" : : "r" (adj_code ));
618 asm("flush %0" : : "r" (adj_code + 2));
619 asm("flush %0" : : "r" (adj_code + 4));
620 asm("flush %0" : : "r" (adj_code + 6));
621 asm("flush %0" : : "r" (adj_code + 10));
623 /* max. 5 instructions latency, and we need at >= 1 for returning */
/* ccall/Alpha: register-shuffle-only adjustor (supports at most 4 argument
 * words); hptr and wptr are stored as trailing quadwords at 0x20/0x28.
 * NOTE(review): freeHaskellFunctionPtr below checks the first code quad
 * against 0xa77b0018a61b0010 and reads hptr at offset 0x10, but the code
 * written here starts with 0x4610041246520414 and stores hptr at 0x20 --
 * the two paths look inconsistent; verify before relying on free. */
629 #elif defined(alpha_HOST_ARCH)
630 /* Magic constant computed by inspecting the code length of
631 the following assembly language snippet
632 (offset and machine code prefixed; note that the machine code
633 shown is longwords stored in little-endian order):
635 <00>: 46520414 mov a2, a4
636 <04>: 46100412 mov a0, a2
637 <08>: a61b0020 ldq a0, 0x20(pv) # load up hptr
638 <0c>: 46730415 mov a3, a5
639 <10>: a77b0028 ldq pv, 0x28(pv) # load up wptr
640 <14>: 46310413 mov a1, a3
641 <18>: 6bfb---- jmp (pv), <hint> # jump to wptr (with hint)
642 <1c>: 00000000 # padding for alignment
643 <20>: [8 bytes for hptr quadword]
644 <28>: [8 bytes for wptr quadword]
646 The "computed" jump at <08> above is really a jump to a fixed
647 location. Accordingly, we place an always-correct hint in the
648 jump instruction, namely the address offset from <0c> to wptr,
649 divided by 4, taking the lowest 14 bits.
651 We only support passing 4 or fewer argument words, for the same
652 reason described under sparc_HOST_ARCH above by JRS, 21 Aug 01.
653 On the Alpha the first 6 integer arguments are in a0 through a5,
654 and the rest on the stack. Hence we want to shuffle the original
655 caller's arguments by two.
657 On the Alpha the calling convention is so complex and dependent
658 on the callee's signature -- for example, the stack pointer has
659 to be a multiple of 16 -- that it seems impossible to me [ccshan]
660 to handle the general case correctly without changing how the
661 adjustor is called from C. For now, our solution of shuffling
662 registers only and ignoring the stack only works if the original
663 caller passed 4 or fewer argument words.
665 TODO: Depending on how much allocation overhead stgMallocBytes uses for
666 header information (more precisely, if the overhead is no more than
667 4 bytes), we should move the first three instructions above down by
668 4 bytes (getting rid of the nop), hence saving memory. [ccshan]
/* jmp requires a 4-byte-aligned target. */
670 ASSERT(((StgWord64)wptr & 3) == 0);
671 adjustor = allocateExec(48,&code);
673 StgWord64 *const code = (StgWord64 *)adjustor;
675 code[0] = 0x4610041246520414L;
676 code[1] = 0x46730415a61b0020L;
677 code[2] = 0x46310413a77b0028L;
/* Branch-prediction hint: low 14 bits of the word offset to wptr. */
678 code[3] = 0x000000006bfb0000L
679 | (((StgWord32*)(wptr) - (StgWord32*)(code) - 3) & 0x3fff);
681 code[4] = (StgWord64)hptr;
682 code[5] = (StgWord64)wptr;
684 /* Ensure that instruction cache is consistent with our new code */
685 __asm__ volatile("call_pal %0" : : "i" (PAL_imb));
/* ccall/PowerPC-Linux (SysV ABI): compute source and destination locations
 * for every argument (registers encoded as negative values, stack slots as
 * non-negative byte offsets), then emit code that builds a new frame,
 * copies the arguments, loads hptr into r3, and calls wptr, returning
 * through obscure_ccall_ret_code. */
687 #elif defined(powerpc_HOST_ARCH) && defined(linux_HOST_OS)
689 #define OP_LO(op,lo) ((((unsigned)(op)) << 16) | (((unsigned)(lo)) & 0xFFFF))
690 #define OP_HI(op,hi) ((((unsigned)(op)) << 16) | (((unsigned)(hi)) >> 16))
692 /* The PowerPC Linux (32-bit) calling convention is annoyingly complex.
693 We need to calculate all the details of the stack frame layout,
694 taking into account the types of all the arguments, and then
695 generate code on the fly. */
/* dst skips r3/r4, which will carry hptr and the dummy argument. */
697 int src_gpr = 3, dst_gpr = 5;
699 int src_offset = 0, dst_offset = 0;
700 int n = strlen(typeString),i;
701 int src_locs[n], dst_locs[n];
706 Calculate where the arguments should go.
707 src_locs[] will contain the locations of the arguments in the
708 original stack frame passed to the adjustor.
709 dst_locs[] will contain the locations of the arguments after the
710 adjustor runs, on entry to the wrapper proc pointed to by wptr.
712 This algorithm is based on the one described on page 3-19 of the
713 System V ABI PowerPC Processor Supplement.
715 for(i=0;typeString[i];i++)
717 char t = typeString[i];
/* FPRs: f1-f8; encoded as -33..-40 so they sort below the GPR range. */
718 if((t == 'f' || t == 'd') && fpr <= 8)
719 src_locs[i] = dst_locs[i] = -32-(fpr++);
/* 64-bit ints use an even/odd GPR pair starting at an even register. */
722 if((t == 'l' || t == 'L') && src_gpr <= 9)
724 if((src_gpr & 1) == 0)
726 src_locs[i] = -src_gpr;
729 else if((t == 'w' || t == 'W') && src_gpr <= 10)
731 src_locs[i] = -(src_gpr++);
/* Stack-passed: 8-byte types are 8-byte aligned. */
735 if(t == 'l' || t == 'L' || t == 'd')
740 src_locs[i] = src_offset;
741 src_offset += (t == 'l' || t == 'L' || t == 'd') ? 8 : 4;
/* Same placement algorithm again for the destination frame. */
744 if((t == 'l' || t == 'L') && dst_gpr <= 9)
746 if((dst_gpr & 1) == 0)
748 dst_locs[i] = -dst_gpr;
751 else if((t == 'w' || t == 'W') && dst_gpr <= 10)
753 dst_locs[i] = -(dst_gpr++);
757 if(t == 'l' || t == 'L' || t == 'd')
762 dst_locs[i] = dst_offset;
763 dst_offset += (t == 'l' || t == 'L' || t == 'd') ? 8 : 4;
/* Frame = argument area + linkage, rounded to 16 bytes. */
768 frameSize = dst_offset + 8;
769 frameSize = (frameSize+15) & ~0xF;
774 // allocate space for at most 4 insns per parameter
775 // plus 14 more instructions.
776 adjustor = allocateExec(4 * (4*n + 14),&code);
777 code = (unsigned*)adjustor;
779 *code++ = 0x48000008; // b *+8
780 // * Put the hptr in a place where freeHaskellFunctionPtr
782 *code++ = (unsigned) hptr;
784 // * save the link register
785 *code++ = 0x7c0802a6; // mflr r0;
786 *code++ = 0x90010004; // stw r0, 4(r1);
787 // * and build a new stack frame
788 *code++ = OP_LO(0x9421, -frameSize); // stwu r1, -frameSize(r1)
790 // * now generate instructions to copy arguments
791 // from the old stack frame into the new stack frame.
/* FPR-to-FPR: nothing to do, source and destination coincide. */
794 if(src_locs[i] < -32)
795 ASSERT(dst_locs[i] == src_locs[i]);
796 else if(src_locs[i] < 0)
799 ASSERT(typeString[i] != 'f' && typeString[i] != 'd');
802 ASSERT(dst_locs[i] > -32);
803 // dst is in GPR, too.
805 if(typeString[i] == 'l' || typeString[i] == 'L')
/* mr dst+1, src+1 for the low word of a 64-bit pair */
809 | ((-dst_locs[i]+1) << 16)
810 | ((-src_locs[i]+1) << 11)
811 | ((-src_locs[i]+1) << 21);
/* mr dst, src */
815 | ((-dst_locs[i]) << 16)
816 | ((-src_locs[i]) << 11)
817 | ((-src_locs[i]) << 21);
/* GPR source, stack destination. */
821 if(typeString[i] == 'l' || typeString[i] == 'L')
823 // stw src+1, dst_offset+4(r1)
825 | ((-src_locs[i]+1) << 21)
829 // stw src, dst_offset(r1)
831 | ((-src_locs[i]) << 21)
/* Stack source: copy through r0; src_offset is relative to the old frame
 * (hence + frameSize). */
837 ASSERT(dst_locs[i] >= 0);
838 ASSERT(typeString[i] != 'f' && typeString[i] != 'd');
840 if(typeString[i] == 'l' || typeString[i] == 'L')
842 // lwz r0, src_offset(r1)
844 | (src_locs[i] + frameSize + 8 + 4);
845 // stw r0, dst_offset(r1)
847 | (dst_locs[i] + 8 + 4);
849 // lwz r0, src_offset(r1)
851 | (src_locs[i] + frameSize + 8);
852 // stw r0, dst_offset(r1)
858 // * hptr will be the new first argument.
860 *code++ = OP_HI(0x3c60, hptr);
861 // ori r3,r3,lo(hptr)
862 *code++ = OP_LO(0x6063, hptr);
864 // * we need to return to a piece of code
865 // which will tear down the stack frame.
866 // lis r11,hi(obscure_ccall_ret_code)
867 *code++ = OP_HI(0x3d60, obscure_ccall_ret_code);
868 // ori r11,r11,lo(obscure_ccall_ret_code)
869 *code++ = OP_LO(0x616b, obscure_ccall_ret_code);
/* mtlr r11 -- return address = the teardown stub */
871 *code++ = 0x7d6803a6;
/* Load wptr into r11 and branch to it via ctr. */
875 *code++ = OP_HI(0x3d60, wptr);
876 // ori r11,r11,lo(wptr)
877 *code++ = OP_LO(0x616b, wptr);
/* mtctr r11 */
879 *code++ = 0x7d6903a6;
/* bctr */
881 *code++ = 0x4e800420;
883 // Flush the Instruction cache:
885 unsigned *p = adjustor;
888 __asm__ volatile ("dcbf 0,%0\n\tsync\n\ticbi 0,%0"
892 __asm__ volatile ("sync\n\tisync");
/* ccall/PowerPC and PowerPC64 on AIX-style ABIs: no per-closure code on
 * fundesc platforms (the AdjustorStub doubles as a function descriptor);
 * on Darwin a five-instruction stub loads the AdjustorStub address into r2
 * and jumps to the shared adjustorCode in AdjustorAsm.S. */
896 #elif defined(powerpc_HOST_ARCH) || defined(powerpc64_HOST_ARCH)
898 #define OP_LO(op,lo) ((((unsigned)(op)) << 16) | (((unsigned)(lo)) & 0xFFFF))
899 #define OP_HI(op,hi) ((((unsigned)(op)) << 16) | (((unsigned)(hi)) >> 16))
901 /* The following code applies to all PowerPC and PowerPC64 platforms
902 whose stack layout is based on the AIX ABI.
904 Besides (obviously) AIX, this includes
905 Mac OS 9 and BeOS/PPC (may they rest in peace),
906 which use the 32-bit AIX ABI
908 which uses the 64-bit AIX ABI
909 and Darwin (Mac OS X),
910 which uses the same stack layout as AIX,
911 but no function descriptors.
913 The actual stack-frame shuffling is implemented out-of-line
914 in the function adjustorCode, in AdjustorAsm.S.
915 Here, we set up an AdjustorStub structure, which
916 is a function descriptor (on platforms that have function
917 descriptors) or a short piece of stub code (on Darwin) to call
918 adjustorCode with a pointer to the AdjustorStub struct loaded
921 One nice thing about this is that there is _no_ code generated at
922 runtime on the platforms that have function descriptors.
924 AdjustorStub *adjustorStub;
925 int sz = 0, extra_sz, total_sz;
/* Fundesc platforms: plain data, no executable memory needed. */
928 adjustorStub = stgMallocBytes(sizeof(AdjustorStub), "createAdjustor");
/* Darwin: the stub contains code, so it must be executable. */
930 adjustorStub = allocateExec(sizeof(AdjustorStub),&code);
932 adjustor = adjustorStub;
934 adjustorStub->code = (void*) &adjustorCode;
937 // function descriptors are a cool idea.
938 // We don't need to generate any code at runtime.
939 adjustorStub->toc = adjustorStub;
942 // no function descriptors :-(
943 // We need to do things "by hand".
944 #if defined(powerpc_HOST_ARCH)
945 // lis r2, hi(adjustorStub)
946 adjustorStub->lis = OP_HI(0x3c40, adjustorStub);
947 // ori r2, r2, lo(adjustorStub)
948 adjustorStub->ori = OP_LO(0x6042, adjustorStub);
/* lwz r0, offsetof(code)(r2) -- fetch adjustorCode's address */
950 adjustorStub->lwz = OP_LO(0x8002, (char*)(&adjustorStub->code)
951 - (char*)adjustorStub);
/* mtctr r0 */
953 adjustorStub->mtctr = 0x7c0903a6;
/* bctr */
955 adjustorStub->bctr = 0x4e800420;
957 barf("adjustor creation not supported on this platform");
960 // Flush the Instruction cache:
962 int n = sizeof(AdjustorStub)/sizeof(unsigned);
963 unsigned *p = (unsigned*)adjustor;
966 __asm__ volatile ("dcbf 0,%0\n\tsync\n\ticbi 0,%0"
970 __asm__ volatile ("sync\n\tisync");
974 // Calculate the size of the stack frame, in words.
975 sz = totalArgumentSize(typeString);
977 // The first eight words of the parameter area
978 // are just "backing store" for the parameters passed in
979 // the GPRs. extra_sz is the number of words beyond those first
985 // Calculate the total size of the stack frame.
986 total_sz = (6 /* linkage area */
987 + 8 /* minimum parameter area */
988 + 2 /* two extra arguments */
989 + extra_sz)*sizeof(StgWord);
991 // align to 16 bytes.
992 // AIX only requires 8 bytes, but who cares?
993 total_sz = (total_sz+15) & ~0xF;
995 // Fill in the information that adjustorCode in AdjustorAsm.S
996 // will use to create a new stack frame with the additional args.
997 adjustorStub->hptr = hptr;
998 adjustorStub->wptr = wptr;
999 adjustorStub->negative_framesize = -total_sz;
1000 adjustorStub->extrawords_plus_one = extra_sz + 1;
/* ccall/IA64: build a real function (descriptor + 16 bundles) on the
 * Haskell heap via stgAllocStable, since malloc'd memory is not
 * executable.  The code shifts the 8 register inputs up, spills the last
 * two, loads hptr as the first argument and calls wptr; hptr and the
 * stable pointer are stashed in code[16]/code[17] for later freeing. */
1003 #elif defined(ia64_HOST_ARCH)
1005 Up to 8 inputs are passed in registers. We flush the last two inputs to
1006 the stack, initially into the 16-byte scratch region left by the caller.
1007 We then shuffle the others along by 4 (taking 2 registers for ourselves
1008 to save return address and previous function state - we need to come back
1009 here on the way out to restore the stack, so this is a real function
1010 rather than just a trampoline).
1012 The function descriptor we create contains the gp of the target function
1013 so gp is already loaded correctly.
1015 [MLX] alloc r16=ar.pfs,10,2,0
1017 [MII] st8.spill [r12]=r38,8 // spill in6 (out4)
1018 mov r41=r37 // out7 = in5 (out3)
1019 mov r40=r36;; // out6 = in4 (out2)
1020 [MII] st8.spill [r12]=r39 // spill in7 (out5)
1022 mov r38=r34;; // out4 = in2 (out0)
1023 [MII] mov r39=r35 // out5 = in3 (out1)
1024 mov r37=r33 // out3 = in1 (loc1)
1025 mov r36=r32 // out2 = in0 (loc0)
1026 [MLX] adds r12=-24,r12 // update sp
1027 movl r34=hptr;; // out0 = hptr
1028 [MIB] mov r33=r16 // loc1 = ar.pfs
1029 mov r32=b0 // loc0 = retaddr
1030 br.call.sptk.many b0=b6;;
1032 [MII] adds r12=-16,r12
1037 br.ret.sptk.many b0;;
1040 /* These macros distribute a long constant into the two words of an MLX bundle */
1041 #define BITS(val,start,count) (((val) >> (start)) & ((1 << (count))-1))
1042 #define MOVL_LOWORD(val) (BITS(val,22,18) << 46)
1043 #define MOVL_HIWORD(val) ( (BITS(val,0,7) << 36) \
1044 | (BITS(val,7,9) << 50) \
1045 | (BITS(val,16,5) << 45) \
1046 | (BITS(val,21,1) << 44) \
1047 | (BITS(val,40,23)) \
1048 | (BITS(val,63,1) << 59))
1051 StgStablePtr stable;
1052 IA64FunDesc *wdesc = (IA64FunDesc *)wptr;
1053 StgWord64 wcode = wdesc->ip;
1057 /* we allocate on the Haskell heap since malloc'd memory isn't
1058 * executable - argh */
1059 /* Allocated memory is word-aligned (8 bytes) but functions on ia64
1060 * must be aligned to 16 bytes. We allocate an extra 8 bytes of
1061 * wiggle room so that we can put the code on a 16 byte boundary. */
1062 adjustor = stgAllocStable(sizeof(IA64FunDesc)+18*8+8, &stable);
1064 fdesc = (IA64FunDesc *)adjustor;
1065 code = (StgWord64 *)(fdesc + 1);
1066 /* add 8 bytes to code if needed to align to a 16-byte boundary */
1067 if ((StgWord64)code & 15) code++;
1068 fdesc->ip = (StgWord64)code;
1069 fdesc->gp = wdesc->gp;
/* Pre-assembled bundles from the listing above, with wptr's and hptr's
 * 64-bit values scattered into the movl slots via MOVL_LOWORD/HIWORD. */
1071 code[0] = 0x0000058004288004 | MOVL_LOWORD(wcode);
1072 code[1] = 0x6000000220000000 | MOVL_HIWORD(wcode);
1073 code[2] = 0x029015d818984001;
1074 code[3] = 0x8401200500420094;
1075 code[4] = 0x886011d8189c0001;
1076 code[5] = 0x84011004c00380c0;
1077 code[6] = 0x0250210046013800;
1078 code[7] = 0x8401000480420084;
1079 code[8] = 0x0000233f19a06005 | MOVL_LOWORD((StgWord64)hptr);
1080 code[9] = 0x6000000440000000 | MOVL_HIWORD((StgWord64)hptr);
1081 code[10] = 0x0200210020010811;
1082 code[11] = 0x1080006800006200;
1083 code[12] = 0x0000210018406000;
1084 code[13] = 0x00aa021000038005;
1085 code[14] = 0x000000010000001d;
1086 code[15] = 0x0084000880000200;
1088 /* save stable pointers in convenient form */
1089 code[16] = (StgWord64)hptr;
1090 code[17] = (StgWord64)stable;
1093 barf("adjustor creation not supported on this platform");
/* Free a native adjustor: sanity-check the leading machine-code bytes to
 * verify ptr really is one of our thunks, release the embedded stable
 * pointer, then (in elided code) release the executable memory. */
1108 freeHaskellFunctionPtr(void* ptr)
1110 #if defined(i386_HOST_ARCH) && !defined(darwin_HOST_OS)
/* 0x68 = pushl imm32 (ccall thunk), 0x58 = popl %eax (stdcall thunk). */
1111 if ( *(unsigned char*)ptr != 0x68 &&
1112 *(unsigned char*)ptr != 0x58 ) {
1113 errorBelch("freeHaskellFunctionPtr: not for me, guv! %p\n", ptr);
1117 /* Free the stable pointer first..*/
1118 if (*(unsigned char*)ptr == 0x68) { /* Aha, a ccall adjustor! */
1119 freeStablePtr(*((StgStablePtr*)((unsigned char*)ptr + 0x01)));
1121 freeStablePtr(*((StgStablePtr*)((unsigned char*)ptr + 0x02)));
1123 #elif defined(i386_HOST_ARCH) && defined(darwin_HOST_OS)
/* 0xe8 = the call opcode written into AdjustorStub->call[0]. */
1124 if ( *(unsigned char*)ptr != 0xe8 ) {
1125 errorBelch("freeHaskellFunctionPtr: not for me, guv! %p\n", ptr);
1128 freeStablePtr(((AdjustorStub*)ptr)->hptr);
1129 #elif defined(x86_64_HOST_ARCH)
/* First two code bytes distinguish the <6-arg (mov %r8,%r9) and >=6-arg
 * (push %rcx prefix) layouts, whose hptr pools are at 0x20 and 0x30. */
1130 if ( *(StgWord16 *)ptr == 0x894d ) {
1131 freeStablePtr(*(StgStablePtr*)((StgWord8*)ptr+0x20));
1132 } else if ( *(StgWord16 *)ptr == 0x5141 ) {
1133 freeStablePtr(*(StgStablePtr*)((StgWord8*)ptr+0x30));
1135 errorBelch("freeHaskellFunctionPtr: not for me, guv! %p\n", ptr);
1138 #elif defined(sparc_HOST_ARCH)
1139 if ( *(unsigned long*)ptr != 0x9C23A008UL ) {
1140 errorBelch("freeHaskellFunctionPtr: not for me, guv! %p\n", ptr);
1144 /* Free the stable pointer first..*/
/* hptr was stored as the 12th word of the sparc thunk. */
1145 freeStablePtr(*((StgStablePtr*)((unsigned long*)ptr + 11)));
1146 #elif defined(alpha_HOST_ARCH)
/* NOTE(review): this magic (and the +0x10 hptr offset below) does not
 * match what the alpha branch of createAdjustor writes (first quad
 * 0x4610041246520414, hptr at +0x20) -- looks stale; confirm against the
 * creation path before trusting this check. */
1147 if ( *(StgWord64*)ptr != 0xa77b0018a61b0010L ) {
1148 errorBelch("freeHaskellFunctionPtr: not for me, guv! %p\n", ptr);
1152 /* Free the stable pointer first..*/
1153 freeStablePtr(*((StgStablePtr*)((unsigned char*)ptr + 0x10)));
1154 #elif defined(powerpc_HOST_ARCH) && defined(linux_HOST_OS)
/* 0x48000008 = the "b *+8" that starts every ppc-linux thunk; hptr is the
 * word immediately after it. */
1155 if ( *(StgWord*)ptr != 0x48000008 ) {
1156 errorBelch("freeHaskellFunctionPtr: not for me, guv! %p\n", ptr);
1159 freeStablePtr(((StgStablePtr*)ptr)[1]);
1160 #elif defined(powerpc_HOST_ARCH) || defined(powerpc64_HOST_ARCH)
1161 if ( ((AdjustorStub*)ptr)->code != (StgFunPtr) &adjustorCode ) {
1162 errorBelch("freeHaskellFunctionPtr: not for me, guv! %p\n", ptr);
1165 freeStablePtr(((AdjustorStub*)ptr)->hptr);
1166 #elif defined(ia64_HOST_ARCH)
1167 IA64FunDesc *fdesc = (IA64FunDesc *)ptr;
1168 StgWord64 *code = (StgWord64 *)(fdesc+1);
1170 if (fdesc->ip != (StgWord64)code) {
1171 errorBelch("freeHaskellFunctionPtr: not for me, guv! %p\n", ptr);
/* code[16]/code[17] are the hptr and the array's own stable ptr saved at
 * creation time; freeing the latter unpins the heap-allocated code. */
1174 freeStablePtr((StgStablePtr)code[16]);
1175 freeStablePtr((StgStablePtr)code[17]);
1180 // Can't write to this memory, it is only executable:
1181 // *((unsigned char*)ptr) = '\0';
1186 #endif // !USE_LIBFFI_FOR_ADJUSTORS