1 /* -----------------------------------------------------------------------------
2 * Foreign export adjustor thunks
6 * ---------------------------------------------------------------------------*/
8 /* A little bit of background...
10 An adjustor thunk is a dynamically allocated code snippet that allows
11 Haskell closures to be viewed as C function pointers.
13 Stable pointers provide a way for the outside world to get access to,
14 and evaluate, Haskell heap objects, with the RTS providing a small
15 range of ops for doing so. So, assuming we've got a stable pointer in
16 our hand in C, we can jump into the Haskell world and evaluate a callback
17 procedure, say. This works OK in some cases where callbacks are used, but
18 does require the external code to know about stable pointers and how to deal
19 with them. We'd like to hide the Haskell-nature of a callback and have it
20 be invoked just like any other C function pointer.
22 Enter adjustor thunks. An adjustor thunk is a little piece of code
23 that's generated on-the-fly (one per Haskell closure being exported)
24 that, when entered using some 'universal' calling convention (e.g., the
25 C calling convention on platform X), pushes an implicit stable pointer
26 (to the Haskell callback) before calling another (static) C function stub
27 which takes care of entering the Haskell code via its stable pointer.
29 An adjustor thunk is allocated on the C heap, and is called from within
30 Haskell just before handing out the function pointer to the Haskell (IO)
31 action. User code should never have to invoke it explicitly.
33 An adjustor thunk differs from a C function pointer in one respect: when
34 the code is through with it, it has to be freed in order to release Haskell
35 and C resources. Failure to do so results in memory leaks on both the C and
39 #include "PosixSource.h"
41 #include "RtsExternal.h"
51 #if defined(powerpc_HOST_ARCH) && defined(linux_HOST_OS)
55 #ifdef LEADING_UNDERSCORE
56 #define UNDERSCORE "_"
60 #if defined(i386_HOST_ARCH) && !defined(darwin_HOST_OS)
62 Now here's something obscure for you:
64 When generating an adjustor thunk that uses the C calling
65 convention, we have to make sure that the thunk kicks off
66 the process of jumping into Haskell with a tail jump. Why?
67 Because as a result of jumping into Haskell we may end
68 up freeing the very adjustor thunk we came from using
69 freeHaskellFunctionPtr(). Hence, we better not return to
70 the adjustor code on our way out, since it could by then
73 The fix is readily at hand, just include the opcodes
74 for the C stack fixup code that we need to perform when
75 returning in some static piece of memory and arrange
76 to return to it before tail jumping from the adjustor thunk.
78 static void GNUC3_ATTRIBUTE(used) obscure_ccall_wrapper(void)
81 ".globl " UNDERSCORE "obscure_ccall_ret_code\n"
82 UNDERSCORE "obscure_ccall_ret_code:\n\t"
87 extern void obscure_ccall_ret_code(void);
89 #if defined(openbsd_HOST_OS)
90 static unsigned char *obscure_ccall_ret_code_dyn;
95 #if defined(x86_64_HOST_ARCH)
96 static void GNUC3_ATTRIBUTE(used) obscure_ccall_wrapper(void)
99 ".globl " UNDERSCORE "obscure_ccall_ret_code\n"
100 UNDERSCORE "obscure_ccall_ret_code:\n\t"
101 "addq $0x8, %rsp\n\t"
105 extern void obscure_ccall_ret_code(void);
108 #if defined(alpha_HOST_ARCH)
109 /* To get the definition of PAL_imb: */
110 # if defined(linux_HOST_OS)
111 # include <asm/pal.h>
113 # include <machine/pal.h>
117 #if defined(ia64_HOST_ARCH)
120 /* Layout of a function descriptor */
121 typedef struct _IA64FunDesc {
127 stgAllocStable(size_t size_in_bytes, StgStablePtr *stable)
130 nat data_size_in_words, total_size_in_words;
132 /* round up to a whole number of words */
133 data_size_in_words = (size_in_bytes + sizeof(W_) + 1) / sizeof(W_);
134 total_size_in_words = sizeofW(StgArrWords) + data_size_in_words;
136 /* allocate and fill it in */
137 arr = (StgArrWords *)allocate(total_size_in_words);
138 SET_ARR_HDR(arr, &stg_ARR_WORDS_info, CCCS, data_size_in_words);
140 /* obtain a stable ptr */
141 *stable = getStablePtr((StgPtr)arr);
143 /* and return a ptr to the goods inside the array */
144 return(&(arr->payload));
148 #if defined(powerpc_HOST_ARCH) && defined(linux_HOST_OS)
149 __asm__("obscure_ccall_ret_code:\n\t"
154 extern void obscure_ccall_ret_code(void);
157 #if defined(powerpc_HOST_ARCH) || defined(powerpc64_HOST_ARCH)
158 #if !(defined(powerpc_HOST_ARCH) && defined(linux_HOST_OS))
160 /* !!! !!! WARNING: !!! !!!
161 * This structure is accessed from AdjustorAsm.s
162 * Any changes here have to be mirrored in the offsets there.
165 typedef struct AdjustorStub {
166 #if defined(powerpc_HOST_ARCH) && defined(darwin_HOST_OS)
173 #elif defined(powerpc64_HOST_ARCH) && defined(darwin_HOST_OS)
174 /* powerpc64-darwin: just guessing that it won't use fundescs. */
185 /* fundesc-based ABIs */
194 StgInt negative_framesize;
195 StgInt extrawords_plus_one;
201 #if defined(i386_HOST_ARCH) && defined(darwin_HOST_OS)
203 /* !!! !!! WARNING: !!! !!!
204 * This structure is accessed from AdjustorAsm.s
205 * Any changes here have to be mirrored in the offsets there.
208 typedef struct AdjustorStub {
209 unsigned char call[8];
213 StgInt argument_size;
217 #if defined(darwin_HOST_OS) || defined(powerpc_HOST_ARCH) || defined(powerpc64_HOST_ARCH)
218 static int totalArgumentSize(char *typeString)
223 char t = *typeString++;
227 // on 32-bit platforms, Double and Int64 occupy two words.
230 if(sizeof(void*) == 4)
235 // everything else is one word.
245 createAdjustor(int cconv, StgStablePtr hptr,
248 #if !defined(powerpc_HOST_ARCH) && !defined(powerpc64_HOST_ARCH) && !defined(x86_64_HOST_ARCH)
253 void *adjustor = NULL;
257 case 0: /* _stdcall */
258 #if defined(i386_HOST_ARCH) && !defined(darwin_HOST_OS)
259 /* Magic constant computed by inspecting the code length of
260 the following assembly language snippet
261 (offset and machine code prefixed):
263 <0>: 58 popl %eax # temp. remove ret addr..
264 <1>: 68 fd fc fe fa pushl 0xfafefcfd # constant is large enough to
265 # hold a StgStablePtr
266 <6>: 50 pushl %eax # put back ret. addr
267 <7>: b8 fa ef ff 00 movl $0x00ffeffa, %eax # load up wptr
268 <c>: ff e0 jmp %eax # and jump to it.
269 # the callee cleans up the stack
271 adjustor = allocateExec(14);
273 unsigned char *const adj_code = (unsigned char *)adjustor;
274 adj_code[0x00] = (unsigned char)0x58; /* popl %eax */
276 adj_code[0x01] = (unsigned char)0x68; /* pushl hptr (which is a dword immediate ) */
277 *((StgStablePtr*)(adj_code + 0x02)) = (StgStablePtr)hptr;
279 adj_code[0x06] = (unsigned char)0x50; /* pushl %eax */
281 adj_code[0x07] = (unsigned char)0xb8; /* movl $wptr, %eax */
282 *((StgFunPtr*)(adj_code + 0x08)) = (StgFunPtr)wptr;
284 adj_code[0x0c] = (unsigned char)0xff; /* jmp %eax */
285 adj_code[0x0d] = (unsigned char)0xe0;
291 #if defined(i386_HOST_ARCH) && !defined(darwin_HOST_OS)
292 /* Magic constant computed by inspecting the code length of
293 the following assembly language snippet
294 (offset and machine code prefixed):
296 <00>: 68 ef be ad de pushl $0xdeadbeef # constant is large enough to
297 # hold a StgStablePtr
298 <05>: b8 fa ef ff 00 movl $0x00ffeffa, %eax # load up wptr
299 <0a>: 68 ef be ad de pushl $obscure_ccall_ret_code # push the return address
300 <0f>: ff e0 jmp *%eax # jump to wptr
302 The ccall'ing version is a tad different, passing in the return
303 address of the caller to the auto-generated C stub (which enters
304 via the stable pointer.) (The auto-generated C stub is in on this
305 game, don't worry :-)
307 See the comment next to obscure_ccall_ret_code why we need to
308 perform a tail jump instead of a call, followed by some C stack
311 Note: The adjustor makes the assumption that any return value
312 coming back from the C stub is not stored on the stack.
313 That's (thankfully) the case here with the restricted set of
314 return types that we support.
316 adjustor = allocateExec(17);
318 unsigned char *const adj_code = (unsigned char *)adjustor;
320 adj_code[0x00] = (unsigned char)0x68; /* pushl hptr (which is a dword immediate ) */
321 *((StgStablePtr*)(adj_code+0x01)) = (StgStablePtr)hptr;
323 adj_code[0x05] = (unsigned char)0xb8; /* movl $wptr, %eax */
324 *((StgFunPtr*)(adj_code + 0x06)) = (StgFunPtr)wptr;
326 adj_code[0x0a] = (unsigned char)0x68; /* pushl obscure_ccall_ret_code */
327 *((StgFunPtr*)(adj_code + 0x0b)) =
328 #if !defined(openbsd_HOST_OS)
329 (StgFunPtr)obscure_ccall_ret_code;
331 (StgFunPtr)obscure_ccall_ret_code_dyn;
334 adj_code[0x0f] = (unsigned char)0xff; /* jmp *%eax */
335 adj_code[0x10] = (unsigned char)0xe0;
337 #elif defined(i386_HOST_ARCH) && defined(darwin_HOST_OS)
340 What's special about Darwin/Mac OS X on i386?
341 It wants the stack to stay 16-byte aligned.
343 We offload most of the work to AdjustorAsm.S.
345 AdjustorStub *adjustorStub = allocateExec(sizeof(AdjustorStub));
346 adjustor = adjustorStub;
348 extern void adjustorCode(void);
349 int sz = totalArgumentSize(typeString);
351 adjustorStub->call[0] = 0xe8;
352 *(long*)&adjustorStub->call[1] = ((char*)&adjustorCode) - ((char*)adjustorStub + 5);
353 adjustorStub->hptr = hptr;
354 adjustorStub->wptr = wptr;
356 // The adjustor puts the following things on the stack:
358 // 2.) padding and (a copy of) the arguments
359 // 3.) a dummy argument
361 // 5.) return address (for returning to the adjustor)
362 // All these have to add up to a multiple of 16.
364 // first, include everything in frame_size
365 adjustorStub->frame_size = sz * 4 + 16;
367 adjustorStub->frame_size = (adjustorStub->frame_size + 15) & ~15;
368 // only count 2.) and 3.) as part of frame_size
369 adjustorStub->frame_size -= 12;
370 adjustorStub->argument_size = sz;
373 #elif defined(x86_64_HOST_ARCH)
380 %rdi,%rsi,%rdx,%rcx,%r8,%r9 = arg0..arg6
382 if there are <6 integer args, then we can just push the
383 StablePtr into %edi and shuffle the other args up.
385 If there are >=6 integer args, then we have to flush one arg
386 to the stack, and arrange to adjust the stack ptr on return.
387 The stack will be rearranged to this:
392 return address *** <-- dummy arg in stub fn.
394 obscure_ccall_ret_code
396 This unfortunately means that the type of the stub function
397 must have a dummy argument for the original return address
398 pointer inserted just after the 6th integer argument.
400 Code for the simple case:
402 0: 4d 89 c1 mov %r8,%r9
403 3: 49 89 c8 mov %rcx,%r8
404 6: 48 89 d1 mov %rdx,%rcx
405 9: 48 89 f2 mov %rsi,%rdx
406 c: 48 89 fe mov %rdi,%rsi
407 f: 48 8b 3d 0a 00 00 00 mov 10(%rip),%rdi
408 16: ff 25 0c 00 00 00 jmpq *12(%rip)
410 20: .quad 0 # aligned on 8-byte boundary
411 28: .quad 0 # aligned on 8-byte boundary
414 And the version for >=6 integer arguments:
417 2: ff 35 20 00 00 00 pushq 32(%rip) # 28 <ccall_adjustor+0x28>
418 8: 4d 89 c1 mov %r8,%r9
419 b: 49 89 c8 mov %rcx,%r8
420 e: 48 89 d1 mov %rdx,%rcx
421 11: 48 89 f2 mov %rsi,%rdx
422 14: 48 89 fe mov %rdi,%rsi
423 17: 48 8b 3d 12 00 00 00 mov 18(%rip),%rdi # 30 <ccall_adjustor+0x30>
424 1e: ff 25 14 00 00 00 jmpq *20(%rip) # 38 <ccall_adjustor+0x38>
426 28: .quad 0 # aligned on 8-byte boundary
427 30: .quad 0 # aligned on 8-byte boundary
428 38: .quad 0 # aligned on 8-byte boundary
431 /* we assume the small code model (gcc -mcmmodel=small) where
432 * all symbols are <2^32, so hence wptr should fit into 32 bits.
434 ASSERT(((long)wptr >> 32) == 0);
440 // determine whether we have 6 or more integer arguments,
441 // and therefore need to flush one to the stack.
442 for (c = typeString; *c != '\0'; c++) {
443 if (*c == 'i' || *c == 'l') i++;
448 adjustor = allocateExec(0x30);
450 *(StgInt32 *)adjustor = 0x49c1894d;
451 *(StgInt32 *)(adjustor+0x4) = 0x8948c889;
452 *(StgInt32 *)(adjustor+0x8) = 0xf28948d1;
453 *(StgInt32 *)(adjustor+0xc) = 0x48fe8948;
454 *(StgInt32 *)(adjustor+0x10) = 0x000a3d8b;
455 *(StgInt32 *)(adjustor+0x14) = 0x25ff0000;
456 *(StgInt32 *)(adjustor+0x18) = 0x0000000c;
457 *(StgInt64 *)(adjustor+0x20) = (StgInt64)hptr;
458 *(StgInt64 *)(adjustor+0x28) = (StgInt64)wptr;
462 adjustor = allocateExec(0x40);
464 *(StgInt32 *)adjustor = 0x35ff5141;
465 *(StgInt32 *)(adjustor+0x4) = 0x00000020;
466 *(StgInt32 *)(adjustor+0x8) = 0x49c1894d;
467 *(StgInt32 *)(adjustor+0xc) = 0x8948c889;
468 *(StgInt32 *)(adjustor+0x10) = 0xf28948d1;
469 *(StgInt32 *)(adjustor+0x14) = 0x48fe8948;
470 *(StgInt32 *)(adjustor+0x18) = 0x00123d8b;
471 *(StgInt32 *)(adjustor+0x1c) = 0x25ff0000;
472 *(StgInt32 *)(adjustor+0x20) = 0x00000014;
474 *(StgInt64 *)(adjustor+0x28) = (StgInt64)obscure_ccall_ret_code;
475 *(StgInt64 *)(adjustor+0x30) = (StgInt64)hptr;
476 *(StgInt64 *)(adjustor+0x38) = (StgInt64)wptr;
479 #elif defined(sparc_HOST_ARCH)
480 /* Magic constant computed by inspecting the code length of the following
481 assembly language snippet (offset and machine code prefixed):
483 <00>: 9C23A008 sub %sp, 8, %sp ! make room for %o4/%o5 in caller's frame
484 <04>: DA23A060 st %o5, [%sp + 96] ! shift registers by 2 positions
485 <08>: D823A05C st %o4, [%sp + 92]
486 <0C>: 9A10000B mov %o3, %o5
487 <10>: 9810000A mov %o2, %o4
488 <14>: 96100009 mov %o1, %o3
489 <18>: 94100008 mov %o0, %o2
490 <1C>: 13000000 sethi %hi(wptr), %o1 ! load up wptr (1 of 2)
491 <20>: 11000000 sethi %hi(hptr), %o0 ! load up hptr (1 of 2)
492 <24>: 81C26000 jmp %o1 + %lo(wptr) ! jump to wptr (load 2 of 2)
493 <28>: 90122000 or %o0, %lo(hptr), %o0 ! load up hptr (2 of 2, delay slot)
494 <2C> 00000000 ! place for getting hptr back easily
496 ccall'ing on SPARC is easy, because we are quite lucky to push a
497 multiple of 8 bytes (1 word hptr + 1 word dummy arg) in front of the
498 existing arguments (note that %sp must stay double-word aligned at
499 all times, see ABI spec at http://www.sparc.org/standards/psABI3rd.pdf).
500 To do this, we extend the *caller's* stack frame by 2 words and shift
501 the output registers used for argument passing (%o0 - %o5, we are a *leaf*
502 procedure because of the tail-jump) by 2 positions. This makes room in
503 %o0 and %o1 for the additinal arguments, namely hptr and a dummy (used
504 for destination addr of jump on SPARC, return address on x86, ...). This
505 shouldn't cause any problems for a C-like caller: alloca is implemented
506 similarly, and local variables should be accessed via %fp, not %sp. In a
507 nutshell: This should work! (Famous last words! :-)
509 adjustor = allocateExec(4*(11+1));
511 unsigned long *const adj_code = (unsigned long *)adjustor;
513 adj_code[ 0] = 0x9C23A008UL; /* sub %sp, 8, %sp */
514 adj_code[ 1] = 0xDA23A060UL; /* st %o5, [%sp + 96] */
515 adj_code[ 2] = 0xD823A05CUL; /* st %o4, [%sp + 92] */
516 adj_code[ 3] = 0x9A10000BUL; /* mov %o3, %o5 */
517 adj_code[ 4] = 0x9810000AUL; /* mov %o2, %o4 */
518 adj_code[ 5] = 0x96100009UL; /* mov %o1, %o3 */
519 adj_code[ 6] = 0x94100008UL; /* mov %o0, %o2 */
520 adj_code[ 7] = 0x13000000UL; /* sethi %hi(wptr), %o1 */
521 adj_code[ 7] |= ((unsigned long)wptr) >> 10;
522 adj_code[ 8] = 0x11000000UL; /* sethi %hi(hptr), %o0 */
523 adj_code[ 8] |= ((unsigned long)hptr) >> 10;
524 adj_code[ 9] = 0x81C26000UL; /* jmp %o1 + %lo(wptr) */
525 adj_code[ 9] |= ((unsigned long)wptr) & 0x000003FFUL;
526 adj_code[10] = 0x90122000UL; /* or %o0, %lo(hptr), %o0 */
527 adj_code[10] |= ((unsigned long)hptr) & 0x000003FFUL;
529 adj_code[11] = (unsigned long)hptr;
532 asm("flush %0" : : "r" (adj_code ));
533 asm("flush %0" : : "r" (adj_code + 2));
534 asm("flush %0" : : "r" (adj_code + 4));
535 asm("flush %0" : : "r" (adj_code + 6));
536 asm("flush %0" : : "r" (adj_code + 10));
538 /* max. 5 instructions latency, and we need at >= 1 for returning */
544 #elif defined(alpha_HOST_ARCH)
545 /* Magic constant computed by inspecting the code length of
546 the following assembly language snippet
547 (offset and machine code prefixed; note that the machine code
548 shown is longwords stored in little-endian order):
550 <00>: 46520414 mov a2, a4
551 <04>: 46100412 mov a0, a2
552 <08>: a61b0020 ldq a0, 0x20(pv) # load up hptr
553 <0c>: 46730415 mov a3, a5
554 <10>: a77b0028 ldq pv, 0x28(pv) # load up wptr
555 <14>: 46310413 mov a1, a3
556 <18>: 6bfb---- jmp (pv), <hint> # jump to wptr (with hint)
557 <1c>: 00000000 # padding for alignment
558 <20>: [8 bytes for hptr quadword]
559 <28>: [8 bytes for wptr quadword]
561 The "computed" jump at <08> above is really a jump to a fixed
562 location. Accordingly, we place an always-correct hint in the
563 jump instruction, namely the address offset from <0c> to wptr,
564 divided by 4, taking the lowest 14 bits.
566 We only support passing 4 or fewer argument words, for the same
567 reason described under sparc_HOST_ARCH above by JRS, 21 Aug 01.
568 On the Alpha the first 6 integer arguments are in a0 through a5,
569 and the rest on the stack. Hence we want to shuffle the original
570 caller's arguments by two.
572 On the Alpha the calling convention is so complex and dependent
573 on the callee's signature -- for example, the stack pointer has
574 to be a multiple of 16 -- that it seems impossible to me [ccshan]
575 to handle the general case correctly without changing how the
576 adjustor is called from C. For now, our solution of shuffling
577 registers only and ignoring the stack only works if the original
578 caller passed 4 or fewer argument words.
580 TODO: Depending on how much allocation overhead stgMallocBytes uses for
581 header information (more precisely, if the overhead is no more than
582 4 bytes), we should move the first three instructions above down by
583 4 bytes (getting rid of the nop), hence saving memory. [ccshan]
585 ASSERT(((StgWord64)wptr & 3) == 0);
586 adjustor = allocateExec(48);
588 StgWord64 *const code = (StgWord64 *)adjustor;
590 code[0] = 0x4610041246520414L;
591 code[1] = 0x46730415a61b0020L;
592 code[2] = 0x46310413a77b0028L;
593 code[3] = 0x000000006bfb0000L
594 | (((StgWord32*)(wptr) - (StgWord32*)(code) - 3) & 0x3fff);
596 code[4] = (StgWord64)hptr;
597 code[5] = (StgWord64)wptr;
599 /* Ensure that instruction cache is consistent with our new code */
600 __asm__ volatile("call_pal %0" : : "i" (PAL_imb));
602 #elif defined(powerpc_HOST_ARCH) && defined(linux_HOST_OS)
604 #define OP_LO(op,lo) ((((unsigned)(op)) << 16) | (((unsigned)(lo)) & 0xFFFF))
605 #define OP_HI(op,hi) ((((unsigned)(op)) << 16) | (((unsigned)(hi)) >> 16))
607 /* The PowerPC Linux (32-bit) calling convention is annoyingly complex.
608 We need to calculate all the details of the stack frame layout,
609 taking into account the types of all the arguments, and then
610 generate code on the fly. */
612 int src_gpr = 3, dst_gpr = 5;
614 int src_offset = 0, dst_offset = 0;
615 int n = strlen(typeString),i;
616 int src_locs[n], dst_locs[n];
621 Calculate where the arguments should go.
622 src_locs[] will contain the locations of the arguments in the
623 original stack frame passed to the adjustor.
624 dst_locs[] will contain the locations of the arguments after the
625 adjustor runs, on entry to the wrapper proc pointed to by wptr.
627 This algorithm is based on the one described on page 3-19 of the
628 System V ABI PowerPC Processor Supplement.
630 for(i=0;typeString[i];i++)
632 char t = typeString[i];
633 if((t == 'f' || t == 'd') && fpr <= 8)
634 src_locs[i] = dst_locs[i] = -32-(fpr++);
637 if(t == 'l' && src_gpr <= 9)
639 if((src_gpr & 1) == 0)
641 src_locs[i] = -src_gpr;
644 else if(t == 'i' && src_gpr <= 10)
646 src_locs[i] = -(src_gpr++);
650 if(t == 'l' || t == 'd')
655 src_locs[i] = src_offset;
656 src_offset += (t == 'l' || t == 'd') ? 8 : 4;
659 if(t == 'l' && dst_gpr <= 9)
661 if((dst_gpr & 1) == 0)
663 dst_locs[i] = -dst_gpr;
666 else if(t == 'i' && dst_gpr <= 10)
668 dst_locs[i] = -(dst_gpr++);
672 if(t == 'l' || t == 'd')
677 dst_locs[i] = dst_offset;
678 dst_offset += (t == 'l' || t == 'd') ? 8 : 4;
683 frameSize = dst_offset + 8;
684 frameSize = (frameSize+15) & ~0xF;
689 // allocate space for at most 4 insns per parameter
690 // plus 14 more instructions.
691 adjustor = allocateExec(4 * (4*n + 14));
692 code = (unsigned*)adjustor;
694 *code++ = 0x48000008; // b *+8
695 // * Put the hptr in a place where freeHaskellFunctionPtr
697 *code++ = (unsigned) hptr;
699 // * save the link register
700 *code++ = 0x7c0802a6; // mflr r0;
701 *code++ = 0x90010004; // stw r0, 4(r1);
702 // * and build a new stack frame
703 *code++ = OP_LO(0x9421, -frameSize); // stwu r1, -frameSize(r1)
705 // * now generate instructions to copy arguments
706 // from the old stack frame into the new stack frame.
709 if(src_locs[i] < -32)
710 ASSERT(dst_locs[i] == src_locs[i]);
711 else if(src_locs[i] < 0)
714 ASSERT(typeString[i] != 'f' && typeString[i] != 'd');
717 ASSERT(dst_locs[i] > -32);
718 // dst is in GPR, too.
720 if(typeString[i] == 'l')
724 | ((-dst_locs[i]+1) << 16)
725 | ((-src_locs[i]+1) << 11)
726 | ((-src_locs[i]+1) << 21);
730 | ((-dst_locs[i]) << 16)
731 | ((-src_locs[i]) << 11)
732 | ((-src_locs[i]) << 21);
736 if(typeString[i] == 'l')
738 // stw src+1, dst_offset+4(r1)
740 | ((-src_locs[i]+1) << 21)
744 // stw src, dst_offset(r1)
746 | ((-src_locs[i]) << 21)
752 ASSERT(dst_locs[i] >= 0);
753 ASSERT(typeString[i] != 'f' && typeString[i] != 'd');
755 if(typeString[i] == 'l')
757 // lwz r0, src_offset(r1)
759 | (src_locs[i] + frameSize + 8 + 4);
760 // stw r0, dst_offset(r1)
762 | (dst_locs[i] + 8 + 4);
764 // lwz r0, src_offset(r1)
766 | (src_locs[i] + frameSize + 8);
767 // stw r0, dst_offset(r1)
773 // * hptr will be the new first argument.
775 *code++ = OP_HI(0x3c60, hptr);
776 // ori r3,r3,lo(hptr)
777 *code++ = OP_LO(0x6063, hptr);
779 // * we need to return to a piece of code
780 // which will tear down the stack frame.
781 // lis r11,hi(obscure_ccall_ret_code)
782 *code++ = OP_HI(0x3d60, obscure_ccall_ret_code);
783 // ori r11,r11,lo(obscure_ccall_ret_code)
784 *code++ = OP_LO(0x616b, obscure_ccall_ret_code);
786 *code++ = 0x7d6803a6;
790 *code++ = OP_HI(0x3d60, wptr);
791 // ori r11,r11,lo(wptr)
792 *code++ = OP_LO(0x616b, wptr);
794 *code++ = 0x7d6903a6;
796 *code++ = 0x4e800420;
798 // Flush the Instruction cache:
800 unsigned *p = adjustor;
803 __asm__ volatile ("dcbf 0,%0\n\tsync\n\ticbi 0,%0"
807 __asm__ volatile ("sync\n\tisync");
811 #elif defined(powerpc_HOST_ARCH) || defined(powerpc64_HOST_ARCH)
813 #define OP_LO(op,lo) ((((unsigned)(op)) << 16) | (((unsigned)(lo)) & 0xFFFF))
814 #define OP_HI(op,hi) ((((unsigned)(op)) << 16) | (((unsigned)(hi)) >> 16))
816 /* The following code applies to all PowerPC and PowerPC64 platforms
817 whose stack layout is based on the AIX ABI.
819 Besides (obviously) AIX, this includes
820 Mac OS 9 and BeOS/PPC (may they rest in peace),
821 which use the 32-bit AIX ABI
823 which uses the 64-bit AIX ABI
824 and Darwin (Mac OS X),
825 which uses the same stack layout as AIX,
826 but no function descriptors.
828 The actual stack-frame shuffling is implemented out-of-line
829 in the function adjustorCode, in AdjustorAsm.S.
830 Here, we set up an AdjustorStub structure, which
831 is a function descriptor (on platforms that have function
832 descriptors) or a short piece of stub code (on Darwin) to call
833 adjustorCode with a pointer to the AdjustorStub struct loaded
836 One nice thing about this is that there is _no_ code generated at
837 runtime on the platforms that have function descriptors.
839 AdjustorStub *adjustorStub;
840 int sz = 0, extra_sz, total_sz;
842 // from AdjustorAsm.s
843 // not declared as a function so that AIX-style
844 // fundescs can never get in the way.
845 extern void *adjustorCode;
848 adjustorStub = stgMallocBytes(sizeof(AdjustorStub), "createAdjustor");
850 adjustorStub = allocateExec(sizeof(AdjustorStub));
852 adjustor = adjustorStub;
854 adjustorStub->code = (void*) &adjustorCode;
857 // function descriptors are a cool idea.
858 // We don't need to generate any code at runtime.
859 adjustorStub->toc = adjustorStub;
862 // no function descriptors :-(
863 // We need to do things "by hand".
864 #if defined(powerpc_HOST_ARCH)
865 // lis r2, hi(adjustorStub)
866 adjustorStub->lis = OP_HI(0x3c40, adjustorStub);
867 // ori r2, r2, lo(adjustorStub)
868 adjustorStub->ori = OP_LO(0x6042, adjustorStub);
870 adjustorStub->lwz = OP_LO(0x8002, (char*)(&adjustorStub->code)
871 - (char*)adjustorStub);
873 adjustorStub->mtctr = 0x7c0903a6;
875 adjustorStub->bctr = 0x4e800420;
877 barf("adjustor creation not supported on this platform");
880 // Flush the Instruction cache:
882 int n = sizeof(AdjustorStub)/sizeof(unsigned);
883 unsigned *p = (unsigned*)adjustor;
886 __asm__ volatile ("dcbf 0,%0\n\tsync\n\ticbi 0,%0"
890 __asm__ volatile ("sync\n\tisync");
894 // Calculate the size of the stack frame, in words.
895 sz = totalArgumentSize(typeString);
897 // The first eight words of the parameter area
898 // are just "backing store" for the parameters passed in
899 // the GPRs. extra_sz is the number of words beyond those first
905 // Calculate the total size of the stack frame.
906 total_sz = (6 /* linkage area */
907 + 8 /* minimum parameter area */
908 + 2 /* two extra arguments */
909 + extra_sz)*sizeof(StgWord);
911 // align to 16 bytes.
912 // AIX only requires 8 bytes, but who cares?
913 total_sz = (total_sz+15) & ~0xF;
915 // Fill in the information that adjustorCode in AdjustorAsm.S
916 // will use to create a new stack frame with the additional args.
917 adjustorStub->hptr = hptr;
918 adjustorStub->wptr = wptr;
919 adjustorStub->negative_framesize = -total_sz;
920 adjustorStub->extrawords_plus_one = extra_sz + 1;
923 #elif defined(ia64_HOST_ARCH)
925 Up to 8 inputs are passed in registers. We flush the last two inputs to
926 the stack, initially into the 16-byte scratch region left by the caller.
927 We then shuffle the others along by 4 (taking 2 registers for ourselves
928 to save return address and previous function state - we need to come back
929 here on the way out to restore the stack, so this is a real function
930 rather than just a trampoline).
932 The function descriptor we create contains the gp of the target function
933 so gp is already loaded correctly.
935 [MLX] alloc r16=ar.pfs,10,2,0
937 [MII] st8.spill [r12]=r38,8 // spill in6 (out4)
938 mov r41=r37 // out7 = in5 (out3)
939 mov r40=r36;; // out6 = in4 (out2)
940 [MII] st8.spill [r12]=r39 // spill in7 (out5)
942 mov r38=r34;; // out4 = in2 (out0)
943 [MII] mov r39=r35 // out5 = in3 (out1)
944 mov r37=r33 // out3 = in1 (loc1)
945 mov r36=r32 // out2 = in0 (loc0)
946 [MLX] adds r12=-24,r12 // update sp
947 movl r34=hptr;; // out0 = hptr
948 [MIB] mov r33=r16 // loc1 = ar.pfs
949 mov r32=b0 // loc0 = retaddr
950 br.call.sptk.many b0=b6;;
952 [MII] adds r12=-16,r12
957 br.ret.sptk.many b0;;
960 /* These macros distribute a long constant into the two words of an MLX bundle */
961 #define BITS(val,start,count) (((val) >> (start)) & ((1 << (count))-1))
962 #define MOVL_LOWORD(val) (BITS(val,22,18) << 46)
963 #define MOVL_HIWORD(val) (BITS(val,40,23) | (BITS(val,0,7) << 36) | (BITS(val,7,9) << 50) \
964 | (BITS(val,16,5) << 55) | (BITS(val,21,1) << 44) | BITS(val,63,1) << 59)
968 IA64FunDesc *wdesc = (IA64FunDesc *)wptr;
969 StgWord64 wcode = wdesc->ip;
973 /* we allocate on the Haskell heap since malloc'd memory isn't executable - argh */
974 adjustor = stgAllocStable(sizeof(IA64FunDesc)+18*8, &stable);
976 fdesc = (IA64FunDesc *)adjustor;
977 code = (StgWord64 *)(fdesc + 1);
978 fdesc->ip = (StgWord64)code;
979 fdesc->gp = wdesc->gp;
981 code[0] = 0x0000058004288004 | MOVL_LOWORD(wcode);
982 code[1] = 0x6000000220000000 | MOVL_HIWORD(wcode);
983 code[2] = 0x029015d818984001;
984 code[3] = 0x8401200500420094;
985 code[4] = 0x886011d8189c0001;
986 code[5] = 0x84011004c00380c0;
987 code[6] = 0x0250210046013800;
988 code[7] = 0x8401000480420084;
989 code[8] = 0x0000233f19a06005 | MOVL_LOWORD((StgWord64)hptr);
990 code[9] = 0x6000000440000000 | MOVL_HIWORD((StgWord64)hptr);
991 code[10] = 0x0200210020010811;
992 code[11] = 0x1080006800006200;
993 code[12] = 0x0000210018406000;
994 code[13] = 0x00aa021000038005;
995 code[14] = 0x000000010000001d;
996 code[15] = 0x0084000880000200;
998 /* save stable pointers in convenient form */
999 code[16] = (StgWord64)hptr;
1000 code[17] = (StgWord64)stable;
1003 barf("adjustor creation not supported on this platform");
/* Free an adjustor thunk previously handed out by createAdjustor().
 *
 * For each platform we first sanity-check that ptr really points at one of
 * our generated thunks (by inspecting the leading machine-code bytes or the
 * stub structure), then release the StgStablePtr embedded in the thunk, and
 * finally release the executable memory itself.
 *
 * Fixes relative to the previous version:
 *  - the i386/Darwin branch was guarded by x86_HOST_ARCH, a symbol that is
 *    never defined (the rest of this file uses i386_HOST_ARCH), so that
 *    branch was dead code and i386/Darwin adjustors leaked;
 *  - the alpha branch checked a stale magic quadword and freed the stable
 *    pointer at offset 0x10, but createAdjustor() writes
 *    code[0] = 0x4610041246520414 and stores hptr at code[4] (byte offset
 *    0x20), so the check could never match.  Both now agree with the
 *    generated code.
 */
void
freeHaskellFunctionPtr(void* ptr)
{
#if defined(i386_HOST_ARCH) && !defined(darwin_HOST_OS)
 /* ccall thunks start with pushl ($0x68), stdcall thunks with popl %eax (0x58) */
 if ( *(unsigned char*)ptr != 0x68 &&
      *(unsigned char*)ptr != 0x58 ) {
   errorBelch("freeHaskellFunctionPtr: not for me, guv! %p\n", ptr);
   return;
 }

 /* Free the stable pointer first..*/
 if (*(unsigned char*)ptr == 0x68) { /* Aha, a ccall adjustor! */
    /* hptr is the dword immediate of the pushl at offset 0x01 */
    freeStablePtr(*((StgStablePtr*)((unsigned char*)ptr + 0x01)));
 } else {
    /* stdcall adjustor: hptr is the immediate of the pushl at offset 0x02 */
    freeStablePtr(*((StgStablePtr*)((unsigned char*)ptr + 0x02)));
 }
#elif defined(i386_HOST_ARCH) && defined(darwin_HOST_OS)
 /* The Darwin stub begins with a call instruction (0xe8) into adjustorCode */
 if ( *(unsigned char*)ptr != 0xe8 ) {
   errorBelch("freeHaskellFunctionPtr: not for me, guv! %p\n", ptr);
   return;
 }
 freeStablePtr(((AdjustorStub*)ptr)->hptr);
#elif defined(x86_64_HOST_ARCH)
 if ( *(StgWord16 *)ptr == 0x894d ) {
     /* simple (<6 int args) variant: hptr quadword lives at offset 0x20 */
     freeStablePtr(*(StgStablePtr*)((unsigned char*)ptr + 0x20));
 } else if ( *(StgWord16 *)ptr == 0x5141 ) {
     /* >=6 int args variant: hptr quadword lives at offset 0x30 */
     freeStablePtr(*(StgStablePtr*)((unsigned char*)ptr + 0x30));
 } else {
   errorBelch("freeHaskellFunctionPtr: not for me, guv! %p\n", ptr);
   return;
 }
#elif defined(sparc_HOST_ARCH)
 /* first insn of the thunk is "sub %sp, 8, %sp" */
 if ( *(unsigned long*)ptr != 0x9C23A008UL ) {
   errorBelch("freeHaskellFunctionPtr: not for me, guv! %p\n", ptr);
   return;
 }

 /* Free the stable pointer first.. (stored in adj_code[11]) */
 freeStablePtr(*((StgStablePtr*)((unsigned long*)ptr + 11)));
#elif defined(alpha_HOST_ARCH)
 /* must match code[0] as emitted by createAdjustor() */
 if ( *(StgWord64*)ptr != 0x4610041246520414L ) {
   errorBelch("freeHaskellFunctionPtr: not for me, guv! %p\n", ptr);
   return;
 }

 /* Free the stable pointer first.. (hptr quadword at offset 0x20) */
 freeStablePtr(*((StgStablePtr*)((unsigned char*)ptr + 0x20)));
#elif defined(powerpc_HOST_ARCH) && defined(linux_HOST_OS)
 /* the thunk starts with "b *+8", skipping over the embedded hptr word */
 if ( *(StgWord*)ptr != 0x48000008 ) {
   errorBelch("freeHaskellFunctionPtr: not for me, guv! %p\n", ptr);
   return;
 }
 freeStablePtr(((StgStablePtr*)ptr)[1]);
#elif defined(powerpc_HOST_ARCH) || defined(powerpc64_HOST_ARCH)
 extern void* adjustorCode;
 if ( ((AdjustorStub*)ptr)->code != (StgFunPtr) &adjustorCode ) {
   errorBelch("freeHaskellFunctionPtr: not for me, guv! %p\n", ptr);
   return;
 }
 freeStablePtr(((AdjustorStub*)ptr)->hptr);
#elif defined(ia64_HOST_ARCH)
 IA64FunDesc *fdesc = (IA64FunDesc *)ptr;
 StgWord64 *code = (StgWord64 *)(fdesc+1);

 if (fdesc->ip != (StgWord64)code) {
   errorBelch("freeHaskellFunctionPtr: not for me, guv! %p\n", ptr);
   return;
 }
 /* both the hptr and the stable pointer to the heap-allocated code block
    (see stgAllocStable) were stashed at the end of the code */
 freeStablePtr((StgStablePtr)code[16]);
 freeStablePtr((StgStablePtr)code[17]);

 /* ia64 thunks live on the Haskell heap, not in allocateExec() memory,
    so freeing the stable pointer above is all that is needed. */
 return;
#else
 ASSERT(0);
#endif
 /* scrub the first byte so a dangling caller faults early */
 *((unsigned char*)ptr) = '\0';

 freeExec(ptr);
}
1098 * Function: initAdjustor()
1100 * Perform initialisation of adjustor thunk layer (if needed.)
1105 #if defined(i386_HOST_ARCH) && defined(openbsd_HOST_OS)
1106 obscure_ccall_ret_code_dyn = allocateExec(4);
1107 obscure_ccall_ret_code_dyn[0] = ((unsigned char *)obscure_ccall_ret_code)[0];
1108 obscure_ccall_ret_code_dyn[1] = ((unsigned char *)obscure_ccall_ret_code)[1];
1109 obscure_ccall_ret_code_dyn[2] = ((unsigned char *)obscure_ccall_ret_code)[2];
1110 obscure_ccall_ret_code_dyn[3] = ((unsigned char *)obscure_ccall_ret_code)[3];