1 /* -----------------------------------------------------------------------------
2 * Foreign export adjustor thunks
6 * ---------------------------------------------------------------------------*/
8 /* A little bit of background...
10 An adjustor thunk is a dynamically allocated code snippet that allows
11 Haskell closures to be viewed as C function pointers.
13 Stable pointers provide a way for the outside world to get access to,
14 and evaluate, Haskell heap objects, with the RTS providing a small
15 range of ops for doing so. So, assuming we've got a stable pointer in
16 our hand in C, we can jump into the Haskell world and evaluate a callback
17 procedure, say. This works OK in some cases where callbacks are used, but
18 does require the external code to know about stable pointers and how to deal
19 with them. We'd like to hide the Haskell-nature of a callback and have it
20 be invoked just like any other C function pointer.
22 Enter adjustor thunks. An adjustor thunk is a little piece of code
23 that's generated on-the-fly (one per Haskell closure being exported)
24 that, when entered using some 'universal' calling convention (e.g., the
25 C calling convention on platform X), pushes an implicit stable pointer
26 (to the Haskell callback) before calling another (static) C function stub
27 which takes care of entering the Haskell code via its stable pointer.
29 An adjustor thunk is allocated on the C heap, and is called from within
30 Haskell just before handing out the function pointer to the Haskell (IO)
31 action. User code should never have to invoke it explicitly.
33 An adjustor thunk differs from a C function pointer in one respect: when
34 the code is through with it, it has to be freed in order to release Haskell
35 and C resources. Failure to do so results in memory leaks on both the C and
39 #include "PosixSource.h"
41 #include "RtsExternal.h"
50 #if defined(powerpc_HOST_ARCH) && defined(linux_HOST_OS)
54 #ifdef LEADING_UNDERSCORE
55 #define UNDERSCORE "_"
59 #if defined(i386_HOST_ARCH) && !defined(darwin_HOST_OS)
61 Now here's something obscure for you:
63 When generating an adjustor thunk that uses the C calling
64 convention, we have to make sure that the thunk kicks off
65 the process of jumping into Haskell with a tail jump. Why?
66 Because as a result of jumping in into Haskell we may end
67 up freeing the very adjustor thunk we came from using
68 freeHaskellFunctionPtr(). Hence, we better not return to
69 the adjustor code on our way out, since it could by then
72 The fix is readily at hand, just include the opcodes
73 for the C stack fixup code that we need to perform when
74 returning in some static piece of memory and arrange
75 to return to it before tail jumping from the adjustor thunk.
77 static void GNUC3_ATTRIBUTE(used) obscure_ccall_wrapper(void)
80 ".globl " UNDERSCORE "obscure_ccall_ret_code\n"
81 UNDERSCORE "obscure_ccall_ret_code:\n\t"
86 extern void obscure_ccall_ret_code(void);
88 #if defined(openbsd_HOST_OS)
89 static unsigned char *obscure_ccall_ret_code_dyn;
94 #if defined(x86_64_HOST_ARCH)
95 static void GNUC3_ATTRIBUTE(used) obscure_ccall_wrapper(void)
98 ".globl " UNDERSCORE "obscure_ccall_ret_code\n"
99 UNDERSCORE "obscure_ccall_ret_code:\n\t"
100 "addq $0x8, %rsp\n\t"
104 extern void obscure_ccall_ret_code(void);
107 #if defined(alpha_HOST_ARCH)
108 /* To get the definition of PAL_imb: */
109 # if defined(linux_HOST_OS)
110 # include <asm/pal.h>
112 # include <machine/pal.h>
116 #if defined(ia64_HOST_ARCH)
119 /* Layout of a function descriptor */
120 typedef struct _IA64FunDesc {
126 stgAllocStable(size_t size_in_bytes, StgStablePtr *stable)
129 nat data_size_in_words, total_size_in_words;
131 /* round up to a whole number of words */
132 data_size_in_words = (size_in_bytes + sizeof(W_) + 1) / sizeof(W_);
133 total_size_in_words = sizeofW(StgArrWords) + data_size_in_words;
135 /* allocate and fill it in */
136 arr = (StgArrWords *)allocate(total_size_in_words);
137 SET_ARR_HDR(arr, &stg_ARR_WORDS_info, CCCS, data_size_in_words);
139 /* obtain a stable ptr */
140 *stable = getStablePtr((StgPtr)arr);
142 /* and return a ptr to the goods inside the array */
143 return(&(arr->payload));
147 #if defined(powerpc_HOST_ARCH) && defined(linux_HOST_OS)
148 __asm__("obscure_ccall_ret_code:\n\t"
153 extern void obscure_ccall_ret_code(void);
156 #if defined(powerpc_HOST_ARCH) || defined(powerpc64_HOST_ARCH)
157 #if !(defined(powerpc_HOST_ARCH) && defined(linux_HOST_OS))
159 /* !!! !!! WARNING: !!! !!!
160 * This structure is accessed from AdjustorAsm.s
161 * Any changes here have to be mirrored in the offsets there.
164 typedef struct AdjustorStub {
165 #if defined(powerpc_HOST_ARCH) && defined(darwin_HOST_OS)
172 #elif defined(powerpc64_HOST_ARCH) && defined(darwin_HOST_OS)
173 /* powerpc64-darwin: just guessing that it won't use fundescs. */
184 /* fundesc-based ABIs */
193 StgInt negative_framesize;
194 StgInt extrawords_plus_one;
200 #if defined(i386_HOST_ARCH) && defined(darwin_HOST_OS)
202 /* !!! !!! WARNING: !!! !!!
203 * This structure is accessed from AdjustorAsm.s
204 * Any changes here have to be mirrored in the offsets there.
207 typedef struct AdjustorStub {
208 unsigned char call[8];
212 StgInt argument_size;
216 #if defined(darwin_HOST_OS) || defined(powerpc_HOST_ARCH) || defined(powerpc64_HOST_ARCH)
217 static int totalArgumentSize(char *typeString)
222 char t = *typeString++;
226 // on 32-bit platforms, Double and Int64 occupy two words.
229 if(sizeof(void*) == 4)
234 // everything else is one word.
244 createAdjustor(int cconv, StgStablePtr hptr,
247 #if !defined(powerpc_HOST_ARCH) && !defined(powerpc64_HOST_ARCH) && !defined(x86_64_HOST_ARCH)
252 void *adjustor = NULL;
256 case 0: /* _stdcall */
257 #if defined(i386_HOST_ARCH) && !defined(darwin_HOST_OS)
258 /* Magic constant computed by inspecting the code length of
259 the following assembly language snippet
260 (offset and machine code prefixed):
262 <0>: 58 popl %eax # temp. remove ret addr..
263 <1>: 68 fd fc fe fa pushl 0xfafefcfd # constant is large enough to
264 # hold a StgStablePtr
265 <6>: 50 pushl %eax # put back ret. addr
266 <7>: b8 fa ef ff 00 movl $0x00ffeffa, %eax # load up wptr
267 <c>: ff e0 jmp %eax # and jump to it.
268 # the callee cleans up the stack
270 adjustor = allocateExec(14);
272 unsigned char *const adj_code = (unsigned char *)adjustor;
273 adj_code[0x00] = (unsigned char)0x58; /* popl %eax */
275 adj_code[0x01] = (unsigned char)0x68; /* pushl hptr (which is a dword immediate ) */
276 *((StgStablePtr*)(adj_code + 0x02)) = (StgStablePtr)hptr;
278 adj_code[0x06] = (unsigned char)0x50; /* pushl %eax */
280 adj_code[0x07] = (unsigned char)0xb8; /* movl $wptr, %eax */
281 *((StgFunPtr*)(adj_code + 0x08)) = (StgFunPtr)wptr;
283 adj_code[0x0c] = (unsigned char)0xff; /* jmp %eax */
284 adj_code[0x0d] = (unsigned char)0xe0;
290 #if defined(i386_HOST_ARCH) && !defined(darwin_HOST_OS)
291 /* Magic constant computed by inspecting the code length of
292 the following assembly language snippet
293 (offset and machine code prefixed):
295 <00>: 68 ef be ad de pushl $0xdeadbeef # constant is large enough to
296 # hold a StgStablePtr
297 <05>: b8 fa ef ff 00 movl $0x00ffeffa, %eax # load up wptr
298 <0a>: 68 ef be ad de pushl $obscure_ccall_ret_code # push the return address
299 <0f>: ff e0 jmp *%eax # jump to wptr
301 The ccall'ing version is a tad different, passing in the return
302 address of the caller to the auto-generated C stub (which enters
303 via the stable pointer.) (The auto-generated C stub is in on this
304 game, don't worry :-)
306 See the comment next to obscure_ccall_ret_code why we need to
307 perform a tail jump instead of a call, followed by some C stack
310 Note: The adjustor makes the assumption that any return value
311 coming back from the C stub is not stored on the stack.
312 That's (thankfully) the case here with the restricted set of
313 return types that we support.
315 adjustor = allocateExec(17);
317 unsigned char *const adj_code = (unsigned char *)adjustor;
319 adj_code[0x00] = (unsigned char)0x68; /* pushl hptr (which is a dword immediate ) */
320 *((StgStablePtr*)(adj_code+0x01)) = (StgStablePtr)hptr;
322 adj_code[0x05] = (unsigned char)0xb8; /* movl $wptr, %eax */
323 *((StgFunPtr*)(adj_code + 0x06)) = (StgFunPtr)wptr;
325 adj_code[0x0a] = (unsigned char)0x68; /* pushl obscure_ccall_ret_code */
326 *((StgFunPtr*)(adj_code + 0x0b)) =
327 #if !defined(openbsd_HOST_OS)
328 (StgFunPtr)obscure_ccall_ret_code;
330 (StgFunPtr)obscure_ccall_ret_code_dyn;
333 adj_code[0x0f] = (unsigned char)0xff; /* jmp *%eax */
334 adj_code[0x10] = (unsigned char)0xe0;
336 #elif defined(i386_HOST_ARCH) && defined(darwin_HOST_OS)
339 What's special about Darwin/Mac OS X on i386?
340 It wants the stack to stay 16-byte aligned.
342 We offload most of the work to AdjustorAsm.S.
344 AdjustorStub *adjustorStub = allocateExec(sizeof(AdjustorStub));
345 adjustor = adjustorStub;
347 extern void adjustorCode(void);
348 int sz = totalArgumentSize(typeString);
350 adjustorStub->call[0] = 0xe8;
351 *(long*)&adjustorStub->call[1] = ((char*)&adjustorCode) - ((char*)adjustorStub + 5);
352 adjustorStub->hptr = hptr;
353 adjustorStub->wptr = wptr;
355 // The adjustor puts the following things on the stack:
357 // 2.) padding and (a copy of) the arguments
358 // 3.) a dummy argument
360 // 5.) return address (for returning to the adjustor)
361 // All these have to add up to a multiple of 16.
363 // first, include everything in frame_size
364 adjustorStub->frame_size = sz * 4 + 16;
366 adjustorStub->frame_size = (adjustorStub->frame_size + 15) & ~15;
367 // only count 2.) and 3.) as part of frame_size
368 adjustorStub->frame_size -= 12;
369 adjustorStub->argument_size = sz;
372 #elif defined(x86_64_HOST_ARCH)
379 %rdi,%rsi,%rdx,%rcx,%r8,%r9 = arg0..arg6
381 if there are <6 integer args, then we can just push the
382 StablePtr into %edi and shuffle the other args up.
384 If there are >=6 integer args, then we have to flush one arg
385 to the stack, and arrange to adjust the stack ptr on return.
386 The stack will be rearranged to this:
391 return address *** <-- dummy arg in stub fn.
393 obscure_ccall_ret_code
395 This unfortunately means that the type of the stub function
396 must have a dummy argument for the original return address
397 pointer inserted just after the 6th integer argument.
399 Code for the simple case:
401 0: 4d 89 c1 mov %r8,%r9
402 3: 49 89 c8 mov %rcx,%r8
403 6: 48 89 d1 mov %rdx,%rcx
404 9: 48 89 f2 mov %rsi,%rdx
405 c: 48 89 fe mov %rdi,%rsi
406 f: 48 8b 3d 0a 00 00 00 mov 10(%rip),%rdi
407 16: ff 25 0c 00 00 00 jmpq *12(%rip)
409 20: .quad 0 # aligned on 8-byte boundary
410 28: .quad 0 # aligned on 8-byte boundary
413 And the version for >=6 integer arguments:
416 2: ff 35 20 00 00 00 pushq 32(%rip) # 28 <ccall_adjustor+0x28>
417 8: 4d 89 c1 mov %r8,%r9
418 b: 49 89 c8 mov %rcx,%r8
419 e: 48 89 d1 mov %rdx,%rcx
420 11: 48 89 f2 mov %rsi,%rdx
421 14: 48 89 fe mov %rdi,%rsi
422 17: 48 8b 3d 12 00 00 00 mov 18(%rip),%rdi # 30 <ccall_adjustor+0x30>
423 1e: ff 25 14 00 00 00 jmpq *20(%rip) # 38 <ccall_adjustor+0x38>
425 28: .quad 0 # aligned on 8-byte boundary
426 30: .quad 0 # aligned on 8-byte boundary
427 38: .quad 0 # aligned on 8-byte boundary
430 /* we assume the small code model (gcc -mcmmodel=small) where
431 * all symbols are <2^32, so hence wptr should fit into 32 bits.
433 ASSERT(((long)wptr >> 32) == 0);
439 // determine whether we have 6 or more integer arguments,
440 // and therefore need to flush one to the stack.
441 for (c = typeString; *c != '\0'; c++) {
442 if (*c == 'i' || *c == 'l') i++;
447 adjustor = allocateExec(0x30);
449 *(StgInt32 *)adjustor = 0x49c1894d;
450 *(StgInt32 *)(adjustor+0x4) = 0x8948c889;
451 *(StgInt32 *)(adjustor+0x8) = 0xf28948d1;
452 *(StgInt32 *)(adjustor+0xc) = 0x48fe8948;
453 *(StgInt32 *)(adjustor+0x10) = 0x000a3d8b;
454 *(StgInt32 *)(adjustor+0x14) = 0x25ff0000;
455 *(StgInt32 *)(adjustor+0x18) = 0x0000000c;
456 *(StgInt64 *)(adjustor+0x20) = (StgInt64)hptr;
457 *(StgInt64 *)(adjustor+0x28) = (StgInt64)wptr;
461 adjustor = allocateExec(0x40);
463 *(StgInt32 *)adjustor = 0x35ff5141;
464 *(StgInt32 *)(adjustor+0x4) = 0x00000020;
465 *(StgInt32 *)(adjustor+0x8) = 0x49c1894d;
466 *(StgInt32 *)(adjustor+0xc) = 0x8948c889;
467 *(StgInt32 *)(adjustor+0x10) = 0xf28948d1;
468 *(StgInt32 *)(adjustor+0x14) = 0x48fe8948;
469 *(StgInt32 *)(adjustor+0x18) = 0x00123d8b;
470 *(StgInt32 *)(adjustor+0x1c) = 0x25ff0000;
471 *(StgInt32 *)(adjustor+0x20) = 0x00000014;
473 *(StgInt64 *)(adjustor+0x28) = (StgInt64)obscure_ccall_ret_code;
474 *(StgInt64 *)(adjustor+0x30) = (StgInt64)hptr;
475 *(StgInt64 *)(adjustor+0x38) = (StgInt64)wptr;
478 #elif defined(sparc_HOST_ARCH)
479 /* Magic constant computed by inspecting the code length of the following
480 assembly language snippet (offset and machine code prefixed):
482 <00>: 9C23A008 sub %sp, 8, %sp ! make room for %o4/%o5 in caller's frame
483 <04>: DA23A060 st %o5, [%sp + 96] ! shift registers by 2 positions
484 <08>: D823A05C st %o4, [%sp + 92]
485 <0C>: 9A10000B mov %o3, %o5
486 <10>: 9810000A mov %o2, %o4
487 <14>: 96100009 mov %o1, %o3
488 <18>: 94100008 mov %o0, %o2
489 <1C>: 13000000 sethi %hi(wptr), %o1 ! load up wptr (1 of 2)
490 <20>: 11000000 sethi %hi(hptr), %o0 ! load up hptr (1 of 2)
491 <24>: 81C26000 jmp %o1 + %lo(wptr) ! jump to wptr (load 2 of 2)
492 <28>: 90122000 or %o0, %lo(hptr), %o0 ! load up hptr (2 of 2, delay slot)
493 <2C> 00000000 ! place for getting hptr back easily
495 ccall'ing on SPARC is easy, because we are quite lucky to push a
496 multiple of 8 bytes (1 word hptr + 1 word dummy arg) in front of the
497 existing arguments (note that %sp must stay double-word aligned at
498 all times, see ABI spec at http://www.sparc.org/standards/psABI3rd.pdf).
499 To do this, we extend the *caller's* stack frame by 2 words and shift
500 the output registers used for argument passing (%o0 - %o5, we are a *leaf*
501 procedure because of the tail-jump) by 2 positions. This makes room in
502 %o0 and %o1 for the additinal arguments, namely hptr and a dummy (used
503 for destination addr of jump on SPARC, return address on x86, ...). This
504 shouldn't cause any problems for a C-like caller: alloca is implemented
505 similarly, and local variables should be accessed via %fp, not %sp. In a
506 nutshell: This should work! (Famous last words! :-)
508 adjustor = allocateExec(4*(11+1));
510 unsigned long *const adj_code = (unsigned long *)adjustor;
512 adj_code[ 0] = 0x9C23A008UL; /* sub %sp, 8, %sp */
513 adj_code[ 1] = 0xDA23A060UL; /* st %o5, [%sp + 96] */
514 adj_code[ 2] = 0xD823A05CUL; /* st %o4, [%sp + 92] */
515 adj_code[ 3] = 0x9A10000BUL; /* mov %o3, %o5 */
516 adj_code[ 4] = 0x9810000AUL; /* mov %o2, %o4 */
517 adj_code[ 5] = 0x96100009UL; /* mov %o1, %o3 */
518 adj_code[ 6] = 0x94100008UL; /* mov %o0, %o2 */
519 adj_code[ 7] = 0x13000000UL; /* sethi %hi(wptr), %o1 */
520 adj_code[ 7] |= ((unsigned long)wptr) >> 10;
521 adj_code[ 8] = 0x11000000UL; /* sethi %hi(hptr), %o0 */
522 adj_code[ 8] |= ((unsigned long)hptr) >> 10;
523 adj_code[ 9] = 0x81C26000UL; /* jmp %o1 + %lo(wptr) */
524 adj_code[ 9] |= ((unsigned long)wptr) & 0x000003FFUL;
525 adj_code[10] = 0x90122000UL; /* or %o0, %lo(hptr), %o0 */
526 adj_code[10] |= ((unsigned long)hptr) & 0x000003FFUL;
528 adj_code[11] = (unsigned long)hptr;
531 asm("flush %0" : : "r" (adj_code ));
532 asm("flush %0" : : "r" (adj_code + 2));
533 asm("flush %0" : : "r" (adj_code + 4));
534 asm("flush %0" : : "r" (adj_code + 6));
535 asm("flush %0" : : "r" (adj_code + 10));
537 /* max. 5 instructions latency, and we need at >= 1 for returning */
543 #elif defined(alpha_HOST_ARCH)
544 /* Magic constant computed by inspecting the code length of
545 the following assembly language snippet
546 (offset and machine code prefixed; note that the machine code
547 shown is longwords stored in little-endian order):
549 <00>: 46520414 mov a2, a4
550 <04>: 46100412 mov a0, a2
551 <08>: a61b0020 ldq a0, 0x20(pv) # load up hptr
552 <0c>: 46730415 mov a3, a5
553 <10>: a77b0028 ldq pv, 0x28(pv) # load up wptr
554 <14>: 46310413 mov a1, a3
555 <18>: 6bfb---- jmp (pv), <hint> # jump to wptr (with hint)
556 <1c>: 00000000 # padding for alignment
557 <20>: [8 bytes for hptr quadword]
558 <28>: [8 bytes for wptr quadword]
560 The "computed" jump at <08> above is really a jump to a fixed
561 location. Accordingly, we place an always-correct hint in the
562 jump instruction, namely the address offset from <0c> to wptr,
563 divided by 4, taking the lowest 14 bits.
565 We only support passing 4 or fewer argument words, for the same
566 reason described under sparc_HOST_ARCH above by JRS, 21 Aug 01.
567 On the Alpha the first 6 integer arguments are in a0 through a5,
568 and the rest on the stack. Hence we want to shuffle the original
569 caller's arguments by two.
571 On the Alpha the calling convention is so complex and dependent
572 on the callee's signature -- for example, the stack pointer has
573 to be a multiple of 16 -- that it seems impossible to me [ccshan]
574 to handle the general case correctly without changing how the
575 adjustor is called from C. For now, our solution of shuffling
576 registers only and ignoring the stack only works if the original
577 caller passed 4 or fewer argument words.
579 TODO: Depending on how much allocation overhead stgMallocBytes uses for
580 header information (more precisely, if the overhead is no more than
581 4 bytes), we should move the first three instructions above down by
582 4 bytes (getting rid of the nop), hence saving memory. [ccshan]
584 ASSERT(((StgWord64)wptr & 3) == 0);
585 adjustor = allocateExec(48);
587 StgWord64 *const code = (StgWord64 *)adjustor;
589 code[0] = 0x4610041246520414L;
590 code[1] = 0x46730415a61b0020L;
591 code[2] = 0x46310413a77b0028L;
592 code[3] = 0x000000006bfb0000L
593 | (((StgWord32*)(wptr) - (StgWord32*)(code) - 3) & 0x3fff);
595 code[4] = (StgWord64)hptr;
596 code[5] = (StgWord64)wptr;
598 /* Ensure that instruction cache is consistent with our new code */
599 __asm__ volatile("call_pal %0" : : "i" (PAL_imb));
601 #elif defined(powerpc_HOST_ARCH) && defined(linux_HOST_OS)
603 #define OP_LO(op,lo) ((((unsigned)(op)) << 16) | (((unsigned)(lo)) & 0xFFFF))
604 #define OP_HI(op,hi) ((((unsigned)(op)) << 16) | (((unsigned)(hi)) >> 16))
606 /* The PowerPC Linux (32-bit) calling convention is annoyingly complex.
607 We need to calculate all the details of the stack frame layout,
608 taking into account the types of all the arguments, and then
609 generate code on the fly. */
611 int src_gpr = 3, dst_gpr = 5;
613 int src_offset = 0, dst_offset = 0;
614 int n = strlen(typeString),i;
615 int src_locs[n], dst_locs[n];
620 Calculate where the arguments should go.
621 src_locs[] will contain the locations of the arguments in the
622 original stack frame passed to the adjustor.
623 dst_locs[] will contain the locations of the arguments after the
624 adjustor runs, on entry to the wrapper proc pointed to by wptr.
626 This algorithm is based on the one described on page 3-19 of the
627 System V ABI PowerPC Processor Supplement.
629 for(i=0;typeString[i];i++)
631 char t = typeString[i];
632 if((t == 'f' || t == 'd') && fpr <= 8)
633 src_locs[i] = dst_locs[i] = -32-(fpr++);
636 if(t == 'l' && src_gpr <= 9)
638 if((src_gpr & 1) == 0)
640 src_locs[i] = -src_gpr;
643 else if(t == 'i' && src_gpr <= 10)
645 src_locs[i] = -(src_gpr++);
649 if(t == 'l' || t == 'd')
654 src_locs[i] = src_offset;
655 src_offset += (t == 'l' || t == 'd') ? 8 : 4;
658 if(t == 'l' && dst_gpr <= 9)
660 if((dst_gpr & 1) == 0)
662 dst_locs[i] = -dst_gpr;
665 else if(t == 'i' && dst_gpr <= 10)
667 dst_locs[i] = -(dst_gpr++);
671 if(t == 'l' || t == 'd')
676 dst_locs[i] = dst_offset;
677 dst_offset += (t == 'l' || t == 'd') ? 8 : 4;
682 frameSize = dst_offset + 8;
683 frameSize = (frameSize+15) & ~0xF;
688 // allocate space for at most 4 insns per parameter
689 // plus 14 more instructions.
690 adjustor = allocateExec(4 * (4*n + 14));
691 code = (unsigned*)adjustor;
693 *code++ = 0x48000008; // b *+8
694 // * Put the hptr in a place where freeHaskellFunctionPtr
696 *code++ = (unsigned) hptr;
698 // * save the link register
699 *code++ = 0x7c0802a6; // mflr r0;
700 *code++ = 0x90010004; // stw r0, 4(r1);
701 // * and build a new stack frame
702 *code++ = OP_LO(0x9421, -frameSize); // stwu r1, -frameSize(r1)
704 // * now generate instructions to copy arguments
705 // from the old stack frame into the new stack frame.
708 if(src_locs[i] < -32)
709 ASSERT(dst_locs[i] == src_locs[i]);
710 else if(src_locs[i] < 0)
713 ASSERT(typeString[i] != 'f' && typeString[i] != 'd');
716 ASSERT(dst_locs[i] > -32);
717 // dst is in GPR, too.
719 if(typeString[i] == 'l')
723 | ((-dst_locs[i]+1) << 16)
724 | ((-src_locs[i]+1) << 11)
725 | ((-src_locs[i]+1) << 21);
729 | ((-dst_locs[i]) << 16)
730 | ((-src_locs[i]) << 11)
731 | ((-src_locs[i]) << 21);
735 if(typeString[i] == 'l')
737 // stw src+1, dst_offset+4(r1)
739 | ((-src_locs[i]+1) << 21)
743 // stw src, dst_offset(r1)
745 | ((-src_locs[i]) << 21)
751 ASSERT(dst_locs[i] >= 0);
752 ASSERT(typeString[i] != 'f' && typeString[i] != 'd');
754 if(typeString[i] == 'l')
756 // lwz r0, src_offset(r1)
758 | (src_locs[i] + frameSize + 8 + 4);
759 // stw r0, dst_offset(r1)
761 | (dst_locs[i] + 8 + 4);
763 // lwz r0, src_offset(r1)
765 | (src_locs[i] + frameSize + 8);
766 // stw r0, dst_offset(r1)
772 // * hptr will be the new first argument.
774 *code++ = OP_HI(0x3c60, hptr);
775 // ori r3,r3,lo(hptr)
776 *code++ = OP_LO(0x6063, hptr);
778 // * we need to return to a piece of code
779 // which will tear down the stack frame.
780 // lis r11,hi(obscure_ccall_ret_code)
781 *code++ = OP_HI(0x3d60, obscure_ccall_ret_code);
782 // ori r11,r11,lo(obscure_ccall_ret_code)
783 *code++ = OP_LO(0x616b, obscure_ccall_ret_code);
785 *code++ = 0x7d6803a6;
789 *code++ = OP_HI(0x3d60, wptr);
790 // ori r11,r11,lo(wptr)
791 *code++ = OP_LO(0x616b, wptr);
793 *code++ = 0x7d6903a6;
795 *code++ = 0x4e800420;
797 // Flush the Instruction cache:
799 unsigned *p = adjustor;
802 __asm__ volatile ("dcbf 0,%0\n\tsync\n\ticbi 0,%0"
806 __asm__ volatile ("sync\n\tisync");
810 #elif defined(powerpc_HOST_ARCH) || defined(powerpc64_HOST_ARCH)
812 #define OP_LO(op,lo) ((((unsigned)(op)) << 16) | (((unsigned)(lo)) & 0xFFFF))
813 #define OP_HI(op,hi) ((((unsigned)(op)) << 16) | (((unsigned)(hi)) >> 16))
815 /* The following code applies to all PowerPC and PowerPC64 platforms
816 whose stack layout is based on the AIX ABI.
818 Besides (obviously) AIX, this includes
819 Mac OS 9 and BeOS/PPC (may they rest in peace),
820 which use the 32-bit AIX ABI
822 which uses the 64-bit AIX ABI
823 and Darwin (Mac OS X),
824 which uses the same stack layout as AIX,
825 but no function descriptors.
827 The actual stack-frame shuffling is implemented out-of-line
828 in the function adjustorCode, in AdjustorAsm.S.
829 Here, we set up an AdjustorStub structure, which
830 is a function descriptor (on platforms that have function
831 descriptors) or a short piece of stub code (on Darwin) to call
832 adjustorCode with a pointer to the AdjustorStub struct loaded
835 One nice thing about this is that there is _no_ code generated at
836 runtime on the platforms that have function descriptors.
838 AdjustorStub *adjustorStub;
839 int sz = 0, extra_sz, total_sz;
841 // from AdjustorAsm.s
842 // not declared as a function so that AIX-style
843 // fundescs can never get in the way.
844 extern void *adjustorCode;
847 adjustorStub = stgMallocBytes(sizeof(AdjustorStub), "createAdjustor");
849 adjustorStub = allocateExec(sizeof(AdjustorStub));
851 adjustor = adjustorStub;
853 adjustorStub->code = (void*) &adjustorCode;
856 // function descriptors are a cool idea.
857 // We don't need to generate any code at runtime.
858 adjustorStub->toc = adjustorStub;
861 // no function descriptors :-(
862 // We need to do things "by hand".
863 #if defined(powerpc_HOST_ARCH)
864 // lis r2, hi(adjustorStub)
865 adjustorStub->lis = OP_HI(0x3c40, adjustorStub);
866 // ori r2, r2, lo(adjustorStub)
867 adjustorStub->ori = OP_LO(0x6042, adjustorStub);
869 adjustorStub->lwz = OP_LO(0x8002, (char*)(&adjustorStub->code)
870 - (char*)adjustorStub);
872 adjustorStub->mtctr = 0x7c0903a6;
874 adjustorStub->bctr = 0x4e800420;
876 barf("adjustor creation not supported on this platform");
879 // Flush the Instruction cache:
881 int n = sizeof(AdjustorStub)/sizeof(unsigned);
882 unsigned *p = (unsigned*)adjustor;
885 __asm__ volatile ("dcbf 0,%0\n\tsync\n\ticbi 0,%0"
889 __asm__ volatile ("sync\n\tisync");
893 // Calculate the size of the stack frame, in words.
894 sz = totalArgumentSize(typeString);
896 // The first eight words of the parameter area
897 // are just "backing store" for the parameters passed in
898 // the GPRs. extra_sz is the number of words beyond those first
904 // Calculate the total size of the stack frame.
905 total_sz = (6 /* linkage area */
906 + 8 /* minimum parameter area */
907 + 2 /* two extra arguments */
908 + extra_sz)*sizeof(StgWord);
910 // align to 16 bytes.
911 // AIX only requires 8 bytes, but who cares?
912 total_sz = (total_sz+15) & ~0xF;
914 // Fill in the information that adjustorCode in AdjustorAsm.S
915 // will use to create a new stack frame with the additional args.
916 adjustorStub->hptr = hptr;
917 adjustorStub->wptr = wptr;
918 adjustorStub->negative_framesize = -total_sz;
919 adjustorStub->extrawords_plus_one = extra_sz + 1;
922 #elif defined(ia64_HOST_ARCH)
924 Up to 8 inputs are passed in registers. We flush the last two inputs to
925 the stack, initially into the 16-byte scratch region left by the caller.
926 We then shuffle the others along by 4 (taking 2 registers for ourselves
927 to save return address and previous function state - we need to come back
928 here on the way out to restore the stack, so this is a real function
929 rather than just a trampoline).
931 The function descriptor we create contains the gp of the target function
932 so gp is already loaded correctly.
934 [MLX] alloc r16=ar.pfs,10,2,0
936 [MII] st8.spill [r12]=r38,8 // spill in6 (out4)
937 mov r41=r37 // out7 = in5 (out3)
938 mov r40=r36;; // out6 = in4 (out2)
939 [MII] st8.spill [r12]=r39 // spill in7 (out5)
941 mov r38=r34;; // out4 = in2 (out0)
942 [MII] mov r39=r35 // out5 = in3 (out1)
943 mov r37=r33 // out3 = in1 (loc1)
944 mov r36=r32 // out2 = in0 (loc0)
945 [MLX] adds r12=-24,r12 // update sp
946 movl r34=hptr;; // out0 = hptr
947 [MIB] mov r33=r16 // loc1 = ar.pfs
948 mov r32=b0 // loc0 = retaddr
949 br.call.sptk.many b0=b6;;
951 [MII] adds r12=-16,r12
956 br.ret.sptk.many b0;;
959 /* These macros distribute a long constant into the two words of an MLX bundle */
960 #define BITS(val,start,count) (((val) >> (start)) & ((1 << (count))-1))
961 #define MOVL_LOWORD(val) (BITS(val,22,18) << 46)
962 #define MOVL_HIWORD(val) (BITS(val,40,23) | (BITS(val,0,7) << 36) | (BITS(val,7,9) << 50) \
963 | (BITS(val,16,5) << 55) | (BITS(val,21,1) << 44) | BITS(val,63,1) << 59)
967 IA64FunDesc *wdesc = (IA64FunDesc *)wptr;
968 StgWord64 wcode = wdesc->ip;
972 /* we allocate on the Haskell heap since malloc'd memory isn't executable - argh */
973 adjustor = stgAllocStable(sizeof(IA64FunDesc)+18*8, &stable);
975 fdesc = (IA64FunDesc *)adjustor;
976 code = (StgWord64 *)(fdesc + 1);
977 fdesc->ip = (StgWord64)code;
978 fdesc->gp = wdesc->gp;
980 code[0] = 0x0000058004288004 | MOVL_LOWORD(wcode);
981 code[1] = 0x6000000220000000 | MOVL_HIWORD(wcode);
982 code[2] = 0x029015d818984001;
983 code[3] = 0x8401200500420094;
984 code[4] = 0x886011d8189c0001;
985 code[5] = 0x84011004c00380c0;
986 code[6] = 0x0250210046013800;
987 code[7] = 0x8401000480420084;
988 code[8] = 0x0000233f19a06005 | MOVL_LOWORD((StgWord64)hptr);
989 code[9] = 0x6000000440000000 | MOVL_HIWORD((StgWord64)hptr);
990 code[10] = 0x0200210020010811;
991 code[11] = 0x1080006800006200;
992 code[12] = 0x0000210018406000;
993 code[13] = 0x00aa021000038005;
994 code[14] = 0x000000010000001d;
995 code[15] = 0x0084000880000200;
997 /* save stable pointers in convenient form */
998 code[16] = (StgWord64)hptr;
999 code[17] = (StgWord64)stable;
1002 barf("adjustor creation not supported on this platform");
1017 freeHaskellFunctionPtr(void* ptr)
1019 #if defined(i386_HOST_ARCH) && !defined(darwin_HOST_OS)
1020 if ( *(unsigned char*)ptr != 0x68 &&
1021 *(unsigned char*)ptr != 0x58 ) {
1022 errorBelch("freeHaskellFunctionPtr: not for me, guv! %p\n", ptr);
1026 /* Free the stable pointer first..*/
1027 if (*(unsigned char*)ptr == 0x68) { /* Aha, a ccall adjustor! */
1028 freeStablePtr(*((StgStablePtr*)((unsigned char*)ptr + 0x01)));
1030 freeStablePtr(*((StgStablePtr*)((unsigned char*)ptr + 0x02)));
1032 #elif defined(x86_HOST_ARCH) && defined(darwin_HOST_OS)
1033 if ( *(unsigned char*)ptr != 0xe8 ) {
1034 errorBelch("freeHaskellFunctionPtr: not for me, guv! %p\n", ptr);
1037 freeStablePtr(((AdjustorStub*)ptr)->hptr);
1038 #elif defined(x86_64_HOST_ARCH)
1039 if ( *(StgWord16 *)ptr == 0x894d ) {
1040 freeStablePtr(*(StgStablePtr*)(ptr+0x20));
1041 } else if ( *(StgWord16 *)ptr == 0x5141 ) {
1042 freeStablePtr(*(StgStablePtr*)(ptr+0x30));
1044 errorBelch("freeHaskellFunctionPtr: not for me, guv! %p\n", ptr);
1047 #elif defined(sparc_HOST_ARCH)
1048 if ( *(unsigned long*)ptr != 0x9C23A008UL ) {
1049 errorBelch("freeHaskellFunctionPtr: not for me, guv! %p\n", ptr);
1053 /* Free the stable pointer first..*/
1054 freeStablePtr(*((StgStablePtr*)((unsigned long*)ptr + 11)));
1055 #elif defined(alpha_HOST_ARCH)
1056 if ( *(StgWord64*)ptr != 0xa77b0018a61b0010L ) {
1057 errorBelch("freeHaskellFunctionPtr: not for me, guv! %p\n", ptr);
1061 /* Free the stable pointer first..*/
1062 freeStablePtr(*((StgStablePtr*)((unsigned char*)ptr + 0x10)));
1063 #elif defined(powerpc_HOST_ARCH) && defined(linux_HOST_OS)
1064 if ( *(StgWord*)ptr != 0x48000008 ) {
1065 errorBelch("freeHaskellFunctionPtr: not for me, guv! %p\n", ptr);
1068 freeStablePtr(((StgStablePtr*)ptr)[1]);
1069 #elif defined(powerpc_HOST_ARCH) || defined(powerpc64_HOST_ARCH)
1070 extern void* adjustorCode;
1071 if ( ((AdjustorStub*)ptr)->code != (StgFunPtr) &adjustorCode ) {
1072 errorBelch("freeHaskellFunctionPtr: not for me, guv! %p\n", ptr);
1075 freeStablePtr(((AdjustorStub*)ptr)->hptr);
1076 #elif defined(ia64_HOST_ARCH)
1077 IA64FunDesc *fdesc = (IA64FunDesc *)ptr;
1078 StgWord64 *code = (StgWord64 *)(fdesc+1);
1080 if (fdesc->ip != (StgWord64)code) {
1081 errorBelch("freeHaskellFunctionPtr: not for me, guv! %p\n", ptr);
1084 freeStablePtr((StgStablePtr)code[16]);
1085 freeStablePtr((StgStablePtr)code[17]);
1090 *((unsigned char*)ptr) = '\0';
1097 * Function: initAdjustor()
1099 * Perform initialisation of adjustor thunk layer (if needed.)
1104 #if defined(i386_HOST_ARCH) && defined(openbsd_HOST_OS)
1105 obscure_ccall_ret_code_dyn = allocateExec(4);
1106 obscure_ccall_ret_code_dyn[0] = ((unsigned char *)obscure_ccall_ret_code)[0];
1107 obscure_ccall_ret_code_dyn[1] = ((unsigned char *)obscure_ccall_ret_code)[1];
1108 obscure_ccall_ret_code_dyn[2] = ((unsigned char *)obscure_ccall_ret_code)[2];
1109 obscure_ccall_ret_code_dyn[3] = ((unsigned char *)obscure_ccall_ret_code)[3];