1 /* -----------------------------------------------------------------------------
2 * Foreign export adjustor thunks
6 * ---------------------------------------------------------------------------*/
8 /* A little bit of background...
10 An adjustor thunk is a dynamically allocated code snippet that allows
11 Haskell closures to be viewed as C function pointers.
13 Stable pointers provide a way for the outside world to get access to,
14 and evaluate, Haskell heap objects, with the RTS providing a small
15 range of ops for doing so. So, assuming we've got a stable pointer in
16 our hand in C, we can jump into the Haskell world and evaluate a callback
17 procedure, say. This works OK in some cases where callbacks are used, but
18 does require the external code to know about stable pointers and how to deal
19 with them. We'd like to hide the Haskell-nature of a callback and have it
20 be invoked just like any other C function pointer.
22 Enter adjustor thunks. An adjustor thunk is a little piece of code
23 that's generated on-the-fly (one per Haskell closure being exported)
24 that, when entered using some 'universal' calling convention (e.g., the
25 C calling convention on platform X), pushes an implicit stable pointer
26 (to the Haskell callback) before calling another (static) C function stub
27 which takes care of entering the Haskell code via its stable pointer.
29 An adjustor thunk is allocated on the C heap, and is called from within
30 Haskell just before handing out the function pointer to the Haskell (IO)
31 action. User code should never have to invoke it explicitly.
33 An adjustor thunk differs from a C function pointer in one respect: when
34 the code is through with it, it has to be freed in order to release Haskell
and C resources. Failure to do so results in memory leaks on both the C and
39 #include "PosixSource.h"
41 #include "RtsExternal.h"
49 #if defined(powerpc_HOST_ARCH) && defined(linux_HOST_OS)
53 #ifdef LEADING_UNDERSCORE
54 #define UNDERSCORE "_"
58 #if defined(i386_HOST_ARCH) && !defined(darwin_HOST_OS)
60 Now here's something obscure for you:
62 When generating an adjustor thunk that uses the C calling
63 convention, we have to make sure that the thunk kicks off
64 the process of jumping into Haskell with a tail jump. Why?
65 Because as a result of jumping in into Haskell we may end
66 up freeing the very adjustor thunk we came from using
67 freeHaskellFunctionPtr(). Hence, we better not return to
68 the adjustor code on our way out, since it could by then
71 The fix is readily at hand, just include the opcodes
72 for the C stack fixup code that we need to perform when
73 returning in some static piece of memory and arrange
74 to return to it before tail jumping from the adjustor thunk.
/* obscure_ccall_wrapper is never called directly; it exists only to
 * embed obscure_ccall_ret_code -- the static piece of C-stack fixup
 * code that ccall adjustors tail-return through (see the comment
 * above) -- in the text section.  GNUC3_ATTRIBUTE(used) keeps the
 * compiler from discarding it as dead code.
 * NOTE(review): parts of the inline-asm body are elided in this view
 * of the file.
 */
static void GNUC3_ATTRIBUTE(used) obscure_ccall_wrapper(void)
".globl " UNDERSCORE "obscure_ccall_ret_code\n"
UNDERSCORE "obscure_ccall_ret_code:\n\t"
/* Entry label of the stub defined by the asm above. */
extern void obscure_ccall_ret_code(void);
#if defined(openbsd_HOST_OS)
/* On OpenBSD the i386 ccall adjustor pushes this pointer instead of
 * obscure_ccall_ret_code itself; initAdjustor() fills it with a copy
 * of the stub placed in allocateExec()'d memory (presumably because
 * the static text page is not usable as a return target under
 * OpenBSD's W^X policy -- NOTE(review): confirm). */
static unsigned char *obscure_ccall_ret_code_dyn;
#if defined(x86_64_HOST_ARCH)
/* As on i386: a never-called wrapper that only exists to carry the
 * out-of-line obscure_ccall_ret_code stub, which the >=6-integer-
 * argument x86_64 adjustor pushes as its return address so the extra
 * stack word can be popped on the way out.
 * NOTE(review): parts of the inline-asm body are elided in this view
 * of the file.
 */
static void GNUC3_ATTRIBUTE(used) obscure_ccall_wrapper(void)
".globl " UNDERSCORE "obscure_ccall_ret_code\n"
UNDERSCORE "obscure_ccall_ret_code:\n\t"
/* Entry label of the stub defined by the asm above. */
extern void obscure_ccall_ret_code(void);
#if defined(alpha_HOST_ARCH)
/* To get the definition of PAL_imb: */
# if defined(linux_HOST_OS)
# include <asm/pal.h>
# include <machine/pal.h>

#if defined(ia64_HOST_ARCH)

/* Layout of a function descriptor */
/* (an ia64 function pointer addresses one of these; `ip` holds the
   code address and `gp` the global pointer -- see the fdesc->ip /
   fdesc->gp uses in createAdjustor below) */
typedef struct _IA64FunDesc {
/* stgAllocStable
 *
 * Allocate size_in_bytes bytes of storage inside a fresh StgArrWords
 * heap object, register a stable pointer to that object in *stable
 * (so the GC keeps it alive and it can be freed later via
 * freeStablePtr), and return a pointer to the payload.  Used by the
 * ia64 adjustor code, where malloc'd memory is not executable.
 *
 * NOTE(review): the round-up below adds sizeof(W_)+1 before dividing;
 * the conventional round-up is (size + sizeof(W_) - 1) / sizeof(W_).
 * As written it over-allocates by one word when size_in_bytes is an
 * exact multiple of the word size -- harmless, but confirm whether it
 * is intentional.
 */
stgAllocStable(size_t size_in_bytes, StgStablePtr *stable)
  nat data_size_in_words, total_size_in_words;

  /* round up to a whole number of words */
  data_size_in_words = (size_in_bytes + sizeof(W_) + 1) / sizeof(W_);
  total_size_in_words = sizeofW(StgArrWords) + data_size_in_words;

  /* allocate and fill it in */
  arr = (StgArrWords *)allocate(total_size_in_words);
  SET_ARR_HDR(arr, &stg_ARR_WORDS_info, CCCS, data_size_in_words);

  /* obtain a stable ptr */
  *stable = getStablePtr((StgPtr)arr);

  /* and return a ptr to the goods inside the array */
  return(&(arr->payload));
#if defined(powerpc_HOST_ARCH) && defined(linux_HOST_OS)
/* Out-of-line return path for powerpc-linux ccall adjustors: the
 * generated adjustor sets the link register to this label (see
 * createAdjustor below) so that the stack frame it built is torn down
 * on the way back out of the Haskell world.
 * NOTE(review): the asm body is partly elided in this view of the
 * file. */
__asm__("obscure_ccall_ret_code:\n\t"
extern void obscure_ccall_ret_code(void);
#if defined(powerpc_HOST_ARCH) || defined(powerpc64_HOST_ARCH)
#if !(defined(powerpc_HOST_ARCH) && defined(linux_HOST_OS))

/* !!! !!! WARNING: !!! !!!
 * This structure is accessed from AdjustorAsm.s
 * Any changes here have to be mirrored in the offsets there.
 */
typedef struct AdjustorStub {
#if defined(powerpc_HOST_ARCH) && defined(darwin_HOST_OS)
#elif defined(powerpc64_HOST_ARCH) && defined(darwin_HOST_OS)
    /* powerpc64-darwin: just guessing that it won't use fundescs. */
    /* fundesc-based ABIs */
    /* filled in as -total_sz by createAdjustor: the (negated) size of
       the stack frame adjustorCode must build */
    StgInt negative_framesize;
    /* parameter-area words beyond the first eight GPR backing-store
       words, plus one (see createAdjustor) */
    StgInt extrawords_plus_one;

#if defined(i386_HOST_ARCH) && defined(darwin_HOST_OS)

/* !!! !!! WARNING: !!! !!!
 * This structure is accessed from AdjustorAsm.s
 * Any changes here have to be mirrored in the offsets there.
 */
typedef struct AdjustorStub {
    /* a `call adjustorCode` instruction: 0xe8 plus a 32-bit
       pc-relative displacement, filled in by createAdjustor */
    unsigned char call[8];
    /* total argument size in words, from totalArgumentSize() */
    StgInt argument_size;
#if defined(darwin_HOST_OS) || defined(powerpc_HOST_ARCH) || defined(powerpc64_HOST_ARCH)
/* totalArgumentSize: walk the argument type string (one character per
 * argument; 'd' = Double and 'l' = Int64 take two words on 32-bit
 * platforms, everything else one word) and return the total argument
 * size in words.
 * NOTE(review): the loop body is partly elided in this view of the
 * file. */
static int totalArgumentSize(char *typeString)
        char t = *typeString++;

                // on 32-bit platforms, Double and Int64 occupy two words.
                if(sizeof(void*) == 4)
        // everything else is one word.
/* createAdjustor
 *
 * Build a dynamically allocated "adjustor thunk": native code that,
 * when invoked through the returned function pointer (cconv case 0
 * below is _stdcall; the ccall paths follow), arranges for the stable
 * pointer `hptr` to be passed as an implicit extra first argument and
 * tail-jumps to the static C stub `wptr`, which enters Haskell via
 * the stable pointer.  On platforms whose frame layout depends on the
 * argument types, `typeString` describes them, one character per
 * argument ('f' float, 'd' double, 'i' int, 'l' 64-bit int).
 * Adjustors are released with freeHaskellFunctionPtr().
 *
 * NOTE(review): a number of source lines of this function (braces,
 * case labels, declarations and some statements) are elided from this
 * view of the file; the code below is transcribed as visible.
 */
createAdjustor(int cconv, StgStablePtr hptr,
#if !defined(powerpc_HOST_ARCH) && !defined(powerpc64_HOST_ARCH) && !defined(x86_64_HOST_ARCH)
  void *adjustor = NULL;

    case 0: /* _stdcall */
#if defined(i386_HOST_ARCH) && !defined(darwin_HOST_OS)
    /* Magic constant computed by inspecting the code length of
       the following assembly language snippet
       (offset and machine code prefixed):

     <0>: 58             popl  %eax             # temp. remove ret addr..
     <1>: 68 fd fc fe fa pushl 0xfafefcfd       # constant is large enough to
                                                # hold a StgStablePtr
     <6>: 50             pushl %eax             # put back ret. addr
     <7>: b8 fa ef ff 00 movl  $0x00ffeffa, %eax # load up wptr
     <c>: ff e0          jmp   %eax             # and jump to it.
          # the callee cleans up the stack
    */
    adjustor = allocateExec(14);
    unsigned char *const adj_code = (unsigned char *)adjustor;
    adj_code[0x00] = (unsigned char)0x58;  /* popl %eax  */

    adj_code[0x01] = (unsigned char)0x68;  /* pushl hptr (which is a dword immediate ) */
    *((StgStablePtr*)(adj_code + 0x02)) = (StgStablePtr)hptr;

    adj_code[0x06] = (unsigned char)0x50;  /* pushl %eax */

    adj_code[0x07] = (unsigned char)0xb8;  /* movl $wptr, %eax */
    *((StgFunPtr*)(adj_code + 0x08)) = (StgFunPtr)wptr;

    adj_code[0x0c] = (unsigned char)0xff;  /* jmp %eax */
    adj_code[0x0d] = (unsigned char)0xe0;

    /* (ccall case -- its case label is elided in this view) */
#if defined(i386_HOST_ARCH) && !defined(darwin_HOST_OS)
    /* Magic constant computed by inspecting the code length of
       the following assembly language snippet
       (offset and machine code prefixed):

     <00>: 68 ef be ad de pushl $0xdeadbeef      # constant is large enough to
                                                 # hold a StgStablePtr
     <05>: b8 fa ef ff 00 movl  $0x00ffeffa, %eax # load up wptr
     <0a>: 68 ef be ad de pushl $obscure_ccall_ret_code # push the return address
     <0f>: ff e0          jmp   *%eax            # jump to wptr

       The ccall'ing version is a tad different, passing in the return
       address of the caller to the auto-generated C stub (which enters
       via the stable pointer.) (The auto-generated C stub is in on this
       game, don't worry :-)

       See the comment next to obscure_ccall_ret_code why we need to
       perform a tail jump instead of a call, followed by some C stack
       fixup.

       Note: The adjustor makes the assumption that any return value
       coming back from the C stub is not stored on the stack.
       That's (thankfully) the case here with the restricted set of
       return types that we support.
    */
    adjustor = allocateExec(17);
    unsigned char *const adj_code = (unsigned char *)adjustor;

    adj_code[0x00] = (unsigned char)0x68;  /* pushl hptr (which is a dword immediate ) */
    *((StgStablePtr*)(adj_code+0x01)) = (StgStablePtr)hptr;

    adj_code[0x05] = (unsigned char)0xb8;  /* movl $wptr, %eax */
    *((StgFunPtr*)(adj_code + 0x06)) = (StgFunPtr)wptr;

    adj_code[0x0a] = (unsigned char)0x68;  /* pushl obscure_ccall_ret_code */
    *((StgFunPtr*)(adj_code + 0x0b)) =
#if !defined(openbsd_HOST_OS)
            (StgFunPtr)obscure_ccall_ret_code;
            /* OpenBSD: use the executable copy made by initAdjustor() */
            (StgFunPtr)obscure_ccall_ret_code_dyn;

    adj_code[0x0f] = (unsigned char)0xff;  /* jmp *%eax */
    adj_code[0x10] = (unsigned char)0xe0;

#elif defined(i386_HOST_ARCH) && defined(darwin_HOST_OS)
    /*
      What's special about Darwin/Mac OS X on i386?
      It wants the stack to stay 16-byte aligned.

      We offload most of the work to AdjustorAsm.S.
    */
    AdjustorStub *adjustorStub = allocateExec(sizeof(AdjustorStub));
    adjustor = adjustorStub;

    extern void adjustorCode(void);
    int sz = totalArgumentSize(typeString);

    /* `call adjustorCode`: 0xe8 plus a 32-bit pc-relative displacement */
    adjustorStub->call[0] = 0xe8;
    *(long*)&adjustorStub->call[1] = ((char*)&adjustorCode) - ((char*)adjustorStub + 5);
    adjustorStub->hptr = hptr;
    adjustorStub->wptr = wptr;

    // The adjustor puts the following things on the stack:
    // 2.) padding and (a copy of) the arguments
    // 3.) a dummy argument
    // 5.) return address (for returning to the adjustor)
    // All these have to add up to a multiple of 16.

    // first, include everything in frame_size
    adjustorStub->frame_size = sz * 4 + 16;
    // round up to a multiple of 16
    adjustorStub->frame_size = (adjustorStub->frame_size + 15) & ~15;
    // only count 2.) and 3.) as part of frame_size
    adjustorStub->frame_size -= 12;
    adjustorStub->argument_size = sz;

#elif defined(x86_64_HOST_ARCH)
    /*
      %rdi,%rsi,%rdx,%rcx,%r8,%r9 = arg0..arg6

      if there are <6 integer args, then we can just push the
      StablePtr into %edi and shuffle the other args up.

      If there are >=6 integer args, then we have to flush one arg
      to the stack, and arrange to adjust the stack ptr on return.
      The stack will be rearranged to this:

        return address *** <-- dummy arg in stub fn.
        obscure_ccall_ret_code

      This unfortunately means that the type of the stub function
      must have a dummy argument for the original return address
      pointer inserted just after the 6th integer argument.

      Code for the simple case:

   0:   4d 89 c1                mov    %r8,%r9
   3:   49 89 c8                mov    %rcx,%r8
   6:   48 89 d1                mov    %rdx,%rcx
   9:   48 89 f2                mov    %rsi,%rdx
   c:   48 89 fe                mov    %rdi,%rsi
   f:   48 8b 3d 0a 00 00 00    mov    10(%rip),%rdi
  16:   ff 25 0c 00 00 00       jmpq   *12(%rip)
  20:   .quad 0  # aligned on 8-byte boundary
  28:   .quad 0  # aligned on 8-byte boundary

      And the version for >=6 integer arguments:

   2:   ff 35 20 00 00 00       pushq  32(%rip)        # 28 <ccall_adjustor+0x28>
   8:   4d 89 c1                mov    %r8,%r9
   b:   49 89 c8                mov    %rcx,%r8
   e:   48 89 d1                mov    %rdx,%rcx
  11:   48 89 f2                mov    %rsi,%rdx
  14:   48 89 fe                mov    %rdi,%rsi
  17:   48 8b 3d 12 00 00 00    mov    18(%rip),%rdi   # 30 <ccall_adjustor+0x30>
  1e:   ff 25 14 00 00 00       jmpq   *20(%rip)       # 38 <ccall_adjustor+0x38>
  28:   .quad 0  # aligned on 8-byte boundary
  30:   .quad 0  # aligned on 8-byte boundary
  38:   .quad 0  # aligned on 8-byte boundary
    */

    /* we assume the small code model (gcc -mcmodel=small) where
     * all symbols are <2^32, so hence wptr should fit into 32 bits.
     */
    ASSERT(((long)wptr >> 32) == 0);

    // determine whether we have 6 or more integer arguments,
    // and therefore need to flush one to the stack.
    for (c = typeString; *c != '\0'; c++) {
        if (*c == 'i' || *c == 'l') i++;

    /* simple case: fewer than 6 integer arguments */
    adjustor = allocateExec(0x30);

    *(StgInt32 *)adjustor        = 0x49c1894d;
    *(StgInt32 *)(adjustor+0x4)  = 0x8948c889;
    *(StgInt32 *)(adjustor+0x8)  = 0xf28948d1;
    *(StgInt32 *)(adjustor+0xc)  = 0x48fe8948;
    *(StgInt32 *)(adjustor+0x10) = 0x000a3d8b;
    *(StgInt32 *)(adjustor+0x14) = 0x25ff0000;
    *(StgInt32 *)(adjustor+0x18) = 0x0000000c;
    *(StgInt64 *)(adjustor+0x20) = (StgInt64)hptr;
    *(StgInt64 *)(adjustor+0x28) = (StgInt64)wptr;

    /* >=6 integer arguments: also push obscure_ccall_ret_code so the
       flushed stack word is popped on return */
    adjustor = allocateExec(0x40);

    *(StgInt32 *)adjustor        = 0x35ff5141;
    *(StgInt32 *)(adjustor+0x4)  = 0x00000020;
    *(StgInt32 *)(adjustor+0x8)  = 0x49c1894d;
    *(StgInt32 *)(adjustor+0xc)  = 0x8948c889;
    *(StgInt32 *)(adjustor+0x10) = 0xf28948d1;
    *(StgInt32 *)(adjustor+0x14) = 0x48fe8948;
    *(StgInt32 *)(adjustor+0x18) = 0x00123d8b;
    *(StgInt32 *)(adjustor+0x1c) = 0x25ff0000;
    *(StgInt32 *)(adjustor+0x20) = 0x00000014;

    *(StgInt64 *)(adjustor+0x28) = (StgInt64)obscure_ccall_ret_code;
    *(StgInt64 *)(adjustor+0x30) = (StgInt64)hptr;
    *(StgInt64 *)(adjustor+0x38) = (StgInt64)wptr;

#elif defined(sparc_HOST_ARCH)
    /* Magic constant computed by inspecting the code length of the following
       assembly language snippet (offset and machine code prefixed):

     <00>: 9C23A008   sub   %sp, 8, %sp         ! make room for %o4/%o5 in caller's frame
     <04>: DA23A060   st    %o5, [%sp + 96]     ! shift registers by 2 positions
     <08>: D823A05C   st    %o4, [%sp + 92]
     <0C>: 9A10000B   mov   %o3, %o5
     <10>: 9810000A   mov   %o2, %o4
     <14>: 96100009   mov   %o1, %o3
     <18>: 94100008   mov   %o0, %o2
     <1C>: 13000000   sethi %hi(wptr), %o1      ! load up wptr (1 of 2)
     <20>: 11000000   sethi %hi(hptr), %o0      ! load up hptr (1 of 2)
     <24>: 81C26000   jmp   %o1 + %lo(wptr)     ! jump to wptr (load 2 of 2)
     <28>: 90122000   or    %o0, %lo(hptr), %o0 ! load up hptr (2 of 2, delay slot)
     <2C>  00000000                             ! place for getting hptr back easily

       ccall'ing on SPARC is easy, because we are quite lucky to push a
       multiple of 8 bytes (1 word hptr + 1 word dummy arg) in front of the
       existing arguments (note that %sp must stay double-word aligned at
       all times, see ABI spec at http://www.sparc.org/standards/psABI3rd.pdf).
       To do this, we extend the *caller's* stack frame by 2 words and shift
       the output registers used for argument passing (%o0 - %o5, we are a *leaf*
       procedure because of the tail-jump) by 2 positions. This makes room in
       %o0 and %o1 for the additional arguments, namely hptr and a dummy (used
       for destination addr of jump on SPARC, return address on x86, ...). This
       shouldn't cause any problems for a C-like caller: alloca is implemented
       similarly, and local variables should be accessed via %fp, not %sp. In a
       nutshell: This should work! (Famous last words! :-)
    */
    adjustor = allocateExec(4*(11+1));
    unsigned long *const adj_code = (unsigned long *)adjustor;

    adj_code[ 0] = 0x9C23A008UL;  /* sub %sp, 8, %sp */
    adj_code[ 1] = 0xDA23A060UL;  /* st %o5, [%sp + 96] */
    adj_code[ 2] = 0xD823A05CUL;  /* st %o4, [%sp + 92] */
    adj_code[ 3] = 0x9A10000BUL;  /* mov %o3, %o5 */
    adj_code[ 4] = 0x9810000AUL;  /* mov %o2, %o4 */
    adj_code[ 5] = 0x96100009UL;  /* mov %o1, %o3 */
    adj_code[ 6] = 0x94100008UL;  /* mov %o0, %o2 */
    adj_code[ 7] = 0x13000000UL;  /* sethi %hi(wptr), %o1 */
    adj_code[ 7] |= ((unsigned long)wptr) >> 10;
    adj_code[ 8] = 0x11000000UL;  /* sethi %hi(hptr), %o0 */
    adj_code[ 8] |= ((unsigned long)hptr) >> 10;
    adj_code[ 9] = 0x81C26000UL;  /* jmp %o1 + %lo(wptr) */
    adj_code[ 9] |= ((unsigned long)wptr) & 0x000003FFUL;
    adj_code[10] = 0x90122000UL;  /* or %o0, %lo(hptr), %o0 */
    adj_code[10] |= ((unsigned long)hptr) & 0x000003FFUL;

    /* stash hptr in the trailing data word so that
       freeHaskellFunctionPtr can retrieve it */
    adj_code[11] = (unsigned long)hptr;

    /* flush the instruction cache over the freshly written code */
    asm("flush %0" : : "r" (adj_code ));
    asm("flush %0" : : "r" (adj_code + 2));
    asm("flush %0" : : "r" (adj_code + 4));
    asm("flush %0" : : "r" (adj_code + 6));
    asm("flush %0" : : "r" (adj_code + 10));

    /* max. 5 instructions latency, and we need at >= 1 for returning */

#elif defined(alpha_HOST_ARCH)
    /* Magic constant computed by inspecting the code length of
       the following assembly language snippet
       (offset and machine code prefixed; note that the machine code
       shown is longwords stored in little-endian order):

     <00>: 46520414   mov   a2, a4
     <04>: 46100412   mov   a0, a2
     <08>: a61b0020   ldq   a0, 0x20(pv)   # load up hptr
     <0c>: 46730415   mov   a3, a5
     <10>: a77b0028   ldq   pv, 0x28(pv)   # load up wptr
     <14>: 46310413   mov   a1, a3
     <18>: 6bfb----   jmp   (pv), <hint>   # jump to wptr (with hint)
     <1c>: 00000000                        # padding for alignment
     <20>: [8 bytes for hptr quadword]
     <28>: [8 bytes for wptr quadword]

       The "computed" jump at <08> above is really a jump to a fixed
       location.  Accordingly, we place an always-correct hint in the
       jump instruction, namely the address offset from <0c> to wptr,
       divided by 4, taking the lowest 14 bits.

       We only support passing 4 or fewer argument words, for the same
       reason described under sparc_HOST_ARCH above by JRS, 21 Aug 01.
       On the Alpha the first 6 integer arguments are in a0 through a5,
       and the rest on the stack.  Hence we want to shuffle the original
       caller's arguments by two.

       On the Alpha the calling convention is so complex and dependent
       on the callee's signature -- for example, the stack pointer has
       to be a multiple of 16 -- that it seems impossible to me [ccshan]
       to handle the general case correctly without changing how the
       adjustor is called from C.  For now, our solution of shuffling
       registers only and ignoring the stack only works if the original
       caller passed 4 or fewer argument words.

    TODO: Depending on how much allocation overhead stgMallocBytes uses for
          header information (more precisely, if the overhead is no more than
          4 bytes), we should move the first three instructions above down by
          4 bytes (getting rid of the nop), hence saving memory. [ccshan]
    */
    ASSERT(((StgWord64)wptr & 3) == 0);
    adjustor = allocateExec(48);
    StgWord64 *const code = (StgWord64 *)adjustor;

    code[0] = 0x4610041246520414L;
    code[1] = 0x46730415a61b0020L;
    code[2] = 0x46310413a77b0028L;
    /* jump bundle, with the branch-prediction hint patched in */
    code[3] = 0x000000006bfb0000L
            | (((StgWord32*)(wptr) - (StgWord32*)(code) - 3) & 0x3fff);

    code[4] = (StgWord64)hptr;
    code[5] = (StgWord64)wptr;

    /* Ensure that instruction cache is consistent with our new code */
    __asm__ volatile("call_pal %0" : : "i" (PAL_imb));

#elif defined(powerpc_HOST_ARCH) && defined(linux_HOST_OS)

#define OP_LO(op,lo)  ((((unsigned)(op)) << 16) | (((unsigned)(lo)) & 0xFFFF))
#define OP_HI(op,hi)  ((((unsigned)(op)) << 16) | (((unsigned)(hi)) >> 16))

    /* The PowerPC Linux (32-bit) calling convention is annoyingly complex.
       We need to calculate all the details of the stack frame layout,
       taking into account the types of all the arguments, and then
       generate code on the fly. */

    int src_gpr = 3, dst_gpr = 5;
    int src_offset = 0, dst_offset = 0;
    int n = strlen(typeString),i;
    int src_locs[n], dst_locs[n];

    /*
        Calculate where the arguments should go.
        src_locs[] will contain the locations of the arguments in the
        original stack frame passed to the adjustor.
        dst_locs[] will contain the locations of the arguments after the
        adjustor runs, on entry to the wrapper proc pointed to by wptr.

        This algorithm is based on the one described on page 3-19 of the
        System V ABI PowerPC Processor Supplement.
    */
    for(i=0;typeString[i];i++)
        char t = typeString[i];
        /* floats/doubles go in FPRs 1..8 and keep the same slot */
        if((t == 'f' || t == 'd') && fpr <= 8)
            src_locs[i] = dst_locs[i] = -32-(fpr++);
            /* 64-bit ints need an even/odd GPR pair */
            if(t == 'l' && src_gpr <= 9)
                if((src_gpr & 1) == 0)
                src_locs[i] = -src_gpr;
            else if(t == 'i' && src_gpr <= 10)
                src_locs[i] = -(src_gpr++);
                /* 8-byte types are 8-byte aligned on the stack */
                if(t == 'l' || t == 'd')
                src_locs[i] = src_offset;
                src_offset += (t == 'l' || t == 'd') ? 8 : 4;
            if(t == 'l' && dst_gpr <= 9)
                if((dst_gpr & 1) == 0)
                dst_locs[i] = -dst_gpr;
            else if(t == 'i' && dst_gpr <= 10)
                dst_locs[i] = -(dst_gpr++);
                if(t == 'l' || t == 'd')
                dst_locs[i] = dst_offset;
                dst_offset += (t == 'l' || t == 'd') ? 8 : 4;

    /* outgoing parameter area plus 8 bytes of link area */
    frameSize = dst_offset + 8;
    /* keep the stack 16-byte aligned */
    frameSize = (frameSize+15) & ~0xF;

    // allocate space for at most 4 insns per parameter
    // plus 14 more instructions.
    adjustor = allocateExec(4 * (4*n + 14));
    code = (unsigned*)adjustor;

    *code++ = 0x48000008;   // b *+8
    // * Put the hptr in a place where freeHaskellFunctionPtr
    //   can find it (the word skipped over by the branch above).
    *code++ = (unsigned) hptr;

    // * save the link register
    *code++ = 0x7c0802a6;   // mflr r0;
    *code++ = 0x90010004;   // stw r0, 4(r1);
    // * and build a new stack frame
    *code++ = OP_LO(0x9421, -frameSize); // stwu r1, -frameSize(r1)

    // * now generate instructions to copy arguments
    //   from the old stack frame into the new stack frame.
        if(src_locs[i] < -32)
            // FPR argument: stays where it is.
            ASSERT(dst_locs[i] == src_locs[i]);
        else if(src_locs[i] < 0)
            // src is in a GPR.
            ASSERT(typeString[i] != 'f' && typeString[i] != 'd');
            ASSERT(dst_locs[i] > -32);
            // dst is in GPR, too.

            if(typeString[i] == 'l')
                | ((-dst_locs[i]+1) << 16)
                | ((-src_locs[i]+1) << 11)
                | ((-src_locs[i]+1) << 21);
                | ((-dst_locs[i]) << 16)
                | ((-src_locs[i]) << 11)
                | ((-src_locs[i]) << 21);
            if(typeString[i] == 'l')
                // stw src+1, dst_offset+4(r1)
                | ((-src_locs[i]+1) << 21)
            // stw src, dst_offset(r1)
                | ((-src_locs[i]) << 21)
            // src is on the stack.
            ASSERT(dst_locs[i] >= 0);
            ASSERT(typeString[i] != 'f' && typeString[i] != 'd');

            if(typeString[i] == 'l')
                // lwz r0, src_offset(r1)
                | (src_locs[i] + frameSize + 8 + 4);
                // stw r0, dst_offset(r1)
                | (dst_locs[i] + 8 + 4);
            // lwz r0, src_offset(r1)
                | (src_locs[i] + frameSize + 8);
            // stw r0, dst_offset(r1)

    // * hptr will be the new first argument.
    // lis r3, hi(hptr)
    *code++ = OP_HI(0x3c60, hptr);
    // ori r3,r3,lo(hptr)
    *code++ = OP_LO(0x6063, hptr);

    // * we need to return to a piece of code
    //   which will tear down the stack frame.
    // lis r11,hi(obscure_ccall_ret_code)
    *code++ = OP_HI(0x3d60, obscure_ccall_ret_code);
    // ori r11,r11,lo(obscure_ccall_ret_code)
    *code++ = OP_LO(0x616b, obscure_ccall_ret_code);
    // mtlr r11
    *code++ = 0x7d6803a6;

    // * tail-jump to wptr
    // lis r11,hi(wptr)
    *code++ = OP_HI(0x3d60, wptr);
    // ori r11,r11,lo(wptr)
    *code++ = OP_LO(0x616b, wptr);
    // mtctr r11
    *code++ = 0x7d6903a6;
    // bctr
    *code++ = 0x4e800420;

    // Flush the Instruction cache:
    unsigned *p = adjustor;
        __asm__ volatile ("dcbf 0,%0\n\tsync\n\ticbi 0,%0"
    __asm__ volatile ("sync\n\tisync");

#elif defined(powerpc_HOST_ARCH) || defined(powerpc64_HOST_ARCH)

#define OP_LO(op,lo)  ((((unsigned)(op)) << 16) | (((unsigned)(lo)) & 0xFFFF))
#define OP_HI(op,hi)  ((((unsigned)(op)) << 16) | (((unsigned)(hi)) >> 16))

    /* The following code applies to all PowerPC and PowerPC64 platforms
       whose stack layout is based on the AIX ABI.

       Besides (obviously) AIX, this includes
        Mac OS 9 and BeOS/PPC (may they rest in peace),
            which use the 32-bit AIX ABI
            which uses the 64-bit AIX ABI
        and Darwin (Mac OS X),
            which uses the same stack layout as AIX,
            but no function descriptors.

       The actual stack-frame shuffling is implemented out-of-line
       in the function adjustorCode, in AdjustorAsm.S.
       Here, we set up an AdjustorStub structure, which
       is a function descriptor (on platforms that have function
       descriptors) or a short piece of stub code (on Darwin) to call
       adjustorCode with a pointer to the AdjustorStub struct loaded
       into register r2.

       One nice thing about this is that there is _no_ code generated at
       runtime on the platforms that have function descriptors.
    */
    AdjustorStub *adjustorStub;
    int sz = 0, extra_sz, total_sz;

    // from AdjustorAsm.s
    // not declared as a function so that AIX-style
    // fundescs can never get in the way.
    extern void *adjustorCode;

    adjustorStub = stgMallocBytes(sizeof(AdjustorStub), "createAdjustor");
    adjustorStub = allocateExec(sizeof(AdjustorStub));
    adjustor = adjustorStub;

    adjustorStub->code = (void*) &adjustorCode;

    // function descriptors are a cool idea.
    // We don't need to generate any code at runtime.
    adjustorStub->toc = adjustorStub;

    // no function descriptors :-(
    // We need to do things "by hand".
#if defined(powerpc_HOST_ARCH)
    // lis r2, hi(adjustorStub)
    adjustorStub->lis = OP_HI(0x3c40, adjustorStub);
    // ori r2, r2, lo(adjustorStub)
    adjustorStub->ori = OP_LO(0x6042, adjustorStub);
    // lwz r0, offsetof(AdjustorStub, code)(r2)
    adjustorStub->lwz = OP_LO(0x8002, (char*)(&adjustorStub->code)
                    - (char*)adjustorStub);
    // mtctr r0
    adjustorStub->mtctr = 0x7c0903a6;
    // bctr
    adjustorStub->bctr = 0x4e800420;
    barf("adjustor creation not supported on this platform");

    // Flush the Instruction cache:
    int n = sizeof(AdjustorStub)/sizeof(unsigned);
    unsigned *p = (unsigned*)adjustor;
        __asm__ volatile ("dcbf 0,%0\n\tsync\n\ticbi 0,%0"
    __asm__ volatile ("sync\n\tisync");

    // Calculate the size of the stack frame, in words.
    sz = totalArgumentSize(typeString);

    // The first eight words of the parameter area
    // are just "backing store" for the parameters passed in
    // the GPRs. extra_sz is the number of words beyond those first
    // eight.

    // Calculate the total size of the stack frame.
    total_sz = (6 /* linkage area */
              + 8 /* minimum parameter area */
              + 2 /* two extra arguments */
              + extra_sz)*sizeof(StgWord);

    // align to 16 bytes.
    // AIX only requires 8 bytes, but who cares?
    total_sz = (total_sz+15) & ~0xF;

    // Fill in the information that adjustorCode in AdjustorAsm.S
    // will use to create a new stack frame with the additional args.
    adjustorStub->hptr = hptr;
    adjustorStub->wptr = wptr;
    adjustorStub->negative_framesize = -total_sz;
    adjustorStub->extrawords_plus_one = extra_sz + 1;

#elif defined(ia64_HOST_ARCH)
    /*
      Up to 8 inputs are passed in registers.  We flush the last two inputs to
      the stack, initially into the 16-byte scratch region left by the caller.
      We then shuffle the others along by 4 (taking 2 registers for ourselves
      to save return address and previous function state - we need to come back
      here on the way out to restore the stack, so this is a real function
      rather than just a trampoline).

      The function descriptor we create contains the gp of the target function
      so gp is already loaded correctly.

      [MLX]       alloc r16=ar.pfs,10,2,0
      [MII]       st8.spill [r12]=r38,8   // spill in6 (out4)
                  mov r41=r37             // out7 = in5 (out3)
                  mov r40=r36;;           // out6 = in4 (out2)
      [MII]       st8.spill [r12]=r39     // spill in7 (out5)
                  mov r38=r34;;           // out4 = in2 (out0)
      [MII]       mov r39=r35             // out5 = in3 (out1)
                  mov r37=r33             // out3 = in1 (loc1)
                  mov r36=r32             // out2 = in0 (loc0)
      [MLX]       adds r12=-24,r12        // update sp
                  movl r34=hptr;;         // out0 = hptr
      [MIB]       mov r33=r16             // loc1 = ar.pfs
                  mov r32=b0              // loc0 = retaddr
                  br.call.sptk.many b0=b6;;
      [MII]       adds r12=-16,r12
                  br.ret.sptk.many b0;;
    */

    /* These macros distribute a long constant into the two words of an MLX bundle */
#define BITS(val,start,count) (((val) >> (start)) & ((1 << (count))-1))
#define MOVL_LOWORD(val) (BITS(val,22,18) << 46)
#define MOVL_HIWORD(val) (BITS(val,40,23) | (BITS(val,0,7) << 36) | (BITS(val,7,9) << 50) \
                        | (BITS(val,16,5) << 55) | (BITS(val,21,1) << 44) | BITS(val,63,1) << 59)

    IA64FunDesc *wdesc = (IA64FunDesc *)wptr;
    StgWord64 wcode = wdesc->ip;

    /* we allocate on the Haskell heap since malloc'd memory isn't executable - argh */
    adjustor = stgAllocStable(sizeof(IA64FunDesc)+18*8, &stable);

    fdesc = (IA64FunDesc *)adjustor;
    code = (StgWord64 *)(fdesc + 1);
    /* the new descriptor points at the bundles below; gp is inherited
       from the wrapped function's own descriptor */
    fdesc->ip = (StgWord64)code;
    fdesc->gp = wdesc->gp;

    code[0] = 0x0000058004288004 | MOVL_LOWORD(wcode);
    code[1] = 0x6000000220000000 | MOVL_HIWORD(wcode);
    code[2] = 0x029015d818984001;
    code[3] = 0x8401200500420094;
    code[4] = 0x886011d8189c0001;
    code[5] = 0x84011004c00380c0;
    code[6] = 0x0250210046013800;
    code[7] = 0x8401000480420084;
    code[8] = 0x0000233f19a06005 | MOVL_LOWORD((StgWord64)hptr);
    code[9] = 0x6000000440000000 | MOVL_HIWORD((StgWord64)hptr);
    code[10] = 0x0200210020010811;
    code[11] = 0x1080006800006200;
    code[12] = 0x0000210018406000;
    code[13] = 0x00aa021000038005;
    code[14] = 0x000000010000001d;
    code[15] = 0x0084000880000200;

    /* save stable pointers in convenient form */
    code[16] = (StgWord64)hptr;
    code[17] = (StgWord64)stable;

    barf("adjustor creation not supported on this platform");
/* freeHaskellFunctionPtr
 *
 * Release an adjustor created by createAdjustor(): free the embedded
 * stable pointer (dropping the reference that keeps the Haskell
 * closure alive) and release the code itself.  Each platform branch
 * first sanity-checks that `ptr` really points at one of its own
 * adjustor code patterns and complains via errorBelch otherwise.
 *
 * NOTE(review): several lines (else branches, closing braces) are
 * elided from this view of the file; the code below is transcribed as
 * visible.
 */
freeHaskellFunctionPtr(void* ptr)
#if defined(i386_HOST_ARCH) && !defined(darwin_HOST_OS)
 if ( *(unsigned char*)ptr != 0x68 &&
      *(unsigned char*)ptr != 0x58 ) {
   errorBelch("freeHaskellFunctionPtr: not for me, guv! %p\n", ptr);

 /* Free the stable pointer first..*/
 if (*(unsigned char*)ptr == 0x68) { /* Aha, a ccall adjustor! */
   /* ccall adjustor stores hptr at +0x01 (after the pushl opcode) */
   freeStablePtr(*((StgStablePtr*)((unsigned char*)ptr + 0x01)));
   /* stdcall adjustor (leading 0x58 popl) stores hptr at +0x02 */
   freeStablePtr(*((StgStablePtr*)((unsigned char*)ptr + 0x02)));
#elif defined(x86_HOST_ARCH) && defined(darwin_HOST_OS)
 /* NOTE(review): `x86_HOST_ARCH` does not match the `i386_HOST_ARCH`
  * macro used everywhere else in this file (including the matching
  * branch of createAdjustor), so this branch appears never to be
  * selected on i386-darwin -- confirm and rename if so. */
 if ( *(unsigned char*)ptr != 0xe8 ) {
   errorBelch("freeHaskellFunctionPtr: not for me, guv! %p\n", ptr);

 freeStablePtr(((AdjustorStub*)ptr)->hptr);
#elif defined(x86_64_HOST_ARCH)
 if ( *(StgWord16 *)ptr == 0x894d ) {
   /* simple (<6 int args) adjustor: hptr lives at +0x20 */
   freeStablePtr(*(StgStablePtr*)(ptr+0x20));
 } else if ( *(StgWord16 *)ptr == 0x5141 ) {
   /* pushq variant (>=6 int args): hptr lives at +0x30 */
   freeStablePtr(*(StgStablePtr*)(ptr+0x30));
   errorBelch("freeHaskellFunctionPtr: not for me, guv! %p\n", ptr);

#elif defined(sparc_HOST_ARCH)
 if ( *(unsigned long*)ptr != 0x9C23A008UL ) {
   errorBelch("freeHaskellFunctionPtr: not for me, guv! %p\n", ptr);

 /* Free the stable pointer first..*/
 /* hptr is stashed in the word after the 11 instructions */
 freeStablePtr(*((StgStablePtr*)((unsigned long*)ptr + 11)));
#elif defined(alpha_HOST_ARCH)
 /* NOTE(review): createAdjustor() above generates code[0] ==
  * 0x4610041246520414 with hptr stored at offset 0x20, but this check
  * expects the first quadword to be 0xa77b0018a61b0010 and frees the
  * stable pointer found at offset 0x10.  The two sides look out of
  * sync -- verify against the generated code. */
 if ( *(StgWord64*)ptr != 0xa77b0018a61b0010L ) {
   errorBelch("freeHaskellFunctionPtr: not for me, guv! %p\n", ptr);

 /* Free the stable pointer first..*/
 freeStablePtr(*((StgStablePtr*)((unsigned char*)ptr + 0x10)));
#elif defined(powerpc_HOST_ARCH) && defined(linux_HOST_OS)
 if ( *(StgWord*)ptr != 0x48000008 ) {  /* first insn: b *+8 */
   errorBelch("freeHaskellFunctionPtr: not for me, guv! %p\n", ptr);

 /* hptr sits in the second word, skipped over by the initial branch
    (see createAdjustor) */
 freeStablePtr(((StgStablePtr*)ptr)[1]);
#elif defined(powerpc_HOST_ARCH) || defined(powerpc64_HOST_ARCH)
 extern void* adjustorCode;
 if ( ((AdjustorStub*)ptr)->code != (StgFunPtr) &adjustorCode ) {
   errorBelch("freeHaskellFunctionPtr: not for me, guv! %p\n", ptr);

 freeStablePtr(((AdjustorStub*)ptr)->hptr);
#elif defined(ia64_HOST_ARCH)
 IA64FunDesc *fdesc = (IA64FunDesc *)ptr;
 StgWord64 *code = (StgWord64 *)(fdesc+1);

 if (fdesc->ip != (StgWord64)code) {
   errorBelch("freeHaskellFunctionPtr: not for me, guv! %p\n", ptr);

 /* free both the stable pointer to the Haskell closure and the one
    that keeps this code block itself alive (stored at code[16..17]
    by createAdjustor) */
 freeStablePtr((StgStablePtr)code[16]);
 freeStablePtr((StgStablePtr)code[17]);

 /* scrub the first byte so a stale caller fails the pattern checks
    above (NOTE(review): presumed intent -- confirm) */
 *((unsigned char*)ptr) = '\0';
1095 * Function: initAdjustor()
1097 * Perform initialisation of adjustor thunk layer (if needed.)
1102 #if defined(i386_HOST_ARCH) && defined(openbsd_HOST_OS)
1103 obscure_ccall_ret_code_dyn = allocateExec(4);
1104 obscure_ccall_ret_code_dyn[0] = ((unsigned char *)obscure_ccall_ret_code)[0];
1105 obscure_ccall_ret_code_dyn[1] = ((unsigned char *)obscure_ccall_ret_code)[1];
1106 obscure_ccall_ret_code_dyn[2] = ((unsigned char *)obscure_ccall_ret_code)[2];
1107 obscure_ccall_ret_code_dyn[3] = ((unsigned char *)obscure_ccall_ret_code)[3];