/* -----------------------------------------------------------------------------
 * Foreign export adjustor thunks
 * ---------------------------------------------------------------------------*/
/* A little bit of background...

   An adjustor thunk is a dynamically allocated code snippet that allows
   Haskell closures to be viewed as C function pointers.

   Stable pointers provide a way for the outside world to get access to,
   and evaluate, Haskell heap objects, with the RTS providing a small
   range of ops for doing so. So, assuming we've got a stable pointer in
   our hand in C, we can jump into the Haskell world and evaluate a callback
   procedure, say. This works OK in some cases where callbacks are used, but
   does require the external code to know about stable pointers and how to deal
   with them. We'd like to hide the Haskell-nature of a callback and have it
   be invoked just like any other C function pointer.

   Enter adjustor thunks. An adjustor thunk is a little piece of code
   that's generated on-the-fly (one per Haskell closure being exported)
   that, when entered using some 'universal' calling convention (e.g., the
   C calling convention on platform X), pushes an implicit stable pointer
   (to the Haskell callback) before calling another (static) C function stub
   which takes care of entering the Haskell code via its stable pointer.

   An adjustor thunk is allocated on the C heap, and is called from within
   Haskell just before handing out the function pointer to the Haskell (IO)
   action. User code should never have to invoke it explicitly.
   An adjustor thunk differs from a C function pointer in one respect: when
   the code is through with it, it has to be freed in order to release Haskell
   and C resources. Failure to do so results in memory leaks on both the C and
   Haskell sides.
*/
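
/* A minimal sketch of the life cycle as seen from C.  The callback type and
   the functions `registerCallback' and `invokeAndRelease' below are made-up
   names for illustration only; freeHaskellFunctionPtr (defined further down
   in this file) is the real thing.  The C side treats the adjustor like any
   ordinary function pointer and frees it once it is done with the callback:

       typedef int (*IntCallback)(int);

       static IntCallback haskellCallback = NULL;

       void registerCallback(IntCallback cb)  // handed a freshly created
       {                                      // adjustor by the Haskell side
           haskellCallback = cb;
       }

       void invokeAndRelease(void)
       {
           haskellCallback(42);               // enters Haskell via the thunk
           freeHaskellFunctionPtr((void*)haskellCallback);  // release Haskell and C resources
           haskellCallback = NULL;
       }
*/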
#include "PosixSource.h"
#include "Rts.h"
#include "RtsExternal.h"
#include "RtsUtils.h"

#if defined(openbsd_TARGET_OS)
#include <unistd.h>
#include <sys/types.h>
#include <sys/mman.h>

/* no C99 header stdint.h on OpenBSD? */
typedef unsigned long my_uintptr_t;
#endif

#if defined(powerpc_TARGET_ARCH) && defined(linux_TARGET_OS)
#include <string.h>
#endif
/* Heavily arch-specific, I'm afraid.. */

/*
 * Allocate len bytes which are readable, writable, and executable.
 *
 * ToDo: If this turns out to be a performance bottleneck, one could
 * e.g. cache the last VirtualProtect/mprotect-ed region and do
 * nothing in case of a cache hit.
 */
static void*
mallocBytesRWX(int len)
{
  void *addr = stgMallocBytes(len, "mallocBytesRWX");
#if defined(i386_TARGET_ARCH) && defined(_WIN32)
  /* This could be necessary for processors which distinguish between READ and
     EXECUTE memory accesses, e.g. Itaniums. */
  DWORD dwOldProtect = 0;
  if (VirtualProtect (addr, len, PAGE_EXECUTE_READWRITE, &dwOldProtect) == 0) {
    barf("mallocBytesRWX: failed to protect 0x%p; error=%lu; old protection: %lu\n",
         addr, (unsigned long)GetLastError(), (unsigned long)dwOldProtect);
  }
#elif defined(openbsd_TARGET_OS)
  /* malloced memory isn't executable by default on OpenBSD */
  my_uintptr_t pageSize         = sysconf(_SC_PAGESIZE);
  my_uintptr_t mask             = ~(pageSize - 1);
  my_uintptr_t startOfFirstPage = ((my_uintptr_t)addr          ) & mask;
  my_uintptr_t startOfLastPage  = ((my_uintptr_t)addr + len - 1) & mask;
  my_uintptr_t size             = startOfLastPage - startOfFirstPage + pageSize;
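  /* A worked example (a sketch, assuming a 4K page size): addr = 0x20FF0 and
     len = 0x40 give startOfFirstPage = 0x20000 and startOfLastPage = 0x21000,
     so size = 0x2000, i.e. mprotect covers both pages touched by the block. */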
  if (mprotect((void*)startOfFirstPage, (size_t)size, PROT_EXEC | PROT_READ | PROT_WRITE) != 0) {
    barf("mallocBytesRWX: failed to protect 0x%p\n", addr);
  }
#endif
  return addr;
}
#if defined(i386_TARGET_ARCH)
static unsigned char *obscure_ccall_ret_code;
#endif
#if defined(alpha_TARGET_ARCH)
/* To get the definition of PAL_imb: */
# if defined(linux_TARGET_OS)
#  include <asm/pal.h>
# else
#  include <machine/pal.h>
# endif
#endif
#if defined(ia64_TARGET_ARCH)

/* Layout of a function descriptor */
typedef struct _IA64FunDesc {
    StgWord64 ip;
    StgWord64 gp;
} IA64FunDesc;
static void *
stgAllocStable(size_t size_in_bytes, StgStablePtr *stable)
{
  StgArrWords* arr;
  nat data_size_in_words, total_size_in_words;

  /* round up to a whole number of words */
  data_size_in_words  = (size_in_bytes + sizeof(W_) - 1) / sizeof(W_);
  total_size_in_words = sizeofW(StgArrWords) + data_size_in_words;

  /* allocate and fill it in */
  arr = (StgArrWords *)allocate(total_size_in_words);
  SET_ARR_HDR(arr, &stg_ARR_WORDS_info, CCCS, data_size_in_words);

  /* obtain a stable ptr */
  *stable = getStablePtr((StgPtr)arr);

  /* and return a ptr to the goods inside the array */
  return(BYTE_ARR_CTS(arr));
}
#endif
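
/* A sketch of how the two halves pair up (the variable names are made up for
   illustration): the ia64 code below does

       StgStablePtr stable;
       void *buf = stgAllocStable(codeSize, &stable);

   stashes `stable' inside the generated code, and freeHaskellFunctionPtr
   later calls freeStablePtr(stable); once the stable pointer is gone the
   ARR_WORDS object becomes unreachable and the GC reclaims the storage. */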
#if defined(powerpc_TARGET_ARCH) && defined(linux_TARGET_OS)
/* Tear down the C stack frame that the adjustor built, restore the saved
   link register and return to the original caller. */
__asm__("obscure_ccall_ret_code:\n\t"
        "lwz 1,0(1)\n\t"
        "lwz 0,4(1)\n\t"
        "mtlr 0\n\t"
        "blr");
extern void obscure_ccall_ret_code(void);
#endif
#if defined(powerpc_TARGET_ARCH) || defined(powerpc64_TARGET_ARCH)
#if !(defined(powerpc_TARGET_ARCH) && defined(linux_TARGET_OS))

/* !!! !!! WARNING: !!! !!!
 * This structure is accessed from AdjustorAsm.s
 * Any changes here have to be mirrored in the offsets there.
 */

typedef struct AdjustorStub {
#if defined(powerpc_TARGET_ARCH) && defined(darwin_TARGET_OS)
    unsigned        lis, ori, lwz, mtctr, bctr;
    StgFunPtr       code;
#elif defined(powerpc64_TARGET_ARCH) && defined(darwin_TARGET_OS)
    /* powerpc64-darwin: just guessing that it won't use fundescs. */
    unsigned        lis, ori, lwz, mtctr, bctr;
    StgFunPtr       code;
#else
    /* fundesc-based ABIs */
#define FUNDESCS
    StgFunPtr       code;
    struct AdjustorStub *toc;
#endif
    StgStablePtr    hptr;
    StgFunPtr       wptr;
    StgInt          negative_framesize;
    StgInt          extrawords_plus_one;
} AdjustorStub;
#endif
#endif
void*
createAdjustor(int cconv, StgStablePtr hptr, StgFunPtr wptr, char *typeString)
{
  void *adjustor = NULL;

  switch (cconv)
  {
  case 0: /* _stdcall */
#if defined(i386_TARGET_ARCH)
    /* Magic constant computed by inspecting the code length of
       the following assembly language snippet
       (offset and machine code prefixed):

     <0>: 58                popl  %eax              # temp. remove ret addr..
     <1>: 68 fd fc fe fa    pushl 0xfafefcfd        # constant is large enough to
                                                    # hold a StgStablePtr
     <6>: 50                pushl %eax              # put back ret. addr
     <7>: b8 fa ef ff 00    movl  $0x00ffeffa, %eax # load up wptr
     <c>: ff e0             jmp   %eax              # and jump to it.
                                                    # the callee cleans up the stack
    */
    adjustor = mallocBytesRWX(14);
    {
        unsigned char *const adj_code = (unsigned char *)adjustor;
        adj_code[0x00] = (unsigned char)0x58;  /* popl %eax  */

        adj_code[0x01] = (unsigned char)0x68;  /* pushl hptr (which is a dword immediate) */
        *((StgStablePtr*)(adj_code + 0x02)) = (StgStablePtr)hptr;

        adj_code[0x06] = (unsigned char)0x50;  /* pushl %eax */

        adj_code[0x07] = (unsigned char)0xb8;  /* movl  $wptr, %eax */
        *((StgFunPtr*)(adj_code + 0x08)) = (StgFunPtr)wptr;

        adj_code[0x0c] = (unsigned char)0xff;  /* jmp %eax */
        adj_code[0x0d] = (unsigned char)0xe0;
    }
#endif
    break;

  case 1: /* _ccall */
#if defined(i386_TARGET_ARCH)
    /* Magic constant computed by inspecting the code length of
       the following assembly language snippet
       (offset and machine code prefixed):

  <00>: 68 ef be ad de     pushl $0xdeadbeef       # constant is large enough to
                                                   # hold a StgStablePtr
  <05>: b8 fa ef ff 00     movl  $0x00ffeffa, %eax # load up wptr
  <0a>: 68 ef be ad de     pushl $obscure_ccall_ret_code # push the return address
  <0f>: ff e0              jmp   *%eax             # jump to wptr

       The ccall'ing version is a tad different, passing in the return
       address of the caller to the auto-generated C stub (which enters
       via the stable pointer.) (The auto-generated C stub is in on this
       game, don't worry :-)

       See the comment next to obscure_ccall_ret_code for why we need to
       perform a tail jump instead of a call, followed by some C stack
       fixup.

       Note: The adjustor makes the assumption that any return value
       coming back from the C stub is not stored on the stack.
       That's (thankfully) the case here with the restricted set of
       return types that we support.
    */
    adjustor = mallocBytesRWX(17);
    {
        unsigned char *const adj_code = (unsigned char *)adjustor;

        adj_code[0x00] = (unsigned char)0x68;  /* pushl hptr (which is a dword immediate) */
        *((StgStablePtr*)(adj_code + 0x01)) = (StgStablePtr)hptr;

        adj_code[0x05] = (unsigned char)0xb8;  /* movl  $wptr, %eax */
        *((StgFunPtr*)(adj_code + 0x06)) = (StgFunPtr)wptr;

        adj_code[0x0a] = (unsigned char)0x68;  /* pushl obscure_ccall_ret_code */
        *((StgFunPtr*)(adj_code + 0x0b)) = (StgFunPtr)obscure_ccall_ret_code;

        adj_code[0x0f] = (unsigned char)0xff;  /* jmp *%eax */
        adj_code[0x10] = (unsigned char)0xe0;
    }
#elif defined(sparc_TARGET_ARCH)
  /* Magic constant computed by inspecting the code length of the following
     assembly language snippet (offset and machine code prefixed):

     <00>: 9C23A008   sub   %sp, 8, %sp         ! make room for %o4/%o5 in caller's frame
     <04>: DA23A060   st    %o5, [%sp + 96]     ! shift registers by 2 positions
     <08>: D823A05C   st    %o4, [%sp + 92]
     <0C>: 9A10000B   mov   %o3, %o5
     <10>: 9810000A   mov   %o2, %o4
     <14>: 96100009   mov   %o1, %o3
     <18>: 94100008   mov   %o0, %o2
     <1C>: 13000000   sethi %hi(wptr), %o1      ! load up wptr (1 of 2)
     <20>: 11000000   sethi %hi(hptr), %o0      ! load up hptr (1 of 2)
     <24>: 81C26000   jmp   %o1 + %lo(wptr)     ! jump to wptr (load 2 of 2)
     <28>: 90122000   or    %o0, %lo(hptr), %o0 ! load up hptr (2 of 2, delay slot)
     <2C>: 00000000                             ! place for getting hptr back easily

     ccall'ing on SPARC is easy, because we happen to push a multiple of
     8 bytes (1 word hptr + 1 word dummy arg) in front of the
     existing arguments (note that %sp must stay double-word aligned at
     all times, see ABI spec at http://www.sparc.org/standards/psABI3rd.pdf).
     To do this, we extend the *caller's* stack frame by 2 words and shift
     the output registers used for argument passing (%o0 - %o5, we are a *leaf*
     procedure because of the tail-jump) by 2 positions. This makes room in
     %o0 and %o1 for the additional arguments, namely hptr and a dummy (used
     for destination addr of jump on SPARC, return address on x86, ...). This
     shouldn't cause any problems for a C-like caller: alloca is implemented
     similarly, and local variables should be accessed via %fp, not %sp. In a
     nutshell: This should work! (Famous last words! :-)
  */
    adjustor = mallocBytesRWX(4*(11+1));
    {
        unsigned long *const adj_code = (unsigned long *)adjustor;

        adj_code[ 0]  = 0x9C23A008UL;   /* sub   %sp, 8, %sp         */
        adj_code[ 1]  = 0xDA23A060UL;   /* st    %o5, [%sp + 96]     */
        adj_code[ 2]  = 0xD823A05CUL;   /* st    %o4, [%sp + 92]     */
        adj_code[ 3]  = 0x9A10000BUL;   /* mov   %o3, %o5            */
        adj_code[ 4]  = 0x9810000AUL;   /* mov   %o2, %o4            */
        adj_code[ 5]  = 0x96100009UL;   /* mov   %o1, %o3            */
        adj_code[ 6]  = 0x94100008UL;   /* mov   %o0, %o2            */
        adj_code[ 7]  = 0x13000000UL;   /* sethi %hi(wptr), %o1      */
        adj_code[ 7] |= ((unsigned long)wptr) >> 10;
        adj_code[ 8]  = 0x11000000UL;   /* sethi %hi(hptr), %o0      */
        adj_code[ 8] |= ((unsigned long)hptr) >> 10;
        adj_code[ 9]  = 0x81C26000UL;   /* jmp   %o1 + %lo(wptr)     */
        adj_code[ 9] |= ((unsigned long)wptr) & 0x000003FFUL;
        adj_code[10]  = 0x90122000UL;   /* or    %o0, %lo(hptr), %o0 */
        adj_code[10] |= ((unsigned long)hptr) & 0x000003FFUL;

        adj_code[11]  = (unsigned long)hptr;

        /* flush the instruction cache */
        asm("flush %0" : : "r" (adj_code     ));
        asm("flush %0" : : "r" (adj_code +  2));
        asm("flush %0" : : "r" (adj_code +  4));
        asm("flush %0" : : "r" (adj_code +  6));
        asm("flush %0" : : "r" (adj_code + 10));

        /* max. 5 instructions latency, and we need at least 1 for returning */
        asm("nop");
        asm("nop");
        asm("nop");
        asm("nop");
    }
#elif defined(alpha_TARGET_ARCH)
  /* Magic constant computed by inspecting the code length of
     the following assembly language snippet
     (offset and machine code prefixed; note that the machine code
     shown is longwords stored in little-endian order):

  <00>: 46520414   mov  a2, a4
  <04>: 46100412   mov  a0, a2
  <08>: a61b0020   ldq  a0, 0x20(pv)    # load up hptr
  <0c>: 46730415   mov  a3, a5
  <10>: a77b0028   ldq  pv, 0x28(pv)    # load up wptr
  <14>: 46310413   mov  a1, a3
  <18>: 6bfb----   jmp  (pv), <hint>    # jump to wptr (with hint)
  <1c>: 00000000                        # padding for alignment
  <20>: [8 bytes for hptr quadword]
  <28>: [8 bytes for wptr quadword]

     The "computed" jump at <18> above is really a jump to a fixed
     location.  Accordingly, we place an always-correct hint in the
     jump instruction, namely the address offset from <0c> to wptr,
     divided by 4, taking the lowest 14 bits.

     We only support passing 4 or fewer argument words, for the same
     reason described under sparc_TARGET_ARCH above by JRS, 21 Aug 01.
     On the Alpha the first 6 integer arguments are in a0 through a5,
     and the rest on the stack.  Hence we want to shuffle the original
     caller's arguments by two.

     On the Alpha the calling convention is so complex and dependent
     on the callee's signature -- for example, the stack pointer has
     to be a multiple of 16 -- that it seems impossible to me [ccshan]
     to handle the general case correctly without changing how the
     adjustor is called from C.  For now, our solution of shuffling
     registers only and ignoring the stack only works if the original
     caller passed 4 or fewer argument words.

     TODO: Depending on how much allocation overhead stgMallocBytes uses for
     header information (more precisely, if the overhead is no more than
     4 bytes), we should move the first three instructions above down by
     4 bytes (getting rid of the nop), hence saving memory. [ccshan]
  */
    ASSERT(((StgWord64)wptr & 3) == 0);
    adjustor = mallocBytesRWX(48);
    {
        StgWord64 *const code = (StgWord64 *)adjustor;

        code[0] = 0x4610041246520414L;
        code[1] = 0x46730415a61b0020L;
        code[2] = 0x46310413a77b0028L;
        code[3] = 0x000000006bfb0000L
                | (((StgWord32*)(wptr) - (StgWord32*)(code) - 3) & 0x3fff);

        code[4] = (StgWord64)hptr;
        code[5] = (StgWord64)wptr;

        /* Ensure that instruction cache is consistent with our new code */
        __asm__ volatile("call_pal %0" : : "i" (PAL_imb));
    }
#elif defined(powerpc_TARGET_ARCH) && defined(linux_TARGET_OS)

#define OP_LO(op,lo)  ((((unsigned)(op)) << 16) | (((unsigned)(lo)) & 0xFFFF))
#define OP_HI(op,hi)  ((((unsigned)(op)) << 16) | (((unsigned)(hi)) >> 16))

    /* The PowerPC Linux (32-bit) calling convention is annoyingly complex.
       We need to calculate all the details of the stack frame layout,
       taking into account the types of all the arguments, and then
       generate code on the fly. */
        int src_gpr = 3, dst_gpr = 5;
        int src_offset = 0, dst_offset = 0;
        int n = strlen(typeString), i;
        int src_locs[n], dst_locs[n];

        /*
            Calculate where the arguments should go.
            src_locs[] will contain the locations of the arguments in the
            original stack frame passed to the adjustor.
            dst_locs[] will contain the locations of the arguments after the
            adjustor runs, on entry to the wrapper proc pointed to by wptr.

            This algorithm is based on the one described on page 3-19 of the
            System V ABI PowerPC Processor Supplement.
        */
        for(i=0;typeString[i];i++)
            char t = typeString[i];
            if((t == 'f' || t == 'd') && fpr <= 8)
                src_locs[i] = dst_locs[i] = -32-(fpr++);
            if(t == 'l' && src_gpr <= 9)
                if((src_gpr & 1) == 0)
                src_locs[i] = -src_gpr;
            else if(t == 'i' && src_gpr <= 10)
                src_locs[i] = -(src_gpr++);
                if(t == 'l' || t == 'd')
                src_locs[i] = src_offset;
                src_offset += (t == 'l' || t == 'd') ? 8 : 4;
            if(t == 'l' && dst_gpr <= 9)
                if((dst_gpr & 1) == 0)
                dst_locs[i] = -dst_gpr;
            else if(t == 'i' && dst_gpr <= 10)
                dst_locs[i] = -(dst_gpr++);
                if(t == 'l' || t == 'd')
                dst_locs[i] = dst_offset;
                dst_offset += (t == 'l' || t == 'd') ? 8 : 4;
        frameSize = dst_offset + 8;
        frameSize = (frameSize+15) & ~0xF;
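        // A worked example (illustrative): if every argument fits in
        // registers, dst_offset stays 0, so frameSize = 0 + 8 = 8, and
        // rounding to a 16-byte multiple gives a 16-byte frame (back chain
        // and LR save word, plus padding).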
        // allocate space for at most 4 insns per parameter
        // plus 14 more instructions.
        adjustor = mallocBytesRWX(4 * (4*n + 14));
        code = (unsigned*)adjustor;

        *code++ = 0x48000008;   // b *+8
        // * Put the hptr in a place where freeHaskellFunctionPtr
        //   can easily find it.
        *code++ = (unsigned) hptr;

        // * save the link register
        *code++ = 0x7c0802a6;   // mflr r0;
        *code++ = 0x90010004;   // stw r0, 4(r1);
        // * and build a new stack frame
        *code++ = OP_LO(0x9421, -frameSize); // stwu r1, -frameSize(r1)
        // * now generate instructions to copy arguments
        //   from the old stack frame into the new stack frame.
            if(src_locs[i] < -32)
                ASSERT(dst_locs[i] == src_locs[i]);
            else if(src_locs[i] < 0)
                ASSERT(typeString[i] != 'f' && typeString[i] != 'd');
                ASSERT(dst_locs[i] > -32);
                // dst is in GPR, too.
                if(typeString[i] == 'l')
                        | ((-dst_locs[i]+1) << 16)
                        | ((-src_locs[i]+1) << 11)
                        | ((-src_locs[i]+1) << 21);
                        | ((-dst_locs[i]) << 16)
                        | ((-src_locs[i]) << 11)
                        | ((-src_locs[i]) << 21);
                if(typeString[i] == 'l')
                    // stw src+1, dst_offset+4(r1)
                        | ((-src_locs[i]+1) << 21)
                    // stw src, dst_offset(r1)
                        | ((-src_locs[i]) << 21)
                ASSERT(dst_locs[i] >= 0);
                ASSERT(typeString[i] != 'f' && typeString[i] != 'd');
                if(typeString[i] == 'l')
                    // lwz r0, src_offset(r1)
                        | (src_locs[i] + frameSize + 8 + 4);
                    // stw r0, dst_offset(r1)
                        | (dst_locs[i] + 8 + 4);
                    // lwz r0, src_offset(r1)
                        | (src_locs[i] + frameSize + 8);
                    // stw r0, dst_offset(r1)
        // * hptr will be the new first argument.
        // lis r3, hi(hptr)
        *code++ = OP_HI(0x3c60, hptr);
        // ori r3,r3,lo(hptr)
        *code++ = OP_LO(0x6063, hptr);

        // * we need to return to a piece of code
        //   which will tear down the stack frame.
        // lis r11,hi(obscure_ccall_ret_code)
        *code++ = OP_HI(0x3d60, obscure_ccall_ret_code);
        // ori r11,r11,lo(obscure_ccall_ret_code)
        *code++ = OP_LO(0x616b, obscure_ccall_ret_code);
        // mtlr r11
        *code++ = 0x7d6803a6;

        // * jump to wptr
        // lis r11,hi(wptr)
        *code++ = OP_HI(0x3d60, wptr);
        // ori r11,r11,lo(wptr)
        *code++ = OP_LO(0x616b, wptr);
        // mtctr r11
        *code++ = 0x7d6903a6;
        // bctr
        *code++ = 0x4e800420;
        // Flush the Instruction cache:
        {
            unsigned *p = adjustor;
            while (p < code) {
                __asm__ volatile ("dcbf 0,%0\n\tsync\n\ticbi 0,%0"
                                  : : "r" (p));
                p++;
            }
            __asm__ volatile ("sync\n\tisync");
        }
#elif defined(powerpc_TARGET_ARCH) || defined(powerpc64_TARGET_ARCH)

#define OP_LO(op,lo)  ((((unsigned)(op)) << 16) | (((unsigned)(lo)) & 0xFFFF))
#define OP_HI(op,hi)  ((((unsigned)(op)) << 16) | (((unsigned)(hi)) >> 16))
    /* The following code applies to all PowerPC and PowerPC64 platforms
       whose stack layout is based on the AIX ABI.

       Besides (obviously) AIX, this includes
        Mac OS 9 and BeOS/PPC (may they rest in peace),
            which use the 32-bit AIX ABI
        powerpc64-linux,
            which uses the 64-bit AIX ABI
        and Darwin (Mac OS X),
            which uses the same stack layout as AIX,
            but no function descriptors.

       The actual stack-frame shuffling is implemented out-of-line
       in the function adjustorCode, in AdjustorAsm.S.
       Here, we set up an AdjustorStub structure, which
       is a function descriptor (on platforms that have function
       descriptors) or a short piece of stub code (on Darwin) to call
       adjustorCode with a pointer to the AdjustorStub struct loaded
       into register r2.

       One nice thing about this is that there is _no_ code generated at
       runtime on the platforms that have function descriptors.
    */
    AdjustorStub *adjustorStub;
    int sz = 0, extra_sz, total_sz;

    // from AdjustorAsm.s
    // not declared as a function so that AIX-style
    // fundescs can never get in the way.
    extern void *adjustorCode;
#ifdef FUNDESCS
    adjustorStub = stgMallocBytes(sizeof(AdjustorStub), "createAdjustor");
#else
    adjustorStub = mallocBytesRWX(sizeof(AdjustorStub));
#endif
    adjustor = adjustorStub;

    adjustorStub->code = (void*) &adjustorCode;

#ifdef FUNDESCS
    // function descriptors are a cool idea.
    // We don't need to generate any code at runtime.
    adjustorStub->toc = adjustorStub;
#else
    // no function descriptors :-(
    // We need to do things "by hand".
#if defined(powerpc_TARGET_ARCH)
    // lis r2, hi(adjustorStub)
    adjustorStub->lis = OP_HI(0x3c40, adjustorStub);
    // ori r2, r2, lo(adjustorStub)
    adjustorStub->ori = OP_LO(0x6042, adjustorStub);
    // lwz r0, code(r2)
    adjustorStub->lwz = OP_LO(0x8002, (char*)(&adjustorStub->code)
                                    - (char*)adjustorStub);
    // mtctr r0
    adjustorStub->mtctr = 0x7c0903a6;
    // bctr
    adjustorStub->bctr = 0x4e800420;
#else
    barf("adjustor creation not supported on this platform");
#endif
    // Flush the Instruction cache:
    {
        int n = sizeof(AdjustorStub)/sizeof(unsigned);
        unsigned *p = (unsigned*)adjustor;
        while (n--) {
            __asm__ volatile ("dcbf 0,%0\n\tsync\n\ticbi 0,%0"
                              : : "r" (p));
            p++;
        }
        __asm__ volatile ("sync\n\tisync");
    }
#endif
    // Calculate the size of the stack frame, in words.
        char t = *typeString++;

#if defined(powerpc_TARGET_ARCH)
        // on 32-bit platforms, Double and Int64 occupy two words.
        // everything else is one word.
    // The first eight words of the parameter area
    // are just "backing store" for the parameters passed in
    // the GPRs. extra_sz is the number of words beyond those first
    // eight.
    // Calculate the total size of the stack frame.
    total_sz = (6 /* linkage area */
              + 8 /* minimum parameter area */
              + 2 /* two extra arguments */
              + extra_sz)*sizeof(StgWord);

    // align to 16 bytes.
    // AIX only requires 8 bytes, but who cares?
    total_sz = (total_sz+15) & ~0xF;
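    // For example (illustrative), with extra_sz = 3 on a 32-bit platform:
    // (6 + 8 + 2 + 3) * 4 = 76 bytes, which rounds up to an 80-byte frame.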
    // Fill in the information that adjustorCode in AdjustorAsm.S
    // will use to create a new stack frame with the additional args.
    adjustorStub->hptr = hptr;
    adjustorStub->wptr = wptr;
    adjustorStub->negative_framesize = -total_sz;
    adjustorStub->extrawords_plus_one = extra_sz + 1;
#elif defined(ia64_TARGET_ARCH)
/*
    Up to 8 inputs are passed in registers.  We flush the last two inputs to
    the stack, initially into the 16-byte scratch region left by the caller.
    We then shuffle the others along by 4 (taking 2 registers for ourselves
    to save return address and previous function state - we need to come back
    here on the way out to restore the stack, so this is a real function
    rather than just a trampoline).

    The function descriptor we create contains the gp of the target function
    so gp is already loaded correctly.

        [MLX]  alloc r16=ar.pfs,10,2,0
        [MII]  st8.spill [r12]=r38,8    // spill in6 (out4)
               mov r41=r37              // out7 = in5 (out3)
               mov r40=r36;;            // out6 = in4 (out2)
        [MII]  st8.spill [r12]=r39      // spill in7 (out5)
               mov r38=r34;;            // out4 = in2 (out0)
        [MII]  mov r39=r35              // out5 = in3 (out1)
               mov r37=r33              // out3 = in1 (loc1)
               mov r36=r32              // out2 = in0 (loc0)
        [MLX]  adds r12=-24,r12         // update sp
               movl r34=hptr;;          // out0 = hptr
        [MIB]  mov r33=r16              // loc1 = ar.pfs
               mov r32=b0               // loc0 = retaddr
               br.call.sptk.many b0=b6;;

        [MII]  adds r12=-16,r12
               br.ret.sptk.many b0;;
*/
/* These macros distribute a long constant into the two words of an MLX bundle */
#define BITS(val,start,count)   (((val) >> (start)) & ((1 << (count))-1))
#define MOVL_LOWORD(val)        (BITS(val,22,18) << 46)
#define MOVL_HIWORD(val)        (BITS(val,40,23) | (BITS(val,0,7) << 36) | (BITS(val,7,9) << 50) \
                                | (BITS(val,16,5) << 55) | (BITS(val,21,1) << 44) | BITS(val,63,1) << 59)
    {
        StgStablePtr stable;
        IA64FunDesc *wdesc = (IA64FunDesc *)wptr;
        StgWord64 wcode = wdesc->ip;
        IA64FunDesc *fdesc;
        StgWord64 *code;

        /* we allocate on the Haskell heap since malloc'd memory isn't executable - argh */
        adjustor = stgAllocStable(sizeof(IA64FunDesc)+18*8, &stable);

        fdesc = (IA64FunDesc *)adjustor;
        code = (StgWord64 *)(fdesc + 1);
        fdesc->ip = (StgWord64)code;
        fdesc->gp = wdesc->gp;
        code[0]  = 0x0000058004288004 | MOVL_LOWORD(wcode);
        code[1]  = 0x6000000220000000 | MOVL_HIWORD(wcode);
        code[2]  = 0x029015d818984001;
        code[3]  = 0x8401200500420094;
        code[4]  = 0x886011d8189c0001;
        code[5]  = 0x84011004c00380c0;
        code[6]  = 0x0250210046013800;
        code[7]  = 0x8401000480420084;
        code[8]  = 0x0000233f19a06005 | MOVL_LOWORD((StgWord64)hptr);
        code[9]  = 0x6000000440000000 | MOVL_HIWORD((StgWord64)hptr);
        code[10] = 0x0200210020010811;
        code[11] = 0x1080006800006200;
        code[12] = 0x0000210018406000;
        code[13] = 0x00aa021000038005;
        code[14] = 0x000000010000001d;
        code[15] = 0x0084000880000200;

        /* save stable pointers in convenient form */
        code[16] = (StgWord64)hptr;
        code[17] = (StgWord64)stable;
    }
#else
    barf("adjustor creation not supported on this platform");
#endif
void
freeHaskellFunctionPtr(void* ptr)
{
#if defined(i386_TARGET_ARCH)
 if ( *(unsigned char*)ptr != 0x68 &&
      *(unsigned char*)ptr != 0x58 ) {
   errorBelch("freeHaskellFunctionPtr: not for me, guv! %p\n", ptr);
   return;
 }

 /* Free the stable pointer first.. */
 if (*(unsigned char*)ptr == 0x68) { /* Aha, a ccall adjustor! */
    freeStablePtr(*((StgStablePtr*)((unsigned char*)ptr + 0x01)));
 } else {
    freeStablePtr(*((StgStablePtr*)((unsigned char*)ptr + 0x02)));
 }
#elif defined(sparc_TARGET_ARCH)
 if ( *(unsigned long*)ptr != 0x9C23A008UL ) {
   errorBelch("freeHaskellFunctionPtr: not for me, guv! %p\n", ptr);
   return;
 }

 /* Free the stable pointer first.. */
 freeStablePtr(*((StgStablePtr*)((unsigned long*)ptr + 11)));
#elif defined(alpha_TARGET_ARCH)
 /* the first quadword of the adjustor code generated above */
 if ( *(StgWord64*)ptr != 0x4610041246520414L ) {
   errorBelch("freeHaskellFunctionPtr: not for me, guv! %p\n", ptr);
   return;
 }

 /* Free the stable pointer first: hptr lives at offset 0x20 (code[4]). */
 freeStablePtr(*((StgStablePtr*)((unsigned char*)ptr + 0x20)));
#elif defined(powerpc_TARGET_ARCH) && defined(linux_TARGET_OS)
 if ( *(StgWord*)ptr != 0x48000008 ) {
   errorBelch("freeHaskellFunctionPtr: not for me, guv! %p\n", ptr);
   return;
 }
 freeStablePtr(((StgStablePtr*)ptr)[1]);
#elif defined(powerpc_TARGET_ARCH) || defined(powerpc64_TARGET_ARCH)
 extern void* adjustorCode;
 if ( ((AdjustorStub*)ptr)->code != (StgFunPtr) &adjustorCode ) {
   errorBelch("freeHaskellFunctionPtr: not for me, guv! %p\n", ptr);
   return;
 }
 freeStablePtr(((AdjustorStub*)ptr)->hptr);
#elif defined(ia64_TARGET_ARCH)
 IA64FunDesc *fdesc = (IA64FunDesc *)ptr;
 StgWord64 *code = (StgWord64 *)(fdesc+1);

 if (fdesc->ip != (StgWord64)code) {
   errorBelch("freeHaskellFunctionPtr: not for me, guv! %p\n", ptr);
   return;
 }
 freeStablePtr((StgStablePtr)code[16]);
 freeStablePtr((StgStablePtr)code[17]);
 return;
#else
 ASSERT(0);
#endif
 *((unsigned char*)ptr) = '\0';

 stgFree(ptr);
}
/* -----------------------------------------------------------------------------
 * Function: initAdjustor()
 *
 * Perform initialisation of adjustor thunk layer (if needed.)
 * ---------------------------------------------------------------------------*/
#if defined(i386_TARGET_ARCH)
  /* Now here's something obscure for you:

     When generating an adjustor thunk that uses the C calling
     convention, we have to make sure that the thunk kicks off
     the process of jumping into Haskell with a tail jump. Why?
     Because as a result of jumping into Haskell we may end
     up freeing the very adjustor thunk we came from using
     freeHaskellFunctionPtr(). Hence, we better not return to
     the adjustor code on our way out, since it could by then
     point to junk.

     The fix is readily at hand, just include the opcodes
     for the C stack fixup code that we need to perform when
     returning in some static piece of memory and arrange
     to return to it before tail jumping from the adjustor thunk.
  */
  obscure_ccall_ret_code = mallocBytesRWX(4);

  obscure_ccall_ret_code[0x00] = (unsigned char)0x83;  /* addl $0x4, %esp */
  obscure_ccall_ret_code[0x01] = (unsigned char)0xc4;
  obscure_ccall_ret_code[0x02] = (unsigned char)0x04;

  obscure_ccall_ret_code[0x03] = (unsigned char)0xc3;  /* ret */