1 /* -----------------------------------------------------------------------------
2 * Foreign export adjustor thunks
6 * ---------------------------------------------------------------------------*/
8 /* A little bit of background...
10 An adjustor thunk is a dynamically allocated code snippet that allows
11 Haskell closures to be viewed as C function pointers.
13 Stable pointers provide a way for the outside world to get access to,
14 and evaluate, Haskell heap objects, with the RTS providing a small
15 range of ops for doing so. So, assuming we've got a stable pointer in
16 our hand in C, we can jump into the Haskell world and evaluate a callback
17 procedure, say. This works OK in some cases where callbacks are used, but
18 does require the external code to know about stable pointers and how to deal
19 with them. We'd like to hide the Haskell-nature of a callback and have it
20 be invoked just like any other C function pointer.
22 Enter adjustor thunks. An adjustor thunk is a little piece of code
23 that's generated on-the-fly (one per Haskell closure being exported)
24 that, when entered using some 'universal' calling convention (e.g., the
25 C calling convention on platform X), pushes an implicit stable pointer
26 (to the Haskell callback) before calling another (static) C function stub
27 which takes care of entering the Haskell code via its stable pointer.
29 An adjustor thunk is allocated on the C heap, and is called from within
30 Haskell just before handing out the function pointer to the Haskell (IO)
31 action. User code should never have to invoke it explicitly.
33 An adjustor thunk differs from a C function pointer in one respect: when
34 the code is through with it, it has to be freed in order to release Haskell
35 and C resources. Failure to do so results in memory leaks on both the C and
39 #include "PosixSource.h"
41 #include "RtsExternal.h"
49 #if defined(openbsd_HOST_OS)
51 #include <sys/types.h>
54 /* no C99 header stdint.h on OpenBSD? */
55 typedef unsigned long my_uintptr_t;
58 #if defined(powerpc_HOST_ARCH) && defined(linux_HOST_OS)
62 /* Heavily arch-specific, I'm afraid.. */
65 * Allocate len bytes which are readable, writable, and executable.
67 * ToDo: If this turns out to be a performance bottleneck, one could
68 * e.g. cache the last VirtualProtect/mprotect-ed region and do
69 * nothing in case of a cache hit.
/* Allocate len bytes via stgMallocBytes and ensure the block is usable
 * for runtime-generated machine code: on Win32 the pages are re-protected
 * with VirtualProtect, on OpenBSD with mprotect (rounding the range out to
 * whole pages).  Any protection failure is fatal (barf).
 * NOTE(review): on other platforms the malloc'd memory is returned as-is —
 * presumably those targets allow execution of heap memory; verify per port. */
72 mallocBytesRWX(int len)
74 void *addr = stgMallocBytes(len, "mallocBytesRWX");
75 #if defined(i386_HOST_ARCH) && defined(_WIN32)
76 /* This could be necessary for processors which distinguish between READ and
77 EXECUTE memory accesses, e.g. Itaniums. */
78 DWORD dwOldProtect = 0;
79 if (VirtualProtect (addr, len, PAGE_EXECUTE_READWRITE, &dwOldProtect) == 0) {
80 barf("mallocBytesRWX: failed to protect 0x%p; error=%lu; old protection: %lu\n",
81 addr, (unsigned long)GetLastError(), (unsigned long)dwOldProtect);
83 #elif defined(openbsd_HOST_OS)
84 /* malloced memory isn't executable by default on OpenBSD */
85 my_uintptr_t pageSize = sysconf(_SC_PAGESIZE);
/* Round the [addr, addr+len) range out to page boundaries: mprotect
   operates on whole pages only. */
86 my_uintptr_t mask = ~(pageSize - 1);
87 my_uintptr_t startOfFirstPage = ((my_uintptr_t)addr ) & mask;
88 my_uintptr_t startOfLastPage = ((my_uintptr_t)addr + len - 1) & mask;
89 my_uintptr_t size = startOfLastPage - startOfFirstPage + pageSize;
90 if (mprotect((void*)startOfFirstPage, (size_t)size, PROT_EXEC | PROT_READ | PROT_WRITE) != 0) {
91 barf("mallocBytesRWX: failed to protect 0x%p\n", addr);
97 #if defined(i386_HOST_ARCH)
98 static unsigned char *obscure_ccall_ret_code;
101 #if defined(alpha_HOST_ARCH)
102 /* To get the definition of PAL_imb: */
103 # if defined(linux_HOST_OS)
104 # include <asm/pal.h>
106 # include <machine/pal.h>
110 #if defined(ia64_HOST_ARCH)
113 /* Layout of a function descriptor */
114 typedef struct _IA64FunDesc {
/* Allocate size_in_bytes of storage on the Haskell heap as an
 * StgArrWords closure, register a stable pointer to it (written through
 * *stable, which keeps the closure alive across GCs), and return a
 * pointer to the array payload.  Used on targets (ia64, below) where
 * malloc'd memory is not executable. */
120 stgAllocStable(size_t size_in_bytes, StgStablePtr *stable)
123 nat data_size_in_words, total_size_in_words;
125 /* round up to a whole number of words */
/* NOTE(review): "+ sizeof(W_) + 1" can over-allocate by one word; the
   conventional round-up idiom is "+ sizeof(W_) - 1".  Harmless (only
   wastes a word) but verify the intent. */
126 data_size_in_words = (size_in_bytes + sizeof(W_) + 1) / sizeof(W_);
127 total_size_in_words = sizeofW(StgArrWords) + data_size_in_words;
129 /* allocate and fill it in */
130 arr = (StgArrWords *)allocate(total_size_in_words);
131 SET_ARR_HDR(arr, &stg_ARR_WORDS_info, CCCS, data_size_in_words);
133 /* obtain a stable ptr */
134 *stable = getStablePtr((StgPtr)arr);
136 /* and return a ptr to the goods inside the array */
137 return(BYTE_ARR_CTS(arr));
141 #if defined(powerpc_HOST_ARCH) && defined(linux_HOST_OS)
142 __asm__("obscure_ccall_ret_code:\n\t"
147 extern void obscure_ccall_ret_code(void);
150 #if defined(powerpc_HOST_ARCH) || defined(powerpc64_HOST_ARCH)
151 #if !(defined(powerpc_HOST_ARCH) && defined(linux_HOST_OS))
153 /* !!! !!! WARNING: !!! !!!
154 * This structure is accessed from AdjustorAsm.s
155 * Any changes here have to be mirrored in the offsets there.
/* Per-adjustor descriptor used on AIX-ABI PowerPC targets.  It is either
 * a function descriptor (fundesc ABIs) or a tiny code stub (Darwin) that
 * transfers control to adjustorCode in AdjustorAsm.S; see the WARNING
 * above — field offsets are mirrored in that assembly file. */
158 typedef struct AdjustorStub {
159 #if defined(powerpc_HOST_ARCH) && defined(darwin_HOST_OS)
166 #elif defined(powerpc64_HOST_ARCH) && defined(darwin_HOST_OS)
167 /* powerpc64-darwin: just guessing that it won't use fundescs. */
178 /* fundesc-based ABIs */
/* Filled in by createAdjustor: the negated total stack-frame size, and
   the number of parameter words beyond the first eight, plus one
   (see the assignments near the end of the AIX-ABI branch below). */
187 StgInt negative_framesize;
188 StgInt extrawords_plus_one;
/* createAdjustor: build a fresh adjustor thunk for the Haskell closure
 * identified by the stable pointer hptr.  cconv selects the calling
 * convention (case 0 is _stdcall; see the per-arch branches below).
 * The generated code arranges for hptr to become an implicit extra
 * argument and then tail-jumps (or, on ppc-linux/ia64, calls) the C
 * wrapper wptr.  The resulting code pointer is handed out to foreign
 * callers and must eventually be released with freeHaskellFunctionPtr(). */
195 createAdjustor(int cconv, StgStablePtr hptr,
198 #if !defined(powerpc_HOST_ARCH) && !defined(powerpc64_HOST_ARCH)
203 void *adjustor = NULL;
207 case 0: /* _stdcall */
208 #if defined(i386_HOST_ARCH)
209 /* Magic constant computed by inspecting the code length of
210 the following assembly language snippet
211 (offset and machine code prefixed):
213 <0>: 58 popl %eax # temp. remove ret addr..
214 <1>: 68 fd fc fe fa pushl 0xfafefcfd # constant is large enough to
215 # hold a StgStablePtr
216 <6>: 50 pushl %eax # put back ret. addr
217 <7>: b8 fa ef ff 00 movl $0x00ffeffa, %eax # load up wptr
218 <c>: ff e0 jmp %eax # and jump to it.
219 # the callee cleans up the stack
221 adjustor = mallocBytesRWX(14);
223 unsigned char *const adj_code = (unsigned char *)adjustor;
224 adj_code[0x00] = (unsigned char)0x58; /* popl %eax */
226 adj_code[0x01] = (unsigned char)0x68; /* pushl hptr (which is a dword immediate ) */
227 *((StgStablePtr*)(adj_code + 0x02)) = (StgStablePtr)hptr;
229 adj_code[0x06] = (unsigned char)0x50; /* pushl %eax */
231 adj_code[0x07] = (unsigned char)0xb8; /* movl $wptr, %eax */
232 *((StgFunPtr*)(adj_code + 0x08)) = (StgFunPtr)wptr;
234 adj_code[0x0c] = (unsigned char)0xff; /* jmp %eax */
235 adj_code[0x0d] = (unsigned char)0xe0;
241 #if defined(i386_HOST_ARCH)
242 /* Magic constant computed by inspecting the code length of
243 the following assembly language snippet
244 (offset and machine code prefixed):
246 <00>: 68 ef be ad de pushl $0xdeadbeef # constant is large enough to
247 # hold a StgStablePtr
248 <05>: b8 fa ef ff 00 movl $0x00ffeffa, %eax # load up wptr
249 <0a>: 68 ef be ad de pushl $obscure_ccall_ret_code # push the return address
250 <0f>: ff e0 jmp *%eax # jump to wptr
252 The ccall'ing version is a tad different, passing in the return
253 address of the caller to the auto-generated C stub (which enters
254 via the stable pointer.) (The auto-generated C stub is in on this
255 game, don't worry :-)
257 See the comment next to obscure_ccall_ret_code why we need to
258 perform a tail jump instead of a call, followed by some C stack
261 Note: The adjustor makes the assumption that any return value
262 coming back from the C stub is not stored on the stack.
263 That's (thankfully) the case here with the restricted set of
264 return types that we support.
266 adjustor = mallocBytesRWX(17);
268 unsigned char *const adj_code = (unsigned char *)adjustor;
270 adj_code[0x00] = (unsigned char)0x68; /* pushl hptr (which is a dword immediate ) */
271 *((StgStablePtr*)(adj_code+0x01)) = (StgStablePtr)hptr;
273 adj_code[0x05] = (unsigned char)0xb8; /* movl $wptr, %eax */
274 *((StgFunPtr*)(adj_code + 0x06)) = (StgFunPtr)wptr;
276 adj_code[0x0a] = (unsigned char)0x68; /* pushl obscure_ccall_ret_code */
277 *((StgFunPtr*)(adj_code + 0x0b)) = (StgFunPtr)obscure_ccall_ret_code;
279 adj_code[0x0f] = (unsigned char)0xff; /* jmp *%eax */
280 adj_code[0x10] = (unsigned char)0xe0;
282 #elif defined(sparc_HOST_ARCH)
283 /* Magic constant computed by inspecting the code length of the following
284 assembly language snippet (offset and machine code prefixed):
286 <00>: 9C23A008 sub %sp, 8, %sp ! make room for %o4/%o5 in caller's frame
287 <04>: DA23A060 st %o5, [%sp + 96] ! shift registers by 2 positions
288 <08>: D823A05C st %o4, [%sp + 92]
289 <0C>: 9A10000B mov %o3, %o5
290 <10>: 9810000A mov %o2, %o4
291 <14>: 96100009 mov %o1, %o3
292 <18>: 94100008 mov %o0, %o2
293 <1C>: 13000000 sethi %hi(wptr), %o1 ! load up wptr (1 of 2)
294 <20>: 11000000 sethi %hi(hptr), %o0 ! load up hptr (1 of 2)
295 <24>: 81C26000 jmp %o1 + %lo(wptr) ! jump to wptr (load 2 of 2)
296 <28>: 90122000 or %o0, %lo(hptr), %o0 ! load up hptr (2 of 2, delay slot)
297 <2C> 00000000 ! place for getting hptr back easily
299 ccall'ing on SPARC is easy, because we are quite lucky to push a
300 multiple of 8 bytes (1 word hptr + 1 word dummy arg) in front of the
301 existing arguments (note that %sp must stay double-word aligned at
302 all times, see ABI spec at http://www.sparc.org/standards/psABI3rd.pdf).
303 To do this, we extend the *caller's* stack frame by 2 words and shift
304 the output registers used for argument passing (%o0 - %o5, we are a *leaf*
305 procedure because of the tail-jump) by 2 positions. This makes room in
306 %o0 and %o1 for the additional arguments, namely hptr and a dummy (used
307 for destination addr of jump on SPARC, return address on x86, ...). This
308 shouldn't cause any problems for a C-like caller: alloca is implemented
309 similarly, and local variables should be accessed via %fp, not %sp. In a
310 nutshell: This should work! (Famous last words! :-)
312 adjustor = mallocBytesRWX(4*(11+1));
314 unsigned long *const adj_code = (unsigned long *)adjustor;
316 adj_code[ 0] = 0x9C23A008UL; /* sub %sp, 8, %sp */
317 adj_code[ 1] = 0xDA23A060UL; /* st %o5, [%sp + 96] */
318 adj_code[ 2] = 0xD823A05CUL; /* st %o4, [%sp + 92] */
319 adj_code[ 3] = 0x9A10000BUL; /* mov %o3, %o5 */
320 adj_code[ 4] = 0x9810000AUL; /* mov %o2, %o4 */
321 adj_code[ 5] = 0x96100009UL; /* mov %o1, %o3 */
322 adj_code[ 6] = 0x94100008UL; /* mov %o0, %o2 */
323 adj_code[ 7] = 0x13000000UL; /* sethi %hi(wptr), %o1 */
324 adj_code[ 7] |= ((unsigned long)wptr) >> 10;
325 adj_code[ 8] = 0x11000000UL; /* sethi %hi(hptr), %o0 */
326 adj_code[ 8] |= ((unsigned long)hptr) >> 10;
327 adj_code[ 9] = 0x81C26000UL; /* jmp %o1 + %lo(wptr) */
328 adj_code[ 9] |= ((unsigned long)wptr) & 0x000003FFUL;
329 adj_code[10] = 0x90122000UL; /* or %o0, %lo(hptr), %o0 */
330 adj_code[10] |= ((unsigned long)hptr) & 0x000003FFUL;
332 adj_code[11] = (unsigned long)hptr;
/* Flush the generated words out of the data cache so the I-cache sees
   them before the thunk is first executed. */
335 asm("flush %0" : : "r" (adj_code ));
336 asm("flush %0" : : "r" (adj_code + 2));
337 asm("flush %0" : : "r" (adj_code + 4));
338 asm("flush %0" : : "r" (adj_code + 6));
339 asm("flush %0" : : "r" (adj_code + 10));
341 /* max. 5 instructions latency, and we need at >= 1 for returning */
347 #elif defined(alpha_HOST_ARCH)
348 /* Magic constant computed by inspecting the code length of
349 the following assembly language snippet
350 (offset and machine code prefixed; note that the machine code
351 shown is longwords stored in little-endian order):
353 <00>: 46520414 mov a2, a4
354 <04>: 46100412 mov a0, a2
355 <08>: a61b0020 ldq a0, 0x20(pv) # load up hptr
356 <0c>: 46730415 mov a3, a5
357 <10>: a77b0028 ldq pv, 0x28(pv) # load up wptr
358 <14>: 46310413 mov a1, a3
359 <18>: 6bfb---- jmp (pv), <hint> # jump to wptr (with hint)
360 <1c>: 00000000 # padding for alignment
361 <20>: [8 bytes for hptr quadword]
362 <28>: [8 bytes for wptr quadword]
364 The "computed" jump at <08> above is really a jump to a fixed
365 location. Accordingly, we place an always-correct hint in the
366 jump instruction, namely the address offset from <0c> to wptr,
367 divided by 4, taking the lowest 14 bits.
369 We only support passing 4 or fewer argument words, for the same
370 reason described under sparc_HOST_ARCH above by JRS, 21 Aug 01.
371 On the Alpha the first 6 integer arguments are in a0 through a5,
372 and the rest on the stack. Hence we want to shuffle the original
373 caller's arguments by two.
375 On the Alpha the calling convention is so complex and dependent
376 on the callee's signature -- for example, the stack pointer has
377 to be a multiple of 16 -- that it seems impossible to me [ccshan]
378 to handle the general case correctly without changing how the
379 adjustor is called from C. For now, our solution of shuffling
380 registers only and ignoring the stack only works if the original
381 caller passed 4 or fewer argument words.
383 TODO: Depending on how much allocation overhead stgMallocBytes uses for
384 header information (more precisely, if the overhead is no more than
385 4 bytes), we should move the first three instructions above down by
386 4 bytes (getting rid of the nop), hence saving memory. [ccshan]
388 ASSERT(((StgWord64)wptr & 3) == 0);
389 adjustor = mallocBytesRWX(48);
391 StgWord64 *const code = (StgWord64 *)adjustor;
393 code[0] = 0x4610041246520414L;
394 code[1] = 0x46730415a61b0020L;
395 code[2] = 0x46310413a77b0028L;
396 code[3] = 0x000000006bfb0000L
397 | (((StgWord32*)(wptr) - (StgWord32*)(code) - 3) & 0x3fff);
/* hptr and wptr quadwords at offsets 0x20 and 0x28, loaded by the
   ldq instructions above. */
399 code[4] = (StgWord64)hptr;
400 code[5] = (StgWord64)wptr;
402 /* Ensure that instruction cache is consistent with our new code */
403 __asm__ volatile("call_pal %0" : : "i" (PAL_imb));
405 #elif defined(powerpc_HOST_ARCH) && defined(linux_HOST_OS)
407 #define OP_LO(op,lo) ((((unsigned)(op)) << 16) | (((unsigned)(lo)) & 0xFFFF))
408 #define OP_HI(op,hi) ((((unsigned)(op)) << 16) | (((unsigned)(hi)) >> 16))
410 /* The PowerPC Linux (32-bit) calling convention is annoyingly complex.
411 We need to calculate all the details of the stack frame layout,
412 taking into account the types of all the arguments, and then
413 generate code on the fly. */
415 int src_gpr = 3, dst_gpr = 5;
417 int src_offset = 0, dst_offset = 0;
418 int n = strlen(typeString),i;
419 int src_locs[n], dst_locs[n];
424 Calculate where the arguments should go.
425 src_locs[] will contain the locations of the arguments in the
426 original stack frame passed to the adjustor.
427 dst_locs[] will contain the locations of the arguments after the
428 adjustor runs, on entry to the wrapper proc pointed to by wptr.
430 This algorithm is based on the one described on page 3-19 of the
431 System V ABI PowerPC Processor Supplement.
/* Location encoding: -32-N means FPR N; -N (N in 3..10) means GPR N;
   a non-negative value is a byte offset into the parameter area. */
433 for(i=0;typeString[i];i++)
435 char t = typeString[i];
436 if((t == 'f' || t == 'd') && fpr <= 8)
437 src_locs[i] = dst_locs[i] = -32-(fpr++);
440 if(t == 'l' && src_gpr <= 9)
442 if((src_gpr & 1) == 0)
444 src_locs[i] = -src_gpr;
447 else if(t == 'i' && src_gpr <= 10)
449 src_locs[i] = -(src_gpr++);
453 if(t == 'l' || t == 'd')
458 src_locs[i] = src_offset;
459 src_offset += (t == 'l' || t == 'd') ? 8 : 4;
462 if(t == 'l' && dst_gpr <= 9)
464 if((dst_gpr & 1) == 0)
466 dst_locs[i] = -dst_gpr;
469 else if(t == 'i' && dst_gpr <= 10)
471 dst_locs[i] = -(dst_gpr++);
475 if(t == 'l' || t == 'd')
480 dst_locs[i] = dst_offset;
481 dst_offset += (t == 'l' || t == 'd') ? 8 : 4;
486 frameSize = dst_offset + 8;
487 frameSize = (frameSize+15) & ~0xF;
492 // allocate space for at most 4 insns per parameter
493 // plus 14 more instructions.
494 adjustor = mallocBytesRWX(4 * (4*n + 14));
495 code = (unsigned*)adjustor;
497 *code++ = 0x48000008; // b *+8
498 // * Put the hptr in a place where freeHaskellFunctionPtr
500 *code++ = (unsigned) hptr;
502 // * save the link register
503 *code++ = 0x7c0802a6; // mflr r0;
504 *code++ = 0x90010004; // stw r0, 4(r1);
505 // * and build a new stack frame
506 *code++ = OP_LO(0x9421, -frameSize); // stwu r1, -frameSize(r1)
508 // * now generate instructions to copy arguments
509 // from the old stack frame into the new stack frame.
512 if(src_locs[i] < -32)
513 ASSERT(dst_locs[i] == src_locs[i]);
514 else if(src_locs[i] < 0)
517 ASSERT(typeString[i] != 'f' && typeString[i] != 'd');
520 ASSERT(dst_locs[i] > -32);
521 // dst is in GPR, too.
523 if(typeString[i] == 'l')
527 | ((-dst_locs[i]+1) << 16)
528 | ((-src_locs[i]+1) << 11)
529 | ((-src_locs[i]+1) << 21);
533 | ((-dst_locs[i]) << 16)
534 | ((-src_locs[i]) << 11)
535 | ((-src_locs[i]) << 21);
539 if(typeString[i] == 'l')
541 // stw src+1, dst_offset+4(r1)
543 | ((-src_locs[i]+1) << 21)
547 // stw src, dst_offset(r1)
549 | ((-src_locs[i]) << 21)
555 ASSERT(dst_locs[i] >= 0);
556 ASSERT(typeString[i] != 'f' && typeString[i] != 'd');
558 if(typeString[i] == 'l')
560 // lwz r0, src_offset(r1)
562 | (src_locs[i] + frameSize + 8 + 4);
563 // stw r0, dst_offset(r1)
565 | (dst_locs[i] + 8 + 4);
567 // lwz r0, src_offset(r1)
569 | (src_locs[i] + frameSize + 8);
570 // stw r0, dst_offset(r1)
576 // * hptr will be the new first argument.
578 *code++ = OP_HI(0x3c60, hptr);
579 // ori r3,r3,lo(hptr)
580 *code++ = OP_LO(0x6063, hptr);
582 // * we need to return to a piece of code
583 // which will tear down the stack frame.
584 // lis r11,hi(obscure_ccall_ret_code)
585 *code++ = OP_HI(0x3d60, obscure_ccall_ret_code);
586 // ori r11,r11,lo(obscure_ccall_ret_code)
587 *code++ = OP_LO(0x616b, obscure_ccall_ret_code);
589 *code++ = 0x7d6803a6;
593 *code++ = OP_HI(0x3d60, wptr);
594 // ori r11,r11,lo(wptr)
595 *code++ = OP_LO(0x616b, wptr);
597 *code++ = 0x7d6903a6;
599 *code++ = 0x4e800420;
601 // Flush the Instruction cache:
603 unsigned *p = adjustor;
606 __asm__ volatile ("dcbf 0,%0\n\tsync\n\ticbi 0,%0"
610 __asm__ volatile ("sync\n\tisync");
614 #elif defined(powerpc_HOST_ARCH) || defined(powerpc64_HOST_ARCH)
616 #define OP_LO(op,lo) ((((unsigned)(op)) << 16) | (((unsigned)(lo)) & 0xFFFF))
617 #define OP_HI(op,hi) ((((unsigned)(op)) << 16) | (((unsigned)(hi)) >> 16))
619 /* The following code applies to all PowerPC and PowerPC64 platforms
620 whose stack layout is based on the AIX ABI.
622 Besides (obviously) AIX, this includes
623 Mac OS 9 and BeOS/PPC (may they rest in peace),
624 which use the 32-bit AIX ABI
626 which uses the 64-bit AIX ABI
627 and Darwin (Mac OS X),
628 which uses the same stack layout as AIX,
629 but no function descriptors.
631 The actual stack-frame shuffling is implemented out-of-line
632 in the function adjustorCode, in AdjustorAsm.S.
633 Here, we set up an AdjustorStub structure, which
634 is a function descriptor (on platforms that have function
635 descriptors) or a short piece of stub code (on Darwin) to call
636 adjustorCode with a pointer to the AdjustorStub struct loaded
639 One nice thing about this is that there is _no_ code generated at
640 runtime on the platforms that have function descriptors.
642 AdjustorStub *adjustorStub;
643 int sz = 0, extra_sz, total_sz;
645 // from AdjustorAsm.s
646 // not declared as a function so that AIX-style
647 // fundescs can never get in the way.
648 extern void *adjustorCode;
651 adjustorStub = stgMallocBytes(sizeof(AdjustorStub), "createAdjustor");
653 adjustorStub = mallocBytesRWX(sizeof(AdjustorStub));
655 adjustor = adjustorStub;
657 adjustorStub->code = (void*) &adjustorCode;
660 // function descriptors are a cool idea.
661 // We don't need to generate any code at runtime.
662 adjustorStub->toc = adjustorStub;
665 // no function descriptors :-(
666 // We need to do things "by hand".
667 #if defined(powerpc_HOST_ARCH)
668 // lis r2, hi(adjustorStub)
669 adjustorStub->lis = OP_HI(0x3c40, adjustorStub);
670 // ori r2, r2, lo(adjustorStub)
671 adjustorStub->ori = OP_LO(0x6042, adjustorStub);
673 adjustorStub->lwz = OP_LO(0x8002, (char*)(&adjustorStub->code)
674 - (char*)adjustorStub);
676 adjustorStub->mtctr = 0x7c0903a6;
678 adjustorStub->bctr = 0x4e800420;
680 barf("adjustor creation not supported on this platform");
683 // Flush the Instruction cache:
685 int n = sizeof(AdjustorStub)/sizeof(unsigned);
686 unsigned *p = (unsigned*)adjustor;
689 __asm__ volatile ("dcbf 0,%0\n\tsync\n\ticbi 0,%0"
693 __asm__ volatile ("sync\n\tisync");
697 // Calculate the size of the stack frame, in words.
700 char t = *typeString++;
704 #if defined(powerpc_HOST_ARCH)
705 // on 32-bit platforms, Double and Int64 occupy two words.
711 // everything else is one word.
716 // The first eight words of the parameter area
717 // are just "backing store" for the parameters passed in
718 // the GPRs. extra_sz is the number of words beyond those first
724 // Calculate the total size of the stack frame.
725 total_sz = (6 /* linkage area */
726 + 8 /* minimum parameter area */
727 + 2 /* two extra arguments */
728 + extra_sz)*sizeof(StgWord);
730 // align to 16 bytes.
731 // AIX only requires 8 bytes, but who cares?
732 total_sz = (total_sz+15) & ~0xF;
734 // Fill in the information that adjustorCode in AdjustorAsm.S
735 // will use to create a new stack frame with the additional args.
736 adjustorStub->hptr = hptr;
737 adjustorStub->wptr = wptr;
738 adjustorStub->negative_framesize = -total_sz;
739 adjustorStub->extrawords_plus_one = extra_sz + 1;
742 #elif defined(ia64_HOST_ARCH)
744 Up to 8 inputs are passed in registers. We flush the last two inputs to
745 the stack, initially into the 16-byte scratch region left by the caller.
746 We then shuffle the others along by 4 (taking 2 registers for ourselves
747 to save return address and previous function state - we need to come back
748 here on the way out to restore the stack, so this is a real function
749 rather than just a trampoline).
751 The function descriptor we create contains the gp of the target function
752 so gp is already loaded correctly.
754 [MLX] alloc r16=ar.pfs,10,2,0
756 [MII] st8.spill [r12]=r38,8 // spill in6 (out4)
757 mov r41=r37 // out7 = in5 (out3)
758 mov r40=r36;; // out6 = in4 (out2)
759 [MII] st8.spill [r12]=r39 // spill in7 (out5)
761 mov r38=r34;; // out4 = in2 (out0)
762 [MII] mov r39=r35 // out5 = in3 (out1)
763 mov r37=r33 // out3 = in1 (loc1)
764 mov r36=r32 // out2 = in0 (loc0)
765 [MLX] adds r12=-24,r12 // update sp
766 movl r34=hptr;; // out0 = hptr
767 [MIB] mov r33=r16 // loc1 = ar.pfs
768 mov r32=b0 // loc0 = retaddr
769 br.call.sptk.many b0=b6;;
771 [MII] adds r12=-16,r12
776 br.ret.sptk.many b0;;
779 /* These macros distribute a long constant into the two words of an MLX bundle */
780 #define BITS(val,start,count) (((val) >> (start)) & ((1 << (count))-1))
781 #define MOVL_LOWORD(val) (BITS(val,22,18) << 46)
782 #define MOVL_HIWORD(val) (BITS(val,40,23) | (BITS(val,0,7) << 36) | (BITS(val,7,9) << 50) \
783 | (BITS(val,16,5) << 55) | (BITS(val,21,1) << 44) | BITS(val,63,1) << 59)
787 IA64FunDesc *wdesc = (IA64FunDesc *)wptr;
788 StgWord64 wcode = wdesc->ip;
792 /* we allocate on the Haskell heap since malloc'd memory isn't executable - argh */
793 adjustor = stgAllocStable(sizeof(IA64FunDesc)+18*8, &stable);
795 fdesc = (IA64FunDesc *)adjustor;
796 code = (StgWord64 *)(fdesc + 1);
797 fdesc->ip = (StgWord64)code;
798 fdesc->gp = wdesc->gp;
/* Pre-encoded IA64 bundles implementing the assembly shown above;
   the movl target addresses (wcode, hptr) are OR'd into the MLX
   bundles via MOVL_LOWORD/MOVL_HIWORD. */
800 code[0] = 0x0000058004288004 | MOVL_LOWORD(wcode);
801 code[1] = 0x6000000220000000 | MOVL_HIWORD(wcode);
802 code[2] = 0x029015d818984001;
803 code[3] = 0x8401200500420094;
804 code[4] = 0x886011d8189c0001;
805 code[5] = 0x84011004c00380c0;
806 code[6] = 0x0250210046013800;
807 code[7] = 0x8401000480420084;
808 code[8] = 0x0000233f19a06005 | MOVL_LOWORD((StgWord64)hptr);
809 code[9] = 0x6000000440000000 | MOVL_HIWORD((StgWord64)hptr);
810 code[10] = 0x0200210020010811;
811 code[11] = 0x1080006800006200;
812 code[12] = 0x0000210018406000;
813 code[13] = 0x00aa021000038005;
814 code[14] = 0x000000010000001d;
815 code[15] = 0x0084000880000200;
817 /* save stable pointers in convenient form */
818 code[16] = (StgWord64)hptr;
819 code[17] = (StgWord64)stable;
822 barf("adjustor creation not supported on this platform");
/* Release an adjustor previously handed out by createAdjustor: free the
 * embedded stable pointer(s) (so the Haskell closure can be GC'd) and then
 * release the thunk itself.  Each arch branch first sanity-checks that ptr
 * really points at one of our adjustors by inspecting the leading
 * instruction bytes/descriptor, and errorBelches if it does not. */
837 freeHaskellFunctionPtr(void* ptr)
839 #if defined(i386_HOST_ARCH)
/* 0x68 = pushl imm32 (ccall adjustor), 0x58 = popl %eax (stdcall). */
840 if ( *(unsigned char*)ptr != 0x68 &&
841 *(unsigned char*)ptr != 0x58 ) {
842 errorBelch("freeHaskellFunctionPtr: not for me, guv! %p\n", ptr);
846 /* Free the stable pointer first..*/
847 if (*(unsigned char*)ptr == 0x68) { /* Aha, a ccall adjustor! */
848 freeStablePtr(*((StgStablePtr*)((unsigned char*)ptr + 0x01)));
850 freeStablePtr(*((StgStablePtr*)((unsigned char*)ptr + 0x02)));
852 #elif defined(sparc_HOST_ARCH)
853 if ( *(unsigned long*)ptr != 0x9C23A008UL ) {
854 errorBelch("freeHaskellFunctionPtr: not for me, guv! %p\n", ptr);
858 /* Free the stable pointer first..*/
/* hptr was stashed in word 11 of the thunk by createAdjustor. */
859 freeStablePtr(*((StgStablePtr*)((unsigned long*)ptr + 11)));
860 #elif defined(alpha_HOST_ARCH)
/* NOTE(review): this magic word and the 0x10 offset below do not match
   what createAdjustor emits on alpha above (code[0] is
   0x4610041246520414L and hptr is stored at offset 0x20, i.e. code[4]).
   This looks like a stale check from an older thunk layout — every free
   on alpha would errorBelch and leak the stable pointer.  Verify against
   the full file before changing. */
861 if ( *(StgWord64*)ptr != 0xa77b0018a61b0010L ) {
862 errorBelch("freeHaskellFunctionPtr: not for me, guv! %p\n", ptr);
866 /* Free the stable pointer first..*/
867 freeStablePtr(*((StgStablePtr*)((unsigned char*)ptr + 0x10)));
868 #elif defined(powerpc_HOST_ARCH) && defined(linux_HOST_OS)
/* 0x48000008 is the leading "b *+8" emitted by createAdjustor; the word
   after it is hptr. */
869 if ( *(StgWord*)ptr != 0x48000008 ) {
870 errorBelch("freeHaskellFunctionPtr: not for me, guv! %p\n", ptr);
873 freeStablePtr(((StgStablePtr*)ptr)[1]);
874 #elif defined(powerpc_HOST_ARCH) || defined(powerpc64_HOST_ARCH)
875 extern void* adjustorCode;
876 if ( ((AdjustorStub*)ptr)->code != (StgFunPtr) &adjustorCode ) {
877 errorBelch("freeHaskellFunctionPtr: not for me, guv! %p\n", ptr);
880 freeStablePtr(((AdjustorStub*)ptr)->hptr);
881 #elif defined(ia64_HOST_ARCH)
882 IA64FunDesc *fdesc = (IA64FunDesc *)ptr;
883 StgWord64 *code = (StgWord64 *)(fdesc+1);
885 if (fdesc->ip != (StgWord64)code) {
886 errorBelch("freeHaskellFunctionPtr: not for me, guv! %p\n", ptr);
/* Both stable pointers (hptr at code[16], the self-keeping-alive stable
   ptr at code[17]) were stashed by createAdjustor's ia64 branch. */
889 freeStablePtr((StgStablePtr)code[16]);
890 freeStablePtr((StgStablePtr)code[17]);
/* Scrub the first byte so a dangling caller fails the sanity checks
   above instead of silently running freed code. */
895 *((unsigned char*)ptr) = '\0';
902 * Function: initAdjustor()
904 * Perform initialisation of adjustor thunk layer (if needed.)
909 #if defined(i386_HOST_ARCH)
910 /* Now here's something obscure for you:
912 When generating an adjustor thunk that uses the C calling
913 convention, we have to make sure that the thunk kicks off
914 the process of jumping into Haskell with a tail jump. Why?
915 Because as a result of jumping in into Haskell we may end
916 up freeing the very adjustor thunk we came from using
917 freeHaskellFunctionPtr(). Hence, we better not return to
918 the adjustor code on our way out, since it could by then
921 The fix is readily at hand, just include the opcodes
922 for the C stack fixup code that we need to perform when
923 returning in some static piece of memory and arrange
924 to return to it before tail jumping from the adjustor thunk.
927 obscure_ccall_ret_code = mallocBytesRWX(4);
929 obscure_ccall_ret_code[0x00] = (unsigned char)0x83; /* addl $0x4, %esp */
930 obscure_ccall_ret_code[0x01] = (unsigned char)0xc4;
931 obscure_ccall_ret_code[0x02] = (unsigned char)0x04;
933 obscure_ccall_ret_code[0x03] = (unsigned char)0xc3; /* ret */