1 /* -----------------------------------------------------------------------------
2 * Foreign export adjustor thunks
6 * ---------------------------------------------------------------------------*/
8 /* A little bit of background...
10 An adjustor thunk is a dynamically allocated code snippet that allows
11 Haskell closures to be viewed as C function pointers.
13 Stable pointers provide a way for the outside world to get access to,
14 and evaluate, Haskell heap objects, with the RTS providing a small
15 range of ops for doing so. So, assuming we've got a stable pointer in
16 our hand in C, we can jump into the Haskell world and evaluate a callback
17 procedure, say. This works OK in some cases where callbacks are used, but
18 does require the external code to know about stable pointers and how to deal
19 with them. We'd like to hide the Haskell-nature of a callback and have it
20 be invoked just like any other C function pointer.
22 Enter adjustor thunks. An adjustor thunk is a little piece of code
23 that's generated on-the-fly (one per Haskell closure being exported)
24 that, when entered using some 'universal' calling convention (e.g., the
25 C calling convention on platform X), pushes an implicit stable pointer
26 (to the Haskell callback) before calling another (static) C function stub
27 which takes care of entering the Haskell code via its stable pointer.
29 An adjustor thunk is allocated on the C heap, and is called from within
30 Haskell just before handing out the function pointer to the Haskell (IO)
31 action. User code should never have to invoke it explicitly.
33 An adjustor thunk differs from a C function pointer in one respect: when
34 the code is through with it, it has to be freed in order to release Haskell
35 and C resources. Failure to do so results in memory leaks on both the C and
39 #include "PosixSource.h"
41 #include "RtsExternal.h"
49 #if defined(openbsd_TARGET_OS)
51 #include <sys/types.h>
54 /* no C99 header stdint.h on OpenBSD? */
55 typedef unsigned long my_uintptr_t;
58 /* Heavily arch-specific, I'm afraid.. */
61  * Allocate len bytes which are readable, writable, and executable.
63  * ToDo: If this turns out to be a performance bottleneck, one could
64  * e.g. cache the last VirtualProtect/mprotect-ed region and do
65  * nothing in case of a cache hit.
/*
 * mallocBytesRWX: allocate `len` bytes of memory that can be written to
 * and then executed (used for the dynamically generated adjustor code).
 * The base allocation comes from stgMallocBytes; platform-specific code
 * below changes the page protection where plain malloc'd memory is not
 * executable by default.
 * NOTE(review): on platforms hitting neither #if branch the memory is
 * returned with whatever protection stgMallocBytes gives it — presumably
 * already executable there; verify per target.
 */
68 mallocBytesRWX(int len)
70   void *addr = stgMallocBytes(len, "mallocBytesRWX");
71 #if defined(i386_TARGET_ARCH) && defined(_WIN32)
72   /* This could be necessary for processors which distinguish between READ and
73      EXECUTE memory accesses, e.g. Itaniums. */
74   DWORD dwOldProtect = 0;
75   if (VirtualProtect (addr, len, PAGE_EXECUTE_READWRITE, &dwOldProtect) == 0) {
76     barf("mallocBytesRWX: failed to protect 0x%p; error=%lu; old protection: %lu\n",
77          addr, (unsigned long)GetLastError(), (unsigned long)dwOldProtect);
79 #elif defined(openbsd_TARGET_OS)
80   /* malloced memory isn't executable by default on OpenBSD */
/* Round the malloc'd range out to whole pages, then mprotect the pages
 * RWX.  mask clears the low bits of an address down to a page boundary. */
81   my_uintptr_t pageSize = sysconf(_SC_PAGESIZE);
82   my_uintptr_t mask = ~(pageSize - 1);
83   my_uintptr_t startOfFirstPage = ((my_uintptr_t)addr ) & mask;
84   my_uintptr_t startOfLastPage = ((my_uintptr_t)addr + len - 1) & mask;
85   my_uintptr_t size = startOfLastPage - startOfFirstPage + pageSize;
86   if (mprotect((void*)startOfFirstPage, (size_t)size, PROT_EXEC | PROT_READ | PROT_WRITE) != 0) {
87     barf("mallocBytesRWX: failed to protect 0x%p\n", addr);
93 #if defined(i386_TARGET_ARCH)
94 static unsigned char *obscure_ccall_ret_code;
97 #if defined(alpha_TARGET_ARCH)
98 /* To get the definition of PAL_imb: */
99 # if defined(linux_TARGET_OS)
100 # include <asm/pal.h>
102 # include <machine/pal.h>
106 #if defined(ia64_TARGET_ARCH)
109 /* Layout of a function descriptor */
110 typedef struct _IA64FunDesc {
/*
 * stgAllocStable: allocate `size_in_bytes` of storage inside a Haskell
 * ARR_WORDS heap object, register a stable pointer to that object in
 * `*stable` (so the GC keeps it alive and won't move the reference we
 * hand out), and return a pointer to the payload bytes.
 * Used on ia64 below, where malloc'd memory is not executable.
 * The caller releases the memory by freeing the stable pointer.
 */
116 stgAllocStable(size_t size_in_bytes, StgStablePtr *stable)
119   nat data_size_in_words, total_size_in_words;
121   /* round up to a whole number of words */
/* NOTE(review): the usual round-up idiom is (size + sizeof(W_) - 1) /
 * sizeof(W_); the "+ 1" here over-allocates by one word for most sizes.
 * Harmless (never under-allocates) but looks like a typo — verify. */
122   data_size_in_words = (size_in_bytes + sizeof(W_) + 1) / sizeof(W_);
123   total_size_in_words = sizeofW(StgArrWords) + data_size_in_words;
125   /* allocate and fill it in */
126   arr = (StgArrWords *)allocate(total_size_in_words);
127   SET_ARR_HDR(arr, &stg_ARR_WORDS_info, CCCS, data_size_in_words);
129   /* obtain a stable ptr */
130   *stable = getStablePtr((StgPtr)arr);
132   /* and return a ptr to the goods inside the array */
133   return(BYTE_ARR_CTS(arr));
137 #if defined(powerpc_TARGET_ARCH) || defined(powerpc64_TARGET_ARCH)
138 #if !(defined(powerpc_TARGET_ARCH) && defined(linux_TARGET_OS))
140 /* !!! !!! WARNING: !!! !!!
141  * This structure is accessed from AdjustorAsm.s
142  * Any changes here have to be mirrored in the offsets there.
/* AdjustorStub: the per-closure data (and, on ABIs without function
 * descriptors, a few hand-assembled instructions) that the shared
 * assembly entry point in AdjustorAsm.s reads.  Layout is ABI-specific,
 * hence the nested #ifs.  Field offsets are hard-coded in the .s file. */
145 typedef struct AdjustorStub {
146 #if defined(powerpc_TARGET_ARCH) && defined(darwin_TARGET_OS)
153 #elif defined(powerpc64_TARGET_ARCH) && defined(darwin_TARGET_OS)
154     /* powerpc64-darwin: just guessing that it won't use fundescs. */
165 /* fundesc-based ABIs */
/* Stack-frame bookkeeping consumed by AdjustorAsm.s: the (negated)
 * frame size to allocate, and the count of extra argument words + 1. */
174     StgInt negative_framesize;
175     StgInt extrawords_plus_one;
/*
 * createAdjustor: build a fresh adjustor thunk for the Haskell closure
 * referenced by the stable pointer `hptr`.
 *
 *   cconv      - calling convention: 0 = _stdcall, (other cases follow)
 *   hptr       - stable pointer to the Haskell callback closure
 *   wptr       - pointer to the static C stub that enters the Haskell world
 *   typeString - argument-type signature (used on powerpc/powerpc64 to
 *                compute the stack frame size; 'd','l','f','i' codes below)
 *
 * Returns a freshly allocated piece of executable code (or an
 * AdjustorStub / function descriptor, depending on the target) that can
 * be handed out as a plain C function pointer.  Freed later by
 * freeHaskellFunctionPtr, which pattern-matches on the bytes written
 * here — keep the two in sync.
 */
182 createAdjustor(int cconv, StgStablePtr hptr, StgFunPtr wptr, char *typeString)
184 void *adjustor = NULL;
/* _stdcall (callee pops the arguments): push hptr under the return
 * address and tail-jump to wptr; the callee's ret cleans everything up. */
188 case 0: /* _stdcall */
189 #if defined(i386_TARGET_ARCH)
190 /* Magic constant computed by inspecting the code length of
191 the following assembly language snippet
192 (offset and machine code prefixed):
194 <0>: 58 popl %eax # temp. remove ret addr..
195 <1>: 68 fd fc fe fa pushl 0xfafefcfd # constant is large enough to
196 # hold a StgStablePtr
197 <6>: 50 pushl %eax # put back ret. addr
198 <7>: b8 fa ef ff 00 movl $0x00ffeffa, %eax # load up wptr
199 <c>: ff e0 jmp %eax # and jump to it.
200 # the callee cleans up the stack
202 adjustor = mallocBytesRWX(14);
204 unsigned char *const adj_code = (unsigned char *)adjustor;
205 adj_code[0x00] = (unsigned char)0x58; /* popl %eax */
207 adj_code[0x01] = (unsigned char)0x68; /* pushl hptr (which is a dword immediate ) */
208 *((StgStablePtr*)(adj_code + 0x02)) = (StgStablePtr)hptr;
210 adj_code[0x06] = (unsigned char)0x50; /* pushl %eax */
212 adj_code[0x07] = (unsigned char)0xb8; /* movl $wptr, %eax */
213 *((StgFunPtr*)(adj_code + 0x08)) = (StgFunPtr)wptr;
215 adj_code[0x0c] = (unsigned char)0xff; /* jmp %eax */
216 adj_code[0x0d] = (unsigned char)0xe0;
/* ccall variants follow: the caller cleans the stack, so the thunk must
 * arrange for obscure_ccall_ret_code (see initAdjustor) to undo the
 * extra push on the way back. */
222 #if defined(i386_TARGET_ARCH)
223 /* Magic constant computed by inspecting the code length of
224 the following assembly language snippet
225 (offset and machine code prefixed):
227 <00>: 68 ef be ad de pushl $0xdeadbeef # constant is large enough to
228 # hold a StgStablePtr
229 <05>: b8 fa ef ff 00 movl $0x00ffeffa, %eax # load up wptr
230 <0a>: 68 ef be ad de pushl $obscure_ccall_ret_code # push the return address
231 <0f>: ff e0 jmp *%eax # jump to wptr
233 The ccall'ing version is a tad different, passing in the return
234 address of the caller to the auto-generated C stub (which enters
235 via the stable pointer.) (The auto-generated C stub is in on this
236 game, don't worry :-)
238 See the comment next to obscure_ccall_ret_code why we need to
239 perform a tail jump instead of a call, followed by some C stack
242 Note: The adjustor makes the assumption that any return value
243 coming back from the C stub is not stored on the stack.
244 That's (thankfully) the case here with the restricted set of
245 return types that we support.
247 adjustor = mallocBytesRWX(17);
249 unsigned char *const adj_code = (unsigned char *)adjustor;
251 adj_code[0x00] = (unsigned char)0x68; /* pushl hptr (which is a dword immediate ) */
252 *((StgStablePtr*)(adj_code+0x01)) = (StgStablePtr)hptr;
254 adj_code[0x05] = (unsigned char)0xb8; /* movl $wptr, %eax */
255 *((StgFunPtr*)(adj_code + 0x06)) = (StgFunPtr)wptr;
257 adj_code[0x0a] = (unsigned char)0x68; /* pushl obscure_ccall_ret_code */
258 *((StgFunPtr*)(adj_code + 0x0b)) = (StgFunPtr)obscure_ccall_ret_code;
260 adj_code[0x0f] = (unsigned char)0xff; /* jmp *%eax */
261 adj_code[0x10] = (unsigned char)0xe0;
263 #elif defined(sparc_TARGET_ARCH)
264 /* Magic constant computed by inspecting the code length of the following
265 assembly language snippet (offset and machine code prefixed):
267 <00>: 9C23A008 sub %sp, 8, %sp ! make room for %o4/%o5 in caller's frame
268 <04>: DA23A060 st %o5, [%sp + 96] ! shift registers by 2 positions
269 <08>: D823A05C st %o4, [%sp + 92]
270 <0C>: 9A10000B mov %o3, %o5
271 <10>: 9810000A mov %o2, %o4
272 <14>: 96100009 mov %o1, %o3
273 <18>: 94100008 mov %o0, %o2
274 <1C>: 13000000 sethi %hi(wptr), %o1 ! load up wptr (1 of 2)
275 <20>: 11000000 sethi %hi(hptr), %o0 ! load up hptr (1 of 2)
276 <24>: 81C26000 jmp %o1 + %lo(wptr) ! jump to wptr (load 2 of 2)
277 <28>: 90122000 or %o0, %lo(hptr), %o0 ! load up hptr (2 of 2, delay slot)
278 <2C> 00000000 ! place for getting hptr back easily
280 ccall'ing on SPARC is easy, because we are quite lucky to push a
281 multiple of 8 bytes (1 word hptr + 1 word dummy arg) in front of the
282 existing arguments (note that %sp must stay double-word aligned at
283 all times, see ABI spec at http://www.sparc.org/standards/psABI3rd.pdf).
284 To do this, we extend the *caller's* stack frame by 2 words and shift
285 the output registers used for argument passing (%o0 - %o5, we are a *leaf*
286 procedure because of the tail-jump) by 2 positions. This makes room in
287 %o0 and %o1 for the additinal arguments, namely hptr and a dummy (used
288 for destination addr of jump on SPARC, return address on x86, ...). This
289 shouldn't cause any problems for a C-like caller: alloca is implemented
290 similarly, and local variables should be accessed via %fp, not %sp. In a
291 nutshell: This should work! (Famous last words! :-)
293 adjustor = mallocBytesRWX(4*(11+1));
295 unsigned long *const adj_code = (unsigned long *)adjustor;
297 adj_code[ 0] = 0x9C23A008UL; /* sub %sp, 8, %sp */
298 adj_code[ 1] = 0xDA23A060UL; /* st %o5, [%sp + 96] */
299 adj_code[ 2] = 0xD823A05CUL; /* st %o4, [%sp + 92] */
300 adj_code[ 3] = 0x9A10000BUL; /* mov %o3, %o5 */
301 adj_code[ 4] = 0x9810000AUL; /* mov %o2, %o4 */
302 adj_code[ 5] = 0x96100009UL; /* mov %o1, %o3 */
303 adj_code[ 6] = 0x94100008UL; /* mov %o0, %o2 */
304 adj_code[ 7] = 0x13000000UL; /* sethi %hi(wptr), %o1 */
305 adj_code[ 7] |= ((unsigned long)wptr) >> 10;
306 adj_code[ 8] = 0x11000000UL; /* sethi %hi(hptr), %o0 */
307 adj_code[ 8] |= ((unsigned long)hptr) >> 10;
308 adj_code[ 9] = 0x81C26000UL; /* jmp %o1 + %lo(wptr) */
309 adj_code[ 9] |= ((unsigned long)wptr) & 0x000003FFUL;
310 adj_code[10] = 0x90122000UL; /* or %o0, %lo(hptr), %o0 */
311 adj_code[10] |= ((unsigned long)hptr) & 0x000003FFUL;
/* adj_code[11] is data, not an instruction: a plain copy of hptr so
 * freeHaskellFunctionPtr can recover the stable pointer (word index 11). */
313 adj_code[11] = (unsigned long)hptr;
/* Flush the generated words out of the D-cache / into the I-cache. */
316 asm("flush %0" : : "r" (adj_code ));
317 asm("flush %0" : : "r" (adj_code + 2));
318 asm("flush %0" : : "r" (adj_code + 4));
319 asm("flush %0" : : "r" (adj_code + 6));
320 asm("flush %0" : : "r" (adj_code + 10));
322 /* max. 5 instructions latency, and we need at >= 1 for returning */
328 #elif defined(alpha_TARGET_ARCH)
329 /* Magic constant computed by inspecting the code length of
330 the following assembly language snippet
331 (offset and machine code prefixed; note that the machine code
332 shown is longwords stored in little-endian order):
334 <00>: 46520414 mov a2, a4
335 <04>: 46100412 mov a0, a2
336 <08>: a61b0020 ldq a0, 0x20(pv) # load up hptr
337 <0c>: 46730415 mov a3, a5
338 <10>: a77b0028 ldq pv, 0x28(pv) # load up wptr
339 <14>: 46310413 mov a1, a3
340 <18>: 6bfb---- jmp (pv), <hint> # jump to wptr (with hint)
341 <1c>: 00000000 # padding for alignment
342 <20>: [8 bytes for hptr quadword]
343 <28>: [8 bytes for wptr quadword]
345 The "computed" jump at <08> above is really a jump to a fixed
346 location. Accordingly, we place an always-correct hint in the
347 jump instruction, namely the address offset from <0c> to wptr,
348 divided by 4, taking the lowest 14 bits.
350 We only support passing 4 or fewer argument words, for the same
351 reason described under sparc_TARGET_ARCH above by JRS, 21 Aug 01.
352 On the Alpha the first 6 integer arguments are in a0 through a5,
353 and the rest on the stack. Hence we want to shuffle the original
354 caller's arguments by two.
356 On the Alpha the calling convention is so complex and dependent
357 on the callee's signature -- for example, the stack pointer has
358 to be a multiple of 16 -- that it seems impossible to me [ccshan]
359 to handle the general case correctly without changing how the
360 adjustor is called from C. For now, our solution of shuffling
361 registers only and ignoring the stack only works if the original
362 caller passed 4 or fewer argument words.
364 TODO: Depending on how much allocation overhead stgMallocBytes uses for
365 header information (more precisely, if the overhead is no more than
366 4 bytes), we should move the first three instructions above down by
367 4 bytes (getting rid of the nop), hence saving memory. [ccshan]
369 ASSERT(((StgWord64)wptr & 3) == 0);
370 adjustor = mallocBytesRWX(48);
372 StgWord64 *const code = (StgWord64 *)adjustor;
/* Each quadword packs two little-endian 32-bit instructions from the
 * listing above; code[3] gets the jmp-hint ORed into its low 14 bits. */
374 code[0] = 0x4610041246520414L;
375 code[1] = 0x46730415a61b0020L;
376 code[2] = 0x46310413a77b0028L;
377 code[3] = 0x000000006bfb0000L
378 | (((StgWord32*)(wptr) - (StgWord32*)(code) - 3) & 0x3fff);
/* Data quadwords loaded by the ldq's at <08>/<10>. */
380 code[4] = (StgWord64)hptr;
381 code[5] = (StgWord64)wptr;
383 /* Ensure that instruction cache is consistent with our new code */
384 __asm__ volatile("call_pal %0" : : "i" (PAL_imb));
386 #elif defined(powerpc_TARGET_ARCH) && defined(linux_TARGET_OS)
388 For PowerPC Linux, the following code is used:
396 lis r0,0xDEAD ;hi(wptr)
397 lis r3,0xDEAF ;hi(hptr)
398 ori r0,r0,0xBEEF ; lo(wptr)
399 ori r3,r3,0xFACE ; lo(hptr)
403 The arguments (passed in registers r3 - r10) are shuffled along by two to
404 make room for hptr and a dummy argument. As r9 and r10 are overwritten by
405 this code, it only works for up to 6 arguments (when floating point arguments
406 are involved, this may be more or less, depending on the exact situation).
408 adjustor = mallocBytesRWX(4*13);
410 unsigned long *const adj_code = (unsigned long *)adjustor;
412 // make room for extra arguments
413 adj_code[0] = 0x7d0a4378; //mr r10,r8
414 adj_code[1] = 0x7ce93b78; //mr r9,r7
415 adj_code[2] = 0x7cc83378; //mr r8,r6
416 adj_code[3] = 0x7ca72b78; //mr r7,r5
417 adj_code[4] = 0x7c862378; //mr r6,r4
418 adj_code[5] = 0x7c651b78; //mr r5,r3
// Materialize the 32-bit wptr/hptr constants with lis+ori pairs.
420 adj_code[6] = 0x3c000000; //lis r0,hi(wptr)
421 adj_code[6] |= ((unsigned long)wptr) >> 16;
423 adj_code[7] = 0x3c600000; //lis r3,hi(hptr)
424 adj_code[7] |= ((unsigned long)hptr) >> 16;
426 adj_code[8] = 0x60000000; //ori r0,r0,lo(wptr)
427 adj_code[8] |= ((unsigned long)wptr) & 0xFFFF;
429 adj_code[9] = 0x60630000; //ori r3,r3,lo(hptr)
430 adj_code[9] |= ((unsigned long)hptr) & 0xFFFF;
432 adj_code[10] = 0x7c0903a6; //mtctr r0
433 adj_code[11] = 0x4e800420; //bctr
// Word 12 is data: hptr saved for freeHaskellFunctionPtr (offset 4*12).
434 adj_code[12] = (unsigned long)hptr;
436 // Flush the Instruction cache:
437 // MakeDataExecutable(adjustor,4*13);
438 /* This would require us to link with CoreServices.framework */
439 { /* this should do the same: */
441 unsigned long *p = adj_code;
444 __asm__ volatile ("dcbf 0,%0\n\tsync\n\ticbi 0,%0"
448 __asm__ volatile ("sync\n\tisync");
452 #elif defined(powerpc_TARGET_ARCH) || defined(powerpc64_TARGET_ARCH)
/* Generic powerpc/powerpc64: no per-closure code generation where the
 * ABI has function descriptors; instead fill in an AdjustorStub that the
 * shared assembly routine adjustorCode (AdjustorAsm.s) interprets. */
454 #define OP_LO(op,lo) ((((unsigned)(op)) << 16) | (((unsigned)(lo)) & 0xFFFF))
455 #define OP_HI(op,hi) ((((unsigned)(op)) << 16) | (((unsigned)(hi)) >> 16))
457 AdjustorStub *adjustorStub;
458 int sz = 0, extra_sz, total_sz;
460 // from AdjustorAsm.s
461 // not declared as a function so that AIX-style
462 // fundescs can never get in the way.
463 extern void *adjustorCode;
466 adjustorStub = stgMallocBytes(sizeof(AdjustorStub), "createAdjustor");
468 adjustorStub = mallocBytesRWX(sizeof(AdjustorStub));
470 adjustor = adjustorStub;
472 adjustorStub->code = (void*) &adjustorCode;
475 // function descriptors are a cool idea.
476 // We don't need to generate any code at runtime.
477 adjustorStub->toc = adjustorStub;
480 // no function descriptors :-(
481 // We need to do things "by hand".
482 #if defined(powerpc_TARGET_ARCH)
483 // lis r2, hi(adjustorStub)
484 adjustorStub->lis = OP_HI(0x3c40, adjustorStub);
485 // ori r2, r2, lo(adjustorStub)
486 adjustorStub->ori = OP_LO(0x6042, adjustorStub);
// lwz r0, offsetof(AdjustorStub, code)(r2)
488 adjustorStub->lwz = OP_LO(0x8002, (char*)(&adjustorStub->code)
489 - (char*)adjustorStub);
491 adjustorStub->mtctr = 0x7c0903a6;
493 adjustorStub->bctr = 0x4e800420;
495 barf("adjustor creation not supported on this platform");
498 // Flush the Instruction cache:
500 int n = sizeof(AdjustorStub)/sizeof(unsigned);
501 unsigned *p = (unsigned*)adjustor;
504 __asm__ volatile ("dcbf 0,%0\n\tsync\n\ticbi 0,%0"
508 __asm__ volatile ("sync\n\tisync");
/* NOTE(review): debug printf left in — presumably guarded by a DEBUG
 * conditional on one of the elided lines; verify it's not always-on. */
512 printf("createAdjustor: %s\n", typeString);
/* Walk the type signature to size the parameter area: 'd' (double) and
 * 'l' (long long) take 2 words on 32-bit ppc, 1 on ppc64; 'f'/'i' take 1. */
515 char t = *typeString++;
519 #if defined(powerpc64_TARGET_ARCH)
520 case 'd': sz += 1; break;
521 case 'l': sz += 1; break;
523 case 'd': sz += 2; break;
524 case 'l': sz += 2; break;
526 case 'f': sz += 1; break;
527 case 'i': sz += 1; break;
533 total_sz = (6 /* linkage area */
534 + 8 /* minimum parameter area */
535 + 2 /* two extra arguments */
536 + extra_sz)*sizeof(StgWord);
538 // align to 16 bytes.
539 // AIX only requires 8 bytes, but who cares?
540 total_sz = (total_sz+15) & ~0xF;
542 adjustorStub->hptr = hptr;
543 adjustorStub->wptr = wptr;
544 adjustorStub->negative_framesize = -total_sz;
545 adjustorStub->extrawords_plus_one = extra_sz + 1;
548 #elif defined(ia64_TARGET_ARCH)
550 Up to 8 inputs are passed in registers. We flush the last two inputs to
551 the stack, initially into the 16-byte scratch region left by the caller.
552 We then shuffle the others along by 4 (taking 2 registers for ourselves
553 to save return address and previous function state - we need to come back
554 here on the way out to restore the stack, so this is a real function
555 rather than just a trampoline).
557 The function descriptor we create contains the gp of the target function
558 so gp is already loaded correctly.
560 [MLX] alloc r16=ar.pfs,10,2,0
562 [MII] st8.spill [r12]=r38,8 // spill in6 (out4)
563 mov r41=r37 // out7 = in5 (out3)
564 mov r40=r36;; // out6 = in4 (out2)
565 [MII] st8.spill [r12]=r39 // spill in7 (out5)
567 mov r38=r34;; // out4 = in2 (out0)
568 [MII] mov r39=r35 // out5 = in3 (out1)
569 mov r37=r33 // out3 = in1 (loc1)
570 mov r36=r32 // out2 = in0 (loc0)
571 [MLX] adds r12=-24,r12 // update sp
572 movl r34=hptr;; // out0 = hptr
573 [MIB] mov r33=r16 // loc1 = ar.pfs
574 mov r32=b0 // loc0 = retaddr
575 br.call.sptk.many b0=b6;;
577 [MII] adds r12=-16,r12
582 br.ret.sptk.many b0;;
585 /* These macros distribute a long constant into the two words of an MLX bundle */
586 #define BITS(val,start,count) (((val) >> (start)) & ((1 << (count))-1))
587 #define MOVL_LOWORD(val) (BITS(val,22,18) << 46)
588 #define MOVL_HIWORD(val) (BITS(val,40,23) | (BITS(val,0,7) << 36) | (BITS(val,7,9) << 50) \
589 | (BITS(val,16,5) << 55) | (BITS(val,21,1) << 44) | BITS(val,63,1) << 59)
/* wptr is itself a function descriptor; pull out its entry point so we
 * can embed it via the movl in bundle 0/1. */
593 IA64FunDesc *wdesc = (IA64FunDesc *)wptr;
594 StgWord64 wcode = wdesc->ip;
598 /* we allocate on the Haskell heap since malloc'd memory isn't executable - argh */
599 adjustor = stgAllocStable(sizeof(IA64FunDesc)+18*8, &stable);
/* Layout: [function descriptor][16 bundle words of code][hptr][stable]. */
601 fdesc = (IA64FunDesc *)adjustor;
602 code = (StgWord64 *)(fdesc + 1);
603 fdesc->ip = (StgWord64)code;
604 fdesc->gp = wdesc->gp;
606 code[0] = 0x0000058004288004 | MOVL_LOWORD(wcode);
607 code[1] = 0x6000000220000000 | MOVL_HIWORD(wcode);
608 code[2] = 0x029015d818984001;
609 code[3] = 0x8401200500420094;
610 code[4] = 0x886011d8189c0001;
611 code[5] = 0x84011004c00380c0;
612 code[6] = 0x0250210046013800;
613 code[7] = 0x8401000480420084;
614 code[8] = 0x0000233f19a06005 | MOVL_LOWORD((StgWord64)hptr);
615 code[9] = 0x6000000440000000 | MOVL_HIWORD((StgWord64)hptr);
616 code[10] = 0x0200210020010811;
617 code[11] = 0x1080006800006200;
618 code[12] = 0x0000210018406000;
619 code[13] = 0x00aa021000038005;
620 code[14] = 0x000000010000001d;
621 code[15] = 0x0084000880000200;
623 /* save stable pointers in convenient form */
624 code[16] = (StgWord64)hptr;
625 code[17] = (StgWord64)stable;
628 barf("adjustor creation not supported on this platform");
/*
 * freeHaskellFunctionPtr: release an adjustor thunk previously handed
 * out by createAdjustor.  Recognizes the thunk by its first instruction
 * bytes/words (which must match what createAdjustor emitted), frees the
 * embedded stable pointer so the Haskell closure can be GC'd, then
 * clobbers the first byte so a stale call fails fast.
 * Prints a warning and (presumably, on an elided line) returns early if
 * the pointer doesn't look like one of ours.
 */
643 freeHaskellFunctionPtr(void* ptr)
645 #if defined(i386_TARGET_ARCH)
/* 0x68 = pushl imm32 (ccall adjustor), 0x58 = popl %eax (stdcall). */
646 if ( *(unsigned char*)ptr != 0x68 &&
647 *(unsigned char*)ptr != 0x58 ) {
648 errorBelch("freeHaskellFunctionPtr: not for me, guv! %p\n", ptr);
652 /* Free the stable pointer first..*/
653 if (*(unsigned char*)ptr == 0x68) { /* Aha, a ccall adjustor! */
654 freeStablePtr(*((StgStablePtr*)((unsigned char*)ptr + 0x01)));
656 freeStablePtr(*((StgStablePtr*)((unsigned char*)ptr + 0x02)));
658 #elif defined(sparc_TARGET_ARCH)
/* First word must be the "sub %sp, 8, %sp" emitted by createAdjustor. */
659 if ( *(unsigned long*)ptr != 0x9C23A008UL ) {
660 errorBelch("freeHaskellFunctionPtr: not for me, guv! %p\n", ptr);
664 /* Free the stable pointer first..*/
665 freeStablePtr(*((StgStablePtr*)((unsigned long*)ptr + 11)));
666 #elif defined(alpha_TARGET_ARCH)
/* NOTE(review): this magic constant matches none of the quadwords the
 * alpha branch of createAdjustor writes (code[0] is 0x4610041246520414);
 * it looks like a leftover from an older code layout — verify. */
667 if ( *(StgWord64*)ptr != 0xa77b0018a61b0010L ) {
668 errorBelch("freeHaskellFunctionPtr: not for me, guv! %p\n", ptr);
672 /* Free the stable pointer first..*/
/* NOTE(review): createAdjustor stores hptr at code[4] (byte offset 0x20),
 * but this reads offset 0x10 — likely stale against the current layout;
 * verify against the alpha emitter above. */
673 freeStablePtr(*((StgStablePtr*)((unsigned char*)ptr + 0x10)));
674 #elif defined(powerpc_TARGET_ARCH) && defined(linux_TARGET_OS)
/* First instruction must be "mr r10,r8" as emitted above. */
675 if ( *(StgWord*)ptr != 0x7d0a4378 ) {
676 errorBelch("freeHaskellFunctionPtr: not for me, guv! %p\n", ptr);
/* hptr was stored as data in adj_code[12] (byte offset 4*12). */
679 freeStablePtr(*((StgStablePtr*)((unsigned char*)ptr + 4*12)));
680 #elif defined(powerpc_TARGET_ARCH) || defined(powerpc64_TARGET_ARCH)
681 extern void* adjustorCode;
682 if ( ((AdjustorStub*)ptr)->code != (StgFunPtr) &adjustorCode ) {
683 errorBelch("freeHaskellFunctionPtr: not for me, guv! %p\n", ptr);
686 freeStablePtr(((AdjustorStub*)ptr)->hptr);
687 #elif defined(ia64_TARGET_ARCH)
688 IA64FunDesc *fdesc = (IA64FunDesc *)ptr;
689 StgWord64 *code = (StgWord64 *)(fdesc+1);
/* A genuine adjustor's descriptor points at the code right after it. */
691 if (fdesc->ip != (StgWord64)code) {
692 errorBelch("freeHaskellFunctionPtr: not for me, guv! %p\n", ptr);
/* code[16]/code[17] hold hptr and the array's own stable pointer;
 * freeing the latter lets the GC reclaim the whole heap-allocated thunk. */
695 freeStablePtr((StgStablePtr)code[16]);
696 freeStablePtr((StgStablePtr)code[17]);
/* Poison the entry byte so the recognition checks above reject a
 * double-free / stale call. */
701 *((unsigned char*)ptr) = '\0';
708  * Function: initAdjustor()
710  * Perform initialisation of adjustor thunk layer (if needed.)
/* On i386, pre-build the tiny shared return trampoline that every ccall
 * adjustor tail-jumps back through (see the pushl of
 * obscure_ccall_ret_code in createAdjustor). */
715 #if defined(i386_TARGET_ARCH)
716 /* Now here's something obscure for you:
718 When generating an adjustor thunk that uses the C calling
719 convention, we have to make sure that the thunk kicks off
720 the process of jumping into Haskell with a tail jump. Why?
721 Because as a result of jumping in into Haskell we may end
722 up freeing the very adjustor thunk we came from using
723 freeHaskellFunctionPtr(). Hence, we better not return to
724 the adjustor code on our way out, since it could by then
727 The fix is readily at hand, just include the opcodes
728 for the C stack fixup code that we need to perform when
729 returning in some static piece of memory and arrange
730 to return to it before tail jumping from the adjustor thunk.
733 obscure_ccall_ret_code = mallocBytesRWX(4);
/* 83 c4 04 = addl $0x4,%esp: pop the hptr the adjustor pushed. */
735 obscure_ccall_ret_code[0x00] = (unsigned char)0x83; /* addl $0x4, %esp */
736 obscure_ccall_ret_code[0x01] = (unsigned char)0xc4;
737 obscure_ccall_ret_code[0x02] = (unsigned char)0x04;
739 obscure_ccall_ret_code[0x03] = (unsigned char)0xc3; /* ret */