1 /* -----------------------------------------------------------------------------
2 * Foreign export adjustor thunks
6 * ---------------------------------------------------------------------------*/
8 /* A little bit of background...
10 An adjustor thunk is a dynamically allocated code snippet that allows
11 Haskell closures to be viewed as C function pointers.
13 Stable pointers provide a way for the outside world to get access to,
14 and evaluate, Haskell heap objects, with the RTS providing a small
15 range of ops for doing so. So, assuming we've got a stable pointer in
16 our hand in C, we can jump into the Haskell world and evaluate a callback
17 procedure, say. This works OK in some cases where callbacks are used, but
18 does require the external code to know about stable pointers and how to deal
19 with them. We'd like to hide the Haskell-nature of a callback and have it
20 be invoked just like any other C function pointer.
22 Enter adjustor thunks. An adjustor thunk is a little piece of code
23 that's generated on-the-fly (one per Haskell closure being exported)
24 that, when entered using some 'universal' calling convention (e.g., the
25 C calling convention on platform X), pushes an implicit stable pointer
26 (to the Haskell callback) before calling another (static) C function stub
27 which takes care of entering the Haskell code via its stable pointer.
29 An adjustor thunk is allocated on the C heap, and is called from within
30 Haskell just before handing out the function pointer to the Haskell (IO)
31 action. User code should never have to invoke it explicitly.
33 An adjustor thunk differs from a C function pointer in one respect: when
34 the code is through with it, it has to be freed in order to release Haskell
35 and C resources. Failure to do so result in memory leaks on both the C and
39 #include "PosixSource.h"
44 /* Heavily arch-specific, I'm afraid.. */
46 #if defined(i386_TARGET_ARCH)
47 /* Now here's something obscure for you:
49 When generating an adjustor thunk that uses the C calling
50 convention, we have to make sure that the thunk kicks off
51 the process of jumping into Haskell with a tail jump. Why?
52 Because as a result of jumping in into Haskell we may end
53 up freeing the very adjustor thunk we came from using
54 freeHaskellFunctionPtr(). Hence, we better not return to
55 the adjustor code on our way out, since it could by then
58 The fix is readily at hand, just include the opcodes
59 for the C stack fixup code that we need to perform when
60 returning in some static piece of memory and arrange
61 to return to it before tail jumping from the adjustor thunk.
63 For this to work we make the assumption that bytes in .data
64 are considered executable.
66 static unsigned char __obscure_ccall_ret_code [] =
67 { 0x83, 0xc4, 0x04 /* addl $0x4, %esp */
72 #if defined(alpha_TARGET_ARCH)
73 /* To get the definition of PAL_imb: */
74 #include <machine/pal.h>
78 createAdjustor(int cconv, StgStablePtr hptr, StgFunPtr wptr)
80 void *adjustor = NULL;
84 case 0: /* _stdcall */
85 #if defined(i386_TARGET_ARCH)
86 /* Magic constant computed by inspecting the code length of
87 the following assembly language snippet
88 (offset and machine code prefixed):
90 <0>: 58 popl %eax # temp. remove ret addr..
91 <1>: 68 fd fc fe fa pushl 0xfafefcfd # constant is large enough to
93 <6>: 50 pushl %eax # put back ret. addr
94 <7>: b8 fa ef ff 00 movl $0x00ffeffa, %eax # load up wptr
95 <c>: ff e0 jmp %eax # and jump to it.
96 # the callee cleans up the stack
98 if ((adjustor = stgMallocBytes(14, "createAdjustor")) != NULL) {
99 unsigned char *const adj_code = (unsigned char *)adjustor;
100 adj_code[0x00] = (unsigned char)0x58; /* popl %eax */
102 adj_code[0x01] = (unsigned char)0x68; /* pushl hptr (which is a dword immediate ) */
103 *((StgStablePtr*)(adj_code + 0x02)) = (StgStablePtr)hptr;
105 adj_code[0x06] = (unsigned char)0x50; /* pushl %eax */
107 adj_code[0x07] = (unsigned char)0xb8; /* movl $wptr, %eax */
108 *((StgFunPtr*)(adj_code + 0x08)) = (StgFunPtr)wptr;
110 adj_code[0x0c] = (unsigned char)0xff; /* jmp %eax */
111 adj_code[0x0d] = (unsigned char)0xe0;
117 #if defined(i386_TARGET_ARCH)
118 /* Magic constant computed by inspecting the code length of
119 the following assembly language snippet
120 (offset and machine code prefixed):
122 <00>: 68 ef be ad de pushl $0xdeadbeef # constant is large enough to
123 # hold a StgStablePtr
124 <05>: b8 fa ef ff 00 movl $0x00ffeffa, %eax # load up wptr
125 <0a>: 68 ef be ad de pushl $__obscure_ccall_ret_code # push the return address
126 <0f>: ff e0 jmp *%eax # jump to wptr
128 The ccall'ing version is a tad different, passing in the return
129 address of the caller to the auto-generated C stub (which enters
130 via the stable pointer.) (The auto-generated C stub is in on this
131 game, don't worry :-)
133 See the comment next to __obscure_ccall_ret_code why we need to
134 perform a tail jump instead of a call, followed by some C stack
137 Note: The adjustor makes the assumption that any return value
138 coming back from the C stub is not stored on the stack.
139 That's (thankfully) the case here with the restricted set of
140 return types that we support.
142 if ((adjustor = stgMallocBytes(17, "createAdjustor")) != NULL) {
143 unsigned char *const adj_code = (unsigned char *)adjustor;
145 adj_code[0x00] = (unsigned char)0x68; /* pushl hptr (which is a dword immediate ) */
146 *((StgStablePtr*)(adj_code+0x01)) = (StgStablePtr)hptr;
148 adj_code[0x05] = (unsigned char)0xb8; /* movl $wptr, %eax */
149 *((StgFunPtr*)(adj_code + 0x06)) = (StgFunPtr)wptr;
151 adj_code[0x0a] = (unsigned char)0x68; /* pushl __obscure_ccall_ret_code */
152 *((StgFunPtr*)(adj_code + 0x0b)) = (StgFunPtr)__obscure_ccall_ret_code;
154 adj_code[0x0f] = (unsigned char)0xff; /* jmp *%eax */
155 adj_code[0x10] = (unsigned char)0xe0;
157 #elif defined(sparc_TARGET_ARCH)
158 /* Magic constant computed by inspecting the code length of
159 the following assembly language snippet
160 (offset and machine code prefixed):
162 <00>: BA 10 00 1B mov %i3, %i5
163 <04>: B8 10 00 1A mov %i2, %i4
164 <08>: B6 10 00 19 mov %i1, %i3
165 <0c>: B4 10 00 18 mov %i0, %i2
166 <10>: 13 00 3f fb sethi %hi(0x00ffeffa), %o1 # load up wptr (1 of 2)
167 <14>: 11 37 ab 6f sethi %hi(0xdeadbeef), %o0 # load up hptr (1 of 2)
168 <18>: 81 c2 63 fa jmp %o1+%lo(0x00ffeffa) # jump to wptr (load 2 of 2)
169 <1c>: 90 12 22 ef or %o0, %lo(0xdeadbeef), %o0 # load up hptr (2 of 2)
171 <20>: de ad be ef # Place the value of the StgStablePtr somewhere readable
173 ccall'ing on a SPARC leaves little to be performed by the caller.
174 The callee shifts the window on entry and restores it on exit.
175 Input paramters and results are passed via registers. (%o0 in the
176 code above contains the input paramter to wptr.) The return address
177 is stored in %o7/%i7. Since we don't shift the window in this code,
178 the return address is preserved and wptr will return to our caller.
180 JRS, 21 Aug 01: the above para is a fiction. The caller passes
181 args in %i0 .. %i5 and then the rest at [%sp+92]. We want to
182 tailjump to wptr, passing hptr as the new first arg, and a dummy
183 second arg, which would be where the return address is on x86.
184 That means we have to shuffle the original caller's args along by
187 We do a half-correct solution which works only if the original
188 caller passed 4 or fewer arg words. Move %i0 .. %i3 into %i3
189 .. %i6, so we can park hptr in %i0 and a bogus arg in %i1. The
190 fully correct solution would be to subtract 8 from %sp and then
191 place %i4 and %i5 at [%sp+92] and [%sp+96] respectively. This
192 machinery should then work in all cases. (Or would it? Perhaps
193 it would trash parts of the caller's frame. Dunno).
195 SUP, 25 Apr 02: We are quite lucky to push a multiple of 8 bytes in
196 front of the existing arguments, because %sp must stay double-word
197 aligned at all times, see: http://www.sparc.org/standards/psABI3rd.pdf
198 Although we extend the *caller's* stack frame, this shouldn't cause
199 any problems for a C-like caller: alloca is implemented similarly, and
200 local variables should be accessed via %fp, not %sp. In a nutshell:
201 This should work. (Famous last words! :-)
203 if ((adjustor = stgMallocBytes(4*(8+1), "createAdjustor")) != NULL) {
204 unsigned long *const adj_code = (unsigned long *)adjustor;
207 adj_code[0] = (unsigned long)0x9A10000B;
209 adj_code[1] = (unsigned long)0x9810000A;
211 adj_code[2] = (unsigned long)0x96100009;
213 adj_code[3] = (unsigned long)0x94100008;
215 /* sethi %hi(wptr), %o1 */
216 adj_code[4] = (unsigned long)0x13000000;
217 adj_code[4] |= ((unsigned long)wptr) >> 10;
219 /* sethi %hi(hptr), %o0 */
220 adj_code[5] = (unsigned long)0x11000000;
221 adj_code[5] |= ((unsigned long)hptr) >> 10;
223 /* jmp %o1+%lo(wptr) */
224 adj_code[6] = (unsigned long)0x81c26000;
225 adj_code[6] |= ((unsigned long)wptr) & 0x000003ff;
227 /* or %o0, %lo(hptr), %o0 */
228 adj_code[7] = (unsigned long)0x90122000;
229 adj_code[7] |= ((unsigned long)hptr) & 0x000003ff;
231 adj_code[8] = (StgStablePtr)hptr;
234 asm("flush %0" : : "r" (adj_code ));
235 asm("flush %0" : : "r" (adj_code + 2));
236 asm("flush %0" : : "r" (adj_code + 4));
237 asm("flush %0" : : "r" (adj_code + 6));
239 /* max. 5 instructions latency, and we need at >= 1 for returning */
245 #elif defined(alpha_TARGET_ARCH)
246 /* Magic constant computed by inspecting the code length of
247 the following assembly language snippet
248 (offset and machine code prefixed; note that the machine code
249 shown is longwords stored in little-endian order):
251 <00>: 46520414 mov a2, a4
252 <04>: 46100412 mov a0, a2
253 <08>: a61b0020 ldq a0, 0x20(pv) # load up hptr
254 <0c>: 46730415 mov a3, a5
255 <10>: a77b0028 ldq pv, 0x28(pv) # load up wptr
256 <14>: 46310413 mov a1, a3
257 <18>: 6bfb---- jmp (pv), <hint> # jump to wptr (with hint)
258 <1c>: 00000000 # padding for alignment
259 <20>: [8 bytes for hptr quadword]
260 <28>: [8 bytes for wptr quadword]
262 The "computed" jump at <08> above is really a jump to a fixed
263 location. Accordingly, we place an always-correct hint in the
264 jump instruction, namely the address offset from <0c> to wptr,
265 divided by 4, taking the lowest 14 bits.
267 We only support passing 4 or fewer argument words, for the same
268 reason described under sparc_TARGET_ARCH above by JRS, 21 Aug 01.
269 On the Alpha the first 6 integer arguments are in a0 through a5,
270 and the rest on the stack. Hence we want to shuffle the original
271 caller's arguments by two.
273 On the Alpha the calling convention is so complex and dependent
274 on the callee's signature -- for example, the stack pointer has
275 to be a multiple of 16 -- that it seems impossible to me [ccshan]
276 to handle the general case correctly without changing how the
277 adjustor is called from C. For now, our solution of shuffling
278 registers only and ignoring the stack only works if the original
279 caller passed 4 or fewer argument words.
281 TODO: Depending on how much allocation overhead stgMallocBytes uses for
282 header information (more precisely, if the overhead is no more than
283 4 bytes), we should move the first three instructions above down by
284 4 bytes (getting rid of the nop), hence saving memory. [ccshan]
286 ASSERT(((StgWord64)wptr & 3) == 0);
287 if ((adjustor = stgMallocBytes(48, "createAdjustor")) != NULL) {
288 StgWord64 *const code = (StgWord64 *)adjustor;
290 code[0] = 0x4610041246520414L;
291 code[1] = 0x46730415a61b0020L;
292 code[2] = 0x46310413a77b0028L;
293 code[3] = 0x000000006bfb0000L
294 | (((StgWord32*)(wptr) - (StgWord32*)(code) - 3) & 0x3fff);
296 code[4] = (StgWord64)hptr;
297 code[5] = (StgWord64)wptr;
299 /* Ensure that instruction cache is consistent with our new code */
300 __asm__ volatile("call_pal %0" : : "i" (PAL_imb));
302 #elif defined(powerpc_TARGET_ARCH)
304 For PowerPC, the following code is used:
312 lis r0,0xDEAD ;hi(wptr)
313 lis r3,0xDEAF ;hi(hptr)
314 ori r0,r0,0xBEEF ; lo(wptr)
315 ori r3,r3,0xFACE ; lo(hptr)
319 The arguments (passed in registers r3 - r10) are shuffled along by two to
320 make room for hptr and a dummy argument. As r9 and r10 are overwritten by
321 this code, it only works for up to 6 arguments (when floating point arguments
322 are involved, this may be more or less, depending on the exact situation).
324 if ((adjustor = stgMallocBytes(4*13, "createAdjustor")) != NULL) {
325 unsigned long *const adj_code = (unsigned long *)adjustor;
327 // make room for extra arguments
328 adj_code[0] = 0x7d0a4378; //mr r10,r8
329 adj_code[1] = 0x7ce93b78; //mr r9,r7
330 adj_code[2] = 0x7cc83378; //mr r8,r6
331 adj_code[3] = 0x7ca72b78; //mr r7,r5
332 adj_code[4] = 0x7c862378; //mr r6,r4
333 adj_code[5] = 0x7c651b78; //mr r5,r3
335 adj_code[6] = 0x3c000000; //lis r0,hi(wptr)
336 adj_code[6] |= ((unsigned long)wptr) >> 16;
338 adj_code[7] = 0x3c600000; //lis r3,hi(hptr)
339 adj_code[6] |= ((unsigned long)hptr) >> 16;
341 adj_code[8] = 0x60000000; //ori r0,r0,lo(wptr)
342 adj_code[8] |= ((unsigned long)wptr) & 0xFFFF;
344 adj_code[9] = 0x60630000; //ori r3,r3,lo(hptr)
345 adj_code[9] |= ((unsigned long)hptr) & 0xFFFF;
347 adj_code[10] = 0x7c0903a6; //mtctr r0
348 adj_code[11] = 0x4e800420; //bctr
349 adj_code[12] = (unsigned long)hptr;
351 // Flush the Instruction cache:
352 // MakeDataExecutable(adjustor,4*13);
353 /* This would require us to link with CoreServices.framework */
354 { /* this should do the same: */
356 unsigned long *p = adj_code;
359 __asm__ volatile ("dcbf 0,%0\n\tsync\n\ticbi 0,%0"
363 __asm__ volatile ("sync\n\tisync");
367 barf("adjustor creation not supported on this platform");
382 freeHaskellFunctionPtr(void* ptr)
384 #if defined(i386_TARGET_ARCH)
385 if ( *(unsigned char*)ptr != 0x68 &&
386 *(unsigned char*)ptr != 0x58 ) {
387 fprintf(stderr, "freeHaskellFunctionPtr: not for me, guv! %p\n", ptr);
391 /* Free the stable pointer first..*/
392 if (*(unsigned char*)ptr == 0x68) { /* Aha, a ccall adjustor! */
393 freeStablePtr(*((StgStablePtr*)((unsigned char*)ptr + 0x01)));
395 freeStablePtr(*((StgStablePtr*)((unsigned char*)ptr + 0x02)));
397 #elif defined(sparc_TARGET_ARCH)
398 if ( *(unsigned long*)ptr != 0x9A10000B ) {
399 fprintf(stderr, "freeHaskellFunctionPtr: not for me, guv! %p\n", ptr);
403 /* Free the stable pointer first..*/
404 freeStablePtr(*((StgStablePtr*)((unsigned long*)ptr + 8)));
405 #elif defined(alpha_TARGET_ARCH)
406 if ( *(StgWord64*)ptr != 0xa77b0018a61b0010L ) {
407 fprintf(stderr, "freeHaskellFunctionPtr: not for me, guv! %p\n", ptr);
411 /* Free the stable pointer first..*/
412 freeStablePtr(*((StgStablePtr*)((unsigned char*)ptr + 0x10)));
413 #elif defined(powerpc_TARGET_ARCH)
414 if ( *(StgWord*)ptr != 0x7d0a4378 ) {
415 fprintf(stderr, "freeHaskellFunctionPtr: not for me, guv! %p\n", ptr);
418 freeStablePtr(*((StgStablePtr*)((unsigned char*)ptr + 4*12)));
422 *((unsigned char*)ptr) = '\0';