1 /* -----------------------------------------------------------------------------
2 * Foreign export adjustor thunks
6 * ---------------------------------------------------------------------------*/
8 /* A little bit of background...
10 An adjustor thunk is a dynamically allocated code snippet that allows
11 Haskell closures to be viewed as C function pointers.
13 Stable pointers provide a way for the outside world to get access to,
14 and evaluate, Haskell heap objects, with the RTS providing a small
15 range of ops for doing so. So, assuming we've got a stable pointer in
16 our hand in C, we can jump into the Haskell world and evaluate a callback
17 procedure, say. This works OK in some cases where callbacks are used, but
18 does require the external code to know about stable pointers and how to deal
19 with them. We'd like to hide the Haskell-nature of a callback and have it
20 be invoked just like any other C function pointer.
22 Enter adjustor thunks. An adjustor thunk is a little piece of code
23 that's generated on-the-fly (one per Haskell closure being exported)
24 that, when entered using some 'universal' calling convention (e.g., the
25 C calling convention on platform X), pushes an implicit stable pointer
26 (to the Haskell callback) before calling another (static) C function stub
27 which takes care of entering the Haskell code via its stable pointer.
29 An adjustor thunk is allocated on the C heap, and is called from within
30 Haskell just before handing out the function pointer to the Haskell (IO)
31 action. User code should never have to invoke it explicitly.
33 An adjustor thunk differs from a C function pointer in one respect: when
34 the code is through with it, it has to be freed in order to release Haskell
35 and C resources. Failure to do so result in memory leaks on both the C and
39 #include "PosixSource.h"
44 /* Heavily arch-specific, I'm afraid.. */
45 #if defined(i386_TARGET_ARCH) || defined(sparc_TARGET_ARCH) || defined(alpha_TARGET_ARCH)
47 #if defined(i386_TARGET_ARCH)
48 /* Now here's something obscure for you:
50 When generating an adjustor thunk that uses the C calling
51 convention, we have to make sure that the thunk kicks off
52 the process of jumping into Haskell with a tail jump. Why?
53 Because as a result of jumping in into Haskell we may end
54 up freeing the very adjustor thunk we came from using
55 freeHaskellFunctionPtr(). Hence, we better not return to
56 the adjustor code on our way out, since it could by then
59 The fix is readily at hand, just include the opcodes
60 for the C stack fixup code that we need to perform when
61 returning in some static piece of memory and arrange
62 to return to it before tail jumping from the adjustor thunk.
64 For this to work we make the assumption that bytes in .data
65 are considered executable.
67 static unsigned char __obscure_ccall_ret_code [] =
68 { 0x83, 0xc4, 0x04 /* addl $0x4, %esp */
73 #if defined(alpha_TARGET_ARCH)
74 /* To get the definition of PAL_imb: */
75 #include <machine/pal.h>
79 createAdjustor(int cconv, StgStablePtr hptr, StgFunPtr wptr)
81 void *adjustor = NULL;
85 case 0: /* _stdcall */
86 #if defined(i386_TARGET_ARCH)
87 /* Magic constant computed by inspecting the code length of
88 the following assembly language snippet
89 (offset and machine code prefixed):
91 <0>: 58 popl %eax # temp. remove ret addr..
92 <1>: 68 fd fc fe fa pushl 0xfafefcfd # constant is large enough to
94 <6>: 50 pushl %eax # put back ret. addr
95 <7>: b8 fa ef ff 00 movl $0x00ffeffa, %eax # load up wptr
96 <c>: ff e0 jmp %eax # and jump to it.
97 # the callee cleans up the stack
99 if ((adjustor = stgMallocBytes(14, "createAdjustor")) != NULL) {
100 unsigned char *const adj_code = (unsigned char *)adjustor;
101 adj_code[0x00] = (unsigned char)0x58; /* popl %eax */
103 adj_code[0x01] = (unsigned char)0x68; /* pushl hptr (which is a dword immediate ) */
104 *((StgStablePtr*)(adj_code + 0x02)) = (StgStablePtr)hptr;
106 adj_code[0x06] = (unsigned char)0x50; /* pushl %eax */
108 adj_code[0x07] = (unsigned char)0xb8; /* movl $wptr, %eax */
109 *((StgFunPtr*)(adj_code + 0x08)) = (StgFunPtr)wptr;
111 adj_code[0x0c] = (unsigned char)0xff; /* jmp %eax */
112 adj_code[0x0d] = (unsigned char)0xe0;
118 #if defined(i386_TARGET_ARCH)
119 /* Magic constant computed by inspecting the code length of
120 the following assembly language snippet
121 (offset and machine code prefixed):
123 <00>: 68 ef be ad de pushl $0xdeadbeef # constant is large enough to
124 # hold a StgStablePtr
125 <05>: b8 fa ef ff 00 movl $0x00ffeffa, %eax # load up wptr
126 <0a>: 68 ef be ad de pushl $__obscure_ccall_ret_code # push the return address
127 <0f>: ff e0 jmp *%eax # jump to wptr
129 The ccall'ing version is a tad different, passing in the return
130 address of the caller to the auto-generated C stub (which enters
131 via the stable pointer.) (The auto-generated C stub is in on this
132 game, don't worry :-)
134 See the comment next to __obscure_ccall_ret_code why we need to
135 perform a tail jump instead of a call, followed by some C stack
138 Note: The adjustor makes the assumption that any return value
139 coming back from the C stub is not stored on the stack.
140 That's (thankfully) the case here with the restricted set of
141 return types that we support.
143 if ((adjustor = stgMallocBytes(17, "createAdjustor")) != NULL) {
144 unsigned char *const adj_code = (unsigned char *)adjustor;
146 adj_code[0x00] = (unsigned char)0x68; /* pushl hptr (which is a dword immediate ) */
147 *((StgStablePtr*)(adj_code+0x01)) = (StgStablePtr)hptr;
149 adj_code[0x05] = (unsigned char)0xb8; /* movl $wptr, %eax */
150 *((StgFunPtr*)(adj_code + 0x06)) = (StgFunPtr)wptr;
152 adj_code[0x0a] = (unsigned char)0x68; /* pushl __obscure_ccall_ret_code */
153 *((StgFunPtr*)(adj_code + 0x0b)) = (StgFunPtr)__obscure_ccall_ret_code;
155 adj_code[0x0f] = (unsigned char)0xff; /* jmp *%eax */
156 adj_code[0x10] = (unsigned char)0xe0;
158 #elif defined(sparc_TARGET_ARCH)
159 /* Magic constant computed by inspecting the code length of
160 the following assembly language snippet
161 (offset and machine code prefixed):
163 <00>: BA 10 00 1B mov %i3, %i5
164 <04>: B8 10 00 1A mov %i2, %i4
165 <08>: B6 10 00 19 mov %i1, %i3
166 <0c>: B4 10 00 18 mov %i0, %i2
167 <10>: 13 00 3f fb sethi %hi(0x00ffeffa), %o1 # load up wptr (1 of 2)
168 <14>: 11 37 ab 6f sethi %hi(0xdeadbeef), %o0 # load up hptr (1 of 2)
169 <18>: 81 c2 63 fa jmp %o1+%lo(0x00ffeffa) # jump to wptr (load 2 of 2)
170 <1c>: 90 12 22 ef or %o0, %lo(0xdeadbeef), %o0 # load up hptr (2 of 2)
172 <20>: de ad be ef # Place the value of the StgStablePtr somewhere readable
174 ccall'ing on a SPARC leaves little to be performed by the caller.
175 The callee shifts the window on entry and restores it on exit.
176 Input paramters and results are passed via registers. (%o0 in the
177 code above contains the input paramter to wptr.) The return address
178 is stored in %o7/%i7. Since we don't shift the window in this code,
179 the return address is preserved and wptr will return to our caller.
181 JRS, 21 Aug 01: the above para is a fiction. The caller passes
182 args in %i0 .. %i5 and then the rest at [%sp+92]. We want to
183 tailjump to wptr, passing hptr as the new first arg, and a dummy
184 second arg, which would be where the return address is on x86.
185 That means we have to shuffle the original caller's args along by
188 We do a half-correct solution which works only if the original
189 caller passed 4 or fewer arg words. Move %i0 .. %i3 into %i3
190 .. %i6, so we can park hptr in %i0 and a bogus arg in %i1. The
191 fully correct solution would be to subtract 8 from %sp and then
192 place %i4 and %i5 at [%sp+92] and [%sp+96] respectively. This
193 machinery should then work in all cases. (Or would it? Perhaps
194 it would trash parts of the caller's frame. Dunno).
196 if ((adjustor = stgMallocBytes(4*(8+1), "createAdjustor")) != NULL) {
197 unsigned long *const adj_code = (unsigned long *)adjustor;
200 adj_code[0] = (unsigned long)0x9A10000B;
202 adj_code[1] = (unsigned long)0x9810000A;
204 adj_code[2] = (unsigned long)0x96100009;
206 adj_code[3] = (unsigned long)0x94100008;
208 /* sethi %hi(wptr), %o1 */
209 adj_code[4] = (unsigned long)0x13000000;
210 adj_code[4] |= ((unsigned long)wptr) >> 10;
212 /* sethi %hi(hptr), %o0 */
213 adj_code[5] = (unsigned long)0x11000000;
214 adj_code[5] |= ((unsigned long)hptr) >> 10;
216 /* jmp %o1+%lo(wptr) */
217 adj_code[6] = (unsigned long)0x81c26000;
218 adj_code[6] |= ((unsigned long)wptr) & 0x000003ff;
220 /* or %o0, %lo(hptr), %o0 */
221 adj_code[7] = (unsigned long)0x90122000;
222 adj_code[7] |= ((unsigned long)hptr) & 0x000003ff;
224 adj_code[8] = (StgStablePtr)hptr;
226 #elif defined(alpha_TARGET_ARCH)
227 /* Magic constant computed by inspecting the code length of
228 the following assembly language snippet
229 (offset and machine code prefixed; note that the machine code
230 shown is longwords stored in little-endian order):
232 <00>: 46520414 mov a2, a4
233 <04>: 46100412 mov a0, a2
234 <08>: a61b0020 ldq a0, 0x20(pv) # load up hptr
235 <0c>: 46730415 mov a3, a5
236 <10>: a77b0028 ldq pv, 0x28(pv) # load up wptr
237 <14>: 46310413 mov a1, a3
238 <18>: 6bfb---- jmp (pv), <hint> # jump to wptr (with hint)
239 <1c>: 00000000 # padding for alignment
240 <20>: [8 bytes for hptr quadword]
241 <28>: [8 bytes for wptr quadword]
243 The "computed" jump at <08> above is really a jump to a fixed
244 location. Accordingly, we place an always-correct hint in the
245 jump instruction, namely the address offset from <0c> to wptr,
246 divided by 4, taking the lowest 14 bits.
248 We only support passing 4 or fewer argument words, for the same
249 reason described under sparc_TARGET_ARCH above by JRS, 21 Aug 01.
250 On the Alpha the first 6 integer arguments are in a0 through a5,
251 and the rest on the stack. Hence we want to shuffle the original
252 caller's arguments by two.
254 On the Alpha the calling convention is so complex and dependent
255 on the callee's signature -- for example, the stack pointer has
256 to be a multiple of 16 -- that it seems impossible to me [ccshan]
257 to handle the general case correctly without changing how the
258 adjustor is called from C. For now, our solution of shuffling
259 registers only and ignoring the stack only works if the original
260 caller passed 4 or fewer argument words.
262 TODO: Depending on how much allocation overhead stgMallocBytes uses for
263 header information (more precisely, if the overhead is no more than
264 4 bytes), we should move the first three instructions above down by
265 4 bytes (getting rid of the nop), hence saving memory. [ccshan]
267 ASSERT(((StgWord64)wptr & 3) == 0);
268 if ((adjustor = stgMallocBytes(48, "createAdjustor")) != NULL) {
269 StgWord64 *const code = (StgWord64 *)adjustor;
271 code[0] = 0x4610041246520414L;
272 code[1] = 0x46730415a61b0020L;
273 code[2] = 0x46310413a77b0028L;
274 code[3] = 0x000000006bfb0000L
275 | (((StgWord32*)(wptr) - (StgWord32*)(code) - 3) & 0x3fff);
277 code[4] = (StgWord64)hptr;
278 code[5] = (StgWord64)wptr;
280 /* Ensure that instruction cache is consistent with our new code */
281 __asm__ volatile("call_pal %0" : : "i" (PAL_imb));
284 #error Adjustor creation is not supported on this platform.
300 freeHaskellFunctionPtr(void* ptr)
302 #if defined(i386_TARGET_ARCH)
303 if ( *(unsigned char*)ptr != 0x68 &&
304 *(unsigned char*)ptr != 0x58 ) {
305 fprintf(stderr, "freeHaskellFunctionPtr: not for me, guv! %p\n", ptr);
309 /* Free the stable pointer first..*/
310 if (*(unsigned char*)ptr == 0x68) { /* Aha, a ccall adjustor! */
311 freeStablePtr(*((StgStablePtr*)((unsigned char*)ptr + 0x01)));
313 freeStablePtr(*((StgStablePtr*)((unsigned char*)ptr + 0x02)));
315 #elif defined(sparc_TARGET_ARCH)
316 if ( *(unsigned char*)ptr != 0x13 ) {
317 fprintf(stderr, "freeHaskellFunctionPtr: not for me, guv! %p\n", ptr);
321 /* Free the stable pointer first..*/
322 freeStablePtr(*((StgStablePtr*)((unsigned char*)ptr + 0x10)));
323 #elif defined(sparc_TARGET_ARCH)
324 if ( *(StgWord64*)ptr != 0xa77b0018a61b0010L ) {
325 fprintf(stderr, "freeHaskellFunctionPtr: not for me, guv! %p\n", ptr);
329 /* Free the stable pointer first..*/
330 freeStablePtr(*((StgStablePtr*)((unsigned char*)ptr + 0x10)));
334 *((unsigned char*)ptr) = '\0';