#include "RtsUtils.h"
#include "RtsFlags.h"
+#include <stdlib.h>
+
/* Heavily arch-specific, I'm afraid.. */
-#if defined(i386_TARGET_ARCH) || defined(sparc_TARGET_ARCH) || defined(alpha_TARGET_ARCH)
#if defined(i386_TARGET_ARCH)
/* Now here's something obscure for you:
#include <machine/pal.h>
#endif
+#if defined(ia64_TARGET_ARCH)
+#include "Storage.h"
+
+/* Layout of a function descriptor */
+typedef struct _IA64FunDesc {
+ StgWord64 ip;
+ StgWord64 gp;
+} IA64FunDesc;
+
+static void *
+stgAllocStable(size_t size_in_bytes, StgStablePtr *stable)
+{
+ StgArrWords* arr;
+ nat data_size_in_words, total_size_in_words;
+
+ /* round up to a whole number of words */
+ data_size_in_words = (size_in_bytes + sizeof(W_) + 1) / sizeof(W_);
+ total_size_in_words = sizeofW(StgArrWords) + data_size_in_words;
+
+ /* allocate and fill it in */
+ arr = (StgArrWords *)allocate(total_size_in_words);
+ SET_ARR_HDR(arr, &stg_ARR_WORDS_info, CCCS, data_size_in_words);
+
+ /* obtain a stable ptr */
+ *stable = getStablePtr((StgPtr)arr);
+
+ /* and return a ptr to the goods inside the array */
+ return(BYTE_ARR_CTS(arr));
+}
+#endif
+
void*
createAdjustor(int cconv, StgStablePtr hptr, StgFunPtr wptr)
{
adj_code[0x10] = (unsigned char)0xe0;
}
#elif defined(sparc_TARGET_ARCH)
- /* Magic constant computed by inspecting the code length of
- the following assembly language snippet
- (offset and machine code prefixed):
-
- <00>: BA 10 00 1B mov %i3, %i5
- <04>: B8 10 00 1A mov %i2, %i4
- <08>: B6 10 00 19 mov %i1, %i3
- <0c>: B4 10 00 18 mov %i0, %i2
- <10>: 13 00 3f fb sethi %hi(0x00ffeffa), %o1 # load up wptr (1 of 2)
- <14>: 11 37 ab 6f sethi %hi(0xdeadbeef), %o0 # load up hptr (1 of 2)
- <18>: 81 c2 63 fa jmp %o1+%lo(0x00ffeffa) # jump to wptr (load 2 of 2)
- <1c>: 90 12 22 ef or %o0, %lo(0xdeadbeef), %o0 # load up hptr (2 of 2)
- # [in delay slot]
- <20>: de ad be ef # Place the value of the StgStablePtr somewhere readable
-
- ccall'ing on a SPARC leaves little to be performed by the caller.
- The callee shifts the window on entry and restores it on exit.
- Input paramters and results are passed via registers. (%o0 in the
- code above contains the input paramter to wptr.) The return address
- is stored in %o7/%i7. Since we don't shift the window in this code,
- the return address is preserved and wptr will return to our caller.
-
- JRS, 21 Aug 01: the above para is a fiction. The caller passes
- args in %i0 .. %i5 and then the rest at [%sp+92]. We want to
- tailjump to wptr, passing hptr as the new first arg, and a dummy
- second arg, which would be where the return address is on x86.
- That means we have to shuffle the original caller's args along by
- two.
-
- We do a half-correct solution which works only if the original
- caller passed 4 or fewer arg words. Move %i0 .. %i3 into %i3
- .. %i6, so we can park hptr in %i0 and a bogus arg in %i1. The
- fully correct solution would be to subtract 8 from %sp and then
- place %i4 and %i5 at [%sp+92] and [%sp+96] respectively. This
- machinery should then work in all cases. (Or would it? Perhaps
- it would trash parts of the caller's frame. Dunno).
+ /* Magic constant computed by inspecting the code length of the following
+ assembly language snippet (offset and machine code prefixed):
+
+ <00>: 9C23A008 sub %sp, 8, %sp ! make room for %o4/%o5 in caller's frame
+ <04>: DA23A060 st %o5, [%sp + 96] ! shift registers by 2 positions
+ <08>: D823A05C st %o4, [%sp + 92]
+ <0C>: 9A10000B mov %o3, %o5
+ <10>: 9810000A mov %o2, %o4
+ <14>: 96100009 mov %o1, %o3
+ <18>: 94100008 mov %o0, %o2
+ <1C>: 13000000 sethi %hi(wptr), %o1 ! load up wptr (1 of 2)
+ <20>: 11000000 sethi %hi(hptr), %o0 ! load up hptr (1 of 2)
+ <24>: 81C26000 jmp %o1 + %lo(wptr) ! jump to wptr (load 2 of 2)
+ <28>: 90122000 or %o0, %lo(hptr), %o0 ! load up hptr (2 of 2, delay slot)
+ <2C> 00000000 ! place for getting hptr back easily
+
+ ccall'ing on SPARC is easy, because we are quite lucky to push a
+ multiple of 8 bytes (1 word hptr + 1 word dummy arg) in front of the
+ existing arguments (note that %sp must stay double-word aligned at
+ all times, see ABI spec at http://www.sparc.org/standards/psABI3rd.pdf).
+ To do this, we extend the *caller's* stack frame by 2 words and shift
+ the output registers used for argument passing (%o0 - %o5, we are a *leaf*
+ procedure because of the tail-jump) by 2 positions. This makes room in
+ %o0 and %o1 for the additinal arguments, namely hptr and a dummy (used
+ for destination addr of jump on SPARC, return address on x86, ...). This
+ shouldn't cause any problems for a C-like caller: alloca is implemented
+ similarly, and local variables should be accessed via %fp, not %sp. In a
+ nutshell: This should work! (Famous last words! :-)
*/
- if ((adjustor = stgMallocBytes(4*(8+1), "createAdjustor")) != NULL) {
- unsigned long *const adj_code = (unsigned long *)adjustor;
-
- /* mov %o3, %o5 */
- adj_code[0] = (unsigned long)0x9A10000B;
- /* mov %o2, %o4 */
- adj_code[1] = (unsigned long)0x9810000A;
- /* mov %o1, %o3 */
- adj_code[2] = (unsigned long)0x96100009;
- /* mov %o0, %o2 */
- adj_code[3] = (unsigned long)0x94100008;
-
- /* sethi %hi(wptr), %o1 */
- adj_code[4] = (unsigned long)0x13000000;
- adj_code[4] |= ((unsigned long)wptr) >> 10;
-
- /* sethi %hi(hptr), %o0 */
- adj_code[5] = (unsigned long)0x11000000;
- adj_code[5] |= ((unsigned long)hptr) >> 10;
-
- /* jmp %o1+%lo(wptr) */
- adj_code[6] = (unsigned long)0x81c26000;
- adj_code[6] |= ((unsigned long)wptr) & 0x000003ff;
-
- /* or %o0, %lo(hptr), %o0 */
- adj_code[7] = (unsigned long)0x90122000;
- adj_code[7] |= ((unsigned long)hptr) & 0x000003ff;
-
- adj_code[8] = (StgStablePtr)hptr;
+ if ((adjustor = stgMallocBytes(4*(11+1), "createAdjustor")) != NULL) {
+ unsigned long *const adj_code = (unsigned long *)adjustor;
+
+ adj_code[ 0] = 0x9C23A008UL; /* sub %sp, 8, %sp */
+ adj_code[ 1] = 0xDA23A060UL; /* st %o5, [%sp + 96] */
+ adj_code[ 2] = 0xD823A05CUL; /* st %o4, [%sp + 92] */
+ adj_code[ 3] = 0x9A10000BUL; /* mov %o3, %o5 */
+ adj_code[ 4] = 0x9810000AUL; /* mov %o2, %o4 */
+ adj_code[ 5] = 0x96100009UL; /* mov %o1, %o3 */
+ adj_code[ 6] = 0x94100008UL; /* mov %o0, %o2 */
+ adj_code[ 7] = 0x13000000UL; /* sethi %hi(wptr), %o1 */
+ adj_code[ 7] |= ((unsigned long)wptr) >> 10;
+ adj_code[ 8] = 0x11000000UL; /* sethi %hi(hptr), %o0 */
+ adj_code[ 8] |= ((unsigned long)hptr) >> 10;
+ adj_code[ 9] = 0x81C26000UL; /* jmp %o1 + %lo(wptr) */
+ adj_code[ 9] |= ((unsigned long)wptr) & 0x000003FFUL;
+ adj_code[10] = 0x90122000UL; /* or %o0, %lo(hptr), %o0 */
+ adj_code[10] |= ((unsigned long)hptr) & 0x000003FFUL;
+
+ adj_code[11] = (unsigned long)hptr;
+
+ /* flush cache */
+ asm("flush %0" : : "r" (adj_code ));
+ asm("flush %0" : : "r" (adj_code + 2));
+ asm("flush %0" : : "r" (adj_code + 4));
+ asm("flush %0" : : "r" (adj_code + 6));
+ asm("flush %0" : : "r" (adj_code + 10));
+
+ /* max. 5 instructions latency, and we need at >= 1 for returning */
+ asm("nop");
+ asm("nop");
+ asm("nop");
+ asm("nop");
}
#elif defined(alpha_TARGET_ARCH)
/* Magic constant computed by inspecting the code length of
/* Ensure that instruction cache is consistent with our new code */
__asm__ volatile("call_pal %0" : : "i" (PAL_imb));
}
+#elif defined(powerpc_TARGET_ARCH)
+/*
+ For PowerPC, the following code is used:
+
+ mr r10,r8
+ mr r9,r7
+ mr r8,r6
+ mr r7,r5
+ mr r6,r4
+ mr r5,r3
+ lis r0,0xDEAD ;hi(wptr)
+ lis r3,0xDEAF ;hi(hptr)
+ ori r0,r0,0xBEEF ; lo(wptr)
+ ori r3,r3,0xFACE ; lo(hptr)
+ mtctr r0
+ bctr
+
+ The arguments (passed in registers r3 - r10) are shuffled along by two to
+ make room for hptr and a dummy argument. As r9 and r10 are overwritten by
+ this code, it only works for up to 6 arguments (when floating point arguments
+ are involved, this may be more or less, depending on the exact situation).
+*/
+ if ((adjustor = stgMallocBytes(4*13, "createAdjustor")) != NULL) {
+ unsigned long *const adj_code = (unsigned long *)adjustor;
+
+ // make room for extra arguments
+ adj_code[0] = 0x7d0a4378; //mr r10,r8
+ adj_code[1] = 0x7ce93b78; //mr r9,r7
+ adj_code[2] = 0x7cc83378; //mr r8,r6
+ adj_code[3] = 0x7ca72b78; //mr r7,r5
+ adj_code[4] = 0x7c862378; //mr r6,r4
+ adj_code[5] = 0x7c651b78; //mr r5,r3
+
+ adj_code[6] = 0x3c000000; //lis r0,hi(wptr)
+ adj_code[6] |= ((unsigned long)wptr) >> 16;
+
+ adj_code[7] = 0x3c600000; //lis r3,hi(hptr)
+ adj_code[7] |= ((unsigned long)hptr) >> 16;
+
+ adj_code[8] = 0x60000000; //ori r0,r0,lo(wptr)
+ adj_code[8] |= ((unsigned long)wptr) & 0xFFFF;
+
+ adj_code[9] = 0x60630000; //ori r3,r3,lo(hptr)
+ adj_code[9] |= ((unsigned long)hptr) & 0xFFFF;
+
+ adj_code[10] = 0x7c0903a6; //mtctr r0
+ adj_code[11] = 0x4e800420; //bctr
+ adj_code[12] = (unsigned long)hptr;
+
+ // Flush the Instruction cache:
+ // MakeDataExecutable(adjustor,4*13);
+ /* This would require us to link with CoreServices.framework */
+ { /* this should do the same: */
+ int n = 13;
+ unsigned long *p = adj_code;
+ while(n--)
+ {
+ __asm__ volatile ("dcbf 0,%0\n\tsync\n\ticbi 0,%0"
+ : : "r" (p));
+ p++;
+ }
+ __asm__ volatile ("sync\n\tisync");
+ }
+ }
+#elif defined(ia64_TARGET_ARCH)
+/*
+ Up to 8 inputs are passed in registers. We flush the last two inputs to
+ the stack, initially into the 16-byte scratch region left by the caller.
+ We then shuffle the others along by 4 (taking 2 registers for ourselves
+ to save return address and previous function state - we need to come back
+ here on the way out to restore the stack, so this is a real function
+ rather than just a trampoline).
+
+ The function descriptor we create contains the gp of the target function
+ so gp is already loaded correctly.
+
+ [MLX] alloc r16=ar.pfs,10,2,0
+ movl r17=wptr
+ [MII] st8.spill [r12]=r38,8 // spill in6 (out4)
+ mov r41=r37 // out7 = in5 (out3)
+ mov r40=r36;; // out6 = in4 (out2)
+ [MII] st8.spill [r12]=r39 // spill in7 (out5)
+ mov.sptk b6=r17,50
+ mov r38=r34;; // out4 = in2 (out0)
+ [MII] mov r39=r35 // out5 = in3 (out1)
+ mov r37=r33 // out3 = in1 (loc1)
+ mov r36=r32 // out2 = in0 (loc0)
+ [MLX] adds r12=-24,r12 // update sp
+ movl r34=hptr;; // out0 = hptr
+ [MIB] mov r33=r16 // loc1 = ar.pfs
+ mov r32=b0 // loc0 = retaddr
+ br.call.sptk.many b0=b6;;
+
+ [MII] adds r12=-16,r12
+ mov b0=r32
+ mov.i ar.pfs=r33
+ [MFB] nop.m 0x0
+ nop.f 0x0
+ br.ret.sptk.many b0;;
+*/
+
+/* These macros distribute a long constant into the two words of an MLX bundle */
+#define BITS(val,start,count) (((val) >> (start)) & ((1 << (count))-1))
+#define MOVL_LOWORD(val) (BITS(val,22,18) << 46)
+#define MOVL_HIWORD(val) (BITS(val,40,23) | (BITS(val,0,7) << 36) | (BITS(val,7,9) << 50) \
+ | (BITS(val,16,5) << 55) | (BITS(val,21,1) << 44) | BITS(val,63,1) << 59)
+
+ {
+ StgStablePtr stable;
+ IA64FunDesc *wdesc = (IA64FunDesc *)wptr;
+ StgWord64 wcode = wdesc->ip;
+ IA64FunDesc *fdesc;
+ StgWord64 *code;
+
+ /* we allocate on the Haskell heap since malloc'd memory isn't executable - argh */
+ adjustor = stgAllocStable(sizeof(IA64FunDesc)+18*8, &stable);
+
+ fdesc = (IA64FunDesc *)adjustor;
+ code = (StgWord64 *)(fdesc + 1);
+ fdesc->ip = (StgWord64)code;
+ fdesc->gp = wdesc->gp;
+
+ code[0] = 0x0000058004288004 | MOVL_LOWORD(wcode);
+ code[1] = 0x6000000220000000 | MOVL_HIWORD(wcode);
+ code[2] = 0x029015d818984001;
+ code[3] = 0x8401200500420094;
+ code[4] = 0x886011d8189c0001;
+ code[5] = 0x84011004c00380c0;
+ code[6] = 0x0250210046013800;
+ code[7] = 0x8401000480420084;
+ code[8] = 0x0000233f19a06005 | MOVL_LOWORD((StgWord64)hptr);
+ code[9] = 0x6000000440000000 | MOVL_HIWORD((StgWord64)hptr);
+ code[10] = 0x0200210020010811;
+ code[11] = 0x1080006800006200;
+ code[12] = 0x0000210018406000;
+ code[13] = 0x00aa021000038005;
+ code[14] = 0x000000010000001d;
+ code[15] = 0x0084000880000200;
+
+ /* save stable pointers in convenient form */
+ code[16] = (StgWord64)hptr;
+ code[17] = (StgWord64)stable;
+ }
#else
-#error Adjustor creation is not supported on this platform.
+ barf("adjustor creation not supported on this platform");
#endif
break;
return adjustor;
}
-#endif
void
freeHaskellFunctionPtr(void* ptr)
freeStablePtr(*((StgStablePtr*)((unsigned char*)ptr + 0x02)));
}
#elif defined(sparc_TARGET_ARCH)
- if ( *(unsigned char*)ptr != 0x13 ) {
+ if ( *(unsigned long*)ptr != 0x9C23A008UL ) {
fprintf(stderr, "freeHaskellFunctionPtr: not for me, guv! %p\n", ptr);
return;
}
/* Free the stable pointer first..*/
- freeStablePtr(*((StgStablePtr*)((unsigned char*)ptr + 0x10)));
-#elif defined(sparc_TARGET_ARCH)
+ freeStablePtr(*((StgStablePtr*)((unsigned long*)ptr + 11)));
+#elif defined(alpha_TARGET_ARCH)
if ( *(StgWord64*)ptr != 0xa77b0018a61b0010L ) {
fprintf(stderr, "freeHaskellFunctionPtr: not for me, guv! %p\n", ptr);
return;
/* Free the stable pointer first..*/
freeStablePtr(*((StgStablePtr*)((unsigned char*)ptr + 0x10)));
+#elif defined(powerpc_TARGET_ARCH)
+ if ( *(StgWord*)ptr != 0x7d0a4378 ) {
+ fprintf(stderr, "freeHaskellFunctionPtr: not for me, guv! %p\n", ptr);
+ return;
+ }
+ freeStablePtr(*((StgStablePtr*)((unsigned char*)ptr + 4*12)));
+#elif defined(ia64_TARGET_ARCH)
+ IA64FunDesc *fdesc = (IA64FunDesc *)ptr;
+ StgWord64 *code = (StgWord64 *)(fdesc+1);
+
+ if (fdesc->ip != (StgWord64)code) {
+ fprintf(stderr, "freeHaskellFunctionPtr: not for me, guv! %p\n", ptr);
+ return;
+ }
+ freeStablePtr((StgStablePtr)code[16]);
+ freeStablePtr((StgStablePtr)code[17]);
+ return;
#else
ASSERT(0);
#endif
*((unsigned char*)ptr) = '\0';
- free(ptr);
+ stgFree(ptr);
}