+ code[4] = (StgWord64)hptr;
+ code[5] = (StgWord64)wptr;
+
+ /* Ensure that instruction cache is consistent with our new code */
+ __asm__ volatile("call_pal %0" : : "i" (PAL_imb));
+ }
+#elif defined(powerpc_TARGET_ARCH)
+/*
+ For PowerPC, the following code is used:
+
+ mr r10,r8
+ mr r9,r7
+ mr r8,r6
+ mr r7,r5
+ mr r6,r4
+ mr r5,r3
+ lis r0,0xDEAD ;hi(wptr)
+ lis r3,0xDEAF ;hi(hptr)
+ ori r0,r0,0xBEEF ; lo(wptr)
+ ori r3,r3,0xFACE ; lo(hptr)
+ mtctr r0
+ bctr
+
+ The arguments (passed in registers r3 - r10) are shuffled along by two to
+ make room for hptr and a dummy argument. As r9 and r10 are overwritten by
+ this code, it only works for up to 6 arguments (when floating point arguments
+ are involved, this may be more or less, depending on the exact situation).
+*/
+ if ((adjustor = stgMallocBytes(4*13, "createAdjustor")) != NULL) {
+ unsigned long *const adj_code = (unsigned long *)adjustor;
+
+ // make room for extra arguments
+ adj_code[0] = 0x7d0a4378; //mr r10,r8
+ adj_code[1] = 0x7ce93b78; //mr r9,r7
+ adj_code[2] = 0x7cc83378; //mr r8,r6
+ adj_code[3] = 0x7ca72b78; //mr r7,r5
+ adj_code[4] = 0x7c862378; //mr r6,r4
+ adj_code[5] = 0x7c651b78; //mr r5,r3
+
+ adj_code[6] = 0x3c000000; //lis r0,hi(wptr)
+ adj_code[6] |= ((unsigned long)wptr) >> 16;
+
+ adj_code[7] = 0x3c600000; //lis r3,hi(hptr)
+ adj_code[7] |= ((unsigned long)hptr) >> 16;
+
+ adj_code[8] = 0x60000000; //ori r0,r0,lo(wptr)
+ adj_code[8] |= ((unsigned long)wptr) & 0xFFFF;
+
+ adj_code[9] = 0x60630000; //ori r3,r3,lo(hptr)
+ adj_code[9] |= ((unsigned long)hptr) & 0xFFFF;
+
+ adj_code[10] = 0x7c0903a6; //mtctr r0
+ adj_code[11] = 0x4e800420; //bctr
+ adj_code[12] = (unsigned long)hptr;
+
+ // Flush the Instruction cache:
+ // MakeDataExecutable(adjustor,4*13);
+ /* This would require us to link with CoreServices.framework */
+ { /* this should do the same: */
+ int n = 13;
+ unsigned long *p = adj_code;
+ while(n--)
+ {
+ __asm__ volatile ("dcbf 0,%0\n\tsync\n\ticbi 0,%0"
+ : : "r" (p));
+ p++;
+ }
+ __asm__ volatile ("sync\n\tisync");
+ }
+ }
+#elif defined(ia64_TARGET_ARCH)
+/*
+ Up to 8 inputs are passed in registers. We flush the last two inputs to
+ the stack, initially into the 16-byte scratch region left by the caller.
+ We then shuffle the others along by 4 (taking 2 registers for ourselves
+ to save return address and previous function state - we need to come back
+ here on the way out to restore the stack, so this is a real function
+ rather than just a trampoline).
+
+ The function descriptor we create contains the gp of the target function
+ so gp is already loaded correctly.
+
+ [MLX] alloc r16=ar.pfs,10,2,0
+ movl r17=wptr
+ [MII] st8.spill [r12]=r38,8 // spill in6 (out4)
+ mov r41=r37 // out7 = in5 (out3)
+ mov r40=r36;; // out6 = in4 (out2)
+ [MII] st8.spill [r12]=r39 // spill in7 (out5)
+ mov.sptk b6=r17,50
+ mov r38=r34;; // out4 = in2 (out0)
+ [MII] mov r39=r35 // out5 = in3 (out1)
+ mov r37=r33 // out3 = in1 (loc1)
+ mov r36=r32 // out2 = in0 (loc0)
+ [MLX] adds r12=-24,r12 // update sp
+ movl r34=hptr;; // out0 = hptr
+ [MIB] mov r33=r16 // loc1 = ar.pfs
+ mov r32=b0 // loc0 = retaddr
+ br.call.sptk.many b0=b6;;
+
+ [MII] adds r12=-16,r12
+ mov b0=r32
+ mov.i ar.pfs=r33
+ [MFB] nop.m 0x0
+ nop.f 0x0
+ br.ret.sptk.many b0;;
+*/
+
+/* These macros distribute a long constant into the two words of an MLX bundle */
+#define BITS(val,start,count) (((val) >> (start)) & ((1 << (count))-1))
+#define MOVL_LOWORD(val) (BITS(val,22,18) << 46)
+#define MOVL_HIWORD(val) (BITS(val,40,23) | (BITS(val,0,7) << 36) | (BITS(val,7,9) << 50) \
+ | (BITS(val,16,5) << 55) | (BITS(val,21,1) << 44) | BITS(val,63,1) << 59)
+
+ {
+ StgStablePtr stable;
+ IA64FunDesc *wdesc = (IA64FunDesc *)wptr;
+ StgWord64 wcode = wdesc->ip;
+ IA64FunDesc *fdesc;
+ StgWord64 *code;
+
+ /* we allocate on the Haskell heap since malloc'd memory isn't executable - argh */
+ adjustor = stgAllocStable(sizeof(IA64FunDesc)+18*8, &stable);
+
+ fdesc = (IA64FunDesc *)adjustor;
+ code = (StgWord64 *)(fdesc + 1);
+ fdesc->ip = (StgWord64)code;
+ fdesc->gp = wdesc->gp;
+
+ code[0] = 0x0000058004288004 | MOVL_LOWORD(wcode);
+ code[1] = 0x6000000220000000 | MOVL_HIWORD(wcode);
+ code[2] = 0x029015d818984001;
+ code[3] = 0x8401200500420094;
+ code[4] = 0x886011d8189c0001;
+ code[5] = 0x84011004c00380c0;
+ code[6] = 0x0250210046013800;
+ code[7] = 0x8401000480420084;
+ code[8] = 0x0000233f19a06005 | MOVL_LOWORD((StgWord64)hptr);
+ code[9] = 0x6000000440000000 | MOVL_HIWORD((StgWord64)hptr);
+ code[10] = 0x0200210020010811;
+ code[11] = 0x1080006800006200;
+ code[12] = 0x0000210018406000;
+ code[13] = 0x00aa021000038005;
+ code[14] = 0x000000010000001d;
+ code[15] = 0x0084000880000200;
+
+ /* save stable pointers in convenient form */
+ code[16] = (StgWord64)hptr;
+ code[17] = (StgWord64)stable;