Haskell side.
*/
+#include "PosixSource.h"
#include "Rts.h"
#include "RtsUtils.h"
#include "RtsFlags.h"
};
#endif
+#if defined(alpha_TARGET_ARCH)
+/* To get the definition of PAL_imb: */
+#include <machine/pal.h>
+#endif
+
void*
createAdjustor(int cconv, StgStablePtr hptr, StgFunPtr wptr)
{
the following assembly language snippet
(offset and machine code prefixed):
- <00>: 13 00 3f fb sethi %hi(0x00ffeffa), %o1 # load up wptr (1 of 2)
- <04>: 11 37 ab 6f sethi %hi(0xdeadbeef), %o0 # load up hptr (1 of 2)
- <08>: 81 c2 63 fa jmp %o1+%lo(0x00ffeffa) # jump to wptr (load 2 of 2)
- <0c>: 90 12 22 ef or %o0, %lo(0xdeadbeef), %o0 # load up hptr (2 of 2)
+ <00>: BA 10 00 1B mov %i3, %i5
+ <04>: B8 10 00 1A mov %i2, %i4
+ <08>: B6 10 00 19 mov %i1, %i3
+ <0c>: B4 10 00 18 mov %i0, %i2
+ <10>: 13 00 3f fb sethi %hi(0x00ffeffa), %o1 # load up wptr (1 of 2)
+ <14>: 11 37 ab 6f sethi %hi(0xdeadbeef), %o0 # load up hptr (1 of 2)
+ <18>: 81 c2 63 fa jmp %o1+%lo(0x00ffeffa) # jump to wptr (load 2 of 2)
+ <1c>: 90 12 22 ef or %o0, %lo(0xdeadbeef), %o0 # load up hptr (2 of 2)
# [in delay slot]
- <10>: de ad be ef # Place the value of the StgStablePtr somewhere readable
+ <20>: de ad be ef # Place the value of the StgStablePtr somewhere readable
ccall'ing on a SPARC leaves little to be performed by the caller.
The callee shifts the window on entry and restores it on exit.
code above contains the input parameter to wptr.) The return address
is stored in %o7/%i7. Since we don't shift the window in this code,
the return address is preserved and wptr will return to our caller.
+
+ JRS, 21 Aug 01: the above para is a fiction. The caller passes
+ args in %i0 .. %i5 and then the rest at [%sp+92]. We want to
+ tailjump to wptr, passing hptr as the new first arg, and a dummy
+ second arg, which would be where the return address is on x86.
+ That means we have to shuffle the original caller's args along by
+ two.
+
+ We do a half-correct solution which works only if the original
+ caller passed 4 or fewer arg words. Move %i0 .. %i3 into %i3
+ .. %i6, so we can park hptr in %i0 and a bogus arg in %i1. The
+ fully correct solution would be to subtract 8 from %sp and then
+ place %i4 and %i5 at [%sp+92] and [%sp+96] respectively. This
+ machinery should then work in all cases. (Or would it? Perhaps
+ it would trash parts of the caller's frame. Dunno).
*/
- if ((adjustor = stgMallocBytes(28, "createAdjustor")) != NULL) {
- unsigned char *const adj_code = (unsigned char *)adjustor;
+ if ((adjustor = stgMallocBytes(4*(8+1), "createAdjustor")) != NULL) {
+ unsigned long *const adj_code = (unsigned long *)adjustor;
+
+ /* mov %o3, %o5 */
+ adj_code[0] = (unsigned long)0x9A10000B;
+ /* mov %o2, %o4 */
+ adj_code[1] = (unsigned long)0x9810000A;
+ /* mov %o1, %o3 */
+ adj_code[2] = (unsigned long)0x96100009;
+ /* mov %o0, %o2 */
+ adj_code[3] = (unsigned long)0x94100008;
/* sethi %hi(wptr), %o1 */
- *((unsigned long*)(adj_code+0x00)) = (unsigned long)0x13000000;
- *((unsigned long*)(adj_code+0x00)) |= ((unsigned long)wptr) >> 10;
+ adj_code[4] = (unsigned long)0x13000000;
+ adj_code[4] |= ((unsigned long)wptr) >> 10;
/* sethi %hi(hptr), %o0 */
- *((unsigned long*)(adj_code+0x04)) = (unsigned long)0x11000000;
- *((unsigned long*)(adj_code+0x04)) |= ((unsigned long)hptr) >> 10;
+ adj_code[5] = (unsigned long)0x11000000;
+ adj_code[5] |= ((unsigned long)hptr) >> 10;
/* jmp %o1+%lo(wptr) */
- *((unsigned long*)(adj_code+0x08)) = (unsigned long)0x81c26000;
- *((unsigned long*)(adj_code+0x08)) |= ((unsigned long)wptr) & 0x000003ff;
+ adj_code[6] = (unsigned long)0x81c26000;
+ adj_code[6] |= ((unsigned long)wptr) & 0x000003ff;
/* or %o0, %lo(hptr), %o0 */
- *((unsigned long*)(adj_code+0x0c)) = (unsigned long)0x90122000;
- *((unsigned long*)(adj_code+0x0c)) |= ((unsigned long)hptr) & 0x000003ff;
+ adj_code[7] = (unsigned long)0x90122000;
+ adj_code[7] |= ((unsigned long)hptr) & 0x000003ff;
- *((StgStablePtr*)(adj_code+0x10)) = (StgStablePtr)hptr;
+ adj_code[8] = (StgStablePtr)hptr;
}
#elif defined(alpha_TARGET_ARCH)
/* Magic constant computed by inspecting the code length of
(offset and machine code prefixed; note that the machine code
shown is longwords stored in little-endian order):
- <00>: a61b0010 ldq a0, 0x10(pv) # load up hptr
- <04>: a77b0018 ldq pv, 0x18(pv) # load up wptr
- <08>: 6bfbabcd jmp (pv), 0xabcd # jump to wptr (with hint)
- <0c>: 47ff041f nop # padding for alignment
- <10>: [8 bytes for hptr quadword]
- <18>: [8 bytes for wptr quadword]
+ <00>: 46520414 mov a2, a4
+ <04>: 46100412 mov a0, a2
+ <08>: a61b0020 ldq a0, 0x20(pv) # load up hptr
+ <0c>: 46730415 mov a3, a5
+ <10>: a77b0028 ldq pv, 0x28(pv) # load up wptr
+ <14>: 46310413 mov a1, a3
+ <18>: 6bfb---- jmp (pv), <hint> # jump to wptr (with hint)
+ <1c>: 00000000 # padding for alignment
+ <20>: [8 bytes for hptr quadword]
+ <28>: [8 bytes for wptr quadword]
The "computed" jump at <08> above is really a jump to a fixed
location. Accordingly, we place an always-correct hint in the
jump instruction, namely the address offset from <0c> to wptr,
divided by 4, taking the lowest 14 bits.
+ We only support passing 4 or fewer argument words, for the same
+ reason described under sparc_TARGET_ARCH above by JRS, 21 Aug 01.
+ On the Alpha the first 6 integer arguments are in a0 through a5,
+ and the rest on the stack. Hence we want to shuffle the original
+ caller's arguments by two.
+
+ On the Alpha the calling convention is so complex and dependent
+ on the callee's signature -- for example, the stack pointer has
+ to be a multiple of 16 -- that it seems impossible to me [ccshan]
+ to handle the general case correctly without changing how the
+ adjustor is called from C. For now, our solution of shuffling
+ registers only and ignoring the stack only works if the original
+ caller passed 4 or fewer argument words.
+
TODO: Depending on how much allocation overhead stgMallocBytes uses for
header information (more precisely, if the overhead is no more than
4 bytes), we should move the first three instructions above down by
4 bytes (getting rid of the nop), hence saving memory. [ccshan]
*/
ASSERT(((StgWord64)wptr & 3) == 0);
- if ((adjustor = stgMallocBytes(32, "createAdjustor")) != NULL) {
+ if ((adjustor = stgMallocBytes(48, "createAdjustor")) != NULL) {
StgWord64 *const code = (StgWord64 *)adjustor;
- code[0] = 0xa77b0018a61b0010L;
- code[1] = 0x47ff041f6bfb0000L
+ code[0] = 0x4610041246520414L;
+ code[1] = 0x46730415a61b0020L;
+ code[2] = 0x46310413a77b0028L;
+ code[3] = 0x000000006bfb0000L
| (((StgWord32*)(wptr) - (StgWord32*)(code) - 3) & 0x3fff);
- code[2] = (StgWord64)hptr;
- code[3] = (StgWord64)wptr;
+ code[4] = (StgWord64)hptr;
+ code[5] = (StgWord64)wptr;
+
+ /* Ensure that instruction cache is consistent with our new code */
+ __asm__ volatile("call_pal %0" : : "i" (PAL_imb));
}
#else
#error Adjustor creation is not supported on this platform.