+ -- addr and result bits offsetsW
+ a_off = addr_offW * bytes_per_word
+ result_off = r_offW * bytes_per_word
+
+ linkageArea = 24
+ parameterArea = sum [ cgRepSizeW a_rep * bytes_per_word
+ | (_, a_rep) <- arg_offs_n_reps ]
+ savedRegisterArea = 4
+ frameSize = padTo16 (linkageArea + max parameterArea 32 + savedRegisterArea)
+ padTo16 x = case x `mod` 16 of
+ 0 -> x
+ y -> x - y + 16
+
+ pass_parameters [] _ _ = []
+ pass_parameters ((a_offW, a_rep):args) nextFPR offsetW =
+ let
+ haskellArgOffset = a_offW * bytes_per_word
+ offsetW' = offsetW + cgRepSizeW a_rep
+
+ pass_word w
+ | offsetW + w < 8 =
+ [0x801f0000 -- lwz rX, src(r31)
+ .|. (fromIntegral src .&. 0xFFFF)
+ .|. (fromIntegral (offsetW+w+3) `shiftL` 21)]
+ | otherwise =
+ [0x801f0000 -- lwz r0, src(r31)
+ .|. (fromIntegral src .&. 0xFFFF),
+ 0x90010000 -- stw r0, dst(r1)
+ .|. (fromIntegral dst .&. 0xFFFF)]
+ where
+ src = haskellArgOffset + w*bytes_per_word
+ dst = linkageArea + (offsetW+w) * bytes_per_word
+ in
+ case a_rep of
+ FloatArg | nextFPR < 14 ->
+ (0xc01f0000 -- lfs fX, haskellArgOffset(r31)
+ .|. (fromIntegral haskellArgOffset .&. 0xFFFF)
+ .|. (fromIntegral nextFPR `shiftL` 21))
+ : pass_parameters args (nextFPR+1) offsetW'
+ DoubleArg | nextFPR < 14 ->
+ (0xc81f0000 -- lfd fX, haskellArgOffset(r31)
+ .|. (fromIntegral haskellArgOffset .&. 0xFFFF)
+ .|. (fromIntegral nextFPR `shiftL` 21))
+ : pass_parameters args (nextFPR+1) offsetW'
+ _ ->
+ concatMap pass_word [0 .. cgRepSizeW a_rep - 1]
+ ++ pass_parameters args nextFPR offsetW'
+
+ gather_result = case r_rep of
+ VoidArg -> []
+ FloatArg ->
+ [0xd03f0000 .|. (fromIntegral result_off .&. 0xFFFF)]
+ -- stfs f1, result_off(r31)
+ DoubleArg ->
+ [0xd83f0000 .|. (fromIntegral result_off .&. 0xFFFF)]
+ -- stfd f1, result_off(r31)
+ _ | cgRepSizeW r_rep == 2 ->
+ [0x907f0000 .|. (fromIntegral result_off .&. 0xFFFF),
+ 0x909f0000 .|. (fromIntegral (result_off+4) .&. 0xFFFF)]
+ -- stw r3, result_off(r31)
+ -- stw r4, result_off+4(r31)
+ _ | cgRepSizeW r_rep == 1 ->
+ [0x907f0000 .|. (fromIntegral result_off .&. 0xFFFF)]
+ -- stw r3, result_off(r31)
+ in
+ concatMap w32_to_w8s_bigEndian $ [
+ 0x7c0802a6, -- mflr r0
+ 0x93e1fffc, -- stw r31,-4(r1)
+ 0x90010008, -- stw r0,8(r1)
+ 0x94210000 .|. (fromIntegral (-frameSize) .&. 0xFFFF),
+ -- stwu r1, -frameSize(r1)
+ 0x7c7f1b78 -- mr r31, r3
+ ] ++ pass_parameters arg_offs_n_reps 1 0 ++ [
+ 0x819f0000 .|. (fromIntegral a_off .&. 0xFFFF),
+ -- lwz r12, a_off(r31)
+ 0x7d8903a6, -- mtctr r12
+ 0x4e800421 -- bctrl
+ ] ++ gather_result ++ [
+ 0x80210000, -- lwz r1, 0(r1)
+ 0x83e1fffc, -- lwz r31, -4(r1)
+ 0x80010008, -- lwz r0, 8(r1)
+ 0x7c0803a6, -- mtlr r0
+ 0x4e800020 -- blr
+ ]
+
+#elif powerpc_TARGET_ARCH && linux_TARGET_OS
+
+ -- All offsets here are measured in Words (not bytes). This includes
+ -- arguments to the load/store machine code generators, alignment numbers
+ -- and the final 'framesize' among others.
+
+ = concatMap w32_to_w8s_bigEndian $ [
+ 0x7c0802a6, -- mflr r0
+ 0x93e1fffc, -- stw r31,-4(r1)
+ 0x90010008, -- stw r0,8(r1)
+ 0x94210000 .|. offset (-framesize), -- stwu r1, -frameSize(r1)
+ 0x7c7f1b78 -- mr r31, r3
+ ] ++ pass_parameters ++ -- pass the parameters
+ loadWord 12 addr_offW ++ [ -- lwz r12, a_off(r31)
+ 0x7d8903a6, -- mtctr r12
+ 0x4e800421 -- bctrl
+ ] ++ gather_result ++ [ -- save the return value
+ 0x80210000, -- lwz r1, 0(r1)
+ 0x83e1fffc, -- lwz r31, -4(r1)
+ 0x80010008, -- lwz r0, 8(r1)
+ 0x7c0803a6, -- mtlr r0
+ 0x4e800020 -- blr
+ ]
+
+ where
+ gather_result :: [Word32]
+ gather_result = case r_rep of
+ VoidArg -> []
+ FloatArg -> storeFloat 1 r_offW
+ DoubleArg -> storeDouble 1 r_offW
+ LongArg -> storeLong 3 r_offW
+ _ -> storeWord 3 r_offW
+
+ pass_parameters :: [Word32]
+ pass_parameters = concat params
+
+ -- vector aligned (4 word = 16 bytes) with 8 extra words of buffer space
+ framesize = alignedTo 4 (argsize + 8)
+
+ ((_,_,argsize), params) = mapAccumL loadparam (3,1,2) arg_offs_n_reps
+
+ -- handle one argument, returning machine code and the updated state
+ loadparam :: (Int, Int, Int) -> (Int, CgRep) ->
+ ((Int, Int, Int), [Word32])
+
+ loadparam (gpr, fpr, stack) (ofs, rep) = case rep of
+ FloatArg | fpr <= 8 -> ( (gpr, fpr + 1, stack), loadFloat fpr ofs )
+ FloatArg -> ( (gpr, fpr, stack + 1), stackWord stack ofs )
+
+ DoubleArg | fpr <= 8 -> ( (gpr, fpr + 1, stack), loadDouble fpr ofs )
+ DoubleArg -> ( (gpr, fpr, astack + 2), stackLong astack ofs )
+
+ LongArg | even gpr -> loadparam (gpr + 1, fpr, stack) (ofs, rep)
+ LongArg | gpr <= 9 -> ( (gpr + 2, fpr, stack), loadLong gpr ofs )
+ LongArg -> ( (gpr, fpr, astack + 2), stackLong astack ofs )
+
+ _ | gpr <= 10 -> ( (gpr + 1, fpr, stack), loadWord gpr ofs )
+ _ -> ( (gpr, fpr, stack + 1), stackWord stack ofs )
+ where astack = alignedTo 2 stack
+
+ alignedTo :: Int -> Int -> Int
+ alignedTo alignment x = case x `mod` alignment of
+ 0 -> x
+ y -> x - y + alignment
+
+ -- convenience macros to do multiple-instruction data moves
+ stackWord dst src = loadWord 0 src ++ storeWordC 0 dst
+ stackLong dst src = stackWord dst src ++ stackWord (dst + 1) (src + 1)
+ loadLong dst src = loadWord dst src ++ loadWord (dst + 1) (src + 1)
+ storeLong dst src = storeWord dst src ++ storeWord (dst + 1) (src + 1)
+
+ -- load data from the Haskell stack (relative to r31)
+ loadFloat = loadstoreInstr 0xc01f0000 -- lfs fpr, ofs(r31)
+ loadDouble = loadstoreInstr 0xc81f0000 -- lfd fpr, ofs(r31)
+ loadWord = loadstoreInstr 0x801f0000 -- lwz gpr, ofs(r31)
+
+ -- store data to the Haskell stack (relative to r31)
+ storeFloat = loadstoreInstr 0xd01f0000 -- stfs fpr, ofs(r31)
+ storeDouble = loadstoreInstr 0xd81f0000 -- stfd fpr, ofs(r31)
+ storeWord = loadstoreInstr 0x901f0000 -- stw gpr, ofs(r31)
+
+ -- store data to the C stack (relative to r1)
+ storeWordC = loadstoreInstr 0x90010000 -- stw gpr, ofs(r1)
+
+ -- machine code building blocks
+ loadstoreInstr :: Word32 -> Int -> Int -> [Word32]
+ loadstoreInstr code reg ofs = [ code .|. register reg .|. offset ofs ]
+
+ register :: Int -> Word32
+ register reg = fromIntegral reg `shiftL` 21
+
+ offset :: Int -> Word32
+ offset ofs = fromIntegral (ofs * 4) .&. 0xFFFF
+
+ -- speaks for itself
+ w32_to_w8s_bigEndian :: Word32 -> [Word8]
+ w32_to_w8s_bigEndian w = [fromIntegral (0xFF .&. (w `shiftR` 24)),
+ fromIntegral (0xFF .&. (w `shiftR` 16)),
+ fromIntegral (0xFF .&. (w `shiftR` 8)),
+ fromIntegral (0xFF .&. w)]
+
+#else
+
+ = error "mkMarshalCode not implemented for this platform."
+
+#endif