%
-% (c) The University of Glasgow 2000
+% (c) The University of Glasgow 2001
%
-\section[ByteCodeGen]{Generate bytecode from Core}
+\section[ByteCodeGen]{Generate machine-code sequences for foreign import}
\begin{code}
-module ByteCodeFFI ( taggedSizeW, untaggedSizeW, mkMarshalCode ) where
+module ByteCodeFFI ( mkMarshalCode, moan64 ) where
#include "HsVersions.h"
-import PrimRep ( PrimRep(..), getPrimRepSize, isFollowableRep )
-import Bits ( Bits(..), shiftR )
-import Word ( Word8, Word32 )
-import Addr ( Addr(..), writeWord8OffAddr )
-import Foreign ( Ptr(..), mallocBytes )
-import IOExts ( unsafePerformIO, trace )
-
+import Outputable
+import SMRep ( CgRep(..), cgRepSizeW )
+import ForeignCall ( CCallConv(..) )
+import Panic
+
+-- DON'T remove apparently unused imports here ..
+-- there is ifdeffery below
+import Control.Exception ( throwDyn )
+import DATA_BITS ( Bits(..), shiftR, shiftL )
+import Foreign ( newArray )
+import Data.List ( mapAccumL )
+
+import DATA_WORD ( Word8, Word32 )
+import Foreign ( Ptr )
+import System.IO.Unsafe ( unsafePerformIO )
+import IO ( hPutStrLn, stderr )
+-- import Debug.Trace ( trace )
\end{code}
%************************************************************************
%* *
-\subsection{The sizes of things. These are platform-independent.}
+\subsection{The platform-dependent marshall-code-generator.}
%* *
%************************************************************************
\begin{code}
--- When I push one of these on the H stack, how much does Sp move by?
-taggedSizeW :: PrimRep -> Int
-taggedSizeW pr
- | isFollowableRep pr = 1 {-it's a pointer, Jim-}
- | otherwise = 1 {-the tag-} + getPrimRepSize pr
-
--- The plain size of something, without tag.
-untaggedSizeW :: PrimRep -> Int
-untaggedSizeW pr
- | isFollowableRep pr = 1
- | otherwise = getPrimRepSize pr
-
--- How big is this thing's tag?
-sizeOfTagW :: PrimRep -> Int
-sizeOfTagW pr
- | isFollowableRep pr = 0
- | otherwise = 1
-
--- Blast a bunch of bytes into malloc'd memory and return the addr.
-sendBytesToMallocville :: [Word8] -> IO Addr
-sendBytesToMallocville bytes
- = do let n = length bytes
- (Ptr a#) <- mallocBytes n
- mapM ( \(off,byte) -> writeWord8OffAddr (A# a#) off byte )
- (zip [0 ..] bytes)
- return (A# a#)
-\end{code}
+moan64 :: String -> SDoc -> a
+moan64 msg pp_rep
+ = unsafePerformIO (
+ hPutStrLn stderr (
+ "\nGHCi's bytecode generation machinery can't handle 64-bit\n" ++
+ "code properly yet. You can work around this for the time being\n" ++
+ "by compiling this module and all those it imports to object code,\n" ++
+ "and re-starting your GHCi session. The panic below contains information,\n" ++
+ "intended for the GHC implementors, about the exact place where GHC gave up.\n"
+ )
+ )
+ `seq`
+ pprPanic msg pp_rep
-%************************************************************************
-%* *
-\subsection{The platform-dependent marshall-code-generator.}
-%* *
-%************************************************************************
-\begin{code}
+-- For sparc_TARGET_ARCH, i386_TARGET_ARCH, etc.
+#include "nativeGen/NCG.h"
{-
Make a piece of code which expects to see the Haskell stack
<arg_1>
Addr# address_of_C_fn
<placeholder-for-result#> (must be an unboxed type)
+
+We cope with both ccall and stdcall for the C fn. However, this code
+itself expects only to be called using the ccall convention -- that is,
+we don't clear our own (single) arg off the C stack.
-}
-mkMarshalCode :: (Int, PrimRep) -> Int -> [(Int, PrimRep)]
- -> Addr
-mkMarshalCode (r_offW, r_rep) addr_offW arg_offs_n_reps
- = let bytes = mkMarshalCode_wrk (r_offW, r_rep)
+mkMarshalCode :: CCallConv
+ -> (Int, CgRep) -> Int -> [(Int, CgRep)]
+ -> IO (Ptr Word8)
+mkMarshalCode cconv (r_offW, r_rep) addr_offW arg_offs_n_reps
+ = let bytes = mkMarshalCode_wrk cconv (r_offW, r_rep)
addr_offW arg_offs_n_reps
- in unsafePerformIO (sendBytesToMallocville bytes)
+ in Foreign.newArray bytes
+
+
-mkMarshalCode_wrk :: (Int, PrimRep) -> Int -> [(Int, PrimRep)]
+mkMarshalCode_wrk :: CCallConv
+ -> (Int, CgRep) -> Int -> [(Int, CgRep)]
-> [Word8]
-mkMarshalCode_wrk (r_offW, r_rep) addr_offW arg_offs_n_reps
+
+mkMarshalCode_wrk cconv (r_offW, r_rep) addr_offW arg_offs_n_reps
+
+#if i386_TARGET_ARCH
= let -- Don't change this without first consulting Intel Corp :-)
bytes_per_word = 4
- -- addr and result bits offsetsW
- offset_of_addr_bitsW = addr_offW + sizeOfTagW AddrRep
- offset_of_res_bitsW = r_offW + sizeOfTagW r_rep
-
offsets_to_pushW
= concat
- [ let -- where this arg's bits start
- a_bits_offW = a_offW + sizeOfTagW a_rep
- in
- [a_bits_offW .. a_bits_offW + untaggedSizeW a_rep - 1]
+ [ -- reversed because x86 is little-endian
+ reverse [a_offW .. a_offW + cgRepSizeW a_rep - 1]
+ -- reversed because args are pushed L -> R onto C stack
| (a_offW, a_rep) <- reverse arg_offs_n_reps
]
= [0x81, 0xC4] ++ lit32 lit
movl_eax_offesimem offB -- movl %eax, offB(%esi)
= [0x89, 0x86] ++ lit32 offB
+ movl_edx_offesimem offB -- movl %edx, offB(%esi)
+ = [0x89, 0x96] ++ lit32 offB
ret -- ret
= [0xC3]
-
+ fstpl_offesimem offB -- fstpl offB(%esi)
+ = [0xDD, 0x9E] ++ lit32 offB
+ fstps_offesimem offB -- fstps offB(%esi)
+ = [0xD9, 0x9E] ++ lit32 offB
lit32 :: Int -> [Word8]
lit32 i = let w32 = (fromIntegral i) :: Word32
in map (fromIntegral . ( .&. 0xFF))
15 3412
16 002a 89967856 movl %edx, 0x12345678(%esi)
16 3412
+ 17
+ 18 0030 DD967856 fstl 0x12345678(%esi)
+ 18 3412
+ 19 0036 DD9E7856 fstpl 0x12345678(%esi)
+ 19 3412
+ 20 003c D9967856 fsts 0x12345678(%esi)
+ 20 3412
+ 21 0042 D99E7856 fstps 0x12345678(%esi)
18
19 0030 C3 ret
20
-}
in
- trace (show (map fst arg_offs_n_reps))
+ --trace (show (map fst arg_offs_n_reps))
(
{- On entry, top of C stack 0(%esp) is the RA and 4(%esp) is
arg passed from the interpreter.
-}
++ movl_offespmem_esi 32
- {- For each arg in args_offs_n_reps, examine the associated PrimRep
- to determine how many payload (non-tag) words there are, and
- whether or not there is a tag. This gives a bunch of offsets on
- the H stack to copy to the C stack:
+ {- For each arg in args_offs_n_reps, examine the associated
+ CgRep to determine how many words there are. This gives a
+ bunch of offsets on the H stack to copy to the C stack:
movl off1(%esi), %ecx
pushl %ecx
movl 4*(1 /*tag*/ +addr_offW)(%esi), %ecx
call * %ecx
-}
- ++ movl_offesimem_ecx (bytes_per_word * offset_of_addr_bitsW)
+ ++ movl_offesimem_ecx (bytes_per_word * addr_offW)
++ call_star_ecx
{- Nuke the args just pushed and re-establish %esi at the
addl $4*number_of_args_pushed, %esp (ccall only)
movl 28+4(%esp), %esi
-}
- ++ add_lit_esp (bytes_per_word * length offsets_to_pushW)
+ ++ (if cconv /= StdCallConv
+ then add_lit_esp (bytes_per_word * length offsets_to_pushW)
+ else [])
++ movl_offespmem_esi 32
{- Depending on what the return type is, get the result
or
fstps 4(%esi)
-}
- ++ case r_rep of
- IntRep -> movl_eax_offesimem 4
+ ++ let i32 = movl_eax_offesimem 0
+ i64 = movl_eax_offesimem 0 ++ movl_edx_offesimem 4
+ f32 = fstps_offesimem 0
+ f64 = fstpl_offesimem 0
+ in
+ case r_rep of
+ NonPtrArg -> i32
+ DoubleArg -> f64
+ FloatArg -> f32
+ -- LongArg -> i64
+ VoidArg -> []
+ other -> moan64 "ByteCodeFFI.mkMarshalCode_wrk(x86)"
+ (ppr r_rep)
{- Restore all the pushed regs and go home.
++ restore_regs
++ ret
)
+
+#elif sparc_TARGET_ARCH
+
+ = let -- At least for sparc V8
+ bytes_per_word = 4
+
+ -- speaks for itself
+ w32_to_w8s_bigEndian :: Word32 -> [Word8]
+ w32_to_w8s_bigEndian w
+ = [fromIntegral (0xFF .&. (w `shiftR` 24)),
+ fromIntegral (0xFF .&. (w `shiftR` 16)),
+ fromIntegral (0xFF .&. (w `shiftR` 8)),
+ fromIntegral (0xFF .&. w)]
+
+ offsets_to_pushW
+ = concat
+ [ [a_offW .. a_offW + cgRepSizeW a_rep - 1]
+
+ | (a_offW, a_rep) <- arg_offs_n_reps
+ ]
+
+ total_argWs = length offsets_to_pushW
+ argWs_on_stack = if total_argWs > 6 then total_argWs - 6
+ else 0
+
+ -- The stack pointer must be kept 8-byte aligned, which means
+ -- we need to calculate this quantity too
+ argWs_on_stack_ROUNDED_UP
+ | odd argWs_on_stack = 1 + argWs_on_stack
+ | otherwise = argWs_on_stack
+
+ -- some helpers to assemble sparc insns.
+ -- REGS
+ iReg, oReg, gReg, fReg :: Int -> Word32
+ iReg = fromIntegral . (+ 24)
+ oReg = fromIntegral . (+ 8)
+ gReg = fromIntegral . (+ 0)
+ fReg = fromIntegral
+
+ sp = oReg 6
+ i0 = iReg 0
+ i7 = iReg 7
+ o0 = oReg 0
+ o1 = oReg 1
+ o7 = oReg 7
+ g0 = gReg 0
+ g1 = gReg 1
+ f0 = fReg 0
+ f1 = fReg 1
+
+ -- INSN templates
+ insn_r_r_i :: Word32 -> Word32 -> Word32 -> Int -> Word32
+ insn_r_r_i op3 rs1 rd imm13
+ = (3 `shiftL` 30)
+ .|. (rs1 `shiftL` 25)
+ .|. (op3 `shiftL` 19)
+ .|. (rd `shiftL` 14)
+ .|. (1 `shiftL` 13)
+ .|. mkSimm13 imm13
+
+ insn_r_i_r :: Word32 -> Word32 -> Int -> Word32 -> Word32
+ insn_r_i_r op3 rs1 imm13 rd
+ = (2 `shiftL` 30)
+ .|. (rd `shiftL` 25)
+ .|. (op3 `shiftL` 19)
+ .|. (rs1 `shiftL` 14)
+ .|. (1 `shiftL` 13)
+ .|. mkSimm13 imm13
+
+ mkSimm13 :: Int -> Word32
+ mkSimm13 imm13
+ = let imm13w = (fromIntegral imm13) :: Word32
+ in imm13w .&. 0x1FFF
+
+ -- REAL (non-synthetic) insns
+ -- or %rs1, %rs2, %rd
+ mkOR :: Word32 -> Word32 -> Word32 -> Word32
+ mkOR rs1 rs2 rd
+ = (2 `shiftL` 30)
+ .|. (rd `shiftL` 25)
+ .|. (op3_OR `shiftL` 19)
+ .|. (rs1 `shiftL` 14)
+ .|. (0 `shiftL` 13)
+ .|. rs2
+ where op3_OR = 2 :: Word32
+
+ -- ld(int) [%rs + imm13], %rd
+ mkLD rs1 imm13 rd = insn_r_r_i 0x00{-op3_LD-} rd rs1 imm13
+
+ -- st(int) %rs, [%rd + imm13]
+ mkST = insn_r_r_i 0x04 -- op3_ST
+
+ -- st(float) %rs, [%rd + imm13]
+ mkSTF = insn_r_r_i 0x24 -- op3_STF
+
+ -- jmpl %rs + imm13, %rd
+ mkJMPL = insn_r_i_r 0x38 -- op3_JMPL
+
+ -- save %rs + imm13, %rd
+ mkSAVE = insn_r_i_r 0x3C -- op3_SAVE
+
+ -- restore %rs + imm13, %rd
+ mkRESTORE = insn_r_i_r 0x3D -- op3_RESTORE
+
+ -- SYNTHETIC insns
+ mkNOP = mkOR g0 g0 g0
+ mkCALL reg = mkJMPL reg 0 o7
+ mkRET = mkJMPL i7 8 g0
+ mkRESTORE_TRIVIAL = mkRESTORE g0 0 g0
+ in
+ --trace (show (map fst arg_offs_n_reps))
+ concatMap w32_to_w8s_bigEndian (
+
+ {- On entry, %o0 is the arg passed from the interpreter. After
+ the initial save insn, it will be in %i0. Studying the sparc
+ docs one would have thought that the minimum frame size is 92
+ bytes, but gcc always uses at least 112, and indeed there are
+ segfaults a-plenty with 92. So I use 112 here as well. I
+ don't understand why, tho.
+ -}
+ [mkSAVE sp (- ({-92-}112 + 4*argWs_on_stack_ROUNDED_UP)) sp]
+
+ {- For each arg in args_offs_n_reps, examine the associated
+ CgRep to determine how many words there are. This gives a
+ bunch of offsets on the H stack. Move the first 6 words into
+ %o0 .. %o5 and the rest on the stack, starting at [%sp+92].
+ Use %g1 as a temp.
+ -}
+ ++ let doArgW (offW, wordNo)
+ | wordNo < 6
+ = [mkLD i0 (bytes_per_word * offW) (oReg wordNo)]
+ | otherwise
+ = [mkLD i0 (bytes_per_word * offW) g1,
+ mkST g1 sp (92 + bytes_per_word * (wordNo - 6))]
+ in
+ concatMap doArgW (zip offsets_to_pushW [0 ..])
+
+ {- Get the addr to call into %g1, bearing in mind that there's
+ an Addr# tag at the indicated location, and do the call:
+
+ ld [4*(1 /*tag*/ +addr_offW) + %i0], %g1
+ call %g1
+ -}
+ ++ [mkLD i0 (bytes_per_word * addr_offW) g1,
+ mkCALL g1,
+ mkNOP]
+
+ {- Depending on what the return type is, get the result
+ from %o0 or %o1:%o0 or %f0 or %f1:%f0.
+
+ st %o0, [%i0 + 4] -- 32 bit int
+ or
+ st %o0, [%i0 + 4] -- 64 bit int
+ st %o1, [%i0 + 8] -- or the other way round?
+ or
+ st %f0, [%i0 + 4] -- 32 bit float
+ or
+ st %f0, [%i0 + 4] -- 64 bit float
+ st %f1, [%i0 + 8] -- or the other way round?
+
+ -}
+ ++ let i32 = [mkST o0 i0 0]
+ i64 = [mkST o0 i0 0, mkST o1 i0 4]
+ f32 = [mkSTF f0 i0 0]
+ f64 = [mkSTF f0 i0 0, mkSTF f1 i0 4]
+ in
+ case r_rep of
+ NonPtrArg -> i32
+ DoubleArg -> f64
+ FloatArg -> f32
+ VoidArg -> []
+ other -> moan64 "ByteCodeFFI.mkMarshalCode_wrk(sparc)"
+ (ppr r_rep)
+
+ ++ [mkRET,
+ mkRESTORE_TRIVIAL] -- this is in the delay slot of the RET
+ )
+#elif powerpc_TARGET_ARCH && darwin_TARGET_OS
+
+ = let
+ bytes_per_word = 4
+
+ -- speaks for itself
+ w32_to_w8s_bigEndian :: Word32 -> [Word8]
+ w32_to_w8s_bigEndian w
+ = [fromIntegral (0xFF .&. (w `shiftR` 24)),
+ fromIntegral (0xFF .&. (w `shiftR` 16)),
+ fromIntegral (0xFF .&. (w `shiftR` 8)),
+ fromIntegral (0xFF .&. w)]
+
+ -- addr and result bits offsetsW
+ a_off = addr_offW * bytes_per_word
+ result_off = r_offW * bytes_per_word
+
+ linkageArea = 24
+ parameterArea = sum [ cgRepSizeW a_rep * bytes_per_word
+ | (_, a_rep) <- arg_offs_n_reps ]
+ savedRegisterArea = 4
+ frameSize = padTo16 (linkageArea + max parameterArea 32 + savedRegisterArea)
+ padTo16 x = case x `mod` 16 of
+ 0 -> x
+ y -> x - y + 16
+
+ pass_parameters [] _ _ = []
+ pass_parameters ((a_offW, a_rep):args) nextFPR offsetW =
+ let
+ haskellArgOffset = a_offW * bytes_per_word
+ offsetW' = offsetW + cgRepSizeW a_rep
+
+ pass_word w
+ | offsetW + w < 8 =
+ [0x801f0000 -- lwz rX, src(r31)
+ .|. (fromIntegral src .&. 0xFFFF)
+ .|. (fromIntegral (offsetW+w+3) `shiftL` 21)]
+ | otherwise =
+ [0x801f0000 -- lwz r0, src(r31)
+ .|. (fromIntegral src .&. 0xFFFF),
+ 0x90010000 -- stw r0, dst(r1)
+ .|. (fromIntegral dst .&. 0xFFFF)]
+ where
+ src = haskellArgOffset + w*bytes_per_word
+ dst = linkageArea + (offsetW+w) * bytes_per_word
+ in
+ case a_rep of
+ FloatArg | nextFPR < 14 ->
+ (0xc01f0000 -- lfs fX, haskellArgOffset(r31)
+ .|. (fromIntegral haskellArgOffset .&. 0xFFFF)
+ .|. (fromIntegral nextFPR `shiftL` 21))
+ : pass_parameters args (nextFPR+1) offsetW'
+ DoubleArg | nextFPR < 14 ->
+ (0xc81f0000 -- lfd fX, haskellArgOffset(r31)
+ .|. (fromIntegral haskellArgOffset .&. 0xFFFF)
+ .|. (fromIntegral nextFPR `shiftL` 21))
+ : pass_parameters args (nextFPR+1) offsetW'
+ _ ->
+ concatMap pass_word [0 .. cgRepSizeW a_rep - 1]
+ ++ pass_parameters args nextFPR offsetW'
+
+ gather_result = case r_rep of
+ VoidArg -> []
+ FloatArg ->
+ [0xd03f0000 .|. (fromIntegral result_off .&. 0xFFFF)]
+ -- stfs f1, result_off(r31)
+ DoubleArg ->
+ [0xd83f0000 .|. (fromIntegral result_off .&. 0xFFFF)]
+ -- stfd f1, result_off(r31)
+ _ | cgRepSizeW r_rep == 2 ->
+ [0x907f0000 .|. (fromIntegral result_off .&. 0xFFFF),
+ 0x909f0000 .|. (fromIntegral (result_off+4) .&. 0xFFFF)]
+ -- stw r3, result_off(r31)
+ -- stw r4, result_off+4(r31)
+ _ | cgRepSizeW r_rep == 1 ->
+ [0x907f0000 .|. (fromIntegral result_off .&. 0xFFFF)]
+ -- stw r3, result_off(r31)
+ in
+ concatMap w32_to_w8s_bigEndian $ [
+ 0x7c0802a6, -- mflr r0
+ 0x93e1fffc, -- stw r31,-4(r1)
+ 0x90010008, -- stw r0,8(r1)
+ 0x94210000 .|. (fromIntegral (-frameSize) .&. 0xFFFF),
+ -- stwu r1, -frameSize(r1)
+ 0x7c7f1b78 -- mr r31, r3
+ ] ++ pass_parameters arg_offs_n_reps 1 0 ++ [
+ 0x819f0000 .|. (fromIntegral a_off .&. 0xFFFF),
+ -- lwz r12, a_off(r31)
+ 0x7d8903a6, -- mtctr r12
+ 0x4e800421 -- bctrl
+ ] ++ gather_result ++ [
+ 0x80210000, -- lwz r1, 0(r1)
+ 0x83e1fffc, -- lwz r31, -4(r1)
+ 0x80010008, -- lwz r0, 8(r1)
+ 0x7c0803a6, -- mtlr r0
+ 0x4e800020 -- blr
+ ]
+
+#elif powerpc_TARGET_ARCH && linux_TARGET_OS
+
+ -- All offsets here are measured in Words (not bytes). This includes
+ -- arguments to the load/store machine code generators, alignment numbers
+ -- and the final 'framesize' among others.
+
+ = concatMap w32_to_w8s_bigEndian $ [
+ 0x7c0802a6, -- mflr r0
+ 0x93e1fffc, -- stw r31,-4(r1)
+ 0x90010008, -- stw r0,8(r1)
+ 0x94210000 .|. offset (-framesize), -- stwu r1, -frameSize(r1)
+ 0x7c7f1b78 -- mr r31, r3
+ ] ++ pass_parameters ++ -- pass the parameters
+ loadWord 12 addr_offW ++ [ -- lwz r12, a_off(r31)
+ 0x7d8903a6, -- mtctr r12
+ 0x4e800421 -- bctrl
+ ] ++ gather_result ++ [ -- save the return value
+ 0x80210000, -- lwz r1, 0(r1)
+ 0x83e1fffc, -- lwz r31, -4(r1)
+ 0x80010008, -- lwz r0, 8(r1)
+ 0x7c0803a6, -- mtlr r0
+ 0x4e800020 -- blr
+ ]
+
+ where
+ gather_result :: [Word32]
+ gather_result = case r_rep of
+ VoidArg -> []
+ FloatArg -> storeFloat 1 r_offW
+ DoubleArg -> storeDouble 1 r_offW
+ LongArg -> storeLong 3 r_offW
+ _ -> storeWord 3 r_offW
+
+ pass_parameters :: [Word32]
+ pass_parameters = concat params
+
+ -- vector aligned (4 word = 16 bytes) with 8 extra words of buffer space
+ framesize = alignedTo 4 (argsize + 8)
+
+ ((_,_,argsize), params) = mapAccumL loadparam (3,1,2) arg_offs_n_reps
+
+ -- handle one argument, returning machine code and the updated state
+ loadparam :: (Int, Int, Int) -> (Int, CgRep) ->
+ ((Int, Int, Int), [Word32])
+
+ loadparam (gpr, fpr, stack) (ofs, rep) = case rep of
+ FloatArg | fpr <= 8 -> ( (gpr, fpr + 1, stack), loadFloat fpr ofs )
+ FloatArg -> ( (gpr, fpr, stack + 1), stackWord stack ofs )
+
+ DoubleArg | fpr <= 8 -> ( (gpr, fpr + 1, stack), loadDouble fpr ofs )
+ DoubleArg -> ( (gpr, fpr, astack + 2), stackLong astack ofs )
+
+ LongArg | even gpr -> loadparam (gpr + 1, fpr, stack) (ofs, rep)
+ LongArg | gpr <= 9 -> ( (gpr + 2, fpr, stack), loadLong gpr ofs )
+ LongArg -> ( (gpr, fpr, astack + 2), stackLong astack ofs )
+
+ _ | gpr <= 10 -> ( (gpr + 1, fpr, stack), loadWord gpr ofs )
+ _ -> ( (gpr, fpr, stack + 1), stackWord stack ofs )
+ where astack = alignedTo 2 stack
+
+ alignedTo :: Int -> Int -> Int
+ alignedTo alignment x = case x `mod` alignment of
+ 0 -> x
+ y -> x - y + alignment
+
+ -- convenience macros to do multiple-instruction data moves
+ stackWord dst src = loadWord 0 src ++ storeWordC 0 dst
+ stackLong dst src = stackWord dst src ++ stackWord (dst + 1) (src + 1)
+ loadLong dst src = loadWord dst src ++ loadWord (dst + 1) (src + 1)
+ storeLong dst src = storeWord dst src ++ storeWord (dst + 1) (src + 1)
+
+ -- load data from the Haskell stack (relative to r31)
+ loadFloat = loadstoreInstr 0xc01f0000 -- lfs fpr, ofs(r31)
+ loadDouble = loadstoreInstr 0xc81f0000 -- lfd fpr, ofs(r31)
+ loadWord = loadstoreInstr 0x801f0000 -- lwz gpr, ofs(r31)
+
+ -- store data to the Haskell stack (relative to r31)
+ storeFloat = loadstoreInstr 0xd01f0000 -- stfs fpr, ofs(r31)
+ storeDouble = loadstoreInstr 0xd81f0000 -- stfd fpr, ofs(r31)
+ storeWord = loadstoreInstr 0x901f0000 -- stw gpr, ofs(r31)
+
+ -- store data to the C stack (relative to r1)
+ storeWordC = loadstoreInstr 0x90010000 -- stw gpr, ofs(r1)
+
+ -- machine code building blocks
+ loadstoreInstr :: Word32 -> Int -> Int -> [Word32]
+ loadstoreInstr code reg ofs = [ code .|. register reg .|. offset ofs ]
+
+ register :: Int -> Word32
+ register reg = fromIntegral reg `shiftL` 21
+
+ offset :: Int -> Word32
+ offset ofs = fromIntegral (ofs * 4) .&. 0xFFFF
+
+ -- speaks for itself
+ w32_to_w8s_bigEndian :: Word32 -> [Word8]
+ w32_to_w8s_bigEndian w = [fromIntegral (0xFF .&. (w `shiftR` 24)),
+ fromIntegral (0xFF .&. (w `shiftR` 16)),
+ fromIntegral (0xFF .&. (w `shiftR` 8)),
+ fromIntegral (0xFF .&. w)]
+
+#else
+
+ = throwDyn (InstallationError "foreign import is not implemented for GHCi on this platform.")
+
+#endif
+
\end{code}