X-Git-Url: http://git.megacz.com/?a=blobdiff_plain;f=ghc%2Fcompiler%2Fghci%2FByteCodeFFI.lhs;h=4fc09a7667c627bd22e2999908d5288f5a034eca;hb=8133c305d14d748d7720272b1eaa67847d00e241;hp=8e655482b8f08d797960db7b81f8159eaf230206;hpb=54afa8cb01aa038f64fb9ab943d92a9638394e34;p=ghc-hetmet.git diff --git a/ghc/compiler/ghci/ByteCodeFFI.lhs b/ghc/compiler/ghci/ByteCodeFFI.lhs index 8e65548..4fc09a7 100644 --- a/ghc/compiler/ghci/ByteCodeFFI.lhs +++ b/ghc/compiler/ghci/ByteCodeFFI.lhs @@ -1,20 +1,27 @@ % -% (c) The University of Glasgow 2000 +% (c) The University of Glasgow 2001 % -\section[ByteCodeGen]{Generate bytecode from Core} +\section[ByteCodeGen]{Generate machine-code sequences for foreign import} \begin{code} -module ByteCodeFFI ( taggedSizeW, untaggedSizeW, mkMarshalCode ) where +module ByteCodeFFI ( taggedSizeW, untaggedSizeW, mkMarshalCode, moan64 ) where #include "HsVersions.h" +import Outputable import PrimRep ( PrimRep(..), getPrimRepSize, isFollowableRep ) -import Bits ( Bits(..), shiftR ) -import Word ( Word8, Word32 ) -import Addr ( Addr(..), writeWord8OffAddr ) -import Foreign ( Ptr(..), mallocBytes ) -import IOExts ( unsafePerformIO, trace ) - +import ForeignCall ( CCallConv(..) ) + +-- DON'T remove apparently unused imports here .. +-- there is ifdeffery below +import DATA_BITS ( Bits(..), shiftR, shiftL ) +import Foreign ( newArray ) + +import DATA_WORD ( Word8, Word32 ) +import Foreign ( Ptr ) +import System.IO.Unsafe ( unsafePerformIO ) +import IO ( hPutStrLn, stderr ) +-- import Debug.Trace ( trace ) \end{code} %************************************************************************ @@ -42,15 +49,6 @@ sizeOfTagW :: PrimRep -> Int sizeOfTagW pr | isFollowableRep pr = 0 | otherwise = 1 - --- Blast a bunch of bytes into malloc'd memory and return the addr. -sendBytesToMallocville :: [Word8] -> IO Addr -sendBytesToMallocville bytes - = do let n = length bytes - (Ptr a#) <- mallocBytes n - mapM ( \(off,byte) -> writeWord8OffAddr (A# a#) off byte ) - (zip [0 ..] bytes) - return (A# a#) \end{code} %************************************************************************ @@ -61,6 +59,24 @@ sendBytesToMallocville bytes \begin{code} +moan64 :: String -> SDoc -> a +moan64 msg pp_rep + = unsafePerformIO ( + hPutStrLn stderr ( + "\nGHCi's bytecode generation machinery can't handle 64-bit\n" ++ + "code properly yet. You can work around this for the time being\n" ++ + "by compiling this module and all those it imports to object code,\n" ++ + "and re-starting your GHCi session. The panic below contains information,\n" ++ + "intended for the GHC implementors, about the exact place where GHC gave up.\n" + ) + ) + `seq` + pprPanic msg pp_rep + + +-- For sparc_TARGET_ARCH, i386_TARGET_ARCH, etc. +#include "nativeGen/NCG.h" + {- Make a piece of code which expects to see the Haskell stack looking like this. It is given a pointer to the lowest word in @@ -71,18 +87,29 @@ the stack -- presumably the tag of the placeholder. Addr# address_of_C_fn (must be an unboxed type) + +We cope with both ccall and stdcall for the C fn. However, this code +itself expects only to be called using the ccall convention -- that is, +we don't clear our own (single) arg off the C stack. -} -mkMarshalCode :: (Int, PrimRep) -> Int -> [(Int, PrimRep)] - -> Addr -mkMarshalCode (r_offW, r_rep) addr_offW arg_offs_n_reps - = let bytes = mkMarshalCode_wrk (r_offW, r_rep) +mkMarshalCode :: CCallConv + -> (Int, PrimRep) -> Int -> [(Int, PrimRep)] + -> IO (Ptr Word8) +mkMarshalCode cconv (r_offW, r_rep) addr_offW arg_offs_n_reps + = let bytes = mkMarshalCode_wrk cconv (r_offW, r_rep) addr_offW arg_offs_n_reps - in unsafePerformIO (sendBytesToMallocville bytes) + in Foreign.newArray bytes -mkMarshalCode_wrk :: (Int, PrimRep) -> Int -> [(Int, PrimRep)] + + +mkMarshalCode_wrk :: CCallConv + -> (Int, PrimRep) -> Int -> [(Int, PrimRep)] -> [Word8] -mkMarshalCode_wrk (r_offW, r_rep) addr_offW arg_offs_n_reps + +mkMarshalCode_wrk cconv (r_offW, r_rep) addr_offW arg_offs_n_reps + +#if i386_TARGET_ARCH = let -- Don't change this without first consulting Intel Corp :-) bytes_per_word = 4 @@ -96,8 +123,11 @@ mkMarshalCode_wrk (r_offW, r_rep) addr_offW arg_offs_n_reps [ let -- where this arg's bits start a_bits_offW = a_offW + sizeOfTagW a_rep in + -- reversed because x86 is little-endian + reverse [a_bits_offW .. a_bits_offW + untaggedSizeW a_rep - 1] + -- reversed because args are pushed L -> R onto C stack | (a_offW, a_rep) <- reverse arg_offs_n_reps ] @@ -118,9 +148,14 @@ mkMarshalCode_wrk (r_offW, r_rep) addr_offW arg_offs_n_reps = [0x81, 0xC4] ++ lit32 lit movl_eax_offesimem offB -- movl %eax, offB(%esi) = [0x89, 0x86] ++ lit32 offB + movl_edx_offesimem offB -- movl %edx, offB(%esi) + = [0x89, 0x96] ++ lit32 offB ret -- ret = [0xC3] - + fstpl_offesimem offB -- fstpl offB(%esi) + = [0xDD, 0x9E] ++ lit32 offB + fstps_offesimem offB -- fstps offB(%esi) + = [0xD9, 0x9E] ++ lit32 offB lit32 :: Int -> [Word8] lit32 i = let w32 = (fromIntegral i) :: Word32 in map (fromIntegral . ( .&. 0xFF)) @@ -147,6 +182,14 @@ mkMarshalCode_wrk (r_offW, r_rep) addr_offW arg_offs_n_reps 15 3412 16 002a 89967856 movl %edx, 0x12345678(%esi) 16 3412 + 17 + 18 0030 DD967856 fstl 0x12345678(%esi) + 18 3412 + 19 0036 DD9E7856 fstpl 0x12345678(%esi) + 19 3412 + 20 003c D9967856 fsts 0x12345678(%esi) + 20 3412 + 21 0042 D99E7856 fstps 0x12345678(%esi) 18 19 0030 C3 ret 20 @@ -154,7 +197,7 @@ mkMarshalCode_wrk (r_offW, r_rep) addr_offW arg_offs_n_reps -} in - trace (show (map fst arg_offs_n_reps)) + --trace (show (map fst arg_offs_n_reps)) ( {- On entry, top of C stack 0(%esp) is the RA and 4(%esp) is arg passed from the interpreter. @@ -205,7 +248,9 @@ mkMarshalCode_wrk (r_offW, r_rep) addr_offW arg_offs_n_reps addl $4*number_of_args_pushed, %esp (ccall only) movl 28+4(%esp), %esi -} - ++ add_lit_esp (bytes_per_word * length offsets_to_pushW) + ++ (if cconv /= StdCallConv + then add_lit_esp (bytes_per_word * length offsets_to_pushW) + else []) ++ movl_offespmem_esi 32 {- Depending on what the return type is, get the result @@ -220,8 +265,23 @@ mkMarshalCode_wrk (r_offW, r_rep) addr_offW arg_offs_n_reps or fstps 4(%esi) -} - ++ case r_rep of - IntRep -> movl_eax_offesimem 4 + ++ let i32 = movl_eax_offesimem 4 + i64 = movl_eax_offesimem 4 ++ movl_edx_offesimem 8 + f32 = fstps_offesimem 4 + f64 = fstpl_offesimem 4 + in + case r_rep of + CharRep -> i32 + IntRep -> i32 + WordRep -> i32 + AddrRep -> i32 + DoubleRep -> f64 + FloatRep -> f32 + -- Word64Rep -> i64 + -- Int64Rep -> i64 + VoidRep -> [] + other -> moan64 "ByteCodeFFI.mkMarshalCode_wrk(x86)" + (ppr r_rep) {- Restore all the pushed regs and go home. @@ -238,5 +298,296 @@ mkMarshalCode_wrk (r_offW, r_rep) addr_offW arg_offs_n_reps ++ restore_regs ++ ret ) + +#elif sparc_TARGET_ARCH + + = let -- At least for sparc V8 + bytes_per_word = 4 + + -- speaks for itself + w32_to_w8s_bigEndian :: Word32 -> [Word8] + w32_to_w8s_bigEndian w + = [fromIntegral (0xFF .&. (w `shiftR` 24)), + fromIntegral (0xFF .&. (w `shiftR` 16)), + fromIntegral (0xFF .&. (w `shiftR` 8)), + fromIntegral (0xFF .&. w)] + + -- addr and result bits offsetsW + offset_of_addr_bitsW = addr_offW + sizeOfTagW AddrRep + offset_of_res_bitsW = r_offW + sizeOfTagW r_rep + + offsets_to_pushW + = concat + [ let -- where this arg's bits start + a_bits_offW = a_offW + sizeOfTagW a_rep + in + [a_bits_offW .. a_bits_offW + untaggedSizeW a_rep - 1] + + | (a_offW, a_rep) <- arg_offs_n_reps + ] + + total_argWs = length offsets_to_pushW + argWs_on_stack = if total_argWs > 6 then total_argWs - 6 + else 0 + + -- The stack pointer must be kept 8-byte aligned, which means + -- we need to calculate this quantity too + argWs_on_stack_ROUNDED_UP + | odd argWs_on_stack = 1 + argWs_on_stack + | otherwise = argWs_on_stack + + -- some helpers to assemble sparc insns. + -- REGS + iReg, oReg, gReg, fReg :: Int -> Word32 + iReg = fromIntegral . (+ 24) + oReg = fromIntegral . (+ 8) + gReg = fromIntegral . (+ 0) + fReg = fromIntegral + + sp = oReg 6 + i0 = iReg 0 + i7 = iReg 7 + o0 = oReg 0 + o1 = oReg 1 + o7 = oReg 7 + g0 = gReg 0 + g1 = gReg 1 + f0 = fReg 0 + f1 = fReg 1 + + -- INSN templates + insn_r_r_i :: Word32 -> Word32 -> Word32 -> Int -> Word32 + insn_r_r_i op3 rs1 rd imm13 + = (3 `shiftL` 30) + .|. (rs1 `shiftL` 25) + .|. (op3 `shiftL` 19) + .|. (rd `shiftL` 14) + .|. (1 `shiftL` 13) + .|. mkSimm13 imm13 + + insn_r_i_r :: Word32 -> Word32 -> Int -> Word32 -> Word32 + insn_r_i_r op3 rs1 imm13 rd + = (2 `shiftL` 30) + .|. (rd `shiftL` 25) + .|. (op3 `shiftL` 19) + .|. (rs1 `shiftL` 14) + .|. (1 `shiftL` 13) + .|. mkSimm13 imm13 + + mkSimm13 :: Int -> Word32 + mkSimm13 imm13 + = let imm13w = (fromIntegral imm13) :: Word32 + in imm13w .&. 0x1FFF + + -- REAL (non-synthetic) insns + -- or %rs1, %rs2, %rd + mkOR :: Word32 -> Word32 -> Word32 -> Word32 + mkOR rs1 rs2 rd + = (2 `shiftL` 30) + .|. (rd `shiftL` 25) + .|. (op3_OR `shiftL` 19) + .|. (rs1 `shiftL` 14) + .|. (0 `shiftL` 13) + .|. rs2 + where op3_OR = 2 :: Word32 + + -- ld(int) [%rs + imm13], %rd + mkLD rs1 imm13 rd = insn_r_r_i 0x00{-op3_LD-} rd rs1 imm13 + + -- st(int) %rs, [%rd + imm13] + mkST = insn_r_r_i 0x04 -- op3_ST + + -- st(float) %rs, [%rd + imm13] + mkSTF = insn_r_r_i 0x24 -- op3_STF + + -- jmpl %rs + imm13, %rd + mkJMPL = insn_r_i_r 0x38 -- op3_JMPL + + -- save %rs + imm13, %rd + mkSAVE = insn_r_i_r 0x3C -- op3_SAVE + + -- restore %rs + imm13, %rd + mkRESTORE = insn_r_i_r 0x3D -- op3_RESTORE + + -- SYNTHETIC insns + mkNOP = mkOR g0 g0 g0 + mkCALL reg = mkJMPL reg 0 o7 + mkRET = mkJMPL i7 8 g0 + mkRESTORE_TRIVIAL = mkRESTORE g0 0 g0 + in + --trace (show (map fst arg_offs_n_reps)) + concatMap w32_to_w8s_bigEndian ( + + {- On entry, %o0 is the arg passed from the interpreter. After + the initial save insn, it will be in %i0. Studying the sparc + docs one would have thought that the minimum frame size is 92 + bytes, but gcc always uses at least 112, and indeed there are + segfaults a-plenty with 92. So I use 112 here as well. I + don't understand why, tho. + -} + [mkSAVE sp (- ({-92-}112 + 4*argWs_on_stack_ROUNDED_UP)) sp] + + {- For each arg in args_offs_n_reps, examine the associated PrimRep + to determine how many payload (non-tag) words there are, and + whether or not there is a tag. This gives a bunch of offsets on + the H stack. Move the first 6 words into %o0 .. %o5 and the + rest on the stack, starting at [%sp+92]. Use %g1 as a temp. + -} + ++ let doArgW (offW, wordNo) + | wordNo < 6 + = [mkLD i0 (bytes_per_word * offW) (oReg wordNo)] + | otherwise + = [mkLD i0 (bytes_per_word * offW) g1, + mkST g1 sp (92 + bytes_per_word * (wordNo - 6))] + in + concatMap doArgW (zip offsets_to_pushW [0 ..]) + + {- Get the addr to call into %g1, bearing in mind that there's + an Addr# tag at the indicated location, and do the call: + + ld [4*(1 /*tag*/ +addr_offW) + %i0], %g1 + call %g1 + -} + ++ [mkLD i0 (bytes_per_word * offset_of_addr_bitsW) g1, + mkCALL g1, + mkNOP] + + {- Depending on what the return type is, get the result + from %o0 or %o1:%o0 or %f0 or %f1:%f0. + + st %o0, [%i0 + 4] -- 32 bit int + or + st %o0, [%i0 + 4] -- 64 bit int + st %o1, [%i0 + 8] -- or the other way round? + or + st %f0, [%i0 + 4] -- 32 bit float + or + st %f0, [%i0 + 4] -- 64 bit float + st %f1, [%i0 + 8] -- or the other way round? + + -} + ++ let i32 = [mkST o0 i0 4] + i64 = [mkST o0 i0 4, mkST o1 i0 8] + f32 = [mkSTF f0 i0 4] + f64 = [mkSTF f0 i0 4, mkSTF f1 i0 8] + in + case r_rep of + CharRep -> i32 + IntRep -> i32 + WordRep -> i32 + AddrRep -> i32 + DoubleRep -> f64 + FloatRep -> f32 + VoidRep -> [] + other -> moan64 "ByteCodeFFI.mkMarshalCode_wrk(sparc)" + (ppr r_rep) + + ++ [mkRET, + mkRESTORE_TRIVIAL] -- this is in the delay slot of the RET + ) +#elif powerpc_TARGET_ARCH + + = let + bytes_per_word = 4 + + -- speaks for itself + w32_to_w8s_bigEndian :: Word32 -> [Word8] + w32_to_w8s_bigEndian w + = [fromIntegral (0xFF .&. (w `shiftR` 24)), + fromIntegral (0xFF .&. (w `shiftR` 16)), + fromIntegral (0xFF .&. (w `shiftR` 8)), + fromIntegral (0xFF .&. w)] + + -- addr and result bits offsetsW + a_off = (addr_offW + sizeOfTagW AddrRep) * bytes_per_word + result_off = (r_offW + sizeOfTagW r_rep) * bytes_per_word + + linkageArea = 24 + parameterArea = sum [ untaggedSizeW a_rep * bytes_per_word + | (_, a_rep) <- arg_offs_n_reps ] + savedRegisterArea = 4 + frameSize = padTo16 (linkageArea + min parameterArea 32 + savedRegisterArea) + padTo16 x = case x `mod` 16 of + 0 -> x + y -> x - y + 16 + + pass_parameters [] _ _ = [] + pass_parameters ((a_offW, a_rep):args) nextFPR offsetW = + let + haskellArgOffset = (a_offW + sizeOfTagW a_rep) + * bytes_per_word + offsetW' = offsetW + untaggedSizeW a_rep + + pass_word w + | w < 8 = + [0x801f0000 -- lwz rX, src(r31) + .|. (fromIntegral src .&. 0xFFFF) + .|. (fromIntegral (offsetW+w+3) `shiftL` 21)] + | otherwise = + [0x801f0000 -- lwz r0, src(r31) + .|. (fromIntegral src .&. 0xFFFF), + 0x90010000 -- stw r0, dst(r1) + .|. (fromIntegral dst .&. 0xFFFF)] + where + src = haskellArgOffset + w*bytes_per_word + dst = linkageArea + (offsetW+w) * bytes_per_word + in + case a_rep of + FloatRep | nextFPR < 14 -> + (0xc01f0000 -- lfs fX, haskellArgOffset(r31) + .|. (fromIntegral haskellArgOffset .&. 0xFFFF) + .|. (fromIntegral nextFPR `shiftL` 21)) + : pass_parameters args (nextFPR+1) offsetW' + DoubleRep | nextFPR < 14 -> + (0xc81f0000 -- lfd fX, haskellArgOffset(r31) + .|. (fromIntegral haskellArgOffset .&. 0xFFFF) + .|. (fromIntegral nextFPR `shiftL` 21)) + : pass_parameters args (nextFPR+1) offsetW' + _ -> + concatMap pass_word [0 .. untaggedSizeW a_rep - 1] + ++ pass_parameters args nextFPR offsetW' + + gather_result = case r_rep of + VoidRep -> [] + FloatRep -> + [0xd03f0000 .|. (fromIntegral result_off .&. 0xFFFF)] + -- stfs f1, result_off(r31) + DoubleRep -> + [0xd83f0000 .|. (fromIntegral result_off .&. 0xFFFF)] + -- stfs f1, result_off(r31) + _ | untaggedSizeW r_rep == 2 -> + [0x907f0000 .|. (fromIntegral result_off .&. 0xFFFF), + 0x909f0000 .|. (fromIntegral (result_off+4) .&. 0xFFFF)] + -- stw r3, result_off(r31) + -- stw r4, result_off+4(r31) + _ | untaggedSizeW r_rep == 1 -> + [0x907f0000 .|. (fromIntegral result_off .&. 0xFFFF)] + -- stw r3, result_off(r31) + in + concatMap w32_to_w8s_bigEndian $ [ + 0x7c0802a6, -- mflr r0 + 0x93e1fffc, -- stw r31,-4(r1) + 0x90010008, -- stw r0,8(r1) + 0x94210000 .|. (fromIntegral (-frameSize) .&. 0xFFFF), + -- stwu r1, -frameSize(r1) + 0x7c7f1b78 -- mr r31, r3 + ] ++ pass_parameters arg_offs_n_reps 1 0 ++ [ + 0x819f0000 .|. (fromIntegral a_off .&. 0xFFFF), + -- lwz r12, a_off(r31) + 0x7d8903a6, -- mtctr r12 + 0x4e800421 -- bctrl + ] ++ gather_result ++ [ + 0x80210000, -- lwz r1, 0(r1) + 0x83e1fffc, -- lwz r31, -4(r1) + 0x80010008, -- lwz r0, 8(r1) + 0x7c0803a6, -- mtlr r0 + 0x4e800020 -- blr + ] +#else + + = undefined + +#endif + \end{code}