%
-% (c) The University of Glasgow 2001
+% (c) The University of Glasgow 2001-2006
%
-\section[ByteCodeGen]{Generate machine-code sequences for foreign import}
+
+ByteCodeGen: Generate machine-code sequences for foreign import
\begin{code}
-module ByteCodeFFI ( mkMarshalCode, moan64 ) where
+{-# OPTIONS -w #-}
+-- The above warning supression flag is a temporary kludge.
+-- While working on this module you are encouraged to remove it and fix
+-- any warnings in the module. See
+-- http://hackage.haskell.org/trac/ghc/wiki/Commentary/CodingStyle#Warnings
+-- for details
+
+#ifdef USE_LIBFFI
+
+module ByteCodeFFI ( moan64, newExec ) where
+
+import Outputable
+import System.IO
+import Foreign
+import Foreign.C
+
+#else
+
+module ByteCodeFFI ( mkMarshalCode, moan64, newExec ) where
#include "HsVersions.h"
+import TyCon
import Outputable
-import SMRep ( CgRep(..), cgRepSizeW )
-import ForeignCall ( CCallConv(..) )
+import SMRep
+import ForeignCall
import Panic
-- DON'T remove apparently unused imports here ..
-- there is ifdeffery below
import Control.Exception ( throwDyn )
-import DATA_BITS ( Bits(..), shiftR, shiftL )
-import Foreign ( newArray )
+import Data.Bits ( Bits(..), shiftR, shiftL )
import Data.List ( mapAccumL )
-import DATA_WORD ( Word8, Word32 )
-import Foreign ( Ptr )
+import Data.Word ( Word8, Word32 )
+import Foreign ( Ptr, FunPtr, castPtrToFunPtr,
+ Storable, sizeOf, pokeArray )
+import Foreign.C ( CUInt )
import System.IO.Unsafe ( unsafePerformIO )
-import IO ( hPutStrLn, stderr )
-import Debug.Trace ( trace )
+import System.IO ( hPutStrLn, stderr )
+-- import Debug.Trace ( trace )
\end{code}
%************************************************************************
\begin{code}
-moan64 :: String -> SDoc -> a
-moan64 msg pp_rep
- = unsafePerformIO (
- hPutStrLn stderr (
- "\nGHCi's bytecode generation machinery can't handle 64-bit\n" ++
- "code properly yet. You can work around this for the time being\n" ++
- "by compiling this module and all those it imports to object code,\n" ++
- "and re-starting your GHCi session. The panic below contains information,\n" ++
- "intended for the GHC implementors, about the exact place where GHC gave up.\n"
- )
- )
- `seq`
- pprPanic msg pp_rep
-
-
-- For sparc_TARGET_ARCH, i386_TARGET_ARCH, etc.
#include "nativeGen/NCG.h"
we don't clear our own (single) arg off the C stack.
-}
mkMarshalCode :: CCallConv
- -> (Int, CgRep) -> Int -> [(Int, CgRep)]
- -> IO (Ptr Word8)
+ -> (Int, PrimRep) -> Int -> [(Int, PrimRep)]
+ -> IO (FunPtr ())
mkMarshalCode cconv (r_offW, r_rep) addr_offW arg_offs_n_reps
= let bytes = mkMarshalCode_wrk cconv (r_offW, r_rep)
addr_offW arg_offs_n_reps
- in trace (show bytes) $ Foreign.newArray bytes
-
-
-
+ in newExec bytes
mkMarshalCode_wrk :: CCallConv
- -> (Int, CgRep) -> Int -> [(Int, CgRep)]
+ -> (Int, PrimRep) -> Int -> [(Int, PrimRep)]
-> [Word8]
mkMarshalCode_wrk cconv (r_offW, r_rep) addr_offW arg_offs_n_reps
offsets_to_pushW
= concat
[ -- reversed because x86 is little-endian
- reverse [a_offW .. a_offW + cgRepSizeW a_rep - 1]
+ reverse [a_offW .. a_offW + primRepSizeW a_rep - 1]
-- reversed because args are pushed L -> R onto C stack
| (a_offW, a_rep) <- reverse arg_offs_n_reps
f64 = fstpl_offesimem 0
in
case r_rep of
- NonPtrArg -> i32
- DoubleArg -> f64
- FloatArg -> f32
- -- LongArg -> i64
- VoidArg -> []
+ VoidRep -> []
+ IntRep -> i32
+ WordRep -> i32
+ Int64Rep -> i64
+ Word64Rep -> i64
+ AddrRep -> i32
+ FloatRep -> f32
+ DoubleRep -> f64
other -> moan64 "ByteCodeFFI.mkMarshalCode_wrk(x86)"
(ppr r_rep)
-- flt arg regs: xmm0..xmm7
int_loads = [ movq_rbpoff_rdi, movq_rbpoff_rsi, movq_rbpoff_rdx,
movq_rbpoff_rcx, movq_rbpoff_r8, movq_rbpoff_r9 ]
- float_loads = [ (mov_f32_rbpoff_xmm n, mov_f64_rbpoff_xmm n) | n <- [0..7] ]
+ float_loads = [ 0..7 ]
load_arg_regs args [] [] code = (args, [], code)
load_arg_regs [] iregs fregs code = ([], fregs, code)
load_arg_regs ((off,rep):args) iregs fregs code
- | FloatArg <- rep, ((mov_f32,_):frest) <- fregs =
- load_arg_regs args iregs frest (mov_f32 (bytes_per_word * off) : code)
- | DoubleArg <- rep, ((_,mov_f64):frest) <- fregs =
- load_arg_regs args iregs frest (mov_f64 (bytes_per_word * off) : code)
+ | FloatRep <- rep =
+ case fregs of
+ [] -> push_this_arg
+ n : frest ->
+ load_arg_regs args iregs frest
+ (mov_f32_rbpoff_xmm n (bytes_per_word * off) : code)
+ | DoubleRep <- rep =
+ case fregs of
+ [] -> push_this_arg
+ n : frest ->
+ load_arg_regs args iregs frest
+ (mov_f64_rbpoff_xmm n (bytes_per_word * off) : code)
| (mov_reg:irest) <- iregs =
load_arg_regs args irest fregs (mov_reg (bytes_per_word * off) : code)
| otherwise =
push_args [] code pushed_words = (code, pushed_words)
push_args ((off,rep):args) code pushed_words
- | FloatArg <- rep =
+ | FloatRep <- rep =
push_args args (push_f32_rbpoff (bytes_per_word * off) : code)
(pushed_words+1)
- | DoubleArg <- rep =
+ | DoubleRep <- rep =
push_args args (push_f64_rbpoff (bytes_per_word * off) : code)
(pushed_words+1)
| otherwise =
assign_result =
case r_rep of
- DoubleArg -> f64
- FloatArg -> f32
- VoidArg -> []
+ DoubleRep -> f64
+ FloatRep -> f32
+ VoidRep -> []
_other -> i64
where
i64 = movq_rax_rbpoff 0
f32 = mov_f32_xmm0_rbpoff 0
f64 = mov_f64_xmm0_rbpoff 0
--- ######### x86_64 machine code:
+-- ######### x86_64 machine code:
-- 0: 48 89 fd mov %rdi,%rbp
-- 3: 48 8b bd 78 56 34 12 mov 0x12345678(%rbp),%rdi
-- 2d: 4c 8b 95 78 56 34 12 mov 0x12345678(%rbp),%r10
-- 34: 48 c7 c0 78 56 34 12 mov $0x12345678,%rax
-- 3b: 48 89 85 78 56 34 12 mov %rax,0x12345678(%rbp)
--- 42: f3 0f 10 85 78 56 34 12 movss 0x12345678(%rbp),%xmm0
--- 4a: f2 0f 10 85 78 56 34 12 movsd 0x12345678(%rbp),%xmm0
--- 52: f3 0f 11 85 78 56 34 12 movss %xmm0,0x12345678(%rbp)
--- 5a: f2 0f 11 85 78 56 34 12 movsd %xmm0,0x12345678(%rbp)
--- 62: ff b5 78 56 34 12 pushq 0x12345678(%rbp)
--- 68: f3 44 0f 11 04 24 movss %xmm8,(%rsp)
--- 6e: f2 44 0f 11 04 24 movsd %xmm8,(%rsp)
--- 74: 48 81 ec 78 56 34 12 sub $0x12345678,%rsp
--- 7b: 48 81 c4 78 56 34 12 add $0x12345678,%rsp
--- 82: 41 ff d2 callq *%r10
--- 85: c3 retq
+-- 42: f3 0f 10 bd 78 56 34 12 movss 0x12345678(%rbp),%xmm7
+-- 4a: f2 0f 10 9d 78 56 34 12 movsd 0x12345678(%rbp),%xmm3
+-- 52: f2 44 0f 10 85 78 56 34 12 movsd 0x12345678(%rbp),%xmm8
+-- 5b: f3 0f 11 9d 78 56 34 12 movss %xmm3,0x12345678(%rbp)
+-- 63: f2 0f 11 9d 78 56 34 12 movsd %xmm3,0x12345678(%rbp)
+-- 6b: f2 44 0f 11 85 78 56 34 12 movsd %xmm8,0x12345678(%rbp)
+-- 74: ff b5 78 56 34 12 pushq 0x12345678(%rbp)
+-- 7a: f3 44 0f 11 04 24 movss %xmm8,(%rsp)
+-- 80: f2 44 0f 11 04 24 movsd %xmm8,(%rsp)
+-- 86: 48 81 ec 78 56 34 12 sub $0x12345678,%rsp
+-- 8d: 48 81 c4 78 56 34 12 add $0x12345678,%rsp
+-- 94: 41 ff d2 callq *%r10
+-- 97: 55 push %rbp
+-- 98: 5d pop %rbp
+-- 99: c3 retq
movq_rdi_rbp = [0x48,0x89,0xfd]
movq_rbpoff_rdi off = [0x48, 0x8b, 0xbd] ++ lit32 off
movq_rbpoff_r10 off = [0x4c, 0x8b, 0x95] ++ lit32 off
movq_lit_rax lit = [0x48, 0xc7, 0xc0] ++ lit32 lit
movq_rax_rbpoff off = [0x48, 0x89, 0x85] ++ lit32 off
- mov_f32_rbpoff_xmm n off = [0xf3, 0x0f, 0x10, 0x85 + n`shiftL`3] ++ lit32 off
- mov_f64_rbpoff_xmm n off = [0xf2, 0x0f, 0x10, 0x85 + n`shiftL`3] ++ lit32 off
+ mov_f32_rbpoff_xmm n off
+ = 0xf3 : if n >= 8 then 0x44 : rest else rest
+ where rest = [0x0f, 0x10, 0x85 + (n.&.7)`shiftL`3] ++ lit32 off
+ mov_f64_rbpoff_xmm n off
+ = 0xf2 : if n >= 8 then 0x44 : rest else rest
+ where rest = [0x0f, 0x10, 0x85 + (n.&.7)`shiftL`3] ++ lit32 off
mov_f32_xmm0_rbpoff off = [0xf3, 0x0f, 0x11, 0x85] ++ lit32 off
mov_f64_xmm0_rbpoff off = [0xf2, 0x0f, 0x11, 0x85] ++ lit32 off
pushq_rbpoff off = [0xff, 0xb5] ++ lit32 off
push_f32_rbpoff off =
+ subq_lit_rsp 8 ++ -- subq $8, %rsp
mov_f32_rbpoff_xmm 8 off ++ -- movss off(%rbp), %xmm8
- [0xf3, 0x44, 0x0f, 0x11, 0x04, 0x24] ++ -- movss %xmm8, (%rsp)
- subq_lit_rsp 8 -- subq $8, %rsp
+ [0xf3, 0x44, 0x0f, 0x11, 0x04, 0x24] -- movss %xmm8, (%rsp)
push_f64_rbpoff off =
+ subq_lit_rsp 8 ++ -- subq $8, %rsp
mov_f64_rbpoff_xmm 8 off ++ -- movsd off(%rbp), %xmm8
- [0xf2, 0x44, 0x0f, 0x11, 0x04, 0x24] ++ -- movsd %xmm8, (%rsp)
- subq_lit_rsp 8 -- subq $8, %rsp
+ [0xf2, 0x44, 0x0f, 0x11, 0x04, 0x24] -- movsd %xmm8, (%rsp)
subq_lit_rsp lit = [0x48, 0x81, 0xec] ++ lit32 lit
addq_lit_rsp lit = [0x48, 0x81, 0xc4] ++ lit32 lit
call_star_r10 = [0x41,0xff,0xd2]
offsets_to_pushW
= concat
- [ [a_offW .. a_offW + cgRepSizeW a_rep - 1]
+ [ [a_offW .. a_offW + primRepSizeW a_rep - 1]
| (a_offW, a_rep) <- arg_offs_n_reps
]
f64 = [mkSTF f0 i0 0, mkSTF f1 i0 4]
in
case r_rep of
- NonPtrArg -> i32
- DoubleArg -> f64
- FloatArg -> f32
- VoidArg -> []
+ VoidRep -> []
+ IntRep -> i32
+ WordRep -> i32
+ AddrRep -> i32
+ FloatRep -> f32
+ DoubleRep -> f64
other -> moan64 "ByteCodeFFI.mkMarshalCode_wrk(sparc)"
(ppr r_rep)
result_off = r_offW * bytes_per_word
linkageArea = 24
- parameterArea = sum [ cgRepSizeW a_rep * bytes_per_word
+ parameterArea = sum [ primRepSizeW a_rep * bytes_per_word
| (_, a_rep) <- arg_offs_n_reps ]
savedRegisterArea = 4
frameSize = padTo16 (linkageArea + max parameterArea 32 + savedRegisterArea)
pass_parameters ((a_offW, a_rep):args) nextFPR offsetW =
let
haskellArgOffset = a_offW * bytes_per_word
- offsetW' = offsetW + cgRepSizeW a_rep
+ offsetW' = offsetW + primRepSizeW a_rep
pass_word w
| offsetW + w < 8 =
dst = linkageArea + (offsetW+w) * bytes_per_word
in
case a_rep of
- FloatArg | nextFPR < 14 ->
+ FloatRep | nextFPR < 14 ->
(0xc01f0000 -- lfs fX, haskellArgOffset(r31)
.|. (fromIntegral haskellArgOffset .&. 0xFFFF)
.|. (fromIntegral nextFPR `shiftL` 21))
: pass_parameters args (nextFPR+1) offsetW'
- DoubleArg | nextFPR < 14 ->
+ DoubleRep | nextFPR < 14 ->
(0xc81f0000 -- lfd fX, haskellArgOffset(r31)
.|. (fromIntegral haskellArgOffset .&. 0xFFFF)
.|. (fromIntegral nextFPR `shiftL` 21))
: pass_parameters args (nextFPR+1) offsetW'
_ ->
- concatMap pass_word [0 .. cgRepSizeW a_rep - 1]
+ concatMap pass_word [0 .. primRepSizeW a_rep - 1]
++ pass_parameters args nextFPR offsetW'
gather_result = case r_rep of
- VoidArg -> []
- FloatArg ->
+ VoidRep -> []
+ FloatRep ->
[0xd03f0000 .|. (fromIntegral result_off .&. 0xFFFF)]
-- stfs f1, result_off(r31)
- DoubleArg ->
+ DoubleRep ->
[0xd83f0000 .|. (fromIntegral result_off .&. 0xFFFF)]
-- stfd f1, result_off(r31)
- _ | cgRepSizeW r_rep == 2 ->
+ _ | primRepSizeW r_rep == 2 ->
[0x907f0000 .|. (fromIntegral result_off .&. 0xFFFF),
0x909f0000 .|. (fromIntegral (result_off+4) .&. 0xFFFF)]
-- stw r3, result_off(r31)
-- stw r4, result_off+4(r31)
- _ | cgRepSizeW r_rep == 1 ->
+ _ | primRepSizeW r_rep == 1 ->
[0x907f0000 .|. (fromIntegral result_off .&. 0xFFFF)]
-- stw r3, result_off(r31)
in
where
gather_result :: [Word32]
gather_result = case r_rep of
- VoidArg -> []
- FloatArg -> storeFloat 1 r_offW
- DoubleArg -> storeDouble 1 r_offW
- LongArg -> storeLong 3 r_offW
+ VoidRep -> []
+ FloatRep -> storeFloat 1 r_offW
+ DoubleRep -> storeDouble 1 r_offW
+ Int64Rep -> storeLong 3 r_offW
+ Word64Rep -> storeLong 3 r_offW
_ -> storeWord 3 r_offW
pass_parameters :: [Word32]
((_,_,argsize), params) = mapAccumL loadparam (3,1,2) arg_offs_n_reps
-- handle one argument, returning machine code and the updated state
- loadparam :: (Int, Int, Int) -> (Int, CgRep) ->
+ loadparam :: (Int, Int, Int) -> (Int, PrimRep) ->
((Int, Int, Int), [Word32])
loadparam (gpr, fpr, stack) (ofs, rep) = case rep of
- FloatArg | fpr <= 8 -> ( (gpr, fpr + 1, stack), loadFloat fpr ofs )
- FloatArg -> ( (gpr, fpr, stack + 1), stackWord stack ofs )
+ FloatRep | fpr <= 8 -> ( (gpr, fpr + 1, stack), loadFloat fpr ofs )
+ FloatRep -> ( (gpr, fpr, stack + 1), stackWord stack ofs )
- DoubleArg | fpr <= 8 -> ( (gpr, fpr + 1, stack), loadDouble fpr ofs )
- DoubleArg -> ( (gpr, fpr, astack + 2), stackLong astack ofs )
-
- LongArg | even gpr -> loadparam (gpr + 1, fpr, stack) (ofs, rep)
- LongArg | gpr <= 9 -> ( (gpr + 2, fpr, stack), loadLong gpr ofs )
- LongArg -> ( (gpr, fpr, astack + 2), stackLong astack ofs )
+ DoubleRep | fpr <= 8 -> ( (gpr, fpr + 1, stack), loadDouble fpr ofs )
+ DoubleRep -> ( (gpr, fpr, astack + 2), stackLong astack ofs )
+ r | is64 r && even gpr -> loadparam (gpr + 1, fpr, stack) (ofs, rep)
+ r | is64 r && gpr <= 9 -> ( (gpr + 2, fpr, stack), loadLong gpr ofs )
+ r | is64 r -> ( (gpr, fpr, astack + 2), stackLong astack ofs )
_ | gpr <= 10 -> ( (gpr + 1, fpr, stack), loadWord gpr ofs )
_ -> ( (gpr, fpr, stack + 1), stackWord stack ofs )
where astack = alignedTo 2 stack
+ is64 Int64Rep = True
+ is64 Word64Rep = True
+ is64 _ = False
+
alignedTo :: Int -> Int -> Int
alignedTo alignment x = case x `mod` alignment of
0 -> x
[w32, w32 `shiftR` 8,
w32 `shiftR` 16, w32 `shiftR` 24]
#endif
+
+#endif /* !USE_LIBFFI */
+
+moan64 :: String -> SDoc -> a
+moan64 msg pp_rep
+ = unsafePerformIO (
+ hPutStrLn stderr (
+ "\nGHCi's bytecode generation machinery can't handle 64-bit\n" ++
+ "code properly yet. You can work around this for the time being\n" ++
+ "by compiling this module and all those it imports to object code,\n" ++
+ "and re-starting your GHCi session. The panic below contains information,\n" ++
+ "intended for the GHC implementors, about the exact place where GHC gave up.\n"
+ )
+ )
+ `seq`
+ pprPanic msg pp_rep
+
+newExec :: Storable a => [a] -> IO (FunPtr ())
+newExec code
+ = do ptr <- _allocateExec (fromIntegral $ codeSize undefined code)
+ pokeArray ptr code
+ return (castPtrToFunPtr ptr)
+ where
+ codeSize :: Storable a => a -> [a] -> Int
+ codeSize dummy array = sizeOf(dummy) * length array
+
+foreign import ccall unsafe "allocateExec"
+ _allocateExec :: CUInt -> IO (Ptr a)
\end{code}