2 % (c) The University of Glasgow 2001
4 \section[ByteCodeGen]{Generate machine-code sequences for foreign import}
7 module ByteCodeFFI ( taggedSizeW, untaggedSizeW, mkMarshalCode, moan64 ) where
9 #include "HsVersions.h"
12 import PrimRep ( PrimRep(..), getPrimRepSize, isFollowableRep )
13 import ForeignCall ( CCallConv(..) )
15 -- DON'T remove apparently unused imports here .. there is ifdeffery
17 import Bits ( Bits(..), shiftR, shiftL )
19 import Word ( Word8, Word32 )
20 import Addr ( Addr(..), writeWord8OffAddr )
21 import Foreign ( Ptr(..), mallocBytes )
22 import IOExts ( trace, unsafePerformIO )
23 import IO ( hPutStrLn, stderr )
27 %************************************************************************
29 \subsection{The sizes of things. These are platform-independent.}
31 %************************************************************************
35 -- When I push one of these on the H stack, how much does Sp move by?
36 taggedSizeW :: PrimRep -> Int
38 | isFollowableRep pr = 1 {-it's a pointer, Jim-}
39 | otherwise = 1 {-the tag-} + getPrimRepSize pr
41 -- The plain size of something, without tag.
42 untaggedSizeW :: PrimRep -> Int
44 | isFollowableRep pr = 1
45 | otherwise = getPrimRepSize pr
47 -- How big is this thing's tag?
48 sizeOfTagW :: PrimRep -> Int
50 | isFollowableRep pr = 0
53 -- Blast a bunch of bytes into malloc'd memory and return the addr.
54 sendBytesToMallocville :: [Word8] -> IO Addr
55 sendBytesToMallocville bytes
56 = do let n = length bytes
57 (Ptr a#) <- mallocBytes n
58 mapM ( \(off,byte) -> writeWord8OffAddr (A# a#) off byte )
63 %************************************************************************
65 \subsection{The platform-dependent marshall-code-generator.}
67 %************************************************************************
71 moan64 :: String -> SDoc -> a
75 "\nGHCi's bytecode generation machinery can't handle 64-bit\n" ++
76 "code properly yet. You can work around this for the time being\n" ++
77 "by compiling this module and all those it imports to object code,\n" ++
78 "and re-starting your GHCi session. The panic below contains information,\n" ++
79 "intended for the GHC implementors, about the exact place where GHC gave up.\n"
86 -- For sparc_TARGET_ARCH, i386_TARGET_ARCH, etc.
87 #include "nativeGen/NCG.h"
90 Make a piece of code which expects to see the Haskell stack
91 looking like this. It is given a pointer to the lowest word in
92 the stack -- presumably the tag of the placeholder.
98 <placeholder-for-result#> (must be an unboxed type)
100 We cope with both ccall and stdcall for the C fn. However, this code
101 itself expects only to be called using the ccall convention -- that is,
102 we don't clear our own (single) arg off the C stack.
104 mkMarshalCode :: CCallConv
105 -> (Int, PrimRep) -> Int -> [(Int, PrimRep)]
107 mkMarshalCode cconv (r_offW, r_rep) addr_offW arg_offs_n_reps
108 = let bytes = mkMarshalCode_wrk cconv (r_offW, r_rep)
109 addr_offW arg_offs_n_reps
110 in sendBytesToMallocville bytes
115 mkMarshalCode_wrk :: CCallConv
116 -> (Int, PrimRep) -> Int -> [(Int, PrimRep)]
119 mkMarshalCode_wrk cconv (r_offW, r_rep) addr_offW arg_offs_n_reps
123 = let -- Don't change this without first consulting Intel Corp :-)
126 -- addr and result bits offsetsW
127 offset_of_addr_bitsW = addr_offW + sizeOfTagW AddrRep
128 offset_of_res_bitsW = r_offW + sizeOfTagW r_rep
132 [ let -- where this arg's bits start
133 a_bits_offW = a_offW + sizeOfTagW a_rep
135 -- reversed because x86 is little-endian
137 [a_bits_offW .. a_bits_offW + untaggedSizeW a_rep - 1]
139 -- reversed because args are pushed L -> R onto C stack
140 | (a_offW, a_rep) <- reverse arg_offs_n_reps
143 -- some helpers to assemble x86 insns.
144 movl_offespmem_esi offB -- movl offB(%esp), %esi
145 = [0x8B, 0xB4, 0x24] ++ lit32 offB
146 movl_offesimem_ecx offB -- movl offB(%esi), %ecx
147 = [0x8B, 0x8E] ++ lit32 offB
148 save_regs -- pushl all intregs except %esp
149 = [0x50, 0x53, 0x51, 0x52, 0x56, 0x57, 0x55]
150 restore_regs -- popl ditto
151 = [0x5D, 0x5F, 0x5E, 0x5A, 0x59, 0x5B, 0x58]
152 pushl_ecx -- pushl %ecx
154 call_star_ecx -- call * %ecx
156 add_lit_esp lit -- addl $lit, %esp
157 = [0x81, 0xC4] ++ lit32 lit
158 movl_eax_offesimem offB -- movl %eax, offB(%esi)
159 = [0x89, 0x86] ++ lit32 offB
160 movl_edx_offesimem offB -- movl %edx, offB(%esi)
161 = [0x89, 0x96] ++ lit32 offB
164 fstpl_offesimem offB -- fstpl offB(%esi)
165 = [0xDD, 0x9E] ++ lit32 offB
166 fstps_offesimem offB -- fstps offB(%esi)
167 = [0xD9, 0x9E] ++ lit32 offB
168 lit32 :: Int -> [Word8]
169 lit32 i = let w32 = (fromIntegral i) :: Word32
170 in map (fromIntegral . ( .&. 0xFF))
171 [w32, w32 `shiftR` 8,
172 w32 `shiftR` 16, w32 `shiftR` 24]
174 2 0000 8BB42478 movl 0x12345678(%esp), %esi
176 3 0007 8B8E7856 movl 0x12345678(%esi), %ecx
179 5 000d 50535152 pushl %eax ; pushl %ebx ; pushl %ecx ; pushl %edx
180 6 0011 565755 pushl %esi ; pushl %edi ; pushl %ebp
182 8 0014 5D5F5E popl %ebp ; popl %edi ; popl %esi
183 9 0017 5A595B58 popl %edx ; popl %ecx ; popl %ebx ; popl %eax
185 11 001b 51 pushl %ecx
186 12 001c FFD1 call * %ecx
188 14 001e 81C47856 addl $0x12345678, %esp
190 15 0024 89867856 movl %eax, 0x12345678(%esi)
192 16 002a 89967856 movl %edx, 0x12345678(%esi)
195 18 0030 DD967856 fstl 0x12345678(%esi)
197 19 0036 DD9E7856 fstpl 0x12345678(%esi)
199 20 003c D9967856 fsts 0x12345678(%esi)
201 21 0042 D99E7856 fstps 0x12345678(%esi)
209 --trace (show (map fst arg_offs_n_reps))
211 {- On entry, top of C stack 0(%esp) is the RA and 4(%esp) is
212 arg passed from the interpreter.
214 Push all callee saved regs. Push all of them anyway ...
225 {- Now 28+0(%esp) is RA and 28+4(%esp) is the arg (the H stack ptr).
226 We'll use %esi as a temporary to point at the H stack, and
227 %ecx as a temporary to copy via.
229 movl 28+4(%esp), %esi
231 ++ movl_offespmem_esi 32
233 {- For each arg in args_offs_n_reps, examine the associated PrimRep
234 to determine how many payload (non-tag) words there are, and
235 whether or not there is a tag. This gives a bunch of offsets on
236 the H stack to copy to the C stack:
238 movl off1(%esi), %ecx
241 ++ concatMap (\offW -> movl_offesimem_ecx (bytes_per_word * offW)
245 {- Get the addr to call into %ecx, bearing in mind that there's
246 an Addr# tag at the indicated location, and do the call:
248 movl 4*(1 /*tag*/ +addr_offW)(%esi), %ecx
251 ++ movl_offesimem_ecx (bytes_per_word * offset_of_addr_bitsW)
254 {- Nuke the args just pushed and re-establish %esi at the
257 addl $4*number_of_args_pushed, %esp (ccall only)
258 movl 28+4(%esp), %esi
260 ++ (if cconv /= StdCallConv
261 then add_lit_esp (bytes_per_word * length offsets_to_pushW)
263 ++ movl_offespmem_esi 32
265 {- Depending on what the return type is, get the result
266 from %eax or %edx:%eax or %st(0).
268 movl %eax, 4(%esi) -- assuming tagged result
277 ++ let i32 = movl_eax_offesimem 4
278 i64 = movl_eax_offesimem 4 ++ movl_edx_offesimem 8
279 f32 = fstps_offesimem 4
280 f64 = fstpl_offesimem 4
292 other -> moan64 "ByteCodeFFI.mkMarshalCode_wrk(x86)"
295 {- Restore all the pushed regs and go home.
311 #elif sparc_TARGET_ARCH
313 = let -- At least for sparc V8
317 w32_to_w8s_bigEndian :: Word32 -> [Word8]
318 w32_to_w8s_bigEndian w
319 = [fromIntegral (0xFF .&. (w `shiftR` 24)),
320 fromIntegral (0xFF .&. (w `shiftR` 16)),
321 fromIntegral (0xFF .&. (w `shiftR` 8)),
322 fromIntegral (0xFF .&. w)]
324 -- addr and result bits offsetsW
325 offset_of_addr_bitsW = addr_offW + sizeOfTagW AddrRep
326 offset_of_res_bitsW = r_offW + sizeOfTagW r_rep
330 [ let -- where this arg's bits start
331 a_bits_offW = a_offW + sizeOfTagW a_rep
333 [a_bits_offW .. a_bits_offW + untaggedSizeW a_rep - 1]
335 | (a_offW, a_rep) <- arg_offs_n_reps
338 total_argWs = length offsets_to_pushW
339 argWs_on_stack = if total_argWs > 6 then total_argWs - 6
342 -- The stack pointer must be kept 8-byte aligned, which means
343 -- we need to calculate this quantity too
344 argWs_on_stack_ROUNDED_UP
345 | odd argWs_on_stack = 1 + argWs_on_stack
346 | otherwise = argWs_on_stack
348 -- some helpers to assemble sparc insns.
350 iReg, oReg, gReg, fReg :: Int -> Word32
351 iReg = fromIntegral . (+ 24)
352 oReg = fromIntegral . (+ 8)
353 gReg = fromIntegral . (+ 0)
368 insn_r_r_i :: Word32 -> Word32 -> Word32 -> Int -> Word32
369 insn_r_r_i op3 rs1 rd imm13
371 .|. (rs1 `shiftL` 25)
372 .|. (op3 `shiftL` 19)
377 insn_r_i_r :: Word32 -> Word32 -> Int -> Word32 -> Word32
378 insn_r_i_r op3 rs1 imm13 rd
381 .|. (op3 `shiftL` 19)
382 .|. (rs1 `shiftL` 14)
386 mkSimm13 :: Int -> Word32
388 = let imm13w = (fromIntegral imm13) :: Word32
391 -- REAL (non-synthetic) insns
392 -- or %rs1, %rs2, %rd
393 mkOR :: Word32 -> Word32 -> Word32 -> Word32
397 .|. (op3_OR `shiftL` 19)
398 .|. (rs1 `shiftL` 14)
401 where op3_OR = 2 :: Word32
403 -- ld(int) [%rs + imm13], %rd
404 mkLD rs1 imm13 rd = insn_r_r_i 0x00{-op3_LD-} rd rs1 imm13
406 -- st(int) %rs, [%rd + imm13]
407 mkST = insn_r_r_i 0x04 -- op3_ST
409 -- st(float) %rs, [%rd + imm13]
410 mkSTF = insn_r_r_i 0x24 -- op3_STF
412 -- jmpl %rs + imm13, %rd
413 mkJMPL = insn_r_i_r 0x38 -- op3_JMPL
415 -- save %rs + imm13, %rd
416 mkSAVE = insn_r_i_r 0x3C -- op3_SAVE
418 -- restore %rs + imm13, %rd
419 mkRESTORE = insn_r_i_r 0x3D -- op3_RESTORE
422 mkNOP = mkOR g0 g0 g0
423 mkCALL reg = mkJMPL reg 0 o7
424 mkRET = mkJMPL i7 8 g0
425 mkRESTORE_TRIVIAL = mkRESTORE g0 0 g0
427 --trace (show (map fst arg_offs_n_reps))
428 concatMap w32_to_w8s_bigEndian (
430 {- On entry, %o0 is the arg passed from the interpreter. After
431 the initial save insn, it will be in %i0. Studying the sparc
432 docs one would have thought that the minimum frame size is 92
433 bytes, but gcc always uses at least 112, and indeed there are
434 segfaults a-plenty with 92. So I use 112 here as well. I
435 don't understand why, tho.
437 [mkSAVE sp (- ({-92-}112 + 4*argWs_on_stack_ROUNDED_UP)) sp]
439 {- For each arg in args_offs_n_reps, examine the associated PrimRep
440 to determine how many payload (non-tag) words there are, and
441 whether or not there is a tag. This gives a bunch of offsets on
442 the H stack. Move the first 6 words into %o0 .. %o5 and the
443 rest on the stack, starting at [%sp+92]. Use %g1 as a temp.
445 ++ let doArgW (offW, wordNo)
447 = [mkLD i0 (bytes_per_word * offW) (oReg wordNo)]
449 = [mkLD i0 (bytes_per_word * offW) g1,
450 mkST g1 sp (92 + bytes_per_word * (wordNo - 6))]
452 concatMap doArgW (zip offsets_to_pushW [0 ..])
454 {- Get the addr to call into %g1, bearing in mind that there's
455 an Addr# tag at the indicated location, and do the call:
457 ld [4*(1 /*tag*/ +addr_offW) + %i0], %g1
460 ++ [mkLD i0 (bytes_per_word * offset_of_addr_bitsW) g1,
464 {- Depending on what the return type is, get the result
465 from %o0 or %o1:%o0 or %f0 or %f1:%f0.
467 st %o0, [%i0 + 4] -- 32 bit int
469 st %o0, [%i0 + 4] -- 64 bit int
470 st %o1, [%i0 + 8] -- or the other way round?
472 st %f0, [%i0 + 4] -- 32 bit float
474 st %f0, [%i0 + 4] -- 64 bit float
475 st %f1, [%i0 + 8] -- or the other way round?
478 ++ let i32 = [mkST o0 i0 4]
479 i64 = [mkST o0 i0 4, mkST o1 i0 8]
480 f32 = [mkSTF f0 i0 4]
481 f64 = [mkSTF f0 i0 4, mkSTF f1 i0 8]
491 other -> moan64 "ByteCodeFFI.mkMarshalCode_wrk(sparc)"
495 mkRESTORE_TRIVIAL] -- this is in the delay slot of the RET