2 % (c) The University of Glasgow 2001
4 \section[ByteCodeGen]{Generate machine-code sequences for foreign import}
7 module ByteCodeFFI ( taggedSizeW, untaggedSizeW, mkMarshalCode ) where
9 #include "HsVersions.h"
12 import PrimRep ( PrimRep(..), getPrimRepSize, isFollowableRep )
13 import ForeignCall ( CCallConv(..) )
15 -- DON'T remove apparently unused imports here .. there is ifdeffery
17 import Bits ( Bits(..), shiftR, shiftL )
19 import Word ( Word8, Word32 )
20 import Addr ( Addr(..), writeWord8OffAddr )
21 import Foreign ( Ptr(..), mallocBytes )
22 import IOExts ( trace )
26 %************************************************************************
28 \subsection{The sizes of things. These are platform-independent.}
30 %************************************************************************
34 -- When I push one of these on the H stack, how much does Sp move by?
35 taggedSizeW :: PrimRep -> Int
37 | isFollowableRep pr = 1 {-it's a pointer, Jim-}
38 | otherwise = 1 {-the tag-} + getPrimRepSize pr
40 -- The plain size of something, without tag.
41 untaggedSizeW :: PrimRep -> Int
43 | isFollowableRep pr = 1
44 | otherwise = getPrimRepSize pr
46 -- How big is this thing's tag?
47 sizeOfTagW :: PrimRep -> Int
49 | isFollowableRep pr = 0
52 -- Blast a bunch of bytes into malloc'd memory and return the addr.
53 sendBytesToMallocville :: [Word8] -> IO Addr
54 sendBytesToMallocville bytes
55 = do let n = length bytes
56 (Ptr a#) <- mallocBytes n
57 mapM ( \(off,byte) -> writeWord8OffAddr (A# a#) off byte )
62 %************************************************************************
64 \subsection{The platform-dependent marshall-code-generator.}
66 %************************************************************************
70 -- For sparc_TARGET_ARCH, i386_TARGET_ARCH, etc.
71 #include "nativeGen/NCG.h"
74 Make a piece of code which expects to see the Haskell stack
75 looking like this. It is given a pointer to the lowest word in
76 the stack -- presumably the tag of the placeholder.
82 <placeholder-for-result#> (must be an unboxed type)
84 We cope with both ccall and stdcall for the C fn. However, this code
85 itself expects only to be called using the ccall convention -- that is,
86 we don't clear our own (single) arg off the C stack.
88 mkMarshalCode :: CCallConv
89 -> (Int, PrimRep) -> Int -> [(Int, PrimRep)]
91 mkMarshalCode cconv (r_offW, r_rep) addr_offW arg_offs_n_reps
92 = let bytes = mkMarshalCode_wrk cconv (r_offW, r_rep)
93 addr_offW arg_offs_n_reps
94 in sendBytesToMallocville bytes
99 mkMarshalCode_wrk :: CCallConv
100 -> (Int, PrimRep) -> Int -> [(Int, PrimRep)]
103 mkMarshalCode_wrk cconv (r_offW, r_rep) addr_offW arg_offs_n_reps
107 = let -- Don't change this without first consulting Intel Corp :-)
110 -- addr and result bits offsetsW
111 offset_of_addr_bitsW = addr_offW + sizeOfTagW AddrRep
112 offset_of_res_bitsW = r_offW + sizeOfTagW r_rep
116 [ let -- where this arg's bits start
117 a_bits_offW = a_offW + sizeOfTagW a_rep
119 -- reversed because x86 is little-endian
121 [a_bits_offW .. a_bits_offW + untaggedSizeW a_rep - 1]
123 -- reversed because args are pushed L -> R onto C stack
124 | (a_offW, a_rep) <- reverse arg_offs_n_reps
127 -- some helpers to assemble x86 insns.
128 movl_offespmem_esi offB -- movl offB(%esp), %esi
129 = [0x8B, 0xB4, 0x24] ++ lit32 offB
130 movl_offesimem_ecx offB -- movl offB(%esi), %ecx
131 = [0x8B, 0x8E] ++ lit32 offB
132 save_regs -- pushl all intregs except %esp
133 = [0x50, 0x53, 0x51, 0x52, 0x56, 0x57, 0x55]
134 restore_regs -- popl ditto
135 = [0x5D, 0x5F, 0x5E, 0x5A, 0x59, 0x5B, 0x58]
136 pushl_ecx -- pushl %ecx
138 call_star_ecx -- call * %ecx
140 add_lit_esp lit -- addl $lit, %esp
141 = [0x81, 0xC4] ++ lit32 lit
142 movl_eax_offesimem offB -- movl %eax, offB(%esi)
143 = [0x89, 0x86] ++ lit32 offB
146 fstpl_offesimem offB -- fstpl offB(%esi)
147 = [0xDD, 0x9E] ++ lit32 offB
148 fstps_offesimem offB -- fstps offB(%esi)
149 = [0xD9, 0x9E] ++ lit32 offB
150 lit32 :: Int -> [Word8]
151 lit32 i = let w32 = (fromIntegral i) :: Word32
152 in map (fromIntegral . ( .&. 0xFF))
153 [w32, w32 `shiftR` 8,
154 w32 `shiftR` 16, w32 `shiftR` 24]
156 2 0000 8BB42478 movl 0x12345678(%esp), %esi
158 3 0007 8B8E7856 movl 0x12345678(%esi), %ecx
161 5 000d 50535152 pushl %eax ; pushl %ebx ; pushl %ecx ; pushl %edx
162 6 0011 565755 pushl %esi ; pushl %edi ; pushl %ebp
164 8 0014 5D5F5E popl %ebp ; popl %edi ; popl %esi
165 9 0017 5A595B58 popl %edx ; popl %ecx ; popl %ebx ; popl %eax
167 11 001b 51 pushl %ecx
168 12 001c FFD1 call * %ecx
170 14 001e 81C47856 addl $0x12345678, %esp
172 15 0024 89867856 movl %eax, 0x12345678(%esi)
174 16 002a 89967856 movl %edx, 0x12345678(%esi)
177 18 0030 DD967856 fstl 0x12345678(%esi)
179 19 0036 DD9E7856 fstpl 0x12345678(%esi)
181 20 003c D9967856 fsts 0x12345678(%esi)
183 21 0042 D99E7856 fstps 0x12345678(%esi)
191 --trace (show (map fst arg_offs_n_reps))
193 {- On entry, top of C stack 0(%esp) is the RA and 4(%esp) is
194 arg passed from the interpreter.
196 Push all callee saved regs. Push all of them anyway ...
207 {- Now 28+0(%esp) is RA and 28+4(%esp) is the arg (the H stack ptr).
208 We'll use %esi as a temporary to point at the H stack, and
209 %ecx as a temporary to copy via.
211 movl 28+4(%esp), %esi
213 ++ movl_offespmem_esi 32
215 {- For each arg in args_offs_n_reps, examine the associated PrimRep
216 to determine how many payload (non-tag) words there are, and
217 whether or not there is a tag. This gives a bunch of offsets on
218 the H stack to copy to the C stack:
220 movl off1(%esi), %ecx
223 ++ concatMap (\offW -> movl_offesimem_ecx (bytes_per_word * offW)
227 {- Get the addr to call into %ecx, bearing in mind that there's
228 an Addr# tag at the indicated location, and do the call:
230 movl 4*(1 /*tag*/ +addr_offW)(%esi), %ecx
233 ++ movl_offesimem_ecx (bytes_per_word * offset_of_addr_bitsW)
236 {- Nuke the args just pushed and re-establish %esi at the
239 addl $4*number_of_args_pushed, %esp (ccall only)
240 movl 28+4(%esp), %esi
242 ++ (if cconv /= StdCallConv
243 then add_lit_esp (bytes_per_word * length offsets_to_pushW)
245 ++ movl_offespmem_esi 32
247 {- Depending on what the return type is, get the result
248 from %eax or %edx:%eax or %st(0).
250 movl %eax, 4(%esi) -- assuming tagged result
260 IntRep -> movl_eax_offesimem 4
261 WordRep -> movl_eax_offesimem 4
262 AddrRep -> movl_eax_offesimem 4
263 DoubleRep -> fstpl_offesimem 4
264 FloatRep -> fstps_offesimem 4
266 other -> pprPanic "ByteCodeFFI.mkMarshalCode_wrk(x86)"
269 {- Restore all the pushed regs and go home.
285 #elif sparc_TARGET_ARCH
287 = let -- At least for sparc V8
291 w32_to_w8s_bigEndian :: Word32 -> [Word8]
292 w32_to_w8s_bigEndian w
293 = [fromIntegral (0xFF .&. (w `shiftR` 24)),
294 fromIntegral (0xFF .&. (w `shiftR` 16)),
295 fromIntegral (0xFF .&. (w `shiftR` 8)),
296 fromIntegral (0xFF .&. w)]
298 -- addr and result bits offsetsW
299 offset_of_addr_bitsW = addr_offW + sizeOfTagW AddrRep
300 offset_of_res_bitsW = r_offW + sizeOfTagW r_rep
304 [ let -- where this arg's bits start
305 a_bits_offW = a_offW + sizeOfTagW a_rep
307 [a_bits_offW .. a_bits_offW + untaggedSizeW a_rep - 1]
309 | (a_offW, a_rep) <- arg_offs_n_reps
312 total_argWs = length offsets_to_pushW
313 argWs_on_stack = if total_argWs > 6 then total_argWs - 6
316 -- The stack pointer must be kept 8-byte aligned, which means
317 -- we need to calculate this quantity too
318 argWs_on_stack_ROUNDED_UP
319 | odd argWs_on_stack = 1 + argWs_on_stack
320 | otherwise = argWs_on_stack
322 -- some helpers to assemble sparc insns.
324 iReg, oReg, gReg, fReg :: Int -> Word32
325 iReg = fromIntegral . (+ 24)
326 oReg = fromIntegral . (+ 8)
327 gReg = fromIntegral . (+ 0)
342 insn_r_r_i :: Word32 -> Word32 -> Word32 -> Int -> Word32
343 insn_r_r_i op3 rs1 rd imm13
345 .|. (rs1 `shiftL` 25)
346 .|. (op3 `shiftL` 19)
351 insn_r_i_r :: Word32 -> Word32 -> Int -> Word32 -> Word32
352 insn_r_i_r op3 rs1 imm13 rd
355 .|. (op3 `shiftL` 19)
356 .|. (rs1 `shiftL` 14)
360 mkSimm13 :: Int -> Word32
362 = let imm13w = (fromIntegral imm13) :: Word32
365 -- REAL (non-synthetic) insns
366 -- or %rs1, %rs2, %rd
367 mkOR :: Word32 -> Word32 -> Word32 -> Word32
371 .|. (op3_OR `shiftL` 19)
372 .|. (rs1 `shiftL` 14)
375 where op3_OR = 2 :: Word32
377 -- ld(int) [%rs + imm13], %rd
378 mkLD rs1 imm13 rd = insn_r_r_i 0x00{-op3_LD-} rd rs1 imm13
380 -- st(int) %rs, [%rd + imm13]
381 mkST = insn_r_r_i 0x04 -- op3_ST
383 -- st(float) %rs, [%rd + imm13]
384 mkSTF = insn_r_r_i 0x24 -- op3_STF
386 -- jmpl %rs + imm13, %rd
387 mkJMPL = insn_r_i_r 0x38 -- op3_JMPL
389 -- save %rs + imm13, %rd
390 mkSAVE = insn_r_i_r 0x3C -- op3_SAVE
392 -- restore %rs + imm13, %rd
393 mkRESTORE = insn_r_i_r 0x3D -- op3_RESTORE
396 mkNOP = mkOR g0 g0 g0
397 mkCALL reg = mkJMPL reg 0 o7
398 mkRET = mkJMPL i7 8 g0
399 mkRESTORE_TRIVIAL = mkRESTORE g0 0 g0
401 --trace (show (map fst arg_offs_n_reps))
402 concatMap w32_to_w8s_bigEndian (
404 {- On entry, %o0 is the arg passed from the interpreter. After
405 the initial save insn, it will be in %i0. Studying the sparc
406 docs one would have thought that the minimum frame size is 92
407 bytes, but gcc always uses at least 112, and indeed there are
408 segfaults a-plenty with 92. So I use 112 here as well. I
409 don't understand why, tho.
411 [mkSAVE sp (- ({-92-}112 + 4*argWs_on_stack_ROUNDED_UP)) sp]
413 {- For each arg in args_offs_n_reps, examine the associated PrimRep
414 to determine how many payload (non-tag) words there are, and
415 whether or not there is a tag. This gives a bunch of offsets on
416 the H stack. Move the first 6 words into %o0 .. %o5 and the
417 rest on the stack, starting at [%sp+92]. Use %g1 as a temp.
419 ++ let doArgW (offW, wordNo)
421 = [mkLD i0 (bytes_per_word * offW) (oReg wordNo)]
423 = [mkLD i0 (bytes_per_word * offW) g1,
424 mkST g1 sp (92 + bytes_per_word * (wordNo - 6))]
426 concatMap doArgW (zip offsets_to_pushW [0 ..])
428 {- Get the addr to call into %g1, bearing in mind that there's
429 an Addr# tag at the indicated location, and do the call:
431 ld [4*(1 /*tag*/ +addr_offW) + %i0], %g1
434 ++ [mkLD i0 (bytes_per_word * offset_of_addr_bitsW) g1,
438 {- Depending on what the return type is, get the result
439 from %o0 or %o1:%o0 or %f0 or %f1:%f0.
441 st %o0, [%i0 + 4] -- 32 bit int
443 st %o0, [%i0 + 4] -- 64 bit int
444 st %o1, [%i0 + 8] -- or the other way round?
446 st %f0, [%i0 + 4] -- 32 bit float
448 st %f0, [%i0 + 4] -- 64 bit float
449 st %f1, [%i0 + 8] -- or the other way round?
452 ++ let i32 = [mkST o0 i0 4]
453 i64 = [mkST o0 i0 4, mkST o1 i0 8]
454 f32 = [mkSTF f0 i0 4]
455 f64 = [mkSTF f0 i0 4, mkSTF f1 i0 8]
464 other -> pprPanic "ByteCodeFFI.mkMarshalCode_wrk(sparc)"
468 mkRESTORE_TRIVIAL] -- this is in the delay slot of the RET