2 % (c) The University of Glasgow 2001
4 \section[ByteCodeGen]{Generate machine-code sequences for foreign import}
7 module ByteCodeFFI ( taggedSizeW, untaggedSizeW, mkMarshalCode, moan64 ) where
9 #include "HsVersions.h"
12 import PrimRep ( PrimRep(..), getPrimRepSize, isFollowableRep )
13 import ForeignCall ( CCallConv(..) )
15 -- DON'T remove apparently unused imports here .. there is ifdeffery
17 import Bits ( Bits(..), shiftR, shiftL )
18 import Foreign ( newArray )
20 import Word ( Word8, Word32 )
21 import Foreign ( Ptr(..), mallocBytes )
22 import IOExts ( trace, unsafePerformIO )
23 import IO ( hPutStrLn, stderr )
26 %************************************************************************
28 \subsection{The sizes of things. These are platform-independent.}
30 %************************************************************************
34 -- When I push one of these on the H stack, how much does Sp move by?
35 taggedSizeW :: PrimRep -> Int
37 | isFollowableRep pr = 1 {-it's a pointer, Jim-}
38 | otherwise = 1 {-the tag-} + getPrimRepSize pr
40 -- The plain size of something, without tag.
41 untaggedSizeW :: PrimRep -> Int
43 | isFollowableRep pr = 1
44 | otherwise = getPrimRepSize pr
46 -- How big is this thing's tag?
47 sizeOfTagW :: PrimRep -> Int
49 | isFollowableRep pr = 0
53 %************************************************************************
55 \subsection{The platform-dependent marshall-code-generator.}
57 %************************************************************************
61 moan64 :: String -> SDoc -> a
65 "\nGHCi's bytecode generation machinery can't handle 64-bit\n" ++
66 "code properly yet. You can work around this for the time being\n" ++
67 "by compiling this module and all those it imports to object code,\n" ++
68 "and re-starting your GHCi session. The panic below contains information,\n" ++
69 "intended for the GHC implementors, about the exact place where GHC gave up.\n"
76 -- For sparc_TARGET_ARCH, i386_TARGET_ARCH, etc.
77 #include "nativeGen/NCG.h"
80 Make a piece of code which expects to see the Haskell stack
81 looking like this. It is given a pointer to the lowest word in
82 the stack -- presumably the tag of the placeholder.
88 <placeholder-for-result#> (must be an unboxed type)
90 We cope with both ccall and stdcall for the C fn. However, this code
91 itself expects only to be called using the ccall convention -- that is,
92 we don't clear our own (single) arg off the C stack.
94 mkMarshalCode :: CCallConv
95 -> (Int, PrimRep) -> Int -> [(Int, PrimRep)]
97 mkMarshalCode cconv (r_offW, r_rep) addr_offW arg_offs_n_reps
98 = let bytes = mkMarshalCode_wrk cconv (r_offW, r_rep)
99 addr_offW arg_offs_n_reps
100 in Foreign.newArray bytes
105 mkMarshalCode_wrk :: CCallConv
106 -> (Int, PrimRep) -> Int -> [(Int, PrimRep)]
109 mkMarshalCode_wrk cconv (r_offW, r_rep) addr_offW arg_offs_n_reps
113 = let -- Don't change this without first consulting Intel Corp :-)
116 -- addr and result bits offsetsW
117 offset_of_addr_bitsW = addr_offW + sizeOfTagW AddrRep
118 offset_of_res_bitsW = r_offW + sizeOfTagW r_rep
122 [ let -- where this arg's bits start
123 a_bits_offW = a_offW + sizeOfTagW a_rep
125 -- reversed because x86 is little-endian
127 [a_bits_offW .. a_bits_offW + untaggedSizeW a_rep - 1]
129 -- reversed because args are pushed L -> R onto C stack
130 | (a_offW, a_rep) <- reverse arg_offs_n_reps
133 -- some helpers to assemble x86 insns.
134 movl_offespmem_esi offB -- movl offB(%esp), %esi
135 = [0x8B, 0xB4, 0x24] ++ lit32 offB
136 movl_offesimem_ecx offB -- movl offB(%esi), %ecx
137 = [0x8B, 0x8E] ++ lit32 offB
138 save_regs -- pushl all intregs except %esp
139 = [0x50, 0x53, 0x51, 0x52, 0x56, 0x57, 0x55]
140 restore_regs -- popl ditto
141 = [0x5D, 0x5F, 0x5E, 0x5A, 0x59, 0x5B, 0x58]
142 pushl_ecx -- pushl %ecx
144 call_star_ecx -- call * %ecx
146 add_lit_esp lit -- addl $lit, %esp
147 = [0x81, 0xC4] ++ lit32 lit
148 movl_eax_offesimem offB -- movl %eax, offB(%esi)
149 = [0x89, 0x86] ++ lit32 offB
150 movl_edx_offesimem offB -- movl %edx, offB(%esi)
151 = [0x89, 0x96] ++ lit32 offB
154 fstpl_offesimem offB -- fstpl offB(%esi)
155 = [0xDD, 0x9E] ++ lit32 offB
156 fstps_offesimem offB -- fstps offB(%esi)
157 = [0xD9, 0x9E] ++ lit32 offB
158 lit32 :: Int -> [Word8]
159 lit32 i = let w32 = (fromIntegral i) :: Word32
160 in map (fromIntegral . ( .&. 0xFF))
161 [w32, w32 `shiftR` 8,
162 w32 `shiftR` 16, w32 `shiftR` 24]
164 2 0000 8BB42478 movl 0x12345678(%esp), %esi
166 3 0007 8B8E7856 movl 0x12345678(%esi), %ecx
169 5 000d 50535152 pushl %eax ; pushl %ebx ; pushl %ecx ; pushl %edx
170 6 0011 565755 pushl %esi ; pushl %edi ; pushl %ebp
172 8 0014 5D5F5E popl %ebp ; popl %edi ; popl %esi
173 9 0017 5A595B58 popl %edx ; popl %ecx ; popl %ebx ; popl %eax
175 11 001b 51 pushl %ecx
176 12 001c FFD1 call * %ecx
178 14 001e 81C47856 addl $0x12345678, %esp
180 15 0024 89867856 movl %eax, 0x12345678(%esi)
182 16 002a 89967856 movl %edx, 0x12345678(%esi)
185 18 0030 DD967856 fstl 0x12345678(%esi)
187 19 0036 DD9E7856 fstpl 0x12345678(%esi)
189 20 003c D9967856 fsts 0x12345678(%esi)
191 21 0042 D99E7856 fstps 0x12345678(%esi)
199 --trace (show (map fst arg_offs_n_reps))
201 {- On entry, top of C stack 0(%esp) is the RA and 4(%esp) is
202 arg passed from the interpreter.
204 Push all callee saved regs. Push all of them anyway ...
215 {- Now 28+0(%esp) is RA and 28+4(%esp) is the arg (the H stack ptr).
216 We'll use %esi as a temporary to point at the H stack, and
217 %ecx as a temporary to copy via.
219 movl 28+4(%esp), %esi
221 ++ movl_offespmem_esi 32
223 {- For each arg in args_offs_n_reps, examine the associated PrimRep
224 to determine how many payload (non-tag) words there are, and
225 whether or not there is a tag. This gives a bunch of offsets on
226 the H stack to copy to the C stack:
228 movl off1(%esi), %ecx
231 ++ concatMap (\offW -> movl_offesimem_ecx (bytes_per_word * offW)
235 {- Get the addr to call into %ecx, bearing in mind that there's
236 an Addr# tag at the indicated location, and do the call:
238 movl 4*(1 /*tag*/ +addr_offW)(%esi), %ecx
241 ++ movl_offesimem_ecx (bytes_per_word * offset_of_addr_bitsW)
244 {- Nuke the args just pushed and re-establish %esi at the
247 addl $4*number_of_args_pushed, %esp (ccall only)
248 movl 28+4(%esp), %esi
250 ++ (if cconv /= StdCallConv
251 then add_lit_esp (bytes_per_word * length offsets_to_pushW)
253 ++ movl_offespmem_esi 32
255 {- Depending on what the return type is, get the result
256 from %eax or %edx:%eax or %st(0).
258 movl %eax, 4(%esi) -- assuming tagged result
267 ++ let i32 = movl_eax_offesimem 4
268 i64 = movl_eax_offesimem 4 ++ movl_edx_offesimem 8
269 f32 = fstps_offesimem 4
270 f64 = fstpl_offesimem 4
282 other -> moan64 "ByteCodeFFI.mkMarshalCode_wrk(x86)"
285 {- Restore all the pushed regs and go home.
301 #elif sparc_TARGET_ARCH
303 = let -- At least for sparc V8
307 w32_to_w8s_bigEndian :: Word32 -> [Word8]
308 w32_to_w8s_bigEndian w
309 = [fromIntegral (0xFF .&. (w `shiftR` 24)),
310 fromIntegral (0xFF .&. (w `shiftR` 16)),
311 fromIntegral (0xFF .&. (w `shiftR` 8)),
312 fromIntegral (0xFF .&. w)]
314 -- addr and result bits offsetsW
315 offset_of_addr_bitsW = addr_offW + sizeOfTagW AddrRep
316 offset_of_res_bitsW = r_offW + sizeOfTagW r_rep
320 [ let -- where this arg's bits start
321 a_bits_offW = a_offW + sizeOfTagW a_rep
323 [a_bits_offW .. a_bits_offW + untaggedSizeW a_rep - 1]
325 | (a_offW, a_rep) <- arg_offs_n_reps
328 total_argWs = length offsets_to_pushW
329 argWs_on_stack = if total_argWs > 6 then total_argWs - 6
332 -- The stack pointer must be kept 8-byte aligned, which means
333 -- we need to calculate this quantity too
334 argWs_on_stack_ROUNDED_UP
335 | odd argWs_on_stack = 1 + argWs_on_stack
336 | otherwise = argWs_on_stack
338 -- some helpers to assemble sparc insns.
340 iReg, oReg, gReg, fReg :: Int -> Word32
341 iReg = fromIntegral . (+ 24)
342 oReg = fromIntegral . (+ 8)
343 gReg = fromIntegral . (+ 0)
358 insn_r_r_i :: Word32 -> Word32 -> Word32 -> Int -> Word32
359 insn_r_r_i op3 rs1 rd imm13
361 .|. (rs1 `shiftL` 25)
362 .|. (op3 `shiftL` 19)
367 insn_r_i_r :: Word32 -> Word32 -> Int -> Word32 -> Word32
368 insn_r_i_r op3 rs1 imm13 rd
371 .|. (op3 `shiftL` 19)
372 .|. (rs1 `shiftL` 14)
376 mkSimm13 :: Int -> Word32
378 = let imm13w = (fromIntegral imm13) :: Word32
381 -- REAL (non-synthetic) insns
382 -- or %rs1, %rs2, %rd
383 mkOR :: Word32 -> Word32 -> Word32 -> Word32
387 .|. (op3_OR `shiftL` 19)
388 .|. (rs1 `shiftL` 14)
391 where op3_OR = 2 :: Word32
393 -- ld(int) [%rs + imm13], %rd
394 mkLD rs1 imm13 rd = insn_r_r_i 0x00{-op3_LD-} rd rs1 imm13
396 -- st(int) %rs, [%rd + imm13]
397 mkST = insn_r_r_i 0x04 -- op3_ST
399 -- st(float) %rs, [%rd + imm13]
400 mkSTF = insn_r_r_i 0x24 -- op3_STF
402 -- jmpl %rs + imm13, %rd
403 mkJMPL = insn_r_i_r 0x38 -- op3_JMPL
405 -- save %rs + imm13, %rd
406 mkSAVE = insn_r_i_r 0x3C -- op3_SAVE
408 -- restore %rs + imm13, %rd
409 mkRESTORE = insn_r_i_r 0x3D -- op3_RESTORE
412 mkNOP = mkOR g0 g0 g0
413 mkCALL reg = mkJMPL reg 0 o7
414 mkRET = mkJMPL i7 8 g0
415 mkRESTORE_TRIVIAL = mkRESTORE g0 0 g0
417 --trace (show (map fst arg_offs_n_reps))
418 concatMap w32_to_w8s_bigEndian (
420 {- On entry, %o0 is the arg passed from the interpreter. After
421 the initial save insn, it will be in %i0. Studying the sparc
422 docs one would have thought that the minimum frame size is 92
423 bytes, but gcc always uses at least 112, and indeed there are
424 segfaults a-plenty with 92. So I use 112 here as well. I
425 don't understand why, tho.
427 [mkSAVE sp (- ({-92-}112 + 4*argWs_on_stack_ROUNDED_UP)) sp]
429 {- For each arg in args_offs_n_reps, examine the associated PrimRep
430 to determine how many payload (non-tag) words there are, and
431 whether or not there is a tag. This gives a bunch of offsets on
432 the H stack. Move the first 6 words into %o0 .. %o5 and the
433 rest on the stack, starting at [%sp+92]. Use %g1 as a temp.
435 ++ let doArgW (offW, wordNo)
437 = [mkLD i0 (bytes_per_word * offW) (oReg wordNo)]
439 = [mkLD i0 (bytes_per_word * offW) g1,
440 mkST g1 sp (92 + bytes_per_word * (wordNo - 6))]
442 concatMap doArgW (zip offsets_to_pushW [0 ..])
444 {- Get the addr to call into %g1, bearing in mind that there's
445 an Addr# tag at the indicated location, and do the call:
447 ld [4*(1 /*tag*/ +addr_offW) + %i0], %g1
450 ++ [mkLD i0 (bytes_per_word * offset_of_addr_bitsW) g1,
454 {- Depending on what the return type is, get the result
455 from %o0 or %o1:%o0 or %f0 or %f1:%f0.
457 st %o0, [%i0 + 4] -- 32 bit int
459 st %o0, [%i0 + 4] -- 64 bit int
460 st %o1, [%i0 + 8] -- or the other way round?
462 st %f0, [%i0 + 4] -- 32 bit float
464 st %f0, [%i0 + 4] -- 64 bit float
465 st %f1, [%i0 + 8] -- or the other way round?
468 ++ let i32 = [mkST o0 i0 4]
469 i64 = [mkST o0 i0 4, mkST o1 i0 8]
470 f32 = [mkSTF f0 i0 4]
471 f64 = [mkSTF f0 i0 4, mkSTF f1 i0 8]
481 other -> moan64 "ByteCodeFFI.mkMarshalCode_wrk(sparc)"
485 mkRESTORE_TRIVIAL] -- this is in the delay slot of the RET