2 % (c) The University of Glasgow 2001
4 \section[ByteCodeGen]{Generate machine-code sequences for foreign import}
7 module ByteCodeFFI ( taggedSizeW, untaggedSizeW, mkMarshalCode, moan64 ) where
9 #include "HsVersions.h"
12 import PrimRep ( PrimRep(..), getPrimRepSize, isFollowableRep )
13 import ForeignCall ( CCallConv(..) )
15 -- DON'T remove apparently unused imports here ..
16 -- there is ifdeffery below
17 import DATA_BITS ( Bits(..), shiftR, shiftL )
18 import Foreign ( newArray )
20 import DATA_WORD ( Word8, Word32 )
21 import Foreign ( Ptr )
22 import System.IO.Unsafe ( unsafePerformIO )
23 import IO ( hPutStrLn, stderr )
24 -- import Debug.Trace ( trace )
27 %************************************************************************
29 \subsection{The sizes of things. These are platform-independent.}
31 %************************************************************************
35 -- When I push one of these on the H stack, how much does Sp move by?
36 taggedSizeW :: PrimRep -> Int
38 | isFollowableRep pr = 1 {-it's a pointer, Jim-}
39 | otherwise = 1 {-the tag-} + getPrimRepSize pr
41 -- The plain size of something, without tag.
42 untaggedSizeW :: PrimRep -> Int
44 | isFollowableRep pr = 1
45 | otherwise = getPrimRepSize pr
47 -- How big is this thing's tag?
48 sizeOfTagW :: PrimRep -> Int
50 | isFollowableRep pr = 0
54 %************************************************************************
56 \subsection{The platform-dependent marshall-code-generator.}
58 %************************************************************************
62 moan64 :: String -> SDoc -> a
66 "\nGHCi's bytecode generation machinery can't handle 64-bit\n" ++
67 "code properly yet. You can work around this for the time being\n" ++
68 "by compiling this module and all those it imports to object code,\n" ++
69 "and re-starting your GHCi session. The panic below contains information,\n" ++
70 "intended for the GHC implementors, about the exact place where GHC gave up.\n"
77 -- For sparc_TARGET_ARCH, i386_TARGET_ARCH, etc.
78 #include "nativeGen/NCG.h"
81 Make a piece of code which expects to see the Haskell stack
82 looking like this. It is given a pointer to the lowest word in
83 the stack -- presumably the tag of the placeholder.
89 <placeholder-for-result#> (must be an unboxed type)
91 We cope with both ccall and stdcall for the C fn. However, this code
92 itself expects only to be called using the ccall convention -- that is,
93 we don't clear our own (single) arg off the C stack.
95 mkMarshalCode :: CCallConv
96 -> (Int, PrimRep) -> Int -> [(Int, PrimRep)]
98 mkMarshalCode cconv (r_offW, r_rep) addr_offW arg_offs_n_reps
99 = let bytes = mkMarshalCode_wrk cconv (r_offW, r_rep)
100 addr_offW arg_offs_n_reps
101 in Foreign.newArray bytes
106 mkMarshalCode_wrk :: CCallConv
107 -> (Int, PrimRep) -> Int -> [(Int, PrimRep)]
110 mkMarshalCode_wrk cconv (r_offW, r_rep) addr_offW arg_offs_n_reps
114 = let -- Don't change this without first consulting Intel Corp :-)
117 -- addr and result bits offsetsW
118 offset_of_addr_bitsW = addr_offW + sizeOfTagW AddrRep
119 offset_of_res_bitsW = r_offW + sizeOfTagW r_rep
123 [ let -- where this arg's bits start
124 a_bits_offW = a_offW + sizeOfTagW a_rep
126 -- reversed because x86 is little-endian
128 [a_bits_offW .. a_bits_offW + untaggedSizeW a_rep - 1]
130 -- reversed because args are pushed L -> R onto C stack
131 | (a_offW, a_rep) <- reverse arg_offs_n_reps
134 -- some helpers to assemble x86 insns.
135 movl_offespmem_esi offB -- movl offB(%esp), %esi
136 = [0x8B, 0xB4, 0x24] ++ lit32 offB
137 movl_offesimem_ecx offB -- movl offB(%esi), %ecx
138 = [0x8B, 0x8E] ++ lit32 offB
139 save_regs -- pushl all intregs except %esp
140 = [0x50, 0x53, 0x51, 0x52, 0x56, 0x57, 0x55]
141 restore_regs -- popl ditto
142 = [0x5D, 0x5F, 0x5E, 0x5A, 0x59, 0x5B, 0x58]
143 pushl_ecx -- pushl %ecx
145 call_star_ecx -- call * %ecx
147 add_lit_esp lit -- addl $lit, %esp
148 = [0x81, 0xC4] ++ lit32 lit
149 movl_eax_offesimem offB -- movl %eax, offB(%esi)
150 = [0x89, 0x86] ++ lit32 offB
151 movl_edx_offesimem offB -- movl %edx, offB(%esi)
152 = [0x89, 0x96] ++ lit32 offB
155 fstpl_offesimem offB -- fstpl offB(%esi)
156 = [0xDD, 0x9E] ++ lit32 offB
157 fstps_offesimem offB -- fstps offB(%esi)
158 = [0xD9, 0x9E] ++ lit32 offB
159 lit32 :: Int -> [Word8]
160 lit32 i = let w32 = (fromIntegral i) :: Word32
161 in map (fromIntegral . ( .&. 0xFF))
162 [w32, w32 `shiftR` 8,
163 w32 `shiftR` 16, w32 `shiftR` 24]
165 2 0000 8BB42478 movl 0x12345678(%esp), %esi
167 3 0007 8B8E7856 movl 0x12345678(%esi), %ecx
170 5 000d 50535152 pushl %eax ; pushl %ebx ; pushl %ecx ; pushl %edx
171 6 0011 565755 pushl %esi ; pushl %edi ; pushl %ebp
173 8 0014 5D5F5E popl %ebp ; popl %edi ; popl %esi
174 9 0017 5A595B58 popl %edx ; popl %ecx ; popl %ebx ; popl %eax
176 11 001b 51 pushl %ecx
177 12 001c FFD1 call * %ecx
179 14 001e 81C47856 addl $0x12345678, %esp
181 15 0024 89867856 movl %eax, 0x12345678(%esi)
183 16 002a 89967856 movl %edx, 0x12345678(%esi)
186 18 0030 DD967856 fstl 0x12345678(%esi)
188 19 0036 DD9E7856 fstpl 0x12345678(%esi)
190 20 003c D9967856 fsts 0x12345678(%esi)
192 21 0042 D99E7856 fstps 0x12345678(%esi)
200 --trace (show (map fst arg_offs_n_reps))
202 {- On entry, top of C stack 0(%esp) is the RA and 4(%esp) is
203 arg passed from the interpreter.
205 Push all callee saved regs. Push all of them anyway ...
216 {- Now 28+0(%esp) is RA and 28+4(%esp) is the arg (the H stack ptr).
217 We'll use %esi as a temporary to point at the H stack, and
218 %ecx as a temporary to copy via.
220 movl 28+4(%esp), %esi
222 ++ movl_offespmem_esi 32
224 {- For each arg in args_offs_n_reps, examine the associated PrimRep
225 to determine how many payload (non-tag) words there are, and
226 whether or not there is a tag. This gives a bunch of offsets on
227 the H stack to copy to the C stack:
229 movl off1(%esi), %ecx
232 ++ concatMap (\offW -> movl_offesimem_ecx (bytes_per_word * offW)
236 {- Get the addr to call into %ecx, bearing in mind that there's
237 an Addr# tag at the indicated location, and do the call:
239 movl 4*(1 /*tag*/ +addr_offW)(%esi), %ecx
242 ++ movl_offesimem_ecx (bytes_per_word * offset_of_addr_bitsW)
245 {- Nuke the args just pushed and re-establish %esi at the
248 addl $4*number_of_args_pushed, %esp (ccall only)
249 movl 28+4(%esp), %esi
251 ++ (if cconv /= StdCallConv
252 then add_lit_esp (bytes_per_word * length offsets_to_pushW)
254 ++ movl_offespmem_esi 32
256 {- Depending on what the return type is, get the result
257 from %eax or %edx:%eax or %st(0).
259 movl %eax, 4(%esi) -- assuming tagged result
268 ++ let i32 = movl_eax_offesimem 4
269 i64 = movl_eax_offesimem 4 ++ movl_edx_offesimem 8
270 f32 = fstps_offesimem 4
271 f64 = fstpl_offesimem 4
283 other -> moan64 "ByteCodeFFI.mkMarshalCode_wrk(x86)"
286 {- Restore all the pushed regs and go home.
302 #elif sparc_TARGET_ARCH
304 = let -- At least for sparc V8
308 w32_to_w8s_bigEndian :: Word32 -> [Word8]
309 w32_to_w8s_bigEndian w
310 = [fromIntegral (0xFF .&. (w `shiftR` 24)),
311 fromIntegral (0xFF .&. (w `shiftR` 16)),
312 fromIntegral (0xFF .&. (w `shiftR` 8)),
313 fromIntegral (0xFF .&. w)]
315 -- addr and result bits offsetsW
316 offset_of_addr_bitsW = addr_offW + sizeOfTagW AddrRep
317 offset_of_res_bitsW = r_offW + sizeOfTagW r_rep
321 [ let -- where this arg's bits start
322 a_bits_offW = a_offW + sizeOfTagW a_rep
324 [a_bits_offW .. a_bits_offW + untaggedSizeW a_rep - 1]
326 | (a_offW, a_rep) <- arg_offs_n_reps
329 total_argWs = length offsets_to_pushW
330 argWs_on_stack = if total_argWs > 6 then total_argWs - 6
333 -- The stack pointer must be kept 8-byte aligned, which means
334 -- we need to calculate this quantity too
335 argWs_on_stack_ROUNDED_UP
336 | odd argWs_on_stack = 1 + argWs_on_stack
337 | otherwise = argWs_on_stack
339 -- some helpers to assemble sparc insns.
341 iReg, oReg, gReg, fReg :: Int -> Word32
342 iReg = fromIntegral . (+ 24)
343 oReg = fromIntegral . (+ 8)
344 gReg = fromIntegral . (+ 0)
359 insn_r_r_i :: Word32 -> Word32 -> Word32 -> Int -> Word32
360 insn_r_r_i op3 rs1 rd imm13
362 .|. (rs1 `shiftL` 25)
363 .|. (op3 `shiftL` 19)
368 insn_r_i_r :: Word32 -> Word32 -> Int -> Word32 -> Word32
369 insn_r_i_r op3 rs1 imm13 rd
372 .|. (op3 `shiftL` 19)
373 .|. (rs1 `shiftL` 14)
377 mkSimm13 :: Int -> Word32
379 = let imm13w = (fromIntegral imm13) :: Word32
382 -- REAL (non-synthetic) insns
383 -- or %rs1, %rs2, %rd
384 mkOR :: Word32 -> Word32 -> Word32 -> Word32
388 .|. (op3_OR `shiftL` 19)
389 .|. (rs1 `shiftL` 14)
392 where op3_OR = 2 :: Word32
394 -- ld(int) [%rs + imm13], %rd
395 mkLD rs1 imm13 rd = insn_r_r_i 0x00{-op3_LD-} rd rs1 imm13
397 -- st(int) %rs, [%rd + imm13]
398 mkST = insn_r_r_i 0x04 -- op3_ST
400 -- st(float) %rs, [%rd + imm13]
401 mkSTF = insn_r_r_i 0x24 -- op3_STF
403 -- jmpl %rs + imm13, %rd
404 mkJMPL = insn_r_i_r 0x38 -- op3_JMPL
406 -- save %rs + imm13, %rd
407 mkSAVE = insn_r_i_r 0x3C -- op3_SAVE
409 -- restore %rs + imm13, %rd
410 mkRESTORE = insn_r_i_r 0x3D -- op3_RESTORE
413 mkNOP = mkOR g0 g0 g0
414 mkCALL reg = mkJMPL reg 0 o7
415 mkRET = mkJMPL i7 8 g0
416 mkRESTORE_TRIVIAL = mkRESTORE g0 0 g0
418 --trace (show (map fst arg_offs_n_reps))
419 concatMap w32_to_w8s_bigEndian (
421 {- On entry, %o0 is the arg passed from the interpreter. After
422 the initial save insn, it will be in %i0. Studying the sparc
423 docs one would have thought that the minimum frame size is 92
424 bytes, but gcc always uses at least 112, and indeed there are
425 segfaults a-plenty with 92. So I use 112 here as well. I
426 don't understand why, tho.
428 [mkSAVE sp (- ({-92-}112 + 4*argWs_on_stack_ROUNDED_UP)) sp]
430 {- For each arg in args_offs_n_reps, examine the associated PrimRep
431 to determine how many payload (non-tag) words there are, and
432 whether or not there is a tag. This gives a bunch of offsets on
433 the H stack. Move the first 6 words into %o0 .. %o5 and the
434 rest on the stack, starting at [%sp+92]. Use %g1 as a temp.
436 ++ let doArgW (offW, wordNo)
438 = [mkLD i0 (bytes_per_word * offW) (oReg wordNo)]
440 = [mkLD i0 (bytes_per_word * offW) g1,
441 mkST g1 sp (92 + bytes_per_word * (wordNo - 6))]
443 concatMap doArgW (zip offsets_to_pushW [0 ..])
445 {- Get the addr to call into %g1, bearing in mind that there's
446 an Addr# tag at the indicated location, and do the call:
448 ld [4*(1 /*tag*/ +addr_offW) + %i0], %g1
451 ++ [mkLD i0 (bytes_per_word * offset_of_addr_bitsW) g1,
455 {- Depending on what the return type is, get the result
456 from %o0 or %o1:%o0 or %f0 or %f1:%f0.
458 st %o0, [%i0 + 4] -- 32 bit int
460 st %o0, [%i0 + 4] -- 64 bit int
461 st %o1, [%i0 + 8] -- or the other way round?
463 st %f0, [%i0 + 4] -- 32 bit float
465 st %f0, [%i0 + 4] -- 64 bit float
466 st %f1, [%i0 + 8] -- or the other way round?
469 ++ let i32 = [mkST o0 i0 4]
470 i64 = [mkST o0 i0 4, mkST o1 i0 8]
471 f32 = [mkSTF f0 i0 4]
472 f64 = [mkSTF f0 i0 4, mkSTF f1 i0 8]
482 other -> moan64 "ByteCodeFFI.mkMarshalCode_wrk(sparc)"
486 mkRESTORE_TRIVIAL] -- this is in the delay slot of the RET
488 #elif powerpc_TARGET_ARCH
494 w32_to_w8s_bigEndian :: Word32 -> [Word8]
495 w32_to_w8s_bigEndian w
496 = [fromIntegral (0xFF .&. (w `shiftR` 24)),
497 fromIntegral (0xFF .&. (w `shiftR` 16)),
498 fromIntegral (0xFF .&. (w `shiftR` 8)),
499 fromIntegral (0xFF .&. w)]
501 -- addr and result bits offsetsW
502 a_off = (addr_offW + sizeOfTagW AddrRep) * bytes_per_word
503 result_off = (r_offW + sizeOfTagW r_rep) * bytes_per_word
506 parameterArea = sum [ untaggedSizeW a_rep * bytes_per_word
507 | (_, a_rep) <- arg_offs_n_reps ]
508 savedRegisterArea = 4
509 frameSize = padTo16 (linkageArea + min parameterArea 32 + savedRegisterArea)
510 padTo16 x = case x `mod` 16 of
514 pass_parameters [] _ _ = []
515 pass_parameters ((a_offW, a_rep):args) nextFPR offsetW =
517 haskellArgOffset = (a_offW + sizeOfTagW a_rep)
519 offsetW' = offsetW + untaggedSizeW a_rep
523 [0x801f0000 -- lwz rX, src(r31)
524 .|. (fromIntegral src .&. 0xFFFF)
525 .|. (fromIntegral (offsetW+w+3) `shiftL` 21)]
527 [0x801f0000 -- lwz r0, src(r31)
528 .|. (fromIntegral src .&. 0xFFFF),
529 0x90010000 -- stw r0, dst(r1)
530 .|. (fromIntegral dst .&. 0xFFFF)]
532 src = haskellArgOffset + w*bytes_per_word
533 dst = linkageArea + (offsetW+w) * bytes_per_word
536 FloatRep | nextFPR < 14 ->
537 (0xc01f0000 -- lfs fX, haskellArgOffset(r31)
538 .|. (fromIntegral haskellArgOffset .&. 0xFFFF)
539 .|. (fromIntegral nextFPR `shiftL` 21))
540 : pass_parameters args (nextFPR+1) offsetW'
541 DoubleRep | nextFPR < 14 ->
542 (0xc81f0000 -- lfd fX, haskellArgOffset(r31)
543 .|. (fromIntegral haskellArgOffset .&. 0xFFFF)
544 .|. (fromIntegral nextFPR `shiftL` 21))
545 : pass_parameters args (nextFPR+1) offsetW'
547 concatMap pass_word [0 .. untaggedSizeW a_rep - 1]
548 ++ pass_parameters args nextFPR offsetW'
550 gather_result = case r_rep of
553 [0xd03f0000 .|. (fromIntegral result_off .&. 0xFFFF)]
554 -- stfs f1, result_off(r31)
556 [0xd83f0000 .|. (fromIntegral result_off .&. 0xFFFF)]
557 -- stfs f1, result_off(r31)
558 _ | untaggedSizeW r_rep == 2 ->
559 [0x907f0000 .|. (fromIntegral result_off .&. 0xFFFF),
560 0x909f0000 .|. (fromIntegral (result_off+4) .&. 0xFFFF)]
561 -- stw r3, result_off(r31)
562 -- stw r4, result_off+4(r31)
563 _ | untaggedSizeW r_rep == 1 ->
564 [0x907f0000 .|. (fromIntegral result_off .&. 0xFFFF)]
565 -- stw r3, result_off(r31)
567 concatMap w32_to_w8s_bigEndian $ [
568 0x7c0802a6, -- mflr r0
569 0x93e1fffc, -- stw r31,-4(r1)
570 0x90010008, -- stw r0,8(r1)
571 0x94210000 .|. (fromIntegral (-frameSize) .&. 0xFFFF),
572 -- stwu r1, -frameSize(r1)
573 0x7c7f1b78 -- mr r31, r3
574 ] ++ pass_parameters arg_offs_n_reps 1 0 ++ [
575 0x819f0000 .|. (fromIntegral a_off .&. 0xFFFF),
576 -- lwz r12, a_off(r31)
577 0x7d8903a6, -- mtctr r12
579 ] ++ gather_result ++ [
580 0x80210000, -- lwz r1, 0(r1)
581 0x83e1fffc, -- lwz r31, -4(r1)
582 0x80010008, -- lwz r0, 8(r1)
583 0x7c0803a6, -- mtlr r0