2 % (c) The University of Glasgow 2001
4 \section[ByteCodeGen]{Generate machine-code sequences for foreign import}
7 module ByteCodeFFI ( mkMarshalCode, moan64 ) where
9 #include "HsVersions.h"
12 import SMRep ( CgRep(..), cgRepSizeW )
13 import ForeignCall ( CCallConv(..) )
15 -- DON'T remove apparently unused imports here ..
16 -- there is ifdeffery below
17 import DATA_BITS ( Bits(..), shiftR, shiftL )
18 import Foreign ( newArray )
19 import Data.List ( mapAccumL )
21 import DATA_WORD ( Word8, Word32 )
22 import Foreign ( Ptr )
23 import System.IO.Unsafe ( unsafePerformIO )
24 import IO ( hPutStrLn, stderr )
25 -- import Debug.Trace ( trace )
28 %************************************************************************
30 \subsection{The platform-dependent marshall-code-generator.}
32 %************************************************************************
36 moan64 :: String -> SDoc -> a
40 "\nGHCi's bytecode generation machinery can't handle 64-bit\n" ++
41 "code properly yet. You can work around this for the time being\n" ++
42 "by compiling this module and all those it imports to object code,\n" ++
43 "and re-starting your GHCi session. The panic below contains information,\n" ++
44 "intended for the GHC implementors, about the exact place where GHC gave up.\n"
51 -- For sparc_TARGET_ARCH, i386_TARGET_ARCH, etc.
52 #include "nativeGen/NCG.h"
55 Make a piece of code which expects to see the Haskell stack
56 looking like this. It is given a pointer to the lowest word in
57 the stack -- presumably the tag of the placeholder.
63 <placeholder-for-result#> (must be an unboxed type)
65 We cope with both ccall and stdcall for the C fn. However, this code
66 itself expects only to be called using the ccall convention -- that is,
67 we don't clear our own (single) arg off the C stack.
69 mkMarshalCode :: CCallConv
70 -> (Int, CgRep) -> Int -> [(Int, CgRep)]
72 mkMarshalCode cconv (r_offW, r_rep) addr_offW arg_offs_n_reps
73 = let bytes = mkMarshalCode_wrk cconv (r_offW, r_rep)
74 addr_offW arg_offs_n_reps
75 in Foreign.newArray bytes
80 mkMarshalCode_wrk :: CCallConv
81 -> (Int, CgRep) -> Int -> [(Int, CgRep)]
84 mkMarshalCode_wrk cconv (r_offW, r_rep) addr_offW arg_offs_n_reps
88 = let -- Don't change this without first consulting Intel Corp :-)
93 [ -- reversed because x86 is little-endian
94 reverse [a_offW .. a_offW + cgRepSizeW a_rep - 1]
96 -- reversed because args are pushed L -> R onto C stack
97 | (a_offW, a_rep) <- reverse arg_offs_n_reps
100 -- some helpers to assemble x86 insns.
101 movl_offespmem_esi offB -- movl offB(%esp), %esi
102 = [0x8B, 0xB4, 0x24] ++ lit32 offB
103 movl_offesimem_ecx offB -- movl offB(%esi), %ecx
104 = [0x8B, 0x8E] ++ lit32 offB
105 save_regs -- pushl all intregs except %esp
106 = [0x50, 0x53, 0x51, 0x52, 0x56, 0x57, 0x55]
107 restore_regs -- popl ditto
108 = [0x5D, 0x5F, 0x5E, 0x5A, 0x59, 0x5B, 0x58]
109 pushl_ecx -- pushl %ecx
111 call_star_ecx -- call * %ecx
113 add_lit_esp lit -- addl $lit, %esp
114 = [0x81, 0xC4] ++ lit32 lit
115 movl_eax_offesimem offB -- movl %eax, offB(%esi)
116 = [0x89, 0x86] ++ lit32 offB
117 movl_edx_offesimem offB -- movl %edx, offB(%esi)
118 = [0x89, 0x96] ++ lit32 offB
121 fstpl_offesimem offB -- fstpl offB(%esi)
122 = [0xDD, 0x9E] ++ lit32 offB
123 fstps_offesimem offB -- fstps offB(%esi)
124 = [0xD9, 0x9E] ++ lit32 offB
125 lit32 :: Int -> [Word8]
126 lit32 i = let w32 = (fromIntegral i) :: Word32
127 in map (fromIntegral . ( .&. 0xFF))
128 [w32, w32 `shiftR` 8,
129 w32 `shiftR` 16, w32 `shiftR` 24]
131 2 0000 8BB42478 movl 0x12345678(%esp), %esi
133 3 0007 8B8E7856 movl 0x12345678(%esi), %ecx
136 5 000d 50535152 pushl %eax ; pushl %ebx ; pushl %ecx ; pushl %edx
137 6 0011 565755 pushl %esi ; pushl %edi ; pushl %ebp
139 8 0014 5D5F5E popl %ebp ; popl %edi ; popl %esi
140 9 0017 5A595B58 popl %edx ; popl %ecx ; popl %ebx ; popl %eax
142 11 001b 51 pushl %ecx
143 12 001c FFD1 call * %ecx
145 14 001e 81C47856 addl $0x12345678, %esp
147 15 0024 89867856 movl %eax, 0x12345678(%esi)
149 16 002a 89967856 movl %edx, 0x12345678(%esi)
152 18 0030 DD967856 fstl 0x12345678(%esi)
154 19 0036 DD9E7856 fstpl 0x12345678(%esi)
156 20 003c D9967856 fsts 0x12345678(%esi)
158 21 0042 D99E7856 fstps 0x12345678(%esi)
166 --trace (show (map fst arg_offs_n_reps))
168 {- On entry, top of C stack 0(%esp) is the RA and 4(%esp) is
169 arg passed from the interpreter.
171 Push all callee saved regs. Push all of them anyway ...
182 {- Now 28+0(%esp) is RA and 28+4(%esp) is the arg (the H stack ptr).
183 We'll use %esi as a temporary to point at the H stack, and
184 %ecx as a temporary to copy via.
186 movl 28+4(%esp), %esi
188 ++ movl_offespmem_esi 32
190 {- For each arg in args_offs_n_reps, examine the associated
191 CgRep to determine how many words there are. This gives a
192 bunch of offsets on the H stack to copy to the C stack:
194 movl off1(%esi), %ecx
197 ++ concatMap (\offW -> movl_offesimem_ecx (bytes_per_word * offW)
201 {- Get the addr to call into %ecx, bearing in mind that there's
202 an Addr# tag at the indicated location, and do the call:
204 movl 4*(1 /*tag*/ +addr_offW)(%esi), %ecx
207 ++ movl_offesimem_ecx (bytes_per_word * addr_offW)
210 {- Nuke the args just pushed and re-establish %esi at the
213 addl $4*number_of_args_pushed, %esp (ccall only)
214 movl 28+4(%esp), %esi
216 ++ (if cconv /= StdCallConv
217 then add_lit_esp (bytes_per_word * length offsets_to_pushW)
219 ++ movl_offespmem_esi 32
221 {- Depending on what the return type is, get the result
222 from %eax or %edx:%eax or %st(0).
224 movl %eax, 4(%esi) -- assuming tagged result
233 ++ let i32 = movl_eax_offesimem 0
234 i64 = movl_eax_offesimem 0 ++ movl_edx_offesimem 4
235 f32 = fstps_offesimem 0
236 f64 = fstpl_offesimem 0
244 other -> moan64 "ByteCodeFFI.mkMarshalCode_wrk(x86)"
247 {- Restore all the pushed regs and go home.
263 #elif sparc_TARGET_ARCH
265 = let -- At least for sparc V8
269 w32_to_w8s_bigEndian :: Word32 -> [Word8]
270 w32_to_w8s_bigEndian w
271 = [fromIntegral (0xFF .&. (w `shiftR` 24)),
272 fromIntegral (0xFF .&. (w `shiftR` 16)),
273 fromIntegral (0xFF .&. (w `shiftR` 8)),
274 fromIntegral (0xFF .&. w)]
278 [ [a_offW .. a_offW + cgRepSizeW a_rep - 1]
280 | (a_offW, a_rep) <- arg_offs_n_reps
283 total_argWs = length offsets_to_pushW
284 argWs_on_stack = if total_argWs > 6 then total_argWs - 6
287 -- The stack pointer must be kept 8-byte aligned, which means
288 -- we need to calculate this quantity too
289 argWs_on_stack_ROUNDED_UP
290 | odd argWs_on_stack = 1 + argWs_on_stack
291 | otherwise = argWs_on_stack
293 -- some helpers to assemble sparc insns.
295 iReg, oReg, gReg, fReg :: Int -> Word32
296 iReg = fromIntegral . (+ 24)
297 oReg = fromIntegral . (+ 8)
298 gReg = fromIntegral . (+ 0)
313 insn_r_r_i :: Word32 -> Word32 -> Word32 -> Int -> Word32
314 insn_r_r_i op3 rs1 rd imm13
316 .|. (rs1 `shiftL` 25)
317 .|. (op3 `shiftL` 19)
322 insn_r_i_r :: Word32 -> Word32 -> Int -> Word32 -> Word32
323 insn_r_i_r op3 rs1 imm13 rd
326 .|. (op3 `shiftL` 19)
327 .|. (rs1 `shiftL` 14)
331 mkSimm13 :: Int -> Word32
333 = let imm13w = (fromIntegral imm13) :: Word32
336 -- REAL (non-synthetic) insns
337 -- or %rs1, %rs2, %rd
338 mkOR :: Word32 -> Word32 -> Word32 -> Word32
342 .|. (op3_OR `shiftL` 19)
343 .|. (rs1 `shiftL` 14)
346 where op3_OR = 2 :: Word32
348 -- ld(int) [%rs + imm13], %rd
349 mkLD rs1 imm13 rd = insn_r_r_i 0x00{-op3_LD-} rd rs1 imm13
351 -- st(int) %rs, [%rd + imm13]
352 mkST = insn_r_r_i 0x04 -- op3_ST
354 -- st(float) %rs, [%rd + imm13]
355 mkSTF = insn_r_r_i 0x24 -- op3_STF
357 -- jmpl %rs + imm13, %rd
358 mkJMPL = insn_r_i_r 0x38 -- op3_JMPL
360 -- save %rs + imm13, %rd
361 mkSAVE = insn_r_i_r 0x3C -- op3_SAVE
363 -- restore %rs + imm13, %rd
364 mkRESTORE = insn_r_i_r 0x3D -- op3_RESTORE
367 mkNOP = mkOR g0 g0 g0
368 mkCALL reg = mkJMPL reg 0 o7
369 mkRET = mkJMPL i7 8 g0
370 mkRESTORE_TRIVIAL = mkRESTORE g0 0 g0
372 --trace (show (map fst arg_offs_n_reps))
373 concatMap w32_to_w8s_bigEndian (
375 {- On entry, %o0 is the arg passed from the interpreter. After
376 the initial save insn, it will be in %i0. Studying the sparc
377 docs one would have thought that the minimum frame size is 92
378 bytes, but gcc always uses at least 112, and indeed there are
379 segfaults a-plenty with 92. So I use 112 here as well. I
380 don't understand why, tho.
382 [mkSAVE sp (- ({-92-}112 + 4*argWs_on_stack_ROUNDED_UP)) sp]
384 {- For each arg in args_offs_n_reps, examine the associated
385 CgRep to determine how many words there are. This gives a
386 bunch of offsets on the H stack. Move the first 6 words into
387 %o0 .. %o5 and the rest on the stack, starting at [%sp+92].
390 ++ let doArgW (offW, wordNo)
392 = [mkLD i0 (bytes_per_word * offW) (oReg wordNo)]
394 = [mkLD i0 (bytes_per_word * offW) g1,
395 mkST g1 sp (92 + bytes_per_word * (wordNo - 6))]
397 concatMap doArgW (zip offsets_to_pushW [0 ..])
399 {- Get the addr to call into %g1, bearing in mind that there's
400 an Addr# tag at the indicated location, and do the call:
402 ld [4*(1 /*tag*/ +addr_offW) + %i0], %g1
405 ++ [mkLD i0 (bytes_per_word * addr_offW) g1,
409 {- Depending on what the return type is, get the result
410 from %o0 or %o1:%o0 or %f0 or %f1:%f0.
412 st %o0, [%i0 + 4] -- 32 bit int
414 st %o0, [%i0 + 4] -- 64 bit int
415 st %o1, [%i0 + 8] -- or the other way round?
417 st %f0, [%i0 + 4] -- 32 bit float
419 st %f0, [%i0 + 4] -- 64 bit float
420 st %f1, [%i0 + 8] -- or the other way round?
423 ++ let i32 = [mkST o0 i0 0]
424 i64 = [mkST o0 i0 0, mkST o1 i0 4]
425 f32 = [mkSTF f0 i0 0]
426 f64 = [mkSTF f0 i0 0, mkSTF f1 i0 4]
433 other -> moan64 "ByteCodeFFI.mkMarshalCode_wrk(sparc)"
437 mkRESTORE_TRIVIAL] -- this is in the delay slot of the RET
439 #elif powerpc_TARGET_ARCH && darwin_TARGET_OS
445 w32_to_w8s_bigEndian :: Word32 -> [Word8]
446 w32_to_w8s_bigEndian w
447 = [fromIntegral (0xFF .&. (w `shiftR` 24)),
448 fromIntegral (0xFF .&. (w `shiftR` 16)),
449 fromIntegral (0xFF .&. (w `shiftR` 8)),
450 fromIntegral (0xFF .&. w)]
452 -- addr and result bits offsetsW
453 a_off = addr_offW * bytes_per_word
454 result_off = r_offW * bytes_per_word
457 parameterArea = sum [ cgRepSizeW a_rep * bytes_per_word
458 | (_, a_rep) <- arg_offs_n_reps ]
459 savedRegisterArea = 4
460 frameSize = padTo16 (linkageArea + max parameterArea 32 + savedRegisterArea)
461 padTo16 x = case x `mod` 16 of
465 pass_parameters [] _ _ = []
466 pass_parameters ((a_offW, a_rep):args) nextFPR offsetW =
468 haskellArgOffset = a_offW * bytes_per_word
469 offsetW' = offsetW + cgRepSizeW a_rep
473 [0x801f0000 -- lwz rX, src(r31)
474 .|. (fromIntegral src .&. 0xFFFF)
475 .|. (fromIntegral (offsetW+w+3) `shiftL` 21)]
477 [0x801f0000 -- lwz r0, src(r31)
478 .|. (fromIntegral src .&. 0xFFFF),
479 0x90010000 -- stw r0, dst(r1)
480 .|. (fromIntegral dst .&. 0xFFFF)]
482 src = haskellArgOffset + w*bytes_per_word
483 dst = linkageArea + (offsetW+w) * bytes_per_word
486 FloatArg | nextFPR < 14 ->
487 (0xc01f0000 -- lfs fX, haskellArgOffset(r31)
488 .|. (fromIntegral haskellArgOffset .&. 0xFFFF)
489 .|. (fromIntegral nextFPR `shiftL` 21))
490 : pass_parameters args (nextFPR+1) offsetW'
491 DoubleArg | nextFPR < 14 ->
492 (0xc81f0000 -- lfd fX, haskellArgOffset(r31)
493 .|. (fromIntegral haskellArgOffset .&. 0xFFFF)
494 .|. (fromIntegral nextFPR `shiftL` 21))
495 : pass_parameters args (nextFPR+1) offsetW'
497 concatMap pass_word [0 .. cgRepSizeW a_rep - 1]
498 ++ pass_parameters args nextFPR offsetW'
500 gather_result = case r_rep of
503 [0xd03f0000 .|. (fromIntegral result_off .&. 0xFFFF)]
504 -- stfs f1, result_off(r31)
506 [0xd83f0000 .|. (fromIntegral result_off .&. 0xFFFF)]
507 -- stfd f1, result_off(r31)
508 _ | cgRepSizeW r_rep == 2 ->
509 [0x907f0000 .|. (fromIntegral result_off .&. 0xFFFF),
510 0x909f0000 .|. (fromIntegral (result_off+4) .&. 0xFFFF)]
511 -- stw r3, result_off(r31)
512 -- stw r4, result_off+4(r31)
513 _ | cgRepSizeW r_rep == 1 ->
514 [0x907f0000 .|. (fromIntegral result_off .&. 0xFFFF)]
515 -- stw r3, result_off(r31)
517 concatMap w32_to_w8s_bigEndian $ [
518 0x7c0802a6, -- mflr r0
519 0x93e1fffc, -- stw r31,-4(r1)
520 0x90010008, -- stw r0,8(r1)
521 0x94210000 .|. (fromIntegral (-frameSize) .&. 0xFFFF),
522 -- stwu r1, -frameSize(r1)
523 0x7c7f1b78 -- mr r31, r3
524 ] ++ pass_parameters arg_offs_n_reps 1 0 ++ [
525 0x819f0000 .|. (fromIntegral a_off .&. 0xFFFF),
526 -- lwz r12, a_off(r31)
527 0x7d8903a6, -- mtctr r12
529 ] ++ gather_result ++ [
530 0x80210000, -- lwz r1, 0(r1)
531 0x83e1fffc, -- lwz r31, -4(r1)
532 0x80010008, -- lwz r0, 8(r1)
533 0x7c0803a6, -- mtlr r0
537 #elif powerpc_TARGET_ARCH && linux_TARGET_OS
539 -- All offsets here are measured in Words (not bytes). This includes
540 -- arguments to the load/store machine code generators, alignment numbers
541 -- and the final 'framesize' among others.
543 = concatMap w32_to_w8s_bigEndian $ [
544 0x7c0802a6, -- mflr r0
545 0x93e1fffc, -- stw r31,-4(r1)
546 0x90010008, -- stw r0,8(r1)
547 0x94210000 .|. offset (-framesize), -- stwu r1, -frameSize(r1)
548 0x7c7f1b78 -- mr r31, r3
549 ] ++ pass_parameters ++ -- pass the parameters
550 loadWord 12 addr_offW ++ [ -- lwz r12, a_off(r31)
551 0x7d8903a6, -- mtctr r12
553 ] ++ gather_result ++ [ -- save the return value
554 0x80210000, -- lwz r1, 0(r1)
555 0x83e1fffc, -- lwz r31, -4(r1)
556 0x80010008, -- lwz r0, 8(r1)
557 0x7c0803a6, -- mtlr r0
562 gather_result :: [Word32]
563 gather_result = case r_rep of
565 FloatArg -> storeFloat 1 r_offW
566 DoubleArg -> storeDouble 1 r_offW
567 LongArg -> storeLong 3 r_offW
568 _ -> storeWord 3 r_offW
570 pass_parameters :: [Word32]
571 pass_parameters = concat params
573 -- vector aligned (4 word = 16 bytes) with 8 extra words of buffer space
574 framesize = alignedTo 4 (argsize + 8)
576 ((_,_,argsize), params) = mapAccumL loadparam (3,1,2) arg_offs_n_reps
578 -- handle one argument, returning machine code and the updated state
579 loadparam :: (Int, Int, Int) -> (Int, CgRep) ->
580 ((Int, Int, Int), [Word32])
582 loadparam (gpr, fpr, stack) (ofs, rep) = case rep of
583 FloatArg | fpr <= 8 -> ( (gpr, fpr + 1, stack), loadFloat fpr ofs )
584 FloatArg -> ( (gpr, fpr, stack + 1), stackWord stack ofs )
586 DoubleArg | fpr <= 8 -> ( (gpr, fpr + 1, stack), loadDouble fpr ofs )
587 DoubleArg -> ( (gpr, fpr, astack + 2), stackLong astack ofs )
589 LongArg | even gpr -> loadparam (gpr + 1, fpr, stack) (ofs, rep)
590 LongArg | gpr <= 9 -> ( (gpr + 2, fpr, stack), loadLong gpr ofs )
591 LongArg -> ( (gpr, fpr, astack + 2), stackLong astack ofs )
593 _ | gpr <= 10 -> ( (gpr + 1, fpr, stack), loadWord gpr ofs )
594 _ -> ( (gpr, fpr, stack + 1), stackWord stack ofs )
595 where astack = alignedTo 2 stack
597 alignedTo :: Int -> Int -> Int
598 alignedTo alignment x = case x `mod` alignment of
600 y -> x - y + alignment
602 -- convenience macros to do multiple-instruction data moves
603 stackWord dst src = loadWord 0 src ++ storeWordC 0 dst
604 stackLong dst src = stackWord dst src ++ stackWord (dst + 1) (src + 1)
605 loadLong dst src = loadWord dst src ++ loadWord (dst + 1) (src + 1)
606 storeLong dst src = storeWord dst src ++ storeWord (dst + 1) (src + 1)
608 -- load data from the Haskell stack (relative to r31)
609 loadFloat = loadstoreInstr 0xc01f0000 -- lfs fpr, ofs(r31)
610 loadDouble = loadstoreInstr 0xc81f0000 -- lfd fpr, ofs(r31)
611 loadWord = loadstoreInstr 0x801f0000 -- lwz gpr, ofs(r31)
613 -- store data to the Haskell stack (relative to r31)
614 storeFloat = loadstoreInstr 0xd01f0000 -- stfs fpr, ofs(r31)
615 storeDouble = loadstoreInstr 0xd81f0000 -- stfd fpr, ofs(r31)
616 storeWord = loadstoreInstr 0x901f0000 -- stw gpr, ofs(r31)
618 -- store data to the C stack (relative to r1)
619 storeWordC = loadstoreInstr 0x90010000 -- stw gpr, ofs(r1)
621 -- machine code building blocks
622 loadstoreInstr :: Word32 -> Int -> Int -> [Word32]
623 loadstoreInstr code reg ofs = [ code .|. register reg .|. offset ofs ]
625 register :: Int -> Word32
626 register reg = fromIntegral reg `shiftL` 21
628 offset :: Int -> Word32
629 offset ofs = fromIntegral (ofs * 4) .&. 0xFFFF
632 w32_to_w8s_bigEndian :: Word32 -> [Word8]
633 w32_to_w8s_bigEndian w = [fromIntegral (0xFF .&. (w `shiftR` 24)),
634 fromIntegral (0xFF .&. (w `shiftR` 16)),
635 fromIntegral (0xFF .&. (w `shiftR` 8)),
636 fromIntegral (0xFF .&. w)]
640 = error "mkMarshalCode not implemented for this platform."