2 % (c) The University of Glasgow 2001
4 \section[ByteCodeGen]{Generate machine-code sequences for foreign import}
7 module ByteCodeFFI ( mkMarshalCode, moan64 ) where
9 #include "HsVersions.h"
12 import SMRep ( CgRep(..), cgRepSizeW )
13 import ForeignCall ( CCallConv(..) )
16 -- DON'T remove apparently unused imports here ..
17 -- there is ifdeffery below
18 import Control.Exception ( throwDyn )
19 import DATA_BITS ( Bits(..), shiftR, shiftL )
20 import Foreign ( newArray )
21 import Data.List ( mapAccumL )
23 import DATA_WORD ( Word8, Word32 )
24 import Foreign ( Ptr )
25 import System.IO.Unsafe ( unsafePerformIO )
26 import IO ( hPutStrLn, stderr )
27 -- import Debug.Trace ( trace )
30 %************************************************************************
32 \subsection{The platform-dependent marshall-code-generator.}
34 %************************************************************************
38 moan64 :: String -> SDoc -> a
42 "\nGHCi's bytecode generation machinery can't handle 64-bit\n" ++
43 "code properly yet. You can work around this for the time being\n" ++
44 "by compiling this module and all those it imports to object code,\n" ++
45 "and re-starting your GHCi session. The panic below contains information,\n" ++
46 "intended for the GHC implementors, about the exact place where GHC gave up.\n"
53 -- For sparc_TARGET_ARCH, i386_TARGET_ARCH, etc.
54 #include "nativeGen/NCG.h"
57 Make a piece of code which expects to see the Haskell stack
58 looking like this. It is given a pointer to the lowest word in
59 the stack -- presumably the tag of the placeholder.
65 <placeholder-for-result#> (must be an unboxed type)
67 We cope with both ccall and stdcall for the C fn. However, this code
68 itself expects only to be called using the ccall convention -- that is,
69 we don't clear our own (single) arg off the C stack.
71 mkMarshalCode :: CCallConv
72 -> (Int, CgRep) -> Int -> [(Int, CgRep)]
74 mkMarshalCode cconv (r_offW, r_rep) addr_offW arg_offs_n_reps
75 = let bytes = mkMarshalCode_wrk cconv (r_offW, r_rep)
76 addr_offW arg_offs_n_reps
77 in Foreign.newArray bytes
82 mkMarshalCode_wrk :: CCallConv
83 -> (Int, CgRep) -> Int -> [(Int, CgRep)]
86 mkMarshalCode_wrk cconv (r_offW, r_rep) addr_offW arg_offs_n_reps
90 = let -- Don't change this without first consulting Intel Corp :-)
95 [ -- reversed because x86 is little-endian
96 reverse [a_offW .. a_offW + cgRepSizeW a_rep - 1]
98 -- reversed because args are pushed L -> R onto C stack
99 | (a_offW, a_rep) <- reverse arg_offs_n_reps
102 arguments_size = bytes_per_word * length offsets_to_pushW
104 -- Darwin: align stack frame size to a multiple of 16 bytes
105 stack_frame_size = (arguments_size + 15) .&. complement 15
106 stack_frame_pad = stack_frame_size - arguments_size
108 stack_frame_size = arguments_size
111 -- some helpers to assemble x86 insns.
112 movl_offespmem_esi offB -- movl offB(%esp), %esi
113 = [0x8B, 0xB4, 0x24] ++ lit32 offB
114 movl_offesimem_ecx offB -- movl offB(%esi), %ecx
115 = [0x8B, 0x8E] ++ lit32 offB
116 save_regs -- pushl all intregs except %esp
117 = [0x50, 0x53, 0x51, 0x52, 0x56, 0x57, 0x55]
118 restore_regs -- popl ditto
119 = [0x5D, 0x5F, 0x5E, 0x5A, 0x59, 0x5B, 0x58]
120 pushl_ecx -- pushl %ecx
122 call_star_ecx -- call * %ecx
124 add_lit_esp lit -- addl $lit, %esp
125 = [0x81, 0xC4] ++ lit32 lit
126 movl_eax_offesimem offB -- movl %eax, offB(%esi)
127 = [0x89, 0x86] ++ lit32 offB
128 movl_edx_offesimem offB -- movl %edx, offB(%esi)
129 = [0x89, 0x96] ++ lit32 offB
132 fstpl_offesimem offB -- fstpl offB(%esi)
133 = [0xDD, 0x9E] ++ lit32 offB
134 fstps_offesimem offB -- fstps offB(%esi)
135 = [0xD9, 0x9E] ++ lit32 offB
136 lit32 :: Int -> [Word8]
137 lit32 i = let w32 = (fromIntegral i) :: Word32
138 in map (fromIntegral . ( .&. 0xFF))
139 [w32, w32 `shiftR` 8,
140 w32 `shiftR` 16, w32 `shiftR` 24]
142 2 0000 8BB42478 movl 0x12345678(%esp), %esi
144 3 0007 8B8E7856 movl 0x12345678(%esi), %ecx
147 5 000d 50535152 pushl %eax ; pushl %ebx ; pushl %ecx ; pushl %edx
148 6 0011 565755 pushl %esi ; pushl %edi ; pushl %ebp
150 8 0014 5D5F5E popl %ebp ; popl %edi ; popl %esi
151 9 0017 5A595B58 popl %edx ; popl %ecx ; popl %ebx ; popl %eax
153 11 001b 51 pushl %ecx
154 12 001c FFD1 call * %ecx
156 14 001e 81C47856 addl $0x12345678, %esp
158 15 0024 89867856 movl %eax, 0x12345678(%esi)
160 16 002a 89967856 movl %edx, 0x12345678(%esi)
163 18 0030 DD967856 fstl 0x12345678(%esi)
165 19 0036 DD9E7856 fstpl 0x12345678(%esi)
167 20 003c D9967856 fsts 0x12345678(%esi)
169 21 0042 D99E7856 fstps 0x12345678(%esi)
177 --trace (show (map fst arg_offs_n_reps))
179 {- On entry, top of C stack 0(%esp) is the RA and 4(%esp) is
180 arg passed from the interpreter.
182 Push all callee saved regs. Push all of them anyway ...
193 {- Now 28+0(%esp) is RA and 28+4(%esp) is the arg (the H stack ptr).
194 We'll use %esi as a temporary to point at the H stack, and
195 %ecx as a temporary to copy via.
197 movl 28+4(%esp), %esi
199 ++ movl_offespmem_esi 32
202 {- On Darwin, add some padding so that the stack stays aligned. -}
203 ++ (if stack_frame_pad /= 0
204 then add_lit_esp (-stack_frame_pad)
208 {- For each arg in args_offs_n_reps, examine the associated
209 CgRep to determine how many words there are. This gives a
210 bunch of offsets on the H stack to copy to the C stack:
212 movl off1(%esi), %ecx
215 ++ concatMap (\offW -> movl_offesimem_ecx (bytes_per_word * offW)
219 {- Get the addr to call into %ecx, bearing in mind that there's
220 an Addr# tag at the indicated location, and do the call:
222 movl 4*(1 /*tag*/ +addr_offW)(%esi), %ecx
225 ++ movl_offesimem_ecx (bytes_per_word * addr_offW)
228 {- Nuke the args just pushed and re-establish %esi at the
231 addl $4*number_of_args_pushed, %esp (ccall only)
232 movl 28+4(%esp), %esi
234 ++ (if cconv /= StdCallConv
235 then add_lit_esp stack_frame_size
237 ++ movl_offespmem_esi 32
239 {- Depending on what the return type is, get the result
240 from %eax or %edx:%eax or %st(0).
242 movl %eax, 4(%esi) -- assuming tagged result
251 ++ let i32 = movl_eax_offesimem 0
252 i64 = movl_eax_offesimem 0 ++ movl_edx_offesimem 4
253 f32 = fstps_offesimem 0
254 f64 = fstpl_offesimem 0
262 other -> moan64 "ByteCodeFFI.mkMarshalCode_wrk(x86)"
265 {- Restore all the pushed regs and go home.
281 #elif sparc_TARGET_ARCH
283 = let -- At least for sparc V8
287 w32_to_w8s_bigEndian :: Word32 -> [Word8]
288 w32_to_w8s_bigEndian w
289 = [fromIntegral (0xFF .&. (w `shiftR` 24)),
290 fromIntegral (0xFF .&. (w `shiftR` 16)),
291 fromIntegral (0xFF .&. (w `shiftR` 8)),
292 fromIntegral (0xFF .&. w)]
296 [ [a_offW .. a_offW + cgRepSizeW a_rep - 1]
298 | (a_offW, a_rep) <- arg_offs_n_reps
301 total_argWs = length offsets_to_pushW
302 argWs_on_stack = if total_argWs > 6 then total_argWs - 6
305 -- The stack pointer must be kept 8-byte aligned, which means
306 -- we need to calculate this quantity too
307 argWs_on_stack_ROUNDED_UP
308 | odd argWs_on_stack = 1 + argWs_on_stack
309 | otherwise = argWs_on_stack
311 -- some helpers to assemble sparc insns.
313 iReg, oReg, gReg, fReg :: Int -> Word32
314 iReg = fromIntegral . (+ 24)
315 oReg = fromIntegral . (+ 8)
316 gReg = fromIntegral . (+ 0)
331 insn_r_r_i :: Word32 -> Word32 -> Word32 -> Int -> Word32
332 insn_r_r_i op3 rs1 rd imm13
334 .|. (rs1 `shiftL` 25)
335 .|. (op3 `shiftL` 19)
340 insn_r_i_r :: Word32 -> Word32 -> Int -> Word32 -> Word32
341 insn_r_i_r op3 rs1 imm13 rd
344 .|. (op3 `shiftL` 19)
345 .|. (rs1 `shiftL` 14)
349 mkSimm13 :: Int -> Word32
351 = let imm13w = (fromIntegral imm13) :: Word32
354 -- REAL (non-synthetic) insns
355 -- or %rs1, %rs2, %rd
356 mkOR :: Word32 -> Word32 -> Word32 -> Word32
360 .|. (op3_OR `shiftL` 19)
361 .|. (rs1 `shiftL` 14)
364 where op3_OR = 2 :: Word32
366 -- ld(int) [%rs + imm13], %rd
367 mkLD rs1 imm13 rd = insn_r_r_i 0x00{-op3_LD-} rd rs1 imm13
369 -- st(int) %rs, [%rd + imm13]
370 mkST = insn_r_r_i 0x04 -- op3_ST
372 -- st(float) %rs, [%rd + imm13]
373 mkSTF = insn_r_r_i 0x24 -- op3_STF
375 -- jmpl %rs + imm13, %rd
376 mkJMPL = insn_r_i_r 0x38 -- op3_JMPL
378 -- save %rs + imm13, %rd
379 mkSAVE = insn_r_i_r 0x3C -- op3_SAVE
381 -- restore %rs + imm13, %rd
382 mkRESTORE = insn_r_i_r 0x3D -- op3_RESTORE
385 mkNOP = mkOR g0 g0 g0
386 mkCALL reg = mkJMPL reg 0 o7
387 mkRET = mkJMPL i7 8 g0
388 mkRESTORE_TRIVIAL = mkRESTORE g0 0 g0
390 --trace (show (map fst arg_offs_n_reps))
391 concatMap w32_to_w8s_bigEndian (
393 {- On entry, %o0 is the arg passed from the interpreter. After
394 the initial save insn, it will be in %i0. Studying the sparc
395 docs one would have thought that the minimum frame size is 92
396 bytes, but gcc always uses at least 112, and indeed there are
397 segfaults a-plenty with 92. So I use 112 here as well. I
398 don't understand why, tho.
400 [mkSAVE sp (- ({-92-}112 + 4*argWs_on_stack_ROUNDED_UP)) sp]
402 {- For each arg in args_offs_n_reps, examine the associated
403 CgRep to determine how many words there are. This gives a
404 bunch of offsets on the H stack. Move the first 6 words into
405 %o0 .. %o5 and the rest on the stack, starting at [%sp+92].
408 ++ let doArgW (offW, wordNo)
410 = [mkLD i0 (bytes_per_word * offW) (oReg wordNo)]
412 = [mkLD i0 (bytes_per_word * offW) g1,
413 mkST g1 sp (92 + bytes_per_word * (wordNo - 6))]
415 concatMap doArgW (zip offsets_to_pushW [0 ..])
417 {- Get the addr to call into %g1, bearing in mind that there's
418 an Addr# tag at the indicated location, and do the call:
420 ld [4*(1 /*tag*/ +addr_offW) + %i0], %g1
423 ++ [mkLD i0 (bytes_per_word * addr_offW) g1,
427 {- Depending on what the return type is, get the result
428 from %o0 or %o1:%o0 or %f0 or %f1:%f0.
430 st %o0, [%i0 + 4] -- 32 bit int
432 st %o0, [%i0 + 4] -- 64 bit int
433 st %o1, [%i0 + 8] -- or the other way round?
435 st %f0, [%i0 + 4] -- 32 bit float
437 st %f0, [%i0 + 4] -- 64 bit float
438 st %f1, [%i0 + 8] -- or the other way round?
441 ++ let i32 = [mkST o0 i0 0]
442 i64 = [mkST o0 i0 0, mkST o1 i0 4]
443 f32 = [mkSTF f0 i0 0]
444 f64 = [mkSTF f0 i0 0, mkSTF f1 i0 4]
451 other -> moan64 "ByteCodeFFI.mkMarshalCode_wrk(sparc)"
455 mkRESTORE_TRIVIAL] -- this is in the delay slot of the RET
457 #elif powerpc_TARGET_ARCH && darwin_TARGET_OS
463 w32_to_w8s_bigEndian :: Word32 -> [Word8]
464 w32_to_w8s_bigEndian w
465 = [fromIntegral (0xFF .&. (w `shiftR` 24)),
466 fromIntegral (0xFF .&. (w `shiftR` 16)),
467 fromIntegral (0xFF .&. (w `shiftR` 8)),
468 fromIntegral (0xFF .&. w)]
470 -- addr and result bits offsetsW
471 a_off = addr_offW * bytes_per_word
472 result_off = r_offW * bytes_per_word
475 parameterArea = sum [ cgRepSizeW a_rep * bytes_per_word
476 | (_, a_rep) <- arg_offs_n_reps ]
477 savedRegisterArea = 4
478 frameSize = padTo16 (linkageArea + max parameterArea 32 + savedRegisterArea)
479 padTo16 x = case x `mod` 16 of
483 pass_parameters [] _ _ = []
484 pass_parameters ((a_offW, a_rep):args) nextFPR offsetW =
486 haskellArgOffset = a_offW * bytes_per_word
487 offsetW' = offsetW + cgRepSizeW a_rep
491 [0x801f0000 -- lwz rX, src(r31)
492 .|. (fromIntegral src .&. 0xFFFF)
493 .|. (fromIntegral (offsetW+w+3) `shiftL` 21)]
495 [0x801f0000 -- lwz r0, src(r31)
496 .|. (fromIntegral src .&. 0xFFFF),
497 0x90010000 -- stw r0, dst(r1)
498 .|. (fromIntegral dst .&. 0xFFFF)]
500 src = haskellArgOffset + w*bytes_per_word
501 dst = linkageArea + (offsetW+w) * bytes_per_word
504 FloatArg | nextFPR < 14 ->
505 (0xc01f0000 -- lfs fX, haskellArgOffset(r31)
506 .|. (fromIntegral haskellArgOffset .&. 0xFFFF)
507 .|. (fromIntegral nextFPR `shiftL` 21))
508 : pass_parameters args (nextFPR+1) offsetW'
509 DoubleArg | nextFPR < 14 ->
510 (0xc81f0000 -- lfd fX, haskellArgOffset(r31)
511 .|. (fromIntegral haskellArgOffset .&. 0xFFFF)
512 .|. (fromIntegral nextFPR `shiftL` 21))
513 : pass_parameters args (nextFPR+1) offsetW'
515 concatMap pass_word [0 .. cgRepSizeW a_rep - 1]
516 ++ pass_parameters args nextFPR offsetW'
518 gather_result = case r_rep of
521 [0xd03f0000 .|. (fromIntegral result_off .&. 0xFFFF)]
522 -- stfs f1, result_off(r31)
524 [0xd83f0000 .|. (fromIntegral result_off .&. 0xFFFF)]
525 -- stfd f1, result_off(r31)
526 _ | cgRepSizeW r_rep == 2 ->
527 [0x907f0000 .|. (fromIntegral result_off .&. 0xFFFF),
528 0x909f0000 .|. (fromIntegral (result_off+4) .&. 0xFFFF)]
529 -- stw r3, result_off(r31)
530 -- stw r4, result_off+4(r31)
531 _ | cgRepSizeW r_rep == 1 ->
532 [0x907f0000 .|. (fromIntegral result_off .&. 0xFFFF)]
533 -- stw r3, result_off(r31)
535 concatMap w32_to_w8s_bigEndian $ [
536 0x7c0802a6, -- mflr r0
537 0x93e1fffc, -- stw r31,-4(r1)
538 0x90010008, -- stw r0,8(r1)
539 0x94210000 .|. (fromIntegral (-frameSize) .&. 0xFFFF),
540 -- stwu r1, -frameSize(r1)
541 0x7c7f1b78 -- mr r31, r3
542 ] ++ pass_parameters arg_offs_n_reps 1 0 ++ [
543 0x819f0000 .|. (fromIntegral a_off .&. 0xFFFF),
544 -- lwz r12, a_off(r31)
545 0x7d8903a6, -- mtctr r12
547 ] ++ gather_result ++ [
548 0x80210000, -- lwz r1, 0(r1)
549 0x83e1fffc, -- lwz r31, -4(r1)
550 0x80010008, -- lwz r0, 8(r1)
551 0x7c0803a6, -- mtlr r0
555 #elif powerpc_TARGET_ARCH && linux_TARGET_OS
557 -- All offsets here are measured in Words (not bytes). This includes
558 -- arguments to the load/store machine code generators, alignment numbers
559 -- and the final 'framesize' among others.
561 = concatMap w32_to_w8s_bigEndian $ [
562 0x7c0802a6, -- mflr r0
563 0x93e1fffc, -- stw r31,-4(r1)
564 0x90010008, -- stw r0,8(r1)
565 0x94210000 .|. offset (-framesize), -- stwu r1, -frameSize(r1)
566 0x7c7f1b78 -- mr r31, r3
567 ] ++ pass_parameters ++ -- pass the parameters
568 loadWord 12 addr_offW ++ [ -- lwz r12, a_off(r31)
569 0x7d8903a6, -- mtctr r12
571 ] ++ gather_result ++ [ -- save the return value
572 0x80210000, -- lwz r1, 0(r1)
573 0x83e1fffc, -- lwz r31, -4(r1)
574 0x80010008, -- lwz r0, 8(r1)
575 0x7c0803a6, -- mtlr r0
580 gather_result :: [Word32]
581 gather_result = case r_rep of
583 FloatArg -> storeFloat 1 r_offW
584 DoubleArg -> storeDouble 1 r_offW
585 LongArg -> storeLong 3 r_offW
586 _ -> storeWord 3 r_offW
588 pass_parameters :: [Word32]
589 pass_parameters = concat params
591 -- vector aligned (4 word = 16 bytes) with 8 extra words of buffer space
592 framesize = alignedTo 4 (argsize + 8)
594 ((_,_,argsize), params) = mapAccumL loadparam (3,1,2) arg_offs_n_reps
596 -- handle one argument, returning machine code and the updated state
597 loadparam :: (Int, Int, Int) -> (Int, CgRep) ->
598 ((Int, Int, Int), [Word32])
600 loadparam (gpr, fpr, stack) (ofs, rep) = case rep of
601 FloatArg | fpr <= 8 -> ( (gpr, fpr + 1, stack), loadFloat fpr ofs )
602 FloatArg -> ( (gpr, fpr, stack + 1), stackWord stack ofs )
604 DoubleArg | fpr <= 8 -> ( (gpr, fpr + 1, stack), loadDouble fpr ofs )
605 DoubleArg -> ( (gpr, fpr, astack + 2), stackLong astack ofs )
607 LongArg | even gpr -> loadparam (gpr + 1, fpr, stack) (ofs, rep)
608 LongArg | gpr <= 9 -> ( (gpr + 2, fpr, stack), loadLong gpr ofs )
609 LongArg -> ( (gpr, fpr, astack + 2), stackLong astack ofs )
611 _ | gpr <= 10 -> ( (gpr + 1, fpr, stack), loadWord gpr ofs )
612 _ -> ( (gpr, fpr, stack + 1), stackWord stack ofs )
613 where astack = alignedTo 2 stack
615 alignedTo :: Int -> Int -> Int
616 alignedTo alignment x = case x `mod` alignment of
618 y -> x - y + alignment
620 -- convenience macros to do multiple-instruction data moves
621 stackWord dst src = loadWord 0 src ++ storeWordC 0 dst
622 stackLong dst src = stackWord dst src ++ stackWord (dst + 1) (src + 1)
623 loadLong dst src = loadWord dst src ++ loadWord (dst + 1) (src + 1)
624 storeLong dst src = storeWord dst src ++ storeWord (dst + 1) (src + 1)
626 -- load data from the Haskell stack (relative to r31)
627 loadFloat = loadstoreInstr 0xc01f0000 -- lfs fpr, ofs(r31)
628 loadDouble = loadstoreInstr 0xc81f0000 -- lfd fpr, ofs(r31)
629 loadWord = loadstoreInstr 0x801f0000 -- lwz gpr, ofs(r31)
631 -- store data to the Haskell stack (relative to r31)
632 storeFloat = loadstoreInstr 0xd01f0000 -- stfs fpr, ofs(r31)
633 storeDouble = loadstoreInstr 0xd81f0000 -- stfd fpr, ofs(r31)
634 storeWord = loadstoreInstr 0x901f0000 -- stw gpr, ofs(r31)
636 -- store data to the C stack (relative to r1)
637 storeWordC = loadstoreInstr 0x90010000 -- stw gpr, ofs(r1)
639 -- machine code building blocks
640 loadstoreInstr :: Word32 -> Int -> Int -> [Word32]
641 loadstoreInstr code reg ofs = [ code .|. register reg .|. offset ofs ]
643 register :: Int -> Word32
644 register reg = fromIntegral reg `shiftL` 21
646 offset :: Int -> Word32
647 offset ofs = fromIntegral (ofs * 4) .&. 0xFFFF
650 w32_to_w8s_bigEndian :: Word32 -> [Word8]
651 w32_to_w8s_bigEndian w = [fromIntegral (0xFF .&. (w `shiftR` 24)),
652 fromIntegral (0xFF .&. (w `shiftR` 16)),
653 fromIntegral (0xFF .&. (w `shiftR` 8)),
654 fromIntegral (0xFF .&. w)]
658 = throwDyn (InstallationError "foreign import is not implemented for GHCi on this platform.")