2 % (c) The University of Glasgow 2001
4 \section[ByteCodeGen]{Generate machine-code sequences for foreign import}
7 module ByteCodeFFI ( mkMarshalCode, moan64 ) where
9 #include "HsVersions.h"
12 import SMRep ( CgRep(..), cgRepSizeW )
13 import ForeignCall ( CCallConv(..) )
16 -- DON'T remove apparently unused imports here ..
17 -- there is ifdeffery below
18 import Control.Exception ( throwDyn )
19 import DATA_BITS ( Bits(..), shiftR, shiftL )
20 import Foreign ( newArray )
21 import Data.List ( mapAccumL )
23 import DATA_WORD ( Word8, Word32 )
24 import Foreign ( Ptr )
25 import System.IO.Unsafe ( unsafePerformIO )
26 import IO ( hPutStrLn, stderr )
27 -- import Debug.Trace ( trace )
30 %************************************************************************
32 \subsection{The platform-dependent marshall-code-generator.}
34 %************************************************************************
38 moan64 :: String -> SDoc -> a
42 "\nGHCi's bytecode generation machinery can't handle 64-bit\n" ++
43 "code properly yet. You can work around this for the time being\n" ++
44 "by compiling this module and all those it imports to object code,\n" ++
45 "and re-starting your GHCi session. The panic below contains information,\n" ++
46 "intended for the GHC implementors, about the exact place where GHC gave up.\n"
53 -- For sparc_TARGET_ARCH, i386_TARGET_ARCH, etc.
54 #include "nativeGen/NCG.h"
57 Make a piece of code which expects to see the Haskell stack
58 looking like this. It is given a pointer to the lowest word in
59 the stack -- presumably the tag of the placeholder.
65 <placeholder-for-result#> (must be an unboxed type)
67 We cope with both ccall and stdcall for the C fn. However, this code
68 itself expects only to be called using the ccall convention -- that is,
69 we don't clear our own (single) arg off the C stack.
71 mkMarshalCode :: CCallConv
72 -> (Int, CgRep) -> Int -> [(Int, CgRep)]
74 mkMarshalCode cconv (r_offW, r_rep) addr_offW arg_offs_n_reps
75 = let bytes = mkMarshalCode_wrk cconv (r_offW, r_rep)
76 addr_offW arg_offs_n_reps
77 in Foreign.newArray bytes
82 mkMarshalCode_wrk :: CCallConv
83 -> (Int, CgRep) -> Int -> [(Int, CgRep)]
86 mkMarshalCode_wrk cconv (r_offW, r_rep) addr_offW arg_offs_n_reps
90 = let -- Don't change this without first consulting Intel Corp :-)
95 [ -- reversed because x86 is little-endian
96 reverse [a_offW .. a_offW + cgRepSizeW a_rep - 1]
98 -- reversed because args are pushed L -> R onto C stack
99 | (a_offW, a_rep) <- reverse arg_offs_n_reps
102 -- some helpers to assemble x86 insns.
103 movl_offespmem_esi offB -- movl offB(%esp), %esi
104 = [0x8B, 0xB4, 0x24] ++ lit32 offB
105 movl_offesimem_ecx offB -- movl offB(%esi), %ecx
106 = [0x8B, 0x8E] ++ lit32 offB
107 save_regs -- pushl all intregs except %esp
108 = [0x50, 0x53, 0x51, 0x52, 0x56, 0x57, 0x55]
109 restore_regs -- popl ditto
110 = [0x5D, 0x5F, 0x5E, 0x5A, 0x59, 0x5B, 0x58]
111 pushl_ecx -- pushl %ecx
113 call_star_ecx -- call * %ecx
115 add_lit_esp lit -- addl $lit, %esp
116 = [0x81, 0xC4] ++ lit32 lit
117 movl_eax_offesimem offB -- movl %eax, offB(%esi)
118 = [0x89, 0x86] ++ lit32 offB
119 movl_edx_offesimem offB -- movl %edx, offB(%esi)
120 = [0x89, 0x96] ++ lit32 offB
123 fstpl_offesimem offB -- fstpl offB(%esi)
124 = [0xDD, 0x9E] ++ lit32 offB
125 fstps_offesimem offB -- fstps offB(%esi)
126 = [0xD9, 0x9E] ++ lit32 offB
127 lit32 :: Int -> [Word8]
128 lit32 i = let w32 = (fromIntegral i) :: Word32
129 in map (fromIntegral . ( .&. 0xFF))
130 [w32, w32 `shiftR` 8,
131 w32 `shiftR` 16, w32 `shiftR` 24]
133 2 0000 8BB42478 movl 0x12345678(%esp), %esi
135 3 0007 8B8E7856 movl 0x12345678(%esi), %ecx
138 5 000d 50535152 pushl %eax ; pushl %ebx ; pushl %ecx ; pushl %edx
139 6 0011 565755 pushl %esi ; pushl %edi ; pushl %ebp
141 8 0014 5D5F5E popl %ebp ; popl %edi ; popl %esi
142 9 0017 5A595B58 popl %edx ; popl %ecx ; popl %ebx ; popl %eax
144 11 001b 51 pushl %ecx
145 12 001c FFD1 call * %ecx
147 14 001e 81C47856 addl $0x12345678, %esp
149 15 0024 89867856 movl %eax, 0x12345678(%esi)
151 16 002a 89967856 movl %edx, 0x12345678(%esi)
154 18 0030 DD967856 fstl 0x12345678(%esi)
156 19 0036 DD9E7856 fstpl 0x12345678(%esi)
158 20 003c D9967856 fsts 0x12345678(%esi)
160 21 0042 D99E7856 fstps 0x12345678(%esi)
168 --trace (show (map fst arg_offs_n_reps))
170 {- On entry, top of C stack 0(%esp) is the RA and 4(%esp) is
171 arg passed from the interpreter.
173 Push all callee saved regs. Push all of them anyway ...
184 {- Now 28+0(%esp) is RA and 28+4(%esp) is the arg (the H stack ptr).
185 We'll use %esi as a temporary to point at the H stack, and
186 %ecx as a temporary to copy via.
188 movl 28+4(%esp), %esi
190 ++ movl_offespmem_esi 32
192 {- For each arg in args_offs_n_reps, examine the associated
193 CgRep to determine how many words there are. This gives a
194 bunch of offsets on the H stack to copy to the C stack:
196 movl off1(%esi), %ecx
199 ++ concatMap (\offW -> movl_offesimem_ecx (bytes_per_word * offW)
203 {- Get the addr to call into %ecx, bearing in mind that there's
204 an Addr# tag at the indicated location, and do the call:
206 movl 4*(1 /*tag*/ +addr_offW)(%esi), %ecx
209 ++ movl_offesimem_ecx (bytes_per_word * addr_offW)
212 {- Nuke the args just pushed and re-establish %esi at the
215 addl $4*number_of_args_pushed, %esp (ccall only)
216 movl 28+4(%esp), %esi
218 ++ (if cconv /= StdCallConv
219 then add_lit_esp (bytes_per_word * length offsets_to_pushW)
221 ++ movl_offespmem_esi 32
223 {- Depending on what the return type is, get the result
224 from %eax or %edx:%eax or %st(0).
226 movl %eax, 4(%esi) -- assuming tagged result
235 ++ let i32 = movl_eax_offesimem 0
236 i64 = movl_eax_offesimem 0 ++ movl_edx_offesimem 4
237 f32 = fstps_offesimem 0
238 f64 = fstpl_offesimem 0
246 other -> moan64 "ByteCodeFFI.mkMarshalCode_wrk(x86)"
249 {- Restore all the pushed regs and go home.
265 #elif sparc_TARGET_ARCH
267 = let -- At least for sparc V8
271 w32_to_w8s_bigEndian :: Word32 -> [Word8]
272 w32_to_w8s_bigEndian w
273 = [fromIntegral (0xFF .&. (w `shiftR` 24)),
274 fromIntegral (0xFF .&. (w `shiftR` 16)),
275 fromIntegral (0xFF .&. (w `shiftR` 8)),
276 fromIntegral (0xFF .&. w)]
280 [ [a_offW .. a_offW + cgRepSizeW a_rep - 1]
282 | (a_offW, a_rep) <- arg_offs_n_reps
285 total_argWs = length offsets_to_pushW
286 argWs_on_stack = if total_argWs > 6 then total_argWs - 6
289 -- The stack pointer must be kept 8-byte aligned, which means
290 -- we need to calculate this quantity too
291 argWs_on_stack_ROUNDED_UP
292 | odd argWs_on_stack = 1 + argWs_on_stack
293 | otherwise = argWs_on_stack
295 -- some helpers to assemble sparc insns.
297 iReg, oReg, gReg, fReg :: Int -> Word32
298 iReg = fromIntegral . (+ 24)
299 oReg = fromIntegral . (+ 8)
300 gReg = fromIntegral . (+ 0)
315 insn_r_r_i :: Word32 -> Word32 -> Word32 -> Int -> Word32
316 insn_r_r_i op3 rs1 rd imm13
318 .|. (rs1 `shiftL` 25)
319 .|. (op3 `shiftL` 19)
324 insn_r_i_r :: Word32 -> Word32 -> Int -> Word32 -> Word32
325 insn_r_i_r op3 rs1 imm13 rd
328 .|. (op3 `shiftL` 19)
329 .|. (rs1 `shiftL` 14)
333 mkSimm13 :: Int -> Word32
335 = let imm13w = (fromIntegral imm13) :: Word32
338 -- REAL (non-synthetic) insns
339 -- or %rs1, %rs2, %rd
340 mkOR :: Word32 -> Word32 -> Word32 -> Word32
344 .|. (op3_OR `shiftL` 19)
345 .|. (rs1 `shiftL` 14)
348 where op3_OR = 2 :: Word32
350 -- ld(int) [%rs + imm13], %rd
351 mkLD rs1 imm13 rd = insn_r_r_i 0x00{-op3_LD-} rd rs1 imm13
353 -- st(int) %rs, [%rd + imm13]
354 mkST = insn_r_r_i 0x04 -- op3_ST
356 -- st(float) %rs, [%rd + imm13]
357 mkSTF = insn_r_r_i 0x24 -- op3_STF
359 -- jmpl %rs + imm13, %rd
360 mkJMPL = insn_r_i_r 0x38 -- op3_JMPL
362 -- save %rs + imm13, %rd
363 mkSAVE = insn_r_i_r 0x3C -- op3_SAVE
365 -- restore %rs + imm13, %rd
366 mkRESTORE = insn_r_i_r 0x3D -- op3_RESTORE
369 mkNOP = mkOR g0 g0 g0
370 mkCALL reg = mkJMPL reg 0 o7
371 mkRET = mkJMPL i7 8 g0
372 mkRESTORE_TRIVIAL = mkRESTORE g0 0 g0
374 --trace (show (map fst arg_offs_n_reps))
375 concatMap w32_to_w8s_bigEndian (
377 {- On entry, %o0 is the arg passed from the interpreter. After
378 the initial save insn, it will be in %i0. Studying the sparc
379 docs one would have thought that the minimum frame size is 92
380 bytes, but gcc always uses at least 112, and indeed there are
381 segfaults a-plenty with 92. So I use 112 here as well. I
382 don't understand why, tho.
384 [mkSAVE sp (- ({-92-}112 + 4*argWs_on_stack_ROUNDED_UP)) sp]
386 {- For each arg in args_offs_n_reps, examine the associated
387 CgRep to determine how many words there are. This gives a
388 bunch of offsets on the H stack. Move the first 6 words into
389 %o0 .. %o5 and the rest on the stack, starting at [%sp+92].
392 ++ let doArgW (offW, wordNo)
394 = [mkLD i0 (bytes_per_word * offW) (oReg wordNo)]
396 = [mkLD i0 (bytes_per_word * offW) g1,
397 mkST g1 sp (92 + bytes_per_word * (wordNo - 6))]
399 concatMap doArgW (zip offsets_to_pushW [0 ..])
401 {- Get the addr to call into %g1, bearing in mind that there's
402 an Addr# tag at the indicated location, and do the call:
404 ld [4*(1 /*tag*/ +addr_offW) + %i0], %g1
407 ++ [mkLD i0 (bytes_per_word * addr_offW) g1,
411 {- Depending on what the return type is, get the result
412 from %o0 or %o1:%o0 or %f0 or %f1:%f0.
414 st %o0, [%i0 + 4] -- 32 bit int
416 st %o0, [%i0 + 4] -- 64 bit int
417 st %o1, [%i0 + 8] -- or the other way round?
419 st %f0, [%i0 + 4] -- 32 bit float
421 st %f0, [%i0 + 4] -- 64 bit float
422 st %f1, [%i0 + 8] -- or the other way round?
425 ++ let i32 = [mkST o0 i0 0]
426 i64 = [mkST o0 i0 0, mkST o1 i0 4]
427 f32 = [mkSTF f0 i0 0]
428 f64 = [mkSTF f0 i0 0, mkSTF f1 i0 4]
435 other -> moan64 "ByteCodeFFI.mkMarshalCode_wrk(sparc)"
439 mkRESTORE_TRIVIAL] -- this is in the delay slot of the RET
441 #elif powerpc_TARGET_ARCH && darwin_TARGET_OS
447 w32_to_w8s_bigEndian :: Word32 -> [Word8]
448 w32_to_w8s_bigEndian w
449 = [fromIntegral (0xFF .&. (w `shiftR` 24)),
450 fromIntegral (0xFF .&. (w `shiftR` 16)),
451 fromIntegral (0xFF .&. (w `shiftR` 8)),
452 fromIntegral (0xFF .&. w)]
454 -- addr and result bits offsetsW
455 a_off = addr_offW * bytes_per_word
456 result_off = r_offW * bytes_per_word
459 parameterArea = sum [ cgRepSizeW a_rep * bytes_per_word
460 | (_, a_rep) <- arg_offs_n_reps ]
461 savedRegisterArea = 4
462 frameSize = padTo16 (linkageArea + max parameterArea 32 + savedRegisterArea)
463 padTo16 x = case x `mod` 16 of
467 pass_parameters [] _ _ = []
468 pass_parameters ((a_offW, a_rep):args) nextFPR offsetW =
470 haskellArgOffset = a_offW * bytes_per_word
471 offsetW' = offsetW + cgRepSizeW a_rep
475 [0x801f0000 -- lwz rX, src(r31)
476 .|. (fromIntegral src .&. 0xFFFF)
477 .|. (fromIntegral (offsetW+w+3) `shiftL` 21)]
479 [0x801f0000 -- lwz r0, src(r31)
480 .|. (fromIntegral src .&. 0xFFFF),
481 0x90010000 -- stw r0, dst(r1)
482 .|. (fromIntegral dst .&. 0xFFFF)]
484 src = haskellArgOffset + w*bytes_per_word
485 dst = linkageArea + (offsetW+w) * bytes_per_word
488 FloatArg | nextFPR < 14 ->
489 (0xc01f0000 -- lfs fX, haskellArgOffset(r31)
490 .|. (fromIntegral haskellArgOffset .&. 0xFFFF)
491 .|. (fromIntegral nextFPR `shiftL` 21))
492 : pass_parameters args (nextFPR+1) offsetW'
493 DoubleArg | nextFPR < 14 ->
494 (0xc81f0000 -- lfd fX, haskellArgOffset(r31)
495 .|. (fromIntegral haskellArgOffset .&. 0xFFFF)
496 .|. (fromIntegral nextFPR `shiftL` 21))
497 : pass_parameters args (nextFPR+1) offsetW'
499 concatMap pass_word [0 .. cgRepSizeW a_rep - 1]
500 ++ pass_parameters args nextFPR offsetW'
502 gather_result = case r_rep of
505 [0xd03f0000 .|. (fromIntegral result_off .&. 0xFFFF)]
506 -- stfs f1, result_off(r31)
508 [0xd83f0000 .|. (fromIntegral result_off .&. 0xFFFF)]
509 -- stfd f1, result_off(r31)
510 _ | cgRepSizeW r_rep == 2 ->
511 [0x907f0000 .|. (fromIntegral result_off .&. 0xFFFF),
512 0x909f0000 .|. (fromIntegral (result_off+4) .&. 0xFFFF)]
513 -- stw r3, result_off(r31)
514 -- stw r4, result_off+4(r31)
515 _ | cgRepSizeW r_rep == 1 ->
516 [0x907f0000 .|. (fromIntegral result_off .&. 0xFFFF)]
517 -- stw r3, result_off(r31)
519 concatMap w32_to_w8s_bigEndian $ [
520 0x7c0802a6, -- mflr r0
521 0x93e1fffc, -- stw r31,-4(r1)
522 0x90010008, -- stw r0,8(r1)
523 0x94210000 .|. (fromIntegral (-frameSize) .&. 0xFFFF),
524 -- stwu r1, -frameSize(r1)
525 0x7c7f1b78 -- mr r31, r3
526 ] ++ pass_parameters arg_offs_n_reps 1 0 ++ [
527 0x819f0000 .|. (fromIntegral a_off .&. 0xFFFF),
528 -- lwz r12, a_off(r31)
529 0x7d8903a6, -- mtctr r12
531 ] ++ gather_result ++ [
532 0x80210000, -- lwz r1, 0(r1)
533 0x83e1fffc, -- lwz r31, -4(r1)
534 0x80010008, -- lwz r0, 8(r1)
535 0x7c0803a6, -- mtlr r0
539 #elif powerpc_TARGET_ARCH && linux_TARGET_OS
541 -- All offsets here are measured in Words (not bytes). This includes
542 -- arguments to the load/store machine code generators, alignment numbers
543 -- and the final 'framesize' among others.
545 = concatMap w32_to_w8s_bigEndian $ [
546 0x7c0802a6, -- mflr r0
547 0x93e1fffc, -- stw r31,-4(r1)
548 0x90010008, -- stw r0,8(r1)
549 0x94210000 .|. offset (-framesize), -- stwu r1, -frameSize(r1)
550 0x7c7f1b78 -- mr r31, r3
551 ] ++ pass_parameters ++ -- pass the parameters
552 loadWord 12 addr_offW ++ [ -- lwz r12, a_off(r31)
553 0x7d8903a6, -- mtctr r12
555 ] ++ gather_result ++ [ -- save the return value
556 0x80210000, -- lwz r1, 0(r1)
557 0x83e1fffc, -- lwz r31, -4(r1)
558 0x80010008, -- lwz r0, 8(r1)
559 0x7c0803a6, -- mtlr r0
564 gather_result :: [Word32]
565 gather_result = case r_rep of
567 FloatArg -> storeFloat 1 r_offW
568 DoubleArg -> storeDouble 1 r_offW
569 LongArg -> storeLong 3 r_offW
570 _ -> storeWord 3 r_offW
572 pass_parameters :: [Word32]
573 pass_parameters = concat params
575 -- vector aligned (4 word = 16 bytes) with 8 extra words of buffer space
576 framesize = alignedTo 4 (argsize + 8)
578 ((_,_,argsize), params) = mapAccumL loadparam (3,1,2) arg_offs_n_reps
580 -- handle one argument, returning machine code and the updated state
581 loadparam :: (Int, Int, Int) -> (Int, CgRep) ->
582 ((Int, Int, Int), [Word32])
584 loadparam (gpr, fpr, stack) (ofs, rep) = case rep of
585 FloatArg | fpr <= 8 -> ( (gpr, fpr + 1, stack), loadFloat fpr ofs )
586 FloatArg -> ( (gpr, fpr, stack + 1), stackWord stack ofs )
588 DoubleArg | fpr <= 8 -> ( (gpr, fpr + 1, stack), loadDouble fpr ofs )
589 DoubleArg -> ( (gpr, fpr, astack + 2), stackLong astack ofs )
591 LongArg | even gpr -> loadparam (gpr + 1, fpr, stack) (ofs, rep)
592 LongArg | gpr <= 9 -> ( (gpr + 2, fpr, stack), loadLong gpr ofs )
593 LongArg -> ( (gpr, fpr, astack + 2), stackLong astack ofs )
595 _ | gpr <= 10 -> ( (gpr + 1, fpr, stack), loadWord gpr ofs )
596 _ -> ( (gpr, fpr, stack + 1), stackWord stack ofs )
597 where astack = alignedTo 2 stack
599 alignedTo :: Int -> Int -> Int
600 alignedTo alignment x = case x `mod` alignment of
602 y -> x - y + alignment
604 -- convenience macros to do multiple-instruction data moves
605 stackWord dst src = loadWord 0 src ++ storeWordC 0 dst
606 stackLong dst src = stackWord dst src ++ stackWord (dst + 1) (src + 1)
607 loadLong dst src = loadWord dst src ++ loadWord (dst + 1) (src + 1)
608 storeLong dst src = storeWord dst src ++ storeWord (dst + 1) (src + 1)
610 -- load data from the Haskell stack (relative to r31)
611 loadFloat = loadstoreInstr 0xc01f0000 -- lfs fpr, ofs(r31)
612 loadDouble = loadstoreInstr 0xc81f0000 -- lfd fpr, ofs(r31)
613 loadWord = loadstoreInstr 0x801f0000 -- lwz gpr, ofs(r31)
615 -- store data to the Haskell stack (relative to r31)
616 storeFloat = loadstoreInstr 0xd01f0000 -- stfs fpr, ofs(r31)
617 storeDouble = loadstoreInstr 0xd81f0000 -- stfd fpr, ofs(r31)
618 storeWord = loadstoreInstr 0x901f0000 -- stw gpr, ofs(r31)
620 -- store data to the C stack (relative to r1)
621 storeWordC = loadstoreInstr 0x90010000 -- stw gpr, ofs(r1)
623 -- machine code building blocks
624 loadstoreInstr :: Word32 -> Int -> Int -> [Word32]
625 loadstoreInstr code reg ofs = [ code .|. register reg .|. offset ofs ]
627 register :: Int -> Word32
628 register reg = fromIntegral reg `shiftL` 21
630 offset :: Int -> Word32
631 offset ofs = fromIntegral (ofs * 4) .&. 0xFFFF
634 w32_to_w8s_bigEndian :: Word32 -> [Word8]
635 w32_to_w8s_bigEndian w = [fromIntegral (0xFF .&. (w `shiftR` 24)),
636 fromIntegral (0xFF .&. (w `shiftR` 16)),
637 fromIntegral (0xFF .&. (w `shiftR` 8)),
638 fromIntegral (0xFF .&. w)]
642 = throwDyn (InstallationError "foreign import is not implemented for GHCi on this platform.")