2 % (c) The University of Glasgow 2001
4 \section[ByteCodeGen]{Generate machine-code sequences for foreign import}
7 module ByteCodeFFI ( mkMarshalCode, moan64 ) where
9 #include "HsVersions.h"
12 import SMRep ( CgRep(..), cgRepSizeW )
13 import ForeignCall ( CCallConv(..) )
15 -- DON'T remove apparently unused imports here ..
16 -- there is ifdeffery below
17 import DATA_BITS ( Bits(..), shiftR, shiftL )
18 import Foreign ( newArray )
20 import DATA_WORD ( Word8, Word32 )
21 import Foreign ( Ptr )
22 import System.IO.Unsafe ( unsafePerformIO )
23 import IO ( hPutStrLn, stderr )
24 -- import Debug.Trace ( trace )
27 %************************************************************************
29 \subsection{The platform-dependent marshall-code-generator.}
31 %************************************************************************
35 moan64 :: String -> SDoc -> a
39 "\nGHCi's bytecode generation machinery can't handle 64-bit\n" ++
40 "code properly yet. You can work around this for the time being\n" ++
41 "by compiling this module and all those it imports to object code,\n" ++
42 "and re-starting your GHCi session. The panic below contains information,\n" ++
43 "intended for the GHC implementors, about the exact place where GHC gave up.\n"
50 -- For sparc_TARGET_ARCH, i386_TARGET_ARCH, etc.
51 #include "nativeGen/NCG.h"
54 Make a piece of code which expects to see the Haskell stack
55 looking like this. It is given a pointer to the lowest word in
56 the stack -- presumably the tag of the placeholder.
62 <placeholder-for-result#> (must be an unboxed type)
64 We cope with both ccall and stdcall for the C fn. However, this code
65 itself expects only to be called using the ccall convention -- that is,
66 we don't clear our own (single) arg off the C stack.
68 mkMarshalCode :: CCallConv
69 -> (Int, CgRep) -> Int -> [(Int, CgRep)]
71 mkMarshalCode cconv (r_offW, r_rep) addr_offW arg_offs_n_reps
72 = let bytes = mkMarshalCode_wrk cconv (r_offW, r_rep)
73 addr_offW arg_offs_n_reps
74 in Foreign.newArray bytes
79 mkMarshalCode_wrk :: CCallConv
80 -> (Int, CgRep) -> Int -> [(Int, CgRep)]
83 mkMarshalCode_wrk cconv (r_offW, r_rep) addr_offW arg_offs_n_reps
87 = let -- Don't change this without first consulting Intel Corp :-)
92 [ -- reversed because x86 is little-endian
93 reverse [a_offW .. a_offW + cgRepSizeW a_rep - 1]
95 -- reversed because args are pushed L -> R onto C stack
96 | (a_offW, a_rep) <- reverse arg_offs_n_reps
99 -- some helpers to assemble x86 insns.
100 movl_offespmem_esi offB -- movl offB(%esp), %esi
101 = [0x8B, 0xB4, 0x24] ++ lit32 offB
102 movl_offesimem_ecx offB -- movl offB(%esi), %ecx
103 = [0x8B, 0x8E] ++ lit32 offB
104 save_regs -- pushl all intregs except %esp
105 = [0x50, 0x53, 0x51, 0x52, 0x56, 0x57, 0x55]
106 restore_regs -- popl ditto
107 = [0x5D, 0x5F, 0x5E, 0x5A, 0x59, 0x5B, 0x58]
108 pushl_ecx -- pushl %ecx
110 call_star_ecx -- call * %ecx
112 add_lit_esp lit -- addl $lit, %esp
113 = [0x81, 0xC4] ++ lit32 lit
114 movl_eax_offesimem offB -- movl %eax, offB(%esi)
115 = [0x89, 0x86] ++ lit32 offB
116 movl_edx_offesimem offB -- movl %edx, offB(%esi)
117 = [0x89, 0x96] ++ lit32 offB
120 fstpl_offesimem offB -- fstpl offB(%esi)
121 = [0xDD, 0x9E] ++ lit32 offB
122 fstps_offesimem offB -- fstps offB(%esi)
123 = [0xD9, 0x9E] ++ lit32 offB
124 lit32 :: Int -> [Word8]
125 lit32 i = let w32 = (fromIntegral i) :: Word32
126 in map (fromIntegral . ( .&. 0xFF))
127 [w32, w32 `shiftR` 8,
128 w32 `shiftR` 16, w32 `shiftR` 24]
130 2 0000 8BB42478 movl 0x12345678(%esp), %esi
132 3 0007 8B8E7856 movl 0x12345678(%esi), %ecx
135 5 000d 50535152 pushl %eax ; pushl %ebx ; pushl %ecx ; pushl %edx
136 6 0011 565755 pushl %esi ; pushl %edi ; pushl %ebp
138 8 0014 5D5F5E popl %ebp ; popl %edi ; popl %esi
139 9 0017 5A595B58 popl %edx ; popl %ecx ; popl %ebx ; popl %eax
141 11 001b 51 pushl %ecx
142 12 001c FFD1 call * %ecx
144 14 001e 81C47856 addl $0x12345678, %esp
146 15 0024 89867856 movl %eax, 0x12345678(%esi)
148 16 002a 89967856 movl %edx, 0x12345678(%esi)
151 18 0030 DD967856 fstl 0x12345678(%esi)
153 19 0036 DD9E7856 fstpl 0x12345678(%esi)
155 20 003c D9967856 fsts 0x12345678(%esi)
157 21 0042 D99E7856 fstps 0x12345678(%esi)
165 --trace (show (map fst arg_offs_n_reps))
167 {- On entry, top of C stack 0(%esp) is the RA and 4(%esp) is
168 arg passed from the interpreter.
170 Push all callee saved regs. Push all of them anyway ...
181 {- Now 28+0(%esp) is RA and 28+4(%esp) is the arg (the H stack ptr).
182 We'll use %esi as a temporary to point at the H stack, and
183 %ecx as a temporary to copy via.
185 movl 28+4(%esp), %esi
187 ++ movl_offespmem_esi 32
189 {- For each arg in args_offs_n_reps, examine the associated
190 CgRep to determine how many words there are. This gives a
191 bunch of offsets on the H stack to copy to the C stack:
193 movl off1(%esi), %ecx
196 ++ concatMap (\offW -> movl_offesimem_ecx (bytes_per_word * offW)
200 {- Get the addr to call into %ecx, bearing in mind that there's
201 an Addr# tag at the indicated location, and do the call:
203 movl 4*(1 /*tag*/ +addr_offW)(%esi), %ecx
206 ++ movl_offesimem_ecx (bytes_per_word * addr_offW)
209 {- Nuke the args just pushed and re-establish %esi at the
212 addl $4*number_of_args_pushed, %esp (ccall only)
213 movl 28+4(%esp), %esi
215 ++ (if cconv /= StdCallConv
216 then add_lit_esp (bytes_per_word * length offsets_to_pushW)
218 ++ movl_offespmem_esi 32
220 {- Depending on what the return type is, get the result
221 from %eax or %edx:%eax or %st(0).
223 movl %eax, 4(%esi) -- assuming tagged result
232 ++ let i32 = movl_eax_offesimem 0
233 i64 = movl_eax_offesimem 0 ++ movl_edx_offesimem 4
234 f32 = fstps_offesimem 0
235 f64 = fstpl_offesimem 0
243 other -> moan64 "ByteCodeFFI.mkMarshalCode_wrk(x86)"
246 {- Restore all the pushed regs and go home.
262 #elif sparc_TARGET_ARCH
264 = let -- At least for sparc V8
268 w32_to_w8s_bigEndian :: Word32 -> [Word8]
269 w32_to_w8s_bigEndian w
270 = [fromIntegral (0xFF .&. (w `shiftR` 24)),
271 fromIntegral (0xFF .&. (w `shiftR` 16)),
272 fromIntegral (0xFF .&. (w `shiftR` 8)),
273 fromIntegral (0xFF .&. w)]
277 [ [a_offW .. a_offW + cgRepSizeW a_rep - 1]
279 | (a_offW, a_rep) <- arg_offs_n_reps
282 total_argWs = length offsets_to_pushW
283 argWs_on_stack = if total_argWs > 6 then total_argWs - 6
286 -- The stack pointer must be kept 8-byte aligned, which means
287 -- we need to calculate this quantity too
288 argWs_on_stack_ROUNDED_UP
289 | odd argWs_on_stack = 1 + argWs_on_stack
290 | otherwise = argWs_on_stack
292 -- some helpers to assemble sparc insns.
294 iReg, oReg, gReg, fReg :: Int -> Word32
295 iReg = fromIntegral . (+ 24)
296 oReg = fromIntegral . (+ 8)
297 gReg = fromIntegral . (+ 0)
312 insn_r_r_i :: Word32 -> Word32 -> Word32 -> Int -> Word32
313 insn_r_r_i op3 rs1 rd imm13
315 .|. (rs1 `shiftL` 25)
316 .|. (op3 `shiftL` 19)
321 insn_r_i_r :: Word32 -> Word32 -> Int -> Word32 -> Word32
322 insn_r_i_r op3 rs1 imm13 rd
325 .|. (op3 `shiftL` 19)
326 .|. (rs1 `shiftL` 14)
330 mkSimm13 :: Int -> Word32
332 = let imm13w = (fromIntegral imm13) :: Word32
335 -- REAL (non-synthetic) insns
336 -- or %rs1, %rs2, %rd
337 mkOR :: Word32 -> Word32 -> Word32 -> Word32
341 .|. (op3_OR `shiftL` 19)
342 .|. (rs1 `shiftL` 14)
345 where op3_OR = 2 :: Word32
347 -- ld(int) [%rs + imm13], %rd
348 mkLD rs1 imm13 rd = insn_r_r_i 0x00{-op3_LD-} rd rs1 imm13
350 -- st(int) %rs, [%rd + imm13]
351 mkST = insn_r_r_i 0x04 -- op3_ST
353 -- st(float) %rs, [%rd + imm13]
354 mkSTF = insn_r_r_i 0x24 -- op3_STF
356 -- jmpl %rs + imm13, %rd
357 mkJMPL = insn_r_i_r 0x38 -- op3_JMPL
359 -- save %rs + imm13, %rd
360 mkSAVE = insn_r_i_r 0x3C -- op3_SAVE
362 -- restore %rs + imm13, %rd
363 mkRESTORE = insn_r_i_r 0x3D -- op3_RESTORE
366 mkNOP = mkOR g0 g0 g0
367 mkCALL reg = mkJMPL reg 0 o7
368 mkRET = mkJMPL i7 8 g0
369 mkRESTORE_TRIVIAL = mkRESTORE g0 0 g0
371 --trace (show (map fst arg_offs_n_reps))
372 concatMap w32_to_w8s_bigEndian (
374 {- On entry, %o0 is the arg passed from the interpreter. After
375 the initial save insn, it will be in %i0. Studying the sparc
376 docs one would have thought that the minimum frame size is 92
377 bytes, but gcc always uses at least 112, and indeed there are
378 segfaults a-plenty with 92. So I use 112 here as well. I
379 don't understand why, tho.
381 [mkSAVE sp (- ({-92-}112 + 4*argWs_on_stack_ROUNDED_UP)) sp]
383 {- For each arg in args_offs_n_reps, examine the associated
384 CgRep to determine how many words there are. This gives a
385 bunch of offsets on the H stack. Move the first 6 words into
386 %o0 .. %o5 and the rest on the stack, starting at [%sp+92].
389 ++ let doArgW (offW, wordNo)
391 = [mkLD i0 (bytes_per_word * offW) (oReg wordNo)]
393 = [mkLD i0 (bytes_per_word * offW) g1,
394 mkST g1 sp (92 + bytes_per_word * (wordNo - 6))]
396 concatMap doArgW (zip offsets_to_pushW [0 ..])
398 {- Get the addr to call into %g1, bearing in mind that there's
399 an Addr# tag at the indicated location, and do the call:
401 ld [4*(1 /*tag*/ +addr_offW) + %i0], %g1
404 ++ [mkLD i0 (bytes_per_word * addr_offW) g1,
408 {- Depending on what the return type is, get the result
409 from %o0 or %o1:%o0 or %f0 or %f1:%f0.
411 st %o0, [%i0 + 4] -- 32 bit int
413 st %o0, [%i0 + 4] -- 64 bit int
414 st %o1, [%i0 + 8] -- or the other way round?
416 st %f0, [%i0 + 4] -- 32 bit float
418 st %f0, [%i0 + 4] -- 64 bit float
419 st %f1, [%i0 + 8] -- or the other way round?
422 ++ let i32 = [mkST o0 i0 0]
423 i64 = [mkST o0 i0 0, mkST o1 i0 4]
424 f32 = [mkSTF f0 i0 0]
425 f64 = [mkSTF f0 i0 0, mkSTF f1 i0 4]
432 other -> moan64 "ByteCodeFFI.mkMarshalCode_wrk(sparc)"
436 mkRESTORE_TRIVIAL] -- this is in the delay slot of the RET
438 #elif powerpc_TARGET_ARCH
444 w32_to_w8s_bigEndian :: Word32 -> [Word8]
445 w32_to_w8s_bigEndian w
446 = [fromIntegral (0xFF .&. (w `shiftR` 24)),
447 fromIntegral (0xFF .&. (w `shiftR` 16)),
448 fromIntegral (0xFF .&. (w `shiftR` 8)),
449 fromIntegral (0xFF .&. w)]
451 -- addr and result bits offsetsW
452 a_off = addr_offW * bytes_per_word
453 result_off = r_offW * bytes_per_word
456 parameterArea = sum [ cgRepSizeW a_rep * bytes_per_word
457 | (_, a_rep) <- arg_offs_n_reps ]
458 savedRegisterArea = 4
459 frameSize = padTo16 (linkageArea + max parameterArea 32 + savedRegisterArea)
460 padTo16 x = case x `mod` 16 of
464 pass_parameters [] _ _ = []
465 pass_parameters ((a_offW, a_rep):args) nextFPR offsetW =
467 haskellArgOffset = a_offW * bytes_per_word
468 offsetW' = offsetW + cgRepSizeW a_rep
472 [0x801f0000 -- lwz rX, src(r31)
473 .|. (fromIntegral src .&. 0xFFFF)
474 .|. (fromIntegral (offsetW+w+3) `shiftL` 21)]
476 [0x801f0000 -- lwz r0, src(r31)
477 .|. (fromIntegral src .&. 0xFFFF),
478 0x90010000 -- stw r0, dst(r1)
479 .|. (fromIntegral dst .&. 0xFFFF)]
481 src = haskellArgOffset + w*bytes_per_word
482 dst = linkageArea + (offsetW+w) * bytes_per_word
485 FloatArg | nextFPR < 14 ->
486 (0xc01f0000 -- lfs fX, haskellArgOffset(r31)
487 .|. (fromIntegral haskellArgOffset .&. 0xFFFF)
488 .|. (fromIntegral nextFPR `shiftL` 21))
489 : pass_parameters args (nextFPR+1) offsetW'
490 DoubleArg | nextFPR < 14 ->
491 (0xc81f0000 -- lfd fX, haskellArgOffset(r31)
492 .|. (fromIntegral haskellArgOffset .&. 0xFFFF)
493 .|. (fromIntegral nextFPR `shiftL` 21))
494 : pass_parameters args (nextFPR+1) offsetW'
496 concatMap pass_word [0 .. cgRepSizeW a_rep - 1]
497 ++ pass_parameters args nextFPR offsetW'
499 gather_result = case r_rep of
502 [0xd03f0000 .|. (fromIntegral result_off .&. 0xFFFF)]
503 -- stfs f1, result_off(r31)
505 [0xd83f0000 .|. (fromIntegral result_off .&. 0xFFFF)]
506 -- stfs f1, result_off(r31)
507 _ | cgRepSizeW r_rep == 2 ->
508 [0x907f0000 .|. (fromIntegral result_off .&. 0xFFFF),
509 0x909f0000 .|. (fromIntegral (result_off+4) .&. 0xFFFF)]
510 -- stw r3, result_off(r31)
511 -- stw r4, result_off+4(r31)
512 _ | cgRepSizeW r_rep == 1 ->
513 [0x907f0000 .|. (fromIntegral result_off .&. 0xFFFF)]
514 -- stw r3, result_off(r31)
516 concatMap w32_to_w8s_bigEndian $ [
517 0x7c0802a6, -- mflr r0
518 0x93e1fffc, -- stw r31,-4(r1)
519 0x90010008, -- stw r0,8(r1)
520 0x94210000 .|. (fromIntegral (-frameSize) .&. 0xFFFF),
521 -- stwu r1, -frameSize(r1)
522 0x7c7f1b78 -- mr r31, r3
523 ] ++ pass_parameters arg_offs_n_reps 1 0 ++ [
524 0x819f0000 .|. (fromIntegral a_off .&. 0xFFFF),
525 -- lwz r12, a_off(r31)
526 0x7d8903a6, -- mtctr r12
528 ] ++ gather_result ++ [
529 0x80210000, -- lwz r1, 0(r1)
530 0x83e1fffc, -- lwz r31, -4(r1)
531 0x80010008, -- lwz r0, 8(r1)
532 0x7c0803a6, -- mtlr r0