2 % (c) The University of Glasgow 2001-2006
5 ByteCodeGen: Generate machine-code sequences for foreign import
9 -- The above warning supression flag is a temporary kludge.
10 -- While working on this module you are encouraged to remove it and fix
11 -- any warnings in the module. See
12 -- http://hackage.haskell.org/trac/ghc/wiki/Commentary/CodingStyle#Warnings
15 module ByteCodeFFI ( mkMarshalCode, moan64, newExec ) where
17 #include "HsVersions.h"
24 -- DON'T remove apparently unused imports here ..
25 -- there is ifdeffery below
26 import Control.Exception ( throwDyn )
27 import Data.Bits ( Bits(..), shiftR, shiftL )
28 import Data.List ( mapAccumL )
30 import Data.Word ( Word8, Word32 )
31 import Foreign ( Ptr, FunPtr, castPtrToFunPtr,
32 Storable, sizeOf, pokeArray )
33 import Foreign.C ( CUInt )
34 import System.IO.Unsafe ( unsafePerformIO )
35 import System.IO ( hPutStrLn, stderr )
36 -- import Debug.Trace ( trace )
39 %************************************************************************
41 \subsection{The platform-dependent marshall-code-generator.}
43 %************************************************************************
47 moan64 :: String -> SDoc -> a
51 "\nGHCi's bytecode generation machinery can't handle 64-bit\n" ++
52 "code properly yet. You can work around this for the time being\n" ++
53 "by compiling this module and all those it imports to object code,\n" ++
54 "and re-starting your GHCi session. The panic below contains information,\n" ++
55 "intended for the GHC implementors, about the exact place where GHC gave up.\n"
62 -- For sparc_TARGET_ARCH, i386_TARGET_ARCH, etc.
63 #include "nativeGen/NCG.h"
66 Make a piece of code which expects to see the Haskell stack
67 looking like this. It is given a pointer to the lowest word in
68 the stack -- presumably the tag of the placeholder.
74 <placeholder-for-result#> (must be an unboxed type)
76 We cope with both ccall and stdcall for the C fn. However, this code
77 itself expects only to be called using the ccall convention -- that is,
78 we don't clear our own (single) arg off the C stack.
80 mkMarshalCode :: CCallConv
81 -> (Int, CgRep) -> Int -> [(Int, CgRep)]
83 mkMarshalCode cconv (r_offW, r_rep) addr_offW arg_offs_n_reps
84 = let bytes = mkMarshalCode_wrk cconv (r_offW, r_rep)
85 addr_offW arg_offs_n_reps
88 newExec :: Storable a => [a] -> IO (FunPtr ())
90 = do ptr <- _allocateExec (fromIntegral $ codeSize undefined code)
92 return (castPtrToFunPtr ptr)
94 codeSize :: Storable a => a -> [a] -> Int
95 codeSize dummy array = sizeOf(dummy) * length array
97 foreign import ccall unsafe "allocateExec"
98 _allocateExec :: CUInt -> IO (Ptr a)
100 mkMarshalCode_wrk :: CCallConv
101 -> (Int, CgRep) -> Int -> [(Int, CgRep)]
104 mkMarshalCode_wrk cconv (r_offW, r_rep) addr_offW arg_offs_n_reps
108 = let -- Don't change this without first consulting Intel Corp :-)
113 [ -- reversed because x86 is little-endian
114 reverse [a_offW .. a_offW + cgRepSizeW a_rep - 1]
116 -- reversed because args are pushed L -> R onto C stack
117 | (a_offW, a_rep) <- reverse arg_offs_n_reps
120 arguments_size = bytes_per_word * length offsets_to_pushW
122 -- Darwin: align stack frame size to a multiple of 16 bytes
123 stack_frame_size = (arguments_size + 15) .&. complement 15
124 stack_frame_pad = stack_frame_size - arguments_size
126 stack_frame_size = arguments_size
129 -- some helpers to assemble x86 insns.
130 movl_offespmem_esi offB -- movl offB(%esp), %esi
131 = [0x8B, 0xB4, 0x24] ++ lit32 offB
132 movl_offesimem_ecx offB -- movl offB(%esi), %ecx
133 = [0x8B, 0x8E] ++ lit32 offB
134 save_regs -- pushl all intregs except %esp
135 = [0x50, 0x53, 0x51, 0x52, 0x56, 0x57, 0x55]
136 restore_regs -- popl ditto
137 = [0x5D, 0x5F, 0x5E, 0x5A, 0x59, 0x5B, 0x58]
138 pushl_ecx -- pushl %ecx
140 call_star_ecx -- call * %ecx
142 add_lit_esp lit -- addl $lit, %esp
143 = [0x81, 0xC4] ++ lit32 lit
144 movl_eax_offesimem offB -- movl %eax, offB(%esi)
145 = [0x89, 0x86] ++ lit32 offB
146 movl_edx_offesimem offB -- movl %edx, offB(%esi)
147 = [0x89, 0x96] ++ lit32 offB
150 fstpl_offesimem offB -- fstpl offB(%esi)
151 = [0xDD, 0x9E] ++ lit32 offB
152 fstps_offesimem offB -- fstps offB(%esi)
153 = [0xD9, 0x9E] ++ lit32 offB
155 2 0000 8BB42478 movl 0x12345678(%esp), %esi
157 3 0007 8B8E7856 movl 0x12345678(%esi), %ecx
160 5 000d 50535152 pushl %eax ; pushl %ebx ; pushl %ecx ; pushl %edx
161 6 0011 565755 pushl %esi ; pushl %edi ; pushl %ebp
163 8 0014 5D5F5E popl %ebp ; popl %edi ; popl %esi
164 9 0017 5A595B58 popl %edx ; popl %ecx ; popl %ebx ; popl %eax
166 11 001b 51 pushl %ecx
167 12 001c FFD1 call * %ecx
169 14 001e 81C47856 addl $0x12345678, %esp
171 15 0024 89867856 movl %eax, 0x12345678(%esi)
173 16 002a 89967856 movl %edx, 0x12345678(%esi)
176 18 0030 DD967856 fstl 0x12345678(%esi)
178 19 0036 DD9E7856 fstpl 0x12345678(%esi)
180 20 003c D9967856 fsts 0x12345678(%esi)
182 21 0042 D99E7856 fstps 0x12345678(%esi)
190 --trace (show (map fst arg_offs_n_reps))
192 {- On entry, top of C stack 0(%esp) is the RA and 4(%esp) is
193 arg passed from the interpreter.
195 Push all callee saved regs. Push all of them anyway ...
206 {- Now 28+0(%esp) is RA and 28+4(%esp) is the arg (the H stack ptr).
207 We'll use %esi as a temporary to point at the H stack, and
208 %ecx as a temporary to copy via.
210 movl 28+4(%esp), %esi
212 ++ movl_offespmem_esi 32
215 {- On Darwin, add some padding so that the stack stays aligned. -}
216 ++ (if stack_frame_pad /= 0
217 then add_lit_esp (-stack_frame_pad)
221 {- For each arg in args_offs_n_reps, examine the associated
222 CgRep to determine how many words there are. This gives a
223 bunch of offsets on the H stack to copy to the C stack:
225 movl off1(%esi), %ecx
228 ++ concatMap (\offW -> movl_offesimem_ecx (bytes_per_word * offW)
232 {- Get the addr to call into %ecx, bearing in mind that there's
233 an Addr# tag at the indicated location, and do the call:
235 movl 4*(1 /*tag*/ +addr_offW)(%esi), %ecx
238 ++ movl_offesimem_ecx (bytes_per_word * addr_offW)
241 {- Nuke the args just pushed and re-establish %esi at the
244 addl $4*number_of_args_pushed, %esp (ccall only)
245 movl 28+4(%esp), %esi
247 ++ (if cconv /= StdCallConv
248 then add_lit_esp stack_frame_size
250 ++ movl_offespmem_esi 32
252 {- Depending on what the return type is, get the result
253 from %eax or %edx:%eax or %st(0).
255 movl %eax, 4(%esi) -- assuming tagged result
264 ++ let i32 = movl_eax_offesimem 0
265 i64 = movl_eax_offesimem 0 ++ movl_edx_offesimem 4
266 f32 = fstps_offesimem 0
267 f64 = fstpl_offesimem 0
275 other -> moan64 "ByteCodeFFI.mkMarshalCode_wrk(x86)"
278 {- Restore all the pushed regs and go home.
294 #elif x86_64_TARGET_ARCH
297 -- the address of the H stack is in %rdi. We need to move it out, so
298 -- we can use %rdi as an arg reg for the following call:
302 -- ####### load / push the args
305 (stack_args, fregs_unused, reg_loads) =
306 load_arg_regs arg_offs_n_reps int_loads float_loads []
308 tot_arg_size = bytes_per_word * length stack_args
310 -- On entry to the called function, %rsp should be aligned
311 -- on a 16-byte boundary +8 (i.e. the first stack arg after
312 -- the return address is 16-byte aligned). In STG land
313 -- %rsp is kept 16-byte aligned (see StgCRun.c), so we just
314 -- need to make sure we push a multiple of 16-bytes of args,
315 -- plus the return address, to get the correct alignment.
316 (real_size, adjust_rsp)
317 | tot_arg_size `rem` 16 == 0 = (tot_arg_size, [])
318 | otherwise = (tot_arg_size + 8, subq_lit_rsp 8)
320 (stack_pushes, stack_words) =
321 push_args stack_args [] 0
323 -- we need to know the number of SSE regs used in the call, see later
324 n_sse_regs_used = length float_loads - length fregs_unused
328 ++ concat stack_pushes -- push in reverse order
330 -- ####### make the call
332 -- use %r10 to make the call, because we don't have to save it.
333 -- movq 8*addr_offW(%rbp), %r10
334 ++ movq_rbpoff_r10 (bytes_per_word * addr_offW)
336 -- The x86_64 ABI requires us to set %al to the number of SSE
337 -- registers that contain arguments, if the called routine
338 -- is a varargs function. We don't know whether it's a
339 -- varargs function or not, so we have to assume it is.
341 -- It's not safe to omit this assignment, even if the number
342 -- of SSE regs in use is zero. If %al is larger than 8
343 -- on entry to a varargs function, seg faults ensue.
344 ++ movq_lit_rax n_sse_regs_used
347 -- pop the args from the stack, only in ccall mode
348 -- (in stdcall the callee does it).
349 ++ (if cconv /= StdCallConv
350 then addq_lit_rsp real_size
353 -- ####### place the result in the right place and return
362 -- int arg regs: rdi,rsi,rdx,rcx,r8,r9
363 -- flt arg regs: xmm0..xmm7
364 int_loads = [ movq_rbpoff_rdi, movq_rbpoff_rsi, movq_rbpoff_rdx,
365 movq_rbpoff_rcx, movq_rbpoff_r8, movq_rbpoff_r9 ]
366 float_loads = [ 0..7 ]
368 load_arg_regs args [] [] code = (args, [], code)
369 load_arg_regs [] iregs fregs code = ([], fregs, code)
370 load_arg_regs ((off,rep):args) iregs fregs code
375 load_arg_regs args iregs frest
376 (mov_f32_rbpoff_xmm n (bytes_per_word * off) : code)
381 load_arg_regs args iregs frest
382 (mov_f64_rbpoff_xmm n (bytes_per_word * off) : code)
383 | (mov_reg:irest) <- iregs =
384 load_arg_regs args irest fregs (mov_reg (bytes_per_word * off) : code)
388 push_this_arg = ((off,rep):args',fregs', code')
389 where (args',fregs',code') = load_arg_regs args iregs fregs code
391 push_args [] code pushed_words = (code, pushed_words)
392 push_args ((off,rep):args) code pushed_words
394 push_args args (push_f32_rbpoff (bytes_per_word * off) : code)
397 push_args args (push_f64_rbpoff (bytes_per_word * off) : code)
400 push_args args (pushq_rbpoff (bytes_per_word * off) : code)
411 i64 = movq_rax_rbpoff 0
412 f32 = mov_f32_xmm0_rbpoff 0
413 f64 = mov_f64_xmm0_rbpoff 0
415 -- ######### x86_64 machine code:
417 -- 0: 48 89 fd mov %rdi,%rbp
418 -- 3: 48 8b bd 78 56 34 12 mov 0x12345678(%rbp),%rdi
419 -- a: 48 8b b5 78 56 34 12 mov 0x12345678(%rbp),%rsi
420 -- 11: 48 8b 95 78 56 34 12 mov 0x12345678(%rbp),%rdx
421 -- 18: 48 8b 8d 78 56 34 12 mov 0x12345678(%rbp),%rcx
422 -- 1f: 4c 8b 85 78 56 34 12 mov 0x12345678(%rbp),%r8
423 -- 26: 4c 8b 8d 78 56 34 12 mov 0x12345678(%rbp),%r9
424 -- 2d: 4c 8b 95 78 56 34 12 mov 0x12345678(%rbp),%r10
425 -- 34: 48 c7 c0 78 56 34 12 mov $0x12345678,%rax
426 -- 3b: 48 89 85 78 56 34 12 mov %rax,0x12345678(%rbp)
427 -- 42: f3 0f 10 bd 78 56 34 12 movss 0x12345678(%rbp),%xmm7
428 -- 4a: f2 0f 10 9d 78 56 34 12 movsd 0x12345678(%rbp),%xmm3
429 -- 52: f2 44 0f 10 85 78 56 34 12 movsd 0x12345678(%rbp),%xmm8
430 -- 5b: f3 0f 11 9d 78 56 34 12 movss %xmm3,0x12345678(%rbp)
431 -- 63: f2 0f 11 9d 78 56 34 12 movsd %xmm3,0x12345678(%rbp)
432 -- 6b: f2 44 0f 11 85 78 56 34 12 movsd %xmm8,0x12345678(%rbp)
433 -- 74: ff b5 78 56 34 12 pushq 0x12345678(%rbp)
434 -- 7a: f3 44 0f 11 04 24 movss %xmm8,(%rsp)
435 -- 80: f2 44 0f 11 04 24 movsd %xmm8,(%rsp)
436 -- 86: 48 81 ec 78 56 34 12 sub $0x12345678,%rsp
437 -- 8d: 48 81 c4 78 56 34 12 add $0x12345678,%rsp
438 -- 94: 41 ff d2 callq *%r10
443 movq_rdi_rbp = [0x48,0x89,0xfd]
444 movq_rbpoff_rdi off = [0x48, 0x8b, 0xbd] ++ lit32 off
445 movq_rbpoff_rsi off = [0x48, 0x8b, 0xb5] ++ lit32 off
446 movq_rbpoff_rdx off = [0x48, 0x8b, 0x95] ++ lit32 off
447 movq_rbpoff_rcx off = [0x48, 0x8b, 0x8d] ++ lit32 off
448 movq_rbpoff_r8 off = [0x4c, 0x8b, 0x85] ++ lit32 off
449 movq_rbpoff_r9 off = [0x4c, 0x8b, 0x8d] ++ lit32 off
450 movq_rbpoff_r10 off = [0x4c, 0x8b, 0x95] ++ lit32 off
451 movq_lit_rax lit = [0x48, 0xc7, 0xc0] ++ lit32 lit
452 movq_rax_rbpoff off = [0x48, 0x89, 0x85] ++ lit32 off
453 mov_f32_rbpoff_xmm n off
454 = 0xf3 : if n >= 8 then 0x44 : rest else rest
455 where rest = [0x0f, 0x10, 0x85 + (n.&.7)`shiftL`3] ++ lit32 off
456 mov_f64_rbpoff_xmm n off
457 = 0xf2 : if n >= 8 then 0x44 : rest else rest
458 where rest = [0x0f, 0x10, 0x85 + (n.&.7)`shiftL`3] ++ lit32 off
459 mov_f32_xmm0_rbpoff off = [0xf3, 0x0f, 0x11, 0x85] ++ lit32 off
460 mov_f64_xmm0_rbpoff off = [0xf2, 0x0f, 0x11, 0x85] ++ lit32 off
461 pushq_rbpoff off = [0xff, 0xb5] ++ lit32 off
462 push_f32_rbpoff off =
463 subq_lit_rsp 8 ++ -- subq $8, %rsp
464 mov_f32_rbpoff_xmm 8 off ++ -- movss off(%rbp), %xmm8
465 [0xf3, 0x44, 0x0f, 0x11, 0x04, 0x24] -- movss %xmm8, (%rsp)
466 push_f64_rbpoff off =
467 subq_lit_rsp 8 ++ -- subq $8, %rsp
468 mov_f64_rbpoff_xmm 8 off ++ -- movsd off(%rbp), %xmm8
469 [0xf2, 0x44, 0x0f, 0x11, 0x04, 0x24] -- movsd %xmm8, (%rsp)
470 subq_lit_rsp lit = [0x48, 0x81, 0xec] ++ lit32 lit
471 addq_lit_rsp lit = [0x48, 0x81, 0xc4] ++ lit32 lit
472 call_star_r10 = [0x41,0xff,0xd2]
477 #elif sparc_TARGET_ARCH
479 = let -- At least for sparc V8
483 w32_to_w8s_bigEndian :: Word32 -> [Word8]
484 w32_to_w8s_bigEndian w
485 = [fromIntegral (0xFF .&. (w `shiftR` 24)),
486 fromIntegral (0xFF .&. (w `shiftR` 16)),
487 fromIntegral (0xFF .&. (w `shiftR` 8)),
488 fromIntegral (0xFF .&. w)]
492 [ [a_offW .. a_offW + cgRepSizeW a_rep - 1]
494 | (a_offW, a_rep) <- arg_offs_n_reps
497 total_argWs = length offsets_to_pushW
498 argWs_on_stack = if total_argWs > 6 then total_argWs - 6
501 -- The stack pointer must be kept 8-byte aligned, which means
502 -- we need to calculate this quantity too
503 argWs_on_stack_ROUNDED_UP
504 | odd argWs_on_stack = 1 + argWs_on_stack
505 | otherwise = argWs_on_stack
507 -- some helpers to assemble sparc insns.
509 iReg, oReg, gReg, fReg :: Int -> Word32
510 iReg = fromIntegral . (+ 24)
511 oReg = fromIntegral . (+ 8)
512 gReg = fromIntegral . (+ 0)
527 insn_r_r_i :: Word32 -> Word32 -> Word32 -> Int -> Word32
528 insn_r_r_i op3 rs1 rd imm13
530 .|. (rs1 `shiftL` 25)
531 .|. (op3 `shiftL` 19)
536 insn_r_i_r :: Word32 -> Word32 -> Int -> Word32 -> Word32
537 insn_r_i_r op3 rs1 imm13 rd
540 .|. (op3 `shiftL` 19)
541 .|. (rs1 `shiftL` 14)
545 mkSimm13 :: Int -> Word32
547 = let imm13w = (fromIntegral imm13) :: Word32
550 -- REAL (non-synthetic) insns
551 -- or %rs1, %rs2, %rd
552 mkOR :: Word32 -> Word32 -> Word32 -> Word32
556 .|. (op3_OR `shiftL` 19)
557 .|. (rs1 `shiftL` 14)
560 where op3_OR = 2 :: Word32
562 -- ld(int) [%rs + imm13], %rd
563 mkLD rs1 imm13 rd = insn_r_r_i 0x00{-op3_LD-} rd rs1 imm13
565 -- st(int) %rs, [%rd + imm13]
566 mkST = insn_r_r_i 0x04 -- op3_ST
568 -- st(float) %rs, [%rd + imm13]
569 mkSTF = insn_r_r_i 0x24 -- op3_STF
571 -- jmpl %rs + imm13, %rd
572 mkJMPL = insn_r_i_r 0x38 -- op3_JMPL
574 -- save %rs + imm13, %rd
575 mkSAVE = insn_r_i_r 0x3C -- op3_SAVE
577 -- restore %rs + imm13, %rd
578 mkRESTORE = insn_r_i_r 0x3D -- op3_RESTORE
581 mkNOP = mkOR g0 g0 g0
582 mkCALL reg = mkJMPL reg 0 o7
583 mkRET = mkJMPL i7 8 g0
584 mkRESTORE_TRIVIAL = mkRESTORE g0 0 g0
586 --trace (show (map fst arg_offs_n_reps))
587 concatMap w32_to_w8s_bigEndian (
589 {- On entry, %o0 is the arg passed from the interpreter. After
590 the initial save insn, it will be in %i0. Studying the sparc
591 docs one would have thought that the minimum frame size is 92
592 bytes, but gcc always uses at least 112, and indeed there are
593 segfaults a-plenty with 92. So I use 112 here as well. I
594 don't understand why, tho.
596 [mkSAVE sp (- ({-92-}112 + 4*argWs_on_stack_ROUNDED_UP)) sp]
598 {- For each arg in args_offs_n_reps, examine the associated
599 CgRep to determine how many words there are. This gives a
600 bunch of offsets on the H stack. Move the first 6 words into
601 %o0 .. %o5 and the rest on the stack, starting at [%sp+92].
604 ++ let doArgW (offW, wordNo)
606 = [mkLD i0 (bytes_per_word * offW) (oReg wordNo)]
608 = [mkLD i0 (bytes_per_word * offW) g1,
609 mkST g1 sp (92 + bytes_per_word * (wordNo - 6))]
611 concatMap doArgW (zip offsets_to_pushW [0 ..])
613 {- Get the addr to call into %g1, bearing in mind that there's
614 an Addr# tag at the indicated location, and do the call:
616 ld [4*(1 /*tag*/ +addr_offW) + %i0], %g1
619 ++ [mkLD i0 (bytes_per_word * addr_offW) g1,
623 {- Depending on what the return type is, get the result
624 from %o0 or %o1:%o0 or %f0 or %f1:%f0.
626 st %o0, [%i0 + 4] -- 32 bit int
628 st %o0, [%i0 + 4] -- 64 bit int
629 st %o1, [%i0 + 8] -- or the other way round?
631 st %f0, [%i0 + 4] -- 32 bit float
633 st %f0, [%i0 + 4] -- 64 bit float
634 st %f1, [%i0 + 8] -- or the other way round?
637 ++ let i32 = [mkST o0 i0 0]
638 i64 = [mkST o0 i0 0, mkST o1 i0 4]
639 f32 = [mkSTF f0 i0 0]
640 f64 = [mkSTF f0 i0 0, mkSTF f1 i0 4]
647 other -> moan64 "ByteCodeFFI.mkMarshalCode_wrk(sparc)"
651 mkRESTORE_TRIVIAL] -- this is in the delay slot of the RET
653 #elif powerpc_TARGET_ARCH && darwin_TARGET_OS
659 w32_to_w8s_bigEndian :: Word32 -> [Word8]
660 w32_to_w8s_bigEndian w
661 = [fromIntegral (0xFF .&. (w `shiftR` 24)),
662 fromIntegral (0xFF .&. (w `shiftR` 16)),
663 fromIntegral (0xFF .&. (w `shiftR` 8)),
664 fromIntegral (0xFF .&. w)]
666 -- addr and result bits offsetsW
667 a_off = addr_offW * bytes_per_word
668 result_off = r_offW * bytes_per_word
671 parameterArea = sum [ cgRepSizeW a_rep * bytes_per_word
672 | (_, a_rep) <- arg_offs_n_reps ]
673 savedRegisterArea = 4
674 frameSize = padTo16 (linkageArea + max parameterArea 32 + savedRegisterArea)
675 padTo16 x = case x `mod` 16 of
679 pass_parameters [] _ _ = []
680 pass_parameters ((a_offW, a_rep):args) nextFPR offsetW =
682 haskellArgOffset = a_offW * bytes_per_word
683 offsetW' = offsetW + cgRepSizeW a_rep
687 [0x801f0000 -- lwz rX, src(r31)
688 .|. (fromIntegral src .&. 0xFFFF)
689 .|. (fromIntegral (offsetW+w+3) `shiftL` 21)]
691 [0x801f0000 -- lwz r0, src(r31)
692 .|. (fromIntegral src .&. 0xFFFF),
693 0x90010000 -- stw r0, dst(r1)
694 .|. (fromIntegral dst .&. 0xFFFF)]
696 src = haskellArgOffset + w*bytes_per_word
697 dst = linkageArea + (offsetW+w) * bytes_per_word
700 FloatArg | nextFPR < 14 ->
701 (0xc01f0000 -- lfs fX, haskellArgOffset(r31)
702 .|. (fromIntegral haskellArgOffset .&. 0xFFFF)
703 .|. (fromIntegral nextFPR `shiftL` 21))
704 : pass_parameters args (nextFPR+1) offsetW'
705 DoubleArg | nextFPR < 14 ->
706 (0xc81f0000 -- lfd fX, haskellArgOffset(r31)
707 .|. (fromIntegral haskellArgOffset .&. 0xFFFF)
708 .|. (fromIntegral nextFPR `shiftL` 21))
709 : pass_parameters args (nextFPR+1) offsetW'
711 concatMap pass_word [0 .. cgRepSizeW a_rep - 1]
712 ++ pass_parameters args nextFPR offsetW'
714 gather_result = case r_rep of
717 [0xd03f0000 .|. (fromIntegral result_off .&. 0xFFFF)]
718 -- stfs f1, result_off(r31)
720 [0xd83f0000 .|. (fromIntegral result_off .&. 0xFFFF)]
721 -- stfd f1, result_off(r31)
722 _ | cgRepSizeW r_rep == 2 ->
723 [0x907f0000 .|. (fromIntegral result_off .&. 0xFFFF),
724 0x909f0000 .|. (fromIntegral (result_off+4) .&. 0xFFFF)]
725 -- stw r3, result_off(r31)
726 -- stw r4, result_off+4(r31)
727 _ | cgRepSizeW r_rep == 1 ->
728 [0x907f0000 .|. (fromIntegral result_off .&. 0xFFFF)]
729 -- stw r3, result_off(r31)
731 concatMap w32_to_w8s_bigEndian $ [
732 0x7c0802a6, -- mflr r0
733 0x93e1fffc, -- stw r31,-4(r1)
734 0x90010008, -- stw r0,8(r1)
735 0x94210000 .|. (fromIntegral (-frameSize) .&. 0xFFFF),
736 -- stwu r1, -frameSize(r1)
737 0x7c7f1b78 -- mr r31, r3
738 ] ++ pass_parameters arg_offs_n_reps 1 0 ++ [
739 0x819f0000 .|. (fromIntegral a_off .&. 0xFFFF),
740 -- lwz r12, a_off(r31)
741 0x7d8903a6, -- mtctr r12
743 ] ++ gather_result ++ [
744 0x80210000, -- lwz r1, 0(r1)
745 0x83e1fffc, -- lwz r31, -4(r1)
746 0x80010008, -- lwz r0, 8(r1)
747 0x7c0803a6, -- mtlr r0
751 #elif powerpc_TARGET_ARCH && linux_TARGET_OS
753 -- All offsets here are measured in Words (not bytes). This includes
754 -- arguments to the load/store machine code generators, alignment numbers
755 -- and the final 'framesize' among others.
757 = concatMap w32_to_w8s_bigEndian $ [
758 0x7c0802a6, -- mflr r0
759 0x93e1fffc, -- stw r31,-4(r1)
760 0x90010008, -- stw r0,8(r1)
761 0x94210000 .|. offset (-framesize), -- stwu r1, -frameSize(r1)
762 0x7c7f1b78 -- mr r31, r3
763 ] ++ pass_parameters ++ -- pass the parameters
764 loadWord 12 addr_offW ++ [ -- lwz r12, a_off(r31)
765 0x7d8903a6, -- mtctr r12
767 ] ++ gather_result ++ [ -- save the return value
768 0x80210000, -- lwz r1, 0(r1)
769 0x83e1fffc, -- lwz r31, -4(r1)
770 0x80010008, -- lwz r0, 8(r1)
771 0x7c0803a6, -- mtlr r0
776 gather_result :: [Word32]
777 gather_result = case r_rep of
779 FloatArg -> storeFloat 1 r_offW
780 DoubleArg -> storeDouble 1 r_offW
781 LongArg -> storeLong 3 r_offW
782 _ -> storeWord 3 r_offW
784 pass_parameters :: [Word32]
785 pass_parameters = concat params
787 -- vector aligned (4 word = 16 bytes) with 8 extra words of buffer space
788 framesize = alignedTo 4 (argsize + 8)
790 ((_,_,argsize), params) = mapAccumL loadparam (3,1,2) arg_offs_n_reps
792 -- handle one argument, returning machine code and the updated state
793 loadparam :: (Int, Int, Int) -> (Int, CgRep) ->
794 ((Int, Int, Int), [Word32])
796 loadparam (gpr, fpr, stack) (ofs, rep) = case rep of
797 FloatArg | fpr <= 8 -> ( (gpr, fpr + 1, stack), loadFloat fpr ofs )
798 FloatArg -> ( (gpr, fpr, stack + 1), stackWord stack ofs )
800 DoubleArg | fpr <= 8 -> ( (gpr, fpr + 1, stack), loadDouble fpr ofs )
801 DoubleArg -> ( (gpr, fpr, astack + 2), stackLong astack ofs )
803 LongArg | even gpr -> loadparam (gpr + 1, fpr, stack) (ofs, rep)
804 LongArg | gpr <= 9 -> ( (gpr + 2, fpr, stack), loadLong gpr ofs )
805 LongArg -> ( (gpr, fpr, astack + 2), stackLong astack ofs )
807 _ | gpr <= 10 -> ( (gpr + 1, fpr, stack), loadWord gpr ofs )
808 _ -> ( (gpr, fpr, stack + 1), stackWord stack ofs )
809 where astack = alignedTo 2 stack
811 alignedTo :: Int -> Int -> Int
812 alignedTo alignment x = case x `mod` alignment of
814 y -> x - y + alignment
816 -- convenience macros to do multiple-instruction data moves
817 stackWord dst src = loadWord 0 src ++ storeWordC 0 dst
818 stackLong dst src = stackWord dst src ++ stackWord (dst + 1) (src + 1)
819 loadLong dst src = loadWord dst src ++ loadWord (dst + 1) (src + 1)
820 storeLong dst src = storeWord dst src ++ storeWord (dst + 1) (src + 1)
822 -- load data from the Haskell stack (relative to r31)
823 loadFloat = loadstoreInstr 0xc01f0000 -- lfs fpr, ofs(r31)
824 loadDouble = loadstoreInstr 0xc81f0000 -- lfd fpr, ofs(r31)
825 loadWord = loadstoreInstr 0x801f0000 -- lwz gpr, ofs(r31)
827 -- store data to the Haskell stack (relative to r31)
828 storeFloat = loadstoreInstr 0xd01f0000 -- stfs fpr, ofs(r31)
829 storeDouble = loadstoreInstr 0xd81f0000 -- stfd fpr, ofs(r31)
830 storeWord = loadstoreInstr 0x901f0000 -- stw gpr, ofs(r31)
832 -- store data to the C stack (relative to r1)
833 storeWordC = loadstoreInstr 0x90010000 -- stw gpr, ofs(r1)
835 -- machine code building blocks
836 loadstoreInstr :: Word32 -> Int -> Int -> [Word32]
837 loadstoreInstr code reg ofs = [ code .|. register reg .|. offset ofs ]
839 register :: Int -> Word32
840 register reg = fromIntegral reg `shiftL` 21
842 offset :: Int -> Word32
843 offset ofs = fromIntegral (ofs * 4) .&. 0xFFFF
846 w32_to_w8s_bigEndian :: Word32 -> [Word8]
847 w32_to_w8s_bigEndian w = [fromIntegral (0xFF .&. (w `shiftR` 24)),
848 fromIntegral (0xFF .&. (w `shiftR` 16)),
849 fromIntegral (0xFF .&. (w `shiftR` 8)),
850 fromIntegral (0xFF .&. w)]
854 = throwDyn (InstallationError "foreign import is not implemented for GHCi on this platform.")
858 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
859 lit32 :: Int -> [Word8]
860 lit32 i = let w32 = (fromIntegral i) :: Word32
861 in map (fromIntegral . ( .&. 0xFF))
862 [w32, w32 `shiftR` 8,
863 w32 `shiftR` 16, w32 `shiftR` 24]