2 % (c) The University of Glasgow 2001-2006
5 ByteCodeGen: Generate machine-code sequences for foreign import
8 module ByteCodeFFI ( mkMarshalCode, moan64, newExec ) where
10 #include "HsVersions.h"
17 -- DON'T remove apparently unused imports here ..
18 -- there is ifdeffery below
19 import Control.Exception ( throwDyn )
20 import Data.Bits ( Bits(..), shiftR, shiftL )
21 import Data.List ( mapAccumL )
23 import Data.Word ( Word8, Word32 )
24 import Foreign ( Ptr, FunPtr, castPtrToFunPtr,
25 Storable, sizeOf, pokeArray )
26 import Foreign.C ( CUInt )
27 import System.IO.Unsafe ( unsafePerformIO )
28 import System.IO ( hPutStrLn, stderr )
29 -- import Debug.Trace ( trace )
32 %************************************************************************
34 \subsection{The platform-dependent marshall-code-generator.}
36 %************************************************************************
40 moan64 :: String -> SDoc -> a
44 "\nGHCi's bytecode generation machinery can't handle 64-bit\n" ++
45 "code properly yet. You can work around this for the time being\n" ++
46 "by compiling this module and all those it imports to object code,\n" ++
47 "and re-starting your GHCi session. The panic below contains information,\n" ++
48 "intended for the GHC implementors, about the exact place where GHC gave up.\n"
55 -- For sparc_TARGET_ARCH, i386_TARGET_ARCH, etc.
56 #include "nativeGen/NCG.h"
59 Make a piece of code which expects to see the Haskell stack
60 looking like this. It is given a pointer to the lowest word in
61 the stack -- presumably the tag of the placeholder.
67 <placeholder-for-result#> (must be an unboxed type)
69 We cope with both ccall and stdcall for the C fn. However, this code
70 itself expects only to be called using the ccall convention -- that is,
71 we don't clear our own (single) arg off the C stack.
73 mkMarshalCode :: CCallConv
74 -> (Int, CgRep) -> Int -> [(Int, CgRep)]
76 mkMarshalCode cconv (r_offW, r_rep) addr_offW arg_offs_n_reps
77 = let bytes = mkMarshalCode_wrk cconv (r_offW, r_rep)
78 addr_offW arg_offs_n_reps
81 newExec :: Storable a => [a] -> IO (FunPtr ())
83 = do ptr <- _allocateExec (fromIntegral $ codeSize undefined code)
85 return (castPtrToFunPtr ptr)
87 codeSize :: Storable a => a -> [a] -> Int
88 codeSize dummy array = sizeOf(dummy) * length array
90 foreign import ccall unsafe "allocateExec"
91 _allocateExec :: CUInt -> IO (Ptr a)
93 mkMarshalCode_wrk :: CCallConv
94 -> (Int, CgRep) -> Int -> [(Int, CgRep)]
97 mkMarshalCode_wrk cconv (r_offW, r_rep) addr_offW arg_offs_n_reps
101 = let -- Don't change this without first consulting Intel Corp :-)
106 [ -- reversed because x86 is little-endian
107 reverse [a_offW .. a_offW + cgRepSizeW a_rep - 1]
109 -- reversed because args are pushed L -> R onto C stack
110 | (a_offW, a_rep) <- reverse arg_offs_n_reps
113 arguments_size = bytes_per_word * length offsets_to_pushW
115 -- Darwin: align stack frame size to a multiple of 16 bytes
116 stack_frame_size = (arguments_size + 15) .&. complement 15
117 stack_frame_pad = stack_frame_size - arguments_size
119 stack_frame_size = arguments_size
122 -- some helpers to assemble x86 insns.
123 movl_offespmem_esi offB -- movl offB(%esp), %esi
124 = [0x8B, 0xB4, 0x24] ++ lit32 offB
125 movl_offesimem_ecx offB -- movl offB(%esi), %ecx
126 = [0x8B, 0x8E] ++ lit32 offB
127 save_regs -- pushl all intregs except %esp
128 = [0x50, 0x53, 0x51, 0x52, 0x56, 0x57, 0x55]
129 restore_regs -- popl ditto
130 = [0x5D, 0x5F, 0x5E, 0x5A, 0x59, 0x5B, 0x58]
131 pushl_ecx -- pushl %ecx
133 call_star_ecx -- call * %ecx
135 add_lit_esp lit -- addl $lit, %esp
136 = [0x81, 0xC4] ++ lit32 lit
137 movl_eax_offesimem offB -- movl %eax, offB(%esi)
138 = [0x89, 0x86] ++ lit32 offB
139 movl_edx_offesimem offB -- movl %edx, offB(%esi)
140 = [0x89, 0x96] ++ lit32 offB
143 fstpl_offesimem offB -- fstpl offB(%esi)
144 = [0xDD, 0x9E] ++ lit32 offB
145 fstps_offesimem offB -- fstps offB(%esi)
146 = [0xD9, 0x9E] ++ lit32 offB
148 2 0000 8BB42478 movl 0x12345678(%esp), %esi
150 3 0007 8B8E7856 movl 0x12345678(%esi), %ecx
153 5 000d 50535152 pushl %eax ; pushl %ebx ; pushl %ecx ; pushl %edx
154 6 0011 565755 pushl %esi ; pushl %edi ; pushl %ebp
156 8 0014 5D5F5E popl %ebp ; popl %edi ; popl %esi
157 9 0017 5A595B58 popl %edx ; popl %ecx ; popl %ebx ; popl %eax
159 11 001b 51 pushl %ecx
160 12 001c FFD1 call * %ecx
162 14 001e 81C47856 addl $0x12345678, %esp
164 15 0024 89867856 movl %eax, 0x12345678(%esi)
166 16 002a 89967856 movl %edx, 0x12345678(%esi)
169 18 0030 DD967856 fstl 0x12345678(%esi)
171 19 0036 DD9E7856 fstpl 0x12345678(%esi)
173 20 003c D9967856 fsts 0x12345678(%esi)
175 21 0042 D99E7856 fstps 0x12345678(%esi)
183 --trace (show (map fst arg_offs_n_reps))
185 {- On entry, top of C stack 0(%esp) is the RA and 4(%esp) is
186 arg passed from the interpreter.
188 Push all callee saved regs. Push all of them anyway ...
199 {- Now 28+0(%esp) is RA and 28+4(%esp) is the arg (the H stack ptr).
200 We'll use %esi as a temporary to point at the H stack, and
201 %ecx as a temporary to copy via.
203 movl 28+4(%esp), %esi
205 ++ movl_offespmem_esi 32
208 {- On Darwin, add some padding so that the stack stays aligned. -}
209 ++ (if stack_frame_pad /= 0
210 then add_lit_esp (-stack_frame_pad)
214 {- For each arg in args_offs_n_reps, examine the associated
215 CgRep to determine how many words there are. This gives a
216 bunch of offsets on the H stack to copy to the C stack:
218 movl off1(%esi), %ecx
221 ++ concatMap (\offW -> movl_offesimem_ecx (bytes_per_word * offW)
225 {- Get the addr to call into %ecx, bearing in mind that there's
226 an Addr# tag at the indicated location, and do the call:
228 movl 4*(1 /*tag*/ +addr_offW)(%esi), %ecx
231 ++ movl_offesimem_ecx (bytes_per_word * addr_offW)
234 {- Nuke the args just pushed and re-establish %esi at the
237 addl $4*number_of_args_pushed, %esp (ccall only)
238 movl 28+4(%esp), %esi
240 ++ (if cconv /= StdCallConv
241 then add_lit_esp stack_frame_size
243 ++ movl_offespmem_esi 32
245 {- Depending on what the return type is, get the result
246 from %eax or %edx:%eax or %st(0).
248 movl %eax, 4(%esi) -- assuming tagged result
257 ++ let i32 = movl_eax_offesimem 0
258 i64 = movl_eax_offesimem 0 ++ movl_edx_offesimem 4
259 f32 = fstps_offesimem 0
260 f64 = fstpl_offesimem 0
268 other -> moan64 "ByteCodeFFI.mkMarshalCode_wrk(x86)"
271 {- Restore all the pushed regs and go home.
287 #elif x86_64_TARGET_ARCH
290 -- the address of the H stack is in %rdi. We need to move it out, so
291 -- we can use %rdi as an arg reg for the following call:
295 -- ####### load / push the args
298 (stack_args, fregs_unused, reg_loads) =
299 load_arg_regs arg_offs_n_reps int_loads float_loads []
301 tot_arg_size = bytes_per_word * length stack_args
303 -- On entry to the called function, %rsp should be aligned
304 -- on a 16-byte boundary +8 (i.e. the first stack arg after
305 -- the return address is 16-byte aligned). In STG land
306 -- %rsp is kept 16-byte aligned (see StgCRun.c), so we just
307 -- need to make sure we push a multiple of 16-bytes of args,
308 -- plus the return address, to get the correct alignment.
309 (real_size, adjust_rsp)
310 | tot_arg_size `rem` 16 == 0 = (tot_arg_size, [])
311 | otherwise = (tot_arg_size + 8, subq_lit_rsp 8)
313 (stack_pushes, stack_words) =
314 push_args stack_args [] 0
316 -- we need to know the number of SSE regs used in the call, see later
317 n_sse_regs_used = length float_loads - length fregs_unused
321 ++ concat stack_pushes -- push in reverse order
323 -- ####### make the call
325 -- use %r10 to make the call, because we don't have to save it.
326 -- movq 8*addr_offW(%rbp), %r10
327 ++ movq_rbpoff_r10 (bytes_per_word * addr_offW)
329 -- The x86_64 ABI requires us to set %al to the number of SSE
330 -- registers that contain arguments, if the called routine
331 -- is a varargs function. We don't know whether it's a
332 -- varargs function or not, so we have to assume it is.
334 -- It's not safe to omit this assignment, even if the number
335 -- of SSE regs in use is zero. If %al is larger than 8
336 -- on entry to a varargs function, seg faults ensue.
337 ++ movq_lit_rax n_sse_regs_used
340 -- pop the args from the stack, only in ccall mode
341 -- (in stdcall the callee does it).
342 ++ (if cconv /= StdCallConv
343 then addq_lit_rsp real_size
346 -- ####### place the result in the right place and return
355 -- int arg regs: rdi,rsi,rdx,rcx,r8,r9
356 -- flt arg regs: xmm0..xmm7
357 int_loads = [ movq_rbpoff_rdi, movq_rbpoff_rsi, movq_rbpoff_rdx,
358 movq_rbpoff_rcx, movq_rbpoff_r8, movq_rbpoff_r9 ]
359 float_loads = [ 0..7 ]
361 load_arg_regs args [] [] code = (args, [], code)
362 load_arg_regs [] iregs fregs code = ([], fregs, code)
363 load_arg_regs ((off,rep):args) iregs fregs code
368 load_arg_regs args iregs frest
369 (mov_f32_rbpoff_xmm n (bytes_per_word * off) : code)
374 load_arg_regs args iregs frest
375 (mov_f64_rbpoff_xmm n (bytes_per_word * off) : code)
376 | (mov_reg:irest) <- iregs =
377 load_arg_regs args irest fregs (mov_reg (bytes_per_word * off) : code)
381 push_this_arg = ((off,rep):args',fregs', code')
382 where (args',fregs',code') = load_arg_regs args iregs fregs code
384 push_args [] code pushed_words = (code, pushed_words)
385 push_args ((off,rep):args) code pushed_words
387 push_args args (push_f32_rbpoff (bytes_per_word * off) : code)
390 push_args args (push_f64_rbpoff (bytes_per_word * off) : code)
393 push_args args (pushq_rbpoff (bytes_per_word * off) : code)
404 i64 = movq_rax_rbpoff 0
405 f32 = mov_f32_xmm0_rbpoff 0
406 f64 = mov_f64_xmm0_rbpoff 0
408 -- ######### x86_64 machine code:
410 -- 0: 48 89 fd mov %rdi,%rbp
411 -- 3: 48 8b bd 78 56 34 12 mov 0x12345678(%rbp),%rdi
412 -- a: 48 8b b5 78 56 34 12 mov 0x12345678(%rbp),%rsi
413 -- 11: 48 8b 95 78 56 34 12 mov 0x12345678(%rbp),%rdx
414 -- 18: 48 8b 8d 78 56 34 12 mov 0x12345678(%rbp),%rcx
415 -- 1f: 4c 8b 85 78 56 34 12 mov 0x12345678(%rbp),%r8
416 -- 26: 4c 8b 8d 78 56 34 12 mov 0x12345678(%rbp),%r9
417 -- 2d: 4c 8b 95 78 56 34 12 mov 0x12345678(%rbp),%r10
418 -- 34: 48 c7 c0 78 56 34 12 mov $0x12345678,%rax
419 -- 3b: 48 89 85 78 56 34 12 mov %rax,0x12345678(%rbp)
420 -- 42: f3 0f 10 bd 78 56 34 12 movss 0x12345678(%rbp),%xmm7
421 -- 4a: f2 0f 10 9d 78 56 34 12 movsd 0x12345678(%rbp),%xmm3
422 -- 52: f2 44 0f 10 85 78 56 34 12 movsd 0x12345678(%rbp),%xmm8
423 -- 5b: f3 0f 11 9d 78 56 34 12 movss %xmm3,0x12345678(%rbp)
424 -- 63: f2 0f 11 9d 78 56 34 12 movsd %xmm3,0x12345678(%rbp)
425 -- 6b: f2 44 0f 11 85 78 56 34 12 movsd %xmm8,0x12345678(%rbp)
426 -- 74: ff b5 78 56 34 12 pushq 0x12345678(%rbp)
427 -- 7a: f3 44 0f 11 04 24 movss %xmm8,(%rsp)
428 -- 80: f2 44 0f 11 04 24 movsd %xmm8,(%rsp)
429 -- 86: 48 81 ec 78 56 34 12 sub $0x12345678,%rsp
430 -- 8d: 48 81 c4 78 56 34 12 add $0x12345678,%rsp
431 -- 94: 41 ff d2 callq *%r10
436 movq_rdi_rbp = [0x48,0x89,0xfd]
437 movq_rbpoff_rdi off = [0x48, 0x8b, 0xbd] ++ lit32 off
438 movq_rbpoff_rsi off = [0x48, 0x8b, 0xb5] ++ lit32 off
439 movq_rbpoff_rdx off = [0x48, 0x8b, 0x95] ++ lit32 off
440 movq_rbpoff_rcx off = [0x48, 0x8b, 0x8d] ++ lit32 off
441 movq_rbpoff_r8 off = [0x4c, 0x8b, 0x85] ++ lit32 off
442 movq_rbpoff_r9 off = [0x4c, 0x8b, 0x8d] ++ lit32 off
443 movq_rbpoff_r10 off = [0x4c, 0x8b, 0x95] ++ lit32 off
444 movq_lit_rax lit = [0x48, 0xc7, 0xc0] ++ lit32 lit
445 movq_rax_rbpoff off = [0x48, 0x89, 0x85] ++ lit32 off
446 mov_f32_rbpoff_xmm n off
447 = 0xf3 : if n >= 8 then 0x44 : rest else rest
448 where rest = [0x0f, 0x10, 0x85 + (n.&.7)`shiftL`3] ++ lit32 off
449 mov_f64_rbpoff_xmm n off
450 = 0xf2 : if n >= 8 then 0x44 : rest else rest
451 where rest = [0x0f, 0x10, 0x85 + (n.&.7)`shiftL`3] ++ lit32 off
452 mov_f32_xmm0_rbpoff off = [0xf3, 0x0f, 0x11, 0x85] ++ lit32 off
453 mov_f64_xmm0_rbpoff off = [0xf2, 0x0f, 0x11, 0x85] ++ lit32 off
454 pushq_rbpoff off = [0xff, 0xb5] ++ lit32 off
455 push_f32_rbpoff off =
456 subq_lit_rsp 8 ++ -- subq $8, %rsp
457 mov_f32_rbpoff_xmm 8 off ++ -- movss off(%rbp), %xmm8
458 [0xf3, 0x44, 0x0f, 0x11, 0x04, 0x24] -- movss %xmm8, (%rsp)
459 push_f64_rbpoff off =
460 subq_lit_rsp 8 ++ -- subq $8, %rsp
461 mov_f64_rbpoff_xmm 8 off ++ -- movsd off(%rbp), %xmm8
462 [0xf2, 0x44, 0x0f, 0x11, 0x04, 0x24] -- movsd %xmm8, (%rsp)
463 subq_lit_rsp lit = [0x48, 0x81, 0xec] ++ lit32 lit
464 addq_lit_rsp lit = [0x48, 0x81, 0xc4] ++ lit32 lit
465 call_star_r10 = [0x41,0xff,0xd2]
470 #elif sparc_TARGET_ARCH
472 = let -- At least for sparc V8
476 w32_to_w8s_bigEndian :: Word32 -> [Word8]
477 w32_to_w8s_bigEndian w
478 = [fromIntegral (0xFF .&. (w `shiftR` 24)),
479 fromIntegral (0xFF .&. (w `shiftR` 16)),
480 fromIntegral (0xFF .&. (w `shiftR` 8)),
481 fromIntegral (0xFF .&. w)]
485 [ [a_offW .. a_offW + cgRepSizeW a_rep - 1]
487 | (a_offW, a_rep) <- arg_offs_n_reps
490 total_argWs = length offsets_to_pushW
491 argWs_on_stack = if total_argWs > 6 then total_argWs - 6
494 -- The stack pointer must be kept 8-byte aligned, which means
495 -- we need to calculate this quantity too
496 argWs_on_stack_ROUNDED_UP
497 | odd argWs_on_stack = 1 + argWs_on_stack
498 | otherwise = argWs_on_stack
500 -- some helpers to assemble sparc insns.
502 iReg, oReg, gReg, fReg :: Int -> Word32
503 iReg = fromIntegral . (+ 24)
504 oReg = fromIntegral . (+ 8)
505 gReg = fromIntegral . (+ 0)
520 insn_r_r_i :: Word32 -> Word32 -> Word32 -> Int -> Word32
521 insn_r_r_i op3 rs1 rd imm13
523 .|. (rs1 `shiftL` 25)
524 .|. (op3 `shiftL` 19)
529 insn_r_i_r :: Word32 -> Word32 -> Int -> Word32 -> Word32
530 insn_r_i_r op3 rs1 imm13 rd
533 .|. (op3 `shiftL` 19)
534 .|. (rs1 `shiftL` 14)
538 mkSimm13 :: Int -> Word32
540 = let imm13w = (fromIntegral imm13) :: Word32
543 -- REAL (non-synthetic) insns
544 -- or %rs1, %rs2, %rd
545 mkOR :: Word32 -> Word32 -> Word32 -> Word32
549 .|. (op3_OR `shiftL` 19)
550 .|. (rs1 `shiftL` 14)
553 where op3_OR = 2 :: Word32
555 -- ld(int) [%rs + imm13], %rd
556 mkLD rs1 imm13 rd = insn_r_r_i 0x00{-op3_LD-} rd rs1 imm13
558 -- st(int) %rs, [%rd + imm13]
559 mkST = insn_r_r_i 0x04 -- op3_ST
561 -- st(float) %rs, [%rd + imm13]
562 mkSTF = insn_r_r_i 0x24 -- op3_STF
564 -- jmpl %rs + imm13, %rd
565 mkJMPL = insn_r_i_r 0x38 -- op3_JMPL
567 -- save %rs + imm13, %rd
568 mkSAVE = insn_r_i_r 0x3C -- op3_SAVE
570 -- restore %rs + imm13, %rd
571 mkRESTORE = insn_r_i_r 0x3D -- op3_RESTORE
574 mkNOP = mkOR g0 g0 g0
575 mkCALL reg = mkJMPL reg 0 o7
576 mkRET = mkJMPL i7 8 g0
577 mkRESTORE_TRIVIAL = mkRESTORE g0 0 g0
579 --trace (show (map fst arg_offs_n_reps))
580 concatMap w32_to_w8s_bigEndian (
582 {- On entry, %o0 is the arg passed from the interpreter. After
583 the initial save insn, it will be in %i0. Studying the sparc
584 docs one would have thought that the minimum frame size is 92
585 bytes, but gcc always uses at least 112, and indeed there are
586 segfaults a-plenty with 92. So I use 112 here as well. I
587 don't understand why, tho.
589 [mkSAVE sp (- ({-92-}112 + 4*argWs_on_stack_ROUNDED_UP)) sp]
591 {- For each arg in args_offs_n_reps, examine the associated
592 CgRep to determine how many words there are. This gives a
593 bunch of offsets on the H stack. Move the first 6 words into
594 %o0 .. %o5 and the rest on the stack, starting at [%sp+92].
597 ++ let doArgW (offW, wordNo)
599 = [mkLD i0 (bytes_per_word * offW) (oReg wordNo)]
601 = [mkLD i0 (bytes_per_word * offW) g1,
602 mkST g1 sp (92 + bytes_per_word * (wordNo - 6))]
604 concatMap doArgW (zip offsets_to_pushW [0 ..])
606 {- Get the addr to call into %g1, bearing in mind that there's
607 an Addr# tag at the indicated location, and do the call:
609 ld [4*(1 /*tag*/ +addr_offW) + %i0], %g1
612 ++ [mkLD i0 (bytes_per_word * addr_offW) g1,
616 {- Depending on what the return type is, get the result
617 from %o0 or %o1:%o0 or %f0 or %f1:%f0.
619 st %o0, [%i0 + 4] -- 32 bit int
621 st %o0, [%i0 + 4] -- 64 bit int
622 st %o1, [%i0 + 8] -- or the other way round?
624 st %f0, [%i0 + 4] -- 32 bit float
626 st %f0, [%i0 + 4] -- 64 bit float
627 st %f1, [%i0 + 8] -- or the other way round?
630 ++ let i32 = [mkST o0 i0 0]
631 i64 = [mkST o0 i0 0, mkST o1 i0 4]
632 f32 = [mkSTF f0 i0 0]
633 f64 = [mkSTF f0 i0 0, mkSTF f1 i0 4]
640 other -> moan64 "ByteCodeFFI.mkMarshalCode_wrk(sparc)"
644 mkRESTORE_TRIVIAL] -- this is in the delay slot of the RET
646 #elif powerpc_TARGET_ARCH && darwin_TARGET_OS
652 w32_to_w8s_bigEndian :: Word32 -> [Word8]
653 w32_to_w8s_bigEndian w
654 = [fromIntegral (0xFF .&. (w `shiftR` 24)),
655 fromIntegral (0xFF .&. (w `shiftR` 16)),
656 fromIntegral (0xFF .&. (w `shiftR` 8)),
657 fromIntegral (0xFF .&. w)]
659 -- addr and result bits offsetsW
660 a_off = addr_offW * bytes_per_word
661 result_off = r_offW * bytes_per_word
664 parameterArea = sum [ cgRepSizeW a_rep * bytes_per_word
665 | (_, a_rep) <- arg_offs_n_reps ]
666 savedRegisterArea = 4
667 frameSize = padTo16 (linkageArea + max parameterArea 32 + savedRegisterArea)
668 padTo16 x = case x `mod` 16 of
672 pass_parameters [] _ _ = []
673 pass_parameters ((a_offW, a_rep):args) nextFPR offsetW =
675 haskellArgOffset = a_offW * bytes_per_word
676 offsetW' = offsetW + cgRepSizeW a_rep
680 [0x801f0000 -- lwz rX, src(r31)
681 .|. (fromIntegral src .&. 0xFFFF)
682 .|. (fromIntegral (offsetW+w+3) `shiftL` 21)]
684 [0x801f0000 -- lwz r0, src(r31)
685 .|. (fromIntegral src .&. 0xFFFF),
686 0x90010000 -- stw r0, dst(r1)
687 .|. (fromIntegral dst .&. 0xFFFF)]
689 src = haskellArgOffset + w*bytes_per_word
690 dst = linkageArea + (offsetW+w) * bytes_per_word
693 FloatArg | nextFPR < 14 ->
694 (0xc01f0000 -- lfs fX, haskellArgOffset(r31)
695 .|. (fromIntegral haskellArgOffset .&. 0xFFFF)
696 .|. (fromIntegral nextFPR `shiftL` 21))
697 : pass_parameters args (nextFPR+1) offsetW'
698 DoubleArg | nextFPR < 14 ->
699 (0xc81f0000 -- lfd fX, haskellArgOffset(r31)
700 .|. (fromIntegral haskellArgOffset .&. 0xFFFF)
701 .|. (fromIntegral nextFPR `shiftL` 21))
702 : pass_parameters args (nextFPR+1) offsetW'
704 concatMap pass_word [0 .. cgRepSizeW a_rep - 1]
705 ++ pass_parameters args nextFPR offsetW'
707 gather_result = case r_rep of
710 [0xd03f0000 .|. (fromIntegral result_off .&. 0xFFFF)]
711 -- stfs f1, result_off(r31)
713 [0xd83f0000 .|. (fromIntegral result_off .&. 0xFFFF)]
714 -- stfd f1, result_off(r31)
715 _ | cgRepSizeW r_rep == 2 ->
716 [0x907f0000 .|. (fromIntegral result_off .&. 0xFFFF),
717 0x909f0000 .|. (fromIntegral (result_off+4) .&. 0xFFFF)]
718 -- stw r3, result_off(r31)
719 -- stw r4, result_off+4(r31)
720 _ | cgRepSizeW r_rep == 1 ->
721 [0x907f0000 .|. (fromIntegral result_off .&. 0xFFFF)]
722 -- stw r3, result_off(r31)
724 concatMap w32_to_w8s_bigEndian $ [
725 0x7c0802a6, -- mflr r0
726 0x93e1fffc, -- stw r31,-4(r1)
727 0x90010008, -- stw r0,8(r1)
728 0x94210000 .|. (fromIntegral (-frameSize) .&. 0xFFFF),
729 -- stwu r1, -frameSize(r1)
730 0x7c7f1b78 -- mr r31, r3
731 ] ++ pass_parameters arg_offs_n_reps 1 0 ++ [
732 0x819f0000 .|. (fromIntegral a_off .&. 0xFFFF),
733 -- lwz r12, a_off(r31)
734 0x7d8903a6, -- mtctr r12
736 ] ++ gather_result ++ [
737 0x80210000, -- lwz r1, 0(r1)
738 0x83e1fffc, -- lwz r31, -4(r1)
739 0x80010008, -- lwz r0, 8(r1)
740 0x7c0803a6, -- mtlr r0
744 #elif powerpc_TARGET_ARCH && linux_TARGET_OS
746 -- All offsets here are measured in Words (not bytes). This includes
747 -- arguments to the load/store machine code generators, alignment numbers
748 -- and the final 'framesize' among others.
750 = concatMap w32_to_w8s_bigEndian $ [
751 0x7c0802a6, -- mflr r0
752 0x93e1fffc, -- stw r31,-4(r1)
753 0x90010008, -- stw r0,8(r1)
754 0x94210000 .|. offset (-framesize), -- stwu r1, -frameSize(r1)
755 0x7c7f1b78 -- mr r31, r3
756 ] ++ pass_parameters ++ -- pass the parameters
757 loadWord 12 addr_offW ++ [ -- lwz r12, a_off(r31)
758 0x7d8903a6, -- mtctr r12
760 ] ++ gather_result ++ [ -- save the return value
761 0x80210000, -- lwz r1, 0(r1)
762 0x83e1fffc, -- lwz r31, -4(r1)
763 0x80010008, -- lwz r0, 8(r1)
764 0x7c0803a6, -- mtlr r0
769 gather_result :: [Word32]
770 gather_result = case r_rep of
772 FloatArg -> storeFloat 1 r_offW
773 DoubleArg -> storeDouble 1 r_offW
774 LongArg -> storeLong 3 r_offW
775 _ -> storeWord 3 r_offW
777 pass_parameters :: [Word32]
778 pass_parameters = concat params
780 -- vector aligned (4 word = 16 bytes) with 8 extra words of buffer space
781 framesize = alignedTo 4 (argsize + 8)
783 ((_,_,argsize), params) = mapAccumL loadparam (3,1,2) arg_offs_n_reps
785 -- handle one argument, returning machine code and the updated state
786 loadparam :: (Int, Int, Int) -> (Int, CgRep) ->
787 ((Int, Int, Int), [Word32])
789 loadparam (gpr, fpr, stack) (ofs, rep) = case rep of
790 FloatArg | fpr <= 8 -> ( (gpr, fpr + 1, stack), loadFloat fpr ofs )
791 FloatArg -> ( (gpr, fpr, stack + 1), stackWord stack ofs )
793 DoubleArg | fpr <= 8 -> ( (gpr, fpr + 1, stack), loadDouble fpr ofs )
794 DoubleArg -> ( (gpr, fpr, astack + 2), stackLong astack ofs )
796 LongArg | even gpr -> loadparam (gpr + 1, fpr, stack) (ofs, rep)
797 LongArg | gpr <= 9 -> ( (gpr + 2, fpr, stack), loadLong gpr ofs )
798 LongArg -> ( (gpr, fpr, astack + 2), stackLong astack ofs )
800 _ | gpr <= 10 -> ( (gpr + 1, fpr, stack), loadWord gpr ofs )
801 _ -> ( (gpr, fpr, stack + 1), stackWord stack ofs )
802 where astack = alignedTo 2 stack
804 alignedTo :: Int -> Int -> Int
805 alignedTo alignment x = case x `mod` alignment of
807 y -> x - y + alignment
809 -- convenience macros to do multiple-instruction data moves
810 stackWord dst src = loadWord 0 src ++ storeWordC 0 dst
811 stackLong dst src = stackWord dst src ++ stackWord (dst + 1) (src + 1)
812 loadLong dst src = loadWord dst src ++ loadWord (dst + 1) (src + 1)
813 storeLong dst src = storeWord dst src ++ storeWord (dst + 1) (src + 1)
815 -- load data from the Haskell stack (relative to r31)
816 loadFloat = loadstoreInstr 0xc01f0000 -- lfs fpr, ofs(r31)
817 loadDouble = loadstoreInstr 0xc81f0000 -- lfd fpr, ofs(r31)
818 loadWord = loadstoreInstr 0x801f0000 -- lwz gpr, ofs(r31)
820 -- store data to the Haskell stack (relative to r31)
821 storeFloat = loadstoreInstr 0xd01f0000 -- stfs fpr, ofs(r31)
822 storeDouble = loadstoreInstr 0xd81f0000 -- stfd fpr, ofs(r31)
823 storeWord = loadstoreInstr 0x901f0000 -- stw gpr, ofs(r31)
825 -- store data to the C stack (relative to r1)
826 storeWordC = loadstoreInstr 0x90010000 -- stw gpr, ofs(r1)
828 -- machine code building blocks
829 loadstoreInstr :: Word32 -> Int -> Int -> [Word32]
830 loadstoreInstr code reg ofs = [ code .|. register reg .|. offset ofs ]
832 register :: Int -> Word32
833 register reg = fromIntegral reg `shiftL` 21
835 offset :: Int -> Word32
836 offset ofs = fromIntegral (ofs * 4) .&. 0xFFFF
839 w32_to_w8s_bigEndian :: Word32 -> [Word8]
840 w32_to_w8s_bigEndian w = [fromIntegral (0xFF .&. (w `shiftR` 24)),
841 fromIntegral (0xFF .&. (w `shiftR` 16)),
842 fromIntegral (0xFF .&. (w `shiftR` 8)),
843 fromIntegral (0xFF .&. w)]
847 = throwDyn (InstallationError "foreign import is not implemented for GHCi on this platform.")
851 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
852 lit32 :: Int -> [Word8]
853 lit32 i = let w32 = (fromIntegral i) :: Word32
854 in map (fromIntegral . ( .&. 0xFF))
855 [w32, w32 `shiftR` 8,
856 w32 `shiftR` 16, w32 `shiftR` 24]