2 % (c) The University of Glasgow 2001-2006
5 ByteCodeGen: Generate machine-code sequences for foreign import
8 module ByteCodeFFI ( mkMarshalCode, moan64 ) where
10 #include "HsVersions.h"
17 -- DON'T remove apparently unused imports here ..
18 -- there is ifdeffery below
19 import Control.Exception ( throwDyn )
20 import Data.Bits ( Bits(..), shiftR, shiftL )
21 import Foreign ( newArray, Ptr )
22 import Data.List ( mapAccumL )
24 import Data.Word ( Word8, Word32 )
25 import System.IO.Unsafe ( unsafePerformIO )
26 import System.IO ( hPutStrLn, stderr )
27 -- import Debug.Trace ( trace )
30 %************************************************************************
32 \subsection{The platform-dependent marshall-code-generator.}
34 %************************************************************************
38 moan64 :: String -> SDoc -> a
42 "\nGHCi's bytecode generation machinery can't handle 64-bit\n" ++
43 "code properly yet. You can work around this for the time being\n" ++
44 "by compiling this module and all those it imports to object code,\n" ++
45 "and re-starting your GHCi session. The panic below contains information,\n" ++
46 "intended for the GHC implementors, about the exact place where GHC gave up.\n"
53 -- For sparc_TARGET_ARCH, i386_TARGET_ARCH, etc.
54 #include "nativeGen/NCG.h"
57 Make a piece of code which expects to see the Haskell stack
58 looking like this. It is given a pointer to the lowest word in
59 the stack -- presumably the tag of the placeholder.
65 <placeholder-for-result#> (must be an unboxed type)
67 We cope with both ccall and stdcall for the C fn. However, this code
68 itself expects only to be called using the ccall convention -- that is,
69 we don't clear our own (single) arg off the C stack.
71 mkMarshalCode :: CCallConv
72 -> (Int, CgRep) -> Int -> [(Int, CgRep)]
74 mkMarshalCode cconv (r_offW, r_rep) addr_offW arg_offs_n_reps
75 = let bytes = mkMarshalCode_wrk cconv (r_offW, r_rep)
76 addr_offW arg_offs_n_reps
77 in Foreign.newArray bytes
82 mkMarshalCode_wrk :: CCallConv
83 -> (Int, CgRep) -> Int -> [(Int, CgRep)]
86 mkMarshalCode_wrk cconv (r_offW, r_rep) addr_offW arg_offs_n_reps
90 = let -- Don't change this without first consulting Intel Corp :-)
95 [ -- reversed because x86 is little-endian
96 reverse [a_offW .. a_offW + cgRepSizeW a_rep - 1]
98 -- reversed because args are pushed L -> R onto C stack
99 | (a_offW, a_rep) <- reverse arg_offs_n_reps
102 arguments_size = bytes_per_word * length offsets_to_pushW
104 -- Darwin: align stack frame size to a multiple of 16 bytes
105 stack_frame_size = (arguments_size + 15) .&. complement 15
106 stack_frame_pad = stack_frame_size - arguments_size
108 stack_frame_size = arguments_size
111 -- some helpers to assemble x86 insns.
112 movl_offespmem_esi offB -- movl offB(%esp), %esi
113 = [0x8B, 0xB4, 0x24] ++ lit32 offB
114 movl_offesimem_ecx offB -- movl offB(%esi), %ecx
115 = [0x8B, 0x8E] ++ lit32 offB
116 save_regs -- pushl all intregs except %esp
117 = [0x50, 0x53, 0x51, 0x52, 0x56, 0x57, 0x55]
118 restore_regs -- popl ditto
119 = [0x5D, 0x5F, 0x5E, 0x5A, 0x59, 0x5B, 0x58]
120 pushl_ecx -- pushl %ecx
122 call_star_ecx -- call * %ecx
124 add_lit_esp lit -- addl $lit, %esp
125 = [0x81, 0xC4] ++ lit32 lit
126 movl_eax_offesimem offB -- movl %eax, offB(%esi)
127 = [0x89, 0x86] ++ lit32 offB
128 movl_edx_offesimem offB -- movl %edx, offB(%esi)
129 = [0x89, 0x96] ++ lit32 offB
132 fstpl_offesimem offB -- fstpl offB(%esi)
133 = [0xDD, 0x9E] ++ lit32 offB
134 fstps_offesimem offB -- fstps offB(%esi)
135 = [0xD9, 0x9E] ++ lit32 offB
137 2 0000 8BB42478 movl 0x12345678(%esp), %esi
139 3 0007 8B8E7856 movl 0x12345678(%esi), %ecx
142 5 000d 50535152 pushl %eax ; pushl %ebx ; pushl %ecx ; pushl %edx
143 6 0011 565755 pushl %esi ; pushl %edi ; pushl %ebp
145 8 0014 5D5F5E popl %ebp ; popl %edi ; popl %esi
146 9 0017 5A595B58 popl %edx ; popl %ecx ; popl %ebx ; popl %eax
148 11 001b 51 pushl %ecx
149 12 001c FFD1 call * %ecx
151 14 001e 81C47856 addl $0x12345678, %esp
153 15 0024 89867856 movl %eax, 0x12345678(%esi)
155 16 002a 89967856 movl %edx, 0x12345678(%esi)
158 18 0030 DD967856 fstl 0x12345678(%esi)
160 19 0036 DD9E7856 fstpl 0x12345678(%esi)
162 20 003c D9967856 fsts 0x12345678(%esi)
164 21 0042 D99E7856 fstps 0x12345678(%esi)
172 --trace (show (map fst arg_offs_n_reps))
174 {- On entry, top of C stack 0(%esp) is the RA and 4(%esp) is
175 arg passed from the interpreter.
177 Push all callee saved regs. Push all of them anyway ...
188 {- Now 28+0(%esp) is RA and 28+4(%esp) is the arg (the H stack ptr).
189 We'll use %esi as a temporary to point at the H stack, and
190 %ecx as a temporary to copy via.
192 movl 28+4(%esp), %esi
194 ++ movl_offespmem_esi 32
197 {- On Darwin, add some padding so that the stack stays aligned. -}
198 ++ (if stack_frame_pad /= 0
199 then add_lit_esp (-stack_frame_pad)
203 {- For each arg in args_offs_n_reps, examine the associated
204 CgRep to determine how many words there are. This gives a
205 bunch of offsets on the H stack to copy to the C stack:
207 movl off1(%esi), %ecx
210 ++ concatMap (\offW -> movl_offesimem_ecx (bytes_per_word * offW)
214 {- Get the addr to call into %ecx, bearing in mind that there's
215 an Addr# tag at the indicated location, and do the call:
217 movl 4*(1 /*tag*/ +addr_offW)(%esi), %ecx
220 ++ movl_offesimem_ecx (bytes_per_word * addr_offW)
223 {- Nuke the args just pushed and re-establish %esi at the
226 addl $4*number_of_args_pushed, %esp (ccall only)
227 movl 28+4(%esp), %esi
229 ++ (if cconv /= StdCallConv
230 then add_lit_esp stack_frame_size
232 ++ movl_offespmem_esi 32
234 {- Depending on what the return type is, get the result
235 from %eax or %edx:%eax or %st(0).
237 movl %eax, 4(%esi) -- assuming tagged result
246 ++ let i32 = movl_eax_offesimem 0
247 i64 = movl_eax_offesimem 0 ++ movl_edx_offesimem 4
248 f32 = fstps_offesimem 0
249 f64 = fstpl_offesimem 0
257 other -> moan64 "ByteCodeFFI.mkMarshalCode_wrk(x86)"
260 {- Restore all the pushed regs and go home.
276 #elif x86_64_TARGET_ARCH
279 -- the address of the H stack is in %rdi. We need to move it out, so
280 -- we can use %rdi as an arg reg for the following call:
284 -- ####### load / push the args
287 (stack_args, fregs_unused, reg_loads) =
288 load_arg_regs arg_offs_n_reps int_loads float_loads []
290 tot_arg_size = bytes_per_word * length stack_args
292 -- On entry to the called function, %rsp should be aligned
293 -- on a 16-byte boundary +8 (i.e. the first stack arg after
294 -- the return address is 16-byte aligned). In STG land
295 -- %rsp is kept 16-byte aligned (see StgCRun.c), so we just
296 -- need to make sure we push a multiple of 16-bytes of args,
297 -- plus the return address, to get the correct alignment.
298 (real_size, adjust_rsp)
299 | tot_arg_size `rem` 16 == 0 = (tot_arg_size, [])
300 | otherwise = (tot_arg_size + 8, subq_lit_rsp 8)
302 (stack_pushes, stack_words) =
303 push_args stack_args [] 0
305 -- we need to know the number of SSE regs used in the call, see later
306 n_sse_regs_used = length float_loads - length fregs_unused
310 ++ concat stack_pushes -- push in reverse order
312 -- ####### make the call
314 -- use %r10 to make the call, because we don't have to save it.
315 -- movq 8*addr_offW(%rbp), %r10
316 ++ movq_rbpoff_r10 (bytes_per_word * addr_offW)
318 -- The x86_64 ABI requires us to set %al to the number of SSE
319 -- registers that contain arguments, if the called routine
320 -- is a varargs function. We don't know whether it's a
321 -- varargs function or not, so we have to assume it is.
323 -- It's not safe to omit this assignment, even if the number
324 -- of SSE regs in use is zero. If %al is larger than 8
325 -- on entry to a varargs function, seg faults ensue.
326 ++ movq_lit_rax n_sse_regs_used
329 -- pop the args from the stack, only in ccall mode
330 -- (in stdcall the callee does it).
331 ++ (if cconv /= StdCallConv
332 then addq_lit_rsp real_size
335 -- ####### place the result in the right place and return
344 -- int arg regs: rdi,rsi,rdx,rcx,r8,r9
345 -- flt arg regs: xmm0..xmm7
346 int_loads = [ movq_rbpoff_rdi, movq_rbpoff_rsi, movq_rbpoff_rdx,
347 movq_rbpoff_rcx, movq_rbpoff_r8, movq_rbpoff_r9 ]
348 float_loads = [ (mov_f32_rbpoff_xmm n, mov_f64_rbpoff_xmm n) | n <- [0..7] ]
350 load_arg_regs args [] [] code = (args, [], code)
351 load_arg_regs [] iregs fregs code = ([], fregs, code)
352 load_arg_regs ((off,rep):args) iregs fregs code
353 | FloatArg <- rep, ((mov_f32,_):frest) <- fregs =
354 load_arg_regs args iregs frest (mov_f32 (bytes_per_word * off) : code)
355 | DoubleArg <- rep, ((_,mov_f64):frest) <- fregs =
356 load_arg_regs args iregs frest (mov_f64 (bytes_per_word * off) : code)
357 | (mov_reg:irest) <- iregs =
358 load_arg_regs args irest fregs (mov_reg (bytes_per_word * off) : code)
362 push_this_arg = ((off,rep):args',fregs', code')
363 where (args',fregs',code') = load_arg_regs args iregs fregs code
365 push_args [] code pushed_words = (code, pushed_words)
366 push_args ((off,rep):args) code pushed_words
368 push_args args (push_f32_rbpoff (bytes_per_word * off) : code)
371 push_args args (push_f64_rbpoff (bytes_per_word * off) : code)
374 push_args args (pushq_rbpoff (bytes_per_word * off) : code)
385 i64 = movq_rax_rbpoff 0
386 f32 = mov_f32_xmm0_rbpoff 0
387 f64 = mov_f64_xmm0_rbpoff 0
389 -- ######### x86_64 machine code:
391 -- 0: 48 89 fd mov %rdi,%rbp
392 -- 3: 48 8b bd 78 56 34 12 mov 0x12345678(%rbp),%rdi
393 -- a: 48 8b b5 78 56 34 12 mov 0x12345678(%rbp),%rsi
394 -- 11: 48 8b 95 78 56 34 12 mov 0x12345678(%rbp),%rdx
395 -- 18: 48 8b 8d 78 56 34 12 mov 0x12345678(%rbp),%rcx
396 -- 1f: 4c 8b 85 78 56 34 12 mov 0x12345678(%rbp),%r8
397 -- 26: 4c 8b 8d 78 56 34 12 mov 0x12345678(%rbp),%r9
398 -- 2d: 4c 8b 95 78 56 34 12 mov 0x12345678(%rbp),%r10
399 -- 34: 48 c7 c0 78 56 34 12 mov $0x12345678,%rax
400 -- 3b: 48 89 85 78 56 34 12 mov %rax,0x12345678(%rbp)
401 -- 42: f3 0f 10 85 78 56 34 12 movss 0x12345678(%rbp),%xmm0
402 -- 4a: f2 0f 10 85 78 56 34 12 movsd 0x12345678(%rbp),%xmm0
403 -- 52: f3 0f 11 85 78 56 34 12 movss %xmm0,0x12345678(%rbp)
404 -- 5a: f2 0f 11 85 78 56 34 12 movsd %xmm0,0x12345678(%rbp)
405 -- 62: ff b5 78 56 34 12 pushq 0x12345678(%rbp)
406 -- 68: f3 44 0f 11 04 24 movss %xmm8,(%rsp)
407 -- 6e: f2 44 0f 11 04 24 movsd %xmm8,(%rsp)
408 -- 74: 48 81 ec 78 56 34 12 sub $0x12345678,%rsp
409 -- 7b: 48 81 c4 78 56 34 12 add $0x12345678,%rsp
410 -- 82: 41 ff d2 callq *%r10
413 movq_rdi_rbp = [0x48,0x89,0xfd]
414 movq_rbpoff_rdi off = [0x48, 0x8b, 0xbd] ++ lit32 off
415 movq_rbpoff_rsi off = [0x48, 0x8b, 0xb5] ++ lit32 off
416 movq_rbpoff_rdx off = [0x48, 0x8b, 0x95] ++ lit32 off
417 movq_rbpoff_rcx off = [0x48, 0x8b, 0x8d] ++ lit32 off
418 movq_rbpoff_r8 off = [0x4c, 0x8b, 0x85] ++ lit32 off
419 movq_rbpoff_r9 off = [0x4c, 0x8b, 0x8d] ++ lit32 off
420 movq_rbpoff_r10 off = [0x4c, 0x8b, 0x95] ++ lit32 off
421 movq_lit_rax lit = [0x48, 0xc7, 0xc0] ++ lit32 lit
422 movq_rax_rbpoff off = [0x48, 0x89, 0x85] ++ lit32 off
423 mov_f32_rbpoff_xmm n off = [0xf3, 0x0f, 0x10, 0x85 + n`shiftL`3] ++ lit32 off
424 mov_f64_rbpoff_xmm n off = [0xf2, 0x0f, 0x10, 0x85 + n`shiftL`3] ++ lit32 off
425 mov_f32_xmm0_rbpoff off = [0xf3, 0x0f, 0x11, 0x85] ++ lit32 off
426 mov_f64_xmm0_rbpoff off = [0xf2, 0x0f, 0x11, 0x85] ++ lit32 off
427 pushq_rbpoff off = [0xff, 0xb5] ++ lit32 off
428 push_f32_rbpoff off =
429 mov_f32_rbpoff_xmm 8 off ++ -- movss off(%rbp), %xmm8
430 [0xf3, 0x44, 0x0f, 0x11, 0x04, 0x24] ++ -- movss %xmm8, (%rsp)
431 subq_lit_rsp 8 -- subq $8, %rsp
432 push_f64_rbpoff off =
433 mov_f64_rbpoff_xmm 8 off ++ -- movsd off(%rbp), %xmm8
434 [0xf2, 0x44, 0x0f, 0x11, 0x04, 0x24] ++ -- movsd %xmm8, (%rsp)
435 subq_lit_rsp 8 -- subq $8, %rsp
436 subq_lit_rsp lit = [0x48, 0x81, 0xec] ++ lit32 lit
437 addq_lit_rsp lit = [0x48, 0x81, 0xc4] ++ lit32 lit
438 call_star_r10 = [0x41,0xff,0xd2]
443 #elif sparc_TARGET_ARCH
445 = let -- At least for sparc V8
449 w32_to_w8s_bigEndian :: Word32 -> [Word8]
450 w32_to_w8s_bigEndian w
451 = [fromIntegral (0xFF .&. (w `shiftR` 24)),
452 fromIntegral (0xFF .&. (w `shiftR` 16)),
453 fromIntegral (0xFF .&. (w `shiftR` 8)),
454 fromIntegral (0xFF .&. w)]
458 [ [a_offW .. a_offW + cgRepSizeW a_rep - 1]
460 | (a_offW, a_rep) <- arg_offs_n_reps
463 total_argWs = length offsets_to_pushW
464 argWs_on_stack = if total_argWs > 6 then total_argWs - 6
467 -- The stack pointer must be kept 8-byte aligned, which means
468 -- we need to calculate this quantity too
469 argWs_on_stack_ROUNDED_UP
470 | odd argWs_on_stack = 1 + argWs_on_stack
471 | otherwise = argWs_on_stack
473 -- some helpers to assemble sparc insns.
475 iReg, oReg, gReg, fReg :: Int -> Word32
476 iReg = fromIntegral . (+ 24)
477 oReg = fromIntegral . (+ 8)
478 gReg = fromIntegral . (+ 0)
493 insn_r_r_i :: Word32 -> Word32 -> Word32 -> Int -> Word32
494 insn_r_r_i op3 rs1 rd imm13
496 .|. (rs1 `shiftL` 25)
497 .|. (op3 `shiftL` 19)
502 insn_r_i_r :: Word32 -> Word32 -> Int -> Word32 -> Word32
503 insn_r_i_r op3 rs1 imm13 rd
506 .|. (op3 `shiftL` 19)
507 .|. (rs1 `shiftL` 14)
511 mkSimm13 :: Int -> Word32
513 = let imm13w = (fromIntegral imm13) :: Word32
516 -- REAL (non-synthetic) insns
517 -- or %rs1, %rs2, %rd
518 mkOR :: Word32 -> Word32 -> Word32 -> Word32
522 .|. (op3_OR `shiftL` 19)
523 .|. (rs1 `shiftL` 14)
526 where op3_OR = 2 :: Word32
528 -- ld(int) [%rs + imm13], %rd
529 mkLD rs1 imm13 rd = insn_r_r_i 0x00{-op3_LD-} rd rs1 imm13
531 -- st(int) %rs, [%rd + imm13]
532 mkST = insn_r_r_i 0x04 -- op3_ST
534 -- st(float) %rs, [%rd + imm13]
535 mkSTF = insn_r_r_i 0x24 -- op3_STF
537 -- jmpl %rs + imm13, %rd
538 mkJMPL = insn_r_i_r 0x38 -- op3_JMPL
540 -- save %rs + imm13, %rd
541 mkSAVE = insn_r_i_r 0x3C -- op3_SAVE
543 -- restore %rs + imm13, %rd
544 mkRESTORE = insn_r_i_r 0x3D -- op3_RESTORE
547 mkNOP = mkOR g0 g0 g0
548 mkCALL reg = mkJMPL reg 0 o7
549 mkRET = mkJMPL i7 8 g0
550 mkRESTORE_TRIVIAL = mkRESTORE g0 0 g0
552 --trace (show (map fst arg_offs_n_reps))
553 concatMap w32_to_w8s_bigEndian (
555 {- On entry, %o0 is the arg passed from the interpreter. After
556 the initial save insn, it will be in %i0. Studying the sparc
557 docs one would have thought that the minimum frame size is 92
558 bytes, but gcc always uses at least 112, and indeed there are
559 segfaults a-plenty with 92. So I use 112 here as well. I
560 don't understand why, tho.
562 [mkSAVE sp (- ({-92-}112 + 4*argWs_on_stack_ROUNDED_UP)) sp]
564 {- For each arg in args_offs_n_reps, examine the associated
565 CgRep to determine how many words there are. This gives a
566 bunch of offsets on the H stack. Move the first 6 words into
567 %o0 .. %o5 and the rest on the stack, starting at [%sp+92].
570 ++ let doArgW (offW, wordNo)
572 = [mkLD i0 (bytes_per_word * offW) (oReg wordNo)]
574 = [mkLD i0 (bytes_per_word * offW) g1,
575 mkST g1 sp (92 + bytes_per_word * (wordNo - 6))]
577 concatMap doArgW (zip offsets_to_pushW [0 ..])
579 {- Get the addr to call into %g1, bearing in mind that there's
580 an Addr# tag at the indicated location, and do the call:
582 ld [4*(1 /*tag*/ +addr_offW) + %i0], %g1
585 ++ [mkLD i0 (bytes_per_word * addr_offW) g1,
589 {- Depending on what the return type is, get the result
590 from %o0 or %o1:%o0 or %f0 or %f1:%f0.
592 st %o0, [%i0 + 4] -- 32 bit int
594 st %o0, [%i0 + 4] -- 64 bit int
595 st %o1, [%i0 + 8] -- or the other way round?
597 st %f0, [%i0 + 4] -- 32 bit float
599 st %f0, [%i0 + 4] -- 64 bit float
600 st %f1, [%i0 + 8] -- or the other way round?
603 ++ let i32 = [mkST o0 i0 0]
604 i64 = [mkST o0 i0 0, mkST o1 i0 4]
605 f32 = [mkSTF f0 i0 0]
606 f64 = [mkSTF f0 i0 0, mkSTF f1 i0 4]
613 other -> moan64 "ByteCodeFFI.mkMarshalCode_wrk(sparc)"
617 mkRESTORE_TRIVIAL] -- this is in the delay slot of the RET
619 #elif powerpc_TARGET_ARCH && darwin_TARGET_OS
625 w32_to_w8s_bigEndian :: Word32 -> [Word8]
626 w32_to_w8s_bigEndian w
627 = [fromIntegral (0xFF .&. (w `shiftR` 24)),
628 fromIntegral (0xFF .&. (w `shiftR` 16)),
629 fromIntegral (0xFF .&. (w `shiftR` 8)),
630 fromIntegral (0xFF .&. w)]
632 -- addr and result bits offsetsW
633 a_off = addr_offW * bytes_per_word
634 result_off = r_offW * bytes_per_word
637 parameterArea = sum [ cgRepSizeW a_rep * bytes_per_word
638 | (_, a_rep) <- arg_offs_n_reps ]
639 savedRegisterArea = 4
640 frameSize = padTo16 (linkageArea + max parameterArea 32 + savedRegisterArea)
641 padTo16 x = case x `mod` 16 of
645 pass_parameters [] _ _ = []
646 pass_parameters ((a_offW, a_rep):args) nextFPR offsetW =
648 haskellArgOffset = a_offW * bytes_per_word
649 offsetW' = offsetW + cgRepSizeW a_rep
653 [0x801f0000 -- lwz rX, src(r31)
654 .|. (fromIntegral src .&. 0xFFFF)
655 .|. (fromIntegral (offsetW+w+3) `shiftL` 21)]
657 [0x801f0000 -- lwz r0, src(r31)
658 .|. (fromIntegral src .&. 0xFFFF),
659 0x90010000 -- stw r0, dst(r1)
660 .|. (fromIntegral dst .&. 0xFFFF)]
662 src = haskellArgOffset + w*bytes_per_word
663 dst = linkageArea + (offsetW+w) * bytes_per_word
666 FloatArg | nextFPR < 14 ->
667 (0xc01f0000 -- lfs fX, haskellArgOffset(r31)
668 .|. (fromIntegral haskellArgOffset .&. 0xFFFF)
669 .|. (fromIntegral nextFPR `shiftL` 21))
670 : pass_parameters args (nextFPR+1) offsetW'
671 DoubleArg | nextFPR < 14 ->
672 (0xc81f0000 -- lfd fX, haskellArgOffset(r31)
673 .|. (fromIntegral haskellArgOffset .&. 0xFFFF)
674 .|. (fromIntegral nextFPR `shiftL` 21))
675 : pass_parameters args (nextFPR+1) offsetW'
677 concatMap pass_word [0 .. cgRepSizeW a_rep - 1]
678 ++ pass_parameters args nextFPR offsetW'
680 gather_result = case r_rep of
683 [0xd03f0000 .|. (fromIntegral result_off .&. 0xFFFF)]
684 -- stfs f1, result_off(r31)
686 [0xd83f0000 .|. (fromIntegral result_off .&. 0xFFFF)]
687 -- stfd f1, result_off(r31)
688 _ | cgRepSizeW r_rep == 2 ->
689 [0x907f0000 .|. (fromIntegral result_off .&. 0xFFFF),
690 0x909f0000 .|. (fromIntegral (result_off+4) .&. 0xFFFF)]
691 -- stw r3, result_off(r31)
692 -- stw r4, result_off+4(r31)
693 _ | cgRepSizeW r_rep == 1 ->
694 [0x907f0000 .|. (fromIntegral result_off .&. 0xFFFF)]
695 -- stw r3, result_off(r31)
697 concatMap w32_to_w8s_bigEndian $ [
698 0x7c0802a6, -- mflr r0
699 0x93e1fffc, -- stw r31,-4(r1)
700 0x90010008, -- stw r0,8(r1)
701 0x94210000 .|. (fromIntegral (-frameSize) .&. 0xFFFF),
702 -- stwu r1, -frameSize(r1)
703 0x7c7f1b78 -- mr r31, r3
704 ] ++ pass_parameters arg_offs_n_reps 1 0 ++ [
705 0x819f0000 .|. (fromIntegral a_off .&. 0xFFFF),
706 -- lwz r12, a_off(r31)
707 0x7d8903a6, -- mtctr r12
709 ] ++ gather_result ++ [
710 0x80210000, -- lwz r1, 0(r1)
711 0x83e1fffc, -- lwz r31, -4(r1)
712 0x80010008, -- lwz r0, 8(r1)
713 0x7c0803a6, -- mtlr r0
717 #elif powerpc_TARGET_ARCH && linux_TARGET_OS
719 -- All offsets here are measured in Words (not bytes). This includes
720 -- arguments to the load/store machine code generators, alignment numbers
721 -- and the final 'framesize' among others.
723 = concatMap w32_to_w8s_bigEndian $ [
724 0x7c0802a6, -- mflr r0
725 0x93e1fffc, -- stw r31,-4(r1)
726 0x90010008, -- stw r0,8(r1)
727 0x94210000 .|. offset (-framesize), -- stwu r1, -frameSize(r1)
728 0x7c7f1b78 -- mr r31, r3
729 ] ++ pass_parameters ++ -- pass the parameters
730 loadWord 12 addr_offW ++ [ -- lwz r12, a_off(r31)
731 0x7d8903a6, -- mtctr r12
733 ] ++ gather_result ++ [ -- save the return value
734 0x80210000, -- lwz r1, 0(r1)
735 0x83e1fffc, -- lwz r31, -4(r1)
736 0x80010008, -- lwz r0, 8(r1)
737 0x7c0803a6, -- mtlr r0
742 gather_result :: [Word32]
743 gather_result = case r_rep of
745 FloatArg -> storeFloat 1 r_offW
746 DoubleArg -> storeDouble 1 r_offW
747 LongArg -> storeLong 3 r_offW
748 _ -> storeWord 3 r_offW
750 pass_parameters :: [Word32]
751 pass_parameters = concat params
753 -- vector aligned (4 word = 16 bytes) with 8 extra words of buffer space
754 framesize = alignedTo 4 (argsize + 8)
756 ((_,_,argsize), params) = mapAccumL loadparam (3,1,2) arg_offs_n_reps
758 -- handle one argument, returning machine code and the updated state
759 loadparam :: (Int, Int, Int) -> (Int, CgRep) ->
760 ((Int, Int, Int), [Word32])
762 loadparam (gpr, fpr, stack) (ofs, rep) = case rep of
763 FloatArg | fpr <= 8 -> ( (gpr, fpr + 1, stack), loadFloat fpr ofs )
764 FloatArg -> ( (gpr, fpr, stack + 1), stackWord stack ofs )
766 DoubleArg | fpr <= 8 -> ( (gpr, fpr + 1, stack), loadDouble fpr ofs )
767 DoubleArg -> ( (gpr, fpr, astack + 2), stackLong astack ofs )
769 LongArg | even gpr -> loadparam (gpr + 1, fpr, stack) (ofs, rep)
770 LongArg | gpr <= 9 -> ( (gpr + 2, fpr, stack), loadLong gpr ofs )
771 LongArg -> ( (gpr, fpr, astack + 2), stackLong astack ofs )
773 _ | gpr <= 10 -> ( (gpr + 1, fpr, stack), loadWord gpr ofs )
774 _ -> ( (gpr, fpr, stack + 1), stackWord stack ofs )
775 where astack = alignedTo 2 stack
777 alignedTo :: Int -> Int -> Int
778 alignedTo alignment x = case x `mod` alignment of
780 y -> x - y + alignment
782 -- convenience macros to do multiple-instruction data moves
783 stackWord dst src = loadWord 0 src ++ storeWordC 0 dst
784 stackLong dst src = stackWord dst src ++ stackWord (dst + 1) (src + 1)
785 loadLong dst src = loadWord dst src ++ loadWord (dst + 1) (src + 1)
786 storeLong dst src = storeWord dst src ++ storeWord (dst + 1) (src + 1)
788 -- load data from the Haskell stack (relative to r31)
789 loadFloat = loadstoreInstr 0xc01f0000 -- lfs fpr, ofs(r31)
790 loadDouble = loadstoreInstr 0xc81f0000 -- lfd fpr, ofs(r31)
791 loadWord = loadstoreInstr 0x801f0000 -- lwz gpr, ofs(r31)
793 -- store data to the Haskell stack (relative to r31)
794 storeFloat = loadstoreInstr 0xd01f0000 -- stfs fpr, ofs(r31)
795 storeDouble = loadstoreInstr 0xd81f0000 -- stfd fpr, ofs(r31)
796 storeWord = loadstoreInstr 0x901f0000 -- stw gpr, ofs(r31)
798 -- store data to the C stack (relative to r1)
799 storeWordC = loadstoreInstr 0x90010000 -- stw gpr, ofs(r1)
801 -- machine code building blocks
802 loadstoreInstr :: Word32 -> Int -> Int -> [Word32]
803 loadstoreInstr code reg ofs = [ code .|. register reg .|. offset ofs ]
805 register :: Int -> Word32
806 register reg = fromIntegral reg `shiftL` 21
808 offset :: Int -> Word32
809 offset ofs = fromIntegral (ofs * 4) .&. 0xFFFF
812 w32_to_w8s_bigEndian :: Word32 -> [Word8]
813 w32_to_w8s_bigEndian w = [fromIntegral (0xFF .&. (w `shiftR` 24)),
814 fromIntegral (0xFF .&. (w `shiftR` 16)),
815 fromIntegral (0xFF .&. (w `shiftR` 8)),
816 fromIntegral (0xFF .&. w)]
820 = throwDyn (InstallationError "foreign import is not implemented for GHCi on this platform.")
824 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
825 lit32 :: Int -> [Word8]
826 lit32 i = let w32 = (fromIntegral i) :: Word32
827 in map (fromIntegral . ( .&. 0xFF))
828 [w32, w32 `shiftR` 8,
829 w32 `shiftR` 16, w32 `shiftR` 24]