2 % (c) The University of Glasgow 2001-2006
5 ByteCodeGen: Generate machine-code sequences for foreign import
9 -- The above warning supression flag is a temporary kludge.
10 -- While working on this module you are encouraged to remove it and fix
11 -- any warnings in the module. See
12 -- http://hackage.haskell.org/trac/ghc/wiki/Commentary/CodingStyle#Warnings
17 module ByteCodeFFI ( moan64, newExec ) where
26 module ByteCodeFFI ( mkMarshalCode, moan64, newExec ) where
28 #include "HsVersions.h"
36 -- DON'T remove apparently unused imports here ..
37 -- there is ifdeffery below
38 import Control.Exception ( throwDyn )
39 import Data.Bits ( Bits(..), shiftR, shiftL )
40 import Data.List ( mapAccumL )
42 import Data.Word ( Word8, Word32 )
43 import Foreign ( Ptr, FunPtr, castPtrToFunPtr,
44 Storable, sizeOf, pokeArray )
45 import Foreign.C ( CUInt )
46 import System.IO.Unsafe ( unsafePerformIO )
47 import System.IO ( hPutStrLn, stderr )
48 -- import Debug.Trace ( trace )
51 %************************************************************************
53 \subsection{The platform-dependent marshall-code-generator.}
55 %************************************************************************
59 -- For sparc_TARGET_ARCH, i386_TARGET_ARCH, etc.
60 #include "nativeGen/NCG.h"
63 Make a piece of code which expects to see the Haskell stack
64 looking like this. It is given a pointer to the lowest word in
65 the stack -- presumably the tag of the placeholder.
71 <placeholder-for-result#> (must be an unboxed type)
73 We cope with both ccall and stdcall for the C fn. However, this code
74 itself expects only to be called using the ccall convention -- that is,
75 we don't clear our own (single) arg off the C stack.
77 mkMarshalCode :: CCallConv
78 -> (Int, PrimRep) -> Int -> [(Int, PrimRep)]
80 mkMarshalCode cconv (r_offW, r_rep) addr_offW arg_offs_n_reps
81 = let bytes = mkMarshalCode_wrk cconv (r_offW, r_rep)
82 addr_offW arg_offs_n_reps
85 mkMarshalCode_wrk :: CCallConv
86 -> (Int, PrimRep) -> Int -> [(Int, PrimRep)]
89 mkMarshalCode_wrk cconv (r_offW, r_rep) addr_offW arg_offs_n_reps
93 = let -- Don't change this without first consulting Intel Corp :-)
98 [ -- reversed because x86 is little-endian
99 reverse [a_offW .. a_offW + primRepSizeW a_rep - 1]
101 -- reversed because args are pushed L -> R onto C stack
102 | (a_offW, a_rep) <- reverse arg_offs_n_reps
105 arguments_size = bytes_per_word * length offsets_to_pushW
107 -- Darwin: align stack frame size to a multiple of 16 bytes
108 stack_frame_size = (arguments_size + 15) .&. complement 15
109 stack_frame_pad = stack_frame_size - arguments_size
111 stack_frame_size = arguments_size
114 -- some helpers to assemble x86 insns.
115 movl_offespmem_esi offB -- movl offB(%esp), %esi
116 = [0x8B, 0xB4, 0x24] ++ lit32 offB
117 movl_offesimem_ecx offB -- movl offB(%esi), %ecx
118 = [0x8B, 0x8E] ++ lit32 offB
119 save_regs -- pushl all intregs except %esp
120 = [0x50, 0x53, 0x51, 0x52, 0x56, 0x57, 0x55]
121 restore_regs -- popl ditto
122 = [0x5D, 0x5F, 0x5E, 0x5A, 0x59, 0x5B, 0x58]
123 pushl_ecx -- pushl %ecx
125 call_star_ecx -- call * %ecx
127 add_lit_esp lit -- addl $lit, %esp
128 = [0x81, 0xC4] ++ lit32 lit
129 movl_eax_offesimem offB -- movl %eax, offB(%esi)
130 = [0x89, 0x86] ++ lit32 offB
131 movl_edx_offesimem offB -- movl %edx, offB(%esi)
132 = [0x89, 0x96] ++ lit32 offB
135 fstpl_offesimem offB -- fstpl offB(%esi)
136 = [0xDD, 0x9E] ++ lit32 offB
137 fstps_offesimem offB -- fstps offB(%esi)
138 = [0xD9, 0x9E] ++ lit32 offB
140 2 0000 8BB42478 movl 0x12345678(%esp), %esi
142 3 0007 8B8E7856 movl 0x12345678(%esi), %ecx
145 5 000d 50535152 pushl %eax ; pushl %ebx ; pushl %ecx ; pushl %edx
146 6 0011 565755 pushl %esi ; pushl %edi ; pushl %ebp
148 8 0014 5D5F5E popl %ebp ; popl %edi ; popl %esi
149 9 0017 5A595B58 popl %edx ; popl %ecx ; popl %ebx ; popl %eax
151 11 001b 51 pushl %ecx
152 12 001c FFD1 call * %ecx
154 14 001e 81C47856 addl $0x12345678, %esp
156 15 0024 89867856 movl %eax, 0x12345678(%esi)
158 16 002a 89967856 movl %edx, 0x12345678(%esi)
161 18 0030 DD967856 fstl 0x12345678(%esi)
163 19 0036 DD9E7856 fstpl 0x12345678(%esi)
165 20 003c D9967856 fsts 0x12345678(%esi)
167 21 0042 D99E7856 fstps 0x12345678(%esi)
175 --trace (show (map fst arg_offs_n_reps))
177 {- On entry, top of C stack 0(%esp) is the RA and 4(%esp) is
178 arg passed from the interpreter.
180 Push all callee saved regs. Push all of them anyway ...
191 {- Now 28+0(%esp) is RA and 28+4(%esp) is the arg (the H stack ptr).
192 We'll use %esi as a temporary to point at the H stack, and
193 %ecx as a temporary to copy via.
195 movl 28+4(%esp), %esi
197 ++ movl_offespmem_esi 32
200 {- On Darwin, add some padding so that the stack stays aligned. -}
201 ++ (if stack_frame_pad /= 0
202 then add_lit_esp (-stack_frame_pad)
206 {- For each arg in args_offs_n_reps, examine the associated
207 CgRep to determine how many words there are. This gives a
208 bunch of offsets on the H stack to copy to the C stack:
210 movl off1(%esi), %ecx
213 ++ concatMap (\offW -> movl_offesimem_ecx (bytes_per_word * offW)
217 {- Get the addr to call into %ecx, bearing in mind that there's
218 an Addr# tag at the indicated location, and do the call:
220 movl 4*(1 /*tag*/ +addr_offW)(%esi), %ecx
223 ++ movl_offesimem_ecx (bytes_per_word * addr_offW)
226 {- Nuke the args just pushed and re-establish %esi at the
229 addl $4*number_of_args_pushed, %esp (ccall only)
230 movl 28+4(%esp), %esi
232 ++ (if cconv /= StdCallConv
233 then add_lit_esp stack_frame_size
235 ++ movl_offespmem_esi 32
237 {- Depending on what the return type is, get the result
238 from %eax or %edx:%eax or %st(0).
240 movl %eax, 4(%esi) -- assuming tagged result
249 ++ let i32 = movl_eax_offesimem 0
250 i64 = movl_eax_offesimem 0 ++ movl_edx_offesimem 4
251 f32 = fstps_offesimem 0
252 f64 = fstpl_offesimem 0
263 other -> moan64 "ByteCodeFFI.mkMarshalCode_wrk(x86)"
266 {- Restore all the pushed regs and go home.
282 #elif x86_64_TARGET_ARCH
285 -- the address of the H stack is in %rdi. We need to move it out, so
286 -- we can use %rdi as an arg reg for the following call:
290 -- ####### load / push the args
293 (stack_args, fregs_unused, reg_loads) =
294 load_arg_regs arg_offs_n_reps int_loads float_loads []
296 tot_arg_size = bytes_per_word * length stack_args
298 -- On entry to the called function, %rsp should be aligned
299 -- on a 16-byte boundary +8 (i.e. the first stack arg after
300 -- the return address is 16-byte aligned). In STG land
301 -- %rsp is kept 16-byte aligned (see StgCRun.c), so we just
302 -- need to make sure we push a multiple of 16-bytes of args,
303 -- plus the return address, to get the correct alignment.
304 (real_size, adjust_rsp)
305 | tot_arg_size `rem` 16 == 0 = (tot_arg_size, [])
306 | otherwise = (tot_arg_size + 8, subq_lit_rsp 8)
308 (stack_pushes, stack_words) =
309 push_args stack_args [] 0
311 -- we need to know the number of SSE regs used in the call, see later
312 n_sse_regs_used = length float_loads - length fregs_unused
316 ++ concat stack_pushes -- push in reverse order
318 -- ####### make the call
320 -- use %r10 to make the call, because we don't have to save it.
321 -- movq 8*addr_offW(%rbp), %r10
322 ++ movq_rbpoff_r10 (bytes_per_word * addr_offW)
324 -- The x86_64 ABI requires us to set %al to the number of SSE
325 -- registers that contain arguments, if the called routine
326 -- is a varargs function. We don't know whether it's a
327 -- varargs function or not, so we have to assume it is.
329 -- It's not safe to omit this assignment, even if the number
330 -- of SSE regs in use is zero. If %al is larger than 8
331 -- on entry to a varargs function, seg faults ensue.
332 ++ movq_lit_rax n_sse_regs_used
335 -- pop the args from the stack, only in ccall mode
336 -- (in stdcall the callee does it).
337 ++ (if cconv /= StdCallConv
338 then addq_lit_rsp real_size
341 -- ####### place the result in the right place and return
350 -- int arg regs: rdi,rsi,rdx,rcx,r8,r9
351 -- flt arg regs: xmm0..xmm7
352 int_loads = [ movq_rbpoff_rdi, movq_rbpoff_rsi, movq_rbpoff_rdx,
353 movq_rbpoff_rcx, movq_rbpoff_r8, movq_rbpoff_r9 ]
354 float_loads = [ 0..7 ]
356 load_arg_regs args [] [] code = (args, [], code)
357 load_arg_regs [] iregs fregs code = ([], fregs, code)
358 load_arg_regs ((off,rep):args) iregs fregs code
363 load_arg_regs args iregs frest
364 (mov_f32_rbpoff_xmm n (bytes_per_word * off) : code)
369 load_arg_regs args iregs frest
370 (mov_f64_rbpoff_xmm n (bytes_per_word * off) : code)
371 | (mov_reg:irest) <- iregs =
372 load_arg_regs args irest fregs (mov_reg (bytes_per_word * off) : code)
376 push_this_arg = ((off,rep):args',fregs', code')
377 where (args',fregs',code') = load_arg_regs args iregs fregs code
379 push_args [] code pushed_words = (code, pushed_words)
380 push_args ((off,rep):args) code pushed_words
382 push_args args (push_f32_rbpoff (bytes_per_word * off) : code)
385 push_args args (push_f64_rbpoff (bytes_per_word * off) : code)
388 push_args args (pushq_rbpoff (bytes_per_word * off) : code)
399 i64 = movq_rax_rbpoff 0
400 f32 = mov_f32_xmm0_rbpoff 0
401 f64 = mov_f64_xmm0_rbpoff 0
403 -- ######### x86_64 machine code:
405 -- 0: 48 89 fd mov %rdi,%rbp
406 -- 3: 48 8b bd 78 56 34 12 mov 0x12345678(%rbp),%rdi
407 -- a: 48 8b b5 78 56 34 12 mov 0x12345678(%rbp),%rsi
408 -- 11: 48 8b 95 78 56 34 12 mov 0x12345678(%rbp),%rdx
409 -- 18: 48 8b 8d 78 56 34 12 mov 0x12345678(%rbp),%rcx
410 -- 1f: 4c 8b 85 78 56 34 12 mov 0x12345678(%rbp),%r8
411 -- 26: 4c 8b 8d 78 56 34 12 mov 0x12345678(%rbp),%r9
412 -- 2d: 4c 8b 95 78 56 34 12 mov 0x12345678(%rbp),%r10
413 -- 34: 48 c7 c0 78 56 34 12 mov $0x12345678,%rax
414 -- 3b: 48 89 85 78 56 34 12 mov %rax,0x12345678(%rbp)
415 -- 42: f3 0f 10 bd 78 56 34 12 movss 0x12345678(%rbp),%xmm7
416 -- 4a: f2 0f 10 9d 78 56 34 12 movsd 0x12345678(%rbp),%xmm3
417 -- 52: f2 44 0f 10 85 78 56 34 12 movsd 0x12345678(%rbp),%xmm8
418 -- 5b: f3 0f 11 9d 78 56 34 12 movss %xmm3,0x12345678(%rbp)
419 -- 63: f2 0f 11 9d 78 56 34 12 movsd %xmm3,0x12345678(%rbp)
420 -- 6b: f2 44 0f 11 85 78 56 34 12 movsd %xmm8,0x12345678(%rbp)
421 -- 74: ff b5 78 56 34 12 pushq 0x12345678(%rbp)
422 -- 7a: f3 44 0f 11 04 24 movss %xmm8,(%rsp)
423 -- 80: f2 44 0f 11 04 24 movsd %xmm8,(%rsp)
424 -- 86: 48 81 ec 78 56 34 12 sub $0x12345678,%rsp
425 -- 8d: 48 81 c4 78 56 34 12 add $0x12345678,%rsp
426 -- 94: 41 ff d2 callq *%r10
431 movq_rdi_rbp = [0x48,0x89,0xfd]
432 movq_rbpoff_rdi off = [0x48, 0x8b, 0xbd] ++ lit32 off
433 movq_rbpoff_rsi off = [0x48, 0x8b, 0xb5] ++ lit32 off
434 movq_rbpoff_rdx off = [0x48, 0x8b, 0x95] ++ lit32 off
435 movq_rbpoff_rcx off = [0x48, 0x8b, 0x8d] ++ lit32 off
436 movq_rbpoff_r8 off = [0x4c, 0x8b, 0x85] ++ lit32 off
437 movq_rbpoff_r9 off = [0x4c, 0x8b, 0x8d] ++ lit32 off
438 movq_rbpoff_r10 off = [0x4c, 0x8b, 0x95] ++ lit32 off
439 movq_lit_rax lit = [0x48, 0xc7, 0xc0] ++ lit32 lit
440 movq_rax_rbpoff off = [0x48, 0x89, 0x85] ++ lit32 off
441 mov_f32_rbpoff_xmm n off
442 = 0xf3 : if n >= 8 then 0x44 : rest else rest
443 where rest = [0x0f, 0x10, 0x85 + (n.&.7)`shiftL`3] ++ lit32 off
444 mov_f64_rbpoff_xmm n off
445 = 0xf2 : if n >= 8 then 0x44 : rest else rest
446 where rest = [0x0f, 0x10, 0x85 + (n.&.7)`shiftL`3] ++ lit32 off
447 mov_f32_xmm0_rbpoff off = [0xf3, 0x0f, 0x11, 0x85] ++ lit32 off
448 mov_f64_xmm0_rbpoff off = [0xf2, 0x0f, 0x11, 0x85] ++ lit32 off
449 pushq_rbpoff off = [0xff, 0xb5] ++ lit32 off
450 push_f32_rbpoff off =
451 subq_lit_rsp 8 ++ -- subq $8, %rsp
452 mov_f32_rbpoff_xmm 8 off ++ -- movss off(%rbp), %xmm8
453 [0xf3, 0x44, 0x0f, 0x11, 0x04, 0x24] -- movss %xmm8, (%rsp)
454 push_f64_rbpoff off =
455 subq_lit_rsp 8 ++ -- subq $8, %rsp
456 mov_f64_rbpoff_xmm 8 off ++ -- movsd off(%rbp), %xmm8
457 [0xf2, 0x44, 0x0f, 0x11, 0x04, 0x24] -- movsd %xmm8, (%rsp)
458 subq_lit_rsp lit = [0x48, 0x81, 0xec] ++ lit32 lit
459 addq_lit_rsp lit = [0x48, 0x81, 0xc4] ++ lit32 lit
460 call_star_r10 = [0x41,0xff,0xd2]
465 #elif sparc_TARGET_ARCH
467 = let -- At least for sparc V8
471 w32_to_w8s_bigEndian :: Word32 -> [Word8]
472 w32_to_w8s_bigEndian w
473 = [fromIntegral (0xFF .&. (w `shiftR` 24)),
474 fromIntegral (0xFF .&. (w `shiftR` 16)),
475 fromIntegral (0xFF .&. (w `shiftR` 8)),
476 fromIntegral (0xFF .&. w)]
480 [ [a_offW .. a_offW + primRepSizeW a_rep - 1]
482 | (a_offW, a_rep) <- arg_offs_n_reps
485 total_argWs = length offsets_to_pushW
486 argWs_on_stack = if total_argWs > 6 then total_argWs - 6
489 -- The stack pointer must be kept 8-byte aligned, which means
490 -- we need to calculate this quantity too
491 argWs_on_stack_ROUNDED_UP
492 | odd argWs_on_stack = 1 + argWs_on_stack
493 | otherwise = argWs_on_stack
495 -- some helpers to assemble sparc insns.
497 iReg, oReg, gReg, fReg :: Int -> Word32
498 iReg = fromIntegral . (+ 24)
499 oReg = fromIntegral . (+ 8)
500 gReg = fromIntegral . (+ 0)
515 insn_r_r_i :: Word32 -> Word32 -> Word32 -> Int -> Word32
516 insn_r_r_i op3 rs1 rd imm13
518 .|. (rs1 `shiftL` 25)
519 .|. (op3 `shiftL` 19)
524 insn_r_i_r :: Word32 -> Word32 -> Int -> Word32 -> Word32
525 insn_r_i_r op3 rs1 imm13 rd
528 .|. (op3 `shiftL` 19)
529 .|. (rs1 `shiftL` 14)
533 mkSimm13 :: Int -> Word32
535 = let imm13w = (fromIntegral imm13) :: Word32
538 -- REAL (non-synthetic) insns
539 -- or %rs1, %rs2, %rd
540 mkOR :: Word32 -> Word32 -> Word32 -> Word32
544 .|. (op3_OR `shiftL` 19)
545 .|. (rs1 `shiftL` 14)
548 where op3_OR = 2 :: Word32
550 -- ld(int) [%rs + imm13], %rd
551 mkLD rs1 imm13 rd = insn_r_r_i 0x00{-op3_LD-} rd rs1 imm13
553 -- st(int) %rs, [%rd + imm13]
554 mkST = insn_r_r_i 0x04 -- op3_ST
556 -- st(float) %rs, [%rd + imm13]
557 mkSTF = insn_r_r_i 0x24 -- op3_STF
559 -- jmpl %rs + imm13, %rd
560 mkJMPL = insn_r_i_r 0x38 -- op3_JMPL
562 -- save %rs + imm13, %rd
563 mkSAVE = insn_r_i_r 0x3C -- op3_SAVE
565 -- restore %rs + imm13, %rd
566 mkRESTORE = insn_r_i_r 0x3D -- op3_RESTORE
569 mkNOP = mkOR g0 g0 g0
570 mkCALL reg = mkJMPL reg 0 o7
571 mkRET = mkJMPL i7 8 g0
572 mkRESTORE_TRIVIAL = mkRESTORE g0 0 g0
574 --trace (show (map fst arg_offs_n_reps))
575 concatMap w32_to_w8s_bigEndian (
577 {- On entry, %o0 is the arg passed from the interpreter. After
578 the initial save insn, it will be in %i0. Studying the sparc
579 docs one would have thought that the minimum frame size is 92
580 bytes, but gcc always uses at least 112, and indeed there are
581 segfaults a-plenty with 92. So I use 112 here as well. I
582 don't understand why, tho.
584 [mkSAVE sp (- ({-92-}112 + 4*argWs_on_stack_ROUNDED_UP)) sp]
586 {- For each arg in args_offs_n_reps, examine the associated
587 CgRep to determine how many words there are. This gives a
588 bunch of offsets on the H stack. Move the first 6 words into
589 %o0 .. %o5 and the rest on the stack, starting at [%sp+92].
592 ++ let doArgW (offW, wordNo)
594 = [mkLD i0 (bytes_per_word * offW) (oReg wordNo)]
596 = [mkLD i0 (bytes_per_word * offW) g1,
597 mkST g1 sp (92 + bytes_per_word * (wordNo - 6))]
599 concatMap doArgW (zip offsets_to_pushW [0 ..])
601 {- Get the addr to call into %g1, bearing in mind that there's
602 an Addr# tag at the indicated location, and do the call:
604 ld [4*(1 /*tag*/ +addr_offW) + %i0], %g1
607 ++ [mkLD i0 (bytes_per_word * addr_offW) g1,
611 {- Depending on what the return type is, get the result
612 from %o0 or %o1:%o0 or %f0 or %f1:%f0.
614 st %o0, [%i0 + 4] -- 32 bit int
616 st %o0, [%i0 + 4] -- 64 bit int
617 st %o1, [%i0 + 8] -- or the other way round?
619 st %f0, [%i0 + 4] -- 32 bit float
621 st %f0, [%i0 + 4] -- 64 bit float
622 st %f1, [%i0 + 8] -- or the other way round?
625 ++ let i32 = [mkST o0 i0 0]
626 i64 = [mkST o0 i0 0, mkST o1 i0 4]
627 f32 = [mkSTF f0 i0 0]
628 f64 = [mkSTF f0 i0 0, mkSTF f1 i0 4]
637 other -> moan64 "ByteCodeFFI.mkMarshalCode_wrk(sparc)"
641 mkRESTORE_TRIVIAL] -- this is in the delay slot of the RET
643 #elif powerpc_TARGET_ARCH && darwin_TARGET_OS
649 w32_to_w8s_bigEndian :: Word32 -> [Word8]
650 w32_to_w8s_bigEndian w
651 = [fromIntegral (0xFF .&. (w `shiftR` 24)),
652 fromIntegral (0xFF .&. (w `shiftR` 16)),
653 fromIntegral (0xFF .&. (w `shiftR` 8)),
654 fromIntegral (0xFF .&. w)]
656 -- addr and result bits offsetsW
657 a_off = addr_offW * bytes_per_word
658 result_off = r_offW * bytes_per_word
661 parameterArea = sum [ primRepSizeW a_rep * bytes_per_word
662 | (_, a_rep) <- arg_offs_n_reps ]
663 savedRegisterArea = 4
664 frameSize = padTo16 (linkageArea + max parameterArea 32 + savedRegisterArea)
665 padTo16 x = case x `mod` 16 of
669 pass_parameters [] _ _ = []
670 pass_parameters ((a_offW, a_rep):args) nextFPR offsetW =
672 haskellArgOffset = a_offW * bytes_per_word
673 offsetW' = offsetW + primRepSizeW a_rep
677 [0x801f0000 -- lwz rX, src(r31)
678 .|. (fromIntegral src .&. 0xFFFF)
679 .|. (fromIntegral (offsetW+w+3) `shiftL` 21)]
681 [0x801f0000 -- lwz r0, src(r31)
682 .|. (fromIntegral src .&. 0xFFFF),
683 0x90010000 -- stw r0, dst(r1)
684 .|. (fromIntegral dst .&. 0xFFFF)]
686 src = haskellArgOffset + w*bytes_per_word
687 dst = linkageArea + (offsetW+w) * bytes_per_word
690 FloatRep | nextFPR < 14 ->
691 (0xc01f0000 -- lfs fX, haskellArgOffset(r31)
692 .|. (fromIntegral haskellArgOffset .&. 0xFFFF)
693 .|. (fromIntegral nextFPR `shiftL` 21))
694 : pass_parameters args (nextFPR+1) offsetW'
695 DoubleRep | nextFPR < 14 ->
696 (0xc81f0000 -- lfd fX, haskellArgOffset(r31)
697 .|. (fromIntegral haskellArgOffset .&. 0xFFFF)
698 .|. (fromIntegral nextFPR `shiftL` 21))
699 : pass_parameters args (nextFPR+1) offsetW'
701 concatMap pass_word [0 .. primRepSizeW a_rep - 1]
702 ++ pass_parameters args nextFPR offsetW'
704 gather_result = case r_rep of
707 [0xd03f0000 .|. (fromIntegral result_off .&. 0xFFFF)]
708 -- stfs f1, result_off(r31)
710 [0xd83f0000 .|. (fromIntegral result_off .&. 0xFFFF)]
711 -- stfd f1, result_off(r31)
712 _ | primRepSizeW r_rep == 2 ->
713 [0x907f0000 .|. (fromIntegral result_off .&. 0xFFFF),
714 0x909f0000 .|. (fromIntegral (result_off+4) .&. 0xFFFF)]
715 -- stw r3, result_off(r31)
716 -- stw r4, result_off+4(r31)
717 _ | primRepSizeW r_rep == 1 ->
718 [0x907f0000 .|. (fromIntegral result_off .&. 0xFFFF)]
719 -- stw r3, result_off(r31)
721 concatMap w32_to_w8s_bigEndian $ [
722 0x7c0802a6, -- mflr r0
723 0x93e1fffc, -- stw r31,-4(r1)
724 0x90010008, -- stw r0,8(r1)
725 0x94210000 .|. (fromIntegral (-frameSize) .&. 0xFFFF),
726 -- stwu r1, -frameSize(r1)
727 0x7c7f1b78 -- mr r31, r3
728 ] ++ pass_parameters arg_offs_n_reps 1 0 ++ [
729 0x819f0000 .|. (fromIntegral a_off .&. 0xFFFF),
730 -- lwz r12, a_off(r31)
731 0x7d8903a6, -- mtctr r12
733 ] ++ gather_result ++ [
734 0x80210000, -- lwz r1, 0(r1)
735 0x83e1fffc, -- lwz r31, -4(r1)
736 0x80010008, -- lwz r0, 8(r1)
737 0x7c0803a6, -- mtlr r0
741 #elif powerpc_TARGET_ARCH && linux_TARGET_OS
743 -- All offsets here are measured in Words (not bytes). This includes
744 -- arguments to the load/store machine code generators, alignment numbers
745 -- and the final 'framesize' among others.
747 = concatMap w32_to_w8s_bigEndian $ [
748 0x7c0802a6, -- mflr r0
749 0x93e1fffc, -- stw r31,-4(r1)
750 0x90010008, -- stw r0,8(r1)
751 0x94210000 .|. offset (-framesize), -- stwu r1, -frameSize(r1)
752 0x7c7f1b78 -- mr r31, r3
753 ] ++ pass_parameters ++ -- pass the parameters
754 loadWord 12 addr_offW ++ [ -- lwz r12, a_off(r31)
755 0x7d8903a6, -- mtctr r12
757 ] ++ gather_result ++ [ -- save the return value
758 0x80210000, -- lwz r1, 0(r1)
759 0x83e1fffc, -- lwz r31, -4(r1)
760 0x80010008, -- lwz r0, 8(r1)
761 0x7c0803a6, -- mtlr r0
766 gather_result :: [Word32]
767 gather_result = case r_rep of
769 FloatRep -> storeFloat 1 r_offW
770 DoubleRep -> storeDouble 1 r_offW
771 Int64Rep -> storeLong 3 r_offW
772 Word64Rep -> storeLong 3 r_offW
773 _ -> storeWord 3 r_offW
775 pass_parameters :: [Word32]
776 pass_parameters = concat params
778 -- vector aligned (4 word = 16 bytes) with 8 extra words of buffer space
779 framesize = alignedTo 4 (argsize + 8)
781 ((_,_,argsize), params) = mapAccumL loadparam (3,1,2) arg_offs_n_reps
783 -- handle one argument, returning machine code and the updated state
784 loadparam :: (Int, Int, Int) -> (Int, PrimRep) ->
785 ((Int, Int, Int), [Word32])
787 loadparam (gpr, fpr, stack) (ofs, rep) = case rep of
788 FloatRep | fpr <= 8 -> ( (gpr, fpr + 1, stack), loadFloat fpr ofs )
789 FloatRep -> ( (gpr, fpr, stack + 1), stackWord stack ofs )
791 DoubleRep | fpr <= 8 -> ( (gpr, fpr + 1, stack), loadDouble fpr ofs )
792 DoubleRep -> ( (gpr, fpr, astack + 2), stackLong astack ofs )
794 r | is64 r && even gpr -> loadparam (gpr + 1, fpr, stack) (ofs, rep)
795 r | is64 r && gpr <= 9 -> ( (gpr + 2, fpr, stack), loadLong gpr ofs )
796 r | is64 r -> ( (gpr, fpr, astack + 2), stackLong astack ofs )
797 _ | gpr <= 10 -> ( (gpr + 1, fpr, stack), loadWord gpr ofs )
798 _ -> ( (gpr, fpr, stack + 1), stackWord stack ofs )
799 where astack = alignedTo 2 stack
802 is64 Word64Rep = True
805 alignedTo :: Int -> Int -> Int
806 alignedTo alignment x = case x `mod` alignment of
808 y -> x - y + alignment
810 -- convenience macros to do multiple-instruction data moves
811 stackWord dst src = loadWord 0 src ++ storeWordC 0 dst
812 stackLong dst src = stackWord dst src ++ stackWord (dst + 1) (src + 1)
813 loadLong dst src = loadWord dst src ++ loadWord (dst + 1) (src + 1)
814 storeLong dst src = storeWord dst src ++ storeWord (dst + 1) (src + 1)
816 -- load data from the Haskell stack (relative to r31)
817 loadFloat = loadstoreInstr 0xc01f0000 -- lfs fpr, ofs(r31)
818 loadDouble = loadstoreInstr 0xc81f0000 -- lfd fpr, ofs(r31)
819 loadWord = loadstoreInstr 0x801f0000 -- lwz gpr, ofs(r31)
821 -- store data to the Haskell stack (relative to r31)
822 storeFloat = loadstoreInstr 0xd01f0000 -- stfs fpr, ofs(r31)
823 storeDouble = loadstoreInstr 0xd81f0000 -- stfd fpr, ofs(r31)
824 storeWord = loadstoreInstr 0x901f0000 -- stw gpr, ofs(r31)
826 -- store data to the C stack (relative to r1)
827 storeWordC = loadstoreInstr 0x90010000 -- stw gpr, ofs(r1)
829 -- machine code building blocks
830 loadstoreInstr :: Word32 -> Int -> Int -> [Word32]
831 loadstoreInstr code reg ofs = [ code .|. register reg .|. offset ofs ]
833 register :: Int -> Word32
834 register reg = fromIntegral reg `shiftL` 21
836 offset :: Int -> Word32
837 offset ofs = fromIntegral (ofs * 4) .&. 0xFFFF
840 w32_to_w8s_bigEndian :: Word32 -> [Word8]
841 w32_to_w8s_bigEndian w = [fromIntegral (0xFF .&. (w `shiftR` 24)),
842 fromIntegral (0xFF .&. (w `shiftR` 16)),
843 fromIntegral (0xFF .&. (w `shiftR` 8)),
844 fromIntegral (0xFF .&. w)]
848 = throwDyn (InstallationError "foreign import is not implemented for GHCi on this platform.")
852 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
853 lit32 :: Int -> [Word8]
854 lit32 i = let w32 = (fromIntegral i) :: Word32
855 in map (fromIntegral . ( .&. 0xFF))
856 [w32, w32 `shiftR` 8,
857 w32 `shiftR` 16, w32 `shiftR` 24]
860 #endif /* !USE_LIBFFI */
862 moan64 :: String -> SDoc -> a
866 "\nGHCi's bytecode generation machinery can't handle 64-bit\n" ++
867 "code properly yet. You can work around this for the time being\n" ++
868 "by compiling this module and all those it imports to object code,\n" ++
869 "and re-starting your GHCi session. The panic below contains information,\n" ++
870 "intended for the GHC implementors, about the exact place where GHC gave up.\n"
876 newExec :: Storable a => [a] -> IO (FunPtr ())
878 = do ptr <- _allocateExec (fromIntegral $ codeSize undefined code)
880 return (castPtrToFunPtr ptr)
882 codeSize :: Storable a => a -> [a] -> Int
883 codeSize dummy array = sizeOf(dummy) * length array
885 foreign import ccall unsafe "allocateExec"
886 _allocateExec :: CUInt -> IO (Ptr a)