-- The above warning suppression flag is a temporary kludge.
-- While working on this module you are encouraged to remove it and fix
-- any warnings in the module. See
--     http://hackage.haskell.org/trac/ghc/wiki/Commentary/CodingStyle#Warnings
8 -----------------------------------------------------------------------------
10 -- Generating machine code (instruction selection)
12 -- (c) The University of Glasgow 1996-2004
14 -----------------------------------------------------------------------------
-- This is a big module, but, if you pay attention to
-- (a) the sectioning, (b) the type signatures, and
-- (c) the #if blah_TARGET_ARCH things, the
-- structure should not be too overwhelming.
21 module MachCodeGen ( cmmTopCodeGen, InstrBlock ) where
23 #include "HsVersions.h"
24 #include "nativeGen/NCG.h"
31 import PositionIndependentCode
32 import RegAllocInfo ( mkBranchInstr, mkRegRegMoveInstr )
35 -- Our intermediate code:
37 import PprCmm ( pprExpr )
40 import ClosureInfo ( C_SRT(..) )
43 import StaticFlags ( opt_PIC )
44 import ForeignCall ( CCallConv(..) )
47 import qualified Outputable as O
50 import FastBool ( isFastTrue )
51 import Constants ( wORD_SIZE )
53 import Debug.Trace ( trace )
55 import Control.Monad ( mapAndUnzipM )
56 import Data.Maybe ( fromJust )
61 -- -----------------------------------------------------------------------------
62 -- Top-level of the instruction selector
-- | 'InstrBlock's are the insn sequences generated by the insn selectors.
-- They are really trees of insns to facilitate fast appending, where a
-- left-to-right traversal (pre-order?) yields the insns in the correct
-- order.
type InstrBlock = OrdList Instr
-- | Translate one top-level Cmm item into native code.  A procedure is
-- code-generated block by block; data sections pass through untouched.
-- If a PIC base register is in use, code to initialise it is added in
-- front of the generated procedure.
cmmTopCodeGen :: RawCmmTop -> NatM [NatCmmTop]
cmmTopCodeGen (CmmProc info lab params (ListGraph blocks)) = do
  (nat_blocks,statics) <- mapAndUnzipM basicBlockCodeGen blocks
  picBaseMb <- getPicBaseMaybeNat
  let proc = CmmProc info lab params (ListGraph $ concat nat_blocks)
      tops = proc : concat statics
  -- NOTE(review): the scrutinee line (presumably "case picBaseMb of")
  -- appears to have been dropped by extraction; the two alternatives
  -- below are orphaned as shown.
        Just picBase -> initializePicBase picBase tops
        Nothing -> return tops
cmmTopCodeGen (CmmData sec dat) = do
  return [CmmData sec dat] -- no translation, we just use CmmStatic
-- | Generate native code for a single Cmm basic block.  Instruction
-- selection may introduce fresh block boundaries (NEWBLOCK) and local
-- data (LDATA); both are split back out here by a right fold over the
-- instruction stream.
basicBlockCodeGen :: CmmBasicBlock -> NatM ([NatBasicBlock],[NatCmmTop])
basicBlockCodeGen (BasicBlock id stmts) = do
  instrs <- stmtsToInstrs stmts
  -- code generation may introduce new basic block boundaries, which
  -- are indicated by the NEWBLOCK instruction. We must split up the
  -- instruction stream into basic blocks again. Also, we extract
  -- NOTE(review): a "let" line introducing the binding below seems to
  -- have been lost in extraction.
      (top,other_blocks,statics) = foldrOL mkBlocks ([],[],[]) instrs
      -- A NEWBLOCK marker closes the accumulated instrs into a block.
      mkBlocks (NEWBLOCK id) (instrs,blocks,statics)
        = ([], BasicBlock id instrs : blocks, statics)
      -- LDATA is hoisted out to a top-level data section.
      mkBlocks (LDATA sec dat) (instrs,blocks,statics)
        = (instrs, blocks, CmmData sec dat:statics)
      mkBlocks instr (instrs,blocks,statics)
        = (instr:instrs, blocks, statics)
  return (BasicBlock id top : other_blocks, statics)
-- | Select instructions for a list of statements and concatenate the
-- resulting 'InstrBlock's in order.
stmtsToInstrs :: [CmmStmt] -> NatM InstrBlock
-- NOTE(review): the LHS line ("stmtsToInstrs stmts") appears missing.
  = do instrss <- mapM stmtToInstrs stmts
       return (concatOL instrss)
-- | Select instructions for one Cmm statement.  Assignments dispatch on
-- the type: float, 64-bit-on-32-bit (via assign*_I64Code), or plain
-- integer.  CmmReturn is expected to have been CPS'd away already.
stmtToInstrs :: CmmStmt -> NatM InstrBlock
stmtToInstrs stmt = case stmt of
    CmmNop -> return nilOL
    CmmComment s -> return (unitOL (COMMENT s))
    -- NOTE(review): the pattern head for the register-assignment case
    -- (presumably "CmmAssign reg src") seems lost in extraction.
      | isFloatType ty -> assignReg_FltCode size reg src
#if WORD_SIZE_IN_BITS==32
      | isWord64 ty -> assignReg_I64Code reg src
      | otherwise -> assignReg_IntCode size reg src
        where ty = cmmRegType reg
              size = cmmTypeSize ty
    -- NOTE(review): likewise the "CmmStore addr src" head is missing.
      | isFloatType ty -> assignMem_FltCode size addr src
#if WORD_SIZE_IN_BITS==32
      | isWord64 ty -> assignMem_I64Code addr src
      | otherwise -> assignMem_IntCode size addr src
        where ty = cmmExprType src
              size = cmmTypeSize ty
    CmmCall target result_regs args _ _
       -> genCCall target result_regs args
    CmmBranch id -> genBranch id
    CmmCondBranch arg id -> genCondJump id arg
    CmmSwitch arg ids -> genSwitch arg ids
    CmmJump arg params -> genJump arg
    -- CmmReturn case (head missing in extraction):
    panic "stmtToInstrs: return statement should have been cps'd away"
141 -- -----------------------------------------------------------------------------
142 -- General things for putting together code sequences
-- | Expand a 'CmmRegOff' node into the equivalent explicit addition,
-- @reg + offset@, so the rest of the selector only ever sees
-- 'CmmMachOp'.  ToDo: should we do it this way around, or convert
-- CmmExprs into CmmRegOff?
mangleIndexTree :: CmmExpr -> CmmExpr
mangleIndexTree (CmmRegOff reg off) =
  let w   = typeWidth (cmmRegType reg)
      lit = CmmLit (CmmInt (fromIntegral off) w)
  in  CmmMachOp (MO_Add w) [CmmReg reg, lit]
151 -- -----------------------------------------------------------------------------
152 -- Code gen for 64-bit arithmetic on 32-bit platforms
155 Simple support for generating 64-bit code (ie, 64 bit values and 64
156 bit assignments) on 32-bit platforms. Unlike the main code generator
157 we merely shoot for generating working code as simply as possible, and
158 pay little attention to code quality. Specifically, there is no
159 attempt to deal cleverly with the fixed-vs-floating register
160 distinction; all values are generated into (pairs of) floating
161 registers, even if this would mean some redundant reg-reg moves as a
162 result. Only one of the VRegUniques is returned, since it will be
163 of the VRegUniqueLo form, and the upper-half VReg can be determined
164 by applying getHiVRegFromLo to it.
-- | Result of 64-bit instruction selection on a 32-bit platform: the
-- generated code plus the low-half register; the high half is derived
-- with getHiVRegFromLo.
data ChildCode64   -- a.k.a "Register64"
-- NOTE(review): the constructor line ("= ChildCode64 InstrBlock")
-- appears to have been dropped by extraction.
        Reg        -- the lower 32-bit temporary which contains the
                   -- result; use getHiVRegFromLo to find the other
                   -- VRegUnique. Rules of this simplified insn
                   -- selection game are therefore that the returned
                   -- Reg may be modified
#if WORD_SIZE_IN_BITS==32

-- Store a 64-bit value to memory / assign it to a local register.
assignMem_I64Code :: CmmExpr -> CmmExpr -> NatM InstrBlock
assignReg_I64Code :: CmmReg -> CmmExpr -> NatM InstrBlock

#ifndef x86_64_TARGET_ARCH
iselExpr64 :: CmmExpr -> NatM ChildCode64
-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

-- x86: store the value's two 32-bit halves through the amode; the high
-- word goes at offset +4 (little-endian).
assignMem_I64Code addrTree valueTree = do
  Amode addr addr_code <- getAmode addrTree
  ChildCode64 vcode rlo <- iselExpr64 valueTree
  -- NOTE(review): a "let" line seems missing before these bindings.
      rhi = getHiVRegFromLo rlo
      -- Little-endian store
      mov_lo = MOV II32 (OpReg rlo) (OpAddr addr)
      mov_hi = MOV II32 (OpReg rhi) (OpAddr (fromJust (addrOffset addr 4)))
  return (vcode `appOL` addr_code `snocOL` mov_lo `snocOL` mov_hi)

-- x86: move both halves of the source pair into the destination pair.
assignReg_I64Code (CmmLocal (LocalReg u_dst pk)) valueTree = do
   ChildCode64 vcode r_src_lo <- iselExpr64 valueTree
   -- NOTE(review): "let" line apparently missing here too.
       r_dst_lo = mkVReg u_dst II32
       r_dst_hi = getHiVRegFromLo r_dst_lo
       r_src_hi = getHiVRegFromLo r_src_lo
       mov_lo = MOV II32 (OpReg r_src_lo) (OpReg r_dst_lo)
       mov_hi = MOV II32 (OpReg r_src_hi) (OpReg r_dst_hi)
   -- NOTE(review): the "return (" wrapper line appears missing.
       vcode `snocOL` mov_lo `snocOL` mov_hi
assignReg_I64Code lvalue valueTree
   = panic "assignReg_I64Code(i386): invalid lvalue"
-- | x86: select code for a 64-bit integer literal — materialise the low
-- and high 32-bit halves into a fresh register pair with two immediate
-- moves.
iselExpr64 (CmmLit (CmmInt i _)) = do
  (rlo,rhi) <- getNewRegPairNat II32
  let
        -- Split the literal BEFORE narrowing.  The previous code
        -- computed the high word as
        --   fromIntegral ((fromIntegral i `shiftR` 32) :: Word32)
        -- which converts i to Word32 first and THEN shifts, so the high
        -- word was always 0 and any literal >= 2^32 was wrong.
        r = fromIntegral (fromIntegral i :: Word32)
        q = fromIntegral (fromIntegral (i `shiftR` 32) :: Word32)
        code = toOL [
                MOV II32 (OpImm (ImmInteger r)) (OpReg rlo),
                MOV II32 (OpImm (ImmInteger q)) (OpReg rhi)
                ]
  return (ChildCode64 code rlo)
-- x86: 64-bit load — two 32-bit loads, high half from offset +4.
iselExpr64 (CmmLoad addrTree ty) | isWord64 ty = do
   Amode addr addr_code <- getAmode addrTree
   (rlo,rhi) <- getNewRegPairNat II32
   -- NOTE(review): "let" line appears missing before these bindings.
       mov_lo = MOV II32 (OpAddr addr) (OpReg rlo)
       mov_hi = MOV II32 (OpAddr (fromJust (addrOffset addr 4))) (OpReg rhi)
   -- NOTE(review): "return $" line appears missing.
     ChildCode64 (addr_code `snocOL` mov_lo `snocOL` mov_hi)

-- A 64-bit local register is already a register pair; no code needed.
iselExpr64 (CmmReg (CmmLocal (LocalReg vu ty))) | isWord64 ty
   = return (ChildCode64 nilOL (mkVReg vu II32))
-- we handle addition, but rather badly
-- | x86: 64-bit addition with a literal right operand: ADD on the low
-- halves, then ADC to propagate the carry into the high halves.
iselExpr64 (CmmMachOp (MO_Add _) [e1, CmmLit (CmmInt i _)]) = do
   ChildCode64 code1 r1lo <- iselExpr64 e1
   (rlo,rhi) <- getNewRegPairNat II32
   let
        -- Shift the Integer BEFORE narrowing to Word32; the previous
        -- code narrowed first, so the high word q was always 0 and
        -- literals >= 2^32 were added incorrectly.
        r = fromIntegral (fromIntegral i :: Word32)
        q = fromIntegral (fromIntegral (i `shiftR` 32) :: Word32)
        r1hi = getHiVRegFromLo r1lo
        code = code1 `appOL`
               toOL [ MOV II32 (OpReg r1lo) (OpReg rlo),
                      ADD II32 (OpImm (ImmInteger r)) (OpReg rlo),
                      MOV II32 (OpReg r1hi) (OpReg rhi),
                      ADC II32 (OpImm (ImmInteger q)) (OpReg rhi) ]
   return (ChildCode64 code rlo)
-- x86: general 64-bit addition — ADD the low halves, ADC the high
-- halves so the carry propagates.
iselExpr64 (CmmMachOp (MO_Add _) [e1,e2]) = do
   ChildCode64 code1 r1lo <- iselExpr64 e1
   ChildCode64 code2 r2lo <- iselExpr64 e2
   (rlo,rhi) <- getNewRegPairNat II32
   -- NOTE(review): "let" and "code = code1 `appOL` code2 `appOL`"
   -- lines appear lost in extraction.
       r1hi = getHiVRegFromLo r1lo
       r2hi = getHiVRegFromLo r2lo
       toOL [ MOV II32 (OpReg r1lo) (OpReg rlo),
              ADD II32 (OpReg r2lo) (OpReg rlo),
              MOV II32 (OpReg r1hi) (OpReg rhi),
              ADC II32 (OpReg r2hi) (OpReg rhi) ]
   return (ChildCode64 code rlo)

-- Zero-extend to 64 bits: compute the value, then clear the high word.
iselExpr64 (CmmMachOp (MO_UU_Conv _ W64) [expr]) = do
     r_dst_lo <- getNewRegNat II32
     let r_dst_hi = getHiVRegFromLo r_dst_lo
     -- NOTE(review): the lines binding "code" (via getAnyReg/code fn)
     -- and the "return $" wrapper appear missing.
         ChildCode64 (code `snocOL`
                      MOV II32 (OpImm (ImmInt 0)) (OpReg r_dst_hi))

-- Catch-all (head "iselExpr64 expr" missing in extraction):
   = pprPanic "iselExpr64(i386)" (ppr expr)

#endif /* i386_TARGET_ARCH */
295 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
#if sparc_TARGET_ARCH

-- SPARC: store high word at offset 0 and low word at offset 4 (the
-- code stores rhi first; presumably big-endian layout — the opposite
-- of the x86 version above).
assignMem_I64Code addrTree valueTree = do
     Amode addr addr_code <- getAmode addrTree
     ChildCode64 vcode rlo <- iselExpr64 valueTree
     (src, code) <- getSomeReg addrTree
     -- NOTE(review): "let" line appears missing here.
         rhi = getHiVRegFromLo rlo
         mov_hi = ST II32 rhi (AddrRegImm src (ImmInt 0))
         mov_lo = ST II32 rlo (AddrRegImm src (ImmInt 4))
     return (vcode `appOL` code `snocOL` mov_hi `snocOL` mov_lo)

-- SPARC: register-to-register move of both halves, spelled as
-- OR with %g0 (the SPARC idiom for a plain move).
assignReg_I64Code (CmmLocal (LocalReg u_dst pk)) valueTree = do
     ChildCode64 vcode r_src_lo <- iselExpr64 valueTree
     -- NOTE(review): "let" line appears missing here.
         r_dst_lo = mkVReg u_dst (cmmTypeSize pk)
         r_dst_hi = getHiVRegFromLo r_dst_lo
         r_src_hi = getHiVRegFromLo r_src_lo
         mov_lo = mkMOV r_src_lo r_dst_lo
         mov_hi = mkMOV r_src_hi r_dst_hi
         mkMOV sreg dreg = OR False g0 (RIReg sreg) dreg
     return (vcode `snocOL` mov_hi `snocOL` mov_lo)
assignReg_I64Code lvalue valueTree
   = panic "assignReg_I64Code(sparc): invalid lvalue"
-- Don't delete this -- it's very handy for debugging.
-- | trace ("iselExpr64: " ++ showSDoc (ppr expr)) False
-- = panic "iselExpr64(???)"

-- SPARC: 64-bit load.
-- NOTE(review): the pattern binds (AddrRegReg r1 r2) but only r1 is
-- used in the loads below, and the match is partial — looks suspect;
-- verify against the SPARC backend history before relying on this.
iselExpr64 (CmmLoad addrTree ty) | isWord64 ty = do
     Amode (AddrRegReg r1 r2) addr_code <- getAmode addrTree
     rlo <- getNewRegNat II32
     let rhi = getHiVRegFromLo rlo
         mov_hi = LD II32 (AddrRegImm r1 (ImmInt 0)) rhi
         mov_lo = LD II32 (AddrRegImm r1 (ImmInt 4)) rlo
     -- NOTE(review): "return $" line appears missing.
          ChildCode64 (addr_code `snocOL` mov_hi `snocOL` mov_lo)

-- SPARC: copy a 64-bit local register pair into a fresh pair.
iselExpr64 (CmmReg (CmmLocal (LocalReg uq ty))) | isWord64 ty = do
     r_dst_lo <- getNewRegNat II32
     let r_dst_hi = getHiVRegFromLo r_dst_lo
         r_src_lo = mkVReg uq II32
         r_src_hi = getHiVRegFromLo r_src_lo
         mov_lo = mkMOV r_src_lo r_dst_lo
         mov_hi = mkMOV r_src_hi r_dst_hi
         mkMOV sreg dreg = OR False g0 (RIReg sreg) dreg
     -- NOTE(review): "return $" line appears missing.
          ChildCode64 (toOL [mov_hi, mov_lo]) r_dst_lo

-- Catch-all (head "iselExpr64 expr" missing in extraction):
   = pprPanic "iselExpr64(sparc)" (ppr expr)

#endif /* sparc_TARGET_ARCH */
357 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
#if powerpc_TARGET_ARCH

-- | PPC: compute addressing modes for both halves of a 64-bit access:
-- the given address for the high word and address+4 for the low word.
-- If the +4 offset cannot be folded into the amode, force the address
-- into a register and use reg+0 / reg+4.
getI64Amodes :: CmmExpr -> NatM (AddrMode, AddrMode, InstrBlock)
getI64Amodes addrTree = do
    Amode hi_addr addr_code <- getAmode addrTree
    case addrOffset hi_addr 4 of
       Just lo_addr -> return (hi_addr, lo_addr, addr_code)
       Nothing      -> do (hi_ptr, code) <- getSomeReg addrTree
                          return (AddrRegImm hi_ptr (ImmInt 0),
                                  AddrRegImm hi_ptr (ImmInt 4),
                          -- NOTE(review): the third tuple component
                          -- (presumably "code)") appears missing.

-- PPC: store both halves through the paired amodes.
assignMem_I64Code addrTree valueTree = do
        (hi_addr, lo_addr, addr_code) <- getI64Amodes addrTree
        ChildCode64 vcode rlo <- iselExpr64 valueTree
        -- NOTE(review): "let" line appears missing here.
            rhi = getHiVRegFromLo rlo
            mov_hi = ST II32 rhi hi_addr
            mov_lo = ST II32 rlo lo_addr
        return (vcode `appOL` addr_code `snocOL` mov_lo `snocOL` mov_hi)
-- PPC: register-to-register move of both halves using MR.
assignReg_I64Code (CmmLocal (LocalReg u_dst pk)) valueTree = do
   ChildCode64 vcode r_src_lo <- iselExpr64 valueTree
   -- NOTE(review): "let" line appears missing here.
       r_dst_lo = mkVReg u_dst II32
       r_dst_hi = getHiVRegFromLo r_dst_lo
       r_src_hi = getHiVRegFromLo r_src_lo
       mov_lo = MR r_dst_lo r_src_lo
       mov_hi = MR r_dst_hi r_src_hi
   -- NOTE(review): "return (" wrapper line appears missing.
       vcode `snocOL` mov_lo `snocOL` mov_hi
assignReg_I64Code lvalue valueTree
   = panic "assignReg_I64Code(powerpc): invalid lvalue"
-- Don't delete this -- it's very handy for debugging.
-- | trace ("iselExpr64: " ++ showSDoc (pprCmmExpr expr)) False
-- = panic "iselExpr64(???)"

-- PPC: 64-bit load via the paired amodes from getI64Amodes.
iselExpr64 (CmmLoad addrTree ty) | isWord64 ty = do
    (hi_addr, lo_addr, addr_code) <- getI64Amodes addrTree
    (rlo, rhi) <- getNewRegPairNat II32
    let mov_hi = LD II32 rhi hi_addr
        mov_lo = LD II32 rlo lo_addr
    return $ ChildCode64 (addr_code `snocOL` mov_lo `snocOL` mov_hi)
    -- NOTE(review): the trailing "rlo" argument line appears missing.

-- A 64-bit local register is already a register pair; no code needed.
iselExpr64 (CmmReg (CmmLocal (LocalReg vu ty))) | isWord64 ty
   = return (ChildCode64 nilOL (mkVReg vu II32))
-- | PPC: select code for a 64-bit integer literal.  Each 32-bit half is
-- built with the usual lis/ori idiom: LIS loads the upper 16 bits of a
-- register, OR-immediate fills in the lower 16.
iselExpr64 (CmmLit (CmmInt i _)) = do
  (rlo,rhi) <- getNewRegPairNat II32
  let
        -- Shift the Integer BEFORE narrowing to Word16.  The previous
        -- code narrowed first ((fromIntegral i :: Word16) shifted), so
        -- half1/half2/half3 were always 0.
        half0 = fromIntegral (fromIntegral i :: Word16)
        half1 = fromIntegral (fromIntegral (i `shiftR` 16) :: Word16)
        half2 = fromIntegral (fromIntegral (i `shiftR` 32) :: Word16)
        half3 = fromIntegral (fromIntegral (i `shiftR` 48) :: Word16)
        code = toOL [
                LIS rlo (ImmInt half1),
                OR rlo rlo (RIImm $ ImmInt half0),
                LIS rhi (ImmInt half3),
                -- Was "OR rlo rlo ...": that clobbered the low word
                -- with half2 and never set the high register's low
                -- 16 bits.  The high half belongs in rhi.
                OR rhi rhi (RIImm $ ImmInt half2)
                ]
  return (ChildCode64 code rlo)
-- PPC: 64-bit addition — ADDC on the low halves sets the carry, which
-- the (extraction-lost) adde presumably consumes for the high halves.
iselExpr64 (CmmMachOp (MO_Add _) [e1,e2]) = do
   ChildCode64 code1 r1lo <- iselExpr64 e1
   ChildCode64 code2 r2lo <- iselExpr64 e2
   (rlo,rhi) <- getNewRegPairNat II32
   -- NOTE(review): "let" and "code = ..." lines appear missing.
       r1hi = getHiVRegFromLo r1lo
       r2hi = getHiVRegFromLo r2lo
       toOL [ ADDC rlo r1lo r2lo,
       -- NOTE(review): the high-half instruction (presumably
       -- "ADDE rhi r1hi r2hi ]") appears missing.
   return (ChildCode64 code rlo)

-- Zero-extend a 32-bit value: move into the low half, LI 0 the high.
iselExpr64 (CmmMachOp (MO_UU_Conv W32 W64) [expr]) = do
    (expr_reg,expr_code) <- getSomeReg expr
    (rlo, rhi) <- getNewRegPairNat II32
    let mov_hi = LI rhi (ImmInt 0)
        mov_lo = MR rlo expr_reg
    return $ ChildCode64 (expr_code `snocOL` mov_lo `snocOL` mov_hi)
    -- NOTE(review): trailing "rlo" argument line appears missing.

-- Catch-all (head "iselExpr64 expr" missing in extraction):
   = pprPanic "iselExpr64(powerpc)" (ppr expr)

#endif /* powerpc_TARGET_ARCH */
-- -----------------------------------------------------------------------------
-- The 'Register' type

-- 'Register's passed up the tree. If the stix code forces the register
-- to live in a pre-decided machine register, it comes out as @Fixed@;
-- otherwise, it comes out as @Any@, and the parent can decide which
-- register to put it in.
-- NOTE(review): the data head line (presumably "data Register")
-- appears to have been dropped by extraction.
  = Fixed Size Reg InstrBlock   -- value lives in this specific Reg
  | Any Size (Reg -> InstrBlock) -- caller chooses the destination Reg
-- | Swap the 'Size' carried by a 'Register' without touching the
-- generated code; changing the recorded width is a no-op.
swizzleRegisterRep :: Register -> Size -> Register
swizzleRegisterRep r sz =
  case r of
    Fixed _ reg code -> Fixed sz reg code
    Any _ codefn     -> Any sz codefn
-- -----------------------------------------------------------------------------
-- Utils based on getRegister, below

-- The dual to getAnyReg: compute an expression into a register, but
-- we don't mind which one it is.
getSomeReg :: CmmExpr -> NatM (Reg, InstrBlock)
-- NOTE(review): the LHS ("getSomeReg expr = do") and the "case r of"
-- line with the Fixed alternative appear lost in extraction; only the
-- Any branch survives below.
  r <- getRegister expr
    tmp <- getNewRegNat rep
    return (tmp, code tmp)
-- -----------------------------------------------------------------------------
-- Grab the Reg for a CmmReg

-- | Map a CmmReg to the Reg it lives in: local registers get a virtual
-- register; global registers must map to a real machine register on
-- this platform, otherwise we panic.
getRegisterReg :: CmmReg -> Reg

getRegisterReg (CmmLocal (LocalReg u pk))
  = mkVReg u (cmmTypeSize pk)

getRegisterReg (CmmGlobal mid)
  = case get_GlobalReg_reg_or_addr mid of
       Left (RealReg rrno) -> RealReg rrno
       _other -> pprPanic "getRegisterReg-memory" (ppr $ CmmGlobal mid)
          -- By this stage, the only MagicIds remaining should be the
          -- ones which map to a real machine register on this
          -- platform. Hence ...
-- -----------------------------------------------------------------------------
-- Generate code to get a subtree into a Register

-- Don't delete this -- it's very handy for debugging.
-- | trace ("getRegister: " ++ showSDoc (pprCmmExpr expr)) False
-- = panic "getRegister(???)"

getRegister :: CmmExpr -> NatM Register

#if !x86_64_TARGET_ARCH
    -- on x86_64, we have %rip for PicBaseReg, but it's not a full-featured
    -- register, it can only be used for rip-relative addressing.
getRegister (CmmReg (CmmGlobal PicBaseReg))
-- NOTE(review): the "= do" line appears missing here.
      reg <- getPicBaseNat wordSize
      return (Fixed wordSize reg nilOL)

-- A plain register read: Fixed to wherever that register lives.
getRegister (CmmReg reg)
  = return (Fixed (cmmTypeSize (cmmRegType reg))
                  (getRegisterReg reg) nilOL)

-- Reg+offset is rewritten to an explicit addition first.
getRegister tree@(CmmRegOff _ _)
  = getRegister (mangleIndexTree tree)

#if WORD_SIZE_IN_BITS==32
    -- for 32-bit architectures, support some 64 -> 32 bit conversions:
    -- TO_W_(x), TO_W_(x >> 32)

-- TO_W_(x >> 32): the result is simply the high half of the pair.
getRegister (CmmMachOp (MO_UU_Conv W64 W32)
             [CmmMachOp (MO_U_Shr W64) [x,CmmLit (CmmInt 32 _)]]) = do
  ChildCode64 code rlo <- iselExpr64 x
  return $ Fixed II32 (getHiVRegFromLo rlo) code

getRegister (CmmMachOp (MO_SS_Conv W64 W32)
             [CmmMachOp (MO_U_Shr W64) [x,CmmLit (CmmInt 32 _)]]) = do
  ChildCode64 code rlo <- iselExpr64 x
  return $ Fixed II32 (getHiVRegFromLo rlo) code

-- TO_W_(x): the result is the low half of the pair.
getRegister (CmmMachOp (MO_UU_Conv W64 W32) [x]) = do
  ChildCode64 code rlo <- iselExpr64 x
  return $ Fixed II32 rlo code

getRegister (CmmMachOp (MO_SS_Conv W64 W32) [x]) = do
  ChildCode64 code rlo <- iselExpr64 x
  return $ Fixed II32 rlo code
-- end of machine-"independent" bit; here we go on the rest...

#if alpha_TARGET_ARCH
-- NOTE(review): this whole Alpha section is effectively dead code: it
-- pattern-matches on pre-Cmm Stix constructors (StDouble, StPrim,
-- StInt, StCall) that do not exist in the CmmExpr type used by the
-- rest of this module, so it cannot compile if alpha_TARGET_ARCH were
-- ever defined.  Kept verbatim; consider deleting upstream.

-- Float literal: emit it into a read-only data section and load it.
getRegister (StDouble d)
  = getBlockIdNat `thenNat` \ lbl ->
    getNewRegNat PtrRep `thenNat` \ tmp ->
    let code dst = mkSeqInstrs [
            LDATA RoDataSegment lbl [
                  DATA TF [ImmLab (rational d)]
            LDA tmp (AddrImm (ImmCLbl lbl)),
            LD TF dst (AddrReg tmp)]
    -- NOTE(review): "in" line appears missing before the return.
    return (Any FF64 code)

getRegister (StPrim primop [x]) -- unary PrimOps
-- NOTE(review): the "= case primop of" line appears missing.
      IntNegOp -> trivialUCode (NEG Q False) x

      NotOp -> trivialUCode NOT x

      FloatNegOp -> trivialUFCode FloatRep (FNEG TF) x
      DoubleNegOp -> trivialUFCode FF64 (FNEG TF) x

      OrdOp -> coerceIntCode IntRep x

      Float2IntOp -> coerceFP2Int x
      Int2FloatOp -> coerceInt2FP pr x
      Double2IntOp -> coerceFP2Int x
      Int2DoubleOp -> coerceInt2FP pr x

      Double2FloatOp -> coerceFltCode x
      Float2DoubleOp -> coerceFltCode x

      -- All other unary float ops become out-of-line C calls.
      other_op -> getRegister (StCall fn CCallConv FF64 [x])
        -- NOTE(review): "where" line appears missing here.
          fn = case other_op of
                 FloatExpOp -> fsLit "exp"
                 FloatLogOp -> fsLit "log"
                 FloatSqrtOp -> fsLit "sqrt"
                 FloatSinOp -> fsLit "sin"
                 FloatCosOp -> fsLit "cos"
                 FloatTanOp -> fsLit "tan"
                 FloatAsinOp -> fsLit "asin"
                 FloatAcosOp -> fsLit "acos"
                 FloatAtanOp -> fsLit "atan"
                 FloatSinhOp -> fsLit "sinh"
                 FloatCoshOp -> fsLit "cosh"
                 FloatTanhOp -> fsLit "tanh"
                 DoubleExpOp -> fsLit "exp"
                 DoubleLogOp -> fsLit "log"
                 DoubleSqrtOp -> fsLit "sqrt"
                 DoubleSinOp -> fsLit "sin"
                 DoubleCosOp -> fsLit "cos"
                 DoubleTanOp -> fsLit "tan"
                 DoubleAsinOp -> fsLit "asin"
                 DoubleAcosOp -> fsLit "acos"
                 DoubleAtanOp -> fsLit "atan"
                 DoubleSinhOp -> fsLit "sinh"
                 DoubleCoshOp -> fsLit "cosh"
                 DoubleTanhOp -> fsLit "tanh"

      pr = panic "MachCode.getRegister: no primrep needed for Alpha"

getRegister (StPrim primop [x, y]) -- dyadic PrimOps
-- NOTE(review): the "= case primop of" line appears missing.
      CharGtOp -> trivialCode (CMP LTT) y x
      CharGeOp -> trivialCode (CMP LE) y x
      CharEqOp -> trivialCode (CMP EQQ) x y
      CharNeOp -> int_NE_code x y
      CharLtOp -> trivialCode (CMP LTT) x y
      CharLeOp -> trivialCode (CMP LE) x y

      IntGtOp -> trivialCode (CMP LTT) y x
      IntGeOp -> trivialCode (CMP LE) y x
      IntEqOp -> trivialCode (CMP EQQ) x y
      IntNeOp -> int_NE_code x y
      IntLtOp -> trivialCode (CMP LTT) x y
      IntLeOp -> trivialCode (CMP LE) x y

      WordGtOp -> trivialCode (CMP ULT) y x
      WordGeOp -> trivialCode (CMP ULE) x y
      WordEqOp -> trivialCode (CMP EQQ) x y
      WordNeOp -> int_NE_code x y
      WordLtOp -> trivialCode (CMP ULT) x y
      WordLeOp -> trivialCode (CMP ULE) x y

      AddrGtOp -> trivialCode (CMP ULT) y x
      AddrGeOp -> trivialCode (CMP ULE) y x
      AddrEqOp -> trivialCode (CMP EQQ) x y
      AddrNeOp -> int_NE_code x y
      AddrLtOp -> trivialCode (CMP ULT) x y
      AddrLeOp -> trivialCode (CMP ULE) x y

      FloatGtOp -> cmpF_code (FCMP TF LE) EQQ x y
      FloatGeOp -> cmpF_code (FCMP TF LTT) EQQ x y
      FloatEqOp -> cmpF_code (FCMP TF EQQ) NE x y
      FloatNeOp -> cmpF_code (FCMP TF EQQ) EQQ x y
      FloatLtOp -> cmpF_code (FCMP TF LTT) NE x y
      FloatLeOp -> cmpF_code (FCMP TF LE) NE x y

      DoubleGtOp -> cmpF_code (FCMP TF LE) EQQ x y
      DoubleGeOp -> cmpF_code (FCMP TF LTT) EQQ x y
      DoubleEqOp -> cmpF_code (FCMP TF EQQ) NE x y
      DoubleNeOp -> cmpF_code (FCMP TF EQQ) EQQ x y
      DoubleLtOp -> cmpF_code (FCMP TF LTT) NE x y
      DoubleLeOp -> cmpF_code (FCMP TF LE) NE x y

      IntAddOp -> trivialCode (ADD Q False) x y
      IntSubOp -> trivialCode (SUB Q False) x y
      IntMulOp -> trivialCode (MUL Q False) x y
      IntQuotOp -> trivialCode (DIV Q False) x y
      IntRemOp -> trivialCode (REM Q False) x y

      WordAddOp -> trivialCode (ADD Q False) x y
      WordSubOp -> trivialCode (SUB Q False) x y
      WordMulOp -> trivialCode (MUL Q False) x y
      WordQuotOp -> trivialCode (DIV Q True) x y
      WordRemOp -> trivialCode (REM Q True) x y

      FloatAddOp -> trivialFCode W32 (FADD TF) x y
      FloatSubOp -> trivialFCode W32 (FSUB TF) x y
      FloatMulOp -> trivialFCode W32 (FMUL TF) x y
      FloatDivOp -> trivialFCode W32 (FDIV TF) x y

      DoubleAddOp -> trivialFCode W64 (FADD TF) x y
      DoubleSubOp -> trivialFCode W64 (FSUB TF) x y
      DoubleMulOp -> trivialFCode W64 (FMUL TF) x y
      DoubleDivOp -> trivialFCode W64 (FDIV TF) x y

      AddrAddOp -> trivialCode (ADD Q False) x y
      AddrSubOp -> trivialCode (SUB Q False) x y
      AddrRemOp -> trivialCode (REM Q True) x y

      AndOp -> trivialCode AND x y
      OrOp -> trivialCode OR x y
      XorOp -> trivialCode XOR x y
      SllOp -> trivialCode SLL x y
      SrlOp -> trivialCode SRL x y

      ISllOp -> trivialCode SLL x y -- was: panic "AlphaGen:isll"
      ISraOp -> trivialCode SRA x y -- was: panic "AlphaGen:isra"
      ISrlOp -> trivialCode SRL x y -- was: panic "AlphaGen:isrl"

      FloatPowerOp -> getRegister (StCall (fsLit "pow") CCallConv FF64 [x,y])
      DoublePowerOp -> getRegister (StCall (fsLit "pow") CCallConv FF64 [x,y])

{- ------------------------------------------------------------
        Some bizarre special code for getting condition codes into
        registers. Integer non-equality is a test for equality
        followed by an XOR with 1. (Integer comparisons always set
        the result register to 0 or 1.) Floating point comparisons of
        any kind leave the result in a floating point register, so we
        need to wrangle an integer register out of things.

int_NE_code :: StixTree -> StixTree -> NatM Register
-- NOTE(review): the LHS ("int_NE_code x y") appears missing.
  = trivialCode (CMP EQQ) x y `thenNat` \ register ->
    getNewRegNat IntRep `thenNat` \ tmp ->
    -- NOTE(review): "let" line appears missing here.
        code = registerCode register tmp
        src = registerName register tmp
        code__2 dst = code . mkSeqInstr (XOR src (RIImm (ImmInt 1)) dst)
    return (Any IntRep code__2)

{- ------------------------------------------------------------
        Comments for int_NE_code also apply to cmpF_code

-- NOTE(review): the first line of cmpF_code's signature is missing.
        :: (Reg -> Reg -> Reg -> Instr)
        -> StixTree -> StixTree

cmpF_code instr cond x y
  = trivialFCode pr instr x y `thenNat` \ register ->
    getNewRegNat FF64 `thenNat` \ tmp ->
    getBlockIdNat `thenNat` \ lbl ->
    -- NOTE(review): "let" line appears missing here.
        code = registerCode register tmp
        result = registerName register tmp
        code__2 dst = code . mkSeqInstrs [
            OR zeroh (RIImm (ImmInt 1)) dst,
            BF cond result (ImmCLbl lbl),
            OR zeroh (RIReg zeroh) dst,
    return (Any IntRep code__2)
    pr = panic "trivialU?FCode: does not use PrimRep on Alpha"
------------------------------------------------------------

getRegister (CmmLoad pk mem)
  = getAmode mem `thenNat` \ amode ->
    -- NOTE(review): "let" line appears missing here.
        code = amodeCode amode
        src = amodeAddr amode
        size = primRepToSize pk
        code__2 dst = code . mkSeqInstr (LD size dst src)
    return (Any pk code__2)

getRegister (StInt i)
-- NOTE(review): the guards selecting between the three encodings below
-- (small immediate / LDI / LDA) appear lost in extraction.
        code dst = mkSeqInstr (OR zeroh (RIImm src) dst)
    return (Any IntRep code)
        code dst = mkSeqInstr (LDI Q dst src)
    return (Any IntRep code)
        src = ImmInt (fromInteger i)
        code dst = mkSeqInstr (LDA dst (AddrImm imm__2))
    return (Any PtrRep code)
        imm__2 = case imm of Just x -> x

#endif /* alpha_TARGET_ARCH */
-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

-- i386 (NOTE(review): the "#if i386_TARGET_ARCH" guard opening this
-- section appears missing): float literals go into a data section and
-- are loaded via a (possibly PIC) dynamic reference.
getRegister (CmmLit (CmmFloat f W32)) = do
    lbl <- getNewLabelNat
    dflags <- getDynFlagsNat
    dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl
    Amode addr addr_code <- getAmode dynRef
    -- NOTE(review): the "let code dst = LDATA ..." lines surrounding
    -- the fragments below appear lost in extraction.
                       CmmStaticLit (CmmFloat f W32)]
            `consOL` (addr_code `snocOL`
    return (Any FF32 code)

-- Doubles: 0.0 and 1.0 have dedicated x87 load instructions (GLDZ /
-- GLD1); other values go through a data section like floats.
getRegister (CmmLit (CmmFloat d W64))
-- NOTE(review): the "| d == 0.0" guard appears missing here.
  = let code dst = unitOL (GLDZ dst)
    in return (Any FF64 code)
-- NOTE(review): the "| d == 1.0" guard appears missing here.
  = let code dst = unitOL (GLD1 dst)
    in return (Any FF64 code)
-- NOTE(review): the "| otherwise = do" head appears missing here.
    lbl <- getNewLabelNat
    dflags <- getDynFlagsNat
    dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl
    Amode addr addr_code <- getAmode dynRef
                       CmmStaticLit (CmmFloat d W64)]
            `consOL` (addr_code `snocOL`
    return (Any FF64 code)

#endif /* i386_TARGET_ARCH */

#if x86_64_TARGET_ARCH

-- x86_64: float 0.0 is cheapest as xor dst,dst.
getRegister (CmmLit (CmmFloat 0.0 w)) = do
   let size = floatSize w
       code dst = unitOL (XOR size (OpReg dst) (OpReg dst))
        -- I don't know why there are xorpd, xorps, and pxor instructions.
        -- They all appear to do the same thing --SDM
   return (Any size code)

-- x86_64: other float literals load rip-relative from a data section.
getRegister (CmmLit (CmmFloat f w)) = do
    lbl <- getNewLabelNat
    let code dst = toOL [
            -- NOTE(review): the "LDATA ReadOnlyData [CmmDataLabel lbl,"
            -- line appears missing here.
                            CmmStaticLit (CmmFloat f w)],
            MOV size (OpAddr (ripRel (ImmCLbl lbl))) (OpReg dst)
    return (Any size code)
   where size = floatSize w

#endif /* x86_64_TARGET_ARCH */
#if i386_TARGET_ARCH || x86_64_TARGET_ARCH

-- catch simple cases of zero- or sign-extended load
-- (fusing the load and the extension into a single movzx/movsx).
getRegister (CmmMachOp (MO_UU_Conv W8 W32) [CmmLoad addr _]) = do
  code <- intLoadCode (MOVZxL II8) addr
  return (Any II32 code)

getRegister (CmmMachOp (MO_SS_Conv W8 W32) [CmmLoad addr _]) = do
  code <- intLoadCode (MOVSxL II8) addr
  return (Any II32 code)

getRegister (CmmMachOp (MO_UU_Conv W16 W32) [CmmLoad addr _]) = do
  code <- intLoadCode (MOVZxL II16) addr
  return (Any II32 code)

getRegister (CmmMachOp (MO_SS_Conv W16 W32) [CmmLoad addr _]) = do
  code <- intLoadCode (MOVSxL II16) addr
  return (Any II32 code)

#if x86_64_TARGET_ARCH

-- catch simple cases of zero- or sign-extended load (64-bit targets).
getRegister (CmmMachOp (MO_UU_Conv W8 W64) [CmmLoad addr _]) = do
  code <- intLoadCode (MOVZxL II8) addr
  return (Any II64 code)

getRegister (CmmMachOp (MO_SS_Conv W8 W64) [CmmLoad addr _]) = do
  code <- intLoadCode (MOVSxL II8) addr
  return (Any II64 code)

getRegister (CmmMachOp (MO_UU_Conv W16 W64) [CmmLoad addr _]) = do
  code <- intLoadCode (MOVZxL II16) addr
  return (Any II64 code)

getRegister (CmmMachOp (MO_SS_Conv W16 W64) [CmmLoad addr _]) = do
  code <- intLoadCode (MOVSxL II16) addr
  return (Any II64 code)

getRegister (CmmMachOp (MO_UU_Conv W32 W64) [CmmLoad addr _]) = do
  code <- intLoadCode (MOV II32) addr -- 32-bit loads zero-extend
  return (Any II64 code)

getRegister (CmmMachOp (MO_SS_Conv W32 W64) [CmmLoad addr _]) = do
  code <- intLoadCode (MOVSxL II32) addr
  return (Any II64 code)
#if x86_64_TARGET_ARCH
-- PicBaseReg + literal becomes a single rip-relative LEA.
getRegister (CmmMachOp (MO_Add W64) [CmmReg (CmmGlobal PicBaseReg),
                                     CmmLit displacement])
    = return $ Any II64 (\dst -> unitOL $
        LEA II64 (OpAddr (ripRel (litToImm displacement))) (OpReg dst))

#if x86_64_TARGET_ARCH
-- SSE float negation: XOR with a sign-bit mask held in a 16-byte
-- read-only constant (xorps needs the full 128-bit operand).
getRegister (CmmMachOp (MO_F_Neg W32) [x]) = do
  x_code <- getAnyReg x
  lbl <- getNewLabelNat
  -- NOTE(review): the "let" line appears missing before this binding.
  code dst = x_code dst `appOL` toOL [
        -- This is how gcc does it, so it can't be that bad:
        LDATA ReadOnlyData16 [
        -- NOTE(review): the "CmmAlign 16, CmmDataLabel lbl" lines
        -- appear lost in extraction.
            CmmStaticLit (CmmInt 0x80000000 W32),
            CmmStaticLit (CmmInt 0 W32),
            CmmStaticLit (CmmInt 0 W32),
            CmmStaticLit (CmmInt 0 W32)
        XOR FF32 (OpAddr (ripRel (ImmCLbl lbl))) (OpReg dst)
        -- xorps, so we need the 128-bit constant
        -- ToDo: rip-relative
  return (Any FF32 code)

getRegister (CmmMachOp (MO_F_Neg W64) [x]) = do
  x_code <- getAnyReg x
  lbl <- getNewLabelNat
  -- This is how gcc does it, so it can't be that bad:
  code dst = x_code dst `appOL` toOL [
        LDATA ReadOnlyData16 [
            CmmStaticLit (CmmInt 0x8000000000000000 W64),
            CmmStaticLit (CmmInt 0 W64)
        -- gcc puts an unpck here. Wonder if we need it.
        XOR FF64 (OpAddr (ripRel (ImmCLbl lbl))) (OpReg dst)
        -- xorpd, so we need the 128-bit constant
  return (Any FF64 code)
#if i386_TARGET_ARCH || x86_64_TARGET_ARCH

-- Unary MachOps: negation, bitwise not, and the full matrix of
-- integer/float width conversions.
getRegister (CmmMachOp mop [x]) -- unary MachOps
-- NOTE(review): the "= case mop of" line (and an i386-only #if around
-- the GNEG cases) appear lost in extraction.
      MO_F_Neg W32 -> trivialUFCode FF32 (GNEG FF32) x
      MO_F_Neg W64 -> trivialUFCode FF64 (GNEG FF64) x

      MO_S_Neg w -> triv_ucode NEGI (intSize w)
      MO_F_Neg w -> triv_ucode NEGI (floatSize w)
      MO_Not w -> triv_ucode NOT (intSize w)

      -- Narrowing to 8/16 bits: route through a byte-addressable reg.
      MO_UU_Conv W32 W8 -> toI8Reg W32 x
      MO_SS_Conv W32 W8 -> toI8Reg W32 x
      MO_UU_Conv W16 W8 -> toI8Reg W16 x
      MO_SS_Conv W16 W8 -> toI8Reg W16 x
      MO_UU_Conv W32 W16 -> toI16Reg W32 x
      MO_SS_Conv W32 W16 -> toI16Reg W32 x

#if x86_64_TARGET_ARCH
      MO_UU_Conv W64 W32 -> conversionNop II64 x
      MO_SS_Conv W64 W32 -> conversionNop II64 x
      MO_UU_Conv W64 W16 -> toI16Reg W64 x
      MO_SS_Conv W64 W16 -> toI16Reg W64 x
      MO_UU_Conv W64 W8 -> toI8Reg W64 x
      MO_SS_Conv W64 W8 -> toI8Reg W64 x

      -- Same-width "conversions" are no-ops.
      MO_UU_Conv rep1 rep2 | rep1 == rep2 -> conversionNop (intSize rep1) x
      MO_SS_Conv rep1 rep2 | rep1 == rep2 -> conversionNop (intSize rep1) x

      -- Widening: movzx for unsigned, movsx for signed.
      MO_UU_Conv W8 W32 -> integerExtend W8 W32 MOVZxL x
      MO_UU_Conv W16 W32 -> integerExtend W16 W32 MOVZxL x
      MO_UU_Conv W8 W16 -> integerExtend W8 W16 MOVZxL x

      MO_SS_Conv W8 W32 -> integerExtend W8 W32 MOVSxL x
      MO_SS_Conv W16 W32 -> integerExtend W16 W32 MOVSxL x
      MO_SS_Conv W8 W16 -> integerExtend W8 W16 MOVSxL x

#if x86_64_TARGET_ARCH
      MO_UU_Conv W8 W64 -> integerExtend W8 W64 MOVZxL x
      MO_UU_Conv W16 W64 -> integerExtend W16 W64 MOVZxL x
      MO_UU_Conv W32 W64 -> integerExtend W32 W64 MOVZxL x
      MO_SS_Conv W8 W64 -> integerExtend W8 W64 MOVSxL x
      MO_SS_Conv W16 W64 -> integerExtend W16 W64 MOVSxL x
      MO_SS_Conv W32 W64 -> integerExtend W32 W64 MOVSxL x
        -- for 32-to-64 bit zero extension, amd64 uses an ordinary movl.
        -- However, we don't want the register allocator to throw it
        -- away as an unnecessary reg-to-reg move, so we keep it in
        -- the form of a movzl and print it as a movl later.

#if i386_TARGET_ARCH
      -- x87 holds everything at 80 bits, so FF32<->FF64 is a no-op.
      MO_FF_Conv W32 W64 -> conversionNop FF64 x
      MO_FF_Conv W64 W32 -> conversionNop FF32 x
-- NOTE(review): the "#else" separating the SSE versions appears lost.
      MO_FF_Conv W32 W64 -> coerceFP2FP W64 x
      MO_FF_Conv W64 W32 -> coerceFP2FP W32 x

      MO_FS_Conv from to -> coerceFP2Int from to x
      MO_SF_Conv from to -> coerceInt2FP from to x

      other -> pprPanic "getRegister" (pprMachOp mop)
-- NOTE(review): the "where" line introducing the helpers appears lost.
        triv_ucode :: (Size -> Operand -> Instr) -> Size -> NatM Register
        triv_ucode instr size = trivialUCode size (instr size) x

        -- signed or unsigned extension.
        integerExtend :: Width -> Width
                      -> (Size -> Operand -> Operand -> Instr)
                      -> CmmExpr -> NatM Register
        integerExtend from to instr expr = do
            (reg,e_code) <- if from == W8 then getByteReg expr
                                          else getSomeReg expr
            -- NOTE(review): the "let code dst = e_code `snocOL`" line
            -- appears missing here.
                   instr (intSize from) (OpReg reg) (OpReg dst)
            return (Any (intSize to) code)

        toI8Reg :: Width -> CmmExpr -> NatM Register
        toI8Reg new_rep expr
            = do codefn <- getAnyReg expr
                 return (Any (intSize new_rep) codefn)
                -- HACK: use getAnyReg to get a byte-addressable register.
                -- If the source was a Fixed register, this will add the
                -- mov instruction to put it into the desired destination.
                -- We're assuming that the destination won't be a fixed
                -- non-byte-addressable register; it won't be, because all
                -- fixed registers are word-sized.

        toI16Reg = toI8Reg -- for now

        conversionNop :: Size -> CmmExpr -> NatM Register
        conversionNop new_size expr
            = do e_code <- getRegister expr
                 return (swizzleRegisterRep e_code new_size)
-- Dyadic MachOps: comparisons, arithmetic, bitwise ops and shifts.
-- (The where-clause helpers continue past the end of this extract.)
getRegister e@(CmmMachOp mop [x, y]) -- dyadic MachOps
-- NOTE(review): the "= case mop of" line appears lost in extraction.
      MO_F_Eq w -> condFltReg EQQ x y
      MO_F_Ne w -> condFltReg NE x y
      MO_F_Gt w -> condFltReg GTT x y
      MO_F_Ge w -> condFltReg GE x y
      MO_F_Lt w -> condFltReg LTT x y
      MO_F_Le w -> condFltReg LE x y

      MO_Eq rep -> condIntReg EQQ x y
      MO_Ne rep -> condIntReg NE x y

      MO_S_Gt rep -> condIntReg GTT x y
      MO_S_Ge rep -> condIntReg GE x y
      MO_S_Lt rep -> condIntReg LTT x y
      MO_S_Le rep -> condIntReg LE x y

      MO_U_Gt rep -> condIntReg GU x y
      MO_U_Ge rep -> condIntReg GEU x y
      MO_U_Lt rep -> condIntReg LU x y
      MO_U_Le rep -> condIntReg LEU x y

#if i386_TARGET_ARCH
      -- x87 float arithmetic on i386...
      MO_F_Add w -> trivialFCode w GADD x y
      MO_F_Sub w -> trivialFCode w GSUB x y
      MO_F_Quot w -> trivialFCode w GDIV x y
      MO_F_Mul w -> trivialFCode w GMUL x y

#if x86_64_TARGET_ARCH
      -- ...SSE float arithmetic on x86_64.
      MO_F_Add w -> trivialFCode w ADD x y
      MO_F_Sub w -> trivialFCode w SUB x y
      MO_F_Quot w -> trivialFCode w FDIV x y
      MO_F_Mul w -> trivialFCode w MUL x y

      MO_Add rep -> add_code rep x y
      MO_Sub rep -> sub_code rep x y

      MO_S_Quot rep -> div_code rep True True x y
      MO_S_Rem rep -> div_code rep True False x y
      MO_U_Quot rep -> div_code rep False True x y
      MO_U_Rem rep -> div_code rep False False x y

      MO_S_MulMayOflo rep -> imulMayOflo rep x y

      MO_Mul rep -> triv_op rep IMUL
      MO_And rep -> triv_op rep AND
      MO_Or rep -> triv_op rep OR
      MO_Xor rep -> triv_op rep XOR

        {- Shift ops on x86s have constraints on their source, it
           either has to be Imm, CL or 1
           => trivialCode is not restrictive enough (sigh.)
        -}
      MO_Shl rep -> shift_code rep SHL x y {-False-}
      MO_U_Shr rep -> shift_code rep SHR x y {-False-}
      MO_S_Shr rep -> shift_code rep SAR x y {-False-}

      other -> pprPanic "getRegister(x86) - binary CmmMachOp (1)" (pprMachOp mop)
-- NOTE(review): the "where" line appears lost in extraction.
    --------------------
    triv_op width instr = trivialCode width op (Just op) x y
                        where op = instr (intSize width)
1128 imulMayOflo :: Width -> CmmExpr -> CmmExpr -> NatM Register
1129 imulMayOflo rep a b = do
1130 (a_reg, a_code) <- getNonClobberedReg a
1131 b_code <- getAnyReg b
1133 shift_amt = case rep of
1136 _ -> panic "shift_amt"
1139 code = a_code `appOL` b_code eax `appOL`
1141 IMUL2 size (OpReg a_reg), -- result in %edx:%eax
1142 SAR size (OpImm (ImmInt shift_amt)) (OpReg eax),
1143 -- sign extend lower part
1144 SUB size (OpReg edx) (OpReg eax)
1145 -- compare against upper
1146 -- eax==0 if high part == sign extended low part
1149 return (Fixed size eax code)
1151 --------------------
1153 -> (Size -> Operand -> Operand -> Instr)
1158 {- Case1: shift length as immediate -}
1159 shift_code width instr x y@(CmmLit lit) = do
1160 x_code <- getAnyReg x
1162 size = intSize width
1164 = x_code dst `snocOL`
1165 instr size (OpImm (litToImm lit)) (OpReg dst)
1167 return (Any size code)
1169 {- Case2: shift length is complex (non-immediate)
1170 * y must go in %ecx.
1171 * we cannot do y first *and* put its result in %ecx, because
1172 %ecx might be clobbered by x.
1173 * if we do y second, then x cannot be
1174 in a clobbered reg. Also, we cannot clobber x's reg
1175 with the instruction itself.
1177 - do y first, put its result in a fresh tmp, then copy it to %ecx later
1178 - do y second and put its result into %ecx. x gets placed in a fresh
1179 tmp. This is likely to be better, because the reg alloc can
1180 eliminate this reg->reg move here (it won't eliminate the other one,
1181 because the move is into the fixed %ecx).
1183 shift_code width instr x y{-amount-} = do
1184 x_code <- getAnyReg x
1185 let size = intSize width
1186 tmp <- getNewRegNat size
1187 y_code <- getAnyReg y
1189 code = x_code tmp `appOL`
1191 instr size (OpReg ecx) (OpReg tmp)
1193 return (Fixed size tmp code)
1195 --------------------
1196 add_code :: Width -> CmmExpr -> CmmExpr -> NatM Register
1197 add_code rep x (CmmLit (CmmInt y _))
1198 | is32BitInteger y = add_int rep x y
1199 add_code rep x y = trivialCode rep (ADD size) (Just (ADD size)) x y
1200 where size = intSize rep
1202 --------------------
1203 sub_code :: Width -> CmmExpr -> CmmExpr -> NatM Register
1204 sub_code rep x (CmmLit (CmmInt y _))
1205 | is32BitInteger (-y) = add_int rep x (-y)
1206 sub_code rep x y = trivialCode rep (SUB (intSize rep)) Nothing x y
1208 -- our three-operand add instruction:
1209 add_int width x y = do
1210 (x_reg, x_code) <- getSomeReg x
1212 size = intSize width
1213 imm = ImmInt (fromInteger y)
1217 (OpAddr (AddrBaseIndex (EABaseReg x_reg) EAIndexNone imm))
1220 return (Any size code)
1222 ----------------------
1223 div_code width signed quotient x y = do
1224 (y_op, y_code) <- getRegOrMem y -- cannot be clobbered
1225 x_code <- getAnyReg x
1227 size = intSize width
1228 widen | signed = CLTD size
1229 | otherwise = XOR size (OpReg edx) (OpReg edx)
1231 instr | signed = IDIV
1234 code = y_code `appOL`
1236 toOL [widen, instr size y_op]
1238 result | quotient = eax
1242 return (Fixed size result code)
1245 getRegister (CmmLoad mem pk)
1248 Amode src mem_code <- getAmode mem
1250 size = cmmTypeSize pk
1251 code dst = mem_code `snocOL`
1252 IF_ARCH_i386(GLD size src dst,
1253 MOV size (OpAddr src) (OpReg dst))
1254 return (Any size code)
1256 #if i386_TARGET_ARCH
1257 getRegister (CmmLoad mem pk)
1260 code <- intLoadCode instr mem
1261 return (Any size code)
1263 width = typeWidth pk
1264 size = intSize width
1265 instr = case width of
1268 -- We always zero-extend 8-bit loads, if we
1269 -- can't think of anything better. This is because
1270 -- we can't guarantee access to an 8-bit variant of every register
1271 -- (esi and edi don't have 8-bit variants), so to make things
1272 -- simpler we do our 8-bit arithmetic with full 32-bit registers.
1275 #if x86_64_TARGET_ARCH
1276 -- Simpler memory load code on x86_64
1277 getRegister (CmmLoad mem pk)
1279 code <- intLoadCode (MOV size) mem
1280 return (Any size code)
1281 where size = intSize $ typeWidth pk
1284 getRegister (CmmLit (CmmInt 0 width))
1286 size = intSize width
1288 -- x86_64: 32-bit xor is one byte shorter, and zero-extends to 64 bits
1289 adj_size = case size of II64 -> II32; _ -> size
1290 size1 = IF_ARCH_i386( size, adj_size )
1292 = unitOL (XOR size1 (OpReg dst) (OpReg dst))
1294 return (Any size code)
1296 #if x86_64_TARGET_ARCH
1297 -- optimisation for loading small literals on x86_64: take advantage
1298 -- of the automatic zero-extension from 32 to 64 bits, because the 32-bit
1299 -- instruction forms are shorter.
1300 getRegister (CmmLit lit)
1301 | isWord64 (cmmLitType lit), not (isBigLit lit)
1304 code dst = unitOL (MOV II32 (OpImm imm) (OpReg dst))
1306 return (Any II64 code)
1308 isBigLit (CmmInt i _) = i < 0 || i > 0xffffffff
1310 -- note1: not the same as (not.is32BitLit), because that checks for
1311 -- signed literals that fit in 32 bits, but we want unsigned
1313 -- note2: all labels are small, because we're assuming the
1314 -- small memory model (see gcc docs, -mcmodel=small).
1317 getRegister (CmmLit lit)
1319 size = cmmTypeSize (cmmLitType lit)
1321 code dst = unitOL (MOV size (OpImm imm) (OpReg dst))
1323 return (Any size code)
1325 getRegister other = pprPanic "getRegister(x86)" (ppr other)
-- | Emit code for an integer load from memory: compute the addressing
-- mode for the given expression, then return a continuation that
-- appends the supplied load instruction targeting whichever destination
-- register the caller eventually chooses.
intLoadCode :: (Operand -> Operand -> Instr) -> CmmExpr
        -> NatM (Reg -> InstrBlock)
intLoadCode mk_load addr_expr = do
  Amode addr addr_setup <- getAmode addr_expr
  let load_into dst = addr_setup `snocOL` mk_load (OpAddr addr) (OpReg dst)
  return load_into
1334 -- Compute an expression into *any* register, adding the appropriate
1335 -- move instruction if necessary.
1336 getAnyReg :: CmmExpr -> NatM (Reg -> InstrBlock)
1338 r <- getRegister expr
-- | Turn a 'Register' into a code-generating function parameterised on
-- the destination register.  An 'Any' result already is such a
-- function; a 'Fixed' result needs a reg-to-reg move appended to put
-- the value where the caller wants it.
anyReg :: Register -> NatM (Reg -> InstrBlock)
anyReg r = case r of
  Any _ code         -> return code
  Fixed sz src fcode -> return (\dst -> fcode `snocOL` reg2reg sz src dst)
1345 -- A bit like getSomeReg, but we want a reg that can be byte-addressed.
1346 -- Fixed registers might not be byte-addressable, so we make sure we've
1347 -- got a temporary, inserting an extra reg copy if necessary.
1348 getByteReg :: CmmExpr -> NatM (Reg, InstrBlock)
1349 #if x86_64_TARGET_ARCH
1350 getByteReg = getSomeReg -- all regs are byte-addressable on x86_64
1352 getByteReg expr = do
1353 r <- getRegister expr
1356 tmp <- getNewRegNat rep
1357 return (tmp, code tmp)
1359 | isVirtualReg reg -> return (reg,code)
1361 tmp <- getNewRegNat rep
1362 return (tmp, code `snocOL` reg2reg rep reg tmp)
1363 -- ToDo: could optimise slightly by checking for byte-addressable
1364 -- real registers, but that will happen very rarely if at all.
1367 -- Another variant: this time we want the result in a register that cannot
1368 -- be modified by code to evaluate an arbitrary expression.
1369 getNonClobberedReg :: CmmExpr -> NatM (Reg, InstrBlock)
1370 getNonClobberedReg expr = do
1371 r <- getRegister expr
1374 tmp <- getNewRegNat rep
1375 return (tmp, code tmp)
1377 -- only free regs can be clobbered
1378 | RealReg rr <- reg, isFastTrue (freeReg rr) -> do
1379 tmp <- getNewRegNat rep
1380 return (tmp, code `snocOL` reg2reg rep reg tmp)
1384 reg2reg :: Size -> Reg -> Reg -> Instr
1385 reg2reg size src dst
1386 #if i386_TARGET_ARCH
1387 | isFloatSize size = GMOV src dst
1389 | otherwise = MOV size (OpReg src) (OpReg dst)
1391 #endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */
1393 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1395 #if sparc_TARGET_ARCH
1397 -- getRegister :: CmmExpr -> NatM Register
1399 -- Load a literal float into a float register.
1400 -- The actual literal is stored in a new data area, and we load it
1402 getRegister (CmmLit (CmmFloat f W32)) = do
1404 -- a label for the new data area
1405 lbl <- getNewLabelNat
1406 tmp <- getNewRegNat II32
1408 let code dst = toOL [
1412 CmmStaticLit (CmmFloat f W32)],
1415 SETHI (HI (ImmCLbl lbl)) tmp,
1416 LD II32 (AddrRegImm tmp (LO (ImmCLbl lbl))) dst]
1418 return (Any FF32 code)
1420 getRegister (CmmLit (CmmFloat d W64)) = do
1421 lbl <- getNewLabelNat
1422 tmp <- getNewRegNat II32
1423 let code dst = toOL [
1426 CmmStaticLit (CmmFloat d W64)],
1427 SETHI (HI (ImmCLbl lbl)) tmp,
1428 LD II64 (AddrRegImm tmp (LO (ImmCLbl lbl))) dst]
1429 return (Any FF64 code)
1431 getRegister (CmmMachOp mop [x]) -- unary MachOps
1433 MO_F_Neg W32 -> trivialUFCode FF32 (FNEG FF32) x
1434 MO_F_Neg W64 -> trivialUFCode FF64 (FNEG FF64) x
1436 MO_S_Neg rep -> trivialUCode (intSize rep) (SUB False False g0) x
1437 MO_Not rep -> trivialUCode (intSize rep) (XNOR False g0) x
1439 MO_FF_Conv W64 W32-> coerceDbl2Flt x
1440 MO_FF_Conv W32 W64-> coerceFlt2Dbl x
1442 MO_FS_Conv from to -> coerceFP2Int from to x
1443 MO_SF_Conv from to -> coerceInt2FP from to x
1445 -- Conversions which are a nop on sparc
1447 | from == to -> conversionNop (intSize to) x
1448 MO_UU_Conv W32 W8 -> trivialCode W8 (AND False) x (CmmLit (CmmInt 255 W8))
1449 MO_UU_Conv W32 to -> conversionNop (intSize to) x
1450 MO_SS_Conv W32 to -> conversionNop (intSize to) x
1453 MO_UU_Conv W8 W32 -> integerExtend False W8 W32 x
1454 MO_UU_Conv W16 W32 -> integerExtend False W16 W32 x
1455 MO_UU_Conv W8 W16 -> integerExtend False W8 W16 x
1456 MO_SS_Conv W16 W32 -> integerExtend True W16 W32 x
1458 other_op -> panic "Unknown unary mach op"
1461 integerExtend signed from to expr = do
1462 (reg, e_code) <- getSomeReg expr
1466 ((if signed then SRA else SRL)
1467 reg (RIImm (ImmInt 0)) dst)
1468 return (Any (intSize to) code)
1469 conversionNop new_rep expr
1470 = do e_code <- getRegister expr
1471 return (swizzleRegisterRep e_code new_rep)
1473 getRegister (CmmMachOp mop [x, y]) -- dyadic PrimOps
1475 MO_Eq rep -> condIntReg EQQ x y
1476 MO_Ne rep -> condIntReg NE x y
1478 MO_S_Gt rep -> condIntReg GTT x y
1479 MO_S_Ge rep -> condIntReg GE x y
1480 MO_S_Lt rep -> condIntReg LTT x y
1481 MO_S_Le rep -> condIntReg LE x y
1483 MO_U_Gt W32 -> condIntReg GTT x y
1484 MO_U_Ge W32 -> condIntReg GE x y
1485 MO_U_Lt W32 -> condIntReg LTT x y
1486 MO_U_Le W32 -> condIntReg LE x y
1488 MO_U_Gt W16 -> condIntReg GU x y
1489 MO_U_Ge W16 -> condIntReg GEU x y
1490 MO_U_Lt W16 -> condIntReg LU x y
1491 MO_U_Le W16 -> condIntReg LEU x y
1493 MO_Add W32 -> trivialCode W32 (ADD False False) x y
1494 MO_Sub W32 -> trivialCode W32 (SUB False False) x y
1496 MO_S_MulMayOflo rep -> imulMayOflo rep x y
1498 -- ToDo: teach about V8+ SPARC div instructions
1499 MO_S_Quot W32 -> idiv FSLIT(".div") x y
1500 MO_S_Rem W32 -> idiv FSLIT(".rem") x y
1501 MO_U_Quot W32 -> idiv FSLIT(".udiv") x y
1502 MO_U_Rem W32 -> idiv FSLIT(".urem") x y
1505 MO_F_Eq w -> condFltReg EQQ x y
1506 MO_F_Ne w -> condFltReg NE x y
1508 MO_F_Gt w -> condFltReg GTT x y
1509 MO_F_Ge w -> condFltReg GE x y
1510 MO_F_Lt w -> condFltReg LTT x y
1511 MO_F_Le w -> condFltReg LE x y
1513 MO_F_Add w -> trivialFCode w FADD x y
1514 MO_F_Sub w -> trivialFCode w FSUB x y
1515 MO_F_Mul w -> trivialFCode w FMUL x y
1516 MO_F_Quot w -> trivialFCode w FDIV x y
1518 MO_And rep -> trivialCode rep (AND False) x y
1519 MO_Or rep -> trivialCode rep (OR False) x y
1520 MO_Xor rep -> trivialCode rep (XOR False) x y
1522 MO_Mul rep -> trivialCode rep (SMUL False) x y
1524 MO_Shl rep -> trivialCode rep SLL x y
1525 MO_U_Shr rep -> trivialCode rep SRL x y
1526 MO_S_Shr rep -> trivialCode rep SRA x y
1529 MO_F32_Pwr -> getRegister (StCall (Left (fsLit "pow")) CCallConv FF64
1530 [promote x, promote y])
1531 where promote x = CmmMachOp MO_F32_to_Dbl [x]
1532 MO_F64_Pwr -> getRegister (StCall (Left (fsLit "pow")) CCallConv FF64
1535 other -> pprPanic "getRegister(sparc) - binary CmmMachOp (1)" (pprMachOp mop)
1537 --idiv fn x y = getRegister (StCall (Left fn) CCallConv II32 [x, y])
1539 --------------------
1540 imulMayOflo :: Width -> CmmExpr -> CmmExpr -> NatM Register
1541 imulMayOflo rep a b = do
1542 (a_reg, a_code) <- getSomeReg a
1543 (b_reg, b_code) <- getSomeReg b
1544 res_lo <- getNewRegNat II32
1545 res_hi <- getNewRegNat II32
1547 shift_amt = case rep of
1550 _ -> panic "shift_amt"
1551 code dst = a_code `appOL` b_code `appOL`
1553 SMUL False a_reg (RIReg b_reg) res_lo,
1555 SRA res_lo (RIImm (ImmInt shift_amt)) res_lo,
1556 SUB False False res_lo (RIReg res_hi) dst
1558 return (Any II32 code)
1560 getRegister (CmmLoad mem pk) = do
1561 Amode src code <- getAmode mem
1563 code__2 dst = code `snocOL` LD (cmmTypeSize pk) src dst
1564 return (Any (cmmTypeSize pk) code__2)
1566 getRegister (CmmLit (CmmInt i _))
1569 src = ImmInt (fromInteger i)
1570 code dst = unitOL (OR False g0 (RIImm src) dst)
1572 return (Any II32 code)
1574 getRegister (CmmLit lit)
1575 = let rep = cmmLitType lit
1579 OR False dst (RIImm (LO imm)) dst]
1580 in return (Any II32 code)
1582 #endif /* sparc_TARGET_ARCH */
1584 #if powerpc_TARGET_ARCH
1585 getRegister (CmmLoad mem pk)
1588 Amode addr addr_code <- getAmode mem
1589 let code dst = ASSERT((regClass dst == RcDouble) == isFloatType pk)
1590 addr_code `snocOL` LD size dst addr
1591 return (Any size code)
1592 where size = cmmTypeSize pk
1594 -- catch simple cases of zero- or sign-extended load
1595 getRegister (CmmMachOp (MO_UU_Conv W8 W32) [CmmLoad mem _]) = do
1596 Amode addr addr_code <- getAmode mem
1597 return (Any II32 (\dst -> addr_code `snocOL` LD II8 dst addr))
1599 -- Note: there is no Load Byte Arithmetic instruction, so no signed case here
1601 getRegister (CmmMachOp (MO_UU_Conv W16 W32) [CmmLoad mem _]) = do
1602 Amode addr addr_code <- getAmode mem
1603 return (Any II32 (\dst -> addr_code `snocOL` LD II16 dst addr))
1605 getRegister (CmmMachOp (MO_SS_Conv W16 W32) [CmmLoad mem _]) = do
1606 Amode addr addr_code <- getAmode mem
1607 return (Any II32 (\dst -> addr_code `snocOL` LA II16 dst addr))
1609 getRegister (CmmMachOp mop [x]) -- unary MachOps
1611 MO_Not rep -> triv_ucode_int rep NOT
1613 MO_F_Neg w -> triv_ucode_float w FNEG
1614 MO_S_Neg w -> triv_ucode_int w NEG
1616 MO_FF_Conv W64 W32 -> trivialUCode FF32 FRSP x
1617 MO_FF_Conv W32 W64 -> conversionNop FF64 x
1619 MO_FS_Conv from to -> coerceFP2Int from to x
1620 MO_SF_Conv from to -> coerceInt2FP from to x
1623 | from == to -> conversionNop (intSize to) x
1625 -- narrowing is a nop: we treat the high bits as undefined
1626 MO_SS_Conv W32 to -> conversionNop (intSize to) x
1627 MO_SS_Conv W16 W8 -> conversionNop II8 x
1628 MO_SS_Conv W8 to -> triv_ucode_int to (EXTS II8)
1629 MO_SS_Conv W16 to -> triv_ucode_int to (EXTS II16)
1632 | from == to -> conversionNop (intSize to) x
1633 -- narrowing is a nop: we treat the high bits as undefined
1634 MO_UU_Conv W32 to -> conversionNop (intSize to) x
1635 MO_UU_Conv W16 W8 -> conversionNop II8 x
1636 MO_UU_Conv W8 to -> trivialCode to False AND x (CmmLit (CmmInt 255 W32))
1637 MO_UU_Conv W16 to -> trivialCode to False AND x (CmmLit (CmmInt 65535 W32))
1640 triv_ucode_int width instr = trivialUCode (intSize width) instr x
1641 triv_ucode_float width instr = trivialUCode (floatSize width) instr x
1643 conversionNop new_size expr
1644 = do e_code <- getRegister expr
1645 return (swizzleRegisterRep e_code new_size)
1647 getRegister (CmmMachOp mop [x, y]) -- dyadic PrimOps
1649 MO_F_Eq w -> condFltReg EQQ x y
1650 MO_F_Ne w -> condFltReg NE x y
1651 MO_F_Gt w -> condFltReg GTT x y
1652 MO_F_Ge w -> condFltReg GE x y
1653 MO_F_Lt w -> condFltReg LTT x y
1654 MO_F_Le w -> condFltReg LE x y
1656 MO_Eq rep -> condIntReg EQQ (extendUExpr rep x) (extendUExpr rep y)
1657 MO_Ne rep -> condIntReg NE (extendUExpr rep x) (extendUExpr rep y)
1659 MO_S_Gt rep -> condIntReg GTT (extendSExpr rep x) (extendSExpr rep y)
1660 MO_S_Ge rep -> condIntReg GE (extendSExpr rep x) (extendSExpr rep y)
1661 MO_S_Lt rep -> condIntReg LTT (extendSExpr rep x) (extendSExpr rep y)
1662 MO_S_Le rep -> condIntReg LE (extendSExpr rep x) (extendSExpr rep y)
1664 MO_U_Gt rep -> condIntReg GU (extendUExpr rep x) (extendUExpr rep y)
1665 MO_U_Ge rep -> condIntReg GEU (extendUExpr rep x) (extendUExpr rep y)
1666 MO_U_Lt rep -> condIntReg LU (extendUExpr rep x) (extendUExpr rep y)
1667 MO_U_Le rep -> condIntReg LEU (extendUExpr rep x) (extendUExpr rep y)
1669 MO_F_Add w -> triv_float w FADD
1670 MO_F_Sub w -> triv_float w FSUB
1671 MO_F_Mul w -> triv_float w FMUL
1672 MO_F_Quot w -> triv_float w FDIV
1674 -- optimize addition with 32-bit immediate
1678 CmmLit (CmmInt imm immrep) | Just _ <- makeImmediate W32 True (-imm)
1679 -> trivialCode W32 True ADD x (CmmLit $ CmmInt imm immrep)
1682 (src, srcCode) <- getSomeReg x
1683 let imm = litToImm lit
1684 code dst = srcCode `appOL` toOL [
1685 ADDIS dst src (HA imm),
1686 ADD dst dst (RIImm (LO imm))
1688 return (Any II32 code)
1689 _ -> trivialCode W32 True ADD x y
1691 MO_Add rep -> trivialCode rep True ADD x y
1693 case y of -- subfi ('substract from' with immediate) doesn't exist
1694 CmmLit (CmmInt imm immrep) | Just _ <- makeImmediate rep True (-imm)
1695 -> trivialCode rep True ADD x (CmmLit $ CmmInt (-imm) immrep)
1696 _ -> trivialCodeNoImm' (intSize rep) SUBF y x
1698 MO_Mul rep -> trivialCode rep True MULLW x y
1700 MO_S_MulMayOflo W32 -> trivialCodeNoImm' II32 MULLW_MayOflo x y
1702 MO_S_MulMayOflo rep -> panic "S_MulMayOflo (rep /= II32): not implemented"
1703 MO_U_MulMayOflo rep -> panic "U_MulMayOflo: not implemented"
1705 MO_S_Quot rep -> trivialCodeNoImm' (intSize rep) DIVW (extendSExpr rep x) (extendSExpr rep y)
1706 MO_U_Quot rep -> trivialCodeNoImm' (intSize rep) DIVWU (extendUExpr rep x) (extendUExpr rep y)
1708 MO_S_Rem rep -> remainderCode rep DIVW (extendSExpr rep x) (extendSExpr rep y)
1709 MO_U_Rem rep -> remainderCode rep DIVWU (extendUExpr rep x) (extendUExpr rep y)
1711 MO_And rep -> trivialCode rep False AND x y
1712 MO_Or rep -> trivialCode rep False OR x y
1713 MO_Xor rep -> trivialCode rep False XOR x y
1715 MO_Shl rep -> trivialCode rep False SLW x y
1716 MO_S_Shr rep -> trivialCode rep False SRAW (extendSExpr rep x) y
1717 MO_U_Shr rep -> trivialCode rep False SRW (extendUExpr rep x) y
1719 triv_float :: Width -> (Size -> Reg -> Reg -> Reg -> Instr) -> NatM Register
1720 triv_float width instr = trivialCodeNoImm (floatSize width) instr x y
1722 getRegister (CmmLit (CmmInt i rep))
1723 | Just imm <- makeImmediate rep True i
1725 code dst = unitOL (LI dst imm)
1727 return (Any (intSize rep) code)
1729 getRegister (CmmLit (CmmFloat f frep)) = do
1730 lbl <- getNewLabelNat
1731 dflags <- getDynFlagsNat
1732 dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl
1733 Amode addr addr_code <- getAmode dynRef
1734 let size = floatSize frep
1736 LDATA ReadOnlyData [CmmDataLabel lbl,
1737 CmmStaticLit (CmmFloat f frep)]
1738 `consOL` (addr_code `snocOL` LD size dst addr)
1739 return (Any size code)
1741 getRegister (CmmLit lit)
1742 = let rep = cmmLitType lit
1746 ADD dst dst (RIImm (LO imm))
1748 in return (Any (cmmTypeSize rep) code)
1750 getRegister other = pprPanic "getRegister(ppc)" (pprExpr other)
-- extendSExpr/extendUExpr: wrap an integer expression of the given
-- width in a sign-extending (resp. zero-extending) conversion to W32.
-- A W32 expression is already the right width and is returned as-is.
extendSExpr rep x
  | rep == W32 = x
  | otherwise  = CmmMachOp (MO_SS_Conv rep W32) [x]
extendUExpr rep x
  | rep == W32 = x
  | otherwise  = CmmMachOp (MO_UU_Conv rep W32) [x]
1759 #endif /* powerpc_TARGET_ARCH */
1762 -- -----------------------------------------------------------------------------
1763 -- The 'Amode' type: Memory addressing modes passed up the tree.
1765 data Amode = Amode AddrMode InstrBlock
1768 Now, given a tree (the argument to a CmmLoad) that references memory,
1769 produce a suitable addressing mode.
1771 A Rule of the Game (tm) for Amodes: use of the addr bit must
1772 immediately follow use of the code part, since the code part puts
1773 values in registers which the addr then refers to. So you can't put
1774 anything in between, lest it overwrite some of those registers. If
1775 you need to do some other computation between the code part and use of
1776 the addr bit, first store the effective address from the amode in a
1777 temporary, then do the other computation, and then use the temporary:
1781 ... other computation ...
1785 getAmode :: CmmExpr -> NatM Amode
1786 getAmode tree@(CmmRegOff _ _) = getAmode (mangleIndexTree tree)
1788 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1790 #if alpha_TARGET_ARCH
1792 getAmode (StPrim IntSubOp [x, StInt i])
1793 = getNewRegNat PtrRep `thenNat` \ tmp ->
1794 getRegister x `thenNat` \ register ->
1796 code = registerCode register tmp
1797 reg = registerName register tmp
1798 off = ImmInt (-(fromInteger i))
1800 return (Amode (AddrRegImm reg off) code)
1802 getAmode (StPrim IntAddOp [x, StInt i])
1803 = getNewRegNat PtrRep `thenNat` \ tmp ->
1804 getRegister x `thenNat` \ register ->
1806 code = registerCode register tmp
1807 reg = registerName register tmp
1808 off = ImmInt (fromInteger i)
1810 return (Amode (AddrRegImm reg off) code)
1814 = return (Amode (AddrImm imm__2) id)
1817 imm__2 = case imm of Just x -> x
1820 = getNewRegNat PtrRep `thenNat` \ tmp ->
1821 getRegister other `thenNat` \ register ->
1823 code = registerCode register tmp
1824 reg = registerName register tmp
1826 return (Amode (AddrReg reg) code)
1828 #endif /* alpha_TARGET_ARCH */
1830 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1832 #if x86_64_TARGET_ARCH
1834 getAmode (CmmMachOp (MO_Add W64) [CmmReg (CmmGlobal PicBaseReg),
1835 CmmLit displacement])
1836 = return $ Amode (ripRel (litToImm displacement)) nilOL
1840 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
1842 -- This is all just ridiculous, since it carefully undoes
1843 -- what mangleIndexTree has just done.
1844 getAmode (CmmMachOp (MO_Sub rep) [x, CmmLit lit@(CmmInt i _)])
1846 -- ASSERT(rep == II32)???
1847 = do (x_reg, x_code) <- getSomeReg x
1848 let off = ImmInt (-(fromInteger i))
1849 return (Amode (AddrBaseIndex (EABaseReg x_reg) EAIndexNone off) x_code)
1851 getAmode (CmmMachOp (MO_Add rep) [x, CmmLit lit@(CmmInt i _)])
1853 -- ASSERT(rep == II32)???
1854 = do (x_reg, x_code) <- getSomeReg x
1855 let off = ImmInt (fromInteger i)
1856 return (Amode (AddrBaseIndex (EABaseReg x_reg) EAIndexNone off) x_code)
1858 -- Turn (lit1 << n + lit2) into (lit2 + lit1 << n) so it will be
1859 -- recognised by the next rule.
1860 getAmode (CmmMachOp (MO_Add rep) [a@(CmmMachOp (MO_Shl _) _),
1862 = getAmode (CmmMachOp (MO_Add rep) [b,a])
-- (base + (index << k)) with k in {0,1,2,3}: fold into one
-- scaled-index addressing mode with zero displacement.
getAmode (CmmMachOp (MO_Add rep) [x, CmmMachOp (MO_Shl _)
                                      [y, CmmLit (CmmInt shift _)]])
  | shift `elem` [0, 1, 2, 3]
  = x86_complex_amode x y shift 0
-- (base + ((index << k) + offset)): fold everything into a single
-- scaled-index addressing mode with displacement.
--
-- BUG FIX: (&&) binds tighter (infixr 3) than (||) (infixr 2), so the
-- original guard
--     shift == 0 || shift == 1 || shift == 2 || shift == 3
--       && is32BitInteger offset
-- only applied the is32BitInteger check when shift == 3; an over-large
-- offset with shift 0, 1 or 2 would slip past the 32-bit displacement
-- range check.  Parenthesise the shift test so the offset is always
-- range-checked.
getAmode (CmmMachOp (MO_Add rep)
                [x, CmmMachOp (MO_Add _)
                        [CmmMachOp (MO_Shl _) [y, CmmLit (CmmInt shift _)],
                         CmmLit (CmmInt offset _)]])
  | (shift == 0 || shift == 1 || shift == 2 || shift == 3)
    && is32BitInteger offset
  = x86_complex_amode x y shift offset
1877 getAmode (CmmMachOp (MO_Add rep) [x,y])
1878 = x86_complex_amode x y 0 0
1880 getAmode (CmmLit lit) | is32BitLit lit
1881 = return (Amode (ImmAddr (litToImm lit) 0) nilOL)
1884 (reg,code) <- getSomeReg expr
1885 return (Amode (AddrBaseIndex (EABaseReg reg) EAIndexNone (ImmInt 0)) code)
1888 x86_complex_amode :: CmmExpr -> CmmExpr -> Integer -> Integer -> NatM Amode
1889 x86_complex_amode base index shift offset
1890 = do (x_reg, x_code) <- getNonClobberedReg base
1891 -- x must be in a temp, because it has to stay live over y_code
1892          -- we could compare x_reg and y_reg and do something better here...
1893 (y_reg, y_code) <- getSomeReg index
1895 code = x_code `appOL` y_code
1896 base = case shift of 0 -> 1; 1 -> 2; 2 -> 4; 3 -> 8
1897 return (Amode (AddrBaseIndex (EABaseReg x_reg) (EAIndex y_reg base) (ImmInt (fromIntegral offset)))
1900 #endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */
1902 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1904 #if sparc_TARGET_ARCH
1906 getAmode (CmmMachOp (MO_Sub rep) [x, CmmLit (CmmInt i _)])
1909 (reg, code) <- getSomeReg x
1911 off = ImmInt (-(fromInteger i))
1912 return (Amode (AddrRegImm reg off) code)
1915 getAmode (CmmMachOp (MO_Add rep) [x, CmmLit (CmmInt i _)])
1918 (reg, code) <- getSomeReg x
1920 off = ImmInt (fromInteger i)
1921 return (Amode (AddrRegImm reg off) code)
1923 getAmode (CmmMachOp (MO_Add rep) [x, y])
1925 (regX, codeX) <- getSomeReg x
1926 (regY, codeY) <- getSomeReg y
1928 code = codeX `appOL` codeY
1929 return (Amode (AddrRegReg regX regY) code)
1931 -- XXX Is this same as "leaf" in Stix?
1932 getAmode (CmmLit lit)
1934 tmp <- getNewRegNat II32
1936 code = unitOL (SETHI (HI imm__2) tmp)
1937 return (Amode (AddrRegImm tmp (LO imm__2)) code)
1939 imm__2 = litToImm lit
1943 (reg, code) <- getSomeReg other
1946 return (Amode (AddrRegImm reg off) code)
1948 #endif /* sparc_TARGET_ARCH */
1950 #ifdef powerpc_TARGET_ARCH
1951 getAmode (CmmMachOp (MO_Sub W32) [x, CmmLit (CmmInt i _)])
1952 | Just off <- makeImmediate W32 True (-i)
1954 (reg, code) <- getSomeReg x
1955 return (Amode (AddrRegImm reg off) code)
1958 getAmode (CmmMachOp (MO_Add W32) [x, CmmLit (CmmInt i _)])
1959 | Just off <- makeImmediate W32 True i
1961 (reg, code) <- getSomeReg x
1962 return (Amode (AddrRegImm reg off) code)
1964 -- optimize addition with 32-bit immediate
1966 getAmode (CmmMachOp (MO_Add W32) [x, CmmLit lit])
1968 tmp <- getNewRegNat II32
1969 (src, srcCode) <- getSomeReg x
1970 let imm = litToImm lit
1971 code = srcCode `snocOL` ADDIS tmp src (HA imm)
1972 return (Amode (AddrRegImm tmp (LO imm)) code)
1974 getAmode (CmmLit lit)
1976 tmp <- getNewRegNat II32
1977 let imm = litToImm lit
1978 code = unitOL (LIS tmp (HA imm))
1979 return (Amode (AddrRegImm tmp (LO imm)) code)
1981 getAmode (CmmMachOp (MO_Add W32) [x, y])
1983 (regX, codeX) <- getSomeReg x
1984 (regY, codeY) <- getSomeReg y
1985 return (Amode (AddrRegReg regX regY) (codeX `appOL` codeY))
1989 (reg, code) <- getSomeReg other
1992 return (Amode (AddrRegImm reg off) code)
1993 #endif /* powerpc_TARGET_ARCH */
1995 -- -----------------------------------------------------------------------------
1996 -- getOperand: sometimes any operand will do.
1998 -- getNonClobberedOperand: the value of the operand will remain valid across
1999 -- the computation of an arbitrary expression, unless the expression
2000 -- is computed directly into a register which the operand refers to
2001 -- (see trivialCode where this function is used for an example).
2003 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
2005 getNonClobberedOperand :: CmmExpr -> NatM (Operand, InstrBlock)
2006 #if x86_64_TARGET_ARCH
2007 getNonClobberedOperand (CmmLit lit)
2008 | isSuitableFloatingPointLit lit = do
2009 lbl <- getNewLabelNat
2010 let code = unitOL (LDATA ReadOnlyData [CmmDataLabel lbl,
2012 return (OpAddr (ripRel (ImmCLbl lbl)), code)
-- A non-float literal that fits in 32 bits can be used directly as an
-- immediate operand; an immediate cannot be clobbered, so no set-up
-- code is needed.
getNonClobberedOperand (CmmLit lit)
  | is32BitLit lit
  , not (isFloatType (cmmLitType lit))
  = return (OpImm (litToImm lit), nilOL)
2017 getNonClobberedOperand (CmmLoad mem pk)
2018 | IF_ARCH_i386(not (isFloatType pk) && not (isWord64 pk), True) = do
2019 Amode src mem_code <- getAmode mem
2021 if (amodeCouldBeClobbered src)
2023 tmp <- getNewRegNat wordSize
2024 return (AddrBaseIndex (EABaseReg tmp) EAIndexNone (ImmInt 0),
2025 unitOL (LEA II32 (OpAddr src) (OpReg tmp)))
2028 return (OpAddr src', save_code `appOL` mem_code)
2029 getNonClobberedOperand e = do
2030 (reg, code) <- getNonClobberedReg e
2031 return (OpReg reg, code)
2033 amodeCouldBeClobbered :: AddrMode -> Bool
2034 amodeCouldBeClobbered amode = any regClobbered (addrModeRegs amode)
2036 regClobbered (RealReg rr) = isFastTrue (freeReg rr)
2037 regClobbered _ = False
2039 -- getOperand: the operand is not required to remain valid across the
2040 -- computation of an arbitrary expression.
2041 getOperand :: CmmExpr -> NatM (Operand, InstrBlock)
2042 #if x86_64_TARGET_ARCH
2043 getOperand (CmmLit lit)
2044 | isSuitableFloatingPointLit lit = do
2045 lbl <- getNewLabelNat
2046 let code = unitOL (LDATA ReadOnlyData [CmmDataLabel lbl,
2048 return (OpAddr (ripRel (ImmCLbl lbl)), code)
-- Small non-float literals become immediate operands directly, with no
-- set-up code.  (The original wrapped the return in a redundant 'do'.)
getOperand (CmmLit lit)
  | is32BitLit lit
  , not (isFloatType (cmmLitType lit))
  = return (OpImm (litToImm lit), nilOL)
-- A memory load can serve as an operand in place (x86 permits memory
-- operands); on i386 we exclude float and 64-bit loads, which need
-- special handling elsewhere.
getOperand (CmmLoad mem pk)
  | IF_ARCH_i386(not (isFloatType pk) && not (isWord64 pk), True) = do
      Amode addr addr_code <- getAmode mem
      return (OpAddr addr, addr_code)
2058 (reg, code) <- getSomeReg e
2059 return (OpReg reg, code)
2061 isOperand :: CmmExpr -> Bool
2062 isOperand (CmmLoad _ _) = True
2063 isOperand (CmmLit lit) = is32BitLit lit
2064 || isSuitableFloatingPointLit lit
2067 -- if we want a floating-point literal as an operand, we can
2068 -- use it directly from memory. However, if the literal is
2069 -- zero, we're better off generating it into a register using
-- Nonzero float literals are worth loading from a memory constant;
-- zero is cheaper to materialise in a register by other means, so it
-- is deliberately not "suitable".
isSuitableFloatingPointLit lit = case lit of
  CmmFloat f _ -> f /= 0.0
  _            -> False
-- | Like 'getOperand', but the operand must stay valid across the
-- evaluation of another arbitrary expression, so non-load cases go
-- through a non-clobberable register (handled by a later equation).
getRegOrMem :: CmmExpr -> NatM (Operand, InstrBlock)
getRegOrMem (CmmLoad mem pk)
  | IF_ARCH_i386(not (isFloatType pk) && not (isWord64 pk), True) = do
      Amode addr addr_code <- getAmode mem
      return (OpAddr addr, addr_code)
2080 (reg, code) <- getNonClobberedReg e
2081 return (OpReg reg, code)
2083 #if x86_64_TARGET_ARCH
2084 is32BitLit (CmmInt i W64) = is32BitInteger i
2085 -- assume that labels are in the range 0-2^31-1: this assumes the
2086 -- small memory model (see gcc docs, -mcmodel=small).
-- | True iff the value, truncated to 64 bits, fits in a signed 32-bit
-- immediate.  We go via Int64 first because a CmmInt is intended to be
-- truncated to the appropriate number of bits: e.g. (-1) may arrive
-- either as -1 or as 18446744073709551615, and both must be accepted.
is32BitInteger :: Integer -> Bool
is32BitInteger i = -0x80000000 <= i64 && i64 <= 0x7fffffff
  where
    i64 = fromIntegral i :: Int64
2099 -- -----------------------------------------------------------------------------
2100 -- The 'CondCode' type: Condition codes passed up the tree.
2102 data CondCode = CondCode Bool Cond InstrBlock
2104 -- Set up a condition code for a conditional branch.
2106 getCondCode :: CmmExpr -> NatM CondCode
2108 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2110 #if alpha_TARGET_ARCH
2111 getCondCode = panic "MachCode.getCondCode: not on Alphas"
2112 #endif /* alpha_TARGET_ARCH */
2114 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2116 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH || sparc_TARGET_ARCH
2117 -- yes, they really do seem to want exactly the same!
2119 getCondCode (CmmMachOp mop [x, y])
2122 MO_F_Eq W32 -> condFltCode EQQ x y
2123 MO_F_Ne W32 -> condFltCode NE x y
2124 MO_F_Gt W32 -> condFltCode GTT x y
2125 MO_F_Ge W32 -> condFltCode GE x y
2126 MO_F_Lt W32 -> condFltCode LTT x y
2127 MO_F_Le W32 -> condFltCode LE x y
2129 MO_F_Eq W64 -> condFltCode EQQ x y
2130 MO_F_Ne W64 -> condFltCode NE x y
2131 MO_F_Gt W64 -> condFltCode GTT x y
2132 MO_F_Ge W64 -> condFltCode GE x y
2133 MO_F_Lt W64 -> condFltCode LTT x y
2134 MO_F_Le W64 -> condFltCode LE x y
2136 MO_Eq rep -> condIntCode EQQ x y
2137 MO_Ne rep -> condIntCode NE x y
2139 MO_S_Gt rep -> condIntCode GTT x y
2140 MO_S_Ge rep -> condIntCode GE x y
2141 MO_S_Lt rep -> condIntCode LTT x y
2142 MO_S_Le rep -> condIntCode LE x y
2144 MO_U_Gt rep -> condIntCode GU x y
2145 MO_U_Ge rep -> condIntCode GEU x y
2146 MO_U_Lt rep -> condIntCode LU x y
2147 MO_U_Le rep -> condIntCode LEU x y
2149 other -> pprPanic "getCondCode(x86,x86_64,sparc)" (ppr (CmmMachOp mop [x,y]))
2151 getCondCode other = pprPanic "getCondCode(2)(x86,sparc)" (ppr other)
2153 #elif powerpc_TARGET_ARCH
2155 -- almost the same as everywhere else - but we need to
2156 -- extend small integers to 32 bit first
2158 getCondCode (CmmMachOp mop [x, y])
2160 MO_F_Eq W32 -> condFltCode EQQ x y
2161 MO_F_Ne W32 -> condFltCode NE x y
2162 MO_F_Gt W32 -> condFltCode GTT x y
2163 MO_F_Ge W32 -> condFltCode GE x y
2164 MO_F_Lt W32 -> condFltCode LTT x y
2165 MO_F_Le W32 -> condFltCode LE x y
2167 MO_F_Eq W64 -> condFltCode EQQ x y
2168 MO_F_Ne W64 -> condFltCode NE x y
2169 MO_F_Gt W64 -> condFltCode GTT x y
2170 MO_F_Ge W64 -> condFltCode GE x y
2171 MO_F_Lt W64 -> condFltCode LTT x y
2172 MO_F_Le W64 -> condFltCode LE x y
2174 MO_Eq rep -> condIntCode EQQ (extendUExpr rep x) (extendUExpr rep y)
2175 MO_Ne rep -> condIntCode NE (extendUExpr rep x) (extendUExpr rep y)
2177 MO_S_Gt rep -> condIntCode GTT (extendSExpr rep x) (extendSExpr rep y)
2178 MO_S_Ge rep -> condIntCode GE (extendSExpr rep x) (extendSExpr rep y)
2179 MO_S_Lt rep -> condIntCode LTT (extendSExpr rep x) (extendSExpr rep y)
2180 MO_S_Le rep -> condIntCode LE (extendSExpr rep x) (extendSExpr rep y)
2182 MO_U_Gt rep -> condIntCode GU (extendUExpr rep x) (extendUExpr rep y)
2183 MO_U_Ge rep -> condIntCode GEU (extendUExpr rep x) (extendUExpr rep y)
2184 MO_U_Lt rep -> condIntCode LU (extendUExpr rep x) (extendUExpr rep y)
2185 MO_U_Le rep -> condIntCode LEU (extendUExpr rep x) (extendUExpr rep y)
2187 other -> pprPanic "getCondCode(powerpc)" (pprMachOp mop)
2189 getCondCode other = panic "getCondCode(2)(powerpc)"
2195 -- @cond(Int|Flt)Code@: Turn a boolean expression into a condition, to be
2196 -- passed back up the tree.
2198 condIntCode, condFltCode :: Cond -> CmmExpr -> CmmExpr -> NatM CondCode
#if alpha_TARGET_ARCH
-- No Alpha implementation; reaching these at compile time is a bug.
condIntCode = panic "MachCode.condIntCode: not on Alphas"
condFltCode = panic "MachCode.condFltCode: not on Alphas"
#endif /* alpha_TARGET_ARCH */
2205 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2206 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
2208 -- memory vs immediate
2209 condIntCode cond (CmmLoad x pk) (CmmLit lit) | is32BitLit lit = do
2210 Amode x_addr x_code <- getAmode x
2213 code = x_code `snocOL`
2214 CMP (cmmTypeSize pk) (OpImm imm) (OpAddr x_addr)
2216 return (CondCode False cond code)
2218 -- anything vs zero, using a mask
2219 -- TODO: Add some sanity checking!!!!
2220 condIntCode cond (CmmMachOp (MO_And rep) [x,o2]) (CmmLit (CmmInt 0 pk))
2221 | (CmmLit lit@(CmmInt mask pk2)) <- o2, is32BitLit lit
2223 (x_reg, x_code) <- getSomeReg x
2225 code = x_code `snocOL`
2226 TEST (intSize pk) (OpImm (ImmInteger mask)) (OpReg x_reg)
2228 return (CondCode False cond code)
2231 condIntCode cond x (CmmLit (CmmInt 0 pk)) = do
2232 (x_reg, x_code) <- getSomeReg x
2234 code = x_code `snocOL`
2235 TEST (intSize pk) (OpReg x_reg) (OpReg x_reg)
2237 return (CondCode False cond code)
2239 -- anything vs operand
2240 condIntCode cond x y | isOperand y = do
2241 (x_reg, x_code) <- getNonClobberedReg x
2242 (y_op, y_code) <- getOperand y
2244 code = x_code `appOL` y_code `snocOL`
2245 CMP (cmmTypeSize (cmmExprType x)) y_op (OpReg x_reg)
2247 return (CondCode False cond code)
2249 -- anything vs anything
2250 condIntCode cond x y = do
2251 (y_reg, y_code) <- getNonClobberedReg y
2252 (x_op, x_code) <- getRegOrMem x
2254 code = y_code `appOL`
2256 CMP (cmmTypeSize (cmmExprType x)) (OpReg y_reg) x_op
2258 return (CondCode False cond code)
2261 #if i386_TARGET_ARCH
2262 condFltCode cond x y
2263 = ASSERT(cond `elem` ([EQQ, NE, LE, LTT, GE, GTT])) do
2264 (x_reg, x_code) <- getNonClobberedReg x
2265 (y_reg, y_code) <- getSomeReg y
2267 code = x_code `appOL` y_code `snocOL`
2268 GCMP cond x_reg y_reg
2269 -- The GCMP insn does the test and sets the zero flag if comparable
2270 -- and true. Hence we always supply EQQ as the condition to test.
2271 return (CondCode True EQQ code)
2272 #endif /* i386_TARGET_ARCH */
2274 #if x86_64_TARGET_ARCH
2275 -- in the SSE2 comparison ops (ucomiss, ucomisd) the left arg may be
2276 -- an operand, but the right must be a reg. We can probably do better
2277 -- than this general case...
2278 condFltCode cond x y = do
2279 (x_reg, x_code) <- getNonClobberedReg x
2280 (y_op, y_code) <- getOperand y
2282 code = x_code `appOL`
2284 CMP (floatSize $ cmmExprWidth x) y_op (OpReg x_reg)
2285 -- NB(1): we need to use the unsigned comparison operators on the
2286 -- result of this comparison.
2288 return (CondCode True (condToUnsigned cond) code)
2291 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2293 #if sparc_TARGET_ARCH
2295 condIntCode cond x (CmmLit (CmmInt y rep))
2298 (src1, code) <- getSomeReg x
2300 src2 = ImmInt (fromInteger y)
2301 code' = code `snocOL` SUB False True src1 (RIImm src2) g0
2302 return (CondCode False cond code')
2304 condIntCode cond x y = do
2305 (src1, code1) <- getSomeReg x
2306 (src2, code2) <- getSomeReg y
2308 code__2 = code1 `appOL` code2 `snocOL`
2309 SUB False True src1 (RIReg src2) g0
2310 return (CondCode False cond code__2)
2313 condFltCode cond x y = do
2314 (src1, code1) <- getSomeReg x
2315 (src2, code2) <- getSomeReg y
2316 tmp <- getNewRegNat FF64
2318 promote x = FxTOy FF32 FF64 x tmp
2324 if pk1 `cmmEqType` pk2 then
2325 code1 `appOL` code2 `snocOL`
2326 FCMP True (cmmTypeSize pk1) src1 src2
2327 else if typeWidth pk1 == W32 then
2328 code1 `snocOL` promote src1 `appOL` code2 `snocOL`
2329 FCMP True FF64 tmp src2
2331 code1 `appOL` code2 `snocOL` promote src2 `snocOL`
2332 FCMP True FF64 src1 tmp
2333 return (CondCode True cond code__2)
2335 #endif /* sparc_TARGET_ARCH */
2337 #if powerpc_TARGET_ARCH
2338 -- ###FIXME: I16 and I8!
2339 condIntCode cond x (CmmLit (CmmInt y rep))
2340 | Just src2 <- makeImmediate rep (not $ condUnsigned cond) y
2342 (src1, code) <- getSomeReg x
2344 code' = code `snocOL`
2345 (if condUnsigned cond then CMPL else CMP) II32 src1 (RIImm src2)
2346 return (CondCode False cond code')
2348 condIntCode cond x y = do
2349 (src1, code1) <- getSomeReg x
2350 (src2, code2) <- getSomeReg y
2352 code' = code1 `appOL` code2 `snocOL`
2353 (if condUnsigned cond then CMPL else CMP) II32 src1 (RIReg src2)
2354 return (CondCode False cond code')
2356 condFltCode cond x y = do
2357 (src1, code1) <- getSomeReg x
2358 (src2, code2) <- getSomeReg y
2360 code' = code1 `appOL` code2 `snocOL` FCMP src1 src2
2361 code'' = case cond of -- twiddle CR to handle unordered case
2362 GE -> code' `snocOL` CRNOR ltbit eqbit gtbit
2363 LE -> code' `snocOL` CRNOR gtbit eqbit ltbit
2366 ltbit = 0 ; eqbit = 2 ; gtbit = 1
2367 return (CondCode True cond code'')
2369 #endif /* powerpc_TARGET_ARCH */
2371 -- -----------------------------------------------------------------------------
2372 -- Generating assignments
2374 -- Assignments are really at the heart of the whole code generation
2375 -- business. Almost all top-level nodes of any real importance are
2376 -- assignments, which correspond to loads, stores, or register
2377 -- transfers. If we're really lucky, some of the register transfers
2378 -- will go away, because we can use the destination register to
2379 -- complete the code generation for the right hand side. This only
2380 -- fails when the right hand side is forced into a fixed register
2381 -- (e.g. the result of a call).
-- Integer assignment: store an expression to memory / move it into a register.
assignMem_IntCode :: Size -> CmmExpr -> CmmExpr -> NatM InstrBlock
assignReg_IntCode :: Size -> CmmReg -> CmmExpr -> NatM InstrBlock

-- Floating-point assignment: store to memory / move into a register.
assignMem_FltCode :: Size -> CmmExpr -> CmmExpr -> NatM InstrBlock
assignReg_FltCode :: Size -> CmmReg -> CmmExpr -> NatM InstrBlock
2389 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2391 #if alpha_TARGET_ARCH
2393 assignIntCode pk (CmmLoad dst _) src
2394 = getNewRegNat IntRep `thenNat` \ tmp ->
2395 getAmode dst `thenNat` \ amode ->
2396 getRegister src `thenNat` \ register ->
2398 code1 = amodeCode amode []
2399 dst__2 = amodeAddr amode
2400 code2 = registerCode register tmp []
2401 src__2 = registerName register tmp
2402 sz = primRepToSize pk
2403 code__2 = asmSeqThen [code1, code2] . mkSeqInstr (ST sz src__2 dst__2)
2407 assignIntCode pk dst src
2408 = getRegister dst `thenNat` \ register1 ->
2409 getRegister src `thenNat` \ register2 ->
2411 dst__2 = registerName register1 zeroh
2412 code = registerCode register2 dst__2
2413 src__2 = registerName register2 dst__2
2414 code__2 = if isFixed register2
2415 then code . mkSeqInstr (OR src__2 (RIReg src__2) dst__2)
2420 #endif /* alpha_TARGET_ARCH */
2422 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2424 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
2426 -- integer assignment to memory
2428 -- specific case of adding/subtracting an integer to a particular address.
2429 -- ToDo: catch other cases where we can use an operation directly on a memory
2431 assignMem_IntCode pk addr (CmmMachOp op [CmmLoad addr2 _,
2432 CmmLit (CmmInt i _)])
2433 | addr == addr2, pk /= II64 || is32BitInteger i,
2434 Just instr <- check op
2435 = do Amode amode code_addr <- getAmode addr
2436 let code = code_addr `snocOL`
2437 instr pk (OpImm (ImmInt (fromIntegral i))) (OpAddr amode)
2440 check (MO_Add _) = Just ADD
2441 check (MO_Sub _) = Just SUB
2446 assignMem_IntCode pk addr src = do
2447 Amode addr code_addr <- getAmode addr
2448 (code_src, op_src) <- get_op_RI src
2450 code = code_src `appOL`
2452 MOV pk op_src (OpAddr addr)
2453 -- NOTE: op_src is stable, so it will still be valid
2454 -- after code_addr. This may involve the introduction
2455 -- of an extra MOV to a temporary register, but we hope
2456 -- the register allocator will get rid of it.
2460 get_op_RI :: CmmExpr -> NatM (InstrBlock,Operand) -- code, operator
2461 get_op_RI (CmmLit lit) | is32BitLit lit
2462 = return (nilOL, OpImm (litToImm lit))
2464 = do (reg,code) <- getNonClobberedReg op
2465 return (code, OpReg reg)
-- Assign to a register; rhs is a plain memory load, which we can fold
-- into a single load-into-register sequence.
assignReg_IntCode pk reg (CmmLoad src _)
  = do mk <- intLoadCode (MOV pk) src
       return (mk (getRegisterReg reg))

-- Assign to a register; rhs may be any expression.
assignReg_IntCode pk reg src
  = do mk <- getAnyReg src
       return (mk (getRegisterReg reg))
2478 #endif /* i386_TARGET_ARCH */
2480 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2482 #if sparc_TARGET_ARCH
-- Store an integer value: evaluate the source into a register, form
-- the destination address, then emit a single ST.
assignMem_IntCode pk addr src = do
    (src_reg, src_code)      <- getSomeReg src
    Amode dst_addr addr_code <- getAmode addr
    return (src_code `appOL` addr_code `snocOL` ST pk src_reg dst_addr)
2489 assignReg_IntCode pk reg src = do
2490 r <- getRegister src
2492 Any _ code -> code dst
2493 Fixed _ freg fcode -> fcode `snocOL` OR False g0 (RIReg freg) dst
2495 dst = getRegisterReg reg
2498 #endif /* sparc_TARGET_ARCH */
2500 #if powerpc_TARGET_ARCH
-- Store an integer value: source into a register, then ST through the
-- computed amode.
assignMem_IntCode pk addr src = do
    (vreg, vcode)   <- getSomeReg src
    Amode dst acode <- getAmode addr
    return $ vcode `appOL` acode `snocOL` ST pk vreg dst
2507 -- dst is a reg, but src could be anything
2508 assignReg_IntCode pk reg src
2510 r <- getRegister src
2512 Any _ code -> code dst
2513 Fixed _ freg fcode -> fcode `snocOL` MR dst freg
2515 dst = getRegisterReg reg
2517 #endif /* powerpc_TARGET_ARCH */
2520 -- -----------------------------------------------------------------------------
2521 -- Floating-point assignments
2523 #if alpha_TARGET_ARCH
2525 assignFltCode pk (CmmLoad dst _) src
2526 = getNewRegNat pk `thenNat` \ tmp ->
2527 getAmode dst `thenNat` \ amode ->
2528 getRegister src `thenNat` \ register ->
2530 code1 = amodeCode amode []
2531 dst__2 = amodeAddr amode
2532 code2 = registerCode register tmp []
2533 src__2 = registerName register tmp
2534 sz = primRepToSize pk
2535 code__2 = asmSeqThen [code1, code2] . mkSeqInstr (ST sz src__2 dst__2)
2539 assignFltCode pk dst src
2540 = getRegister dst `thenNat` \ register1 ->
2541 getRegister src `thenNat` \ register2 ->
2543 dst__2 = registerName register1 zeroh
2544 code = registerCode register2 dst__2
2545 src__2 = registerName register2 dst__2
2546 code__2 = if isFixed register2
2547 then code . mkSeqInstr (FMOV src__2 dst__2)
2552 #endif /* alpha_TARGET_ARCH */
2554 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2556 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
2558 -- Floating point assignment to memory
2559 assignMem_FltCode pk addr src = do
2560 (src_reg, src_code) <- getNonClobberedReg src
2561 Amode addr addr_code <- getAmode addr
2563 code = src_code `appOL`
2565 IF_ARCH_i386(GST pk src_reg addr,
2566 MOV pk (OpReg src_reg) (OpAddr addr))
-- Floating-point assignment to a register/temporary: generate the rhs
-- directly into whichever register the destination maps to.
assignReg_FltCode pk reg src = do
    gen <- getAnyReg src
    return (gen (getRegisterReg reg))
2574 #endif /* i386_TARGET_ARCH */
2576 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2578 #if sparc_TARGET_ARCH
2580 -- Floating point assignment to memory
2581 assignMem_FltCode pk addr src = do
2582 Amode dst__2 code1 <- getAmode addr
2583 (src__2, code2) <- getSomeReg src
2584 tmp1 <- getNewRegNat pk
2586 pk__2 = cmmExprType src
2587 code__2 = code1 `appOL` code2 `appOL`
2588 if sizeToWidth pk == typeWidth pk__2
2589 then unitOL (ST pk src__2 dst__2)
2590 else toOL [ FxTOy (cmmTypeSize pk__2) pk src__2 tmp1
2591 , ST pk tmp1 dst__2]
2594 -- Floating point assignment to a register/temporary
2595 -- ToDo: Verify correctness
2596 assignReg_FltCode pk reg src = do
2597 r <- getRegister src
2598 v1 <- getNewRegNat pk
2600 Any _ code -> code dst
2601 Fixed _ freg fcode -> fcode `snocOL` FMOV pk freg v1
2603 dst = getRegisterReg reg
2605 #endif /* sparc_TARGET_ARCH */
2607 #if powerpc_TARGET_ARCH
-- On PowerPC the floating-point assignments reuse the integer
-- assignment code unchanged.
assignMem_FltCode = assignMem_IntCode
assignReg_FltCode = assignReg_IntCode
2613 #endif /* powerpc_TARGET_ARCH */
2616 -- -----------------------------------------------------------------------------
-- Generating a non-local jump
2619 -- (If applicable) Do not fill the delay slots here; you will confuse the
2620 -- register allocator.
2622 genJump :: CmmExpr{-the branch target-} -> NatM InstrBlock
2624 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2626 #if alpha_TARGET_ARCH
2628 genJump (CmmLabel lbl)
2629 | isAsmTemp lbl = returnInstr (BR target)
2630 | otherwise = returnInstrs [LDA pv (AddrImm target), JMP zeroh (AddrReg pv) 0]
2632 target = ImmCLbl lbl
2635 = getRegister tree `thenNat` \ register ->
2636 getNewRegNat PtrRep `thenNat` \ tmp ->
2638 dst = registerName register pv
2639 code = registerCode register pv
2640 target = registerName register pv
2642 if isFixed register then
2643 returnSeq code [OR dst (RIReg dst) pv, JMP zeroh (AddrReg pv) 0]
2645 return (code . mkSeqInstr (JMP zeroh (AddrReg pv) 0))
2647 #endif /* alpha_TARGET_ARCH */
2649 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2651 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
-- Jump through a memory operand: the load folds into the JMP itself.
genJump (CmmLoad mem pk) = do
    Amode target code <- getAmode mem
    return (code `snocOL` JMP (OpAddr target))

-- Jump to an immediate (e.g. label) target.
genJump (CmmLit lit) = do
    return (unitOL (JMP (OpImm (litToImm lit))))
2661 (reg,code) <- getSomeReg expr
2662 return (code `snocOL` JMP (OpReg reg))
2664 #endif /* i386_TARGET_ARCH */
2666 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2668 #if sparc_TARGET_ARCH
2670 genJump (CmmLit (CmmLabel lbl))
2671 = return (toOL [CALL (Left target) 0 True, NOP])
2673 target = ImmCLbl lbl
2677 (target, code) <- getSomeReg tree
2678 return (code `snocOL` JMP (AddrRegReg target g0) `snocOL` NOP)
2680 #endif /* sparc_TARGET_ARCH */
2682 #if powerpc_TARGET_ARCH
-- Direct jump to a known label.
genJump (CmmLit (CmmLabel lbl))
  = return (unitOL $ JMP lbl)
2688 (target,code) <- getSomeReg tree
2689 return (code `snocOL` MTCTR target `snocOL` BCTR [])
2690 #endif /* powerpc_TARGET_ARCH */
2693 -- -----------------------------------------------------------------------------
2694 -- Unconditional branches
-- | Unconditional branch to a local block label.
genBranch :: BlockId -> NatM InstrBlock
genBranch bid = return (toOL (mkBranchInstr bid))
2700 -- -----------------------------------------------------------------------------
2701 -- Conditional jumps
2704 Conditional jumps are always to local labels, so we can use branch
2705 instructions. We peek at the arguments to decide what kind of
2708 ALPHA: For comparisons with 0, we're laughing, because we can just do
2709 the desired conditional branch.
2711 I386: First, we have to ensure that the condition
2712 codes are set according to the supplied comparison operation.
2714 SPARC: First, we have to ensure that the condition codes are set
2715 according to the supplied comparison operation. We generate slightly
2716 different code for floating point comparisons, because a floating
2717 point operation cannot directly precede a @BF@. We assume the worst
2718 and fill that slot with a @NOP@.
2720 SPARC: Do not fill the delay slots here; you will confuse the register
2726 :: BlockId -- the branch target
2727 -> CmmExpr -- the condition on which to branch
2730 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2732 #if alpha_TARGET_ARCH
2734 genCondJump id (StPrim op [x, StInt 0])
2735 = getRegister x `thenNat` \ register ->
2736 getNewRegNat (registerRep register)
2739 code = registerCode register tmp
2740 value = registerName register tmp
2741 pk = registerRep register
2742 target = ImmCLbl lbl
2744 returnSeq code [BI (cmpOp op) value target]
2746 cmpOp CharGtOp = GTT
2748 cmpOp CharEqOp = EQQ
2750 cmpOp CharLtOp = LTT
2759 cmpOp WordGeOp = ALWAYS
2760 cmpOp WordEqOp = EQQ
2762 cmpOp WordLtOp = NEVER
2763 cmpOp WordLeOp = EQQ
2765 cmpOp AddrGeOp = ALWAYS
2766 cmpOp AddrEqOp = EQQ
2768 cmpOp AddrLtOp = NEVER
2769 cmpOp AddrLeOp = EQQ
2771 genCondJump lbl (StPrim op [x, StDouble 0.0])
2772 = getRegister x `thenNat` \ register ->
2773 getNewRegNat (registerRep register)
2776 code = registerCode register tmp
2777 value = registerName register tmp
2778 pk = registerRep register
2779 target = ImmCLbl lbl
2781 return (code . mkSeqInstr (BF (cmpOp op) value target))
2783 cmpOp FloatGtOp = GTT
2784 cmpOp FloatGeOp = GE
2785 cmpOp FloatEqOp = EQQ
2786 cmpOp FloatNeOp = NE
2787 cmpOp FloatLtOp = LTT
2788 cmpOp FloatLeOp = LE
2789 cmpOp DoubleGtOp = GTT
2790 cmpOp DoubleGeOp = GE
2791 cmpOp DoubleEqOp = EQQ
2792 cmpOp DoubleNeOp = NE
2793 cmpOp DoubleLtOp = LTT
2794 cmpOp DoubleLeOp = LE
2796 genCondJump lbl (StPrim op [x, y])
2798 = trivialFCode pr instr x y `thenNat` \ register ->
2799 getNewRegNat FF64 `thenNat` \ tmp ->
2801 code = registerCode register tmp
2802 result = registerName register tmp
2803 target = ImmCLbl lbl
2805 return (code . mkSeqInstr (BF cond result target))
2807 pr = panic "trivialU?FCode: does not use PrimRep on Alpha"
2809 fltCmpOp op = case op of
2823 (instr, cond) = case op of
2824 FloatGtOp -> (FCMP TF LE, EQQ)
2825 FloatGeOp -> (FCMP TF LTT, EQQ)
2826 FloatEqOp -> (FCMP TF EQQ, NE)
2827 FloatNeOp -> (FCMP TF EQQ, EQQ)
2828 FloatLtOp -> (FCMP TF LTT, NE)
2829 FloatLeOp -> (FCMP TF LE, NE)
2830 DoubleGtOp -> (FCMP TF LE, EQQ)
2831 DoubleGeOp -> (FCMP TF LTT, EQQ)
2832 DoubleEqOp -> (FCMP TF EQQ, NE)
2833 DoubleNeOp -> (FCMP TF EQQ, EQQ)
2834 DoubleLtOp -> (FCMP TF LTT, NE)
2835 DoubleLeOp -> (FCMP TF LE, NE)
2837 genCondJump lbl (StPrim op [x, y])
2838 = trivialCode instr x y `thenNat` \ register ->
2839 getNewRegNat IntRep `thenNat` \ tmp ->
2841 code = registerCode register tmp
2842 result = registerName register tmp
2843 target = ImmCLbl lbl
2845 return (code . mkSeqInstr (BI cond result target))
2847 (instr, cond) = case op of
2848 CharGtOp -> (CMP LE, EQQ)
2849 CharGeOp -> (CMP LTT, EQQ)
2850 CharEqOp -> (CMP EQQ, NE)
2851 CharNeOp -> (CMP EQQ, EQQ)
2852 CharLtOp -> (CMP LTT, NE)
2853 CharLeOp -> (CMP LE, NE)
2854 IntGtOp -> (CMP LE, EQQ)
2855 IntGeOp -> (CMP LTT, EQQ)
2856 IntEqOp -> (CMP EQQ, NE)
2857 IntNeOp -> (CMP EQQ, EQQ)
2858 IntLtOp -> (CMP LTT, NE)
2859 IntLeOp -> (CMP LE, NE)
2860 WordGtOp -> (CMP ULE, EQQ)
2861 WordGeOp -> (CMP ULT, EQQ)
2862 WordEqOp -> (CMP EQQ, NE)
2863 WordNeOp -> (CMP EQQ, EQQ)
2864 WordLtOp -> (CMP ULT, NE)
2865 WordLeOp -> (CMP ULE, NE)
2866 AddrGtOp -> (CMP ULE, EQQ)
2867 AddrGeOp -> (CMP ULT, EQQ)
2868 AddrEqOp -> (CMP EQQ, NE)
2869 AddrNeOp -> (CMP EQQ, EQQ)
2870 AddrLtOp -> (CMP ULT, NE)
2871 AddrLeOp -> (CMP ULE, NE)
2873 #endif /* alpha_TARGET_ARCH */
2875 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2877 #if i386_TARGET_ARCH
-- x86 conditional branch: evaluate the condition into the flags, then
-- emit a JXX to the target block.
genCondJump blockid cond_expr = do
    CondCode _ cond code <- getCondCode cond_expr
    return (code `snocOL` JXX cond blockid)
2885 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2887 #if x86_64_TARGET_ARCH
2889 genCondJump id bool = do
2890 CondCode is_float cond cond_code <- getCondCode bool
2893 return (cond_code `snocOL` JXX cond id)
2895 lbl <- getBlockIdNat
2897 -- see comment with condFltReg
2898 let code = case cond of
2904 plain_test = unitOL (
2907 or_unordered = toOL [
2911 and_ordered = toOL [
2917 return (cond_code `appOL` code)
2921 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2923 #if sparc_TARGET_ARCH
2925 genCondJump bid bool = do
2926 CondCode is_float cond code <- getCondCode bool
2931 then [NOP, BF cond False bid, NOP]
2932 else [BI cond False bid, NOP]
2936 #endif /* sparc_TARGET_ARCH */
2939 #if powerpc_TARGET_ARCH
-- PowerPC conditional branch: compute the condition code, then BCC to
-- the target block.  (The float/int distinction is irrelevant here.)
genCondJump blockid cond_expr = do
    CondCode _is_float cond code <- getCondCode cond_expr
    return (code `snocOL` BCC cond blockid)
2945 #endif /* powerpc_TARGET_ARCH */
2948 -- -----------------------------------------------------------------------------
2949 -- Generating C calls
2951 -- Now the biggest nightmare---calls. Most of the nastiness is buried in
2952 -- @get_arg@, which moves the arguments to the correct registers/stack
2953 -- locations. Apart from that, the code is easy.
2955 -- (If applicable) Do not fill the delay slots here; you will confuse the
2956 -- register allocator.
2959 :: CmmCallTarget -- function to call
2960 -> HintedCmmFormals -- where to put the result
2961 -> HintedCmmActuals -- arguments (of mixed type)
2964 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2966 #if alpha_TARGET_ARCH
2970 genCCall fn cconv result_regs args
2971 = mapAccumLNat get_arg (allArgRegs, eXTRA_STK_ARGS_HERE) args
2972 `thenNat` \ ((unused,_), argCode) ->
2974 nRegs = length allArgRegs - length unused
2975 code = asmSeqThen (map ($ []) argCode)
2978 LDA pv (AddrImm (ImmLab (ptext fn))),
2979 JSR ra (AddrReg pv) nRegs,
2980 LDGP gp (AddrReg ra)]
2982 ------------------------
2983 {- Try to get a value into a specific register (or registers) for
2984 a call. The first 6 arguments go into the appropriate
2985 argument register (separate registers for integer and floating
2986 point arguments, but used in lock-step), and the remaining
2987 arguments are dumped to the stack, beginning at 0(sp). Our
2988 first argument is a pair of the list of remaining argument
2989 registers to be assigned for this call and the next stack
2990 offset to use for overflowing arguments. This way,
2991 @get_Arg@ can be applied to all of a call's arguments using
2995 :: ([(Reg,Reg)], Int) -- Argument registers and stack offset (accumulator)
2996 -> StixTree -- Current argument
2997 -> NatM (([(Reg,Reg)],Int), InstrBlock) -- Updated accumulator and code
2999 -- We have to use up all of our argument registers first...
3001 get_arg ((iDst,fDst):dsts, offset) arg
3002 = getRegister arg `thenNat` \ register ->
3004 reg = if isFloatType pk then fDst else iDst
3005 code = registerCode register reg
3006 src = registerName register reg
3007 pk = registerRep register
3010 if isFloatType pk then
3011 ((dsts, offset), if isFixed register then
3012 code . mkSeqInstr (FMOV src fDst)
3015 ((dsts, offset), if isFixed register then
3016 code . mkSeqInstr (OR src (RIReg src) iDst)
3019 -- Once we have run out of argument registers, we move to the
3022 get_arg ([], offset) arg
3023 = getRegister arg `thenNat` \ register ->
3024 getNewRegNat (registerRep register)
3027 code = registerCode register tmp
3028 src = registerName register tmp
3029 pk = registerRep register
3030 sz = primRepToSize pk
3032 return (([], offset + 1), code . mkSeqInstr (ST sz src (spRel offset)))
3034 #endif /* alpha_TARGET_ARCH */
3036 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3038 #if i386_TARGET_ARCH
genCCall (CmmPrim MO_WriteBarrier) _ _ = return nilOL
        -- write barrier compiles to no code on x86/x86-64;
        -- we keep it this long in order to prevent earlier optimisations.
        -- NOTE(review): presumably relies on x86's strong store
        -- ordering -- confirm against the memory-model docs.
3044 -- we only cope with a single result for foreign calls
3045 genCCall (CmmPrim op) [CmmHinted r _] args = do
3046 l1 <- getNewLabelNat
3047 l2 <- getNewLabelNat
3049 MO_F32_Sqrt -> actuallyInlineFloatOp GSQRT FF32 args
3050 MO_F64_Sqrt -> actuallyInlineFloatOp GSQRT FF64 args
3052 MO_F32_Sin -> actuallyInlineFloatOp (\s -> GSIN s l1 l2) FF32 args
3053 MO_F64_Sin -> actuallyInlineFloatOp (\s -> GSIN s l1 l2) FF64 args
3055 MO_F32_Cos -> actuallyInlineFloatOp (\s -> GCOS s l1 l2) FF32 args
3056 MO_F64_Cos -> actuallyInlineFloatOp (\s -> GCOS s l1 l2) FF64 args
3058 MO_F32_Tan -> actuallyInlineFloatOp (\s -> GTAN s l1 l2) FF32 args
3059 MO_F64_Tan -> actuallyInlineFloatOp (\s -> GTAN s l1 l2) FF64 args
3061 other_op -> outOfLineFloatOp op r args
3063 actuallyInlineFloatOp instr size [CmmHinted x _]
3064 = do res <- trivialUFCode size (instr size) x
3066 return (any (getRegisterReg (CmmLocal r)))
3068 genCCall target dest_regs args = do
3070 sizes = map (arg_size . cmmExprType . hintlessCmm) (reverse args)
3071 #if !darwin_TARGET_OS
3072 tot_arg_size = sum sizes
3074 raw_arg_size = sum sizes
3075 tot_arg_size = roundTo 16 raw_arg_size
3076 arg_pad_size = tot_arg_size - raw_arg_size
3077 delta0 <- getDeltaNat
3078 setDeltaNat (delta0 - arg_pad_size)
3081 push_codes <- mapM push_arg (reverse args)
3082 delta <- getDeltaNat
3085 -- deal with static vs dynamic call targets
3086 (callinsns,cconv) <-
3089 CmmCallee (CmmLit (CmmLabel lbl)) conv
3090 -> -- ToDo: stdcall arg sizes
3091 return (unitOL (CALL (Left fn_imm) []), conv)
3092 where fn_imm = ImmCLbl lbl
3094 -> do { (dyn_c, dyn_r) <- get_op expr
3095 ; ASSERT( isWord32 (cmmExprType expr) )
3096 return (dyn_c `snocOL` CALL (Right dyn_r) [], conv) }
3099 #if darwin_TARGET_OS
3101 = toOL [SUB II32 (OpImm (ImmInt arg_pad_size)) (OpReg esp),
3102 DELTA (delta0 - arg_pad_size)]
3103 `appOL` concatOL push_codes
3106 = concatOL push_codes
3107 call = callinsns `appOL`
3109 -- Deallocate parameters after call for ccall;
3110 -- but not for stdcall (callee does it)
3111 (if cconv == StdCallConv || tot_arg_size==0 then [] else
3112 [ADD II32 (OpImm (ImmInt tot_arg_size)) (OpReg esp)])
3114 [DELTA (delta + tot_arg_size)]
3117 setDeltaNat (delta + tot_arg_size)
3120 -- assign the results, if necessary
3121 assign_code [] = nilOL
3122 assign_code [CmmHinted dest _hint]
3123 | isFloatType ty = unitOL (GMOV fake0 r_dest)
3124 | isWord64 ty = toOL [MOV II32 (OpReg eax) (OpReg r_dest),
3125 MOV II32 (OpReg edx) (OpReg r_dest_hi)]
3126 | otherwise = unitOL (MOV (intSize w) (OpReg eax) (OpReg r_dest))
3128 ty = localRegType dest
3130 r_dest_hi = getHiVRegFromLo r_dest
3131 r_dest = getRegisterReg (CmmLocal dest)
3132 assign_code many = panic "genCCall.assign_code many"
3134 return (push_code `appOL`
3136 assign_code dest_regs)
3139 arg_size :: CmmType -> Int -- Width in bytes
3140 arg_size ty = widthInBytes (typeWidth ty)
3142 roundTo a x | x `mod` a == 0 = x
3143 | otherwise = x + a - (x `mod` a)
3146 push_arg :: HintedCmmActual {-current argument-}
3147 -> NatM InstrBlock -- code
3149 push_arg (CmmHinted arg _hint) -- we don't need the hints on x86
3150 | isWord64 arg_ty = do
3151 ChildCode64 code r_lo <- iselExpr64 arg
3152 delta <- getDeltaNat
3153 setDeltaNat (delta - 8)
3155 r_hi = getHiVRegFromLo r_lo
3157 return ( code `appOL`
3158 toOL [PUSH II32 (OpReg r_hi), DELTA (delta - 4),
3159 PUSH II32 (OpReg r_lo), DELTA (delta - 8),
3164 (code, reg) <- get_op arg
3165 delta <- getDeltaNat
3166 let size = arg_size arg_ty -- Byte size
3167 setDeltaNat (delta-size)
3168 if (isFloatType arg_ty)
3169 then return (code `appOL`
3170 toOL [SUB II32 (OpImm (ImmInt size)) (OpReg esp),
3172 GST (floatSize (typeWidth arg_ty))
3173 reg (AddrBaseIndex (EABaseReg esp)
3177 else return (code `snocOL`
3178 PUSH II32 (OpReg reg) `snocOL`
3182 arg_ty = cmmExprType arg
3185 get_op :: CmmExpr -> NatM (InstrBlock, Reg) -- code, reg
3187 (reg,code) <- getSomeReg op
3190 #endif /* i386_TARGET_ARCH */
3192 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
3194 outOfLineFloatOp :: CallishMachOp -> CmmFormal -> HintedCmmActuals
3196 outOfLineFloatOp mop res args
3198 dflags <- getDynFlagsNat
3199 targetExpr <- cmmMakeDynamicReference dflags addImportNat CallReference lbl
3200 let target = CmmCallee targetExpr CCallConv
3202 if isFloat64 (localRegType res)
3204 stmtToInstrs (CmmCall target [CmmHinted res NoHint] args CmmUnsafe CmmMayReturn)
3208 tmp = LocalReg uq f64
3210 code1 <- stmtToInstrs (CmmCall target [CmmHinted tmp NoHint] args CmmUnsafe CmmMayReturn)
3211 code2 <- stmtToInstrs (CmmAssign (CmmLocal res) (CmmReg (CmmLocal tmp)))
3212 return (code1 `appOL` code2)
3214 lbl = mkForeignLabel fn Nothing False
3217 MO_F32_Sqrt -> fsLit "sqrtf"
3218 MO_F32_Sin -> fsLit "sinf"
3219 MO_F32_Cos -> fsLit "cosf"
3220 MO_F32_Tan -> fsLit "tanf"
3221 MO_F32_Exp -> fsLit "expf"
3222 MO_F32_Log -> fsLit "logf"
3224 MO_F32_Asin -> fsLit "asinf"
3225 MO_F32_Acos -> fsLit "acosf"
3226 MO_F32_Atan -> fsLit "atanf"
3228 MO_F32_Sinh -> fsLit "sinhf"
3229 MO_F32_Cosh -> fsLit "coshf"
3230 MO_F32_Tanh -> fsLit "tanhf"
3231 MO_F32_Pwr -> fsLit "powf"
3233 MO_F64_Sqrt -> fsLit "sqrt"
3234 MO_F64_Sin -> fsLit "sin"
3235 MO_F64_Cos -> fsLit "cos"
3236 MO_F64_Tan -> fsLit "tan"
3237 MO_F64_Exp -> fsLit "exp"
3238 MO_F64_Log -> fsLit "log"
3240 MO_F64_Asin -> fsLit "asin"
3241 MO_F64_Acos -> fsLit "acos"
3242 MO_F64_Atan -> fsLit "atan"
3244 MO_F64_Sinh -> fsLit "sinh"
3245 MO_F64_Cosh -> fsLit "cosh"
3246 MO_F64_Tanh -> fsLit "tanh"
3247 MO_F64_Pwr -> fsLit "pow"
3249 #endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */
3251 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3253 #if x86_64_TARGET_ARCH
genCCall (CmmPrim MO_WriteBarrier) _ _ = return nilOL
        -- write barrier compiles to no code on x86/x86-64;
        -- we keep it this long in order to prevent earlier optimisations.

-- A "prim" call with exactly one result register: on x86-64 these are
-- all handled by an out-of-line call to the corresponding C routine
-- (see outOfLineFloatOp for the name mapping).
genCCall (CmmPrim op) [CmmHinted r _] args =
    outOfLineFloatOp op r args
3263 genCCall target dest_regs args = do
3265 -- load up the register arguments
3266 (stack_args, aregs, fregs, load_args_code)
3267 <- load_args args allArgRegs allFPArgRegs nilOL
3270 fp_regs_used = reverse (drop (length fregs) (reverse allFPArgRegs))
3271 int_regs_used = reverse (drop (length aregs) (reverse allArgRegs))
3272 arg_regs = [eax] ++ int_regs_used ++ fp_regs_used
3273 -- for annotating the call instruction with
3275 sse_regs = length fp_regs_used
3277 tot_arg_size = arg_size * length stack_args
3279 -- On entry to the called function, %rsp should be aligned
3280 -- on a 16-byte boundary +8 (i.e. the first stack arg after
3281 -- the return address is 16-byte aligned). In STG land
3282 -- %rsp is kept 16-byte aligned (see StgCRun.c), so we just
3283 -- need to make sure we push a multiple of 16-bytes of args,
3284 -- plus the return address, to get the correct alignment.
3285 -- Urg, this is hard. We need to feed the delta back into
3286 -- the arg pushing code.
3287 (real_size, adjust_rsp) <-
3288 if tot_arg_size `rem` 16 == 0
3289 then return (tot_arg_size, nilOL)
3290 else do -- we need to adjust...
3291 delta <- getDeltaNat
3292 setDeltaNat (delta-8)
3293 return (tot_arg_size+8, toOL [
3294 SUB II64 (OpImm (ImmInt 8)) (OpReg rsp),
3298 -- push the stack args, right to left
3299 push_code <- push_args (reverse stack_args) nilOL
3300 delta <- getDeltaNat
3302 -- deal with static vs dynamic call targets
3303 (callinsns,cconv) <-
3306 CmmCallee (CmmLit (CmmLabel lbl)) conv
3307 -> -- ToDo: stdcall arg sizes
3308 return (unitOL (CALL (Left fn_imm) arg_regs), conv)
3309 where fn_imm = ImmCLbl lbl
3311 -> do (dyn_r, dyn_c) <- getSomeReg expr
3312 return (dyn_c `snocOL` CALL (Right dyn_r) arg_regs, conv)
3315 -- The x86_64 ABI requires us to set %al to the number of SSE
3316 -- registers that contain arguments, if the called routine
3317 -- is a varargs function. We don't know whether it's a
3318 -- varargs function or not, so we have to assume it is.
3320 -- It's not safe to omit this assignment, even if the number
3321 -- of SSE regs in use is zero. If %al is larger than 8
3322 -- on entry to a varargs function, seg faults ensue.
3323 assign_eax n = unitOL (MOV II32 (OpImm (ImmInt n)) (OpReg eax))
3325 let call = callinsns `appOL`
3327 -- Deallocate parameters after call for ccall;
3328 -- but not for stdcall (callee does it)
3329 (if cconv == StdCallConv || real_size==0 then [] else
3330 [ADD (intSize wordWidth) (OpImm (ImmInt real_size)) (OpReg esp)])
3332 [DELTA (delta + real_size)]
3335 setDeltaNat (delta + real_size)
3338 -- assign the results, if necessary
3339 assign_code [] = nilOL
3340 assign_code [CmmHinted dest _hint] =
3341 case typeWidth rep of
3342 W32 | isFloatType rep -> unitOL (MOV (floatSize W32) (OpReg xmm0) (OpReg r_dest))
3343 W64 | isFloatType rep -> unitOL (MOV (floatSize W32) (OpReg xmm0) (OpReg r_dest))
3344 _ -> unitOL (MOV (cmmTypeSize rep) (OpReg rax) (OpReg r_dest))
3346 rep = localRegType dest
3347 r_dest = getRegisterReg (CmmLocal dest)
3348 assign_code many = panic "genCCall.assign_code many"
3350 return (load_args_code `appOL`
3353 assign_eax sse_regs `appOL`
3355 assign_code dest_regs)
3358 arg_size = 8 -- always, at the mo
3360 load_args :: [CmmHinted CmmExpr]
3361 -> [Reg] -- int regs avail for args
3362 -> [Reg] -- FP regs avail for args
3364 -> NatM ([CmmHinted CmmExpr],[Reg],[Reg],InstrBlock)
3365 load_args args [] [] code = return (args, [], [], code)
3366 -- no more regs to use
3367 load_args [] aregs fregs code = return ([], aregs, fregs, code)
3368 -- no more args to push
3369 load_args ((CmmHinted arg hint) : rest) aregs fregs code
3370 | isFloatType arg_rep =
3374 arg_code <- getAnyReg arg
3375 load_args rest aregs rs (code `appOL` arg_code r)
3380 arg_code <- getAnyReg arg
3381 load_args rest rs fregs (code `appOL` arg_code r)
3383 arg_rep = cmmExprType arg
3386 (args',ars,frs,code') <- load_args rest aregs fregs code
3387 return ((CmmHinted arg hint):args', ars, frs, code')
3389 push_args [] code = return code
3390 push_args ((CmmHinted arg hint):rest) code
3391 | isFloatType arg_rep = do
3392 (arg_reg, arg_code) <- getSomeReg arg
3393 delta <- getDeltaNat
3394 setDeltaNat (delta-arg_size)
3395 let code' = code `appOL` arg_code `appOL` toOL [
3396 SUB (intSize wordWidth) (OpImm (ImmInt arg_size)) (OpReg rsp) ,
3397 DELTA (delta-arg_size),
3398 MOV (floatSize width) (OpReg arg_reg) (OpAddr (spRel 0))]
3399 push_args rest code'
3402 -- we only ever generate word-sized function arguments. Promotion
3403 -- has already happened: our Int8# type is kept sign-extended
3404 -- in an Int#, for example.
3405 ASSERT(width == W64) return ()
3406 (arg_op, arg_code) <- getOperand arg
3407 delta <- getDeltaNat
3408 setDeltaNat (delta-arg_size)
3409 let code' = code `appOL` toOL [PUSH II64 arg_op,
3410 DELTA (delta-arg_size)]
3411 push_args rest code'
3413 arg_rep = cmmExprType arg
3414 width = typeWidth arg_rep
3417 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3419 #if sparc_TARGET_ARCH
3421 The SPARC calling convention is an absolute
3422 nightmare. The first 6x32 bits of arguments are mapped into
3423 %o0 through %o5, and the remaining arguments are dumped to the
3424 stack, beginning at [%sp+92]. (Note that %o6 == %sp.)
3426 If we have to put args on the stack, move %o6==%sp down by
3427 the number of words to go on the stack, to ensure there's enough space.
3429 According to Fraser and Hanson's lcc book, page 478, fig 17.2,
3430 16 words above the stack pointer is a word for the address of
3431 a structure return value. I use this as a temporary location
3432 for moving values from float to int regs. Certainly it isn't
3433 safe to put anything in the 16 words starting at %sp, since
3434 this area can get trashed at any time due to window overflows
3435 caused by signal handlers.
3437 A final complication (if the above isn't enough) is that
3438 we can't blithely calculate the arguments one by one into
3439 %o0 .. %o5. Consider the following nested calls:
3443 Naive code moves a into %o0, and (fff b c) into %o1. Unfortunately
3444 the inner call will itself use %o0, which trashes the value put there
3445 in preparation for the outer call. Upshot: we need to calculate the
3446 args into temporary regs, and move those to arg regs or onto the
3447 stack only immediately prior to the call proper. Sigh.
3450 genCCall target dest_regs argsAndHints = do
3452 args = map hintlessCmm argsAndHints
3453 argcode_and_vregs <- mapM arg_to_int_vregs args
3455 (argcodes, vregss) = unzip argcode_and_vregs
3456 n_argRegs = length allArgRegs
3457 n_argRegs_used = min (length vregs) n_argRegs
3458 vregs = concat vregss
3459 -- deal with static vs dynamic call targets
3460 callinsns <- (case target of
3461 CmmCallee (CmmLit (CmmLabel lbl)) conv -> do
3462 return (unitOL (CALL (Left (litToImm (CmmLabel lbl))) n_argRegs_used False))
3463 CmmCallee expr conv -> do
3464 (dyn_c, [dyn_r]) <- arg_to_int_vregs expr
3465 return (dyn_c `snocOL` CALL (Right dyn_r) n_argRegs_used False)
3467 (res, reduce) <- outOfLineFloatOp mop
3468 lblOrMopExpr <- case res of
3470 return (unitOL (CALL (Left (litToImm (CmmLabel lbl))) n_argRegs_used False))
3472 (dyn_c, [dyn_r]) <- arg_to_int_vregs mopExpr
3473 return (dyn_c `snocOL` CALL (Right dyn_r) n_argRegs_used False)
3474 if reduce then panic "genCCall(sparc): can not reduce" else return lblOrMopExpr
3478 argcode = concatOL argcodes
3479 (move_sp_down, move_sp_up)
3480 = let diff = length vregs - n_argRegs
3481 nn = if odd diff then diff + 1 else diff -- keep 8-byte alignment
3484 else (unitOL (moveSp (-1*nn)), unitOL (moveSp (1*nn)))
3487 = toOL (move_final vregs allArgRegs eXTRA_STK_ARGS_HERE)
3489 -- assign the results, if necessary
3490 assign_code [] = nilOL
3492 assign_code [CmmHinted dest _hint]
3493 = let rep = localRegType dest
3494 width = typeWidth rep
3495 r_dest = getRegisterReg (CmmLocal dest)
3500 = unitOL $ FMOV FF32 (RealReg $ fReg 0) r_dest
3504 = unitOL $ FMOV FF64 (RealReg $ fReg 0) r_dest
3506 | not $ isFloatType rep
3508 = unitOL $ mkRegRegMoveInstr (RealReg $ oReg 0) r_dest
3512 return (argcode `appOL`
3513 move_sp_down `appOL`
3514 transfer_code `appOL`
3518 assign_code dest_regs)
3520 -- move args from the integer vregs into which they have been
3521 -- marshalled, into %o0 .. %o5, and the rest onto the stack.
3522 move_final :: [Reg] -> [Reg] -> Int -> [Instr]
3524 move_final [] _ offset -- all args done
3527 move_final (v:vs) [] offset -- out of aregs; move to stack
3528 = ST II32 v (spRel offset)
3529 : move_final vs [] (offset+1)
3531 move_final (v:vs) (a:az) offset -- move into an arg (%o[0..5]) reg
3532 = OR False g0 (RIReg v) a
3533 : move_final vs az offset
3535 -- generate code to calculate an argument, and move it into one
3536 -- or two integer vregs.
3537 arg_to_int_vregs :: CmmExpr -> NatM (OrdList Instr, [Reg])
3538 arg_to_int_vregs arg
3539 | isWord64 (cmmExprType arg)
3541 (ChildCode64 code r_lo) <- iselExpr64 arg
3543 r_hi = getHiVRegFromLo r_lo
3544 return (code, [r_hi, r_lo])
3547 (src, code) <- getSomeReg arg
3548 tmp <- getNewRegNat (cmmTypeSize $ cmmExprType arg)
3550 pk = cmmExprType arg
3551 Just f0_high = fPair f0
3552 case cmmTypeSize pk of
3554 v1 <- getNewRegNat II32
3555 v2 <- getNewRegNat II32
3558 FMOV FF64 src f0 `snocOL`
3559 ST FF32 f0 (spRel 16) `snocOL`
3560 LD II32 (spRel 16) v1 `snocOL`
3561 ST FF32 f0_high (spRel 16) `snocOL`
3562 LD II32 (spRel 16) v2
3567 v1 <- getNewRegNat II32
3570 ST FF32 src (spRel 16) `snocOL`
3571 LD II32 (spRel 16) v1
3576 v1 <- getNewRegNat II32
3578 code `snocOL` OR False g0 (RIReg src) v1
3582 outOfLineFloatOp mop =
3584 dflags <- getDynFlagsNat
3585 mopExpr <- cmmMakeDynamicReference dflags addImportNat CallReference $
3586 mkForeignLabel functionName Nothing True
3587 let mopLabelOrExpr = case mopExpr of
3588 CmmLit (CmmLabel lbl) -> Left lbl
3590 return (mopLabelOrExpr, reduce)
3592 (reduce, functionName) = case mop of
3593 MO_F32_Exp -> (True, fsLit "exp")
3594 MO_F32_Log -> (True, fsLit "log")
3595 MO_F32_Sqrt -> (True, fsLit "sqrt")
3597 MO_F32_Sin -> (True, fsLit "sin")
3598 MO_F32_Cos -> (True, fsLit "cos")
3599 MO_F32_Tan -> (True, fsLit "tan")
3601 MO_F32_Asin -> (True, fsLit "asin")
3602 MO_F32_Acos -> (True, fsLit "acos")
3603 MO_F32_Atan -> (True, fsLit "atan")
3605 MO_F32_Sinh -> (True, fsLit "sinh")
3606 MO_F32_Cosh -> (True, fsLit "cosh")
3607 MO_F32_Tanh -> (True, fsLit "tanh")
3609 MO_F64_Exp -> (False, fsLit "exp")
3610 MO_F64_Log -> (False, fsLit "log")
3611 MO_F64_Sqrt -> (False, fsLit "sqrt")
3613 MO_F64_Sin -> (False, fsLit "sin")
3614 MO_F64_Cos -> (False, fsLit "cos")
3615 MO_F64_Tan -> (False, fsLit "tan")
3617 MO_F64_Asin -> (False, fsLit "asin")
3618 MO_F64_Acos -> (False, fsLit "acos")
3619 MO_F64_Atan -> (False, fsLit "atan")
3621 MO_F64_Sinh -> (False, fsLit "sinh")
3622 MO_F64_Cosh -> (False, fsLit "cosh")
3623 MO_F64_Tanh -> (False, fsLit "tanh")
3625 other -> pprPanic "outOfLineFloatOp(sparc) "
3626 (pprCallishMachOp mop)
3628 #endif /* sparc_TARGET_ARCH */
3630 #if powerpc_TARGET_ARCH
3632 #if darwin_TARGET_OS || linux_TARGET_OS
3634 The PowerPC calling convention for Darwin/Mac OS X
3635 is described in Apple's document
3636 "Inside Mac OS X - Mach-O Runtime Architecture".
3638 PowerPC Linux uses the System V Release 4 Calling Convention
3639 for PowerPC. It is described in the
3640 "System V Application Binary Interface PowerPC Processor Supplement".
3642 Both conventions are similar:
3643 Parameters may be passed in general-purpose registers starting at r3, in
3644 floating point registers starting at f1, or on the stack.
3646 But there are substantial differences:
3647 * The number of registers used for parameter passing and the exact set of
3648 nonvolatile registers differs (see MachRegs.lhs).
3649 * On Darwin, stack space is always reserved for parameters, even if they are
3650 passed in registers. The called routine may choose to save parameters from
3651 registers to the corresponding space on the stack.
3652 * On Darwin, a corresponding amount of GPRs is skipped when a floating point
3653 parameter is passed in an FPR.
3654 * SysV insists on either passing I64 arguments on the stack, or in two GPRs,
3655 starting with an odd-numbered GPR. It may skip a GPR to achieve this.
3656 Darwin just treats an I64 like two separate II32s (high word first).
3657 * I64 and FF64 arguments are 8-byte aligned on the stack for SysV, but only
3658 4-byte aligned like everything else on Darwin.
3659 * The SysV spec claims that FF32 is represented as FF64 on the stack. GCC on
3660 PowerPC Linux does not agree, so neither do we.
3662 According to both conventions, the parameter area should be part of the
3663 caller's stack frame, allocated in the caller's prologue code (large enough
3664 to hold the parameter lists for all called routines). The NCG already
3665 uses the stack for register spilling, leaving 64 bytes free at the top.
3666 If we need a larger parameter area than that, we just allocate a new stack
3667 frame just before ccalling.
3671 genCCall (CmmPrim MO_WriteBarrier) _ _
3672 = return $ unitOL LWSYNC
3674 genCCall target dest_regs argsAndHints
3675 = ASSERT (not $ any (`elem` [II8,II16]) $ map cmmTypeSize argReps)
3676 -- we rely on argument promotion in the codeGen
3678 (finalStack,passArgumentsCode,usedRegs) <- passArguments
3680 allArgRegs allFPArgRegs
3684 (labelOrExpr, reduceToFF32) <- case target of
3685 CmmCallee (CmmLit (CmmLabel lbl)) conv -> return (Left lbl, False)
3686 CmmCallee expr conv -> return (Right expr, False)
3687 CmmPrim mop -> outOfLineFloatOp mop
3689 let codeBefore = move_sp_down finalStack `appOL` passArgumentsCode
3690 codeAfter = move_sp_up finalStack `appOL` moveResult reduceToFF32
3695 `snocOL` BL lbl usedRegs
3698 (dynReg, dynCode) <- getSomeReg dyn
3700 `snocOL` MTCTR dynReg
3702 `snocOL` BCTRL usedRegs
3705 #if darwin_TARGET_OS
3706 initialStackOffset = 24
3707 -- size of linkage area + size of arguments, in bytes
3708 stackDelta _finalStack = roundTo 16 $ (24 +) $ max 32 $ sum $
3709 map (widthInBytes . typeWidth) argReps
3710 #elif linux_TARGET_OS
3711 initialStackOffset = 8
3712 stackDelta finalStack = roundTo 16 finalStack
3714 args = map hintlessCmm argsAndHints
3715 argReps = map cmmExprType args
3717 roundTo a x | x `mod` a == 0 = x
3718 | otherwise = x + a - (x `mod` a)
3720 move_sp_down finalStack
3722 toOL [STU II32 sp (AddrRegImm sp (ImmInt (-delta))),
3725 where delta = stackDelta finalStack
3726 move_sp_up finalStack
3728 toOL [ADD sp sp (RIImm (ImmInt delta)),
3731 where delta = stackDelta finalStack
3734 passArguments [] _ _ stackOffset accumCode accumUsed = return (stackOffset, accumCode, accumUsed)
3735 passArguments ((arg,arg_ty):args) gprs fprs stackOffset
3736 accumCode accumUsed | isWord64 arg_ty =
3738 ChildCode64 code vr_lo <- iselExpr64 arg
3739 let vr_hi = getHiVRegFromLo vr_lo
3741 #if darwin_TARGET_OS
3746 (accumCode `appOL` code
3747 `snocOL` storeWord vr_hi gprs stackOffset
3748 `snocOL` storeWord vr_lo (drop 1 gprs) (stackOffset+4))
3749 ((take 2 gprs) ++ accumUsed)
3751 storeWord vr (gpr:_) offset = MR gpr vr
3752 storeWord vr [] offset = ST II32 vr (AddrRegImm sp (ImmInt offset))
3754 #elif linux_TARGET_OS
3755 let stackOffset' = roundTo 8 stackOffset
3756 stackCode = accumCode `appOL` code
3757 `snocOL` ST II32 vr_hi (AddrRegImm sp (ImmInt stackOffset'))
3758 `snocOL` ST II32 vr_lo (AddrRegImm sp (ImmInt (stackOffset'+4)))
3759 regCode hireg loreg =
3760 accumCode `appOL` code
3761 `snocOL` MR hireg vr_hi
3762 `snocOL` MR loreg vr_lo
3765 hireg : loreg : regs | even (length gprs) ->
3766 passArguments args regs fprs stackOffset
3767 (regCode hireg loreg) (hireg : loreg : accumUsed)
3768 _skipped : hireg : loreg : regs ->
3769 passArguments args regs fprs stackOffset
3770 (regCode hireg loreg) (hireg : loreg : accumUsed)
3771 _ -> -- only one or no regs left
3772 passArguments args [] fprs (stackOffset'+8)
3776 passArguments ((arg,rep):args) gprs fprs stackOffset accumCode accumUsed
3777 | reg : _ <- regs = do
3778 register <- getRegister arg
3779 let code = case register of
3780 Fixed _ freg fcode -> fcode `snocOL` MR reg freg
3781 Any _ acode -> acode reg
3785 #if darwin_TARGET_OS
3786 -- The Darwin ABI requires that we reserve stack slots for register parameters
3787 (stackOffset + stackBytes)
3788 #elif linux_TARGET_OS
3789 -- ... the SysV ABI doesn't.
3792 (accumCode `appOL` code)
3795 (vr, code) <- getSomeReg arg
3799 (stackOffset' + stackBytes)
3800 (accumCode `appOL` code `snocOL` ST (cmmTypeSize rep) vr stackSlot)
3803 #if darwin_TARGET_OS
3804 -- stackOffset is at least 4-byte aligned
3805 -- The Darwin ABI is happy with that.
3806 stackOffset' = stackOffset
3808 -- ... the SysV ABI requires 8-byte alignment for doubles.
3809 stackOffset' | isFloatType rep && typeWidth rep == W64 =
3810 roundTo 8 stackOffset
3811 | otherwise = stackOffset
3813 stackSlot = AddrRegImm sp (ImmInt stackOffset')
3814 (nGprs, nFprs, stackBytes, regs) = case cmmTypeSize rep of
3815 II32 -> (1, 0, 4, gprs)
3816 #if darwin_TARGET_OS
3817 -- The Darwin ABI requires that we skip a corresponding number of GPRs when
3819 FF32 -> (1, 1, 4, fprs)
3820 FF64 -> (2, 1, 8, fprs)
3821 #elif linux_TARGET_OS
3822 -- ... the SysV ABI doesn't.
3823 FF32 -> (0, 1, 4, fprs)
3824 FF64 -> (0, 1, 8, fprs)
3827 moveResult reduceToFF32 =
3830 [CmmHinted dest _hint]
3831 | reduceToFF32 && isFloat32 rep -> unitOL (FRSP r_dest f1)
3832 | isFloat32 rep || isFloat64 rep -> unitOL (MR r_dest f1)
3833 | isWord64 rep -> toOL [MR (getHiVRegFromLo r_dest) r3,
3835 | otherwise -> unitOL (MR r_dest r3)
3836 where rep = cmmRegType (CmmLocal dest)
3837 r_dest = getRegisterReg (CmmLocal dest)
3839 outOfLineFloatOp mop =
3841 dflags <- getDynFlagsNat
3842 mopExpr <- cmmMakeDynamicReference dflags addImportNat CallReference $
3843 mkForeignLabel functionName Nothing True
3844 let mopLabelOrExpr = case mopExpr of
3845 CmmLit (CmmLabel lbl) -> Left lbl
3847 return (mopLabelOrExpr, reduce)
3849 (functionName, reduce) = case mop of
3850 MO_F32_Exp -> (fsLit "exp", True)
3851 MO_F32_Log -> (fsLit "log", True)
3852 MO_F32_Sqrt -> (fsLit "sqrt", True)
3854 MO_F32_Sin -> (fsLit "sin", True)
3855 MO_F32_Cos -> (fsLit "cos", True)
3856 MO_F32_Tan -> (fsLit "tan", True)
3858 MO_F32_Asin -> (fsLit "asin", True)
3859 MO_F32_Acos -> (fsLit "acos", True)
3860 MO_F32_Atan -> (fsLit "atan", True)
3862 MO_F32_Sinh -> (fsLit "sinh", True)
3863 MO_F32_Cosh -> (fsLit "cosh", True)
3864 MO_F32_Tanh -> (fsLit "tanh", True)
3865 MO_F32_Pwr -> (fsLit "pow", True)
3867 MO_F64_Exp -> (fsLit "exp", False)
3868 MO_F64_Log -> (fsLit "log", False)
3869 MO_F64_Sqrt -> (fsLit "sqrt", False)
3871 MO_F64_Sin -> (fsLit "sin", False)
3872 MO_F64_Cos -> (fsLit "cos", False)
3873 MO_F64_Tan -> (fsLit "tan", False)
3875 MO_F64_Asin -> (fsLit "asin", False)
3876 MO_F64_Acos -> (fsLit "acos", False)
3877 MO_F64_Atan -> (fsLit "atan", False)
3879 MO_F64_Sinh -> (fsLit "sinh", False)
3880 MO_F64_Cosh -> (fsLit "cosh", False)
3881 MO_F64_Tanh -> (fsLit "tanh", False)
3882 MO_F64_Pwr -> (fsLit "pow", False)
3883 other -> pprPanic "genCCall(ppc): unknown callish op"
3884 (pprCallishMachOp other)
3886 #endif /* darwin_TARGET_OS || linux_TARGET_OS */
3888 #endif /* powerpc_TARGET_ARCH */
3891 -- -----------------------------------------------------------------------------
3892 -- Generating a table-branch
3894 genSwitch :: CmmExpr -> [Maybe BlockId] -> NatM InstrBlock
3896 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
3900 (reg,e_code) <- getSomeReg expr
3901 lbl <- getNewLabelNat
3902 dflags <- getDynFlagsNat
3903 dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl
3904 (tableReg,t_code) <- getSomeReg $ dynRef
3906 jumpTable = map jumpTableEntryRel ids
3908 jumpTableEntryRel Nothing
3909 = CmmStaticLit (CmmInt 0 wordWidth)
3910 jumpTableEntryRel (Just (BlockId id))
3911 = CmmStaticLit (CmmLabelDiffOff blockLabel lbl 0)
3912 where blockLabel = mkAsmTempLabel id
3914 op = OpAddr (AddrBaseIndex (EABaseReg tableReg)
3915 (EAIndex reg wORD_SIZE) (ImmInt 0))
3917 #if x86_64_TARGET_ARCH
3918 #if darwin_TARGET_OS
3919 -- on Mac OS X/x86_64, put the jump table in the text section
3920 -- to work around a limitation of the linker.
3921 -- ld64 is unable to handle the relocations for
3923 -- if L0 is not preceded by a non-anonymous label in its section.
3925 code = e_code `appOL` t_code `appOL` toOL [
3926 ADD (intSize wordWidth) op (OpReg tableReg),
3927 JMP_TBL (OpReg tableReg) [ id | Just id <- ids ],
3928 LDATA Text (CmmDataLabel lbl : jumpTable)
3931 -- HACK: On x86_64 binutils<2.17 is only able to generate PC32
3932 -- relocations, hence we only get 32-bit offsets in the jump
3933 -- table. As these offsets are always negative we need to properly
3934 -- sign extend them to 64-bit. This hack should be removed in
3935 -- conjunction with the hack in PprMach.hs/pprDataItem once
3936 -- binutils 2.17 is standard.
3937 code = e_code `appOL` t_code `appOL` toOL [
3938 LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
3940 (OpAddr (AddrBaseIndex (EABaseReg tableReg)
3941 (EAIndex reg wORD_SIZE) (ImmInt 0)))
3943 ADD (intSize wordWidth) (OpReg reg) (OpReg tableReg),
3944 JMP_TBL (OpReg tableReg) [ id | Just id <- ids ]
3948 code = e_code `appOL` t_code `appOL` toOL [
3949 LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
3950 ADD (intSize wordWidth) op (OpReg tableReg),
3951 JMP_TBL (OpReg tableReg) [ id | Just id <- ids ]
3957 (reg,e_code) <- getSomeReg expr
3958 lbl <- getNewLabelNat
3960 jumpTable = map jumpTableEntry ids
3961 op = OpAddr (AddrBaseIndex EABaseNone (EAIndex reg wORD_SIZE) (ImmCLbl lbl))
3962 code = e_code `appOL` toOL [
3963 LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
3964 JMP_TBL op [ id | Just id <- ids ]
3968 #elif powerpc_TARGET_ARCH
3972 (reg,e_code) <- getSomeReg expr
3973 tmp <- getNewRegNat II32
3974 lbl <- getNewLabelNat
3975 dflags <- getDynFlagsNat
3976 dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl
3977 (tableReg,t_code) <- getSomeReg $ dynRef
3979 jumpTable = map jumpTableEntryRel ids
3981 jumpTableEntryRel Nothing
3982 = CmmStaticLit (CmmInt 0 wordWidth)
3983 jumpTableEntryRel (Just (BlockId id))
3984 = CmmStaticLit (CmmLabelDiffOff blockLabel lbl 0)
3985 where blockLabel = mkAsmTempLabel id
3987 code = e_code `appOL` t_code `appOL` toOL [
3988 LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
3989 SLW tmp reg (RIImm (ImmInt 2)),
3990 LD II32 tmp (AddrRegReg tableReg tmp),
3991 ADD tmp tmp (RIReg tableReg),
3993 BCTR [ id | Just id <- ids ]
3998 (reg,e_code) <- getSomeReg expr
3999 tmp <- getNewRegNat II32
4000 lbl <- getNewLabelNat
4002 jumpTable = map jumpTableEntry ids
4004 code = e_code `appOL` toOL [
4005 LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
4006 SLW tmp reg (RIImm (ImmInt 2)),
4007 ADDIS tmp tmp (HA (ImmCLbl lbl)),
4008 LD II32 tmp (AddrRegImm tmp (LO (ImmCLbl lbl))),
4010 BCTR [ id | Just id <- ids ]
4013 #elif sparc_TARGET_ARCH
4016 = error "MachCodeGen: sparc genSwitch PIC not finished\n"
4019 = error "MachCodeGen: sparc genSwitch non-PIC not finished\n"
4021 #error "ToDo: genSwitch"
-- | Build one static word of an absolute jump table: a 'Nothing' entry
-- (no target for this switch index) becomes a zero word, and a live
-- 'BlockId' becomes the absolute address of that block's asm-temp label.
-- NOTE(review): this file is an elided, line-numbered listing; the numeric
-- prefixes are original source line numbers and are not Haskell.
4024 jumpTableEntry Nothing = CmmStaticLit (CmmInt 0 wordWidth)
4025 jumpTableEntry (Just (BlockId id)) = CmmStaticLit (CmmLabel blockLabel)
4026 where blockLabel = mkAsmTempLabel id
4028 -- -----------------------------------------------------------------------------
4030 -- -----------------------------------------------------------------------------
4033 -- -----------------------------------------------------------------------------
4034 -- 'condIntReg' and 'condFltReg': condition codes into registers
4036 -- Turn those condition codes into integers now (when they appear on
4037 -- the right hand side of an assignment).
4039 -- (If applicable) Do not fill the delay slots here; you will confuse the
4040 -- register allocator.
4042 condIntReg, condFltReg :: Cond -> CmmExpr -> CmmExpr -> NatM Register
4044 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4046 #if alpha_TARGET_ARCH
4047 condIntReg = panic "MachCode.condIntReg (not on Alpha)"
4048 condFltReg = panic "MachCode.condFltReg (not on Alpha)"
4049 #endif /* alpha_TARGET_ARCH */
4051 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4053 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
-- | x86/x86_64: turn an integer comparison's condition code into a register
-- value (0 or 1).  SETCC writes the flag into a fresh byte register, which
-- is then zero-extended to 32 bits into the destination.
-- NOTE(review): elided listing — the 'let' introducing the 'code' binding
-- (orig. line 4058) and the closing bracket of the toOL list (4062-4063)
-- are among the missing source lines.
4055 condIntReg cond x y = do
4056 CondCode _ cond cond_code <- condIntCode cond x y
4057 tmp <- getNewRegNat II8
4059 code dst = cond_code `appOL` toOL [
4060 SETCC cond (OpReg tmp),
4061 MOVZxL II8 (OpReg tmp) (OpReg dst)
4064 return (Any II32 code)
4068 #if i386_TARGET_ARCH
-- | i386: float version of condIntReg — same SETCC + zero-extend scheme.
-- Unlike the x86_64 variant below, this one does not test the parity flag,
-- presumably because i386 condFltCode arranges the comparison so that the
-- unordered (NaN) case is already handled — TODO confirm against condFltCode.
-- NOTE(review): elided listing — the 'let' (orig. 4073) and the closing
-- bracket of the toOL list (4077-4078) are missing.
4070 condFltReg cond x y = do
4071 CondCode _ cond cond_code <- condFltCode cond x y
4072 tmp <- getNewRegNat II8
4074 code dst = cond_code `appOL` toOL [
4075 SETCC cond (OpReg tmp),
4076 MOVZxL II8 (OpReg tmp) (OpReg dst)
4079 return (Any II32 code)
4083 #if x86_64_TARGET_ARCH
-- | x86_64: float comparison into a register, with explicit handling of
-- unordered (NaN) operands via the parity flag, as described in the
-- original comments below.  Three strategies:
--   * plain_test    — condition already excludes the unordered case;
--   * or_unordered  — NE must also be true when operands are unordered;
--   * and_ordered   — other conditions must be false when unordered.
-- NOTE(review): elided listing — the 'let' and the 'case cond of' scrutinee
-- line(s) between orig. 4102 and 4107 are missing, as are the list-closing
-- brackets after each toOL group.
4085 condFltReg cond x y = do
4086 CondCode _ cond cond_code <- condFltCode cond x y
4087 tmp1 <- getNewRegNat wordSize
4088 tmp2 <- getNewRegNat wordSize
4090 -- We have to worry about unordered operands (eg. comparisons
4091 -- against NaN).  If the operands are unordered, the comparison
4092 -- sets the parity flag, carry flag and zero flag.
4093 -- All comparisons are supposed to return false for unordered
4094 -- operands except for !=, which returns true.
4096 -- Optimisation: we don't have to test the parity flag if we
4097 -- know the test has already excluded the unordered case: eg >
4098 -- and >= test for a zero carry flag, which can only occur for
4099 -- ordered operands.
4101 -- ToDo: by reversing comparisons we could avoid testing the
4102 -- parity flag in more cases.
4107 NE -> or_unordered dst
4108 GU -> plain_test dst
4109 GEU -> plain_test dst
4110 _ -> and_ordered dst)
4112 plain_test dst = toOL [
4113 SETCC cond (OpReg tmp1),
4114 MOVZxL II8 (OpReg tmp1) (OpReg dst)
4116 or_unordered dst = toOL [
4117 SETCC cond (OpReg tmp1),
4118 SETCC PARITY (OpReg tmp2),
4119 OR II8 (OpReg tmp1) (OpReg tmp2),
4120 MOVZxL II8 (OpReg tmp2) (OpReg dst)
4122 and_ordered dst = toOL [
4123 SETCC cond (OpReg tmp1),
4124 SETCC NOTPARITY (OpReg tmp2),
4125 AND II8 (OpReg tmp1) (OpReg tmp2),
4126 MOVZxL II8 (OpReg tmp2) (OpReg dst)
4129 return (Any II32 code)
4133 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4135 #if sparc_TARGET_ARCH
-- | SPARC condIntReg/condFltReg: materialise a comparison as 0/1 in a reg.
-- EQQ and NE get branch-free special cases built from SUB/ADD with the
-- carry trick (SUB with cc-set against %g0, then subtract -1 / add 0 with
-- carry) — looks like the classic SPARC "set on zero" idiom; TODO confirm
-- the exact carry semantics against the SPARC V8 manual.
-- The generic fallback branches over two fresh basic blocks that load the
-- constants 0 and 1.
-- NOTE(review): elided listing — 'let' lines and the NEWBLOCK/list-closing
-- lines between the numbered gaps are missing from this excerpt.
4137 condIntReg EQQ x (CmmLit (CmmInt 0 d)) = do
4138 (src, code) <- getSomeReg x
4139 tmp <- getNewRegNat II32
4141 code__2 dst = code `appOL` toOL [
4142 SUB False True g0 (RIReg src) g0,
4143 SUB True False g0 (RIImm (ImmInt (-1))) dst]
4144 return (Any II32 code__2)
-- General EQQ: XOR the operands first, then apply the same zero test.
4146 condIntReg EQQ x y = do
4147 (src1, code1) <- getSomeReg x
4148 (src2, code2) <- getSomeReg y
4149 tmp1 <- getNewRegNat II32
4150 tmp2 <- getNewRegNat II32
4152 code__2 dst = code1 `appOL` code2 `appOL` toOL [
4153 XOR False src1 (RIReg src2) dst,
4154 SUB False True g0 (RIReg dst) g0,
4155 SUB True False g0 (RIImm (ImmInt (-1))) dst]
4156 return (Any II32 code__2)
-- NE against zero: same cc-setting SUB, but ADD-with-carry of 0 yields
-- the carry bit directly (1 iff src /= 0) — TODO confirm.
4158 condIntReg NE x (CmmLit (CmmInt 0 d)) = do
4159 (src, code) <- getSomeReg x
4160 tmp <- getNewRegNat II32
4162 code__2 dst = code `appOL` toOL [
4163 SUB False True g0 (RIReg src) g0,
4164 ADD True False g0 (RIImm (ImmInt 0)) dst]
4165 return (Any II32 code__2)
4167 condIntReg NE x y = do
4168 (src1, code1) <- getSomeReg x
4169 (src2, code2) <- getSomeReg y
4170 tmp1 <- getNewRegNat II32
4171 tmp2 <- getNewRegNat II32
4173 code__2 dst = code1 `appOL` code2 `appOL` toOL [
4174 XOR False src1 (RIReg src2) dst,
4175 SUB False True g0 (RIReg dst) g0,
4176 ADD True False g0 (RIImm (ImmInt 0)) dst]
4177 return (Any II32 code__2)
-- Generic integer condition: branch to bid1 if the condition holds,
-- otherwise fall through loading 0 and jump to bid2; bid1 loads 1.
-- Each branch is followed by a NOP for the SPARC delay slot.
4179 condIntReg cond x y = do
4180 bid1@(BlockId lbl1) <- getBlockIdNat
4181 bid2@(BlockId lbl2) <- getBlockIdNat
4182 CondCode _ cond cond_code <- condIntCode cond x y
4184 code__2 dst = cond_code `appOL` toOL [
4185 BI cond False bid1, NOP,
4186 OR False g0 (RIImm (ImmInt 0)) dst,
4187 BI ALWAYS False bid2, NOP,
4189 OR False g0 (RIImm (ImmInt 1)) dst,
4191 return (Any II32 code__2)
-- Float version: identical control-flow scheme, but branches on the
-- floating-point condition codes via BF.
4193 condFltReg cond x y = do
4194 bid1@(BlockId lbl1) <- getBlockIdNat
4195 bid2@(BlockId lbl2) <- getBlockIdNat
4196 CondCode _ cond cond_code <- condFltCode cond x y
4198 code__2 dst = cond_code `appOL` toOL [
4200 BF cond False bid1, NOP,
4201 OR False g0 (RIImm (ImmInt 0)) dst,
4202 BI ALWAYS False bid2, NOP,
4204 OR False g0 (RIImm (ImmInt 1)) dst,
4206 return (Any II32 code__2)
4208 #endif /* sparc_TARGET_ARCH */
4210 #if powerpc_TARGET_ARCH
4211 condReg getCond = do
4212 lbl1 <- getBlockIdNat
4213 lbl2 <- getBlockIdNat
4214 CondCode _ cond cond_code <- getCond
4216 {- code dst = cond_code `appOL` toOL [
4225 code dst = cond_code
4229 RLWINM dst dst (bit + 1) 31 31
4232 negate_code | do_negate = unitOL (CRNOR bit bit bit)
4235 (bit, do_negate) = case cond of
4249 return (Any II32 code)
4251 condIntReg cond x y = condReg (condIntCode cond x y)
4252 condFltReg cond x y = condReg (condFltCode cond x y)
4253 #endif /* powerpc_TARGET_ARCH */
4256 -- -----------------------------------------------------------------------------
4257 -- 'trivial*Code': deal with trivial instructions
4259 -- Trivial (dyadic: 'trivialCode', floating-point: 'trivialFCode',
4260 -- unary: 'trivialUCode', unary fl-pt:'trivialUFCode') instructions.
4261 -- Only look for constants on the right hand side, because that's
4262 -- where the generic optimizer will have put them.
4264 -- Similarly, for unary instructions, we don't have to worry about
4265 -- matching an StInt as the argument, because genericOpt will already
4266 -- have handled the constant-folding.
4269 :: Width -- Int only
4270 -> IF_ARCH_alpha((Reg -> RI -> Reg -> Instr)
4271 ,IF_ARCH_i386 ((Operand -> Operand -> Instr)
4272 -> Maybe (Operand -> Operand -> Instr)
4273 ,IF_ARCH_x86_64 ((Operand -> Operand -> Instr)
4274 -> Maybe (Operand -> Operand -> Instr)
4275 ,IF_ARCH_sparc((Reg -> RI -> Reg -> Instr)
4276 ,IF_ARCH_powerpc(Bool -> (Reg -> Reg -> RI -> Instr)
4278 -> CmmExpr -> CmmExpr -- the two arguments
4281 #ifndef powerpc_TARGET_ARCH
4283 :: Width -- Floating point only
4284 -> IF_ARCH_alpha((Reg -> Reg -> Reg -> Instr)
4285 ,IF_ARCH_sparc((Size -> Reg -> Reg -> Reg -> Instr)
4286 ,IF_ARCH_i386 ((Size -> Reg -> Reg -> Reg -> Instr)
4287 ,IF_ARCH_x86_64 ((Size -> Operand -> Operand -> Instr)
4289 -> CmmExpr -> CmmExpr -- the two arguments
4295 -> IF_ARCH_alpha((RI -> Reg -> Instr)
4296 ,IF_ARCH_i386 ((Operand -> Instr)
4297 ,IF_ARCH_x86_64 ((Operand -> Instr)
4298 ,IF_ARCH_sparc((RI -> Reg -> Instr)
4299 ,IF_ARCH_powerpc((Reg -> Reg -> Instr)
4301 -> CmmExpr -- the one argument
4304 #ifndef powerpc_TARGET_ARCH
4307 -> IF_ARCH_alpha((Reg -> Reg -> Instr)
4308 ,IF_ARCH_i386 ((Reg -> Reg -> Instr)
4309 ,IF_ARCH_x86_64 ((Reg -> Reg -> Instr)
4310 ,IF_ARCH_sparc((Reg -> Reg -> Instr)
4312 -> CmmExpr -- the one argument
4316 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4318 #if alpha_TARGET_ARCH
4320 trivialCode instr x (StInt y)
4322 = getRegister x `thenNat` \ register ->
4323 getNewRegNat IntRep `thenNat` \ tmp ->
4325 code = registerCode register tmp
4326 src1 = registerName register tmp
4327 src2 = ImmInt (fromInteger y)
4328 code__2 dst = code . mkSeqInstr (instr src1 (RIImm src2) dst)
4330 return (Any IntRep code__2)
4332 trivialCode instr x y
4333 = getRegister x `thenNat` \ register1 ->
4334 getRegister y `thenNat` \ register2 ->
4335 getNewRegNat IntRep `thenNat` \ tmp1 ->
4336 getNewRegNat IntRep `thenNat` \ tmp2 ->
4338 code1 = registerCode register1 tmp1 []
4339 src1 = registerName register1 tmp1
4340 code2 = registerCode register2 tmp2 []
4341 src2 = registerName register2 tmp2
4342 code__2 dst = asmSeqThen [code1, code2] .
4343 mkSeqInstr (instr src1 (RIReg src2) dst)
4345 return (Any IntRep code__2)
4348 trivialUCode instr x
4349 = getRegister x `thenNat` \ register ->
4350 getNewRegNat IntRep `thenNat` \ tmp ->
4352 code = registerCode register tmp
4353 src = registerName register tmp
4354 code__2 dst = code . mkSeqInstr (instr (RIReg src) dst)
4356 return (Any IntRep code__2)
-- | Alpha: binary floating-point operation.  Both operands are
-- computed into FF64 temporaries; the first (size) argument is
-- ignored on this target.
trivialFCode _ instr x y
  = getRegister x		`thenNat` \ register1 ->
    getRegister y		`thenNat` \ register2 ->
    getNewRegNat FF64		`thenNat` \ tmp1 ->
    getNewRegNat FF64		`thenNat` \ tmp2 ->
	code1 = registerCode register1 tmp1
	src1  = registerName register1 tmp1

	code2 = registerCode register2 tmp2
	src2  = registerName register2 tmp2

	-- run both operand computations, then dst := src1 `op` src2
	code__2 dst = asmSeqThen [code1 [], code2 []] .
		      mkSeqInstr (instr src1 src2 dst)
    return (Any FF64 code__2)
-- | Alpha: unary floating-point operation; dst := instr src.
trivialUFCode _ instr x
  = getRegister x		`thenNat` \ register ->
    getNewRegNat FF64		`thenNat` \ tmp ->
	code = registerCode register tmp
	src  = registerName register tmp
	code__2 dst = code . mkSeqInstr (instr src dst)
    return (Any FF64 code__2)
4386 #endif /* alpha_TARGET_ARCH */
4388 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4390 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
4393 The Rules of the Game are:
4395 * You cannot assume anything about the destination register dst;
4396 it may be anything, including a fixed reg.
4398 * You may compute an operand into a fixed reg, but you may not
4399 subsequently change the contents of that fixed reg. If you
4400 want to do so, first copy the value either to a temporary
4401 or into dst. You are free to modify dst even if it happens
4402 to be a fixed reg -- that's not your problem.
4404 * You cannot assume that a fixed reg will stay live over an
4405 arbitrary computation. The same applies to the dst reg.
4407 * Temporary regs obtained from getNewRegNat are distinct from
4408 each other and from all other regs, and stay live over
4409 arbitrary computations.
4411 --------------------
4413 SDM's version of The Rules:
4415 * If getRegister returns Any, that means it can generate correct
4416 code which places the result in any register, period. Even if that
4417 register happens to be read during the computation.
4419 Corollary #1: this means that if you are generating code for an
4420 operation with two arbitrary operands, you cannot assign the result
4421 of the first operand into the destination register before computing
4422 the second operand. The second operand might require the old value
4423 of the destination register.
4425 Corollary #2: A function might be able to generate more efficient
4426 code if it knows the destination register is a new temporary (and
4427 therefore not read by any of the sub-computations).
4429 * If getRegister returns Any, then the code it generates may modify only:
4430 (a) fresh temporaries
4431 (b) the destination register
4432 (c) known registers (eg. %ecx is used by shifts)
4433 In particular, it may *not* modify global registers, unless the global
4434 register happens to be the destination register.
-- | x86/x86_64: two-operand ALU op.  When the /first/ argument is a
-- 32-bit literal and a reversed form of the instruction exists,
-- evaluate b straight into the destination and apply the reversed
-- instruction with the literal as an immediate operand (saves a
-- register).  Otherwise fall through to the general case below.
trivialCode width instr (Just revinstr) (CmmLit lit_a) b
  | is32BitLit lit_a = do
  b_code <- getAnyReg b
  -- NOTE(review): the `let code dst` line introducing this binding is
  -- missing from this extract
	 = b_code dst `snocOL`
	   revinstr (OpImm (litToImm lit_a)) (OpReg dst)
  return (Any (intSize width) code)

trivialCode width instr maybe_revinstr a b
  = genTrivialCode (intSize width) instr a b
-- This is re-used for floating pt instructions too.
-- | Generate dst := a `op` b for a two-operand instruction set where
-- the instruction computes dst := dst `op` src.  b is evaluated to a
-- non-clobbered operand, a is computed directly into dst.
genTrivialCode rep instr a b = do
  (b_op, b_code) <- getNonClobberedOperand b
  a_code <- getAnyReg a
  tmp <- getNewRegNat rep

  -- We want the value of b to stay alive across the computation of a.
  -- But, we want to calculate a straight into the destination register,
  -- because the instruction only has two operands (dst := dst `op` src).
  -- The troublesome case is when the result of b is in the same register
  -- as the destination reg.  In this case, we have to save b in a
  -- new temporary across the computation of a.
  -- NOTE(review): the `code dst` binding and its otherwise-branch
  -- prefix lines are partially elided from this extract.
	 | dst `regClashesWithOp` b_op =
		-- clash: save b into tmp first
		unitOL (MOV rep b_op (OpReg tmp)) `appOL`
		instr (OpReg tmp) (OpReg dst)
		-- no clash: use b's operand directly
		instr b_op (OpReg dst)
  return (Any rep code)

-- Would writing to reg clobber (part of) the operand?
reg `regClashesWithOp` OpReg reg2   = reg == reg2
reg `regClashesWithOp` OpAddr amode = any (==reg) (addrModeRegs amode)
reg `regClashesWithOp` _            = False
-- | x86/x86_64: unary operation; x is computed into any register and
-- the instruction applied to it.
-- NOTE(review): the `code dst` binding is missing from this extract.
trivialUCode rep instr x = do
  x_code <- getAnyReg x
  return (Any rep code)
4491 #if i386_TARGET_ARCH
-- | i386 (x87): binary floating-point operation.  The register
-- getters used here also work for float registers.
-- NOTE(review): the `code dst` binding header is elided in this extract.
trivialFCode width instr x y = do
  (x_reg, x_code) <- getNonClobberedReg x -- these work for float regs too
  (y_reg, y_code) <- getSomeReg y
	 size = floatSize width
	   instr size x_reg y_reg dst
  return (Any size code)
4506 #if x86_64_TARGET_ARCH
-- | x86_64 (SSE2): floating-point binary ops use the same
-- two-operand shape as the integer ones, so simply reuse
-- 'genTrivialCode' at the float size.
trivialFCode pk instr x y =
    let size = floatSize pk
    in  genTrivialCode size (instr size) x y
-- | Unary floating-point operation: compute x into a register and
-- apply the instruction.
-- NOTE(review): the `code dst` binding is missing from this extract.
trivialUFCode size instr x = do
  (x_reg, x_code) <- getSomeReg x
  return (Any size code)
4523 #endif /* i386_TARGET_ARCH */
4525 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4527 #if sparc_TARGET_ARCH
-- | SPARC: two-argument ALU operation.  A literal second operand is
-- encoded in the instruction's immediate field; otherwise both
-- operands are computed into registers.
-- NOTE(review): the guard on the first clause (literal fits the
-- immediate field?) and its `= do` are elided from this extract.
trivialCode pk instr x (CmmLit (CmmInt y d))
    (src1, code) <- getSomeReg x
    tmp <- getNewRegNat II32
	src2 = ImmInt (fromInteger y)
	code__2 dst = code `snocOL` instr src1 (RIImm src2) dst
    return (Any II32 code__2)

trivialCode pk instr x y = do
    (src1, code1) <- getSomeReg x
    (src2, code2) <- getSomeReg y
    tmp1 <- getNewRegNat II32
    tmp2 <- getNewRegNat II32
	code__2 dst = code1 `appOL` code2 `snocOL`
		      instr src1 (RIReg src2) dst
    return (Any II32 code__2)
-- | SPARC: binary floating-point operation.  Float and double
-- registers are treated as separate kinds on this target, so if the
-- operand widths differ, the 32-bit operand is first promoted to
-- FF64 with FxTOy and the operation done at double width.
-- NOTE(review): the bindings of pk1/pk2 and the head of code__2 are
-- elided from this extract.
trivialFCode pk instr x y = do
    (src1, code1) <- getSomeReg x
    (src2, code2) <- getSomeReg y
    tmp1 <- getNewRegNat (cmmTypeSize $ cmmExprType x)
    tmp2 <- getNewRegNat (cmmTypeSize $ cmmExprType y)
    tmp  <- getNewRegNat FF64
	-- widen a single-precision source into the shared FF64 temp
	promote x = FxTOy FF32 FF64 x tmp
	    if pk1 `cmmEqType` pk2 then
		-- same width: operate directly
		code1 `appOL` code2 `snocOL`
		instr (floatSize pk) src1 src2 dst
	    else if typeWidth pk1 == W32 then
		-- promote the first operand
		code1 `snocOL` promote src1 `appOL` code2 `snocOL`
		instr FF64 tmp src2 dst
		-- (else) promote the second operand
		code1 `appOL` code2 `snocOL` promote src2 `snocOL`
		instr FF64 src1 tmp dst
    return (Any (cmmTypeSize $ if pk1 `cmmEqType` pk2 then pk1 else cmmFloat W64)
-- | SPARC: unary integer operation; dst := instr src.
trivialUCode size instr x = do
    (src, code) <- getSomeReg x
    tmp <- getNewRegNat size
	code__2 dst = code `snocOL` instr (RIReg src) dst
    return (Any size code__2)
-- | SPARC: unary floating-point operation; dst := instr src.
trivialUFCode pk instr x = do
    (src, code) <- getSomeReg x
    tmp <- getNewRegNat pk
	code__2 dst = code `snocOL` instr src dst
    return (Any pk code__2)
4591 #endif /* sparc_TARGET_ARCH */
4593 #if powerpc_TARGET_ARCH
4596 Wolfgang's PowerPC version of The Rules:
4598 A slightly modified version of The Rules to take advantage of the fact
4599 that PowerPC instructions work on all registers and don't implicitly
4600 clobber any fixed registers.
4602 * The only expression for which getRegister returns Fixed is (CmmReg reg).
4604 * If getRegister returns Any, then the code it generates may modify only:
4605 (a) fresh temporaries
4606 (b) the destination register
4607 It may *not* modify global registers, unless the global
4608 register happens to be the destination register.
4609 It may not clobber any other registers. In fact, only ccalls clobber any
4611 Also, it may not modify the counter register (used by genCCall).
4613 Corollary: If a getRegister for a subexpression returns Fixed, you need
4614 not move it to a fresh temporary before evaluating the next subexpression.
4615 The Fixed register won't be modified.
4616 Therefore, we don't need a counterpart for the x86's getStableReg on PPC.
4618 * SDM's First Rule is valid for PowerPC, too: subexpressions can depend on
4619 the value of the destination register.
-- | PowerPC: two-argument ALU operation.  A literal that
-- 'makeImmediate' can encode (given the width and signedness) goes
-- into the instruction's immediate field; otherwise both operands
-- are evaluated into registers.
-- NOTE(review): the `= do` of the first clause is elided here.
trivialCode rep signed instr x (CmmLit (CmmInt y _))
    | Just imm <- makeImmediate rep signed y
    (src1, code1) <- getSomeReg x
    let code dst = code1 `snocOL` instr dst src1 (RIImm imm)
    return (Any (intSize rep) code)

trivialCode rep signed instr x y = do
    (src1, code1) <- getSomeReg x
    (src2, code2) <- getSomeReg y
    let code dst = code1 `appOL` code2 `snocOL` instr dst src1 (RIReg src2)
    return (Any (intSize rep) code)
-- | PowerPC: three-register instruction with no immediate form;
-- generates dst := src1 `op` src2.
trivialCodeNoImm' :: Size -> (Reg -> Reg -> Reg -> Instr)
                  -> CmmExpr -> CmmExpr -> NatM Register
trivialCodeNoImm' size instr x y = do
    (reg_x, compute_x) <- getSomeReg x
    (reg_y, compute_y) <- getSomeReg y
    let assemble dst
          = compute_x `appOL` compute_y `snocOL` instr dst reg_x reg_y
    return (Any size assemble)
-- | Like 'trivialCodeNoImm'', but for instructions taking an explicit
-- Size argument: the size is applied to the instruction first.
trivialCodeNoImm :: Size -> (Size -> Reg -> Reg -> Reg -> Instr)
    -> CmmExpr -> CmmExpr -> NatM Register
trivialCodeNoImm size instr x y = trivialCodeNoImm' size (instr size) x y
-- | PowerPC: unary operation; dst := instr src.
trivialUCode rep instr x = do
    (srcReg, srcCode) <- getSomeReg x
    let withDest dst = srcCode `snocOL` instr dst srcReg
    return (Any rep withDest)
-- There is no "remainder" instruction on the PPC, so we have to do
-- it by hand: x `rem` y = x - (x `quot` y) * y.
-- The "div" parameter is the division instruction to use (DIVW or DIVWU)
remainderCode :: Width -> (Reg -> Reg -> Reg -> Instr)
    -> CmmExpr -> CmmExpr -> NatM Register
remainderCode rep div x y = do
    (src1, code1) <- getSomeReg x
    (src2, code2) <- getSomeReg y
    let code dst = code1 `appOL` code2 `appOL` toOL [
	    -- NOTE(review): the divide instruction and the final
	    -- subtract are elided from this extract
	    MULLW dst dst (RIReg src2),
    return (Any (intSize rep) code)
4668 #endif /* powerpc_TARGET_ARCH */
4671 -- -----------------------------------------------------------------------------
4672 -- Coercing to/from integer/floating-point...
4674 -- When going to integer, we truncate (round towards 0).
4676 -- @coerce(Int2FP|FP2Int)@ are more complicated integer/float
4677 -- conversions. We have to store temporaries in memory to move
4678 -- between the integer and the floating point register sets.
4680 -- @coerceDbl2Flt@ and @coerceFlt2Dbl@ are done this way because we
-- pretend, on sparc at least, that double and float regs are separate
4682 -- kinds, so the value has to be computed into one kind before being
4683 -- explicitly "converted" to live in the other kind.
-- | Per-architecture int<->float coercions; the two Width arguments
-- are the source and destination widths, in that order.
coerceInt2FP :: Width -> Width -> CmmExpr -> NatM Register
coerceFP2Int :: Width -> Width -> CmmExpr -> NatM Register
4688 #if sparc_TARGET_ARCH
-- SPARC treats float and double registers as separate kinds, so it
-- also needs explicit float<->double conversions.
coerceDbl2Flt :: CmmExpr -> NatM Register
coerceFlt2Dbl :: CmmExpr -> NatM Register
4693 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4695 #if alpha_TARGET_ARCH
-- Alpha int->float: the value is moved through a stack slot
-- (ST/LD at spRel 0) because the conversion goes via memory.
-- NOTE(review): the function head (coerceInt2FP ... =) and parts of
-- the instruction list are elided from this extract.
  = getRegister x		`thenNat` \ register ->
    getNewRegNat IntRep		`thenNat` \ reg ->
	code = registerCode register reg
	src  = registerName register reg
	code__2 dst = code . mkSeqInstrs [
	    LD TF dst (spRel 0),
    return (Any FF64 code__2)

-- Alpha float->int: again a round-trip through a stack slot.
-- NOTE(review): the function head (coerceFP2Int ... =) and parts of
-- the instruction list are elided from this extract.
  = getRegister x		`thenNat` \ register ->
    getNewRegNat FF64		`thenNat` \ tmp ->
	code = registerCode register tmp
	src  = registerName register tmp
	code__2 dst = code . mkSeqInstrs [
	    ST TF tmp (spRel 0),
    return (Any IntRep code__2)
4726 #endif /* alpha_TARGET_ARCH */
4728 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4730 #if i386_TARGET_ARCH
-- | i386 (x87): int->float via the GITOF/GITOD pseudo-instructions,
-- selected on the destination width.
coerceInt2FP from to x = do
    (x_reg, x_code) <- getSomeReg x
	opc  = case to of W32 -> GITOF; W64 -> GITOD
	code dst = x_code `snocOL` opc x_reg dst
	-- ToDo: works for non-II32 reps?
    return (Any (floatSize to) code)

-- | i386 (x87): float->int (truncating, per the module comment above)
-- via GFTOI/GDTOI, selected on the source width.
coerceFP2Int from to x = do
    (x_reg, x_code) <- getSomeReg x
	opc  = case from of W32 -> GFTOI; W64 -> GDTOI
	code dst = x_code `snocOL` opc x_reg dst
	-- ToDo: works for non-II32 reps?
    return (Any (intSize to) code)
4751 #endif /* i386_TARGET_ARCH */
4753 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4755 #if x86_64_TARGET_ARCH
-- | x86_64 (SSE2): float->int using the truncating converts
-- CVTTSS2SIQ/CVTTSD2SIQ, selected on the source width.
coerceFP2Int from to x = do
    (x_op, x_code) <- getOperand x  -- ToDo: could be a safe operand
	opc  = case from of W32 -> CVTTSS2SIQ; W64 -> CVTTSD2SIQ
	code dst = x_code `snocOL` opc x_op dst
    return (Any (intSize to) code) -- works even if the destination rep is <II32

-- | x86_64 (SSE2): int->float via CVTSI2SS/CVTSI2SD, selected on the
-- destination width.
coerceInt2FP from to x = do
    (x_op, x_code) <- getOperand x  -- ToDo: could be a safe operand
	opc  = case to of W32 -> CVTSI2SS; W64 -> CVTSI2SD
	code dst = x_code `snocOL` opc x_op dst
    return (Any (floatSize to) code) -- works even if the destination rep is <II32

-- | x86_64 (SSE2): float<->double conversion; the direction is
-- determined by the destination width alone.
coerceFP2FP :: Width -> CmmExpr -> NatM Register
coerceFP2FP to x = do
    (x_reg, x_code) <- getSomeReg x
	opc  = case to of W32 -> CVTSD2SS; W64 -> CVTSS2SD
	code dst = x_code `snocOL` opc x_reg dst
    return (Any (floatSize to) code)
4783 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4785 #if sparc_TARGET_ARCH
-- | SPARC: int->float by storing the integer to a stack slot,
-- reloading it into the destination (float) register, and converting
-- in place with FxTOy.
coerceInt2FP width1 width2 x = do
    (src, code) <- getSomeReg x
	code__2 dst = code `appOL` toOL [
	    ST (intSize width1) src (spRel (-2)),
	    LD (intSize width1) (spRel (-2)) dst,
	    FxTOy (intSize width1) (floatSize width2) dst dst]
    return (Any (floatSize $ width2) code__2)
-- | SPARC: float->int: convert within the FP registers (FxTOy), then
-- move the result through a stack slot into an integer register.
-- NOTE(review): pk is built from width1 and fprep from width2, the
-- opposite pairing to coerceInt2FP above -- confirm the intended
-- argument order against callers.
coerceFP2Int width1 width2 x = do
    let pk    = intSize width1
	fprep = floatSize width2

    (src, code) <- getSomeReg x
    reg <- getNewRegNat fprep
    tmp <- getNewRegNat pk
	code__2 dst = ASSERT(fprep == FF64 || fprep == FF32)
	    FxTOy fprep pk src tmp,
	    ST pk tmp (spRel (-2)),
	    LD pk (spRel (-2)) dst]
    return (Any pk code__2)
-- | SPARC: narrow a double to a float with a single FxTOy conversion.
coerceDbl2Flt x = do
    (srcReg, srcCode) <- getSomeReg x
    let narrow dst = srcCode `snocOL` FxTOy FF64 FF32 srcReg dst
    return (Any FF32 narrow)
-- | SPARC: widen a float to a double with a single FxTOy conversion.
coerceFlt2Dbl x = do
    (srcReg, srcCode) <- getSomeReg x
    let widen dst = srcCode `snocOL` FxTOy FF32 FF64 srcReg dst
    return (Any FF64 widen)
4822 #endif /* sparc_TARGET_ARCH */
4824 #if powerpc_TARGET_ARCH
-- | PowerPC: int->float using the bias-constant trick: the double
-- whose image is 0x4330000080000000 is kept in static data (loaded
-- PIC-safely via cmmMakeDynamicReference), the source word -- with
-- its sign bit flipped by XORIS -- is spliced in as the low word of
-- a double built on the stack, and subtracting the constant yields
-- the converted value.
-- NOTE(review): the lines emitting the static data header and the
-- load of the constant through addr are elided from this extract.
coerceInt2FP fromRep toRep x = do
    (src, code) <- getSomeReg x
    lbl <- getNewLabelNat
    itmp <- getNewRegNat II32
    ftmp <- getNewRegNat FF64
    dflags <- getDynFlagsNat
    dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl
    Amode addr addr_code <- getAmode dynRef
	code' dst = code `appOL` maybe_exts `appOL` toOL [
		-- static data: the two halves of the magic double
		CmmStaticLit (CmmInt 0x43300000 W32),
		CmmStaticLit (CmmInt 0x80000000 W32)],
	    XORIS itmp src (ImmInt 0x8000),	-- flip the sign bit
	    ST II32 itmp (spRel 3),		-- low word of the double
	    LIS itmp (ImmInt 0x4330),
	    ST II32 itmp (spRel 2),		-- high word of the double
	    LD FF64 ftmp (spRel 2)
	    ] `appOL` addr_code `appOL` toOL [
	    FSUB FF64 dst ftmp dst
	    ] `appOL` maybe_frsp dst

	-- sign-extend sub-word sources first
	maybe_exts = case fromRep of
			W8  -> unitOL $ EXTS II8 src src
			W16 -> unitOL $ EXTS II16 src src
	-- round to single precision when the destination is W32
	maybe_frsp dst = case toRep of
			W32 -> unitOL $ FRSP dst dst
    return (Any (floatSize toRep) code')
-- | PowerPC: float->int: convert inside the FP registers, spill the
-- 64-bit result to the stack, and read back the meaningful low word.
coerceFP2Int fromRep toRep x = do
    -- the reps don't really matter: F*->FF64 and II32->I* are no-ops
    (src, code) <- getSomeReg x
    tmp <- getNewRegNat FF64
	code' dst = code `appOL` toOL [
	    -- convert to int in FP reg
	    -- NOTE(review): the conversion instruction itself is
	    -- elided from this extract
	    -- store value (64bit) from FP to stack
	    ST FF64 tmp (spRel 2),
	    -- read low word of value (high word is undefined)
	    LD II32 dst (spRel 3)]
    return (Any (intSize toRep) code')
4871 #endif /* powerpc_TARGET_ARCH */
4874 -- -----------------------------------------------------------------------------
4875 -- eXTRA_STK_ARGS_HERE
4877 -- We (allegedly) put the first six C-call arguments in registers;
4878 -- where do we start putting the rest of them?
4880 -- Moved from MachInstrs (SDM):
4882 #if alpha_TARGET_ARCH || sparc_TARGET_ARCH
4883 eXTRA_STK_ARGS_HERE :: Int
4885 = IF_ARCH_alpha(0, IF_ARCH_sparc(23, ???))