2 -- The above warning suppression flag is a temporary kludge.
3 -- While working on this module you are encouraged to remove it and fix
4 -- any warnings in the module. See
5 -- http://hackage.haskell.org/trac/ghc/wiki/Commentary/CodingStyle#Warnings
8 -----------------------------------------------------------------------------
10 -- Generating machine code (instruction selection)
12 -- (c) The University of Glasgow 1996-2004
14 -----------------------------------------------------------------------------
16 -- This is a big module, but, if you pay attention to
17 -- (a) the sectioning, (b) the type signatures, and
18 -- (c) the #if blah_TARGET_ARCH} things, the
19 -- structure should not be too overwhelming.
28 #include "HsVersions.h"
29 #include "nativeGen/NCG.h"
46 -- Our intermediate code:
49 import PprCmm ( pprExpr )
52 import ClosureInfo ( C_SRT(..) )
55 import StaticFlags ( opt_PIC )
56 import ForeignCall ( CCallConv(..) )
59 import qualified Outputable as O
62 import FastBool ( isFastTrue )
63 import Constants ( wORD_SIZE )
66 import Debug.Trace ( trace )
68 import Control.Monad ( mapAndUnzipM )
69 import Data.Maybe ( fromJust )
-- | Code-generate one top-level Cmm item.  A CmmProc is translated
-- block-by-block via basicBlockCodeGen; statics emitted by the blocks
-- are appended after the proc, and on PIC platforms the PIC base is
-- initialized.  CmmData passes through untranslated.
-- NOTE(review): this listing is an excerpt — interior source lines are
-- elided (e.g. the scrutinee line of the Just/Nothing branches below).
78 -> NatM [NatCmmTop Instr]
80 cmmTopCodeGen dynflags
81 (CmmProc info lab params (ListGraph blocks)) = do
82 (nat_blocks,statics) <- mapAndUnzipM basicBlockCodeGen blocks
83 picBaseMb <- getPicBaseMaybeNat
84 let proc = CmmProc info lab params (ListGraph $ concat nat_blocks)
85 tops = proc : concat statics
86 os = platformOS $ targetPlatform dynflags
89 Just picBase -> initializePicBase_x86 ArchX86 os picBase tops
90 Nothing -> return tops
92 cmmTopCodeGen _ (CmmData sec dat) = do
93 return [CmmData sec dat] -- no translation, we just use CmmStatic
-- | Generate instructions for one basic block.  Code generation may
-- introduce new block boundaries (NEWBLOCK markers), so the resulting
-- instruction stream is re-split into basic blocks with foldrOL, and
-- any LDATA is hoisted out as static data.
98 -> NatM ( [NatBasicBlock Instr]
101 basicBlockCodeGen (BasicBlock id stmts) = do
102 instrs <- stmtsToInstrs stmts
103 -- code generation may introduce new basic block boundaries, which
104 -- are indicated by the NEWBLOCK instruction. We must split up the
105 -- instruction stream into basic blocks again. Also, we extract
108 (top,other_blocks,statics) = foldrOL mkBlocks ([],[],[]) instrs
-- mkBlocks: NEWBLOCK starts a fresh block; LDATA is collected as a
-- static; every other instruction is prepended to the current block.
110 mkBlocks (NEWBLOCK id) (instrs,blocks,statics)
111 = ([], BasicBlock id instrs : blocks, statics)
112 mkBlocks (LDATA sec dat) (instrs,blocks,statics)
113 = (instrs, blocks, CmmData sec dat:statics)
114 mkBlocks instr (instrs,blocks,statics)
115 = (instr:instrs, blocks, statics)
117 return (BasicBlock id top : other_blocks, statics)
-- | Concatenate the instruction blocks for a sequence of statements.
-- NOTE(review): the defining LHS line is elided in this listing.
120 stmtsToInstrs :: [CmmStmt] -> NatM InstrBlock
122 = do instrss <- mapM stmtToInstrs stmts
123 return (concatOL instrss)
-- | Select instructions for a single Cmm statement: register/memory
-- assignments dispatch on the value's type (float / 64-bit / integer),
-- calls and control flow go to the gen* helpers.
-- NOTE(review): the WORD_SIZE_IN_BITS==32 CPP blocks look unbalanced
-- because the matching #endif lines fall on elided source lines.
126 stmtToInstrs :: CmmStmt -> NatM InstrBlock
127 stmtToInstrs stmt = case stmt of
128 CmmNop -> return nilOL
129 CmmComment s -> return (unitOL (COMMENT s))
132 | isFloatType ty -> assignReg_FltCode size reg src
133 #if WORD_SIZE_IN_BITS==32
134 | isWord64 ty -> assignReg_I64Code reg src
136 | otherwise -> assignReg_IntCode size reg src
137 where ty = cmmRegType reg
138 size = cmmTypeSize ty
141 | isFloatType ty -> assignMem_FltCode size addr src
142 #if WORD_SIZE_IN_BITS==32
143 | isWord64 ty -> assignMem_I64Code addr src
145 | otherwise -> assignMem_IntCode size addr src
146 where ty = cmmExprType src
147 size = cmmTypeSize ty
149 CmmCall target result_regs args _ _
150 -> genCCall target result_regs args
152 CmmBranch id -> genBranch id
153 CmmCondBranch arg id -> genCondJump id arg
154 CmmSwitch arg ids -> genSwitch arg ids
155 CmmJump arg params -> genJump arg
157 panic "stmtToInstrs: return statement should have been cps'd away"
160 --------------------------------------------------------------------------------
161 -- | 'InstrBlock's are the insn sequences generated by the insn selectors.
162 -- They are really trees of insns to facilitate fast appending, where a
163 -- left-to-right traversal yields the insns in the correct order.
-- NOTE(review): the 'data' header lines of the declarations below fall
-- on elided source lines; only constructors and commentary are visible.
169 -- | Condition codes passed up the tree.
172 = CondCode Bool Cond InstrBlock
175 -- | a.k.a "Register64"
176 -- Reg is the lower 32-bit temporary which contains the result.
177 -- Use getHiVRegFromLo to find the other VRegUnique.
179 -- Rules of this simplified insn selection game are therefore that
180 -- the returned Reg may be modified
188 -- | Registers passed up the tree. If the stix code forces the register
189 -- to live in a pre-decided machine register, it comes out as @Fixed@;
190 -- otherwise, it comes out as @Any@, and the parent can decide which
191 -- register to put it in.
194 = Fixed Size Reg InstrBlock
195 | Any Size (Reg -> InstrBlock)
-- | Replace the width annotation carried by a 'Register', leaving the
-- underlying register / code-generation function untouched.
swizzleRegisterRep :: Register -> Size -> Register
swizzleRegisterRep reg size = case reg of
  Fixed _ r code -> Fixed size r code
  Any _ codefn   -> Any size codefn
203 -- | Grab the Reg for a CmmReg
-- Local registers become virtual registers sized by their Cmm type;
-- a global (magic) register must map to a real machine register on
-- this platform, otherwise we panic.
204 getRegisterReg :: CmmReg -> Reg
206 getRegisterReg (CmmLocal (LocalReg u pk))
207 = mkVReg u (cmmTypeSize pk)
209 getRegisterReg (CmmGlobal mid)
210 = case get_GlobalReg_reg_or_addr mid of
211 Left (RealReg rrno) -> RealReg rrno
212 _other -> pprPanic "getRegisterReg-memory" (ppr $ CmmGlobal mid)
213 -- By this stage, the only MagicIds remaining should be the
214 -- ones which map to a real machine register on this
215 -- platform. Hence ...
218 -- | Memory addressing modes passed up the tree.
-- NOTE(review): the 'data Amode' header line is elided in this listing.
220 = Amode AddrMode InstrBlock
223 Now, given a tree (the argument to an CmmLoad) that references memory,
224 produce a suitable addressing mode.
226 A Rule of the Game (tm) for Amodes: use of the addr bit must
227 immediately follow use of the code part, since the code part puts
228 values in registers which the addr then refers to. So you can't put
229 anything in between, lest it overwrite some of those registers. If
230 you need to do some other computation between the code part and use of
231 the addr bit, first store the effective address from the amode in a
232 temporary, then do the other computation, and then use the temporary:
236 ... other computation ...
-- | Check whether an integer will fit in 32 bits.
-- A CmmInt is intended to be truncated to the appropriate
-- number of bits, so here we truncate it to Int64 first.  This is
-- important because e.g. -1 as a CmmInt might arrive as either
-- -1 or 18446744073709551615; both truncate to the same Int64.
is32BitInteger :: Integer -> Bool
is32BitInteger i = -0x80000000 <= truncated && truncated <= 0x7fffffff
  where truncated = fromIntegral i :: Int64
-- | Convert a BlockId to some CmmStatic data: a reference to the
-- block's assembler-temporary label, or a zero word when the entry
-- is absent.
jumpTableEntry :: Maybe BlockId -> CmmStatic
jumpTableEntry mbBlockId = case mbBlockId of
  Nothing            -> CmmStaticLit (CmmInt 0 wordWidth)
  Just (BlockId uid) -> CmmStaticLit (CmmLabel (mkAsmTempLabel uid))
259 -- -----------------------------------------------------------------------------
260 -- General things for putting together code sequences
-- Expand CmmRegOff. ToDo: should we do it this way around, or convert
-- CmmExprs into CmmRegOff?

-- | Rewrite a 'CmmRegOff' node into the equivalent explicit addition
-- @reg + offset@ at the register's natural width.  Any other shape of
-- expression is a caller error; report it via 'panic' with a message
-- rather than an opaque pattern-match failure.
mangleIndexTree :: CmmExpr -> CmmExpr
mangleIndexTree (CmmRegOff reg off)
  = CmmMachOp (MO_Add width) [CmmReg reg, CmmLit (CmmInt (fromIntegral off) width)]
  where width = typeWidth (cmmRegType reg)
mangleIndexTree _
  = panic "X86.CodeGen.mangleIndexTree: no match"
269 -- | The dual to getAnyReg: compute an expression into a register, but
270 -- we don't mind which one it is.
-- NOTE(review): the case alternatives on the Register result are
-- partially elided here; the visible code allocates a fresh temporary
-- and runs the Any-style code into it.
271 getSomeReg :: CmmExpr -> NatM (Reg, InstrBlock)
273 r <- getRegister expr
276 tmp <- getNewRegNat rep
277 return (tmp, code tmp)
-- | Store a 64-bit value to memory on a 32-bit target as two 32-bit
-- moves: low word at the address, high word at offset +4
-- (little-endian layout).
285 assignMem_I64Code :: CmmExpr -> CmmExpr -> NatM InstrBlock
286 assignMem_I64Code addrTree valueTree = do
287 Amode addr addr_code <- getAmode addrTree
288 ChildCode64 vcode rlo <- iselExpr64 valueTree
290 rhi = getHiVRegFromLo rlo
292 -- Little-endian store
293 mov_lo = MOV II32 (OpReg rlo) (OpAddr addr)
-- NOTE(review): fromJust assumes addrOffset always succeeds for a +4
-- displacement — confirm that invariant holds for every Amode.
294 mov_hi = MOV II32 (OpReg rhi) (OpAddr (fromJust (addrOffset addr 4)))
296 return (vcode `appOL` addr_code `snocOL` mov_lo `snocOL` mov_hi)
-- | Assign a 64-bit value to a local register on a 32-bit target:
-- copy the low and high halves as two separate 32-bit reg-to-reg
-- moves.  Only local registers are valid lvalues here.
299 assignReg_I64Code :: CmmReg -> CmmExpr -> NatM InstrBlock
300 assignReg_I64Code (CmmLocal (LocalReg u_dst pk)) valueTree = do
301 ChildCode64 vcode r_src_lo <- iselExpr64 valueTree
303 r_dst_lo = mkVReg u_dst II32
304 r_dst_hi = getHiVRegFromLo r_dst_lo
305 r_src_hi = getHiVRegFromLo r_src_lo
306 mov_lo = MOV II32 (OpReg r_src_lo) (OpReg r_dst_lo)
307 mov_hi = MOV II32 (OpReg r_src_hi) (OpReg r_dst_hi)
310 vcode `snocOL` mov_lo `snocOL` mov_hi
313 assignReg_I64Code lvalue valueTree
314 = panic "assignReg_I64Code(i386): invalid lvalue"
-- | Select code for a 64-bit expression on a 32-bit target.  The
-- returned ChildCode64 carries the low 32-bit register; the high half
-- is always recovered via getHiVRegFromLo.  Literals, loads, local
-- registers, addition and unsigned widening are handled; everything
-- else panics.
319 iselExpr64 :: CmmExpr -> NatM ChildCode64
-- 64-bit literal: split into low word r and high word q.
320 iselExpr64 (CmmLit (CmmInt i _)) = do
321 (rlo,rhi) <- getNewRegPairNat II32
323 r = fromIntegral (fromIntegral i :: Word32)
324 q = fromIntegral ((fromIntegral i `shiftR` 32) :: Word32)
326 MOV II32 (OpImm (ImmInteger r)) (OpReg rlo),
327 MOV II32 (OpImm (ImmInteger q)) (OpReg rhi)
330 return (ChildCode64 code rlo)
-- 64-bit load: two 32-bit loads, high word at offset +4.
332 iselExpr64 (CmmLoad addrTree ty) | isWord64 ty = do
333 Amode addr addr_code <- getAmode addrTree
334 (rlo,rhi) <- getNewRegPairNat II32
336 mov_lo = MOV II32 (OpAddr addr) (OpReg rlo)
337 mov_hi = MOV II32 (OpAddr (fromJust (addrOffset addr 4))) (OpReg rhi)
340 ChildCode64 (addr_code `snocOL` mov_lo `snocOL` mov_hi)
344 iselExpr64 (CmmReg (CmmLocal (LocalReg vu ty))) | isWord64 ty
345 = return (ChildCode64 nilOL (mkVReg vu II32))
347 -- we handle addition, but rather badly
-- Add a 64-bit constant: ADD on the low word, ADC to propagate the
-- carry into the high word.
348 iselExpr64 (CmmMachOp (MO_Add _) [e1, CmmLit (CmmInt i _)]) = do
349 ChildCode64 code1 r1lo <- iselExpr64 e1
350 (rlo,rhi) <- getNewRegPairNat II32
352 r = fromIntegral (fromIntegral i :: Word32)
353 q = fromIntegral ((fromIntegral i `shiftR` 32) :: Word32)
354 r1hi = getHiVRegFromLo r1lo
356 toOL [ MOV II32 (OpReg r1lo) (OpReg rlo),
357 ADD II32 (OpImm (ImmInteger r)) (OpReg rlo),
358 MOV II32 (OpReg r1hi) (OpReg rhi),
359 ADC II32 (OpImm (ImmInteger q)) (OpReg rhi) ]
361 return (ChildCode64 code rlo)
-- General 64-bit addition: same ADD/ADC pattern with two operands.
363 iselExpr64 (CmmMachOp (MO_Add _) [e1,e2]) = do
364 ChildCode64 code1 r1lo <- iselExpr64 e1
365 ChildCode64 code2 r2lo <- iselExpr64 e2
366 (rlo,rhi) <- getNewRegPairNat II32
368 r1hi = getHiVRegFromLo r1lo
369 r2hi = getHiVRegFromLo r2lo
372 toOL [ MOV II32 (OpReg r1lo) (OpReg rlo),
373 ADD II32 (OpReg r2lo) (OpReg rlo),
374 MOV II32 (OpReg r1hi) (OpReg rhi),
375 ADC II32 (OpReg r2hi) (OpReg rhi) ]
377 return (ChildCode64 code rlo)
-- Unsigned widening to 64 bits: zero the high word.
379 iselExpr64 (CmmMachOp (MO_UU_Conv _ W64) [expr]) = do
381 r_dst_lo <- getNewRegNat II32
382 let r_dst_hi = getHiVRegFromLo r_dst_lo
385 ChildCode64 (code `snocOL`
386 MOV II32 (OpImm (ImmInt 0)) (OpReg r_dst_hi))
391 = pprPanic "iselExpr64(i386)" (ppr expr)
395 --------------------------------------------------------------------------------
-- | Turn a Cmm expression into a Register: Fixed when the value is
-- pinned to a specific machine register, Any when the parent may
-- choose the destination.
396 getRegister :: CmmExpr -> NatM Register
398 #if !x86_64_TARGET_ARCH
399 -- on x86_64, we have %rip for PicBaseReg, but it's not a full-featured
400 -- register, it can only be used for rip-relative addressing.
401 getRegister (CmmReg (CmmGlobal PicBaseReg))
403 reg <- getPicBaseNat archWordSize
404 return (Fixed archWordSize reg nilOL)
407 getRegister (CmmReg reg)
408 = return (Fixed (cmmTypeSize (cmmRegType reg))
409 (getRegisterReg reg) nilOL)
411 getRegister tree@(CmmRegOff _ _)
412 = getRegister (mangleIndexTree tree)
415 #if WORD_SIZE_IN_BITS==32
416 -- for 32-bit architectures, support some 64 -> 32 bit conversions:
417 -- TO_W_(x), TO_W_(x >> 32)
419 getRegister (CmmMachOp (MO_UU_Conv W64 W32)
420 [CmmMachOp (MO_U_Shr W64) [x,CmmLit (CmmInt 32 _)]]) = do
421 ChildCode64 code rlo <- iselExpr64 x
422 return $ Fixed II32 (getHiVRegFromLo rlo) code
424 getRegister (CmmMachOp (MO_SS_Conv W64 W32)
425 [CmmMachOp (MO_U_Shr W64) [x,CmmLit (CmmInt 32 _)]]) = do
426 ChildCode64 code rlo <- iselExpr64 x
427 return $ Fixed II32 (getHiVRegFromLo rlo) code
429 getRegister (CmmMachOp (MO_UU_Conv W64 W32) [x]) = do
430 ChildCode64 code rlo <- iselExpr64 x
431 return $ Fixed II32 rlo code
433 getRegister (CmmMachOp (MO_SS_Conv W64 W32) [x]) = do
434 ChildCode64 code rlo <- iselExpr64 x
435 return $ Fixed II32 rlo code
-- Float literals: on i386 loaded from a static via a dynamic
-- reference (with GLDZ/GLD1 shortcuts on elided lines for 0.0/1.0);
-- on x86_64 via rip-relative addressing, with xor-zeroing for 0.0.
444 getRegister (CmmLit (CmmFloat f W32)) = do
445 lbl <- getNewLabelNat
446 dflags <- getDynFlagsNat
447 dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl
448 Amode addr addr_code <- getAmode dynRef
452 CmmStaticLit (CmmFloat f W32)]
453 `consOL` (addr_code `snocOL`
456 return (Any FF32 code)
459 getRegister (CmmLit (CmmFloat d W64))
461 = let code dst = unitOL (GLDZ dst)
462 in return (Any FF64 code)
465 = let code dst = unitOL (GLD1 dst)
466 in return (Any FF64 code)
469 lbl <- getNewLabelNat
470 dflags <- getDynFlagsNat
471 dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl
472 Amode addr addr_code <- getAmode dynRef
476 CmmStaticLit (CmmFloat d W64)]
477 `consOL` (addr_code `snocOL`
480 return (Any FF64 code)
482 #endif /* i386_TARGET_ARCH */
487 #if x86_64_TARGET_ARCH
488 getRegister (CmmLit (CmmFloat 0.0 w)) = do
489 let size = floatSize w
490 code dst = unitOL (XOR size (OpReg dst) (OpReg dst))
491 -- I don't know why there are xorpd, xorps, and pxor instructions.
492 -- They all appear to do the same thing --SDM
493 return (Any size code)
495 getRegister (CmmLit (CmmFloat f w)) = do
496 lbl <- getNewLabelNat
497 let code dst = toOL [
500 CmmStaticLit (CmmFloat f w)],
501 MOV size (OpAddr (ripRel (ImmCLbl lbl))) (OpReg dst)
504 return (Any size code)
505 where size = floatSize w
507 #endif /* x86_64_TARGET_ARCH */
513 -- catch simple cases of zero- or sign-extended load
514 getRegister (CmmMachOp (MO_UU_Conv W8 W32) [CmmLoad addr _]) = do
515 code <- intLoadCode (MOVZxL II8) addr
516 return (Any II32 code)
518 getRegister (CmmMachOp (MO_SS_Conv W8 W32) [CmmLoad addr _]) = do
519 code <- intLoadCode (MOVSxL II8) addr
520 return (Any II32 code)
522 getRegister (CmmMachOp (MO_UU_Conv W16 W32) [CmmLoad addr _]) = do
523 code <- intLoadCode (MOVZxL II16) addr
524 return (Any II32 code)
526 getRegister (CmmMachOp (MO_SS_Conv W16 W32) [CmmLoad addr _]) = do
527 code <- intLoadCode (MOVSxL II16) addr
528 return (Any II32 code)
531 #if x86_64_TARGET_ARCH
533 -- catch simple cases of zero- or sign-extended load
534 getRegister (CmmMachOp (MO_UU_Conv W8 W64) [CmmLoad addr _]) = do
535 code <- intLoadCode (MOVZxL II8) addr
536 return (Any II64 code)
538 getRegister (CmmMachOp (MO_SS_Conv W8 W64) [CmmLoad addr _]) = do
539 code <- intLoadCode (MOVSxL II8) addr
540 return (Any II64 code)
542 getRegister (CmmMachOp (MO_UU_Conv W16 W64) [CmmLoad addr _]) = do
543 code <- intLoadCode (MOVZxL II16) addr
544 return (Any II64 code)
546 getRegister (CmmMachOp (MO_SS_Conv W16 W64) [CmmLoad addr _]) = do
547 code <- intLoadCode (MOVSxL II16) addr
548 return (Any II64 code)
550 getRegister (CmmMachOp (MO_UU_Conv W32 W64) [CmmLoad addr _]) = do
551 code <- intLoadCode (MOV II32) addr -- 32-bit loads zero-extend
552 return (Any II64 code)
554 getRegister (CmmMachOp (MO_SS_Conv W32 W64) [CmmLoad addr _]) = do
555 code <- intLoadCode (MOVSxL II32) addr
556 return (Any II64 code)
-- PIC base + displacement folds into a single rip-relative LEA.
558 getRegister (CmmMachOp (MO_Add W64) [CmmReg (CmmGlobal PicBaseReg),
559 CmmLit displacement])
560 = return $ Any II64 (\dst -> unitOL $
561 LEA II64 (OpAddr (ripRel (litToImm displacement))) (OpReg dst))
-- SSE float negation: xor the sign bit in from a 128-bit constant.
563 getRegister (CmmMachOp (MO_F_Neg W32) [x]) = do
564 x_code <- getAnyReg x
565 lbl <- getNewLabelNat
567 code dst = x_code dst `appOL` toOL [
568 -- This is how gcc does it, so it can't be that bad:
569 LDATA ReadOnlyData16 [
572 CmmStaticLit (CmmInt 0x80000000 W32),
573 CmmStaticLit (CmmInt 0 W32),
574 CmmStaticLit (CmmInt 0 W32),
575 CmmStaticLit (CmmInt 0 W32)
577 XOR FF32 (OpAddr (ripRel (ImmCLbl lbl))) (OpReg dst)
578 -- xorps, so we need the 128-bit constant
579 -- ToDo: rip-relative
582 return (Any FF32 code)
584 getRegister (CmmMachOp (MO_F_Neg W64) [x]) = do
585 x_code <- getAnyReg x
586 lbl <- getNewLabelNat
588 -- This is how gcc does it, so it can't be that bad:
589 code dst = x_code dst `appOL` toOL [
590 LDATA ReadOnlyData16 [
593 CmmStaticLit (CmmInt 0x8000000000000000 W64),
594 CmmStaticLit (CmmInt 0 W64)
596 -- gcc puts an unpck here. Wonder if we need it.
597 XOR FF64 (OpAddr (ripRel (ImmCLbl lbl))) (OpReg dst)
598 -- xorpd, so we need the 128-bit constant
601 return (Any FF64 code)
-- Unary MachOps: negation, bitwise complement, and the full matrix of
-- integer/float width conversions.  Narrowing to 8/16 bits is done by
-- re-typing the register (toI8Reg/toI16Reg); widening uses MOVZxL /
-- MOVSxL via integerExtend.
609 getRegister (CmmMachOp mop [x]) -- unary MachOps
612 MO_F_Neg W32 -> trivialUFCode FF32 (GNEG FF32) x
613 MO_F_Neg W64 -> trivialUFCode FF64 (GNEG FF64) x
616 MO_S_Neg w -> triv_ucode NEGI (intSize w)
617 MO_F_Neg w -> triv_ucode NEGI (floatSize w)
618 MO_Not w -> triv_ucode NOT (intSize w)
621 MO_UU_Conv W32 W8 -> toI8Reg W32 x
622 MO_SS_Conv W32 W8 -> toI8Reg W32 x
623 MO_UU_Conv W16 W8 -> toI8Reg W16 x
624 MO_SS_Conv W16 W8 -> toI8Reg W16 x
625 MO_UU_Conv W32 W16 -> toI16Reg W32 x
626 MO_SS_Conv W32 W16 -> toI16Reg W32 x
628 #if x86_64_TARGET_ARCH
629 MO_UU_Conv W64 W32 -> conversionNop II64 x
630 MO_SS_Conv W64 W32 -> conversionNop II64 x
631 MO_UU_Conv W64 W16 -> toI16Reg W64 x
632 MO_SS_Conv W64 W16 -> toI16Reg W64 x
633 MO_UU_Conv W64 W8 -> toI8Reg W64 x
634 MO_SS_Conv W64 W8 -> toI8Reg W64 x
637 MO_UU_Conv rep1 rep2 | rep1 == rep2 -> conversionNop (intSize rep1) x
638 MO_SS_Conv rep1 rep2 | rep1 == rep2 -> conversionNop (intSize rep1) x
641 MO_UU_Conv W8 W32 -> integerExtend W8 W32 MOVZxL x
642 MO_UU_Conv W16 W32 -> integerExtend W16 W32 MOVZxL x
643 MO_UU_Conv W8 W16 -> integerExtend W8 W16 MOVZxL x
645 MO_SS_Conv W8 W32 -> integerExtend W8 W32 MOVSxL x
646 MO_SS_Conv W16 W32 -> integerExtend W16 W32 MOVSxL x
647 MO_SS_Conv W8 W16 -> integerExtend W8 W16 MOVSxL x
649 #if x86_64_TARGET_ARCH
650 MO_UU_Conv W8 W64 -> integerExtend W8 W64 MOVZxL x
651 MO_UU_Conv W16 W64 -> integerExtend W16 W64 MOVZxL x
652 MO_UU_Conv W32 W64 -> integerExtend W32 W64 MOVZxL x
653 MO_SS_Conv W8 W64 -> integerExtend W8 W64 MOVSxL x
654 MO_SS_Conv W16 W64 -> integerExtend W16 W64 MOVSxL x
655 MO_SS_Conv W32 W64 -> integerExtend W32 W64 MOVSxL x
656 -- for 32-to-64 bit zero extension, amd64 uses an ordinary movl.
657 -- However, we don't want the register allocator to throw it
658 -- away as an unnecessary reg-to-reg move, so we keep it in
659 -- the form of a movzl and print it as a movl later.
663 MO_FF_Conv W32 W64 -> conversionNop FF64 x
664 MO_FF_Conv W64 W32 -> conversionNop FF32 x
666 MO_FF_Conv W32 W64 -> coerceFP2FP W64 x
667 MO_FF_Conv W64 W32 -> coerceFP2FP W32 x
670 MO_FS_Conv from to -> coerceFP2Int from to x
671 MO_SF_Conv from to -> coerceInt2FP from to x
673 other -> pprPanic "getRegister" (pprMachOp mop)
-- Helper: apply a simple one-operand instruction at the given size.
675 triv_ucode :: (Size -> Operand -> Instr) -> Size -> NatM Register
676 triv_ucode instr size = trivialUCode size (instr size) x
678 -- signed or unsigned extension.
679 integerExtend :: Width -> Width
680 -> (Size -> Operand -> Operand -> Instr)
681 -> CmmExpr -> NatM Register
682 integerExtend from to instr expr = do
683 (reg,e_code) <- if from == W8 then getByteReg expr
688 instr (intSize from) (OpReg reg) (OpReg dst)
689 return (Any (intSize to) code)
691 toI8Reg :: Width -> CmmExpr -> NatM Register
693 = do codefn <- getAnyReg expr
694 return (Any (intSize new_rep) codefn)
695 -- HACK: use getAnyReg to get a byte-addressable register.
696 -- If the source was a Fixed register, this will add the
697 -- mov instruction to put it into the desired destination.
698 -- We're assuming that the destination won't be a fixed
699 -- non-byte-addressable register; it won't be, because all
700 -- fixed registers are word-sized.
702 toI16Reg = toI8Reg -- for now
704 conversionNop :: Size -> CmmExpr -> NatM Register
705 conversionNop new_size expr
706 = do e_code <- getRegister expr
707 return (swizzleRegisterRep e_code new_size)
-- Dyadic MachOps: comparisons, float arithmetic (x87 on i386, SSE on
-- x86_64), integer arithmetic, shifts and division.  Helpers follow
-- in the where-clause.
710 getRegister e@(CmmMachOp mop [x, y]) -- dyadic MachOps
712 MO_F_Eq w -> condFltReg EQQ x y
713 MO_F_Ne w -> condFltReg NE x y
714 MO_F_Gt w -> condFltReg GTT x y
715 MO_F_Ge w -> condFltReg GE x y
716 MO_F_Lt w -> condFltReg LTT x y
717 MO_F_Le w -> condFltReg LE x y
719 MO_Eq rep -> condIntReg EQQ x y
720 MO_Ne rep -> condIntReg NE x y
722 MO_S_Gt rep -> condIntReg GTT x y
723 MO_S_Ge rep -> condIntReg GE x y
724 MO_S_Lt rep -> condIntReg LTT x y
725 MO_S_Le rep -> condIntReg LE x y
727 MO_U_Gt rep -> condIntReg GU x y
728 MO_U_Ge rep -> condIntReg GEU x y
729 MO_U_Lt rep -> condIntReg LU x y
730 MO_U_Le rep -> condIntReg LEU x y
733 MO_F_Add w -> trivialFCode w GADD x y
734 MO_F_Sub w -> trivialFCode w GSUB x y
735 MO_F_Quot w -> trivialFCode w GDIV x y
736 MO_F_Mul w -> trivialFCode w GMUL x y
739 #if x86_64_TARGET_ARCH
740 MO_F_Add w -> trivialFCode w ADD x y
741 MO_F_Sub w -> trivialFCode w SUB x y
742 MO_F_Quot w -> trivialFCode w FDIV x y
743 MO_F_Mul w -> trivialFCode w MUL x y
746 MO_Add rep -> add_code rep x y
747 MO_Sub rep -> sub_code rep x y
749 MO_S_Quot rep -> div_code rep True True x y
750 MO_S_Rem rep -> div_code rep True False x y
751 MO_U_Quot rep -> div_code rep False True x y
752 MO_U_Rem rep -> div_code rep False False x y
754 MO_S_MulMayOflo rep -> imulMayOflo rep x y
756 MO_Mul rep -> triv_op rep IMUL
757 MO_And rep -> triv_op rep AND
758 MO_Or rep -> triv_op rep OR
759 MO_Xor rep -> triv_op rep XOR
761 {- Shift ops on x86s have constraints on their source, it
762 either has to be Imm, CL or 1
763 => trivialCode is not restrictive enough (sigh.)
765 MO_Shl rep -> shift_code rep SHL x y {-False-}
766 MO_U_Shr rep -> shift_code rep SHR x y {-False-}
767 MO_S_Shr rep -> shift_code rep SAR x y {-False-}
769 other -> pprPanic "getRegister(x86) - binary CmmMachOp (1)" (pprMachOp mop)
772 triv_op width instr = trivialCode width op (Just op) x y
773 where op = instr (intSize width)
-- Overflow check for signed multiply: widen via IMUL2 into %edx:%eax
-- and compare the high part with the sign-extension of the low part.
775 imulMayOflo :: Width -> CmmExpr -> CmmExpr -> NatM Register
776 imulMayOflo rep a b = do
777 (a_reg, a_code) <- getNonClobberedReg a
778 b_code <- getAnyReg b
780 shift_amt = case rep of
783 _ -> panic "shift_amt"
786 code = a_code `appOL` b_code eax `appOL`
788 IMUL2 size (OpReg a_reg), -- result in %edx:%eax
789 SAR size (OpImm (ImmInt shift_amt)) (OpReg eax),
790 -- sign extend lower part
791 SUB size (OpReg edx) (OpReg eax)
792 -- compare against upper
793 -- eax==0 if high part == sign extended low part
796 return (Fixed size eax code)
800 -> (Size -> Operand -> Operand -> Instr)
805 {- Case1: shift length as immediate -}
806 shift_code width instr x y@(CmmLit lit) = do
807 x_code <- getAnyReg x
811 = x_code dst `snocOL`
812 instr size (OpImm (litToImm lit)) (OpReg dst)
814 return (Any size code)
816 {- Case2: shift length is complex (non-immediate)
818 * we cannot do y first *and* put its result in %ecx, because
819 %ecx might be clobbered by x.
820 * if we do y second, then x cannot be
821 in a clobbered reg. Also, we cannot clobber x's reg
822 with the instruction itself.
824 - do y first, put its result in a fresh tmp, then copy it to %ecx later
825 - do y second and put its result into %ecx. x gets placed in a fresh
826 tmp. This is likely to be better, because the reg alloc can
827 eliminate this reg->reg move here (it won't eliminate the other one,
828 because the move is into the fixed %ecx).
830 shift_code width instr x y{-amount-} = do
831 x_code <- getAnyReg x
832 let size = intSize width
833 tmp <- getNewRegNat size
834 y_code <- getAnyReg y
836 code = x_code tmp `appOL`
838 instr size (OpReg ecx) (OpReg tmp)
840 return (Fixed size tmp code)
-- Addition with a 32-bit immediate goes through the 3-operand LEA
-- form (add_int); everything else is a plain ADD.
843 add_code :: Width -> CmmExpr -> CmmExpr -> NatM Register
844 add_code rep x (CmmLit (CmmInt y _))
845 | is32BitInteger y = add_int rep x y
846 add_code rep x y = trivialCode rep (ADD size) (Just (ADD size)) x y
847 where size = intSize rep
-- Subtracting a constant becomes adding its negation (when that
-- negation still fits in 32 bits).
850 sub_code :: Width -> CmmExpr -> CmmExpr -> NatM Register
851 sub_code rep x (CmmLit (CmmInt y _))
852 | is32BitInteger (-y) = add_int rep x (-y)
853 sub_code rep x y = trivialCode rep (SUB (intSize rep)) Nothing x y
855 -- our three-operand add instruction:
856 add_int width x y = do
857 (x_reg, x_code) <- getSomeReg x
860 imm = ImmInt (fromInteger y)
864 (OpAddr (AddrBaseIndex (EABaseReg x_reg) EAIndexNone imm))
867 return (Any size code)
869 ----------------------
-- Division: dividend in %eax, widened into %edx (CLTD for signed,
-- xor %edx for unsigned); IDIV/DIV leave quotient in %eax and
-- remainder in %edx.
870 div_code width signed quotient x y = do
871 (y_op, y_code) <- getRegOrMem y -- cannot be clobbered
872 x_code <- getAnyReg x
875 widen | signed = CLTD size
876 | otherwise = XOR size (OpReg edx) (OpReg edx)
878 instr | signed = IDIV
881 code = y_code `appOL`
883 toOL [widen, instr size y_op]
885 result | quotient = eax
889 return (Fixed size result code)
-- Plain loads and literals.  Float loads use GLD on i386 and MOV on
-- x86_64 (IF_ARCH_i386 selects at build time).
892 getRegister (CmmLoad mem pk)
895 Amode src mem_code <- getAmode mem
897 size = cmmTypeSize pk
898 code dst = mem_code `snocOL`
899 IF_ARCH_i386(GLD size src dst,
900 MOV size (OpAddr src) (OpReg dst))
901 return (Any size code)
904 getRegister (CmmLoad mem pk)
907 code <- intLoadCode instr mem
908 return (Any size code)
912 instr = case width of
915 -- We always zero-extend 8-bit loads, if we
916 -- can't think of anything better. This is because
917 -- we can't guarantee access to an 8-bit variant of every register
918 -- (esi and edi don't have 8-bit variants), so to make things
919 -- simpler we do our 8-bit arithmetic with full 32-bit registers.
922 #if x86_64_TARGET_ARCH
923 -- Simpler memory load code on x86_64
924 getRegister (CmmLoad mem pk)
926 code <- intLoadCode (MOV size) mem
927 return (Any size code)
928 where size = intSize $ typeWidth pk
-- Zero literal: xor dst,dst is shorter than mov $0; on x86_64 the
-- 32-bit form is used since it zero-extends to 64 bits anyway.
931 getRegister (CmmLit (CmmInt 0 width))
935 -- x86_64: 32-bit xor is one byte shorter, and zero-extends to 64 bits
936 adj_size = case size of II64 -> II32; _ -> size
937 size1 = IF_ARCH_i386( size, adj_size )
939 = unitOL (XOR size1 (OpReg dst) (OpReg dst))
941 return (Any size code)
943 #if x86_64_TARGET_ARCH
944 -- optimisation for loading small literals on x86_64: take advantage
945 -- of the automatic zero-extension from 32 to 64 bits, because the 32-bit
946 -- instruction forms are shorter.
947 getRegister (CmmLit lit)
948 | isWord64 (cmmLitType lit), not (isBigLit lit)
951 code dst = unitOL (MOV II32 (OpImm imm) (OpReg dst))
953 return (Any II64 code)
955 isBigLit (CmmInt i _) = i < 0 || i > 0xffffffff
957 -- note1: not the same as (not.is32BitLit), because that checks for
958 -- signed literals that fit in 32 bits, but we want unsigned
960 -- note2: all labels are small, because we're assuming the
961 -- small memory model (see gcc docs, -mcmodel=small).
964 getRegister (CmmLit lit)
966 size = cmmTypeSize (cmmLitType lit)
968 code dst = unitOL (MOV size (OpImm imm) (OpReg dst))
970 return (Any size code)
972 getRegister other = pprPanic "getRegister(x86)" (ppr other)
-- | Build a loader for an integer value: compute the address with
-- 'getAmode', then apply the supplied move-style instruction from
-- that address into whichever destination register the caller picks.
intLoadCode :: (Operand -> Operand -> Instr) -> CmmExpr
            -> NatM (Reg -> InstrBlock)
intLoadCode instr mem = do
  Amode src mem_code <- getAmode mem
  let load dst = mem_code `snocOL` instr (OpAddr src) (OpReg dst)
  return load
981 -- Compute an expression into *any* register, adding the appropriate
982 -- move instruction if necessary.
983 getAnyReg :: CmmExpr -> NatM (Reg -> InstrBlock)
985 r <- getRegister expr
-- anyReg adapts a Register to the (Reg -> InstrBlock) shape: an Any
-- passes straight through, a Fixed gets a trailing reg-to-reg move.
988 anyReg :: Register -> NatM (Reg -> InstrBlock)
989 anyReg (Any _ code) = return code
990 anyReg (Fixed rep reg fcode) = return (\dst -> fcode `snocOL` reg2reg rep reg dst)
992 -- A bit like getSomeReg, but we want a reg that can be byte-addressed.
993 -- Fixed registers might not be byte-addressable, so we make sure we've
994 -- got a temporary, inserting an extra reg copy if necessary.
995 getByteReg :: CmmExpr -> NatM (Reg, InstrBlock)
996 #if x86_64_TARGET_ARCH
997 getByteReg = getSomeReg -- all regs are byte-addressable on x86_64
-- i386 path: virtual registers are fine as-is; fixed real registers
-- are copied into a fresh temporary.
1000 r <- getRegister expr
1003 tmp <- getNewRegNat rep
1004 return (tmp, code tmp)
1006 | isVirtualReg reg -> return (reg,code)
1008 tmp <- getNewRegNat rep
1009 return (tmp, code `snocOL` reg2reg rep reg tmp)
1010 -- ToDo: could optimise slightly by checking for byte-addressable
1011 -- real registers, but that will happen very rarely if at all.
1014 -- Another variant: this time we want the result in a register that cannot
1015 -- be modified by code to evaluate an arbitrary expression.
1016 getNonClobberedReg :: CmmExpr -> NatM (Reg, InstrBlock)
1017 getNonClobberedReg expr = do
1018 r <- getRegister expr
1021 tmp <- getNewRegNat rep
1022 return (tmp, code tmp)
1024 -- only free regs can be clobbered
-- A Fixed result in a free real register is copied to a fresh
-- temporary so later code cannot clobber it.
1025 | RealReg rr <- reg, isFastTrue (freeReg rr) -> do
1026 tmp <- getNewRegNat rep
1027 return (tmp, code `snocOL` reg2reg rep reg tmp)
-- | A single register-to-register move; floats on i386 use the x87
-- GMOV form.  (The matching #else/#endif of the CPP block fall on
-- elided source lines.)
1031 reg2reg :: Size -> Reg -> Reg -> Instr
1032 reg2reg size src dst
1033 #if i386_TARGET_ARCH
1034 | isFloatSize size = GMOV src dst
1036 | otherwise = MOV size (OpReg src) (OpReg dst)
1040 --------------------------------------------------------------------------------
-- | Produce an addressing mode for a memory-referencing expression,
-- recognising base+offset, base+index<<scale, and rip-relative
-- (x86_64 PIC) shapes.
1041 getAmode :: CmmExpr -> NatM Amode
1042 getAmode tree@(CmmRegOff _ _) = getAmode (mangleIndexTree tree)
1044 #if x86_64_TARGET_ARCH
1046 getAmode (CmmMachOp (MO_Add W64) [CmmReg (CmmGlobal PicBaseReg),
1047 CmmLit displacement])
1048 = return $ Amode (ripRel (litToImm displacement)) nilOL
1053 -- This is all just ridiculous, since it carefully undoes
1054 -- what mangleIndexTree has just done.
1055 getAmode (CmmMachOp (MO_Sub rep) [x, CmmLit lit@(CmmInt i _)])
1057 -- ASSERT(rep == II32)???
1058 = do (x_reg, x_code) <- getSomeReg x
1059 let off = ImmInt (-(fromInteger i))
1060 return (Amode (AddrBaseIndex (EABaseReg x_reg) EAIndexNone off) x_code)
1062 getAmode (CmmMachOp (MO_Add rep) [x, CmmLit lit@(CmmInt i _)])
1064 -- ASSERT(rep == II32)???
1065 = do (x_reg, x_code) <- getSomeReg x
1066 let off = ImmInt (fromInteger i)
1067 return (Amode (AddrBaseIndex (EABaseReg x_reg) EAIndexNone off) x_code)
1069 -- Turn (lit1 << n + lit2) into (lit2 + lit1 << n) so it will be
1070 -- recognised by the next rule.
1071 getAmode (CmmMachOp (MO_Add rep) [a@(CmmMachOp (MO_Shl _) _),
1073 = getAmode (CmmMachOp (MO_Add rep) [b,a])
1075 getAmode (CmmMachOp (MO_Add rep) [x, CmmMachOp (MO_Shl _)
1076 [y, CmmLit (CmmInt shift _)]])
1077 | shift == 0 || shift == 1 || shift == 2 || shift == 3
1078 = x86_complex_amode x y shift 0
-- base + (index << scale) + offset.
getAmode (CmmMachOp (MO_Add rep)
                [x, CmmMachOp (MO_Add _)
                        [CmmMachOp (MO_Shl _) [y, CmmLit (CmmInt shift _)],
                         CmmLit (CmmInt offset _)]])
  -- NB: the shift tests must be parenthesised: (&&) binds tighter
  -- than (||), so without the parens the is32BitInteger guard only
  -- applied to the shift==3 case, and an over-wide offset for shift
  -- 0/1/2 would be silently truncated by x86_complex_amode's
  -- fromIntegral.
  | (shift == 0 || shift == 1 || shift == 2 || shift == 3)
    && is32BitInteger offset
  = x86_complex_amode x y shift offset
-- Fallbacks: plain addition uses a zero-scale complex amode, small
-- literals become absolute addresses, and anything else is computed
-- into a register used as the base.
1088 getAmode (CmmMachOp (MO_Add rep) [x,y])
1089 = x86_complex_amode x y 0 0
1091 getAmode (CmmLit lit) | is32BitLit lit
1092 = return (Amode (ImmAddr (litToImm lit) 0) nilOL)
1095 (reg,code) <- getSomeReg expr
1096 return (Amode (AddrBaseIndex (EABaseReg reg) EAIndexNone (ImmInt 0)) code)
-- | Build a base + index*scale + offset addressing mode.  'shift' is
-- the log2 of the scale (0..3 -> scale 1/2/4/8).
1099 x86_complex_amode :: CmmExpr -> CmmExpr -> Integer -> Integer -> NatM Amode
1100 x86_complex_amode base index shift offset
1101 = do (x_reg, x_code) <- getNonClobberedReg base
1102 -- x must be in a temp, because it has to stay live over y_code
1103 -- we could compare x_reg and y_reg and do something better here...
1104 (y_reg, y_code) <- getSomeReg index
1106 code = x_code `appOL` y_code
-- NOTE(review): this local 'base' (the scale factor) shadows the
-- 'base' parameter (the base expression) — consider renaming.  The
-- case is non-exhaustive; callers are expected to pass shift 0..3.
1107 base = case shift of 0 -> 1; 1 -> 2; 2 -> 4; 3 -> 8
1108 return (Amode (AddrBaseIndex (EABaseReg x_reg) (EAIndex y_reg base) (ImmInt (fromIntegral offset)))
1114 -- -----------------------------------------------------------------------------
1115 -- getOperand: sometimes any operand will do.
1117 -- getNonClobberedOperand: the value of the operand will remain valid across
1118 -- the computation of an arbitrary expression, unless the expression
1119 -- is computed directly into a register which the operand refers to
1120 -- (see trivialCode where this function is used for an example).
-- | Produce an operand whose value stays valid across the computation of an
-- arbitrary expression (see the comment block above).  Literals become
-- immediates or rip-relative loads; loads keep their address mode unless the
-- address uses a clobberable register, in which case the address is saved
-- into a fresh temporary with LEA; anything else is forced into a
-- non-clobbered register.
1122 getNonClobberedOperand :: CmmExpr -> NatM (Operand, InstrBlock)
1123 #if x86_64_TARGET_ARCH
-- Non-zero float literals are placed in a read-only data section and
-- addressed rip-relative (x86_64 only).
1124 getNonClobberedOperand (CmmLit lit)
1125 | isSuitableFloatingPointLit lit = do
1126 lbl <- getNewLabelNat
1127 let code = unitOL (LDATA ReadOnlyData [CmmDataLabel lbl,
1129 return (OpAddr (ripRel (ImmCLbl lbl)), code)
-- Small non-float literals can be immediates.
1131 getNonClobberedOperand (CmmLit lit)
1132 | is32BitLit lit && not (isFloatType (cmmLitType lit)) =
1133 return (OpImm (litToImm lit), nilOL)
1134 getNonClobberedOperand (CmmLoad mem pk)
1135 | IF_ARCH_i386(not (isFloatType pk) && not (isWord64 pk), True) = do
1136 Amode src mem_code <- getAmode mem
-- If the amode mentions a real (clobberable) register, materialise the
-- address into a fresh temp so it survives later computations.
1138 if (amodeCouldBeClobbered src)
1140 tmp <- getNewRegNat archWordSize
1141 return (AddrBaseIndex (EABaseReg tmp) EAIndexNone (ImmInt 0),
1142 unitOL (LEA II32 (OpAddr src) (OpReg tmp)))
1145 return (OpAddr src', save_code `appOL` mem_code)
-- Fallback: compute into a register that won't be clobbered.
1146 getNonClobberedOperand e = do
1147 (reg, code) <- getNonClobberedReg e
1148 return (OpReg reg, code)
-- | True if any register mentioned by the addressing mode could be
-- overwritten by an intervening computation (see 'regClobbered').
amodeCouldBeClobbered :: AddrMode -> Bool
amodeCouldBeClobbered amode =
    or [ regClobbered r | r <- addrModeRegs amode ]
-- | A real machine register counts as clobberable when 'freeReg' says it
-- is available to the allocator; virtual registers never do.
regClobbered reg = case reg of
    RealReg rr -> isFastTrue (freeReg rr)
    _          -> False
1156 -- getOperand: the operand is not required to remain valid across the
1157 -- computation of an arbitrary expression.
-- | Produce any operand for the expression; unlike
-- 'getNonClobberedOperand' the result need not survive later computations.
1158 getOperand :: CmmExpr -> NatM (Operand, InstrBlock)
1159 #if x86_64_TARGET_ARCH
-- Non-zero float literal: read-only data + rip-relative address (x86_64).
1160 getOperand (CmmLit lit)
1161 | isSuitableFloatingPointLit lit = do
1162 lbl <- getNewLabelNat
1163 let code = unitOL (LDATA ReadOnlyData [CmmDataLabel lbl,
1165 return (OpAddr (ripRel (ImmCLbl lbl)), code)
-- Small non-float literal: immediate operand, no code.
1167 getOperand (CmmLit lit)
1168 | is32BitLit lit && not (isFloatType (cmmLitType lit)) = do
1169 return (OpImm (litToImm lit), nilOL)
-- Load: use the address mode directly as a memory operand.
1170 getOperand (CmmLoad mem pk)
1171 | IF_ARCH_i386(not (isFloatType pk) && not (isWord64 pk), True) = do
1172 Amode src mem_code <- getAmode mem
1173 return (OpAddr src, mem_code)
-- Fallback equation ('getOperand e = do' head appears elided in this listing):
-- compute into some register.
1175 (reg, code) <- getSomeReg e
1176 return (OpReg reg, code)
-- | Can this expression be used directly as an instruction operand
-- (memory load, small immediate, or usable float literal)?
-- NOTE(review): the catch-all 'isOperand _ = False' equation appears to be
-- elided from this listing.
1178 isOperand :: CmmExpr -> Bool
1179 isOperand (CmmLoad _ _) = True
1180 isOperand (CmmLit lit) = is32BitLit lit
1181 || isSuitableFloatingPointLit lit
1184 -- if we want a floating-point literal as an operand, we can
1185 -- use it directly from memory. However, if the literal is
1186 -- zero, we're better off generating it into a register using
-- | A float literal may be used directly from memory as an operand —
-- except 0.0, which is cheaper to generate into a register (per the
-- comment above this function).
isSuitableFloatingPointLit lit = case lit of
    CmmFloat f _ -> f /= 0.0
    _            -> False
-- | Like 'getOperand' but restricted to a register or memory operand
-- (no immediates); the register case goes through 'getNonClobberedReg'.
1191 getRegOrMem :: CmmExpr -> NatM (Operand, InstrBlock)
1192 getRegOrMem (CmmLoad mem pk)
1193 | IF_ARCH_i386(not (isFloatType pk) && not (isWord64 pk), True) = do
1194 Amode src mem_code <- getAmode mem
1195 return (OpAddr src, mem_code)
-- Fallback equation (the 'getRegOrMem e = do' head appears elided here).
1197 (reg, code) <- getNonClobberedReg e
1198 return (OpReg reg, code)
-- | On x86_64, a 64-bit integer literal only fits in an immediate field
-- if it is representable in 32 bits.  (Remaining equations, including the
-- non-x86_64 fallback, appear elided from this listing.)
1200 #if x86_64_TARGET_ARCH
1201 is32BitLit (CmmInt i W64) = is32BitInteger i
1202 -- assume that labels are in the range 0-2^31-1: this assumes the
1203 -- small memory model (see gcc docs, -mcmodel=small).
1210 -- Set up a condition code for a conditional branch.
-- | Turn a boolean-valued comparison expression into a 'CondCode':
-- instructions that set the flags, plus the condition to branch on.
-- Float comparisons dispatch to 'condFltCode', integer ones to
-- 'condIntCode'.  (The 'case mop of' scrutinee line appears elided here.)
1212 getCondCode :: CmmExpr -> NatM CondCode
1214 -- yes, they really do seem to want exactly the same!
1216 getCondCode (CmmMachOp mop [x, y])
1219 MO_F_Eq W32 -> condFltCode EQQ x y
1220 MO_F_Ne W32 -> condFltCode NE x y
1221 MO_F_Gt W32 -> condFltCode GTT x y
1222 MO_F_Ge W32 -> condFltCode GE x y
1223 MO_F_Lt W32 -> condFltCode LTT x y
1224 MO_F_Le W32 -> condFltCode LE x y
1226 MO_F_Eq W64 -> condFltCode EQQ x y
1227 MO_F_Ne W64 -> condFltCode NE x y
1228 MO_F_Gt W64 -> condFltCode GTT x y
1229 MO_F_Ge W64 -> condFltCode GE x y
1230 MO_F_Lt W64 -> condFltCode LTT x y
1231 MO_F_Le W64 -> condFltCode LE x y
-- Integer comparisons; width ('rep') is unused because the operand
-- width is recovered from the operands themselves in condIntCode.
1233 MO_Eq rep -> condIntCode EQQ x y
1234 MO_Ne rep -> condIntCode NE x y
1236 MO_S_Gt rep -> condIntCode GTT x y
1237 MO_S_Ge rep -> condIntCode GE x y
1238 MO_S_Lt rep -> condIntCode LTT x y
1239 MO_S_Le rep -> condIntCode LE x y
-- Unsigned comparisons use the unsigned condition codes.
1241 MO_U_Gt rep -> condIntCode GU x y
1242 MO_U_Ge rep -> condIntCode GEU x y
1243 MO_U_Lt rep -> condIntCode LU x y
1244 MO_U_Le rep -> condIntCode LEU x y
1246 other -> pprPanic "getCondCode(x86,x86_64,sparc)" (ppr (CmmMachOp mop [x,y]))
1248 getCondCode other = pprPanic "getCondCode(2)(x86,sparc)" (ppr other)
1253 -- @cond(Int|Flt)Code@: Turn a boolean expression into a condition, to be
1254 -- passed back up the tree.
-- | Set the flags for an integer comparison, choosing progressively more
-- general instruction forms: memory-vs-immediate CMP, TEST against a mask,
-- TEST against self (compare with zero), register-vs-operand CMP, and
-- finally register-vs-reg/mem CMP.
-- NOTE(review): several 'let' keyword lines appear elided in this listing.
1256 condIntCode :: Cond -> CmmExpr -> CmmExpr -> NatM CondCode
1258 -- memory vs immediate
1259 condIntCode cond (CmmLoad x pk) (CmmLit lit) | is32BitLit lit = do
1260 Amode x_addr x_code <- getAmode x
1263 code = x_code `snocOL`
1264 CMP (cmmTypeSize pk) (OpImm imm) (OpAddr x_addr)
1266 return (CondCode False cond code)
1268 -- anything vs zero, using a mask
1269 -- TODO: Add some sanity checking!!!!
-- (x .&. mask) `cmp` 0  ==>  TEST mask, x — avoids materialising the AND.
1270 condIntCode cond (CmmMachOp (MO_And rep) [x,o2]) (CmmLit (CmmInt 0 pk))
1271 | (CmmLit lit@(CmmInt mask pk2)) <- o2, is32BitLit lit
1273 (x_reg, x_code) <- getSomeReg x
1275 code = x_code `snocOL`
1276 TEST (intSize pk) (OpImm (ImmInteger mask)) (OpReg x_reg)
1278 return (CondCode False cond code)
-- anything vs zero: TEST x, x sets flags without an immediate.
1281 condIntCode cond x (CmmLit (CmmInt 0 pk)) = do
1282 (x_reg, x_code) <- getSomeReg x
1284 code = x_code `snocOL`
1285 TEST (intSize pk) (OpReg x_reg) (OpReg x_reg)
1287 return (CondCode False cond code)
1289 -- anything vs operand
-- x must be non-clobbered so it survives the computation of y.
1290 condIntCode cond x y | isOperand y = do
1291 (x_reg, x_code) <- getNonClobberedReg x
1292 (y_op, y_code) <- getOperand y
1294 code = x_code `appOL` y_code `snocOL`
1295 CMP (cmmTypeSize (cmmExprType x)) y_op (OpReg x_reg)
1297 return (CondCode False cond code)
1299 -- anything vs anything
1300 condIntCode cond x y = do
1301 (y_reg, y_code) <- getNonClobberedReg y
1302 (x_op, x_code) <- getRegOrMem x
1304 code = y_code `appOL`
1306 CMP (cmmTypeSize (cmmExprType x)) (OpReg y_reg) x_op
1308 return (CondCode False cond code)
1312 --------------------------------------------------------------------------------
-- | Set the flags for a floating-point comparison.  On i386 this uses the
-- x87 GCMP pseudo-instruction; on x86_64 the SSE2 compare, whose result
-- must be read with the unsigned condition codes (see NB(1) below).
1313 condFltCode :: Cond -> CmmExpr -> CmmExpr -> NatM CondCode
1315 #if i386_TARGET_ARCH
1316 condFltCode cond x y
1317 = ASSERT(cond `elem` ([EQQ, NE, LE, LTT, GE, GTT])) do
1318 (x_reg, x_code) <- getNonClobberedReg x
1319 (y_reg, y_code) <- getSomeReg y
1321 code = x_code `appOL` y_code `snocOL`
1322 GCMP cond x_reg y_reg
1323 -- The GCMP insn does the test and sets the zero flag if comparable
1324 -- and true. Hence we always supply EQQ as the condition to test.
1325 return (CondCode True EQQ code)
1327 #elif x86_64_TARGET_ARCH
1328 -- in the SSE2 comparison ops (ucomiss, ucomisd) the left arg may be
1329 -- an operand, but the right must be a reg. We can probably do better
1330 -- than this general case...
1331 condFltCode cond x y = do
1332 (x_reg, x_code) <- getNonClobberedReg x
1333 (y_op, y_code) <- getOperand y
1335 code = x_code `appOL`
1337 CMP (floatSize $ cmmExprWidth x) y_op (OpReg x_reg)
1338 -- NB(1): we need to use the unsigned comparison operators on the
1339 -- result of this comparison.
1341 return (CondCode True (condToUnsigned cond) code)
-- Fallback for other target architectures.
1344 condFltCode = panic "X86.condFltCode: not defined"
1350 -- -----------------------------------------------------------------------------
1351 -- Generating assignments
1353 -- Assignments are really at the heart of the whole code generation
1354 -- business. Almost all top-level nodes of any real importance are
1355 -- assignments, which correspond to loads, stores, or register
1356 -- transfers. If we're really lucky, some of the register transfers
1357 -- will go away, because we can use the destination register to
1358 -- complete the code generation for the right hand side. This only
1359 -- fails when the right hand side is forced into a fixed register
1360 -- (e.g. the result of a call).
-- Type signatures for the four assignment code generators: integer and
-- floating-point, each to memory or to a register destination.
1362 assignMem_IntCode :: Size -> CmmExpr -> CmmExpr -> NatM InstrBlock
1363 assignReg_IntCode :: Size -> CmmReg -> CmmExpr -> NatM InstrBlock
1365 assignMem_FltCode :: Size -> CmmExpr -> CmmExpr -> NatM InstrBlock
1366 assignReg_FltCode :: Size -> CmmReg -> CmmExpr -> NatM InstrBlock
1369 -- integer assignment to memory
1371 -- specific case of adding/subtracting an integer to a particular address.
1372 -- ToDo: catch other cases where we can use an operation directly on a memory
-- Pattern: mem[addr] := mem[addr] `op` imm  ==>  single ADD/SUB on memory.
-- The pk /= II64 || is32BitInteger i guard keeps the immediate encodable.
1374 assignMem_IntCode pk addr (CmmMachOp op [CmmLoad addr2 _,
1375 CmmLit (CmmInt i _)])
1376 | addr == addr2, pk /= II64 || is32BitInteger i,
1377 Just instr <- check op
1378 = do Amode amode code_addr <- getAmode addr
1379 let code = code_addr `snocOL`
1380 instr pk (OpImm (ImmInt (fromIntegral i))) (OpAddr amode)
-- 'check' maps the Cmm operator to the memory-capable instruction, if any.
-- (The 'return code' and 'where' lines appear elided from this listing.)
1383 check (MO_Add _) = Just ADD
1384 check (MO_Sub _) = Just SUB
-- General case: compute the source into a register or immediate first,
-- then MOV it to the destination address.
1389 assignMem_IntCode pk addr src = do
1390 Amode addr code_addr <- getAmode addr
1391 (code_src, op_src) <- get_op_RI src
1393 code = code_src `appOL`
1395 MOV pk op_src (OpAddr addr)
1396 -- NOTE: op_src is stable, so it will still be valid
1397 -- after code_addr. This may involve the introduction
1398 -- of an extra MOV to a temporary register, but we hope
1399 -- the register allocator will get rid of it.
-- | Get an operand that is either a 32-bit immediate or a non-clobbered
-- register ("RI" = register/immediate).  Used by assignMem_IntCode above.
-- (The second equation's head 'get_op_RI op' appears elided here.)
1403 get_op_RI :: CmmExpr -> NatM (InstrBlock,Operand) -- code, operator
1404 get_op_RI (CmmLit lit) | is32BitLit lit
1405 = return (nilOL, OpImm (litToImm lit))
1407 = do (reg,code) <- getNonClobberedReg op
1408 return (code, OpReg reg)
-- Integer assignment into a register destination.
-- A memory load goes via intLoadCode; any other source is computed
-- straight into the destination register with getAnyReg.
assignReg_IntCode pk reg (CmmLoad src _) =
    do  mk_load <- intLoadCode (MOV pk) src
        let dst = getRegisterReg reg
        return (mk_load dst)
assignReg_IntCode pk reg src =
    do  mk_code <- getAnyReg src
        let dst = getRegisterReg reg
        return (mk_code dst)
1422 -- Floating point assignment to memory
-- Compute the source into a (non-clobbered) register, then store it:
-- x87 GST on i386, plain MOV of an xmm register on x86_64.
1423 assignMem_FltCode pk addr src = do
1424 (src_reg, src_code) <- getNonClobberedReg src
1425 Amode addr addr_code <- getAmode addr
1427 code = src_code `appOL`
1429 IF_ARCH_i386(GST pk src_reg addr,
1430 MOV pk (OpReg src_reg) (OpAddr addr))
-- Floating point assignment to a register/temporary: generate the source
-- expression directly into the destination register.
assignReg_FltCode pk reg src =
    do  mk_code <- getAnyReg src
        return (mk_code (getRegisterReg reg))
-- | An unconditional jump to a computed target: through memory, to an
-- immediate label, or via a register.
-- (The last equation's head 'genJump expr = do' appears elided here.)
1439 genJump :: CmmExpr{-the branch target-} -> NatM InstrBlock
1441 genJump (CmmLoad mem pk) = do
1442 Amode target code <- getAmode mem
1443 return (code `snocOL` JMP (OpAddr target))
1445 genJump (CmmLit lit) = do
1446 return (unitOL (JMP (OpImm (litToImm lit))))
1449 (reg,code) <- getSomeReg expr
1450 return (code `snocOL` JMP (OpReg reg))
1453 -- -----------------------------------------------------------------------------
1454 -- Unconditional branches
-- | An unconditional branch to a local label: just the jump
-- instruction(s) for the target, as an instruction block.
genBranch :: BlockId -> NatM InstrBlock
genBranch target = return (toOL (mkJumpInstr target))
1461 -- -----------------------------------------------------------------------------
1462 -- Conditional jumps
1465 Conditional jumps are always to local labels, so we can use branch
1466 instructions. We peek at the arguments to decide what kind of
1469 I386: First, we have to ensure that the condition
1470 codes are set according to the supplied comparison operation.
-- genCondJump's signature; the 'genCondJump ::' name line and the
-- '-> NatM InstrBlock' result line appear elided from this listing.
1474 :: BlockId -- the branch target
1475 -> CmmExpr -- the condition on which to branch
1478 #if i386_TARGET_ARCH
-- i386: set flags via getCondCode, then a single conditional jump.
1479 genCondJump id bool = do
1480 CondCode _ cond code <- getCondCode bool
1481 return (code `snocOL` JXX cond id)
1483 #elif x86_64_TARGET_ARCH
-- x86_64: integer conditions are a single JXX; float conditions need the
-- parity-flag dance for unordered (NaN) operands — see condFltReg.
1484 genCondJump id bool = do
1485 CondCode is_float cond cond_code <- getCondCode bool
1488 return (cond_code `snocOL` JXX cond id)
1490 lbl <- getBlockIdNat
1492 -- see comment with condFltReg
1493 let code = case cond of
1499 plain_test = unitOL (
1502 or_unordered = toOL [
1506 and_ordered = toOL [
1512 return (cond_code `appOL` code)
-- Fallback for other target architectures.
1515 genCondJump = panic "X86.genCondJump: not defined"
1522 -- -----------------------------------------------------------------------------
1523 -- Generating C calls
1525 -- Now the biggest nightmare---calls. Most of the nastiness is buried in
1526 -- @get_arg@, which moves the arguments to the correct registers/stack
1527 -- locations. Apart from that, the code is easy.
1529 -- (If applicable) Do not fill the delay slots here; you will confuse the
1530 -- register allocator.
-- genCCall's type signature; the 'genCCall ::' name line and the
-- '-> NatM InstrBlock' result line appear elided from this listing.
1533 :: CmmCallTarget -- function to call
1534 -> HintedCmmFormals -- where to put the result
1535 -> HintedCmmActuals -- arguments (of mixed type)
1538 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-- i386 C calls: arguments are pushed on the stack right-to-left; results
-- come back in %st(0) (floats), %eax/%edx (64-bit), or %eax.  Darwin pads
-- the argument area so %esp stays 16-byte aligned at the call.
1540 #if i386_TARGET_ARCH
1542 genCCall (CmmPrim MO_WriteBarrier) _ _ = return nilOL
1543 -- write barrier compiles to no code on x86/x86-64;
1544 -- we keep it this long in order to prevent earlier optimisations.
1546 -- we only cope with a single result for foreign calls
-- Float primitives that have x87 instructions are inlined; the rest go
-- out of line via outOfLineFloatOp.
1547 genCCall (CmmPrim op) [CmmHinted r _] args = do
1548 l1 <- getNewLabelNat
1549 l2 <- getNewLabelNat
1551 MO_F32_Sqrt -> actuallyInlineFloatOp GSQRT FF32 args
1552 MO_F64_Sqrt -> actuallyInlineFloatOp GSQRT FF64 args
1554 MO_F32_Sin -> actuallyInlineFloatOp (\s -> GSIN s l1 l2) FF32 args
1555 MO_F64_Sin -> actuallyInlineFloatOp (\s -> GSIN s l1 l2) FF64 args
1557 MO_F32_Cos -> actuallyInlineFloatOp (\s -> GCOS s l1 l2) FF32 args
1558 MO_F64_Cos -> actuallyInlineFloatOp (\s -> GCOS s l1 l2) FF64 args
1560 MO_F32_Tan -> actuallyInlineFloatOp (\s -> GTAN s l1 l2) FF32 args
1561 MO_F64_Tan -> actuallyInlineFloatOp (\s -> GTAN s l1 l2) FF64 args
1563 other_op -> outOfLineFloatOp op r args
1565 actuallyInlineFloatOp instr size [CmmHinted x _]
1566 = do res <- trivialUFCode size (instr size) x
-- NOTE(review): an 'any <- anyReg res' binding appears elided above;
-- 'any' here is that binding, not Prelude.any.
1568 return (any (getRegisterReg (CmmLocal r)))
-- The general call case.
1570 genCCall target dest_regs args = do
1572 sizes = map (arg_size . cmmExprType . hintlessCmm) (reverse args)
1573 #if !darwin_TARGET_OS
1574 tot_arg_size = sum sizes
-- Darwin: round the argument area up to 16 bytes for stack alignment.
1576 raw_arg_size = sum sizes
1577 tot_arg_size = roundTo 16 raw_arg_size
1578 arg_pad_size = tot_arg_size - raw_arg_size
1579 delta0 <- getDeltaNat
1580 setDeltaNat (delta0 - arg_pad_size)
1583 push_codes <- mapM push_arg (reverse args)
1584 delta <- getDeltaNat
1587 -- deal with static vs dynamic call targets
1588 (callinsns,cconv) <-
1591 CmmCallee (CmmLit (CmmLabel lbl)) conv
1592 -> -- ToDo: stdcall arg sizes
1593 return (unitOL (CALL (Left fn_imm) []), conv)
1594 where fn_imm = ImmCLbl lbl
1596 -> do { (dyn_c, dyn_r) <- get_op expr
1597 ; ASSERT( isWord32 (cmmExprType expr) )
1598 return (dyn_c `snocOL` CALL (Right dyn_r) [], conv) }
1601 #if darwin_TARGET_OS
-- Emit the alignment padding before the pushed arguments (Darwin only).
1603 = toOL [SUB II32 (OpImm (ImmInt arg_pad_size)) (OpReg esp),
1604 DELTA (delta0 - arg_pad_size)]
1605 `appOL` concatOL push_codes
1608 = concatOL push_codes
1609 call = callinsns `appOL`
1611 -- Deallocate parameters after call for ccall;
1612 -- but not for stdcall (callee does it)
1613 (if cconv == StdCallConv || tot_arg_size==0 then [] else
1614 [ADD II32 (OpImm (ImmInt tot_arg_size)) (OpReg esp)])
1616 [DELTA (delta + tot_arg_size)]
1619 setDeltaNat (delta + tot_arg_size)
1622 -- assign the results, if necessary
1623 assign_code [] = nilOL
1624 assign_code [CmmHinted dest _hint]
1625 | isFloatType ty = unitOL (GMOV fake0 r_dest)
1626 | isWord64 ty = toOL [MOV II32 (OpReg eax) (OpReg r_dest),
1627 MOV II32 (OpReg edx) (OpReg r_dest_hi)]
1628 | otherwise = unitOL (MOV (intSize w) (OpReg eax) (OpReg r_dest))
1630 ty = localRegType dest
1632 r_dest_hi = getHiVRegFromLo r_dest
1633 r_dest = getRegisterReg (CmmLocal dest)
1634 assign_code many = panic "genCCall.assign_code many"
1636 return (push_code `appOL`
1638 assign_code dest_regs)
1641 arg_size :: CmmType -> Int -- Width in bytes
1642 arg_size ty = widthInBytes (typeWidth ty)
-- Round x up to the next multiple of a.
1644 roundTo a x | x `mod` a == 0 = x
1645 | otherwise = x + a - (x `mod` a)
-- Push one argument, updating the stack delta as we go.
1648 push_arg :: HintedCmmActual {-current argument-}
1649 -> NatM InstrBlock -- code
1651 push_arg (CmmHinted arg _hint) -- we don't need the hints on x86
-- 64-bit arguments are pushed as two 32-bit words, high word first.
1652 | isWord64 arg_ty = do
1653 ChildCode64 code r_lo <- iselExpr64 arg
1654 delta <- getDeltaNat
1655 setDeltaNat (delta - 8)
1657 r_hi = getHiVRegFromLo r_lo
1659 return ( code `appOL`
1660 toOL [PUSH II32 (OpReg r_hi), DELTA (delta - 4),
1661 PUSH II32 (OpReg r_lo), DELTA (delta - 8),
1666 (code, reg) <- get_op arg
1667 delta <- getDeltaNat
1668 let size = arg_size arg_ty -- Byte size
1669 setDeltaNat (delta-size)
-- Floats can't be PUSHed: grow the stack, then x87-store into the slot.
1670 if (isFloatType arg_ty)
1671 then return (code `appOL`
1672 toOL [SUB II32 (OpImm (ImmInt size)) (OpReg esp),
1674 GST (floatSize (typeWidth arg_ty))
1675 reg (AddrBaseIndex (EABaseReg esp)
1679 else return (code `snocOL`
1680 PUSH II32 (OpReg reg) `snocOL`
1684 arg_ty = cmmExprType arg
1687 get_op :: CmmExpr -> NatM (InstrBlock, Reg) -- code, reg
1689 (reg,code) <- getSomeReg op
-- x86_64 C calls (System V AMD64 ABI): the first several integer args go
-- in registers (allArgRegs), floats in SSE registers (allFPArgRegs); the
-- overflow goes on the stack.  %al must hold the number of SSE registers
-- used, in case the callee is varargs.
1692 #elif x86_64_TARGET_ARCH
1694 genCCall (CmmPrim MO_WriteBarrier) _ _ = return nilOL
1695 -- write barrier compiles to no code on x86/x86-64;
1696 -- we keep it this long in order to prevent earlier optimisations.
-- No inline float primitives on x86_64; everything goes out of line.
1699 genCCall (CmmPrim op) [CmmHinted r _] args =
1700 outOfLineFloatOp op r args
1702 genCCall target dest_regs args = do
1704 -- load up the register arguments
1705 (stack_args, aregs, fregs, load_args_code)
1706 <- load_args args allArgRegs allFPArgRegs nilOL
-- The registers actually consumed are the prefix of each list that
-- load_args did NOT return as still-available.
1709 fp_regs_used = reverse (drop (length fregs) (reverse allFPArgRegs))
1710 int_regs_used = reverse (drop (length aregs) (reverse allArgRegs))
1711 arg_regs = [eax] ++ int_regs_used ++ fp_regs_used
1712 -- for annotating the call instruction with
1714 sse_regs = length fp_regs_used
1716 tot_arg_size = arg_size * length stack_args
1718 -- On entry to the called function, %rsp should be aligned
1719 -- on a 16-byte boundary +8 (i.e. the first stack arg after
1720 -- the return address is 16-byte aligned). In STG land
1721 -- %rsp is kept 16-byte aligned (see StgCRun.c), so we just
1722 -- need to make sure we push a multiple of 16-bytes of args,
1723 -- plus the return address, to get the correct alignment.
1724 -- Urg, this is hard. We need to feed the delta back into
1725 -- the arg pushing code.
1726 (real_size, adjust_rsp) <-
1727 if tot_arg_size `rem` 16 == 0
1728 then return (tot_arg_size, nilOL)
1729 else do -- we need to adjust...
1730 delta <- getDeltaNat
1731 setDeltaNat (delta-8)
1732 return (tot_arg_size+8, toOL [
1733 SUB II64 (OpImm (ImmInt 8)) (OpReg rsp),
1737 -- push the stack args, right to left
1738 push_code <- push_args (reverse stack_args) nilOL
1739 delta <- getDeltaNat
1741 -- deal with static vs dynamic call targets
1742 (callinsns,cconv) <-
1745 CmmCallee (CmmLit (CmmLabel lbl)) conv
1746 -> -- ToDo: stdcall arg sizes
1747 return (unitOL (CALL (Left fn_imm) arg_regs), conv)
1748 where fn_imm = ImmCLbl lbl
1750 -> do (dyn_r, dyn_c) <- getSomeReg expr
1751 return (dyn_c `snocOL` CALL (Right dyn_r) arg_regs, conv)
1754 -- The x86_64 ABI requires us to set %al to the number of SSE
1755 -- registers that contain arguments, if the called routine
1756 -- is a varargs function. We don't know whether it's a
1757 -- varargs function or not, so we have to assume it is.
1759 -- It's not safe to omit this assignment, even if the number
1760 -- of SSE regs in use is zero. If %al is larger than 8
1761 -- on entry to a varargs function, seg faults ensue.
1762 assign_eax n = unitOL (MOV II32 (OpImm (ImmInt n)) (OpReg eax))
1764 let call = callinsns `appOL`
1766 -- Deallocate parameters after call for ccall;
1767 -- but not for stdcall (callee does it)
1768 (if cconv == StdCallConv || real_size==0 then [] else
1769 [ADD (intSize wordWidth) (OpImm (ImmInt real_size)) (OpReg esp)])
1771 [DELTA (delta + real_size)]
1774 setDeltaNat (delta + real_size)
1777 -- assign the results, if necessary
1778 assign_code [] = nilOL
-- Results: floats in %xmm0, everything else in %rax.
1779 assign_code [CmmHinted dest _hint] =
1780 case typeWidth rep of
1781 W32 | isFloatType rep -> unitOL (MOV (floatSize W32) (OpReg xmm0) (OpReg r_dest))
1782 W64 | isFloatType rep -> unitOL (MOV (floatSize W64) (OpReg xmm0) (OpReg r_dest))
1783 _ -> unitOL (MOV (cmmTypeSize rep) (OpReg rax) (OpReg r_dest))
1785 rep = localRegType dest
1786 r_dest = getRegisterReg (CmmLocal dest)
1787 assign_code many = panic "genCCall.assign_code many"
1789 return (load_args_code `appOL`
1792 assign_eax sse_regs `appOL`
1794 assign_code dest_regs)
1797 arg_size = 8 -- always, at the mo
-- Distribute arguments into int regs, FP regs, and stack leftovers.
1799 load_args :: [CmmHinted CmmExpr]
1800 -> [Reg] -- int regs avail for args
1801 -> [Reg] -- FP regs avail for args
1803 -> NatM ([CmmHinted CmmExpr],[Reg],[Reg],InstrBlock)
1804 load_args args [] [] code = return (args, [], [], code)
1805 -- no more regs to use
1806 load_args [] aregs fregs code = return ([], aregs, fregs, code)
1807 -- no more args to push
1808 load_args ((CmmHinted arg hint) : rest) aregs fregs code
1809 | isFloatType arg_rep =
1813 arg_code <- getAnyReg arg
1814 load_args rest aregs rs (code `appOL` arg_code r)
1819 arg_code <- getAnyReg arg
1820 load_args rest rs fregs (code `appOL` arg_code r)
1822 arg_rep = cmmExprType arg
1825 (args',ars,frs,code') <- load_args rest aregs fregs code
1826 return ((CmmHinted arg hint):args', ars, frs, code')
-- Push the remaining (stack) args, right to left, tracking the delta.
1828 push_args [] code = return code
1829 push_args ((CmmHinted arg hint):rest) code
-- Floats can't be PUSHed: grow the stack and MOV into the new slot.
1830 | isFloatType arg_rep = do
1831 (arg_reg, arg_code) <- getSomeReg arg
1832 delta <- getDeltaNat
1833 setDeltaNat (delta-arg_size)
1834 let code' = code `appOL` arg_code `appOL` toOL [
1835 SUB (intSize wordWidth) (OpImm (ImmInt arg_size)) (OpReg rsp) ,
1836 DELTA (delta-arg_size),
1837 MOV (floatSize width) (OpReg arg_reg) (OpAddr (spRel 0))]
1838 push_args rest code'
1841 -- we only ever generate word-sized function arguments. Promotion
1842 -- has already happened: our Int8# type is kept sign-extended
1843 -- in an Int#, for example.
1844 ASSERT(width == W64) return ()
1845 (arg_op, arg_code) <- getOperand arg
1846 delta <- getDeltaNat
1847 setDeltaNat (delta-arg_size)
1848 let code' = code `appOL` arg_code `appOL` toOL [
1850 DELTA (delta-arg_size)]
1851 push_args rest code'
1853 arg_rep = cmmExprType arg
1854 width = typeWidth arg_rep
-- Fallback for unsupported target architectures.
-- Fix: the panic message previously said "genCCAll" (typo).
genCCall = panic "X86.genCCall: not defined"
1859 #endif /* x86_64_TARGET_ARCH */
-- | Implement a floating-point CallishMachOp by an unsafe C call to the
-- corresponding libm routine.  Float32 results go via a fresh Float64
-- temporary (the C functions here are called through the F64 convention
-- in that branch) and are then assigned to the real result register.
1864 outOfLineFloatOp :: CallishMachOp -> CmmFormal -> HintedCmmActuals -> NatM InstrBlock
1865 outOfLineFloatOp mop res args
1867 dflags <- getDynFlagsNat
1868 targetExpr <- cmmMakeDynamicReference dflags addImportNat CallReference lbl
1869 let target = CmmCallee targetExpr CCallConv
1871 if isFloat64 (localRegType res)
1873 stmtToInstrs (CmmCall target [CmmHinted res NoHint] args CmmUnsafe CmmMayReturn)
1877 tmp = LocalReg uq f64
1879 code1 <- stmtToInstrs (CmmCall target [CmmHinted tmp NoHint] args CmmUnsafe CmmMayReturn)
1880 code2 <- stmtToInstrs (CmmAssign (CmmLocal res) (CmmReg (CmmLocal tmp)))
1881 return (code1 `appOL` code2)
-- Map each machop to the name of its C library routine.
1883 lbl = mkForeignLabel fn Nothing False IsFunction
1886 MO_F32_Sqrt -> fsLit "sqrtf"
1887 MO_F32_Sin -> fsLit "sinf"
1888 MO_F32_Cos -> fsLit "cosf"
1889 MO_F32_Tan -> fsLit "tanf"
1890 MO_F32_Exp -> fsLit "expf"
1891 MO_F32_Log -> fsLit "logf"
1893 MO_F32_Asin -> fsLit "asinf"
1894 MO_F32_Acos -> fsLit "acosf"
1895 MO_F32_Atan -> fsLit "atanf"
1897 MO_F32_Sinh -> fsLit "sinhf"
1898 MO_F32_Cosh -> fsLit "coshf"
1899 MO_F32_Tanh -> fsLit "tanhf"
1900 MO_F32_Pwr -> fsLit "powf"
1902 MO_F64_Sqrt -> fsLit "sqrt"
1903 MO_F64_Sin -> fsLit "sin"
1904 MO_F64_Cos -> fsLit "cos"
1905 MO_F64_Tan -> fsLit "tan"
1906 MO_F64_Exp -> fsLit "exp"
1907 MO_F64_Log -> fsLit "log"
1909 MO_F64_Asin -> fsLit "asin"
1910 MO_F64_Acos -> fsLit "acos"
1911 MO_F64_Atan -> fsLit "atan"
1913 MO_F64_Sinh -> fsLit "sinh"
1914 MO_F64_Cosh -> fsLit "cosh"
1915 MO_F64_Tanh -> fsLit "tanh"
1916 MO_F64_Pwr -> fsLit "pow"
1922 -- -----------------------------------------------------------------------------
1923 -- Generating a table-branch
-- | Generate a table jump.  With -fPIC the table holds label differences
-- and the entry is added to the table base; otherwise the table holds
-- absolute labels and we jump through it directly.  Nothing entries
-- (impossible branches) become zero words.
1925 genSwitch :: CmmExpr -> [Maybe BlockId] -> NatM InstrBlock
-- PIC case (equation head with the opt_PIC guard appears elided here).
1930 (reg,e_code) <- getSomeReg expr
1931 lbl <- getNewLabelNat
1932 dflags <- getDynFlagsNat
1933 dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl
1934 (tableReg,t_code) <- getSomeReg $ dynRef
1936 jumpTable = map jumpTableEntryRel ids
1938 jumpTableEntryRel Nothing
1939 = CmmStaticLit (CmmInt 0 wordWidth)
1940 jumpTableEntryRel (Just (BlockId id))
1941 = CmmStaticLit (CmmLabelDiffOff blockLabel lbl 0)
1942 where blockLabel = mkAsmTempLabel id
-- Table entry operand: table base + index * word size.
1944 op = OpAddr (AddrBaseIndex (EABaseReg tableReg)
1945 (EAIndex reg wORD_SIZE) (ImmInt 0))
1947 #if x86_64_TARGET_ARCH
1948 #if darwin_TARGET_OS
1949 -- on Mac OS X/x86_64, put the jump table in the text section
1950 -- to work around a limitation of the linker.
1951 -- ld64 is unable to handle the relocations for
1953 -- if L0 is not preceded by a non-anonymous label in its section.
1955 code = e_code `appOL` t_code `appOL` toOL [
1956 ADD (intSize wordWidth) op (OpReg tableReg),
1957 JMP_TBL (OpReg tableReg) [ id | Just id <- ids ],
1958 LDATA Text (CmmDataLabel lbl : jumpTable)
1961 -- HACK: On x86_64 binutils<2.17 is only able to generate PC32
1962 -- relocations, hence we only get 32-bit offsets in the jump
1963 -- table. As these offsets are always negative we need to properly
1964 -- sign extend them to 64-bit. This hack should be removed in
1965 -- conjunction with the hack in PprMach.hs/pprDataItem once
1966 -- binutils 2.17 is standard.
1967 code = e_code `appOL` t_code `appOL` toOL [
1968 LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
1970 (OpAddr (AddrBaseIndex (EABaseReg tableReg)
1971 (EAIndex reg wORD_SIZE) (ImmInt 0)))
1973 ADD (intSize wordWidth) (OpReg reg) (OpReg tableReg),
1974 JMP_TBL (OpReg tableReg) [ id | Just id <- ids ]
-- Non-Darwin, non-hack PIC case.
1978 code = e_code `appOL` t_code `appOL` toOL [
1979 LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
1980 ADD (intSize wordWidth) op (OpReg tableReg),
1981 JMP_TBL (OpReg tableReg) [ id | Just id <- ids ]
-- Non-PIC case: absolute jump table, indexed jump through it.
1987 (reg,e_code) <- getSomeReg expr
1988 lbl <- getNewLabelNat
1990 jumpTable = map jumpTableEntry ids
1991 op = OpAddr (AddrBaseIndex EABaseNone (EAIndex reg wORD_SIZE) (ImmCLbl lbl))
1992 code = e_code `appOL` toOL [
1993 LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
1994 JMP_TBL op [ id | Just id <- ids ]
2000 -- -----------------------------------------------------------------------------
2001 -- 'condIntReg' and 'condFltReg': condition codes into registers
2003 -- Turn those condition codes into integers now (when they appear on
2004 -- the right hand side of an assignment).
2006 -- (If applicable) Do not fill the delay slots here; you will confuse the
2007 -- register allocator.
-- | Materialise an integer comparison result as 0/1 in a register:
-- set flags, SETcc into a byte temp, then zero-extend into the
-- destination.  (A 'let' line and closing bracket appear elided here.)
2009 condIntReg :: Cond -> CmmExpr -> CmmExpr -> NatM Register
2011 condIntReg cond x y = do
2012 CondCode _ cond cond_code <- condIntCode cond x y
2013 tmp <- getNewRegNat II8
2015 code dst = cond_code `appOL` toOL [
2016 SETCC cond (OpReg tmp),
2017 MOVZxL II8 (OpReg tmp) (OpReg dst)
2020 return (Any II32 code)
-- | Materialise a floating-point comparison result as 0/1 in a register.
-- The x86_64 version must also account for unordered (NaN) operands via
-- the parity flag — see the long comment below.
2024 condFltReg :: Cond -> CmmExpr -> CmmExpr -> NatM Register
2026 #if i386_TARGET_ARCH
-- i386 (x87): GCMP already folded the condition, so SETcc + zero-extend.
2027 condFltReg cond x y = do
2028 CondCode _ cond cond_code <- condFltCode cond x y
2029 tmp <- getNewRegNat II8
2031 code dst = cond_code `appOL` toOL [
2032 SETCC cond (OpReg tmp),
2033 MOVZxL II8 (OpReg tmp) (OpReg dst)
2036 return (Any II32 code)
2038 #elif x86_64_TARGET_ARCH
2039 condFltReg cond x y = do
2040 CondCode _ cond cond_code <- condFltCode cond x y
2041 tmp1 <- getNewRegNat archWordSize
2042 tmp2 <- getNewRegNat archWordSize
2044 -- We have to worry about unordered operands (eg. comparisons
2045 -- against NaN). If the operands are unordered, the comparison
2046 -- sets the parity flag, carry flag and zero flag.
2047 -- All comparisons are supposed to return false for unordered
2048 -- operands except for !=, which returns true.
2050 -- Optimisation: we don't have to test the parity flag if we
2051 -- know the test has already excluded the unordered case: eg >
2052 -- and >= test for a zero carry flag, which can only occur for
2053 -- ordered operands.
2055 -- ToDo: by reversing comparisons we could avoid testing the
2056 -- parity flag in more cases.
-- Dispatch on the condition ('let code dst = ... case cond of' lines
-- appear elided in this listing).
2061 NE -> or_unordered dst
2062 GU -> plain_test dst
2063 GEU -> plain_test dst
2064 _ -> and_ordered dst)
2066 plain_test dst = toOL [
2067 SETCC cond (OpReg tmp1),
2068 MOVZxL II8 (OpReg tmp1) (OpReg dst)
-- NE is true when unordered: OR in the parity flag.
2070 or_unordered dst = toOL [
2071 SETCC cond (OpReg tmp1),
2072 SETCC PARITY (OpReg tmp2),
2073 OR II8 (OpReg tmp1) (OpReg tmp2),
2074 MOVZxL II8 (OpReg tmp2) (OpReg dst)
-- Other conditions are false when unordered: AND with NOT parity.
2076 and_ordered dst = toOL [
2077 SETCC cond (OpReg tmp1),
2078 SETCC NOTPARITY (OpReg tmp2),
2079 AND II8 (OpReg tmp1) (OpReg tmp2),
2080 MOVZxL II8 (OpReg tmp2) (OpReg dst)
2083 return (Any II32 code)
-- Fallback for other target architectures.
2086 condFltReg = panic "X86.condFltReg: not defined"
2093 -- -----------------------------------------------------------------------------
2094 -- 'trivial*Code': deal with trivial instructions
2096 -- Trivial (dyadic: 'trivialCode', floating-point: 'trivialFCode',
2097 -- unary: 'trivialUCode', unary fl-pt:'trivialUFCode') instructions.
2098 -- Only look for constants on the right hand side, because that's
2099 -- where the generic optimizer will have put them.
2101 -- Similarly, for unary instructions, we don't have to worry about
2102 -- matching an StInt as the argument, because genericOpt will already
2103 -- have handled the constant-folding.
2107 The Rules of the Game are:
2109 * You cannot assume anything about the destination register dst;
2110 it may be anything, including a fixed reg.
2112 * You may compute an operand into a fixed reg, but you may not
2113 subsequently change the contents of that fixed reg. If you
2114 want to do so, first copy the value either to a temporary
2115 or into dst. You are free to modify dst even if it happens
2116 to be a fixed reg -- that's not your problem.
2118 * You cannot assume that a fixed reg will stay live over an
2119 arbitrary computation. The same applies to the dst reg.
2121 * Temporary regs obtained from getNewRegNat are distinct from
2122 each other and from all other regs, and stay live over
2123 arbitrary computations.
2125 --------------------
2127 SDM's version of The Rules:
2129 * If getRegister returns Any, that means it can generate correct
2130 code which places the result in any register, period. Even if that
2131 register happens to be read during the computation.
2133 Corollary #1: this means that if you are generating code for an
2134 operation with two arbitrary operands, you cannot assign the result
2135 of the first operand into the destination register before computing
2136 the second operand. The second operand might require the old value
2137 of the destination register.
2139 Corollary #2: A function might be able to generate more efficient
2140 code if it knows the destination register is a new temporary (and
2141 therefore not read by any of the sub-computations).
2143 * If getRegister returns Any, then the code it generates may modify only:
2144 (a) fresh temporaries
2145 (b) the destination register
2146 (c) known registers (eg. %ecx is used by shifts)
2147 In particular, it may *not* modify global registers, unless the global
2148 register happens to be the destination register.
-- | Dyadic integer instruction with a 32-bit literal on the LEFT: if a
-- reversed form of the instruction exists, compute the right operand into
-- the destination and apply the reversed instruction with the literal as
-- immediate.  Otherwise fall through to the general 'genTrivialCode'.
-- ('let code dst' lines appear elided from this listing.)
2151 trivialCode width instr (Just revinstr) (CmmLit lit_a) b
2152 | is32BitLit lit_a = do
2153 b_code <- getAnyReg b
2156 = b_code dst `snocOL`
2157 revinstr (OpImm (litToImm lit_a)) (OpReg dst)
2159 return (Any (intSize width) code)
2161 trivialCode width instr maybe_revinstr a b
2162 = genTrivialCode (intSize width) instr a b
2164 -- This is re-used for floating pt instructions too.
-- | General two-operand instruction (dst := dst `op` src): get b as a
-- non-clobbered operand, compute a straight into dst, then apply the
-- instruction.  If dst would clash with b's operand, save b into a fresh
-- temp first (see the comment below).
2165 genTrivialCode rep instr a b = do
2166 (b_op, b_code) <- getNonClobberedOperand b
2167 a_code <- getAnyReg a
2168 tmp <- getNewRegNat rep
2170 -- We want the value of b to stay alive across the computation of a.
2171 -- But, we want to calculate a straight into the destination register,
2172 -- because the instruction only has two operands (dst := dst `op` src).
2173 -- The troublesome case is when the result of b is in the same register
2174 -- as the destination reg. In this case, we have to save b in a
2175 -- new temporary across the computation of a.
-- ('code dst' definition lines appear partially elided in this listing.)
2177 | dst `regClashesWithOp` b_op =
2179 unitOL (MOV rep b_op (OpReg tmp)) `appOL`
2181 instr (OpReg tmp) (OpReg dst)
2185 instr b_op (OpReg dst)
2187 return (Any rep code)
-- | Does the register overlap the operand — either it IS the operand
-- register, or it appears in the operand's addressing mode?
reg `regClashesWithOp` op = case op of
    OpReg reg2   -> reg == reg2
    OpAddr amode -> reg `elem` addrModeRegs amode
    _            -> False
-- | Unary instruction: compute the operand into the destination and apply
-- the instruction in place.  ('code dst' lines appear elided here.)
2195 trivialUCode rep instr x = do
2196 x_code <- getAnyReg x
2201 return (Any rep code)
2205 #if i386_TARGET_ARCH
-- i386 (x87): three-address float instruction on two source registers
-- into the destination.  ('let'/'code dst' lines appear elided here.)
2207 trivialFCode width instr x y = do
2208 (x_reg, x_code) <- getNonClobberedReg x -- these work for float regs too
2209 (y_reg, y_code) <- getSomeReg y
2211 size = floatSize width
2215 instr size x_reg y_reg dst
2216 return (Any size code)
#if x86_64_TARGET_ARCH
-- x86_64 (SSE2): a dyadic float op is just an ordinary two-operand
-- instruction at the appropriate float size.
trivialFCode pk instr x y = genTrivialCode fsize (instr fsize) x y
  where fsize = floatSize pk
-- | Unary floating-point instruction from a source register into the
-- destination.  ('code dst' lines appear elided from this listing.)
2226 trivialUFCode size instr x = do
2227 (x_reg, x_code) <- getSomeReg x
2233 return (Any size code)
2236 --------------------------------------------------------------------------------
-- | Int -> float conversion: x87 GITOF/GITOD on i386, SSE CVTSI2SS/SD on
-- x86_64.  The result register has the target float size.
-- ('let' lines appear elided from this listing.)
2237 coerceInt2FP :: Width -> Width -> CmmExpr -> NatM Register
2239 #if i386_TARGET_ARCH
2240 coerceInt2FP from to x = do
2241 (x_reg, x_code) <- getSomeReg x
2243 opc = case to of W32 -> GITOF; W64 -> GITOD
2244 code dst = x_code `snocOL` opc x_reg dst
2245 -- ToDo: works for non-II32 reps?
2246 return (Any (floatSize to) code)
2248 #elif x86_64_TARGET_ARCH
2249 coerceInt2FP from to x = do
2250 (x_op, x_code) <- getOperand x -- ToDo: could be a safe operand
2252 opc = case to of W32 -> CVTSI2SS; W64 -> CVTSI2SD
2253 code dst = x_code `snocOL` opc x_op dst
2255 return (Any (floatSize to) code) -- works even if the destination rep is <II32
-- Fallback for other target architectures.
2258 coerceInt2FP = panic "X86.coerceInt2FP: not defined"
2265 --------------------------------------------------------------------------------
-- | Float -> int conversion (truncating): x87 GFTOI/GDTOI on i386, SSE
-- CVTTSS2SIQ/CVTTSD2SIQ on x86_64.  Dispatches on the SOURCE width.
-- ('let' lines appear elided from this listing.)
2266 coerceFP2Int :: Width -> Width -> CmmExpr -> NatM Register
2268 #if i386_TARGET_ARCH
2269 coerceFP2Int from to x = do
2270 (x_reg, x_code) <- getSomeReg x
2272 opc = case from of W32 -> GFTOI; W64 -> GDTOI
2273 code dst = x_code `snocOL` opc x_reg dst
2274 -- ToDo: works for non-II32 reps?
2276 return (Any (intSize to) code)
2278 #elif x86_64_TARGET_ARCH
2279 coerceFP2Int from to x = do
2280 (x_op, x_code) <- getOperand x -- ToDo: could be a safe operand
2282 opc = case from of W32 -> CVTTSS2SIQ; W64 -> CVTTSD2SIQ
2283 code dst = x_code `snocOL` opc x_op dst
2285 return (Any (intSize to) code) -- works even if the destination rep is <II32
-- Fallback for other target architectures.
2288 coerceFP2Int = panic "X86.coerceFP2Int: not defined"
2295 --------------------------------------------------------------------------------
-- | Float width conversion on x86_64: converting TO W32 is a
-- double->single CVTSD2SS; TO W64 is single->double CVTSS2SD.
-- ('let' line and non-x86_64 branch head appear elided here.)
2296 coerceFP2FP :: Width -> CmmExpr -> NatM Register
2298 #if x86_64_TARGET_ARCH
2299 coerceFP2FP to x = do
2300 (x_reg, x_code) <- getSomeReg x
2302 opc = case to of W32 -> CVTSD2SS; W64 -> CVTSS2SD
2303 code dst = x_code `snocOL` opc x_reg dst
2305 return (Any (floatSize to) code)
-- Fallback for other target architectures.
2308 coerceFP2FP = panic "X86.coerceFP2FP: not defined"