2 -- The above warning suppression flag is a temporary kludge.
3 -- While working on this module you are encouraged to remove it and fix
4 -- any warnings in the module. See
5 -- http://hackage.haskell.org/trac/ghc/wiki/Commentary/CodingStyle#Warnings
8 -----------------------------------------------------------------------------
10 -- Generating machine code (instruction selection)
12 -- (c) The University of Glasgow 1996-2004
14 -----------------------------------------------------------------------------
16 -- This is a big module, but, if you pay attention to
17 -- (a) the sectioning, (b) the type signatures, and
18 -- (c) the #if blah_TARGET_ARCH} things, the
19 -- structure should not be too overwhelming.
28 #include "HsVersions.h"
29 #include "nativeGen/NCG.h"
30 #include "../includes/MachDeps.h"
46 -- Our intermediate code:
49 import PprCmm ( pprExpr )
52 import ClosureInfo ( C_SRT(..) )
55 import StaticFlags ( opt_PIC )
56 import ForeignCall ( CCallConv(..) )
59 import qualified Outputable as O
62 import FastBool ( isFastTrue )
63 import Constants ( wORD_SIZE )
66 import Debug.Trace ( trace )
68 import Control.Monad ( mapAndUnzipM )
69 import Data.Maybe ( fromJust )
78 -> NatM [NatCmmTop Instr]
80 cmmTopCodeGen dynflags
81 (CmmProc info lab params (ListGraph blocks)) = do
82 (nat_blocks,statics) <- mapAndUnzipM basicBlockCodeGen blocks
83 picBaseMb <- getPicBaseMaybeNat
84 let proc = CmmProc info lab params (ListGraph $ concat nat_blocks)
85 tops = proc : concat statics
86 os = platformOS $ targetPlatform dynflags
89 Just picBase -> initializePicBase_x86 ArchX86 os picBase tops
90 Nothing -> return tops
-- Data sections need no instruction selection: the CmmStatics are
-- passed through unchanged into the native code stream.
92 cmmTopCodeGen _ (CmmData sec dat) = do
93 return [CmmData sec dat] -- no translation, we just use CmmStatic
98 -> NatM ( [NatBasicBlock Instr]
101 basicBlockCodeGen (BasicBlock id stmts) = do
102 instrs <- stmtsToInstrs stmts
103 -- code generation may introduce new basic block boundaries, which
104 -- are indicated by the NEWBLOCK instruction. We must split up the
105 -- instruction stream into basic blocks again. Also, we extract
108 (top,other_blocks,statics) = foldrOL mkBlocks ([],[],[]) instrs
110 mkBlocks (NEWBLOCK id) (instrs,blocks,statics)
111 = ([], BasicBlock id instrs : blocks, statics)
112 mkBlocks (LDATA sec dat) (instrs,blocks,statics)
113 = (instrs, blocks, CmmData sec dat:statics)
114 mkBlocks instr (instrs,blocks,statics)
115 = (instr:instrs, blocks, statics)
117 return (BasicBlock id top : other_blocks, statics)
120 stmtsToInstrs :: [CmmStmt] -> NatM InstrBlock
122 = do instrss <- mapM stmtToInstrs stmts
123 return (concatOL instrss)
126 stmtToInstrs :: CmmStmt -> NatM InstrBlock
127 stmtToInstrs stmt = case stmt of
128 CmmNop -> return nilOL
129 CmmComment s -> return (unitOL (COMMENT s))
132 | isFloatType ty -> assignReg_FltCode size reg src
133 #if WORD_SIZE_IN_BITS==32
134 | isWord64 ty -> assignReg_I64Code reg src
136 | otherwise -> assignReg_IntCode size reg src
137 where ty = cmmRegType reg
138 size = cmmTypeSize ty
141 | isFloatType ty -> assignMem_FltCode size addr src
142 #if WORD_SIZE_IN_BITS==32
143 | isWord64 ty -> assignMem_I64Code addr src
145 | otherwise -> assignMem_IntCode size addr src
146 where ty = cmmExprType src
147 size = cmmTypeSize ty
149 CmmCall target result_regs args _ _
150 -> genCCall target result_regs args
152 CmmBranch id -> genBranch id
153 CmmCondBranch arg id -> genCondJump id arg
154 CmmSwitch arg ids -> genSwitch arg ids
155 CmmJump arg params -> genJump arg
157 panic "stmtToInstrs: return statement should have been cps'd away"
160 --------------------------------------------------------------------------------
161 -- | 'InstrBlock's are the insn sequences generated by the insn selectors.
162 -- They are really trees of insns to facilitate fast appending, where a
163 -- left-to-right traversal yields the insns in the correct order.
169 -- | Condition codes passed up the tree.
172 = CondCode Bool Cond InstrBlock
175 -- | a.k.a "Register64"
176 -- Reg is the lower 32-bit temporary which contains the result.
177 -- Use getHiVRegFromLo to find the other VRegUnique.
179 -- Rules of this simplified insn selection game are therefore that
180 -- the returned Reg may be modified
188 -- | Registers passed up the tree. If the stix code forces the register
189 -- to live in a pre-decided machine register, it comes out as @Fixed@;
190 -- otherwise, it comes out as @Any@, and the parent can decide which
191 -- register to put it in.
194 = Fixed Size Reg InstrBlock
195 | Any Size (Reg -> InstrBlock)
-- | Replace the Size carried by a Register with a new one, leaving the
-- register choice / code-generation part untouched.  Used for conversions
-- that are no-ops at the machine level (see conversionNop).
198 swizzleRegisterRep :: Register -> Size -> Register
199 swizzleRegisterRep (Fixed _ reg code) size = Fixed size reg code
200 swizzleRegisterRep (Any _ codefn) size = Any size codefn
203 -- | Grab the Reg for a CmmReg
204 getRegisterReg :: CmmReg -> Reg
-- A local Cmm register maps directly onto a virtual register of the
-- matching size; the register allocator assigns it later.
206 getRegisterReg (CmmLocal (LocalReg u pk))
207 = RegVirtual $ mkVirtualReg u (cmmTypeSize pk)
-- A global (magic) register must map onto a real machine register at
-- this point; if it lives in memory instead, that is a compiler bug.
209 getRegisterReg (CmmGlobal mid)
210 = case get_GlobalReg_reg_or_addr mid of
211 Left reg -> RegReal $ reg
212 _other -> pprPanic "getRegisterReg-memory" (ppr $ CmmGlobal mid)
213 -- By this stage, the only MagicIds remaining should be the
214 -- ones which map to a real machine register on this
215 -- platform. Hence ...
218 -- | Memory addressing modes passed up the tree.
220 = Amode AddrMode InstrBlock
223 Now, given a tree (the argument to an CmmLoad) that references memory,
224 produce a suitable addressing mode.
226 A Rule of the Game (tm) for Amodes: use of the addr bit must
227 immediately follow use of the code part, since the code part puts
228 values in registers which the addr then refers to. So you can't put
229 anything in between, lest it overwrite some of those registers. If
230 you need to do some other computation between the code part and use of
231 the addr bit, first store the effective address from the amode in a
232 temporary, then do the other computation, and then use the temporary:
236 ... other computation ...
241 -- | Check whether an integer will fit in 32 bits.
242 -- A CmmInt is intended to be truncated to the appropriate
243 -- number of bits, so here we truncate it to Int64. This is
244 -- important because e.g. -1 as a CmmInt might be either
245 -- -1 or 18446744073709551615.
247 is32BitInteger :: Integer -> Bool
248 is32BitInteger i = i64 <= 0x7fffffff && i64 >= -0x80000000
-- truncate through Int64 first, so a wrapped-around negative literal
-- (e.g. 18446744073709551615 for -1) compares correctly
249 where i64 = fromIntegral i :: Int64
252 -- | Convert a BlockId to some CmmStatic data
253 jumpTableEntry :: Maybe BlockId -> CmmStatic
-- Nothing marks a hole in the jump table: emit a zero word as filler.
254 jumpTableEntry Nothing = CmmStaticLit (CmmInt 0 wordWidth)
-- Just a block: emit the address of that block's assembly-local label.
255 jumpTableEntry (Just (BlockId id)) = CmmStaticLit (CmmLabel blockLabel)
256 where blockLabel = mkAsmTempLabel id
259 -- -----------------------------------------------------------------------------
260 -- General things for putting together code sequences
262 -- Expand CmmRegOff. ToDo: should we do it this way around, or convert
263 -- CmmExprs into CmmRegOff?
264 mangleIndexTree :: CmmExpr -> CmmExpr
-- Rewrite CmmRegOff into an explicit add of the register and a literal
-- offset, both at the register's own width.
-- NOTE(review): only the CmmRegOff equation is visible here, so the
-- function looks partial — confirm callers only pass CmmRegOff.
265 mangleIndexTree (CmmRegOff reg off)
266 = CmmMachOp (MO_Add width) [CmmReg reg, CmmLit (CmmInt (fromIntegral off) width)]
267 where width = typeWidth (cmmRegType reg)
269 -- | The dual to getAnyReg: compute an expression into a register, but
270 -- we don't mind which one it is.
271 getSomeReg :: CmmExpr -> NatM (Reg, InstrBlock)
273 r <- getRegister expr
276 tmp <- getNewRegNat rep
277 return (tmp, code tmp)
285 assignMem_I64Code :: CmmExpr -> CmmExpr -> NatM InstrBlock
286 assignMem_I64Code addrTree valueTree = do
287 Amode addr addr_code <- getAmode addrTree
288 ChildCode64 vcode rlo <- iselExpr64 valueTree
290 rhi = getHiVRegFromLo rlo
292 -- Little-endian store
293 mov_lo = MOV II32 (OpReg rlo) (OpAddr addr)
294 mov_hi = MOV II32 (OpReg rhi) (OpAddr (fromJust (addrOffset addr 4)))
296 return (vcode `appOL` addr_code `snocOL` mov_lo `snocOL` mov_hi)
299 assignReg_I64Code :: CmmReg -> CmmExpr -> NatM InstrBlock
300 assignReg_I64Code (CmmLocal (LocalReg u_dst pk)) valueTree = do
301 ChildCode64 vcode r_src_lo <- iselExpr64 valueTree
303 r_dst_lo = RegVirtual $ mkVirtualReg u_dst II32
304 r_dst_hi = getHiVRegFromLo r_dst_lo
305 r_src_hi = getHiVRegFromLo r_src_lo
306 mov_lo = MOV II32 (OpReg r_src_lo) (OpReg r_dst_lo)
307 mov_hi = MOV II32 (OpReg r_src_hi) (OpReg r_dst_hi)
310 vcode `snocOL` mov_lo `snocOL` mov_hi
313 assignReg_I64Code lvalue valueTree
314 = panic "assignReg_I64Code(i386): invalid lvalue"
319 iselExpr64 :: CmmExpr -> NatM ChildCode64
320 iselExpr64 (CmmLit (CmmInt i _)) = do
321 (rlo,rhi) <- getNewRegPairNat II32
323 r = fromIntegral (fromIntegral i :: Word32)
324 q = fromIntegral ((fromIntegral i `shiftR` 32) :: Word32)
326 MOV II32 (OpImm (ImmInteger r)) (OpReg rlo),
327 MOV II32 (OpImm (ImmInteger q)) (OpReg rhi)
330 return (ChildCode64 code rlo)
332 iselExpr64 (CmmLoad addrTree ty) | isWord64 ty = do
333 Amode addr addr_code <- getAmode addrTree
334 (rlo,rhi) <- getNewRegPairNat II32
336 mov_lo = MOV II32 (OpAddr addr) (OpReg rlo)
337 mov_hi = MOV II32 (OpAddr (fromJust (addrOffset addr 4))) (OpReg rhi)
340 ChildCode64 (addr_code `snocOL` mov_lo `snocOL` mov_hi)
344 iselExpr64 (CmmReg (CmmLocal (LocalReg vu ty))) | isWord64 ty
345 = return (ChildCode64 nilOL (RegVirtual $ mkVirtualReg vu II32))
347 -- we handle addition, but rather badly
348 iselExpr64 (CmmMachOp (MO_Add _) [e1, CmmLit (CmmInt i _)]) = do
349 ChildCode64 code1 r1lo <- iselExpr64 e1
350 (rlo,rhi) <- getNewRegPairNat II32
352 r = fromIntegral (fromIntegral i :: Word32)
353 q = fromIntegral ((fromIntegral i `shiftR` 32) :: Word32)
354 r1hi = getHiVRegFromLo r1lo
356 toOL [ MOV II32 (OpReg r1lo) (OpReg rlo),
357 ADD II32 (OpImm (ImmInteger r)) (OpReg rlo),
358 MOV II32 (OpReg r1hi) (OpReg rhi),
359 ADC II32 (OpImm (ImmInteger q)) (OpReg rhi) ]
361 return (ChildCode64 code rlo)
363 iselExpr64 (CmmMachOp (MO_Add _) [e1,e2]) = do
364 ChildCode64 code1 r1lo <- iselExpr64 e1
365 ChildCode64 code2 r2lo <- iselExpr64 e2
366 (rlo,rhi) <- getNewRegPairNat II32
368 r1hi = getHiVRegFromLo r1lo
369 r2hi = getHiVRegFromLo r2lo
372 toOL [ MOV II32 (OpReg r1lo) (OpReg rlo),
373 ADD II32 (OpReg r2lo) (OpReg rlo),
374 MOV II32 (OpReg r1hi) (OpReg rhi),
375 ADC II32 (OpReg r2hi) (OpReg rhi) ]
377 return (ChildCode64 code rlo)
379 iselExpr64 (CmmMachOp (MO_UU_Conv _ W64) [expr]) = do
381 r_dst_lo <- getNewRegNat II32
382 let r_dst_hi = getHiVRegFromLo r_dst_lo
385 ChildCode64 (code `snocOL`
386 MOV II32 (OpImm (ImmInt 0)) (OpReg r_dst_hi))
391 = pprPanic "iselExpr64(i386)" (ppr expr)
395 --------------------------------------------------------------------------------
396 getRegister :: CmmExpr -> NatM Register
398 #if !x86_64_TARGET_ARCH
399 -- on x86_64, we have %rip for PicBaseReg, but it's not a full-featured
400 -- register, it can only be used for rip-relative addressing.
401 getRegister (CmmReg (CmmGlobal PicBaseReg))
403 reg <- getPicBaseNat archWordSize
404 return (Fixed archWordSize reg nilOL)
407 getRegister (CmmReg reg)
408 = return (Fixed (cmmTypeSize (cmmRegType reg))
409 (getRegisterReg reg) nilOL)
411 getRegister tree@(CmmRegOff _ _)
412 = getRegister (mangleIndexTree tree)
415 #if WORD_SIZE_IN_BITS==32
416 -- for 32-bit architectures, support some 64 -> 32 bit conversions:
417 -- TO_W_(x), TO_W_(x >> 32)
419 getRegister (CmmMachOp (MO_UU_Conv W64 W32)
420 [CmmMachOp (MO_U_Shr W64) [x,CmmLit (CmmInt 32 _)]]) = do
421 ChildCode64 code rlo <- iselExpr64 x
422 return $ Fixed II32 (getHiVRegFromLo rlo) code
424 getRegister (CmmMachOp (MO_SS_Conv W64 W32)
425 [CmmMachOp (MO_U_Shr W64) [x,CmmLit (CmmInt 32 _)]]) = do
426 ChildCode64 code rlo <- iselExpr64 x
427 return $ Fixed II32 (getHiVRegFromLo rlo) code
429 getRegister (CmmMachOp (MO_UU_Conv W64 W32) [x]) = do
430 ChildCode64 code rlo <- iselExpr64 x
431 return $ Fixed II32 rlo code
433 getRegister (CmmMachOp (MO_SS_Conv W64 W32) [x]) = do
434 ChildCode64 code rlo <- iselExpr64 x
435 return $ Fixed II32 rlo code
444 getRegister (CmmLit (CmmFloat f W32)) = do
445 lbl <- getNewLabelNat
446 dflags <- getDynFlagsNat
447 dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl
448 Amode addr addr_code <- getAmode dynRef
452 CmmStaticLit (CmmFloat f W32)]
453 `consOL` (addr_code `snocOL`
456 return (Any FF32 code)
459 getRegister (CmmLit (CmmFloat d W64))
461 = let code dst = unitOL (GLDZ dst)
462 in return (Any FF64 code)
465 = let code dst = unitOL (GLD1 dst)
466 in return (Any FF64 code)
469 lbl <- getNewLabelNat
470 dflags <- getDynFlagsNat
471 dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl
472 Amode addr addr_code <- getAmode dynRef
476 CmmStaticLit (CmmFloat d W64)]
477 `consOL` (addr_code `snocOL`
480 return (Any FF64 code)
482 #endif /* i386_TARGET_ARCH */
487 #if x86_64_TARGET_ARCH
488 getRegister (CmmLit (CmmFloat 0.0 w)) = do
489 let size = floatSize w
490 code dst = unitOL (XOR size (OpReg dst) (OpReg dst))
491 -- I don't know why there are xorpd, xorps, and pxor instructions.
492 -- They all appear to do the same thing --SDM
493 return (Any size code)
495 getRegister (CmmLit (CmmFloat f w)) = do
496 lbl <- getNewLabelNat
497 let code dst = toOL [
500 CmmStaticLit (CmmFloat f w)],
501 MOV size (OpAddr (ripRel (ImmCLbl lbl))) (OpReg dst)
504 return (Any size code)
505 where size = floatSize w
507 #endif /* x86_64_TARGET_ARCH */
513 -- catch simple cases of zero- or sign-extended load
514 getRegister (CmmMachOp (MO_UU_Conv W8 W32) [CmmLoad addr _]) = do
515 code <- intLoadCode (MOVZxL II8) addr
516 return (Any II32 code)
518 getRegister (CmmMachOp (MO_SS_Conv W8 W32) [CmmLoad addr _]) = do
519 code <- intLoadCode (MOVSxL II8) addr
520 return (Any II32 code)
522 getRegister (CmmMachOp (MO_UU_Conv W16 W32) [CmmLoad addr _]) = do
523 code <- intLoadCode (MOVZxL II16) addr
524 return (Any II32 code)
526 getRegister (CmmMachOp (MO_SS_Conv W16 W32) [CmmLoad addr _]) = do
527 code <- intLoadCode (MOVSxL II16) addr
528 return (Any II32 code)
531 #if x86_64_TARGET_ARCH
533 -- catch simple cases of zero- or sign-extended load
534 getRegister (CmmMachOp (MO_UU_Conv W8 W64) [CmmLoad addr _]) = do
535 code <- intLoadCode (MOVZxL II8) addr
536 return (Any II64 code)
538 getRegister (CmmMachOp (MO_SS_Conv W8 W64) [CmmLoad addr _]) = do
539 code <- intLoadCode (MOVSxL II8) addr
540 return (Any II64 code)
542 getRegister (CmmMachOp (MO_UU_Conv W16 W64) [CmmLoad addr _]) = do
543 code <- intLoadCode (MOVZxL II16) addr
544 return (Any II64 code)
546 getRegister (CmmMachOp (MO_SS_Conv W16 W64) [CmmLoad addr _]) = do
547 code <- intLoadCode (MOVSxL II16) addr
548 return (Any II64 code)
550 getRegister (CmmMachOp (MO_UU_Conv W32 W64) [CmmLoad addr _]) = do
551 code <- intLoadCode (MOV II32) addr -- 32-bit loads zero-extend
552 return (Any II64 code)
554 getRegister (CmmMachOp (MO_SS_Conv W32 W64) [CmmLoad addr _]) = do
555 code <- intLoadCode (MOVSxL II32) addr
556 return (Any II64 code)
558 getRegister (CmmMachOp (MO_Add W64) [CmmReg (CmmGlobal PicBaseReg),
559 CmmLit displacement])
560 = return $ Any II64 (\dst -> unitOL $
561 LEA II64 (OpAddr (ripRel (litToImm displacement))) (OpReg dst))
563 getRegister (CmmMachOp (MO_F_Neg W32) [x]) = do
564 x_code <- getAnyReg x
565 lbl <- getNewLabelNat
567 code dst = x_code dst `appOL` toOL [
568 -- This is how gcc does it, so it can't be that bad:
569 LDATA ReadOnlyData16 [
572 CmmStaticLit (CmmInt 0x80000000 W32),
573 CmmStaticLit (CmmInt 0 W32),
574 CmmStaticLit (CmmInt 0 W32),
575 CmmStaticLit (CmmInt 0 W32)
577 XOR FF32 (OpAddr (ripRel (ImmCLbl lbl))) (OpReg dst)
578 -- xorps, so we need the 128-bit constant
579 -- ToDo: rip-relative
582 return (Any FF32 code)
584 getRegister (CmmMachOp (MO_F_Neg W64) [x]) = do
585 x_code <- getAnyReg x
586 lbl <- getNewLabelNat
588 -- This is how gcc does it, so it can't be that bad:
589 code dst = x_code dst `appOL` toOL [
590 LDATA ReadOnlyData16 [
593 CmmStaticLit (CmmInt 0x8000000000000000 W64),
594 CmmStaticLit (CmmInt 0 W64)
596 -- gcc puts an unpck here. Wonder if we need it.
597 XOR FF64 (OpAddr (ripRel (ImmCLbl lbl))) (OpReg dst)
598 -- xorpd, so we need the 128-bit constant
601 return (Any FF64 code)
603 #endif /* x86_64_TARGET_ARCH */
609 getRegister (CmmMachOp mop [x]) -- unary MachOps
612 MO_F_Neg W32 -> trivialUFCode FF32 (GNEG FF32) x
613 MO_F_Neg W64 -> trivialUFCode FF64 (GNEG FF64) x
616 MO_S_Neg w -> triv_ucode NEGI (intSize w)
617 MO_F_Neg w -> triv_ucode NEGI (floatSize w)
618 MO_Not w -> triv_ucode NOT (intSize w)
621 MO_UU_Conv W32 W8 -> toI8Reg W32 x
622 MO_SS_Conv W32 W8 -> toI8Reg W32 x
623 MO_UU_Conv W16 W8 -> toI8Reg W16 x
624 MO_SS_Conv W16 W8 -> toI8Reg W16 x
625 MO_UU_Conv W32 W16 -> toI16Reg W32 x
626 MO_SS_Conv W32 W16 -> toI16Reg W32 x
628 #if x86_64_TARGET_ARCH
629 MO_UU_Conv W64 W32 -> conversionNop II64 x
630 MO_SS_Conv W64 W32 -> conversionNop II64 x
631 MO_UU_Conv W64 W16 -> toI16Reg W64 x
632 MO_SS_Conv W64 W16 -> toI16Reg W64 x
633 MO_UU_Conv W64 W8 -> toI8Reg W64 x
634 MO_SS_Conv W64 W8 -> toI8Reg W64 x
637 MO_UU_Conv rep1 rep2 | rep1 == rep2 -> conversionNop (intSize rep1) x
638 MO_SS_Conv rep1 rep2 | rep1 == rep2 -> conversionNop (intSize rep1) x
641 MO_UU_Conv W8 W32 -> integerExtend W8 W32 MOVZxL x
642 MO_UU_Conv W16 W32 -> integerExtend W16 W32 MOVZxL x
643 MO_UU_Conv W8 W16 -> integerExtend W8 W16 MOVZxL x
645 MO_SS_Conv W8 W32 -> integerExtend W8 W32 MOVSxL x
646 MO_SS_Conv W16 W32 -> integerExtend W16 W32 MOVSxL x
647 MO_SS_Conv W8 W16 -> integerExtend W8 W16 MOVSxL x
649 #if x86_64_TARGET_ARCH
650 MO_UU_Conv W8 W64 -> integerExtend W8 W64 MOVZxL x
651 MO_UU_Conv W16 W64 -> integerExtend W16 W64 MOVZxL x
652 MO_UU_Conv W32 W64 -> integerExtend W32 W64 MOVZxL x
653 MO_SS_Conv W8 W64 -> integerExtend W8 W64 MOVSxL x
654 MO_SS_Conv W16 W64 -> integerExtend W16 W64 MOVSxL x
655 MO_SS_Conv W32 W64 -> integerExtend W32 W64 MOVSxL x
656 -- for 32-to-64 bit zero extension, amd64 uses an ordinary movl.
657 -- However, we don't want the register allocator to throw it
658 -- away as an unnecessary reg-to-reg move, so we keep it in
659 -- the form of a movzl and print it as a movl later.
663 MO_FF_Conv W32 W64 -> conversionNop FF64 x
664 MO_FF_Conv W64 W32 -> conversionNop FF32 x
666 MO_FF_Conv W32 W64 -> coerceFP2FP W64 x
667 MO_FF_Conv W64 W32 -> coerceFP2FP W32 x
670 MO_FS_Conv from to -> coerceFP2Int from to x
671 MO_SF_Conv from to -> coerceInt2FP from to x
673 other -> pprPanic "getRegister" (pprMachOp mop)
675 triv_ucode :: (Size -> Operand -> Instr) -> Size -> NatM Register
676 triv_ucode instr size = trivialUCode size (instr size) x
678 -- signed or unsigned extension.
679 integerExtend :: Width -> Width
680 -> (Size -> Operand -> Operand -> Instr)
681 -> CmmExpr -> NatM Register
682 integerExtend from to instr expr = do
683 (reg,e_code) <- if from == W8 then getByteReg expr
688 instr (intSize from) (OpReg reg) (OpReg dst)
689 return (Any (intSize to) code)
691 toI8Reg :: Width -> CmmExpr -> NatM Register
693 = do codefn <- getAnyReg expr
694 return (Any (intSize new_rep) codefn)
695 -- HACK: use getAnyReg to get a byte-addressable register.
696 -- If the source was a Fixed register, this will add the
697 -- mov instruction to put it into the desired destination.
698 -- We're assuming that the destination won't be a fixed
699 -- non-byte-addressable register; it won't be, because all
700 -- fixed registers are word-sized.
702 toI16Reg = toI8Reg -- for now
704 conversionNop :: Size -> CmmExpr -> NatM Register
705 conversionNop new_size expr
706 = do e_code <- getRegister expr
707 return (swizzleRegisterRep e_code new_size)
710 getRegister e@(CmmMachOp mop [x, y]) -- dyadic MachOps
712 MO_F_Eq w -> condFltReg EQQ x y
713 MO_F_Ne w -> condFltReg NE x y
714 MO_F_Gt w -> condFltReg GTT x y
715 MO_F_Ge w -> condFltReg GE x y
716 MO_F_Lt w -> condFltReg LTT x y
717 MO_F_Le w -> condFltReg LE x y
719 MO_Eq rep -> condIntReg EQQ x y
720 MO_Ne rep -> condIntReg NE x y
722 MO_S_Gt rep -> condIntReg GTT x y
723 MO_S_Ge rep -> condIntReg GE x y
724 MO_S_Lt rep -> condIntReg LTT x y
725 MO_S_Le rep -> condIntReg LE x y
727 MO_U_Gt rep -> condIntReg GU x y
728 MO_U_Ge rep -> condIntReg GEU x y
729 MO_U_Lt rep -> condIntReg LU x y
730 MO_U_Le rep -> condIntReg LEU x y
733 MO_F_Add w -> trivialFCode w GADD x y
734 MO_F_Sub w -> trivialFCode w GSUB x y
735 MO_F_Quot w -> trivialFCode w GDIV x y
736 MO_F_Mul w -> trivialFCode w GMUL x y
739 #if x86_64_TARGET_ARCH
740 MO_F_Add w -> trivialFCode w ADD x y
741 MO_F_Sub w -> trivialFCode w SUB x y
742 MO_F_Quot w -> trivialFCode w FDIV x y
743 MO_F_Mul w -> trivialFCode w MUL x y
746 MO_Add rep -> add_code rep x y
747 MO_Sub rep -> sub_code rep x y
749 MO_S_Quot rep -> div_code rep True True x y
750 MO_S_Rem rep -> div_code rep True False x y
751 MO_U_Quot rep -> div_code rep False True x y
752 MO_U_Rem rep -> div_code rep False False x y
754 MO_S_MulMayOflo rep -> imulMayOflo rep x y
756 MO_Mul rep -> triv_op rep IMUL
757 MO_And rep -> triv_op rep AND
758 MO_Or rep -> triv_op rep OR
759 MO_Xor rep -> triv_op rep XOR
761 {- Shift ops on x86s have constraints on their source, it
762 either has to be Imm, CL or 1
763 => trivialCode is not restrictive enough (sigh.)
765 MO_Shl rep -> shift_code rep SHL x y {-False-}
766 MO_U_Shr rep -> shift_code rep SHR x y {-False-}
767 MO_S_Shr rep -> shift_code rep SAR x y {-False-}
769 other -> pprPanic "getRegister(x86) - binary CmmMachOp (1)" (pprMachOp mop)
772 triv_op width instr = trivialCode width op (Just op) x y
773 where op = instr (intSize width)
775 imulMayOflo :: Width -> CmmExpr -> CmmExpr -> NatM Register
776 imulMayOflo rep a b = do
777 (a_reg, a_code) <- getNonClobberedReg a
778 b_code <- getAnyReg b
780 shift_amt = case rep of
783 _ -> panic "shift_amt"
786 code = a_code `appOL` b_code eax `appOL`
788 IMUL2 size (OpReg a_reg), -- result in %edx:%eax
789 SAR size (OpImm (ImmInt shift_amt)) (OpReg eax),
790 -- sign extend lower part
791 SUB size (OpReg edx) (OpReg eax)
792 -- compare against upper
793 -- eax==0 if high part == sign extended low part
796 return (Fixed size eax code)
800 -> (Size -> Operand -> Operand -> Instr)
805 {- Case1: shift length as immediate -}
806 shift_code width instr x y@(CmmLit lit) = do
807 x_code <- getAnyReg x
811 = x_code dst `snocOL`
812 instr size (OpImm (litToImm lit)) (OpReg dst)
814 return (Any size code)
816 {- Case2: shift length is complex (non-immediate)
818 * we cannot do y first *and* put its result in %ecx, because
819 %ecx might be clobbered by x.
820 * if we do y second, then x cannot be
821 in a clobbered reg. Also, we cannot clobber x's reg
822 with the instruction itself.
824 - do y first, put its result in a fresh tmp, then copy it to %ecx later
825 - do y second and put its result into %ecx. x gets placed in a fresh
826 tmp. This is likely to be better, because the reg alloc can
827 eliminate this reg->reg move here (it won't eliminate the other one,
828 because the move is into the fixed %ecx).
830 shift_code width instr x y{-amount-} = do
831 x_code <- getAnyReg x
832 let size = intSize width
833 tmp <- getNewRegNat size
834 y_code <- getAnyReg y
836 code = x_code tmp `appOL`
838 instr size (OpReg ecx) (OpReg tmp)
840 return (Fixed size tmp code)
843 add_code :: Width -> CmmExpr -> CmmExpr -> NatM Register
844 add_code rep x (CmmLit (CmmInt y _))
845 | is32BitInteger y = add_int rep x y
846 add_code rep x y = trivialCode rep (ADD size) (Just (ADD size)) x y
847 where size = intSize rep
850 sub_code :: Width -> CmmExpr -> CmmExpr -> NatM Register
851 sub_code rep x (CmmLit (CmmInt y _))
852 | is32BitInteger (-y) = add_int rep x (-y)
853 sub_code rep x y = trivialCode rep (SUB (intSize rep)) Nothing x y
855 -- our three-operand add instruction:
856 add_int width x y = do
857 (x_reg, x_code) <- getSomeReg x
860 imm = ImmInt (fromInteger y)
864 (OpAddr (AddrBaseIndex (EABaseReg x_reg) EAIndexNone imm))
867 return (Any size code)
869 ----------------------
870 div_code width signed quotient x y = do
871 (y_op, y_code) <- getRegOrMem y -- cannot be clobbered
872 x_code <- getAnyReg x
875 widen | signed = CLTD size
876 | otherwise = XOR size (OpReg edx) (OpReg edx)
878 instr | signed = IDIV
881 code = y_code `appOL`
883 toOL [widen, instr size y_op]
885 result | quotient = eax
889 return (Fixed size result code)
892 getRegister (CmmLoad mem pk)
895 Amode src mem_code <- getAmode mem
897 size = cmmTypeSize pk
898 code dst = mem_code `snocOL`
899 IF_ARCH_i386(GLD size src dst,
900 MOV size (OpAddr src) (OpReg dst))
901 return (Any size code)
904 getRegister (CmmLoad mem pk)
907 code <- intLoadCode instr mem
908 return (Any size code)
912 instr = case width of
915 -- We always zero-extend 8-bit loads, if we
916 -- can't think of anything better. This is because
917 -- we can't guarantee access to an 8-bit variant of every register
918 -- (esi and edi don't have 8-bit variants), so to make things
919 -- simpler we do our 8-bit arithmetic with full 32-bit registers.
922 #if x86_64_TARGET_ARCH
923 -- Simpler memory load code on x86_64
924 getRegister (CmmLoad mem pk)
926 code <- intLoadCode (MOV size) mem
927 return (Any size code)
928 where size = intSize $ typeWidth pk
931 getRegister (CmmLit (CmmInt 0 width))
935 -- x86_64: 32-bit xor is one byte shorter, and zero-extends to 64 bits
936 adj_size = case size of II64 -> II32; _ -> size
937 size1 = IF_ARCH_i386( size, adj_size )
939 = unitOL (XOR size1 (OpReg dst) (OpReg dst))
941 return (Any size code)
943 #if x86_64_TARGET_ARCH
944 -- optimisation for loading small literals on x86_64: take advantage
945 -- of the automatic zero-extension from 32 to 64 bits, because the 32-bit
946 -- instruction forms are shorter.
947 getRegister (CmmLit lit)
948 | isWord64 (cmmLitType lit), not (isBigLit lit)
951 code dst = unitOL (MOV II32 (OpImm imm) (OpReg dst))
953 return (Any II64 code)
955 isBigLit (CmmInt i _) = i < 0 || i > 0xffffffff
957 -- note1: not the same as (not.is32BitLit), because that checks for
958 -- signed literals that fit in 32 bits, but we want unsigned
960 -- note2: all labels are small, because we're assuming the
961 -- small memory model (see gcc docs, -mcmodel=small).
964 getRegister (CmmLit lit)
966 size = cmmTypeSize (cmmLitType lit)
968 code dst = unitOL (MOV size (OpImm imm) (OpReg dst))
970 return (Any size code)
972 getRegister other = pprPanic "getRegister(x86)" (ppr other)
-- | Build a load: compute the addressing mode for @mem@, then return a
-- function that, given a destination register, appends the supplied
-- load instruction reading from that address into the register.
975 intLoadCode :: (Operand -> Operand -> Instr) -> CmmExpr
976 -> NatM (Reg -> InstrBlock)
977 intLoadCode instr mem = do
978 Amode src mem_code <- getAmode mem
979 return (\dst -> mem_code `snocOL` instr (OpAddr src) (OpReg dst))
981 -- Compute an expression into *any* register, adding the appropriate
982 -- move instruction if necessary.
983 getAnyReg :: CmmExpr -> NatM (Reg -> InstrBlock)
985 r <- getRegister expr
988 anyReg :: Register -> NatM (Reg -> InstrBlock)
-- An Any register already parameterises its code over the destination.
989 anyReg (Any _ code) = return code
-- A Fixed register needs an extra reg-to-reg move into the caller's
-- chosen destination, appended after the fixed code sequence.
990 anyReg (Fixed rep reg fcode) = return (\dst -> fcode `snocOL` reg2reg rep reg dst)
992 -- A bit like getSomeReg, but we want a reg that can be byte-addressed.
993 -- Fixed registers might not be byte-addressable, so we make sure we've
994 -- got a temporary, inserting an extra reg copy if necessary.
995 getByteReg :: CmmExpr -> NatM (Reg, InstrBlock)
996 #if x86_64_TARGET_ARCH
997 getByteReg = getSomeReg -- all regs are byte-addressable on x86_64
1000 r <- getRegister expr
1003 tmp <- getNewRegNat rep
1004 return (tmp, code tmp)
1006 | isVirtualReg reg -> return (reg,code)
1008 tmp <- getNewRegNat rep
1009 return (tmp, code `snocOL` reg2reg rep reg tmp)
1010 -- ToDo: could optimise slightly by checking for byte-addressable
1011 -- real registers, but that will happen very rarely if at all.
1014 -- Another variant: this time we want the result in a register that cannot
1015 -- be modified by code to evaluate an arbitrary expression.
1016 getNonClobberedReg :: CmmExpr -> NatM (Reg, InstrBlock)
1017 getNonClobberedReg expr = do
1018 r <- getRegister expr
1021 tmp <- getNewRegNat rep
1022 return (tmp, code tmp)
1024 -- only free regs can be clobbered
1025 | RegReal (RealRegSingle rr) <- reg
1026 , isFastTrue (freeReg rr)
1028 tmp <- getNewRegNat rep
1029 return (tmp, code `snocOL` reg2reg rep reg tmp)
1033 reg2reg :: Size -> Reg -> Reg -> Instr
1034 reg2reg size src dst
1035 #if i386_TARGET_ARCH
1036 | isFloatSize size = GMOV src dst
1038 | otherwise = MOV size (OpReg src) (OpReg dst)
1042 --------------------------------------------------------------------------------
1043 getAmode :: CmmExpr -> NatM Amode
1044 getAmode tree@(CmmRegOff _ _) = getAmode (mangleIndexTree tree)
1046 #if x86_64_TARGET_ARCH
1048 getAmode (CmmMachOp (MO_Add W64) [CmmReg (CmmGlobal PicBaseReg),
1049 CmmLit displacement])
1050 = return $ Amode (ripRel (litToImm displacement)) nilOL
1055 -- This is all just ridiculous, since it carefully undoes
1056 -- what mangleIndexTree has just done.
1057 getAmode (CmmMachOp (MO_Sub rep) [x, CmmLit lit@(CmmInt i _)])
1059 -- ASSERT(rep == II32)???
1060 = do (x_reg, x_code) <- getSomeReg x
1061 let off = ImmInt (-(fromInteger i))
1062 return (Amode (AddrBaseIndex (EABaseReg x_reg) EAIndexNone off) x_code)
1064 getAmode (CmmMachOp (MO_Add rep) [x, CmmLit lit@(CmmInt i _)])
1066 -- ASSERT(rep == II32)???
1067 = do (x_reg, x_code) <- getSomeReg x
1068 let off = ImmInt (fromInteger i)
1069 return (Amode (AddrBaseIndex (EABaseReg x_reg) EAIndexNone off) x_code)
1071 -- Turn (lit1 << n + lit2) into (lit2 + lit1 << n) so it will be
1072 -- recognised by the next rule.
1073 getAmode (CmmMachOp (MO_Add rep) [a@(CmmMachOp (MO_Shl _) _),
1075 = getAmode (CmmMachOp (MO_Add rep) [b,a])
1077 getAmode (CmmMachOp (MO_Add rep) [x, CmmMachOp (MO_Shl _)
1078 [y, CmmLit (CmmInt shift _)]])
1079 | shift == 0 || shift == 1 || shift == 2 || shift == 3
1080 = x86_complex_amode x y shift 0
1082 getAmode (CmmMachOp (MO_Add rep)
1083 [x, CmmMachOp (MO_Add _)
1084 [CmmMachOp (MO_Shl _) [y, CmmLit (CmmInt shift _)],
1085 CmmLit (CmmInt offset _)]])
1086 | shift == 0 || shift == 1 || shift == 2 || shift == 3
1087 && is32BitInteger offset
1088 = x86_complex_amode x y shift offset
1090 getAmode (CmmMachOp (MO_Add rep) [x,y])
1091 = x86_complex_amode x y 0 0
1093 getAmode (CmmLit lit) | is32BitLit lit
1094 = return (Amode (ImmAddr (litToImm lit) 0) nilOL)
1097 (reg,code) <- getSomeReg expr
1098 return (Amode (AddrBaseIndex (EABaseReg reg) EAIndexNone (ImmInt 0)) code)
1101 x86_complex_amode :: CmmExpr -> CmmExpr -> Integer -> Integer -> NatM Amode
1102 x86_complex_amode base index shift offset
1103 = do (x_reg, x_code) <- getNonClobberedReg base
1104 -- x must be in a temp, because it has to stay live over y_code
1105 -- we could compare x_reg and y_reg and do something better here...
1106 (y_reg, y_code) <- getSomeReg index
1108 code = x_code `appOL` y_code
1109 base = case shift of 0 -> 1; 1 -> 2; 2 -> 4; 3 -> 8
1110 return (Amode (AddrBaseIndex (EABaseReg x_reg) (EAIndex y_reg base) (ImmInt (fromIntegral offset)))
1116 -- -----------------------------------------------------------------------------
1117 -- getOperand: sometimes any operand will do.
1119 -- getNonClobberedOperand: the value of the operand will remain valid across
1120 -- the computation of an arbitrary expression, unless the expression
1121 -- is computed directly into a register which the operand refers to
1122 -- (see trivialCode where this function is used for an example).
-- | Get an operand whose value survives the computation of an arbitrary
-- later expression (see the comment block above).  NOTE(review): several
-- lines of this definition are elided in this view (the data emitted for
-- the float literal, and the then/else structure of the CmmLoad case).
1124 getNonClobberedOperand :: CmmExpr -> NatM (Operand, InstrBlock)
1125 #if x86_64_TARGET_ARCH
-- x86-64 only: a non-zero float literal is placed in a read-only data
-- section and addressed RIP-relatively.
1126 getNonClobberedOperand (CmmLit lit)
1127 | isSuitableFloatingPointLit lit = do
1128 lbl <- getNewLabelNat
1129 let code = unitOL (LDATA ReadOnlyData [CmmDataLabel lbl,
1131 return (OpAddr (ripRel (ImmCLbl lbl)), code)
-- Small non-float literals become immediate operands; immediates can
-- never be clobbered.
1133 getNonClobberedOperand (CmmLit lit)
1134 | is32BitLit lit && not (isFloatType (cmmLitType lit)) =
1135 return (OpImm (litToImm lit), nilOL)
1136 getNonClobberedOperand (CmmLoad mem pk)
1137 | IF_ARCH_i386(not (isFloatType pk) && not (isWord64 pk), True) = do
1138 Amode src mem_code <- getAmode mem
-- If the amode mentions a clobberable (real) register, materialise the
-- address into a fresh temporary with LEA and address through that.
1140 if (amodeCouldBeClobbered src)
1142 tmp <- getNewRegNat archWordSize
1143 return (AddrBaseIndex (EABaseReg tmp) EAIndexNone (ImmInt 0),
1144 unitOL (LEA II32 (OpAddr src) (OpReg tmp)))
1147 return (OpAddr src', save_code `appOL` mem_code)
-- General case: force the value into a non-clobbered register.
1148 getNonClobberedOperand e = do
1149 (reg, code) <- getNonClobberedReg e
1150 return (OpReg reg, code)
-- | True if any register mentioned by the addressing mode is a real
-- (allocatable) register, and hence may be overwritten by later code.
1152 amodeCouldBeClobbered :: AddrMode -> Bool
1153 amodeCouldBeClobbered amode = any regClobbered (addrModeRegs amode)
-- Only real single registers that the allocator treats as free can be
-- clobbered; virtual registers stay live over arbitrary computations.
1155 regClobbered (RegReal (RealRegSingle rr)) = isFastTrue (freeReg rr)
1156 regClobbered _ = False
1158 -- getOperand: the operand is not required to remain valid across the
1159 -- computation of an arbitrary expression.
-- | Get any operand for the expression; unlike 'getNonClobberedOperand'
-- the result need not stay valid across later computations.
-- NOTE(review): the float-literal data and the fallback clause header
-- are elided in this view.
1160 getOperand :: CmmExpr -> NatM (Operand, InstrBlock)
1161 #if x86_64_TARGET_ARCH
1162 getOperand (CmmLit lit)
1163 | isSuitableFloatingPointLit lit = do
1164 lbl <- getNewLabelNat
1165 let code = unitOL (LDATA ReadOnlyData [CmmDataLabel lbl,
1167 return (OpAddr (ripRel (ImmCLbl lbl)), code)
1169 getOperand (CmmLit lit)
1170 | is32BitLit lit && not (isFloatType (cmmLitType lit)) = do
1171 return (OpImm (litToImm lit), nilOL)
-- Loads of integral, word-sized-or-smaller values can be memory operands
-- directly (on i386 floats/64-bit go through registers instead).
1172 getOperand (CmmLoad mem pk)
1173 | IF_ARCH_i386(not (isFloatType pk) && not (isWord64 pk), True) = do
1174 Amode src mem_code <- getAmode mem
1175 return (OpAddr src, mem_code)
-- Fallback (header elided): evaluate into some register.
1177 (reg, code) <- getSomeReg e
1178 return (OpReg reg, code)
-- | Can this expression be used directly as an x86 operand (memory or
-- immediate) without first being forced into a register?
1180 isOperand :: CmmExpr -> Bool
1181 isOperand (CmmLoad _ _) = True
1182 isOperand (CmmLit lit) = is32BitLit lit
1183 || isSuitableFloatingPointLit lit
1186 -- if we want a floating-point literal as an operand, we can
1187 -- use it directly from memory. However, if the literal is
1188 -- zero, we're better off generating it into a register using
-- xor (the elided continuation presumably says so) -- hence zero is
-- excluded here.
1190 isSuitableFloatingPointLit (CmmFloat f _) = f /= 0.0
1191 isSuitableFloatingPointLit _ = False
-- | Like 'getOperand', but the fallback forces the expression into a
-- non-clobbered register rather than any register.
-- NOTE(review): the fallback clause header is elided in this view.
1193 getRegOrMem :: CmmExpr -> NatM (Operand, InstrBlock)
1194 getRegOrMem (CmmLoad mem pk)
1195 | IF_ARCH_i386(not (isFloatType pk) && not (isWord64 pk), True) = do
1196 Amode src mem_code <- getAmode mem
1197 return (OpAddr src, mem_code)
1199 (reg, code) <- getNonClobberedReg e
1200 return (OpReg reg, code)
-- | Is this literal encodable as a 32-bit immediate?
-- NOTE(review): the i386 branch and remaining clauses are elided here.
1202 #if x86_64_TARGET_ARCH
1203 is32BitLit (CmmInt i W64) = is32BitInteger i
1204 -- assume that labels are in the range 0-2^31-1: this assumes the
1205 -- small memory model (see gcc docs, -mcmodel=small).
1212 -- Set up a condition code for a conditional branch.
-- | Dispatch a comparison MachOp to 'condFltCode' (floats, with the
-- appropriate Cond) or 'condIntCode' (integers, signed vs unsigned).
1214 getCondCode :: CmmExpr -> NatM CondCode
1216 -- yes, they really do seem to want exactly the same!
1218 getCondCode (CmmMachOp mop [x, y])
1221 MO_F_Eq W32 -> condFltCode EQQ x y
1222 MO_F_Ne W32 -> condFltCode NE x y
1223 MO_F_Gt W32 -> condFltCode GTT x y
1224 MO_F_Ge W32 -> condFltCode GE x y
1225 MO_F_Lt W32 -> condFltCode LTT x y
1226 MO_F_Le W32 -> condFltCode LE x y
1228 MO_F_Eq W64 -> condFltCode EQQ x y
1229 MO_F_Ne W64 -> condFltCode NE x y
1230 MO_F_Gt W64 -> condFltCode GTT x y
1231 MO_F_Ge W64 -> condFltCode GE x y
1232 MO_F_Lt W64 -> condFltCode LTT x y
1233 MO_F_Le W64 -> condFltCode LE x y
1235 MO_Eq rep -> condIntCode EQQ x y
1236 MO_Ne rep -> condIntCode NE x y
-- Signed comparisons use GTT/GE/LTT/LE ...
1238 MO_S_Gt rep -> condIntCode GTT x y
1239 MO_S_Ge rep -> condIntCode GE x y
1240 MO_S_Lt rep -> condIntCode LTT x y
1241 MO_S_Le rep -> condIntCode LE x y
-- ... unsigned comparisons use the *U condition codes.
1243 MO_U_Gt rep -> condIntCode GU x y
1244 MO_U_Ge rep -> condIntCode GEU x y
1245 MO_U_Lt rep -> condIntCode LU x y
1246 MO_U_Le rep -> condIntCode LEU x y
1248 other -> pprPanic "getCondCode(x86,x86_64,sparc)" (ppr (CmmMachOp mop [x,y]))
1250 getCondCode other = pprPanic "getCondCode(2)(x86,sparc)" (ppr other)
1255 -- @cond(Int|Flt)Code@: Turn a boolean expression into a condition, to be
1256 -- passed back up the tree.
-- | Emit code that sets the flags for an integer comparison.  Clauses
-- are ordered from most to least specific operand patterns.
1258 condIntCode :: Cond -> CmmExpr -> CmmExpr -> NatM CondCode
1260 -- memory vs immediate
-- Compare memory directly against a 32-bit immediate: CMP imm, mem.
1261 condIntCode cond (CmmLoad x pk) (CmmLit lit) | is32BitLit lit = do
1262 Amode x_addr x_code <- getAmode x
1265 code = x_code `snocOL`
1266 CMP (cmmTypeSize pk) (OpImm imm) (OpAddr x_addr)
1268 return (CondCode False cond code)
1270 -- anything vs zero, using a mask
1271 -- TODO: Add some sanity checking!!!!
-- (x .&. mask) `cmp` 0 becomes a single TEST instruction.
1272 condIntCode cond (CmmMachOp (MO_And rep) [x,o2]) (CmmLit (CmmInt 0 pk))
1273 | (CmmLit lit@(CmmInt mask pk2)) <- o2, is32BitLit lit
1275 (x_reg, x_code) <- getSomeReg x
1277 code = x_code `snocOL`
1278 TEST (intSize pk) (OpImm (ImmInteger mask)) (OpReg x_reg)
1280 return (CondCode False cond code)
-- Comparison against zero: TEST reg,reg sets the flags without an
-- immediate.
1283 condIntCode cond x (CmmLit (CmmInt 0 pk)) = do
1284 (x_reg, x_code) <- getSomeReg x
1286 code = x_code `snocOL`
1287 TEST (intSize pk) (OpReg x_reg) (OpReg x_reg)
1289 return (CondCode False cond code)
1291 -- anything vs operand
-- x must survive the computation of y's operand, hence getNonClobberedReg.
1292 condIntCode cond x y | isOperand y = do
1293 (x_reg, x_code) <- getNonClobberedReg x
1294 (y_op, y_code) <- getOperand y
1296 code = x_code `appOL` y_code `snocOL`
1297 CMP (cmmTypeSize (cmmExprType x)) y_op (OpReg x_reg)
1299 return (CondCode False cond code)
1301 -- anything vs anything
1302 condIntCode cond x y = do
1303 (y_reg, y_code) <- getNonClobberedReg y
1304 (x_op, x_code) <- getRegOrMem x
1306 code = y_code `appOL`
1308 CMP (cmmTypeSize (cmmExprType x)) (OpReg y_reg) x_op
1310 return (CondCode False cond code)
1314 --------------------------------------------------------------------------------
-- | Emit code that sets the flags for a floating-point comparison.
1315 condFltCode :: Cond -> CmmExpr -> CmmExpr -> NatM CondCode
1317 #if i386_TARGET_ARCH
-- i386 (x87): use the pseudo-instruction GCMP, which performs the test
-- itself; the Cond it is given selects what to test for.
1318 condFltCode cond x y
1319 = ASSERT(cond `elem` ([EQQ, NE, LE, LTT, GE, GTT])) do
1320 (x_reg, x_code) <- getNonClobberedReg x
1321 (y_reg, y_code) <- getSomeReg y
1323 code = x_code `appOL` y_code `snocOL`
1324 GCMP cond x_reg y_reg
1325 -- The GCMP insn does the test and sets the zero flag if comparable
1326 -- and true. Hence we always supply EQQ as the condition to test.
1327 return (CondCode True EQQ code)
1329 #elif x86_64_TARGET_ARCH
1330 -- in the SSE2 comparison ops (ucomiss, ucomisd) the left arg may be
1331 -- an operand, but the right must be a reg. We can probably do better
1332 -- than this general case...
1333 condFltCode cond x y = do
1334 (x_reg, x_code) <- getNonClobberedReg x
1335 (y_op, y_code) <- getOperand y
1337 code = x_code `appOL`
1339 CMP (floatSize $ cmmExprWidth x) y_op (OpReg x_reg)
1340 -- NB(1): we need to use the unsigned comparison operators on the
1341 -- result of this comparison.
1343 return (CondCode True (condToUnsigned cond) code)
1346 condFltCode = panic "X86.condFltCode: not defined"
1352 -- -----------------------------------------------------------------------------
1353 -- Generating assignments
1355 -- Assignments are really at the heart of the whole code generation
1356 -- business. Almost all top-level nodes of any real importance are
1357 -- assignments, which correspond to loads, stores, or register
1358 -- transfers. If we're really lucky, some of the register transfers
1359 -- will go away, because we can use the destination register to
1360 -- complete the code generation for the right hand side. This only
1361 -- fails when the right hand side is forced into a fixed register
1362 -- (e.g. the result of a call).
-- Type signatures for the four assignment code generators below:
-- integer/float assignment to memory, and to a register.
1364 assignMem_IntCode :: Size -> CmmExpr -> CmmExpr -> NatM InstrBlock
1365 assignReg_IntCode :: Size -> CmmReg -> CmmExpr -> NatM InstrBlock
1367 assignMem_FltCode :: Size -> CmmExpr -> CmmExpr -> NatM InstrBlock
1368 assignReg_FltCode :: Size -> CmmReg -> CmmExpr -> NatM InstrBlock
1371 -- integer assignment to memory
1373 -- specific case of adding/subtracting an integer to a particular address.
1374 -- ToDo: catch other cases where we can use an operation directly on a memory
-- operand.  Pattern: mem[addr] := mem[addr] `op` imm  ==>  one ADD/SUB
-- with a memory destination.  The 64-bit guard keeps the immediate legal.
1376 assignMem_IntCode pk addr (CmmMachOp op [CmmLoad addr2 _,
1377 CmmLit (CmmInt i _)])
1378 | addr == addr2, pk /= II64 || is32BitInteger i,
1379 Just instr <- check op
1380 = do Amode amode code_addr <- getAmode addr
1381 let code = code_addr `snocOL`
1382 instr pk (OpImm (ImmInt (fromIntegral i))) (OpAddr amode)
-- Only Add/Sub are recognised; anything else falls through to the
-- general clause (the Nothing case of 'check' is elided in this view).
1385 check (MO_Add _) = Just ADD
1386 check (MO_Sub _) = Just SUB
-- General integer store: compute a register-or-immediate source, then
-- MOV it to the destination address.
1391 assignMem_IntCode pk addr src = do
1392 Amode addr code_addr <- getAmode addr
1393 (code_src, op_src) <- get_op_RI src
1395 code = code_src `appOL`
1397 MOV pk op_src (OpAddr addr)
1398 -- NOTE: op_src is stable, so it will still be valid
1399 -- after code_addr. This may involve the introduction
1400 -- of an extra MOV to a temporary register, but we hope
1401 -- the register allocator will get rid of it.
-- | Get a source operand that is either a 32-bit immediate or a
-- non-clobbered register (the header of the second clause is elided).
1405 get_op_RI :: CmmExpr -> NatM (InstrBlock,Operand) -- code, operator
1406 get_op_RI (CmmLit lit) | is32BitLit lit
1407 = return (nilOL, OpImm (litToImm lit))
1409 = do (reg,code) <- getNonClobberedReg op
1410 return (code, OpReg reg)
1413 -- Assign; dst is a reg, rhs is mem
-- Loading from memory straight into the destination register.
1414 assignReg_IntCode pk reg (CmmLoad src _) = do
1415 load_code <- intLoadCode (MOV pk) src
1416 return (load_code (getRegisterReg reg))
1418 -- dst is a reg, but src could be anything
-- getAnyReg returns a function from destination register to code, so
-- the result can be generated directly into the target register.
1419 assignReg_IntCode pk reg src = do
1420 code <- getAnyReg src
1421 return (code (getRegisterReg reg))
1424 -- Floating point assignment to memory
-- i386 stores via the x87 GST pseudo-op; x86-64 uses a plain (SSE) MOV.
1425 assignMem_FltCode pk addr src = do
1426 (src_reg, src_code) <- getNonClobberedReg src
1427 Amode addr addr_code <- getAmode addr
1429 code = src_code `appOL`
1431 IF_ARCH_i386(GST pk src_reg addr,
1432 MOV pk (OpReg src_reg) (OpAddr addr))
1435 -- Floating point assignment to a register/temporary
-- Same trick as assignReg_IntCode: generate the RHS directly into the
-- destination register.
1436 assignReg_FltCode pk reg src = do
1437 src_code <- getAnyReg src
1438 return (src_code (getRegisterReg reg))
-- | Generate an unconditional jump to a computed target: memory
-- indirect, immediate label, or via a register (fallback header elided).
1441 genJump :: CmmExpr{-the branch target-} -> NatM InstrBlock
1443 genJump (CmmLoad mem pk) = do
1444 Amode target code <- getAmode mem
1445 return (code `snocOL` JMP (OpAddr target))
1447 genJump (CmmLit lit) = do
1448 return (unitOL (JMP (OpImm (litToImm lit))))
1451 (reg,code) <- getSomeReg expr
1452 return (code `snocOL` JMP (OpReg reg))
1455 -- -----------------------------------------------------------------------------
1456 -- Unconditional branches
-- | Unconditional branch to a local basic block.
1458 genBranch :: BlockId -> NatM InstrBlock
1459 genBranch = return . toOL . mkJumpInstr
1463 -- -----------------------------------------------------------------------------
1464 -- Conditional jumps
1467 Conditional jumps are always to local labels, so we can use branch
1468 instructions. We peek at the arguments to decide what kind of
1471 I386: First, we have to ensure that the condition
1472 codes are set according to the supplied comparison operation.
-- NOTE(review): the 'genCondJump' name line of this signature, and a
-- number of instruction lines in the float branch, are elided here.
1476 :: BlockId -- the branch target
1477 -> CmmExpr -- the condition on which to branch
1480 #if i386_TARGET_ARCH
-- i386: evaluate the condition into the flags and emit one JXX.
1481 genCondJump id bool = do
1482 CondCode _ cond code <- getCondCode bool
1483 return (code `snocOL` JXX cond id)
1485 #elif x86_64_TARGET_ARCH
1486 genCondJump id bool = do
1487 CondCode is_float cond cond_code <- getCondCode bool
1490 return (cond_code `snocOL` JXX cond id)
1492 lbl <- getBlockIdNat
1494 -- see comment with condFltReg
-- Floating-point comparisons must also account for unordered (NaN)
-- operands via the parity flag: NE ors in PARITY, most others and in
-- NOTPARITY; GU/GEU need no extra test.
1495 let code = case cond of
1501 plain_test = unitOL (
1504 or_unordered = toOL [
1508 and_ordered = toOL [
1514 return (cond_code `appOL` code)
1517 genCondJump = panic "X86.genCondJump: not defined"
1524 -- -----------------------------------------------------------------------------
1525 -- Generating C calls
1527 -- Now the biggest nightmare---calls. Most of the nastiness is buried in
1528 -- @get_arg@, which moves the arguments to the correct registers/stack
1529 -- locations. Apart from that, the code is easy.
1531 -- (If applicable) Do not fill the delay slots here; you will confuse the
1532 -- register allocator.
-- NOTE(review): the 'genCCall' name line of this signature is elided.
1535 :: CmmCallTarget -- function to call
1536 -> HintedCmmFormals -- where to put the result
1537 -> HintedCmmActuals -- arguments (of mixed type)
1540 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1542 #if i386_TARGET_ARCH
1544 genCCall (CmmPrim MO_WriteBarrier) _ _ = return nilOL
1545 -- write barrier compiles to no code on x86/x86-64;
1546 -- we keep it this long in order to prevent earlier optimisations.
1548 -- we only cope with a single result for foreign calls
1548 -- we only cope with a single result for foreign calls
-- i386: sqrt/sin/cos/tan are inlined as x87 pseudo-ops (GSQRT/GSIN/...);
-- the two fresh labels are used by the sin/cos/tan expansions.  All
-- other primitive float ops go out of line to libc.
1549 genCCall (CmmPrim op) [CmmHinted r _] args = do
1550 l1 <- getNewLabelNat
1551 l2 <- getNewLabelNat
1553 MO_F32_Sqrt -> actuallyInlineFloatOp GSQRT FF32 args
1554 MO_F64_Sqrt -> actuallyInlineFloatOp GSQRT FF64 args
1556 MO_F32_Sin -> actuallyInlineFloatOp (\s -> GSIN s l1 l2) FF32 args
1557 MO_F64_Sin -> actuallyInlineFloatOp (\s -> GSIN s l1 l2) FF64 args
1559 MO_F32_Cos -> actuallyInlineFloatOp (\s -> GCOS s l1 l2) FF32 args
1560 MO_F64_Cos -> actuallyInlineFloatOp (\s -> GCOS s l1 l2) FF64 args
1562 MO_F32_Tan -> actuallyInlineFloatOp (\s -> GTAN s l1 l2) FF32 args
1563 MO_F64_Tan -> actuallyInlineFloatOp (\s -> GTAN s l1 l2) FF64 args
1565 other_op -> outOfLineFloatOp op r args
-- Generate the op straight into the result register (single argument).
1567 actuallyInlineFloatOp instr size [CmmHinted x _]
1568 = do res <- trivialUFCode size (instr size) x
1570 return (any (getRegisterReg (CmmLocal r)))
-- | i386 C call: push args right-to-left, CALL, pop args (unless
-- stdcall, where the callee pops), then move results out of
-- fake0 (float) or eax/edx (int).  NOTE(review): many lines are elided
-- in this view; comments below describe only what is visible.
1572 genCCall target dest_regs args = do
1574 sizes = map (arg_size . cmmExprType . hintlessCmm) (reverse args)
1575 #if !darwin_TARGET_OS
1576 tot_arg_size = sum sizes
-- Darwin requires the stack to stay 16-byte aligned at the call, so the
-- argument area is rounded up and padded before pushing.
1578 raw_arg_size = sum sizes
1579 tot_arg_size = roundTo 16 raw_arg_size
1580 arg_pad_size = tot_arg_size - raw_arg_size
1581 delta0 <- getDeltaNat
1582 setDeltaNat (delta0 - arg_pad_size)
1585 push_codes <- mapM push_arg (reverse args)
1586 delta <- getDeltaNat
1589 -- deal with static vs dynamic call targets
1590 (callinsns,cconv) <-
1593 CmmCallee (CmmLit (CmmLabel lbl)) conv
1594 -> -- ToDo: stdcall arg sizes
1595 return (unitOL (CALL (Left fn_imm) []), conv)
1596 where fn_imm = ImmCLbl lbl
1598 -> do { (dyn_c, dyn_r) <- get_op expr
1599 ; ASSERT( isWord32 (cmmExprType expr) )
1600 return (dyn_c `snocOL` CALL (Right dyn_r) [], conv) }
1603 #if darwin_TARGET_OS
-- Emit the alignment padding (Darwin only) before the pushed arguments.
1605 = toOL [SUB II32 (OpImm (ImmInt arg_pad_size)) (OpReg esp),
1606 DELTA (delta0 - arg_pad_size)]
1607 `appOL` concatOL push_codes
1610 = concatOL push_codes
1611 call = callinsns `appOL`
1613 -- Deallocate parameters after call for ccall;
1614 -- but not for stdcall (callee does it)
1615 (if cconv == StdCallConv || tot_arg_size==0 then [] else
1616 [ADD II32 (OpImm (ImmInt tot_arg_size)) (OpReg esp)])
1618 [DELTA (delta + tot_arg_size)]
1621 setDeltaNat (delta + tot_arg_size)
1624 -- assign the results, if necessary
1625 assign_code [] = nilOL
-- Float results come back in the x87 register (fake0); 64-bit ints in
-- eax/edx; everything else in eax.
1626 assign_code [CmmHinted dest _hint]
1627 | isFloatType ty = unitOL (GMOV fake0 r_dest)
1628 | isWord64 ty = toOL [MOV II32 (OpReg eax) (OpReg r_dest),
1629 MOV II32 (OpReg edx) (OpReg r_dest_hi)]
1630 | otherwise = unitOL (MOV (intSize w) (OpReg eax) (OpReg r_dest))
1632 ty = localRegType dest
1634 r_dest_hi = getHiVRegFromLo r_dest
1635 r_dest = getRegisterReg (CmmLocal dest)
1636 assign_code many = panic "genCCall.assign_code many"
1638 return (push_code `appOL`
1640 assign_code dest_regs)
1643 arg_size :: CmmType -> Int -- Width in bytes
1644 arg_size ty = widthInBytes (typeWidth ty)
-- Round x up to the next multiple of a.
1646 roundTo a x | x `mod` a == 0 = x
1647 | otherwise = x + a - (x `mod` a)
1650 push_arg :: HintedCmmActual {-current argument-}
1651 -> NatM InstrBlock -- code
1653 push_arg (CmmHinted arg _hint) -- we don't need the hints on x86
-- 64-bit arguments are pushed as two 32-bit halves, high word first.
1654 | isWord64 arg_ty = do
1655 ChildCode64 code r_lo <- iselExpr64 arg
1656 delta <- getDeltaNat
1657 setDeltaNat (delta - 8)
1659 r_hi = getHiVRegFromLo r_lo
1661 return ( code `appOL`
1662 toOL [PUSH II32 (OpReg r_hi), DELTA (delta - 4),
1663 PUSH II32 (OpReg r_lo), DELTA (delta - 8),
1668 (code, reg) <- get_op arg
1669 delta <- getDeltaNat
1670 let size = arg_size arg_ty -- Byte size
1671 setDeltaNat (delta-size)
-- Floats cannot be PUSHed: make room on the stack and x87-store (GST)
-- into it; integers use an ordinary PUSH.
1672 if (isFloatType arg_ty)
1673 then return (code `appOL`
1674 toOL [SUB II32 (OpImm (ImmInt size)) (OpReg esp),
1676 GST (floatSize (typeWidth arg_ty))
1677 reg (AddrBaseIndex (EABaseReg esp)
1681 else return (code `snocOL`
1682 PUSH II32 (OpReg reg) `snocOL`
1686 arg_ty = cmmExprType arg
1689 get_op :: CmmExpr -> NatM (InstrBlock, Reg) -- code, reg
1691 (reg,code) <- getSomeReg op
1694 #elif x86_64_TARGET_ARCH
1696 genCCall (CmmPrim MO_WriteBarrier) _ _ = return nilOL
1697 -- write barrier compiles to no code on x86/x86-64;
1698 -- we keep it this long in order to prevent earlier optimisations.
-- x86-64 has no inline x87 expansions: all primitive float ops call
-- the corresponding libm routine out of line.
1701 genCCall (CmmPrim op) [CmmHinted r _] args =
1702 outOfLineFloatOp op r args
-- | x86-64 C call (System V ABI): pass as many arguments as possible in
-- integer/SSE registers, push the rest, keep %rsp 16-byte aligned at
-- the CALL, and set %al to the number of SSE argument registers used
-- (required for varargs callees).  NOTE(review): many lines are elided
-- in this view; comments describe only what is visible.
1704 genCCall target dest_regs args = do
1706 -- load up the register arguments
1707 (stack_args, aregs, fregs, load_args_code)
1708 <- load_args args allArgRegs allFPArgRegs nilOL
-- The used register lists are the prefixes of allArgRegs/allFPArgRegs
-- that load_args consumed.
1711 fp_regs_used = reverse (drop (length fregs) (reverse allFPArgRegs))
1712 int_regs_used = reverse (drop (length aregs) (reverse allArgRegs))
1713 arg_regs = [eax] ++ int_regs_used ++ fp_regs_used
1714 -- for annotating the call instruction with
1716 sse_regs = length fp_regs_used
1718 tot_arg_size = arg_size * length stack_args
1720 -- On entry to the called function, %rsp should be aligned
1721 -- on a 16-byte boundary +8 (i.e. the first stack arg after
1722 -- the return address is 16-byte aligned). In STG land
1723 -- %rsp is kept 16-byte aligned (see StgCRun.c), so we just
1724 -- need to make sure we push a multiple of 16-bytes of args,
1725 -- plus the return address, to get the correct alignment.
1726 -- Urg, this is hard. We need to feed the delta back into
1727 -- the arg pushing code.
1728 (real_size, adjust_rsp) <-
1729 if tot_arg_size `rem` 16 == 0
1730 then return (tot_arg_size, nilOL)
1731 else do -- we need to adjust...
1732 delta <- getDeltaNat
1733 setDeltaNat (delta-8)
1734 return (tot_arg_size+8, toOL [
1735 SUB II64 (OpImm (ImmInt 8)) (OpReg rsp),
1739 -- push the stack args, right to left
1740 push_code <- push_args (reverse stack_args) nilOL
1741 delta <- getDeltaNat
1743 -- deal with static vs dynamic call targets
1744 (callinsns,cconv) <-
1747 CmmCallee (CmmLit (CmmLabel lbl)) conv
1748 -> -- ToDo: stdcall arg sizes
1749 return (unitOL (CALL (Left fn_imm) arg_regs), conv)
1750 where fn_imm = ImmCLbl lbl
1752 -> do (dyn_r, dyn_c) <- getSomeReg expr
1753 return (dyn_c `snocOL` CALL (Right dyn_r) arg_regs, conv)
1756 -- The x86_64 ABI requires us to set %al to the number of SSE
1757 -- registers that contain arguments, if the called routine
1758 -- is a varargs function. We don't know whether it's a
1759 -- varargs function or not, so we have to assume it is.
1761 -- It's not safe to omit this assignment, even if the number
1762 -- of SSE regs in use is zero. If %al is larger than 8
1763 -- on entry to a varargs function, seg faults ensue.
1764 assign_eax n = unitOL (MOV II32 (OpImm (ImmInt n)) (OpReg eax))
1766 let call = callinsns `appOL`
1768 -- Deallocate parameters after call for ccall;
1769 -- but not for stdcall (callee does it)
1770 (if cconv == StdCallConv || real_size==0 then [] else
1771 [ADD (intSize wordWidth) (OpImm (ImmInt real_size)) (OpReg esp)])
1773 [DELTA (delta + real_size)]
1776 setDeltaNat (delta + real_size)
1779 -- assign the results, if necessary
-- Float results come back in xmm0, everything else in rax.
1780 assign_code [] = nilOL
1781 assign_code [CmmHinted dest _hint] =
1782 case typeWidth rep of
1783 W32 | isFloatType rep -> unitOL (MOV (floatSize W32) (OpReg xmm0) (OpReg r_dest))
1784 W64 | isFloatType rep -> unitOL (MOV (floatSize W64) (OpReg xmm0) (OpReg r_dest))
1785 _ -> unitOL (MOV (cmmTypeSize rep) (OpReg rax) (OpReg r_dest))
1787 rep = localRegType dest
1788 r_dest = getRegisterReg (CmmLocal dest)
1789 assign_code many = panic "genCCall.assign_code many"
1791 return (load_args_code `appOL`
1794 assign_eax sse_regs `appOL`
1796 assign_code dest_regs)
1799 arg_size = 8 -- always, at the mo
-- | Allocate leading arguments into registers; returns the leftover
-- (stack-bound) args, the unused register pools, and the load code.
1801 load_args :: [CmmHinted CmmExpr]
1802 -> [Reg] -- int regs avail for args
1803 -> [Reg] -- FP regs avail for args
1805 -> NatM ([CmmHinted CmmExpr],[Reg],[Reg],InstrBlock)
1806 load_args args [] [] code = return (args, [], [], code)
1807 -- no more regs to use
1808 load_args [] aregs fregs code = return ([], aregs, fregs, code)
1809 -- no more args to push
1810 load_args ((CmmHinted arg hint) : rest) aregs fregs code
1811 | isFloatType arg_rep =
-- Float args consume SSE registers; integer args consume int registers.
-- (The register-pool pattern matches are partly elided in this view.)
1815 arg_code <- getAnyReg arg
1816 load_args rest aregs rs (code `appOL` arg_code r)
1821 arg_code <- getAnyReg arg
1822 load_args rest rs fregs (code `appOL` arg_code r)
1824 arg_rep = cmmExprType arg
1827 (args',ars,frs,code') <- load_args rest aregs fregs code
1828 return ((CmmHinted arg hint):args', ars, frs, code')
-- | Push the remaining args on the stack, right to left.
1830 push_args [] code = return code
1831 push_args ((CmmHinted arg hint):rest) code
-- Floats cannot be PUSHed: make room with SUB and MOV into the slot.
1832 | isFloatType arg_rep = do
1833 (arg_reg, arg_code) <- getSomeReg arg
1834 delta <- getDeltaNat
1835 setDeltaNat (delta-arg_size)
1836 let code' = code `appOL` arg_code `appOL` toOL [
1837 SUB (intSize wordWidth) (OpImm (ImmInt arg_size)) (OpReg rsp) ,
1838 DELTA (delta-arg_size),
1839 MOV (floatSize width) (OpReg arg_reg) (OpAddr (spRel 0))]
1840 push_args rest code'
1843 -- we only ever generate word-sized function arguments. Promotion
1844 -- has already happened: our Int8# type is kept sign-extended
1845 -- in an Int#, for example.
1846 ASSERT(width == W64) return ()
1847 (arg_op, arg_code) <- getOperand arg
1848 delta <- getDeltaNat
1849 setDeltaNat (delta-arg_size)
1850 let code' = code `appOL` arg_code `appOL` toOL [
1852 DELTA (delta-arg_size)]
1853 push_args rest code'
1855 arg_rep = cmmExprType arg
1856 width = typeWidth arg_rep
-- Fallback for unsupported target architectures: this module only
-- implements i386 and x86-64.  (Fixed typo in the panic message:
-- "genCCAll" -> "genCCall", so the error names the real function.)
1859 genCCall = panic "X86.genCCall: not defined"
1861 #endif /* x86_64_TARGET_ARCH */
-- | Implement a primitive float op as an unsafe C call to the libm
-- routine named below.  A Float64 result is written directly; otherwise
-- the call goes through a fresh F64 temporary which is then assigned
-- (narrowed) to the real result register.
1866 outOfLineFloatOp :: CallishMachOp -> CmmFormal -> HintedCmmActuals -> NatM InstrBlock
1867 outOfLineFloatOp mop res args
1869 dflags <- getDynFlagsNat
-- The callee label may need PIC indirection, hence the dynamic ref.
1870 targetExpr <- cmmMakeDynamicReference dflags addImportNat CallReference lbl
1871 let target = CmmCallee targetExpr CCallConv
1873 if isFloat64 (localRegType res)
1875 stmtToInstrs (CmmCall target [CmmHinted res NoHint] args CmmUnsafe CmmMayReturn)
1879 tmp = LocalReg uq f64
1881 code1 <- stmtToInstrs (CmmCall target [CmmHinted tmp NoHint] args CmmUnsafe CmmMayReturn)
1882 code2 <- stmtToInstrs (CmmAssign (CmmLocal res) (CmmReg (CmmLocal tmp)))
1883 return (code1 `appOL` code2)
1885 lbl = mkForeignLabel fn Nothing False IsFunction
-- Map each CallishMachOp to its libm entry point (the case header for
-- 'fn' is elided in this view).
1888 MO_F32_Sqrt -> fsLit "sqrtf"
1889 MO_F32_Sin -> fsLit "sinf"
1890 MO_F32_Cos -> fsLit "cosf"
1891 MO_F32_Tan -> fsLit "tanf"
1892 MO_F32_Exp -> fsLit "expf"
1893 MO_F32_Log -> fsLit "logf"
1895 MO_F32_Asin -> fsLit "asinf"
1896 MO_F32_Acos -> fsLit "acosf"
1897 MO_F32_Atan -> fsLit "atanf"
1899 MO_F32_Sinh -> fsLit "sinhf"
1900 MO_F32_Cosh -> fsLit "coshf"
1901 MO_F32_Tanh -> fsLit "tanhf"
1902 MO_F32_Pwr -> fsLit "powf"
1904 MO_F64_Sqrt -> fsLit "sqrt"
1905 MO_F64_Sin -> fsLit "sin"
1906 MO_F64_Cos -> fsLit "cos"
1907 MO_F64_Tan -> fsLit "tan"
1908 MO_F64_Exp -> fsLit "exp"
1909 MO_F64_Log -> fsLit "log"
1911 MO_F64_Asin -> fsLit "asin"
1912 MO_F64_Acos -> fsLit "acos"
1913 MO_F64_Atan -> fsLit "atan"
1915 MO_F64_Sinh -> fsLit "sinh"
1916 MO_F64_Cosh -> fsLit "cosh"
1917 MO_F64_Tanh -> fsLit "tanh"
1918 MO_F64_Pwr -> fsLit "pow"
1924 -- -----------------------------------------------------------------------------
1925 -- Generating a table-branch
-- | Generate a table branch.  PIC builds use a label-relative jump
-- table (entries are offsets from the table label, added to the table
-- base at runtime); non-PIC builds jump through absolute entries.
-- NOTE(review): the clause headers and several CPP branch lines are
-- elided in this view.
1927 genSwitch :: CmmExpr -> [Maybe BlockId] -> NatM InstrBlock
1932 (reg,e_code) <- getSomeReg expr
1933 lbl <- getNewLabelNat
1934 dflags <- getDynFlagsNat
1935 dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl
1936 (tableReg,t_code) <- getSomeReg $ dynRef
1938 jumpTable = map jumpTableEntryRel ids
-- A missing branch becomes a zero entry; a present one the difference
-- between the block label and the table label.
1940 jumpTableEntryRel Nothing
1941 = CmmStaticLit (CmmInt 0 wordWidth)
1942 jumpTableEntryRel (Just (BlockId id))
1943 = CmmStaticLit (CmmLabelDiffOff blockLabel lbl 0)
1944 where blockLabel = mkAsmTempLabel id
1946 op = OpAddr (AddrBaseIndex (EABaseReg tableReg)
1947 (EAIndex reg wORD_SIZE) (ImmInt 0))
1949 #if x86_64_TARGET_ARCH
1950 #if darwin_TARGET_OS
1951 -- on Mac OS X/x86_64, put the jump table in the text section
1952 -- to work around a limitation of the linker.
1953 -- ld64 is unable to handle the relocations for
1955 -- if L0 is not preceded by a non-anonymous label in its section.
1957 code = e_code `appOL` t_code `appOL` toOL [
1958 ADD (intSize wordWidth) op (OpReg tableReg),
1959 JMP_TBL (OpReg tableReg) [ id | Just id <- ids ],
1960 LDATA Text (CmmDataLabel lbl : jumpTable)
1963 -- HACK: On x86_64 binutils<2.17 is only able to generate PC32
1964 -- relocations, hence we only get 32-bit offsets in the jump
1965 -- table. As these offsets are always negative we need to properly
1966 -- sign extend them to 64-bit. This hack should be removed in
1967 -- conjunction with the hack in PprMach.hs/pprDataItem once
1968 -- binutils 2.17 is standard.
1969 code = e_code `appOL` t_code `appOL` toOL [
1970 LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
1972 (OpAddr (AddrBaseIndex (EABaseReg tableReg)
1973 (EAIndex reg wORD_SIZE) (ImmInt 0)))
1975 ADD (intSize wordWidth) (OpReg reg) (OpReg tableReg),
1976 JMP_TBL (OpReg tableReg) [ id | Just id <- ids ]
1980 code = e_code `appOL` t_code `appOL` toOL [
1981 LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
1982 ADD (intSize wordWidth) op (OpReg tableReg),
1983 JMP_TBL (OpReg tableReg) [ id | Just id <- ids ]
-- Non-PIC clause (header elided): jump through an absolute table
-- addressed as lbl + reg*wORD_SIZE.
1989 (reg,e_code) <- getSomeReg expr
1990 lbl <- getNewLabelNat
1992 jumpTable = map jumpTableEntry ids
1993 op = OpAddr (AddrBaseIndex EABaseNone (EAIndex reg wORD_SIZE) (ImmCLbl lbl))
1994 code = e_code `appOL` toOL [
1995 LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
1996 JMP_TBL op [ id | Just id <- ids ]
2002 -- -----------------------------------------------------------------------------
2003 -- 'condIntReg' and 'condFltReg': condition codes into registers
2005 -- Turn those condition codes into integers now (when they appear on
2006 -- the right hand side of an assignment).
2008 -- (If applicable) Do not fill the delay slots here; you will confuse the
2009 -- register allocator.
-- | Materialise an integer comparison result as 0/1 in a register:
-- SETcc into a byte temp, then zero-extend into the destination.
2011 condIntReg :: Cond -> CmmExpr -> CmmExpr -> NatM Register
2013 condIntReg cond x y = do
2014 CondCode _ cond cond_code <- condIntCode cond x y
2015 tmp <- getNewRegNat II8
2017 code dst = cond_code `appOL` toOL [
2018 SETCC cond (OpReg tmp),
2019 MOVZxL II8 (OpReg tmp) (OpReg dst)
2022 return (Any II32 code)
-- | Materialise a float comparison result as 0/1 in a register.
2026 condFltReg :: Cond -> CmmExpr -> CmmExpr -> NatM Register
2028 #if i386_TARGET_ARCH
-- i386: same SETcc/MOVZxL pattern as condIntReg (GCMP already folded
-- the unordered case into the flags).
2029 condFltReg cond x y = do
2030 CondCode _ cond cond_code <- condFltCode cond x y
2031 tmp <- getNewRegNat II8
2033 code dst = cond_code `appOL` toOL [
2034 SETCC cond (OpReg tmp),
2035 MOVZxL II8 (OpReg tmp) (OpReg dst)
2038 return (Any II32 code)
2040 #elif x86_64_TARGET_ARCH
2041 condFltReg cond x y = do
2042 CondCode _ cond cond_code <- condFltCode cond x y
2043 tmp1 <- getNewRegNat archWordSize
2044 tmp2 <- getNewRegNat archWordSize
2046 -- We have to worry about unordered operands (eg. comparisons
2047 -- against NaN). If the operands are unordered, the comparison
2048 -- sets the parity flag, carry flag and zero flag.
2049 -- All comparisons are supposed to return false for unordered
2050 -- operands except for !=, which returns true.
2052 -- Optimisation: we don't have to test the parity flag if we
2053 -- know the test has already excluded the unordered case: eg >
2054 -- and >= test for a zero carry flag, which can only occur for
2055 -- ordered operands.
2057 -- ToDo: by reversing comparisons we could avoid testing the
2058 -- parity flag in more cases.
2063 NE -> or_unordered dst
2064 GU -> plain_test dst
2065 GEU -> plain_test dst
2066 _ -> and_ordered dst)
2068 plain_test dst = toOL [
2069 SETCC cond (OpReg tmp1),
2070 MOVZxL II8 (OpReg tmp1) (OpReg dst)
-- NE must also be true when unordered: OR in the parity flag.
2072 or_unordered dst = toOL [
2073 SETCC cond (OpReg tmp1),
2074 SETCC PARITY (OpReg tmp2),
2075 OR II8 (OpReg tmp1) (OpReg tmp2),
2076 MOVZxL II8 (OpReg tmp2) (OpReg dst)
-- Other conditions must be false when unordered: AND with NOTPARITY.
2078 and_ordered dst = toOL [
2079 SETCC cond (OpReg tmp1),
2080 SETCC NOTPARITY (OpReg tmp2),
2081 AND II8 (OpReg tmp1) (OpReg tmp2),
2082 MOVZxL II8 (OpReg tmp2) (OpReg dst)
2085 return (Any II32 code)
2088 condFltReg = panic "X86.condFltReg: not defined"
2095 -- -----------------------------------------------------------------------------
2096 -- 'trivial*Code': deal with trivial instructions
2098 -- Trivial (dyadic: 'trivialCode', floating-point: 'trivialFCode',
2099 -- unary: 'trivialUCode', unary fl-pt:'trivialUFCode') instructions.
2100 -- Only look for constants on the right hand side, because that's
2101 -- where the generic optimizer will have put them.
2103 -- Similarly, for unary instructions, we don't have to worry about
2104 -- matching an StInt as the argument, because genericOpt will already
2105 -- have handled the constant-folding.
2109 The Rules of the Game are:
2111 * You cannot assume anything about the destination register dst;
2112 it may be anything, including a fixed reg.
2114 * You may compute an operand into a fixed reg, but you may not
2115 subsequently change the contents of that fixed reg. If you
2116 want to do so, first copy the value either to a temporary
2117 or into dst. You are free to modify dst even if it happens
2118 to be a fixed reg -- that's not your problem.
2120 * You cannot assume that a fixed reg will stay live over an
2121 arbitrary computation. The same applies to the dst reg.
2123 * Temporary regs obtained from getNewRegNat are distinct from
2124 each other and from all other regs, and stay live over
2125 arbitrary computations.
2127 --------------------
2129 SDM's version of The Rules:
2131 * If getRegister returns Any, that means it can generate correct
2132 code which places the result in any register, period. Even if that
2133 register happens to be read during the computation.
2135 Corollary #1: this means that if you are generating code for an
2136 operation with two arbitrary operands, you cannot assign the result
2137 of the first operand into the destination register before computing
2138 the second operand. The second operand might require the old value
2139 of the destination register.
2141 Corollary #2: A function might be able to generate more efficient
2142 code if it knows the destination register is a new temporary (and
2143 therefore not read by any of the sub-computations).
2145 * If getRegister returns Any, then the code it generates may modify only:
2146 (a) fresh temporaries
2147 (b) the destination register
2148 (c) known registers (eg. %ecx is used by shifts)
2149 In particular, it may *not* modify global registers, unless the global
2150 register happens to be the destination register.
-- Dyadic integer instruction.  If the *left* operand is a 32-bit
-- literal and a reversed-operand form of the instruction exists, use
-- it so the literal can be an immediate; otherwise defer to
-- genTrivialCode.
2153 trivialCode width instr (Just revinstr) (CmmLit lit_a) b
2154 | is32BitLit lit_a = do
2155 b_code <- getAnyReg b
2158 = b_code dst `snocOL`
2159 revinstr (OpImm (litToImm lit_a)) (OpReg dst)
2161 return (Any (intSize width) code)
2163 trivialCode width instr maybe_revinstr a b
2164 = genTrivialCode (intSize width) instr a b
2166 -- This is re-used for floating pt instructions too.
-- | Generate dst := a `instr` b for a two-operand instruction, keeping
-- b's operand alive across the computation of a (see comment below).
2167 genTrivialCode rep instr a b = do
2168 (b_op, b_code) <- getNonClobberedOperand b
2169 a_code <- getAnyReg a
2170 tmp <- getNewRegNat rep
2172 -- We want the value of b to stay alive across the computation of a.
2173 -- But, we want to calculate a straight into the destination register,
2174 -- because the instruction only has two operands (dst := dst `op` src).
2175 -- The troublesome case is when the result of b is in the same register
2176 -- as the destination reg. In this case, we have to save b in a
2177 -- new temporary across the computation of a.
2179 | dst `regClashesWithOp` b_op =
2181 unitOL (MOV rep b_op (OpReg tmp)) `appOL`
2183 instr (OpReg tmp) (OpReg dst)
2187 instr b_op (OpReg dst)
2189 return (Any rep code)
-- A register clashes with an operand if it *is* that operand's
-- register, or appears in the operand's addressing mode.
2191 reg `regClashesWithOp` OpReg reg2 = reg == reg2
2192 reg `regClashesWithOp` OpAddr amode = any (==reg) (addrModeRegs amode)
2193 reg `regClashesWithOp` _ = False
-- | Unary integer instruction applied in-place to the destination
-- register (the instruction line is elided in this view).
2197 trivialUCode rep instr x = do
2198 x_code <- getAnyReg x
2203 return (Any rep code)
2207 #if i386_TARGET_ARCH
-- i386 (x87): three-address pseudo-instruction taking two source
-- registers and the destination.
2209 trivialFCode width instr x y = do
2210 (x_reg, x_code) <- getNonClobberedReg x -- these work for float regs too
2211 (y_reg, y_code) <- getSomeReg y
2213 size = floatSize width
2217 instr size x_reg y_reg dst
2218 return (Any size code)
2222 #if x86_64_TARGET_ARCH
-- x86-64 (SSE2): two-operand form, so reuse genTrivialCode.
2223 trivialFCode pk instr x y
2224 = genTrivialCode size (instr size) x y
2225 where size = floatSize pk
-- | Unary float instruction (its instruction lines are elided here).
2228 trivialUFCode size instr x = do
2229 (x_reg, x_code) <- getSomeReg x
2235 return (Any size code)
2238 --------------------------------------------------------------------------------
-- | Int -> Float conversion: x87 GITOF/GITOD on i386, SSE CVTSI2SS/SD
-- on x86-64.  The 'to' case is partial for widths other than W32/W64.
2239 coerceInt2FP :: Width -> Width -> CmmExpr -> NatM Register
2241 #if i386_TARGET_ARCH
2242 coerceInt2FP from to x = do
2243 (x_reg, x_code) <- getSomeReg x
2245 opc = case to of W32 -> GITOF; W64 -> GITOD
2246 code dst = x_code `snocOL` opc x_reg dst
2247 -- ToDo: works for non-II32 reps?
2248 return (Any (floatSize to) code)
2250 #elif x86_64_TARGET_ARCH
2251 coerceInt2FP from to x = do
2252 (x_op, x_code) <- getOperand x -- ToDo: could be a safe operand
2254 opc = case to of W32 -> CVTSI2SS; W64 -> CVTSI2SD
2255 code dst = x_code `snocOL` opc x_op dst
2257 return (Any (floatSize to) code) -- works even if the destination rep is <II32
2260 coerceInt2FP = panic "X86.coerceInt2FP: not defined"
2267 --------------------------------------------------------------------------------
-- | Float -> Int conversion: x87 GFTOI/GDTOI on i386, SSE truncating
-- CVTTSS2SIQ/CVTTSD2SIQ on x86-64.  Partial for widths other than
-- W32/W64, mirroring coerceInt2FP.
2268 coerceFP2Int :: Width -> Width -> CmmExpr -> NatM Register
2270 #if i386_TARGET_ARCH
2271 coerceFP2Int from to x = do
2272 (x_reg, x_code) <- getSomeReg x
2274 opc = case from of W32 -> GFTOI; W64 -> GDTOI
2275 code dst = x_code `snocOL` opc x_reg dst
2276 -- ToDo: works for non-II32 reps?
2278 return (Any (intSize to) code)
2280 #elif x86_64_TARGET_ARCH
2281 coerceFP2Int from to x = do
2282 (x_op, x_code) <- getOperand x -- ToDo: could be a safe operand
2284 opc = case from of W32 -> CVTTSS2SIQ; W64 -> CVTTSD2SIQ
2285 code dst = x_code `snocOL` opc x_op dst
2287 return (Any (intSize to) code) -- works even if the destination rep is <II32
2290 coerceFP2Int = panic "X86.coerceFP2Int: not defined"
2297 --------------------------------------------------------------------------------
-- | Float width conversion (x86-64/SSE only): the opcode is chosen by
-- the *target* width, so W32 means narrowing (CVTSD2SS) and W64 means
-- widening (CVTSS2SD).  Partial for other target widths.
2298 coerceFP2FP :: Width -> CmmExpr -> NatM Register
2300 #if x86_64_TARGET_ARCH
2301 coerceFP2FP to x = do
2302 (x_reg, x_code) <- getSomeReg x
2304 opc = case to of W32 -> CVTSD2SS; W64 -> CVTSS2SD
2305 code dst = x_code `snocOL` opc x_reg dst
2307 return (Any (floatSize to) code)
2310 coerceFP2FP = panic "X86.coerceFP2FP: not defined"