1 -----------------------------------------------------------------------------
3 -- Generating machine code (instruction selection)
5 -- (c) The University of Glasgow 1996-2004
7 -----------------------------------------------------------------------------
9 -- This is a big module, but, if you pay attention to
10 -- (a) the sectioning, (b) the type signatures, and
11 -- (c) the #if blah_TARGET_ARCH things, the
12 -- structure should not be too overwhelming.
16 generateJumpTableForInstr,
22 #include "HsVersions.h"
23 #include "nativeGen/NCG.h"
24 #include "../includes/MachDeps.h"
38 -- Our intermediate code:
47 import StaticFlags ( opt_PIC )
48 import ForeignCall ( CCallConv(..) )
53 import FastBool ( isFastTrue )
54 import Constants ( wORD_SIZE )
57 import Control.Monad ( mapAndUnzipM )
58 import Data.Maybe ( fromJust, catMaybes )
-- | Can we generate SSE2 code for floating point?
-- CPP-selected: unconditionally True on x86_64, otherwise driven by the
-- -msse2 dynamic flag.
63 sse2Enabled :: NatM Bool
64 #if x86_64_TARGET_ARCH
65 -- SSE2 is fixed on for x86_64. It would be possible to make it optional,
66 -- but we'd need to fix at least the foreign call code where the calling
67 -- convention specifies the use of xmm regs, and possibly other places.
68 sse2Enabled = return True
-- NOTE(review): the non-x86_64 equation head is elided in this view; its
-- body queries the Opt_SSE2 dynamic flag:
71 dflags <- getDynFlagsNat
72 return (dopt Opt_SSE2 dflags)
-- | Run the first action when SSE2 is enabled, the second (x87) otherwise.
-- NOTE(review): the equation head binding @b@ from 'sse2Enabled' is elided
-- in this view.
75 if_sse2 :: NatM a -> NatM a -> NatM a
78 if b then sse2 else x87
-- | Translate one top-level Cmm item to native code.  A proc is
-- code-generated block by block; if a PIC base register was picked up
-- along the way, PIC initialisation code is prepended.  Data is passed
-- through unchanged.
83 -> NatM [NatCmmTop Instr]
85 cmmTopCodeGen dynflags (CmmProc info lab (ListGraph blocks)) = do
86 (nat_blocks,statics) <- mapAndUnzipM basicBlockCodeGen blocks
87 picBaseMb <- getPicBaseMaybeNat
88 let proc = CmmProc info lab (ListGraph $ concat nat_blocks)
89 tops = proc : concat statics
90 os = platformOS $ targetPlatform dynflags
-- NOTE(review): the 'case picBaseMb of' scrutinee line is elided here.
93 Just picBase -> initializePicBase_x86 ArchX86 os picBase tops
94 Nothing -> return tops
96 cmmTopCodeGen _ (CmmData sec dat) = do
97 return [CmmData sec dat] -- no translation, we just use CmmStatic
-- | Generate native code for one basic block, then split the resulting
-- instruction stream back into proper basic blocks (at NEWBLOCK markers)
-- and hoisted static data (LDATA markers).
102 -> NatM ( [NatBasicBlock Instr]
105 basicBlockCodeGen (BasicBlock id stmts) = do
106 instrs <- stmtsToInstrs stmts
107 -- code generation may introduce new basic block boundaries, which
108 -- are indicated by the NEWBLOCK instruction. We must split up the
109 -- instruction stream into basic blocks again. Also, we extract
112 (top,other_blocks,statics) = foldrOL mkBlocks ([],[],[]) instrs
-- Right fold over the instruction stream: NEWBLOCK starts a fresh block,
-- LDATA becomes static data, anything else joins the current block.
114 mkBlocks (NEWBLOCK id) (instrs,blocks,statics)
115 = ([], BasicBlock id instrs : blocks, statics)
116 mkBlocks (LDATA sec dat) (instrs,blocks,statics)
117 = (instrs, blocks, CmmData sec dat:statics)
118 mkBlocks instr (instrs,blocks,statics)
119 = (instr:instrs, blocks, statics)
121 return (BasicBlock id top : other_blocks, statics)
-- | Translate each statement and concatenate the instruction streams.
-- NOTE(review): the equation head ('stmtsToInstrs stmts') is elided.
124 stmtsToInstrs :: [CmmStmt] -> NatM InstrBlock
126 = do instrss <- mapM stmtToInstrs stmts
127 return (concatOL instrss)
-- | Translate a single Cmm statement into native instructions, dispatching
-- on the statement form.  Assignments split three ways: floating point,
-- 64-bit values (on 32-bit targets only, guarded by CPP), and ordinary
-- integers.  NOTE(review): the CmmAssign/CmmStore pattern heads and the
-- closing #endif lines are elided in this view.
130 stmtToInstrs :: CmmStmt -> NatM InstrBlock
131 stmtToInstrs stmt = case stmt of
132 CmmNop -> return nilOL
133 CmmComment s -> return (unitOL (COMMENT s))
136 | isFloatType ty -> assignReg_FltCode size reg src
137 #if WORD_SIZE_IN_BITS==32
138 | isWord64 ty -> assignReg_I64Code reg src
140 | otherwise -> assignReg_IntCode size reg src
141 where ty = cmmRegType reg
142 size = cmmTypeSize ty
145 | isFloatType ty -> assignMem_FltCode size addr src
146 #if WORD_SIZE_IN_BITS==32
147 | isWord64 ty -> assignMem_I64Code addr src
149 | otherwise -> assignMem_IntCode size addr src
150 where ty = cmmExprType src
151 size = cmmTypeSize ty
153 CmmCall target result_regs args _ _
154 -> genCCall target result_regs args
156 CmmBranch id -> genBranch id
157 CmmCondBranch arg id -> genCondJump id arg
158 CmmSwitch arg ids -> genSwitch arg ids
159 CmmJump arg _ -> genJump arg
-- CmmReturn should have been removed by the CPS conversion:
161 panic "stmtToInstrs: return statement should have been cps'd away"
164 --------------------------------------------------------------------------------
165 -- | 'InstrBlock's are the insn sequences generated by the insn selectors.
166 -- They are really trees of insns to facilitate fast appending, where a
167 -- left-to-right traversal yields the insns in the correct order.
173 -- | Condition codes passed up the tree.
176 = CondCode Bool Cond InstrBlock
179 -- | a.k.a "Register64"
180 -- Reg is the lower 32-bit temporary which contains the result.
181 -- Use getHiVRegFromLo to find the other VRegUnique.
183 -- Rules of this simplified insn selection game are therefore that
184 -- the returned Reg may be modified
192 -- | Register's passed up the tree. If the stix code forces the register
193 -- to live in a pre-decided machine register, it comes out as @Fixed@;
194 -- otherwise, it comes out as @Any@, and the parent can decide which
195 -- register to put it in.
198 = Fixed Size Reg InstrBlock
199 | Any Size (Reg -> InstrBlock)
-- | Replace the 'Size' carried by a 'Register', leaving the generated code
-- and the register choice untouched.
swizzleRegisterRep :: Register -> Size -> Register
swizzleRegisterRep register size =
  case register of
    Fixed _ reg code -> Fixed size reg code
    Any _ codefn     -> Any size codefn
-- | Grab the Reg for a CmmReg.
--
-- A local floating-point register lives in the x87 FF80 class unless SSE2
-- is in use.  By this stage, the only MagicIds remaining should be the
-- ones which map to a real machine register on this platform; anything
-- else is a panic.
getRegisterReg :: Bool -> CmmReg -> Reg
getRegisterReg use_sse2 (CmmLocal (LocalReg u pk))
  | isFloatSize sz && not use_sse2 = RegVirtual (mkVirtualReg u FF80)
  | otherwise                      = RegVirtual (mkVirtualReg u sz)
  where
    sz = cmmTypeSize pk
getRegisterReg _ (CmmGlobal mid) =
  case globalRegMaybe mid of
    Just reg -> RegReal reg
    Nothing  -> pprPanic "getRegisterReg-memory" (ppr $ CmmGlobal mid)
225 -- | Memory addressing modes passed up the tree.
227 = Amode AddrMode InstrBlock
230 Now, given a tree (the argument to an CmmLoad) that references memory,
231 produce a suitable addressing mode.
233 A Rule of the Game (tm) for Amodes: use of the addr bit must
234 immediately follow use of the code part, since the code part puts
235 values in registers which the addr then refers to. So you can't put
236 anything in between, lest it overwrite some of those registers. If
237 you need to do some other computation between the code part and use of
238 the addr bit, first store the effective address from the amode in a
239 temporary, then do the other computation, and then use the temporary:
243 ... other computation ...
-- | Check whether an integer will fit in 32 bits.
-- A CmmInt is intended to be truncated to the appropriate number of bits,
-- so we first truncate to 'Int64' and then range-check against the signed
-- 32-bit bounds.  This matters because e.g. -1 as a CmmInt might show up
-- as either -1 or 18446744073709551615.
is32BitInteger :: Integer -> Bool
is32BitInteger i = -0x80000000 <= i64 && i64 <= 0x7fffffff
  where
    i64 = fromIntegral i :: Int64
-- | Convert a BlockId to some CmmStatic data: an absent target becomes a
-- zero word, a present one becomes a reference to the block's assembler
-- temp label.
jumpTableEntry :: Maybe BlockId -> CmmStatic
jumpTableEntry mbBlockId =
  case mbBlockId of
    Nothing      -> CmmStaticLit (CmmInt 0 wordWidth)
    Just blockid -> CmmStaticLit (CmmLabel (mkAsmTempLabel (getUnique blockid)))
266 -- -----------------------------------------------------------------------------
267 -- General things for putting together code sequences
-- Expand a CmmRegOff into the equivalent CmmMachOp addition of the
-- register and a literal offset, both at the register's width.
-- ToDo: should we do it this way around, or convert CmmExprs into
-- CmmRegOff instead?
mangleIndexTree :: CmmReg -> Int -> CmmExpr
mangleIndexTree reg off =
  let width  = typeWidth (cmmRegType reg)
      offLit = CmmLit (CmmInt (fromIntegral off) width)
  in CmmMachOp (MO_Add width) [CmmReg reg, offLit]
276 -- | The dual to getAnyReg: compute an expression into a register, but
277 -- we don't mind which one it is.
278 getSomeReg :: CmmExpr -> NatM (Reg, InstrBlock)
-- NOTE(review): the equation head and the case scrutinee are elided; the
-- visible lines materialise an 'Any' result into a fresh register.
280 r <- getRegister expr
283 tmp <- getNewRegNat rep
284 return (tmp, code tmp)
-- | Store a 64-bit value to memory on a 32-bit target: evaluate the
-- address and the value (as a low/high virtual register pair), then emit
-- two 32-bit moves, low word first (little-endian layout).
292 assignMem_I64Code :: CmmExpr -> CmmExpr -> NatM InstrBlock
293 assignMem_I64Code addrTree valueTree = do
294 Amode addr addr_code <- getAmode addrTree
295 ChildCode64 vcode rlo <- iselExpr64 valueTree
297 rhi = getHiVRegFromLo rlo
299 -- Little-endian store
300 mov_lo = MOV II32 (OpReg rlo) (OpAddr addr)
301 mov_hi = MOV II32 (OpReg rhi) (OpAddr (fromJust (addrOffset addr 4)))
303 return (vcode `appOL` addr_code `snocOL` mov_lo `snocOL` mov_hi)
-- | Assign a 64-bit value to a local register on a 32-bit target: two
-- 32-bit reg-to-reg moves from the source pair to the destination pair.
-- Only local registers are valid 64-bit lvalues here.
306 assignReg_I64Code :: CmmReg -> CmmExpr -> NatM InstrBlock
307 assignReg_I64Code (CmmLocal (LocalReg u_dst _)) valueTree = do
308 ChildCode64 vcode r_src_lo <- iselExpr64 valueTree
310 r_dst_lo = RegVirtual $ mkVirtualReg u_dst II32
311 r_dst_hi = getHiVRegFromLo r_dst_lo
312 r_src_hi = getHiVRegFromLo r_src_lo
313 mov_lo = MOV II32 (OpReg r_src_lo) (OpReg r_dst_lo)
314 mov_hi = MOV II32 (OpReg r_src_hi) (OpReg r_dst_hi)
317 vcode `snocOL` mov_lo `snocOL` mov_hi
320 assignReg_I64Code _ _
321 = panic "assignReg_I64Code(i386): invalid lvalue"
-- | Instruction selection for a 64-bit expression on a 32-bit target.
-- Produces a ChildCode64: the code plus the low-half register; the high
-- half is obtained with getHiVRegFromLo.  NOTE(review): several 'let'
-- keyword and closing lines are elided in this view.
326 iselExpr64 :: CmmExpr -> NatM ChildCode64
327 iselExpr64 (CmmLit (CmmInt i _)) = do
328 (rlo,rhi) <- getNewRegPairNat II32
-- Split the literal into its low (r) and high (q) 32-bit words.
330 r = fromIntegral (fromIntegral i :: Word32)
331 q = fromIntegral ((fromIntegral i `shiftR` 32) :: Word32)
333 MOV II32 (OpImm (ImmInteger r)) (OpReg rlo),
334 MOV II32 (OpImm (ImmInteger q)) (OpReg rhi)
337 return (ChildCode64 code rlo)
339 iselExpr64 (CmmLoad addrTree ty) | isWord64 ty = do
340 Amode addr addr_code <- getAmode addrTree
341 (rlo,rhi) <- getNewRegPairNat II32
343 mov_lo = MOV II32 (OpAddr addr) (OpReg rlo)
344 mov_hi = MOV II32 (OpAddr (fromJust (addrOffset addr 4))) (OpReg rhi)
347 ChildCode64 (addr_code `snocOL` mov_lo `snocOL` mov_hi)
351 iselExpr64 (CmmReg (CmmLocal (LocalReg vu ty))) | isWord64 ty
352 = return (ChildCode64 nilOL (RegVirtual $ mkVirtualReg vu II32))
354 -- we handle addition, but rather badly
-- 64-bit add of a literal: 32-bit ADD on the low words, ADC (add with
-- carry) on the high words.
355 iselExpr64 (CmmMachOp (MO_Add _) [e1, CmmLit (CmmInt i _)]) = do
356 ChildCode64 code1 r1lo <- iselExpr64 e1
357 (rlo,rhi) <- getNewRegPairNat II32
359 r = fromIntegral (fromIntegral i :: Word32)
360 q = fromIntegral ((fromIntegral i `shiftR` 32) :: Word32)
361 r1hi = getHiVRegFromLo r1lo
363 toOL [ MOV II32 (OpReg r1lo) (OpReg rlo),
364 ADD II32 (OpImm (ImmInteger r)) (OpReg rlo),
365 MOV II32 (OpReg r1hi) (OpReg rhi),
366 ADC II32 (OpImm (ImmInteger q)) (OpReg rhi) ]
368 return (ChildCode64 code rlo)
-- 64-bit add of two expressions: same ADD/ADC pattern, register operands.
370 iselExpr64 (CmmMachOp (MO_Add _) [e1,e2]) = do
371 ChildCode64 code1 r1lo <- iselExpr64 e1
372 ChildCode64 code2 r2lo <- iselExpr64 e2
373 (rlo,rhi) <- getNewRegPairNat II32
375 r1hi = getHiVRegFromLo r1lo
376 r2hi = getHiVRegFromLo r2lo
379 toOL [ MOV II32 (OpReg r1lo) (OpReg rlo),
380 ADD II32 (OpReg r2lo) (OpReg rlo),
381 MOV II32 (OpReg r1hi) (OpReg rhi),
382 ADC II32 (OpReg r2hi) (OpReg rhi) ]
384 return (ChildCode64 code rlo)
-- Zero-extension to 64 bits: compute the operand into the low register
-- and zero the high register.
386 iselExpr64 (CmmMachOp (MO_UU_Conv _ W64) [expr]) = do
388 r_dst_lo <- getNewRegNat II32
389 let r_dst_hi = getHiVRegFromLo r_dst_lo
392 ChildCode64 (code `snocOL`
393 MOV II32 (OpImm (ImmInt 0)) (OpReg r_dst_hi))
398 = pprPanic "iselExpr64(i386)" (ppr expr)
402 --------------------------------------------------------------------------------
-- | Instruction selection: compute a CmmExpr into a 'Register'.
-- NOTE(review): many equations below have lines elided (case heads, 'let'
-- keywords, #else/#endif); the code is kept verbatim.
403 getRegister :: CmmExpr -> NatM Register
405 #if !x86_64_TARGET_ARCH
406 -- on x86_64, we have %rip for PicBaseReg, but it's not a full-featured
407 -- register, it can only be used for rip-relative addressing.
408 getRegister (CmmReg (CmmGlobal PicBaseReg))
410 reg <- getPicBaseNat archWordSize
411 return (Fixed archWordSize reg nilOL)
414 getRegister (CmmReg reg)
415 = do use_sse2 <- sse2Enabled
417 sz = cmmTypeSize (cmmRegType reg)
418 size | not use_sse2 && isFloatSize sz = FF80
421 return (Fixed size (getRegisterReg use_sse2 reg) nilOL)
424 getRegister (CmmRegOff r n)
425 = getRegister $ mangleIndexTree r n
428 #if WORD_SIZE_IN_BITS==32
429 -- for 32-bit architectures, support some 64 -> 32 bit conversions:
430 -- TO_W_(x), TO_W_(x >> 32)
432 getRegister (CmmMachOp (MO_UU_Conv W64 W32)
433 [CmmMachOp (MO_U_Shr W64) [x,CmmLit (CmmInt 32 _)]]) = do
434 ChildCode64 code rlo <- iselExpr64 x
435 return $ Fixed II32 (getHiVRegFromLo rlo) code
437 getRegister (CmmMachOp (MO_SS_Conv W64 W32)
438 [CmmMachOp (MO_U_Shr W64) [x,CmmLit (CmmInt 32 _)]]) = do
439 ChildCode64 code rlo <- iselExpr64 x
440 return $ Fixed II32 (getHiVRegFromLo rlo) code
442 getRegister (CmmMachOp (MO_UU_Conv W64 W32) [x]) = do
443 ChildCode64 code rlo <- iselExpr64 x
444 return $ Fixed II32 rlo code
446 getRegister (CmmMachOp (MO_SS_Conv W64 W32) [x]) = do
447 ChildCode64 code rlo <- iselExpr64 x
448 return $ Fixed II32 rlo code
-- Floating-point literals: generated via SSE2 or x87 depending on flags.
-- A zero (and, for x87, a one) is synthesised directly in a register;
-- anything else is placed in memory and loaded.
453 getRegister (CmmLit lit@(CmmFloat f w)) =
454 if_sse2 float_const_sse2 float_const_x87
460 code dst = unitOL (XOR size (OpReg dst) (OpReg dst))
461 -- I don't know why there are xorpd, xorps, and pxor instructions.
462 -- They all appear to do the same thing --SDM
463 return (Any size code)
466 Amode addr code <- memConstant (widthInBytes w) lit
467 loadFloatAmode True w addr code
469 float_const_x87 = case w of
472 let code dst = unitOL (GLDZ dst)
473 in return (Any FF80 code)
476 let code dst = unitOL (GLD1 dst)
477 in return (Any FF80 code)
480 Amode addr code <- memConstant (widthInBytes w) lit
481 loadFloatAmode False w addr code
483 -- catch simple cases of zero- or sign-extended load
484 getRegister (CmmMachOp (MO_UU_Conv W8 W32) [CmmLoad addr _]) = do
485 code <- intLoadCode (MOVZxL II8) addr
486 return (Any II32 code)
488 getRegister (CmmMachOp (MO_SS_Conv W8 W32) [CmmLoad addr _]) = do
489 code <- intLoadCode (MOVSxL II8) addr
490 return (Any II32 code)
492 getRegister (CmmMachOp (MO_UU_Conv W16 W32) [CmmLoad addr _]) = do
493 code <- intLoadCode (MOVZxL II16) addr
494 return (Any II32 code)
496 getRegister (CmmMachOp (MO_SS_Conv W16 W32) [CmmLoad addr _]) = do
497 code <- intLoadCode (MOVSxL II16) addr
498 return (Any II32 code)
501 #if x86_64_TARGET_ARCH
503 -- catch simple cases of zero- or sign-extended load
504 getRegister (CmmMachOp (MO_UU_Conv W8 W64) [CmmLoad addr _]) = do
505 code <- intLoadCode (MOVZxL II8) addr
506 return (Any II64 code)
508 getRegister (CmmMachOp (MO_SS_Conv W8 W64) [CmmLoad addr _]) = do
509 code <- intLoadCode (MOVSxL II8) addr
510 return (Any II64 code)
512 getRegister (CmmMachOp (MO_UU_Conv W16 W64) [CmmLoad addr _]) = do
513 code <- intLoadCode (MOVZxL II16) addr
514 return (Any II64 code)
516 getRegister (CmmMachOp (MO_SS_Conv W16 W64) [CmmLoad addr _]) = do
517 code <- intLoadCode (MOVSxL II16) addr
518 return (Any II64 code)
520 getRegister (CmmMachOp (MO_UU_Conv W32 W64) [CmmLoad addr _]) = do
521 code <- intLoadCode (MOV II32) addr -- 32-bit loads zero-extend
522 return (Any II64 code)
524 getRegister (CmmMachOp (MO_SS_Conv W32 W64) [CmmLoad addr _]) = do
525 code <- intLoadCode (MOVSxL II32) addr
526 return (Any II64 code)
-- PIC base + literal displacement: a single rip-relative LEA on x86_64.
528 getRegister (CmmMachOp (MO_Add W64) [CmmReg (CmmGlobal PicBaseReg),
529 CmmLit displacement])
530 = return $ Any II64 (\dst -> unitOL $
531 LEA II64 (OpAddr (ripRel (litToImm displacement))) (OpReg dst))
533 #endif /* x86_64_TARGET_ARCH */
-- Unary MachOps: negation, bitwise not, and the various width/format
-- conversions.  NOTE(review): the 'case mop of' scrutinee, some guard
-- heads (e.g. MO_F_Neg, MO_FF_Conv W32 W64) and #endif lines are elided.
539 getRegister (CmmMachOp mop [x]) = do -- unary MachOps
543 | sse2 -> sse2NegCode w x
544 | otherwise -> trivialUFCode FF80 (GNEG FF80) x
546 MO_S_Neg w -> triv_ucode NEGI (intSize w)
547 MO_Not w -> triv_ucode NOT (intSize w)
-- Narrowing conversions: move into a byte- or word-addressable register.
550 MO_UU_Conv W32 W8 -> toI8Reg W32 x
551 MO_SS_Conv W32 W8 -> toI8Reg W32 x
552 MO_UU_Conv W16 W8 -> toI8Reg W16 x
553 MO_SS_Conv W16 W8 -> toI8Reg W16 x
554 MO_UU_Conv W32 W16 -> toI16Reg W32 x
555 MO_SS_Conv W32 W16 -> toI16Reg W32 x
557 #if x86_64_TARGET_ARCH
558 MO_UU_Conv W64 W32 -> conversionNop II64 x
559 MO_SS_Conv W64 W32 -> conversionNop II64 x
560 MO_UU_Conv W64 W16 -> toI16Reg W64 x
561 MO_SS_Conv W64 W16 -> toI16Reg W64 x
562 MO_UU_Conv W64 W8 -> toI8Reg W64 x
563 MO_SS_Conv W64 W8 -> toI8Reg W64 x
566 MO_UU_Conv rep1 rep2 | rep1 == rep2 -> conversionNop (intSize rep1) x
567 MO_SS_Conv rep1 rep2 | rep1 == rep2 -> conversionNop (intSize rep1) x
-- Widening conversions: movzx (zero-extend) / movsx (sign-extend).
570 MO_UU_Conv W8 W32 -> integerExtend W8 W32 MOVZxL x
571 MO_UU_Conv W16 W32 -> integerExtend W16 W32 MOVZxL x
572 MO_UU_Conv W8 W16 -> integerExtend W8 W16 MOVZxL x
574 MO_SS_Conv W8 W32 -> integerExtend W8 W32 MOVSxL x
575 MO_SS_Conv W16 W32 -> integerExtend W16 W32 MOVSxL x
576 MO_SS_Conv W8 W16 -> integerExtend W8 W16 MOVSxL x
578 #if x86_64_TARGET_ARCH
579 MO_UU_Conv W8 W64 -> integerExtend W8 W64 MOVZxL x
580 MO_UU_Conv W16 W64 -> integerExtend W16 W64 MOVZxL x
581 MO_UU_Conv W32 W64 -> integerExtend W32 W64 MOVZxL x
582 MO_SS_Conv W8 W64 -> integerExtend W8 W64 MOVSxL x
583 MO_SS_Conv W16 W64 -> integerExtend W16 W64 MOVSxL x
584 MO_SS_Conv W32 W64 -> integerExtend W32 W64 MOVSxL x
585 -- for 32-to-64 bit zero extension, amd64 uses an ordinary movl.
586 -- However, we don't want the register allocator to throw it
587 -- away as an unnecessary reg-to-reg move, so we keep it in
588 -- the form of a movzl and print it as a movl later.
592 | sse2 -> coerceFP2FP W64 x
593 | otherwise -> conversionNop FF80 x
595 MO_FF_Conv W64 W32 -> coerceFP2FP W32 x
597 MO_FS_Conv from to -> coerceFP2Int from to x
598 MO_SF_Conv from to -> coerceInt2FP from to x
600 _other -> pprPanic "getRegister" (pprMachOp mop)
-- Local helpers for the unary-MachOp equation (where-bound, so they close
-- over @x@).  NOTE(review): some heads (e.g. 'toI8Reg new_rep expr') and
-- continuation lines are elided in this view.
602 triv_ucode :: (Size -> Operand -> Instr) -> Size -> NatM Register
603 triv_ucode instr size = trivialUCode size (instr size) x
605 -- signed or unsigned extension.
606 integerExtend :: Width -> Width
607 -> (Size -> Operand -> Operand -> Instr)
608 -> CmmExpr -> NatM Register
609 integerExtend from to instr expr = do
-- W8 sources need a byte-addressable register (see getByteReg).
610 (reg,e_code) <- if from == W8 then getByteReg expr
615 instr (intSize from) (OpReg reg) (OpReg dst)
616 return (Any (intSize to) code)
618 toI8Reg :: Width -> CmmExpr -> NatM Register
620 = do codefn <- getAnyReg expr
621 return (Any (intSize new_rep) codefn)
622 -- HACK: use getAnyReg to get a byte-addressable register.
623 -- If the source was a Fixed register, this will add the
624 -- mov instruction to put it into the desired destination.
625 -- We're assuming that the destination won't be a fixed
626 -- non-byte-addressable register; it won't be, because all
627 -- fixed registers are word-sized.
629 toI16Reg = toI8Reg -- for now
631 conversionNop :: Size -> CmmExpr -> NatM Register
632 conversionNop new_size expr
633 = do e_code <- getRegister expr
634 return (swizzleRegisterRep e_code new_size)
-- Dyadic MachOps: comparisons, float/integer arithmetic, division,
-- multiply-overflow, bitwise ops and shifts.  NOTE(review): the 'case mop
-- of' scrutinee and a few guard heads are elided in this view.
637 getRegister (CmmMachOp mop [x, y]) = do -- dyadic MachOps
640 MO_F_Eq _ -> condFltReg EQQ x y
641 MO_F_Ne _ -> condFltReg NE x y
642 MO_F_Gt _ -> condFltReg GTT x y
643 MO_F_Ge _ -> condFltReg GE x y
644 MO_F_Lt _ -> condFltReg LTT x y
645 MO_F_Le _ -> condFltReg LE x y
647 MO_Eq _ -> condIntReg EQQ x y
648 MO_Ne _ -> condIntReg NE x y
650 MO_S_Gt _ -> condIntReg GTT x y
651 MO_S_Ge _ -> condIntReg GE x y
652 MO_S_Lt _ -> condIntReg LTT x y
653 MO_S_Le _ -> condIntReg LE x y
655 MO_U_Gt _ -> condIntReg GU x y
656 MO_U_Ge _ -> condIntReg GEU x y
657 MO_U_Lt _ -> condIntReg LU x y
658 MO_U_Le _ -> condIntReg LEU x y
660 MO_F_Add w | sse2 -> trivialFCode_sse2 w ADD x y
661 | otherwise -> trivialFCode_x87 GADD x y
662 MO_F_Sub w | sse2 -> trivialFCode_sse2 w SUB x y
663 | otherwise -> trivialFCode_x87 GSUB x y
664 MO_F_Quot w | sse2 -> trivialFCode_sse2 w FDIV x y
665 | otherwise -> trivialFCode_x87 GDIV x y
666 MO_F_Mul w | sse2 -> trivialFCode_sse2 w MUL x y
667 | otherwise -> trivialFCode_x87 GMUL x y
669 MO_Add rep -> add_code rep x y
670 MO_Sub rep -> sub_code rep x y
672 MO_S_Quot rep -> div_code rep True True x y
673 MO_S_Rem rep -> div_code rep True False x y
674 MO_U_Quot rep -> div_code rep False True x y
675 MO_U_Rem rep -> div_code rep False False x y
677 MO_S_MulMayOflo rep -> imulMayOflo rep x y
679 MO_Mul rep -> triv_op rep IMUL
680 MO_And rep -> triv_op rep AND
681 MO_Or rep -> triv_op rep OR
682 MO_Xor rep -> triv_op rep XOR
684 {- Shift ops on x86s have constraints on their source, it
685 either has to be Imm, CL or 1
686 => trivialCode is not restrictive enough (sigh.)
688 MO_Shl rep -> shift_code rep SHL x y {-False-}
689 MO_U_Shr rep -> shift_code rep SHR x y {-False-}
690 MO_S_Shr rep -> shift_code rep SAR x y {-False-}
692 _other -> pprPanic "getRegister(x86) - binary CmmMachOp (1)" (pprMachOp mop)
-- Local helpers for the dyadic-MachOp equation (where-bound).
-- NOTE(review): several heads and continuation lines (shift_code's type
-- head, imulMayOflo's shift_amt cases, div_code's DIV/result alternatives,
-- add_int's LEA line) are elided in this view.
695 triv_op width instr = trivialCode width op (Just op) x y
696 where op = instr (intSize width)
-- imulMayOflo: widening IMUL into %edx:%eax, then compare the high part
-- against the sign-extended low part; eax ends up 0 iff no overflow.
698 imulMayOflo :: Width -> CmmExpr -> CmmExpr -> NatM Register
699 imulMayOflo rep a b = do
700 (a_reg, a_code) <- getNonClobberedReg a
701 b_code <- getAnyReg b
703 shift_amt = case rep of
706 _ -> panic "shift_amt"
709 code = a_code `appOL` b_code eax `appOL`
711 IMUL2 size (OpReg a_reg), -- result in %edx:%eax
712 SAR size (OpImm (ImmInt shift_amt)) (OpReg eax),
713 -- sign extend lower part
714 SUB size (OpReg edx) (OpReg eax)
715 -- compare against upper
716 -- eax==0 if high part == sign extended low part
719 return (Fixed size eax code)
723 -> (Size -> Operand -> Operand -> Instr)
728 {- Case1: shift length as immediate -}
729 shift_code width instr x (CmmLit lit) = do
730 x_code <- getAnyReg x
734 = x_code dst `snocOL`
735 instr size (OpImm (litToImm lit)) (OpReg dst)
737 return (Any size code)
739 {- Case2: shift length is complex (non-immediate)
741 * we cannot do y first *and* put its result in %ecx, because
742 %ecx might be clobbered by x.
743 * if we do y second, then x cannot be
744 in a clobbered reg. Also, we cannot clobber x's reg
745 with the instruction itself.
747 - do y first, put its result in a fresh tmp, then copy it to %ecx later
748 - do y second and put its result into %ecx. x gets placed in a fresh
749 tmp. This is likely to be better, because the reg alloc can
750 eliminate this reg->reg move here (it won't eliminate the other one,
751 because the move is into the fixed %ecx).
753 shift_code width instr x y{-amount-} = do
754 x_code <- getAnyReg x
755 let size = intSize width
756 tmp <- getNewRegNat size
757 y_code <- getAnyReg y
759 code = x_code tmp `appOL`
761 instr size (OpReg ecx) (OpReg tmp)
763 return (Fixed size tmp code)
766 add_code :: Width -> CmmExpr -> CmmExpr -> NatM Register
767 add_code rep x (CmmLit (CmmInt y _))
768 | is32BitInteger y = add_int rep x y
769 add_code rep x y = trivialCode rep (ADD size) (Just (ADD size)) x y
770 where size = intSize rep
-- Subtraction of a literal is turned into addition of its negation so the
-- three-operand LEA form (add_int) can be used.
773 sub_code :: Width -> CmmExpr -> CmmExpr -> NatM Register
774 sub_code rep x (CmmLit (CmmInt y _))
775 | is32BitInteger (-y) = add_int rep x (-y)
776 sub_code rep x y = trivialCode rep (SUB (intSize rep)) Nothing x y
778 -- our three-operand add instruction:
779 add_int width x y = do
780 (x_reg, x_code) <- getSomeReg x
783 imm = ImmInt (fromInteger y)
787 (OpAddr (AddrBaseIndex (EABaseReg x_reg) EAIndexNone imm))
790 return (Any size code)
792 ----------------------
-- div_code: CLTD (signed) or XOR edx,edx (unsigned) to widen the dividend,
-- then IDIV/DIV; quotient is read from %eax, remainder from %edx.
793 div_code width signed quotient x y = do
794 (y_op, y_code) <- getRegOrMem y -- cannot be clobbered
795 x_code <- getAnyReg x
798 widen | signed = CLTD size
799 | otherwise = XOR size (OpReg edx) (OpReg edx)
801 instr | signed = IDIV
804 code = y_code `appOL`
806 toOL [widen, instr size y_op]
808 result | quotient = eax
812 return (Fixed size result code)
-- Memory loads and remaining literal cases.  NOTE(review): pattern guards
-- ('| isFloatType pk', width cases of 'instr'), 'let' keywords and
-- #else/#endif lines are elided in this view.
815 getRegister (CmmLoad mem pk)
818 Amode addr mem_code <- getAmode mem
819 use_sse2 <- sse2Enabled
820 loadFloatAmode use_sse2 (typeWidth pk) addr mem_code
823 getRegister (CmmLoad mem pk)
826 code <- intLoadCode instr mem
827 return (Any size code)
831 instr = case width of
834 -- We always zero-extend 8-bit loads, if we
835 -- can't think of anything better. This is because
836 -- we can't guarantee access to an 8-bit variant of every register
837 -- (esi and edi don't have 8-bit variants), so to make things
838 -- simpler we do our 8-bit arithmetic with full 32-bit registers.
841 #if x86_64_TARGET_ARCH
842 -- Simpler memory load code on x86_64
843 getRegister (CmmLoad mem pk)
845 code <- intLoadCode (MOV size) mem
846 return (Any size code)
847 where size = intSize $ typeWidth pk
-- Integer literal zero: XOR a register with itself.
850 getRegister (CmmLit (CmmInt 0 width))
854 -- x86_64: 32-bit xor is one byte shorter, and zero-extends to 64 bits
855 size1 = IF_ARCH_i386( size, case size of II64 -> II32; _ -> size )
857 = unitOL (XOR size1 (OpReg dst) (OpReg dst))
859 return (Any size code)
861 #if x86_64_TARGET_ARCH
862 -- optimisation for loading small literals on x86_64: take advantage
863 -- of the automatic zero-extension from 32 to 64 bits, because the 32-bit
864 -- instruction forms are shorter.
865 getRegister (CmmLit lit)
866 | isWord64 (cmmLitType lit), not (isBigLit lit)
869 code dst = unitOL (MOV II32 (OpImm imm) (OpReg dst))
871 return (Any II64 code)
873 isBigLit (CmmInt i _) = i < 0 || i > 0xffffffff
875 -- note1: not the same as (not.is32BitLit), because that checks for
876 -- signed literals that fit in 32 bits, but we want unsigned
878 -- note2: all labels are small, because we're assuming the
879 -- small memory model (see gcc docs, -mcmodel=small).
-- General literal: a plain MOV of the immediate.
882 getRegister (CmmLit lit)
884 size = cmmTypeSize (cmmLitType lit)
886 code dst = unitOL (MOV size (OpImm imm) (OpReg dst))
888 return (Any size code)
890 getRegister other = pprPanic "getRegister(x86)" (ppr other)
-- | Build load code: compute the addressing mode for @mem@, then apply
-- @instr@ to move from that address into the eventual destination
-- register.
intLoadCode :: (Operand -> Operand -> Instr) -> CmmExpr
            -> NatM (Reg -> InstrBlock)
intLoadCode instr mem = do
  Amode src mem_code <- getAmode mem
  let load dst = mem_code `snocOL` instr (OpAddr src) (OpReg dst)
  return load
899 -- Compute an expression into *any* register, adding the appropriate
900 -- move instruction if necessary.
901 getAnyReg :: CmmExpr -> NatM (Reg -> InstrBlock)
-- NOTE(review): the equation head and the final call (presumably via
-- 'anyReg r') are elided in this view.
903 r <- getRegister expr
-- | Turn a 'Register' into a function that produces code targeting any
-- destination: an 'Any' already is one; a 'Fixed' gets a trailing
-- reg-to-reg move appended.
anyReg :: Register -> NatM (Reg -> InstrBlock)
anyReg r = case r of
  Any _ code          -> return code
  Fixed rep reg fcode -> return (\dst -> fcode `snocOL` reg2reg rep reg dst)
910 -- A bit like getSomeReg, but we want a reg that can be byte-addressed.
911 -- Fixed registers might not be byte-addressable, so we make sure we've
912 -- got a temporary, inserting an extra reg copy if necessary.
913 getByteReg :: CmmExpr -> NatM (Reg, InstrBlock)
914 #if x86_64_TARGET_ARCH
915 getByteReg = getSomeReg -- all regs are byte-addressable on x86_64
-- NOTE(review): the #else branch's equation head, case scrutinee and the
-- Fixed-register guard head are elided in this view.
918 r <- getRegister expr
921 tmp <- getNewRegNat rep
922 return (tmp, code tmp)
-- Virtual registers are always byte-addressable after allocation:
924 | isVirtualReg reg -> return (reg,code)
926 tmp <- getNewRegNat rep
927 return (tmp, code `snocOL` reg2reg rep reg tmp)
928 -- ToDo: could optimise slightly by checking for byte-addressable
929 -- real registers, but that will happen very rarely if at all.
932 -- Another variant: this time we want the result in a register that cannot
933 -- be modified by code to evaluate an arbitrary expression.
934 getNonClobberedReg :: CmmExpr -> NatM (Reg, InstrBlock)
935 getNonClobberedReg expr = do
936 r <- getRegister expr
-- NOTE(review): the case scrutinee and the 'Any'/'Fixed' alternative heads
-- are elided in this view.
939 tmp <- getNewRegNat rep
940 return (tmp, code tmp)
942 -- only free regs can be clobbered
943 | RegReal (RealRegSingle rr) <- reg
944 , isFastTrue (freeReg rr)
946 tmp <- getNewRegNat rep
947 return (tmp, code `snocOL` reg2reg rep reg tmp)
-- | Move between two registers: x87 80-bit floats need GMOV, everything
-- else uses an ordinary MOV.  NOTE(review): the equation head
-- ('reg2reg size src dst') is elided in this view.
951 reg2reg :: Size -> Reg -> Reg -> Instr
953 | size == FF80 = GMOV src dst
954 | otherwise = MOV size (OpReg src) (OpReg dst)
957 --------------------------------------------------------------------------------
-- | Produce an addressing mode for a memory-referencing expression,
-- pattern-matching add/sub/shift shapes onto x86 base+index*scale+disp
-- forms.  NOTE(review): guard lines ('| is32BitLit lit') and #endif lines
-- are elided in this view.
958 getAmode :: CmmExpr -> NatM Amode
959 getAmode (CmmRegOff r n) = getAmode $ mangleIndexTree r n
961 #if x86_64_TARGET_ARCH
963 getAmode (CmmMachOp (MO_Add W64) [CmmReg (CmmGlobal PicBaseReg),
964 CmmLit displacement])
965 = return $ Amode (ripRel (litToImm displacement)) nilOL
970 -- This is all just ridiculous, since it carefully undoes
971 -- what mangleIndexTree has just done.
972 getAmode (CmmMachOp (MO_Sub _rep) [x, CmmLit lit@(CmmInt i _)])
974 -- ASSERT(rep == II32)???
975 = do (x_reg, x_code) <- getSomeReg x
976 let off = ImmInt (-(fromInteger i))
977 return (Amode (AddrBaseIndex (EABaseReg x_reg) EAIndexNone off) x_code)
979 getAmode (CmmMachOp (MO_Add _rep) [x, CmmLit lit])
981 -- ASSERT(rep == II32)???
982 = do (x_reg, x_code) <- getSomeReg x
983 let off = litToImm lit
984 return (Amode (AddrBaseIndex (EABaseReg x_reg) EAIndexNone off) x_code)
986 -- Turn (lit1 << n + lit2) into (lit2 + lit1 << n) so it will be
987 -- recognised by the next rule.
988 getAmode (CmmMachOp (MO_Add rep) [a@(CmmMachOp (MO_Shl _) _),
990 = getAmode (CmmMachOp (MO_Add rep) [b,a])
992 getAmode (CmmMachOp (MO_Add _) [x, CmmMachOp (MO_Shl _)
993 [y, CmmLit (CmmInt shift _)]])
994 | shift == 0 || shift == 1 || shift == 2 || shift == 3
995 = x86_complex_amode x y shift 0
-- Match (x + ((y << shift) + offset)) and map it onto the x86
-- base + index*scale + displacement addressing form, for scales 1/2/4/8
-- and a displacement that fits in 32 bits.
getAmode (CmmMachOp (MO_Add _)
                    [x, CmmMachOp (MO_Add _)
                        [CmmMachOp (MO_Shl _) [y, CmmLit (CmmInt shift _)],
                         CmmLit (CmmInt offset _)]])
  -- BUG FIX: (&&) binds tighter than (||), so the previous guard
  --   shift == 0 || ... || shift == 3 && is32BitInteger offset
  -- only range-checked the offset when shift == 3; parenthesise so the
  -- 32-bit check applies to every admissible shift.
  | (shift == 0 || shift == 1 || shift == 2 || shift == 3)
    && is32BitInteger offset
  = x86_complex_amode x y shift offset
1005 getAmode (CmmMachOp (MO_Add _) [x,y])
1006 = x86_complex_amode x y 0 0
1008 getAmode (CmmLit lit) | is32BitLit lit
1009 = return (Amode (ImmAddr (litToImm lit) 0) nilOL)
-- NOTE(review): the generic fallback head ('getAmode expr = do') is elided
-- in this view; it computes the expression into a register and uses that
-- as a plain base address.
1012 (reg,code) <- getSomeReg expr
1013 return (Amode (AddrBaseIndex (EABaseReg reg) EAIndexNone (ImmInt 0)) code)
-- | Build a base + index*scale + displacement amode.  The shift (0..3)
-- becomes the scale factor (1/2/4/8).  NOTE(review): the where/let 'base'
-- binding shadows the 'base' parameter, and the closing line of the
-- 'return' is elided in this view.
1016 x86_complex_amode :: CmmExpr -> CmmExpr -> Integer -> Integer -> NatM Amode
1017 x86_complex_amode base index shift offset
1018 = do (x_reg, x_code) <- getNonClobberedReg base
1019 -- x must be in a temp, because it has to stay live over y_code
1020 -- we could compare x_reg and y_reg and do something better here...
1021 (y_reg, y_code) <- getSomeReg index
1023 code = x_code `appOL` y_code
1024 base = case shift of 0 -> 1; 1 -> 2; 2 -> 4; 3 -> 8;
1025 n -> panic $ "x86_complex_amode: unhandled shift! (" ++ show n ++ ")"
1026 return (Amode (AddrBaseIndex (EABaseReg x_reg) (EAIndex y_reg base) (ImmInt (fromIntegral offset)))
1032 -- -----------------------------------------------------------------------------
1033 -- getOperand: sometimes any operand will do.
1035 -- getNonClobberedOperand: the value of the operand will remain valid across
1036 -- the computation of an arbitrary expression, unless the expression
1037 -- is computed directly into a register which the operand refers to
1038 -- (see trivialCode where this function is used for an example).
-- NOTE(review): 'then do'/'else do' lines and the binding of src'/save_code
-- are elided in this view.
1040 getNonClobberedOperand :: CmmExpr -> NatM (Operand, InstrBlock)
1041 getNonClobberedOperand (CmmLit lit) = do
1042 use_sse2 <- sse2Enabled
1043 if use_sse2 && isSuitableFloatingPointLit lit
1045 let CmmFloat _ w = lit
1046 Amode addr code <- memConstant (widthInBytes w) lit
1047 return (OpAddr addr, code)
1050 if is32BitLit lit && not (isFloatType (cmmLitType lit))
1051 then return (OpImm (litToImm lit), nilOL)
1052 else getNonClobberedOperand_generic (CmmLit lit)
1054 getNonClobberedOperand (CmmLoad mem pk) = do
1055 use_sse2 <- sse2Enabled
1056 if (not (isFloatType pk) || use_sse2)
1057 && IF_ARCH_i386(not (isWord64 pk), True)
1059 Amode src mem_code <- getAmode mem
-- If the amode mentions a clobberable register, save the effective address
-- into a fresh temporary with LEA first.
1061 if (amodeCouldBeClobbered src)
1063 tmp <- getNewRegNat archWordSize
1064 return (AddrBaseIndex (EABaseReg tmp) EAIndexNone (ImmInt 0),
-- NOTE(review): 'LEA II32' looks i386-specific; on x86_64 one would expect
-- the word-sized form — verify against the surrounding CPP context.
1065 unitOL (LEA II32 (OpAddr src) (OpReg tmp)))
1068 return (OpAddr src', save_code `appOL` mem_code)
1070 getNonClobberedOperand_generic (CmmLoad mem pk)
1072 getNonClobberedOperand e = getNonClobberedOperand_generic e
-- | Fallback: evaluate the expression into a register that cannot be
-- clobbered, and use that register as the operand.
getNonClobberedOperand_generic :: CmmExpr -> NatM (Operand, InstrBlock)
getNonClobberedOperand_generic expr = do
  (r, setup) <- getNonClobberedReg expr
  return (OpReg r, setup)
-- | True if any register mentioned by the addressing mode could be
-- overwritten while evaluating an arbitrary expression.
amodeCouldBeClobbered :: AddrMode -> Bool
amodeCouldBeClobbered amode = any regClobbered (addrModeRegs amode)

-- | Only allocatable (free) real registers can be clobbered.
regClobbered :: Reg -> Bool
regClobbered reg = case reg of
  RegReal (RealRegSingle rr) -> isFastTrue (freeReg rr)
  _                          -> False
1086 -- getOperand: the operand is not required to remain valid across the
1087 -- computation of an arbitrary expression.
-- NOTE(review): 'then do'/'else do' lines are elided in this view; the
-- structure mirrors getNonClobberedOperand without the save step.
1088 getOperand :: CmmExpr -> NatM (Operand, InstrBlock)
1090 getOperand (CmmLit lit) = do
1091 use_sse2 <- sse2Enabled
1092 if (use_sse2 && isSuitableFloatingPointLit lit)
1094 let CmmFloat _ w = lit
1095 Amode addr code <- memConstant (widthInBytes w) lit
1096 return (OpAddr addr, code)
1099 if is32BitLit lit && not (isFloatType (cmmLitType lit))
1100 then return (OpImm (litToImm lit), nilOL)
1101 else getOperand_generic (CmmLit lit)
1103 getOperand (CmmLoad mem pk) = do
1104 use_sse2 <- sse2Enabled
1105 if (not (isFloatType pk) || use_sse2) && IF_ARCH_i386(not (isWord64 pk), True)
1107 Amode src mem_code <- getAmode mem
1108 return (OpAddr src, mem_code)
1110 getOperand_generic (CmmLoad mem pk)
1112 getOperand e = getOperand_generic e
-- | Fallback: evaluate the expression into some register and use that
-- register as the operand.
getOperand_generic :: CmmExpr -> NatM (Operand, InstrBlock)
getOperand_generic expr = do
  (r, setup) <- getSomeReg expr
  return (OpReg r, setup)
-- | Can this expression be used directly as an x86 operand?  Loads
-- always can; literals can if they fit in 32 bits or are suitable
-- float constants.
-- NOTE(review): the catch-all (False) clause appears to be elided in
-- this extract.
1119 isOperand :: CmmExpr -> Bool
1120 isOperand (CmmLoad _ _) = True
1121 isOperand (CmmLit lit) = is32BitLit lit
1122 || isSuitableFloatingPointLit lit
-- | Place a literal in a static data block and return an amode for it.
-- On x86_64 the constant is addressed RIP-relatively; on other targets
-- we go through cmmMakeDynamicReference so PIC works.
-- NOTE(review): the #else arm and the code emitting the data section
-- are elided in this extract.
1125 memConstant :: Int -> CmmLit -> NatM Amode
1126 memConstant align lit = do
1127 #ifdef x86_64_TARGET_ARCH
1128 lbl <- getNewLabelNat
1129 let addr = ripRel (ImmCLbl lbl)
1132 lbl <- getNewLabelNat
1133 dflags <- getDynFlagsNat
1134 dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl
1135 Amode addr addr_code <- getAmode dynRef
1143 return (Amode addr code)
-- | Load a float from memory: plain MOV of the float size under SSE2,
-- x87 GLD (always at FF80 precision) otherwise.
1146 loadFloatAmode :: Bool -> Width -> AddrMode -> InstrBlock -> NatM Register
1147 loadFloatAmode use_sse2 w addr addr_code = do
1148 let size = floatSize w
1149 code dst = addr_code `snocOL`
1151 then MOV size (OpAddr addr) (OpReg dst)
1152 else GLD size addr dst
1154 return (Any (if use_sse2 then size else FF80) code)
-- A floating-point literal can be used as an operand straight from
-- memory.  A zero literal, however, is better generated into a
-- register, so it is rejected here.
isSuitableFloatingPointLit :: CmmLit -> Bool
isSuitableFloatingPointLit lit = case lit of
  CmmFloat f _ -> f /= 0.0
  _            -> False
-- | Like getOperand, but prefers a memory operand for loads (integer
-- loads, or float loads under SSE2, excluding i386 Word64); everything
-- else is forced into a non-clobberable register.
-- NOTE(review): the then/else marker lines are elided in this extract.
1165 getRegOrMem :: CmmExpr -> NatM (Operand, InstrBlock)
1166 getRegOrMem e@(CmmLoad mem pk) = do
1167 use_sse2 <- sse2Enabled
1168 if (not (isFloatType pk) || use_sse2) && IF_ARCH_i386(not (isWord64 pk), True)
1170 Amode src mem_code <- getAmode mem
1171 return (OpAddr src, mem_code)
1173 (reg, code) <- getNonClobberedReg e
1174 return (OpReg reg, code)
1176 (reg, code) <- getNonClobberedReg e
1177 return (OpReg reg, code)
-- | Does this literal fit in a (sign-extended) 32-bit immediate?  Only
-- relevant on x86_64; the remaining clauses (labels, the i386 arm) are
-- elided in this extract.
1179 is32BitLit :: CmmLit -> Bool
1180 #if x86_64_TARGET_ARCH
1181 is32BitLit (CmmInt i W64) = is32BitInteger i
1182 -- assume that labels are in the range 0-2^31-1: this assumes the
1183 -- small memory model (see gcc docs, -mcmodel=small).
1190 -- Set up a condition code for a conditional branch.
1192 getCondCode :: CmmExpr -> NatM CondCode
1194 -- yes, they really do seem to want exactly the same!
-- Dispatch on the comparison MachOp: float comparisons go through
-- condFltCode, integer comparisons through condIntCode with the
-- matching signed/unsigned condition.
1196 getCondCode (CmmMachOp mop [x, y])
1199 MO_F_Eq W32 -> condFltCode EQQ x y
1200 MO_F_Ne W32 -> condFltCode NE x y
1201 MO_F_Gt W32 -> condFltCode GTT x y
1202 MO_F_Ge W32 -> condFltCode GE x y
1203 MO_F_Lt W32 -> condFltCode LTT x y
1204 MO_F_Le W32 -> condFltCode LE x y
1206 MO_F_Eq W64 -> condFltCode EQQ x y
1207 MO_F_Ne W64 -> condFltCode NE x y
1208 MO_F_Gt W64 -> condFltCode GTT x y
1209 MO_F_Ge W64 -> condFltCode GE x y
1210 MO_F_Lt W64 -> condFltCode LTT x y
1211 MO_F_Le W64 -> condFltCode LE x y
1213 MO_Eq _ -> condIntCode EQQ x y
1214 MO_Ne _ -> condIntCode NE x y
1216 MO_S_Gt _ -> condIntCode GTT x y
1217 MO_S_Ge _ -> condIntCode GE x y
1218 MO_S_Lt _ -> condIntCode LTT x y
1219 MO_S_Le _ -> condIntCode LE x y
1221 MO_U_Gt _ -> condIntCode GU x y
1222 MO_U_Ge _ -> condIntCode GEU x y
1223 MO_U_Lt _ -> condIntCode LU x y
1224 MO_U_Le _ -> condIntCode LEU x y
1226 _other -> pprPanic "getCondCode(x86,x86_64,sparc)" (ppr (CmmMachOp mop [x,y]))
1228 getCondCode other = pprPanic "getCondCode(2)(x86,sparc)" (ppr other)
1233 -- @cond(Int|Flt)Code@: Turn a boolean expression into a condition, to be
1234 -- passed back up the tree.
1236 condIntCode :: Cond -> CmmExpr -> CmmExpr -> NatM CondCode
1238 -- memory vs immediate
-- CMP a 32-bit immediate directly against memory; avoids loading the
-- operand into a register first.
1239 condIntCode cond (CmmLoad x pk) (CmmLit lit) | is32BitLit lit = do
1240 Amode x_addr x_code <- getAmode x
1243 code = x_code `snocOL`
1244 CMP (cmmTypeSize pk) (OpImm imm) (OpAddr x_addr)
1246 return (CondCode False cond code)
1248 -- anything vs zero, using a mask
1249 -- TODO: Add some sanity checking!!!!
-- (x .&. mask) compared with 0 becomes a single TEST instruction.
1250 condIntCode cond (CmmMachOp (MO_And _) [x,o2]) (CmmLit (CmmInt 0 pk))
1251 | (CmmLit lit@(CmmInt mask _)) <- o2, is32BitLit lit
1253 (x_reg, x_code) <- getSomeReg x
1255 code = x_code `snocOL`
1256 TEST (intSize pk) (OpImm (ImmInteger mask)) (OpReg x_reg)
1258 return (CondCode False cond code)
-- x compared with 0 becomes TEST x,x (sets flags without a CMP).
1261 condIntCode cond x (CmmLit (CmmInt 0 pk)) = do
1262 (x_reg, x_code) <- getSomeReg x
1264 code = x_code `snocOL`
1265 TEST (intSize pk) (OpReg x_reg) (OpReg x_reg)
1267 return (CondCode False cond code)
1269 -- anything vs operand
-- x must survive the computation of y, hence getNonClobberedReg.
1270 condIntCode cond x y | isOperand y = do
1271 (x_reg, x_code) <- getNonClobberedReg x
1272 (y_op, y_code) <- getOperand y
1274 code = x_code `appOL` y_code `snocOL`
1275 CMP (cmmTypeSize (cmmExprType x)) y_op (OpReg x_reg)
1277 return (CondCode False cond code)
1279 -- anything vs anything
1280 condIntCode cond x y = do
1281 (y_reg, y_code) <- getNonClobberedReg y
1282 (x_op, x_code) <- getRegOrMem x
1284 code = y_code `appOL`
1286 CMP (cmmTypeSize (cmmExprType x)) (OpReg y_reg) x_op
1288 return (CondCode False cond code)
1292 --------------------------------------------------------------------------------
-- Float comparison: dispatches to the x87 or SSE2 implementation
-- depending on whether -msse2 is in effect.
1293 condFltCode :: Cond -> CmmExpr -> CmmExpr -> NatM CondCode
1295 condFltCode cond x y
1296 = if_sse2 condFltCode_sse2 condFltCode_x87
1300 = ASSERT(cond `elem` ([EQQ, NE, LE, LTT, GE, GTT])) do
1301 (x_reg, x_code) <- getNonClobberedReg x
1302 (y_reg, y_code) <- getSomeReg y
1304 code = x_code `appOL` y_code `snocOL`
1305 GCMP cond x_reg y_reg
1306 -- The GCMP insn does the test and sets the zero flag if comparable
1307 -- and true. Hence we always supply EQQ as the condition to test.
1308 return (CondCode True EQQ code)
1310 -- in the SSE2 comparison ops (ucomiss, ucomisd) the left arg may be
1311 -- an operand, but the right must be a reg. We can probably do better
1312 -- than this general case...
1313 condFltCode_sse2 = do
1314 (x_reg, x_code) <- getNonClobberedReg x
1315 (y_op, y_code) <- getOperand y
1317 code = x_code `appOL`
1319 CMP (floatSize $ cmmExprWidth x) y_op (OpReg x_reg)
1320 -- NB(1): we need to use the unsigned comparison operators on the
1321 -- result of this comparison.
1323 return (CondCode True (condToUnsigned cond) code)
1325 -- -----------------------------------------------------------------------------
1326 -- Generating assignments
1328 -- Assignments are really at the heart of the whole code generation
1329 -- business. Almost all top-level nodes of any real importance are
1330 -- assignments, which correspond to loads, stores, or register
1331 -- transfers. If we're really lucky, some of the register transfers
1332 -- will go away, because we can use the destination register to
1333 -- complete the code generation for the right hand side. This only
1334 -- fails when the right hand side is forced into a fixed register
1335 -- (e.g. the result of a call).
1337 assignMem_IntCode :: Size -> CmmExpr -> CmmExpr -> NatM InstrBlock
1338 assignReg_IntCode :: Size -> CmmReg -> CmmExpr -> NatM InstrBlock
1340 assignMem_FltCode :: Size -> CmmExpr -> CmmExpr -> NatM InstrBlock
1341 assignReg_FltCode :: Size -> CmmReg -> CmmExpr -> NatM InstrBlock
1344 -- integer assignment to memory
1346 -- specific case of adding/subtracting an integer to a particular address.
1347 -- ToDo: catch other cases where we can use an operation directly on a memory
-- mem[addr] := mem[addr] `op` imm collapses to a single read-modify-write
-- ADD/SUB instruction on memory.
1349 assignMem_IntCode pk addr (CmmMachOp op [CmmLoad addr2 _,
1350 CmmLit (CmmInt i _)])
1351 | addr == addr2, pk /= II64 || is32BitInteger i,
1352 Just instr <- check op
1353 = do Amode amode code_addr <- getAmode addr
1354 let code = code_addr `snocOL`
1355 instr pk (OpImm (ImmInt (fromIntegral i))) (OpAddr amode)
1358 check (MO_Add _) = Just ADD
1359 check (MO_Sub _) = Just SUB
-- General case: MOV a stable (register or immediate) source to memory.
1364 assignMem_IntCode pk addr src = do
1365 Amode addr code_addr <- getAmode addr
1366 (code_src, op_src) <- get_op_RI src
1368 code = code_src `appOL`
1370 MOV pk op_src (OpAddr addr)
1371 -- NOTE: op_src is stable, so it will still be valid
1372 -- after code_addr. This may involve the introduction
1373 -- of an extra MOV to a temporary register, but we hope
1374 -- the register allocator will get rid of it.
1378 get_op_RI :: CmmExpr -> NatM (InstrBlock,Operand) -- code, operator
1379 get_op_RI (CmmLit lit) | is32BitLit lit
1380 = return (nilOL, OpImm (litToImm lit))
1382 = do (reg,code) <- getNonClobberedReg op
1383 return (code, OpReg reg)
-- Assign; dst is a reg, rhs is mem
assignReg_IntCode pk reg (CmmLoad src _) = do
  code <- intLoadCode (MOV pk) src
  -- integer destinations never live in SSE2 registers
  let dst = getRegisterReg False reg
  return (code dst)

-- dst is a reg, but src could be anything
assignReg_IntCode _ reg src = do
  code <- getAnyReg src
  let dst = getRegisterReg False reg
  return (code dst)
1397 -- Floating point assignment to memory
-- Store via SSE2 MOV or x87 GST; the source is computed first into a
-- register that survives the address computation.
-- NOTE(review): the `let`/return lines are elided in this extract.
1398 assignMem_FltCode pk addr src = do
1399 (src_reg, src_code) <- getNonClobberedReg src
1400 Amode addr addr_code <- getAmode addr
1401 use_sse2 <- sse2Enabled
1403 code = src_code `appOL`
1405 if use_sse2 then MOV pk (OpReg src_reg) (OpAddr addr)
1406 else GST pk src_reg addr
-- Floating point assignment to a register/temporary.  The destination
-- register choice depends on whether SSE2 registers are in use.
assignReg_FltCode _ reg src = do
  sse2 <- sse2Enabled
  code <- getAnyReg src
  let dst = getRegisterReg sse2 reg
  return (code dst)
-- | Generate an unconditional jump to a computed target: through
-- memory, to an immediate label, or via a register.
-- NOTE(review): the head of the final (general expression) clause is
-- elided in this extract.
1416 genJump :: CmmExpr{-the branch target-} -> NatM InstrBlock
1418 genJump (CmmLoad mem _) = do
1419 Amode target code <- getAmode mem
1420 return (code `snocOL` JMP (OpAddr target))
1422 genJump (CmmLit lit) = do
1423 return (unitOL (JMP (OpImm (litToImm lit))))
1426 (reg,code) <- getSomeReg expr
1427 return (code `snocOL` JMP (OpReg reg))
-- -----------------------------------------------------------------------------
-- Unconditional branches

-- | Emit an unconditional jump to the given basic block.
genBranch :: BlockId -> NatM InstrBlock
genBranch bid = return (toOL (mkJumpInstr bid))
1438 -- -----------------------------------------------------------------------------
1439 -- Conditional jumps
1442 Conditional jumps are always to local labels, so we can use branch
1443 instructions. We peek at the arguments to decide what kind of
1446 I386: First, we have to ensure that the condition
1447 codes are set according to the supplied comparison operation.
1451 :: BlockId -- the branch target
1452 -> CmmExpr -- the condition on which to branch
-- Integer conditions (and x87 float conditions) branch directly; SSE2
-- float conditions additionally have to deal with unordered (NaN)
-- operands via the parity flag, see the comment with condFltReg.
-- NOTE(review): the bodies of plain_test/or_unordered/and_ordered are
-- elided in this extract.
1455 genCondJump id bool = do
1456 CondCode is_float cond cond_code <- getCondCode bool
1457 use_sse2 <- sse2Enabled
1458 if not is_float || not use_sse2
1460 return (cond_code `snocOL` JXX cond id)
1462 lbl <- getBlockIdNat
1464 -- see comment with condFltReg
1465 let code = case cond of
1471 plain_test = unitOL (
1474 or_unordered = toOL [
1478 and_ordered = toOL [
1484 return (cond_code `appOL` code)
1487 -- -----------------------------------------------------------------------------
1488 -- Generating C calls
1490 -- Now the biggest nightmare---calls. Most of the nastiness is buried in
1491 -- @get_arg@, which moves the arguments to the correct registers/stack
1492 -- locations. Apart from that, the code is easy.
1494 -- (If applicable) Do not fill the delay slots here; you will confuse the
1495 -- register allocator.
1498 :: CmmCallTarget -- function to call
1499 -> HintedCmmFormals -- where to put the result
1500 -> HintedCmmActuals -- arguments (of mixed type)
1503 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1505 #if i386_TARGET_ARCH
1507 genCCall (CmmPrim MO_WriteBarrier) _ _ = return nilOL
1508 -- write barrier compiles to no code on x86/x86-64;
1509 -- we keep it this long in order to prevent earlier optimisations.
1511 -- void return type prim op
1512 genCCall (CmmPrim op) [] args =
1513 outOfLineCmmOp op Nothing args
1515 -- we only cope with a single result for foreign calls
-- Selected float prim ops (sqrt/sin/cos/tan) are inlined as x87
-- instructions; everything else goes out of line.
1516 genCCall (CmmPrim op) [r_hinted@(CmmHinted r _)] args = do
1517 l1 <- getNewLabelNat
1518 l2 <- getNewLabelNat
1522 outOfLineCmmOp op (Just r_hinted) args
1524 MO_F32_Sqrt -> actuallyInlineFloatOp GSQRT FF32 args
1525 MO_F64_Sqrt -> actuallyInlineFloatOp GSQRT FF64 args
1527 MO_F32_Sin -> actuallyInlineFloatOp (\s -> GSIN s l1 l2) FF32 args
1528 MO_F64_Sin -> actuallyInlineFloatOp (\s -> GSIN s l1 l2) FF64 args
1530 MO_F32_Cos -> actuallyInlineFloatOp (\s -> GCOS s l1 l2) FF32 args
1531 MO_F64_Cos -> actuallyInlineFloatOp (\s -> GCOS s l1 l2) FF64 args
1533 MO_F32_Tan -> actuallyInlineFloatOp (\s -> GTAN s l1 l2) FF32 args
1534 MO_F64_Tan -> actuallyInlineFloatOp (\s -> GTAN s l1 l2) FF64 args
1536 _other_op -> outOfLineCmmOp op (Just r_hinted) args
-- Helper: expects exactly one argument; panics otherwise.
1539 actuallyInlineFloatOp instr size [CmmHinted x _]
1540 = do res <- trivialUFCode size (instr size) x
1542 return (any (getRegisterReg False (CmmLocal r)))
1544 actuallyInlineFloatOp _ _ args
1545 = panic $ "genCCall.actuallyInlineFloatOp: bad number of arguments! ("
1546 ++ show (length args) ++ ")"
-- General i386 C call: push args right-to-left on the stack (with
-- 16-byte alignment padding on Darwin), emit the CALL, pop the
-- arguments afterwards for ccall (callee pops for stdcall), and move
-- the result out of eax/edx or the x87 fake0 register.
-- NOTE(review): many interleaving lines (let/case headers, #else/#endif)
-- are elided in this extract.
1548 genCCall target dest_regs args = do
1550 sizes = map (arg_size . cmmExprType . hintlessCmm) (reverse args)
1551 #if !darwin_TARGET_OS
1552 tot_arg_size = sum sizes
1554 raw_arg_size = sum sizes
1555 tot_arg_size = roundTo 16 raw_arg_size
1556 arg_pad_size = tot_arg_size - raw_arg_size
1557 delta0 <- getDeltaNat
1558 setDeltaNat (delta0 - arg_pad_size)
1561 use_sse2 <- sse2Enabled
1562 push_codes <- mapM (push_arg use_sse2) (reverse args)
1563 delta <- getDeltaNat
1566 -- deal with static vs dynamic call targets
1567 (callinsns,cconv) <-
1569 CmmCallee (CmmLit (CmmLabel lbl)) conv
1570 -> -- ToDo: stdcall arg sizes
1571 return (unitOL (CALL (Left fn_imm) []), conv)
1572 where fn_imm = ImmCLbl lbl
1574 -> do { (dyn_r, dyn_c) <- getSomeReg expr
1575 ; ASSERT( isWord32 (cmmExprType expr) )
1576 return (dyn_c `snocOL` CALL (Right dyn_r) [], conv) }
1578 -> panic $ "genCCall: Can't handle CmmPrim call type here, error "
1579 ++ "probably because too many return values."
1582 #if darwin_TARGET_OS
1584 = toOL [SUB II32 (OpImm (ImmInt arg_pad_size)) (OpReg esp),
1585 DELTA (delta0 - arg_pad_size)]
1586 `appOL` concatOL push_codes
1589 = concatOL push_codes
1591 -- Deallocate parameters after call for ccall;
1592 -- but not for stdcall (callee does it)
1594 -- We have to pop any stack padding we added
1595 -- on Darwin even if we are doing stdcall, though (#5052)
1596 pop_size | cconv /= StdCallConv = tot_arg_size
1598 #if darwin_TARGET_OS
1604 call = callinsns `appOL`
1606 (if pop_size==0 then [] else
1607 [ADD II32 (OpImm (ImmInt pop_size)) (OpReg esp)])
1609 [DELTA (delta + tot_arg_size)]
1612 setDeltaNat (delta + tot_arg_size)
1615 -- assign the results, if necessary
1616 assign_code [] = nilOL
-- Float results come back in the x87 fake0 register; under SSE2 they
-- must be bounced through memory into an xmm register.
1617 assign_code [CmmHinted dest _hint]
1620 then let tmp_amode = AddrBaseIndex (EABaseReg esp)
1624 in toOL [ SUB II32 (OpImm (ImmInt b)) (OpReg esp),
1625 GST sz fake0 tmp_amode,
1626 MOV sz (OpAddr tmp_amode) (OpReg r_dest),
1627 ADD II32 (OpImm (ImmInt b)) (OpReg esp)]
1628 else unitOL (GMOV fake0 r_dest)
1629 | isWord64 ty = toOL [MOV II32 (OpReg eax) (OpReg r_dest),
1630 MOV II32 (OpReg edx) (OpReg r_dest_hi)]
1631 | otherwise = unitOL (MOV (intSize w) (OpReg eax) (OpReg r_dest))
1633 ty = localRegType dest
1636 r_dest_hi = getHiVRegFromLo r_dest
1637 r_dest = getRegisterReg use_sse2 (CmmLocal dest)
1638 assign_code many = pprPanic "genCCall.assign_code - too many return values:" (ppr many)
1640 return (push_code `appOL`
1642 assign_code dest_regs)
1645 arg_size :: CmmType -> Int -- Width in bytes
1646 arg_size ty = widthInBytes (typeWidth ty)
1648 #if darwin_TARGET_OS
1649 roundTo a x | x `mod` a == 0 = x
1650 | otherwise = x + a - (x `mod` a)
-- Push a single argument, keeping the stack delta in sync: 64-bit ints
-- push two words, floats are stored via MOV/GST into freshly reserved
-- stack space, everything else is a plain PUSH.
1653 push_arg :: Bool -> HintedCmmActual {-current argument-}
1654 -> NatM InstrBlock -- code
1656 push_arg use_sse2 (CmmHinted arg _hint) -- we don't need the hints on x86
1657 | isWord64 arg_ty = do
1658 ChildCode64 code r_lo <- iselExpr64 arg
1659 delta <- getDeltaNat
1660 setDeltaNat (delta - 8)
1662 r_hi = getHiVRegFromLo r_lo
1664 return ( code `appOL`
1665 toOL [PUSH II32 (OpReg r_hi), DELTA (delta - 4),
1666 PUSH II32 (OpReg r_lo), DELTA (delta - 8),
1670 | isFloatType arg_ty = do
1671 (reg, code) <- getSomeReg arg
1672 delta <- getDeltaNat
1673 setDeltaNat (delta-size)
1674 return (code `appOL`
1675 toOL [SUB II32 (OpImm (ImmInt size)) (OpReg esp),
1677 let addr = AddrBaseIndex (EABaseReg esp)
1680 size = floatSize (typeWidth arg_ty)
1683 then MOV size (OpReg reg) (OpAddr addr)
1684 else GST size reg addr
1689 (operand, code) <- getOperand arg
1690 delta <- getDeltaNat
1691 setDeltaNat (delta-size)
1692 return (code `snocOL`
1693 PUSH II32 operand `snocOL`
1697 arg_ty = cmmExprType arg
1698 size = arg_size arg_ty -- Byte size
1700 #elif x86_64_TARGET_ARCH
1702 genCCall (CmmPrim MO_WriteBarrier) _ _ = return nilOL
1703 -- write barrier compiles to no code on x86/x86-64;
1704 -- we keep it this long in order to prevent earlier optimisations.
1706 -- void return type prim op
1707 genCCall (CmmPrim op) [] args =
1708 outOfLineCmmOp op Nothing args
1710 -- we only cope with a single result for foreign calls
1711 genCCall (CmmPrim op) [res] args =
1712 outOfLineCmmOp op (Just res) args
-- General x86_64 C call (System V AMD64 convention as used here):
-- first six integer args in registers, float args in xmm registers,
-- overflow args pushed on the stack with 16-byte alignment maintained,
-- and %al set to the number of SSE registers used (varargs rule).
-- NOTE(review): several interleaving lines are elided in this extract.
1714 genCCall target dest_regs args = do
1716 -- load up the register arguments
1717 (stack_args, aregs, fregs, load_args_code)
1718 <- load_args args allArgRegs allFPArgRegs nilOL
1721 fp_regs_used = reverse (drop (length fregs) (reverse allFPArgRegs))
1722 int_regs_used = reverse (drop (length aregs) (reverse allArgRegs))
1723 arg_regs = [eax] ++ int_regs_used ++ fp_regs_used
1724 -- for annotating the call instruction with
1726 sse_regs = length fp_regs_used
1728 tot_arg_size = arg_size * length stack_args
1730 -- On entry to the called function, %rsp should be aligned
1731 -- on a 16-byte boundary +8 (i.e. the first stack arg after
1732 -- the return address is 16-byte aligned). In STG land
1733 -- %rsp is kept 16-byte aligned (see StgCRun.c), so we just
1734 -- need to make sure we push a multiple of 16-bytes of args,
1735 -- plus the return address, to get the correct alignment.
1736 -- Urg, this is hard. We need to feed the delta back into
1737 -- the arg pushing code.
1738 (real_size, adjust_rsp) <-
1739 if tot_arg_size `rem` 16 == 0
1740 then return (tot_arg_size, nilOL)
1741 else do -- we need to adjust...
1742 delta <- getDeltaNat
1743 setDeltaNat (delta-8)
1744 return (tot_arg_size+8, toOL [
1745 SUB II64 (OpImm (ImmInt 8)) (OpReg rsp),
1749 -- push the stack args, right to left
1750 push_code <- push_args (reverse stack_args) nilOL
1751 delta <- getDeltaNat
1753 -- deal with static vs dynamic call targets
1754 (callinsns,cconv) <-
1756 CmmCallee (CmmLit (CmmLabel lbl)) conv
1757 -> -- ToDo: stdcall arg sizes
1758 return (unitOL (CALL (Left fn_imm) arg_regs), conv)
1759 where fn_imm = ImmCLbl lbl
1761 -> do (dyn_r, dyn_c) <- getSomeReg expr
1762 return (dyn_c `snocOL` CALL (Right dyn_r) arg_regs, conv)
1764 -> panic $ "genCCall: Can't handle CmmPrim call type here, error "
1765 ++ "probably because too many return values."
1768 -- The x86_64 ABI requires us to set %al to the number of SSE2
1769 -- registers that contain arguments, if the called routine
1770 -- is a varargs function. We don't know whether it's a
1771 -- varargs function or not, so we have to assume it is.
1773 -- It's not safe to omit this assignment, even if the number
1774 -- of SSE2 regs in use is zero. If %al is larger than 8
1775 -- on entry to a varargs function, seg faults ensue.
1776 assign_eax n = unitOL (MOV II32 (OpImm (ImmInt n)) (OpReg eax))
1778 let call = callinsns `appOL`
1780 -- Deallocate parameters after call for ccall;
1781 -- but not for stdcall (callee does it)
1782 (if cconv == StdCallConv || real_size==0 then [] else
1783 [ADD (intSize wordWidth) (OpImm (ImmInt real_size)) (OpReg esp)])
1785 [DELTA (delta + real_size)]
1788 setDeltaNat (delta + real_size)
1791 -- assign the results, if necessary
1792 assign_code [] = nilOL
1793 assign_code [CmmHinted dest _hint] =
1794 case typeWidth rep of
1795 W32 | isFloatType rep -> unitOL (MOV (floatSize W32) (OpReg xmm0) (OpReg r_dest))
1796 W64 | isFloatType rep -> unitOL (MOV (floatSize W64) (OpReg xmm0) (OpReg r_dest))
1797 _ -> unitOL (MOV (cmmTypeSize rep) (OpReg rax) (OpReg r_dest))
1799 rep = localRegType dest
1800 r_dest = getRegisterReg True (CmmLocal dest)
1801 assign_code many = panic "genCCall.assign_code many"
1803 return (load_args_code `appOL`
1806 assign_eax sse_regs `appOL`
1808 assign_code dest_regs)
1811 arg_size = 8 -- always, at the mo
-- Distribute arguments over the available int/FP argument registers;
-- anything left over is returned to be pushed on the stack.
1813 load_args :: [CmmHinted CmmExpr]
1814 -> [Reg] -- int regs avail for args
1815 -> [Reg] -- FP regs avail for args
1817 -> NatM ([CmmHinted CmmExpr],[Reg],[Reg],InstrBlock)
1818 load_args args [] [] code = return (args, [], [], code)
1819 -- no more regs to use
1820 load_args [] aregs fregs code = return ([], aregs, fregs, code)
1821 -- no more args to push
1822 load_args ((CmmHinted arg hint) : rest) aregs fregs code
1823 | isFloatType arg_rep =
1827 arg_code <- getAnyReg arg
1828 load_args rest aregs rs (code `appOL` arg_code r)
1833 arg_code <- getAnyReg arg
1834 load_args rest rs fregs (code `appOL` arg_code r)
1836 arg_rep = cmmExprType arg
1839 (args',ars,frs,code') <- load_args rest aregs fregs code
1840 return ((CmmHinted arg hint):args', ars, frs, code')
-- Push remaining args on the stack, one 8-byte slot each.
1842 push_args [] code = return code
1843 push_args ((CmmHinted arg hint):rest) code
1844 | isFloatType arg_rep = do
1845 (arg_reg, arg_code) <- getSomeReg arg
1846 delta <- getDeltaNat
1847 setDeltaNat (delta-arg_size)
1848 let code' = code `appOL` arg_code `appOL` toOL [
1849 SUB (intSize wordWidth) (OpImm (ImmInt arg_size)) (OpReg rsp) ,
1850 DELTA (delta-arg_size),
1851 MOV (floatSize width) (OpReg arg_reg) (OpAddr (spRel 0))]
1852 push_args rest code'
1855 -- we only ever generate word-sized function arguments. Promotion
1856 -- has already happened: our Int8# type is kept sign-extended
1857 -- in an Int#, for example.
1858 ASSERT(width == W64) return ()
1859 (arg_op, arg_code) <- getOperand arg
1860 delta <- getDeltaNat
1861 setDeltaNat (delta-arg_size)
1862 let code' = code `appOL` arg_code `appOL` toOL [
1864 DELTA (delta-arg_size)]
1865 push_args rest code'
1867 arg_rep = cmmExprType arg
1868 width = typeWidth arg_rep
1871 genCCall = panic "X86.genCCAll: not defined"
1873 #endif /* x86_64_TARGET_ARCH */
-- | Compile a callish MachOp as an ordinary unsafe foreign call to the
-- corresponding C library function (sqrt, sin, memcpy, ...).
-- NOTE(review): the clauses selecting `args'`/`fn` and some case lines
-- are elided in this extract.
1876 outOfLineCmmOp :: CallishMachOp -> Maybe HintedCmmFormal -> HintedCmmActuals -> NatM InstrBlock
1877 outOfLineCmmOp mop res args
1879 dflags <- getDynFlagsNat
1880 targetExpr <- cmmMakeDynamicReference dflags addImportNat CallReference lbl
1881 let target = CmmCallee targetExpr CCallConv
1883 stmtToInstrs (CmmCall target (catMaybes [res]) args' CmmUnsafe CmmMayReturn)
1885 -- Assume we can call these functions directly, and that they're not in a dynamic library.
1886 -- TODO: Why is this ok? Under linux this code will be in libm.so
1887 -- Is is because they're really implemented as a primitive instruction by the assembler?? -- BL 2009/12/31
1888 lbl = mkForeignLabel fn Nothing ForeignLabelInThisPackage IsFunction
-- memcpy/memset/memmove drop their last (alignment) argument.
1891 MO_Memcpy -> init args
1892 MO_Memset -> init args
1893 MO_Memmove -> init args
1897 MO_F32_Sqrt -> fsLit "sqrtf"
1898 MO_F32_Sin -> fsLit "sinf"
1899 MO_F32_Cos -> fsLit "cosf"
1900 MO_F32_Tan -> fsLit "tanf"
1901 MO_F32_Exp -> fsLit "expf"
1902 MO_F32_Log -> fsLit "logf"
1904 MO_F32_Asin -> fsLit "asinf"
1905 MO_F32_Acos -> fsLit "acosf"
1906 MO_F32_Atan -> fsLit "atanf"
1908 MO_F32_Sinh -> fsLit "sinhf"
1909 MO_F32_Cosh -> fsLit "coshf"
1910 MO_F32_Tanh -> fsLit "tanhf"
1911 MO_F32_Pwr -> fsLit "powf"
1913 MO_F64_Sqrt -> fsLit "sqrt"
1914 MO_F64_Sin -> fsLit "sin"
1915 MO_F64_Cos -> fsLit "cos"
1916 MO_F64_Tan -> fsLit "tan"
1917 MO_F64_Exp -> fsLit "exp"
1918 MO_F64_Log -> fsLit "log"
1920 MO_F64_Asin -> fsLit "asin"
1921 MO_F64_Acos -> fsLit "acos"
1922 MO_F64_Atan -> fsLit "atan"
1924 MO_F64_Sinh -> fsLit "sinh"
1925 MO_F64_Cosh -> fsLit "cosh"
1926 MO_F64_Tanh -> fsLit "tanh"
1927 MO_F64_Pwr -> fsLit "pow"
1929 MO_Memcpy -> fsLit "memcpy"
1930 MO_Memset -> fsLit "memset"
1931 MO_Memmove -> fsLit "memmove"
1933 other -> panic $ "outOfLineCmmOp: unmatched op! (" ++ show other ++ ")"
1936 -- -----------------------------------------------------------------------------
1937 -- Generating a table-branch
-- genSwitch: index into a jump table and jump through it.  The PIC path
-- uses label-relative entries (added to the table base at runtime); the
-- non-PIC path jumps through absolute addresses.
-- NOTE(review): the guard lines selecting the PIC/non-PIC branches are
-- elided in this extract.
1939 genSwitch :: CmmExpr -> [Maybe BlockId] -> NatM InstrBlock
1944 (reg,e_code) <- getSomeReg expr
1945 lbl <- getNewLabelNat
1946 dflags <- getDynFlagsNat
1947 dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl
1948 (tableReg,t_code) <- getSomeReg $ dynRef
1949 let op = OpAddr (AddrBaseIndex (EABaseReg tableReg)
1950 (EAIndex reg wORD_SIZE) (ImmInt 0))
1952 #if x86_64_TARGET_ARCH
1953 #if darwin_TARGET_OS
1954 -- on Mac OS X/x86_64, put the jump table in the text section
1955 -- to work around a limitation of the linker.
1956 -- ld64 is unable to handle the relocations for
1958 -- if L0 is not preceded by a non-anonymous label in its section.
1960 code = e_code `appOL` t_code `appOL` toOL [
1961 ADD (intSize wordWidth) op (OpReg tableReg),
1962 JMP_TBL (OpReg tableReg) ids Text lbl
1965 -- HACK: On x86_64 binutils<2.17 is only able to generate PC32
1966 -- relocations, hence we only get 32-bit offsets in the jump
1967 -- table. As these offsets are always negative we need to properly
1968 -- sign extend them to 64-bit. This hack should be removed in
1969 -- conjunction with the hack in PprMach.hs/pprDataItem once
1970 -- binutils 2.17 is standard.
1971 code = e_code `appOL` t_code `appOL` toOL [
1973 (OpAddr (AddrBaseIndex (EABaseReg tableReg)
1974 (EAIndex reg wORD_SIZE) (ImmInt 0)))
1976 ADD (intSize wordWidth) (OpReg reg) (OpReg tableReg),
1977 JMP_TBL (OpReg tableReg) ids ReadOnlyData lbl
1981 code = e_code `appOL` t_code `appOL` toOL [
1982 ADD (intSize wordWidth) op (OpReg tableReg),
1983 JMP_TBL (OpReg tableReg) ids ReadOnlyData lbl
-- Non-PIC path: index straight into the table by label.
1989 (reg,e_code) <- getSomeReg expr
1990 lbl <- getNewLabelNat
1992 op = OpAddr (AddrBaseIndex EABaseNone (EAIndex reg wORD_SIZE) (ImmCLbl lbl))
1993 code = e_code `appOL` toOL [
1994 JMP_TBL op ids ReadOnlyData lbl
-- | Extract the jump table required by an instruction, if any.  Only
-- JMP_TBL instructions carry one.
generateJumpTableForInstr :: Instr -> Maybe (NatCmmTop Instr)
generateJumpTableForInstr insn = case insn of
  JMP_TBL _ ids section lbl -> Just (createJumpTable ids section lbl)
  _                         -> Nothing
-- | Build the static data block for a jump table: label-relative
-- entries on one path (a Nothing entry becomes a zero word), absolute
-- entries otherwise.
-- NOTE(review): the guard line selecting the relative-entry path is
-- elided in this extract.
2003 createJumpTable :: [Maybe BlockId] -> Section -> CLabel -> GenCmmTop CmmStatic h g
2004 createJumpTable ids section lbl
2007 let jumpTableEntryRel Nothing
2008 = CmmStaticLit (CmmInt 0 wordWidth)
2009 jumpTableEntryRel (Just blockid)
2010 = CmmStaticLit (CmmLabelDiffOff blockLabel lbl 0)
2011 where blockLabel = mkAsmTempLabel (getUnique blockid)
2012 in map jumpTableEntryRel ids
2013 | otherwise = map jumpTableEntry ids
2014 in CmmData section (CmmDataLabel lbl : jumpTable)
2016 -- -----------------------------------------------------------------------------
2017 -- 'condIntReg' and 'condFltReg': condition codes into registers
2019 -- Turn those condition codes into integers now (when they appear on
2020 -- the right hand side of an assignment).
2022 -- (If applicable) Do not fill the delay slots here; you will confuse the
2023 -- register allocator.
2025 condIntReg :: Cond -> CmmExpr -> CmmExpr -> NatM Register
-- SETcc writes only a byte, so materialise into an 8-bit temporary and
-- zero-extend into the destination.
2027 condIntReg cond x y = do
2028 CondCode _ cond cond_code <- condIntCode cond x y
2029 tmp <- getNewRegNat II8
2031 code dst = cond_code `appOL` toOL [
2032 SETCC cond (OpReg tmp),
2033 MOVZxL II8 (OpReg tmp) (OpReg dst)
2036 return (Any II32 code)
-- | Turn a float comparison into a 0/1 value in a register.  The SSE2
-- path must also account for unordered (NaN) operands via the parity
-- flag; see the long comment below.
2040 condFltReg :: Cond -> CmmExpr -> CmmExpr -> NatM Register
2041 condFltReg cond x y = if_sse2 condFltReg_sse2 condFltReg_x87
2044 CondCode _ cond cond_code <- condFltCode cond x y
2045 tmp <- getNewRegNat II8
2047 code dst = cond_code `appOL` toOL [
2048 SETCC cond (OpReg tmp),
2049 MOVZxL II8 (OpReg tmp) (OpReg dst)
2052 return (Any II32 code)
2054 condFltReg_sse2 = do
2055 CondCode _ cond cond_code <- condFltCode cond x y
2056 tmp1 <- getNewRegNat archWordSize
2057 tmp2 <- getNewRegNat archWordSize
2059 -- We have to worry about unordered operands (eg. comparisons
2060 -- against NaN). If the operands are unordered, the comparison
2061 -- sets the parity flag, carry flag and zero flag.
2062 -- All comparisons are supposed to return false for unordered
2063 -- operands except for !=, which returns true.
2065 -- Optimisation: we don't have to test the parity flag if we
2066 -- know the test has already excluded the unordered case: eg >
2067 -- and >= test for a zero carry flag, which can only occur for
2068 -- ordered operands.
2070 -- ToDo: by reversing comparisons we could avoid testing the
2071 -- parity flag in more cases.
2076 NE -> or_unordered dst
2077 GU -> plain_test dst
2078 GEU -> plain_test dst
2079 _ -> and_ordered dst)
2081 plain_test dst = toOL [
2082 SETCC cond (OpReg tmp1),
2083 MOVZxL II8 (OpReg tmp1) (OpReg dst)
2085 or_unordered dst = toOL [
2086 SETCC cond (OpReg tmp1),
2087 SETCC PARITY (OpReg tmp2),
2088 OR II8 (OpReg tmp1) (OpReg tmp2),
2089 MOVZxL II8 (OpReg tmp2) (OpReg dst)
2091 and_ordered dst = toOL [
2092 SETCC cond (OpReg tmp1),
2093 SETCC NOTPARITY (OpReg tmp2),
2094 AND II8 (OpReg tmp1) (OpReg tmp2),
2095 MOVZxL II8 (OpReg tmp2) (OpReg dst)
2098 return (Any II32 code)
2101 -- -----------------------------------------------------------------------------
2102 -- 'trivial*Code': deal with trivial instructions
2104 -- Trivial (dyadic: 'trivialCode', floating-point: 'trivialFCode',
2105 -- unary: 'trivialUCode', unary fl-pt:'trivialUFCode') instructions.
2106 -- Only look for constants on the right hand side, because that's
2107 -- where the generic optimizer will have put them.
2109 -- Similarly, for unary instructions, we don't have to worry about
2110 -- matching an StInt as the argument, because genericOpt will already
2111 -- have handled the constant-folding.
2115 The Rules of the Game are:
2117 * You cannot assume anything about the destination register dst;
2118 it may be anything, including a fixed reg.
2120 * You may compute an operand into a fixed reg, but you may not
2121 subsequently change the contents of that fixed reg. If you
2122 want to do so, first copy the value either to a temporary
2123 or into dst. You are free to modify dst even if it happens
2124 to be a fixed reg -- that's not your problem.
2126 * You cannot assume that a fixed reg will stay live over an
2127 arbitrary computation. The same applies to the dst reg.
2129 * Temporary regs obtained from getNewRegNat are distinct from
2130 each other and from all other regs, and stay live over
2131 arbitrary computations.
2133 --------------------
2135 SDM's version of The Rules:
2137 * If getRegister returns Any, that means it can generate correct
2138 code which places the result in any register, period. Even if that
2139 register happens to be read during the computation.
2141 Corollary #1: this means that if you are generating code for an
2142 operation with two arbitrary operands, you cannot assign the result
2143 of the first operand into the destination register before computing
2144 the second operand. The second operand might require the old value
2145 of the destination register.
2147 Corollary #2: A function might be able to generate more efficient
2148 code if it knows the destination register is a new temporary (and
2149 therefore not read by any of the sub-computations).
2151 * If getRegister returns Any, then the code it generates may modify only:
2152 (a) fresh temporaries
2153 (b) the destination register
2154 (c) known registers (eg. %ecx is used by shifts)
2155 In particular, it may *not* modify global registers, unless the global
2156 register happens to be the destination register.
-- | Two-operand integer instruction.  If a reversed form is available
-- and the left argument is a 32-bit literal, emit it as an immediate on
-- the reversed instruction; otherwise fall through to genTrivialCode.
-- NOTE(review): the `let code dst` header lines are elided in this
-- extract.
2159 trivialCode :: Width -> (Operand -> Operand -> Instr)
2160 -> Maybe (Operand -> Operand -> Instr)
2161 -> CmmExpr -> CmmExpr -> NatM Register
2162 trivialCode width _ (Just revinstr) (CmmLit lit_a) b
2163 | is32BitLit lit_a = do
2164 b_code <- getAnyReg b
2167 = b_code dst `snocOL`
2168 revinstr (OpImm (litToImm lit_a)) (OpReg dst)
2170 return (Any (intSize width) code)
2172 trivialCode width instr _ a b
2173 = genTrivialCode (intSize width) instr a b
2175 -- This is re-used for floating pt instructions too.
2176 genTrivialCode :: Size -> (Operand -> Operand -> Instr)
2177 -> CmmExpr -> CmmExpr -> NatM Register
2178 genTrivialCode rep instr a b = do
2179 (b_op, b_code) <- getNonClobberedOperand b
2180 a_code <- getAnyReg a
2181 tmp <- getNewRegNat rep
2183 -- We want the value of b to stay alive across the computation of a.
2184 -- But, we want to calculate a straight into the destination register,
2185 -- because the instruction only has two operands (dst := dst `op` src).
2186 -- The troublesome case is when the result of b is in the same register
2187 -- as the destination reg. In this case, we have to save b in a
2188 -- new temporary across the computation of a.
-- Clash case: copy b into tmp first, then compute a into dst and apply
-- the instruction with tmp as the source.
2190 | dst `regClashesWithOp` b_op =
2192 unitOL (MOV rep b_op (OpReg tmp)) `appOL`
2194 instr (OpReg tmp) (OpReg dst)
-- No clash: compute a into dst, then apply the instruction directly.
2198 instr b_op (OpReg dst)
2200 return (Any rep code)
-- | Test whether a register is mentioned by an operand: either it is
-- the operand's register itself, or it appears among the registers of
-- the operand's address mode.
regClashesWithOp :: Reg -> Operand -> Bool
regClashesWithOp reg (OpReg reg')   = reg == reg'
regClashesWithOp reg (OpAddr amode) = reg `elem` addrModeRegs amode
regClashesWithOp _   _              = False
-- | Generate code for a unary (one-operand) instruction: the argument
-- is computed straight into the destination register.
-- NOTE(review): the `code` binding that applies `instr` is not visible
-- in this excerpt -- presumably it updates the destination in place;
-- confirm against the full source.
2209 trivialUCode :: Size -> (Operand -> Instr)
2210 -> CmmExpr -> NatM Register
2211 trivialUCode rep instr x = do
2212 x_code <- getAnyReg x
2217 return (Any rep code)
-- | Generate code for a three-address x87 floating-point instruction.
-- Both operands are evaluated into registers; the operation is always
-- performed at FF80 (80-bit extended) precision on x87.
2221 trivialFCode_x87 :: (Size -> Reg -> Reg -> Reg -> Instr)
2222 -> CmmExpr -> CmmExpr -> NatM Register
2223 trivialFCode_x87 instr x y = do
2224 (x_reg, x_code) <- getNonClobberedReg x -- these work for float regs too
2225 (y_reg, y_code) <- getSomeReg y
2227 size = FF80 -- always, on x87
2231 instr size x_reg y_reg dst
2232 return (Any size code)
-- | Two-operand SSE2 floating-point instruction at the given width.
-- Instantiates the instruction at the corresponding float size and
-- defers to the shared two-operand code generator.
trivialFCode_sse2 :: Width -> (Size -> Operand -> Operand -> Instr)
                  -> CmmExpr -> CmmExpr -> NatM Register
trivialFCode_sse2 pk instr x y = genTrivialCode sz (instr sz) x y
  where
    sz = floatSize pk
-- | Generate code for a unary floating-point (reg -> reg) instruction.
-- The argument is evaluated into a register; the instruction takes that
-- source register and a destination register chosen by the allocator.
-- NOTE(review): the `code` binding is elided in this excerpt.
2241 trivialUFCode :: Size -> (Reg -> Reg -> Instr) -> CmmExpr -> NatM Register
2242 trivialUFCode size instr x = do
2243 (x_reg, x_code) <- getSomeReg x
2249 return (Any size code)
2252 --------------------------------------------------------------------------------
-- | Convert an integer of width `from` to a float of width `to`.
-- Dispatches on SSE2 availability: the x87 path uses GITOF/GITOD and
-- always reports an 80-bit (FF80) result; the SSE2 path uses
-- CVTSI2SS/CVTSI2SD and reports a result of the requested float width.
2253 coerceInt2FP :: Width -> Width -> CmmExpr -> NatM Register
2254 coerceInt2FP from to x = if_sse2 coerce_sse2 coerce_x87
-- x87 path: evaluate x into a register and convert via GITOF/GITOD.
2257 (x_reg, x_code) <- getSomeReg x
2259 opc = case to of W32 -> GITOF; W64 -> GITOD;
2260 n -> panic $ "coerceInt2FP.x87: unhandled width ("
2262 code dst = x_code `snocOL` opc x_reg dst
2263 -- ToDo: works for non-II32 reps?
2264 return (Any FF80 code)
-- SSE2 path: the conversion can take a memory or register operand.
2267 (x_op, x_code) <- getOperand x -- ToDo: could be a safe operand
2269 opc = case to of W32 -> CVTSI2SS; W64 -> CVTSI2SD
2270 n -> panic $ "coerceInt2FP.sse: unhandled width ("
2272 code dst = x_code `snocOL` opc (intSize from) x_op dst
2274 return (Any (floatSize to) code)
2275 -- works even if the destination rep is <II32
2277 --------------------------------------------------------------------------------
-- | Truncate a float of width `from` to an integer of width `to`.
-- The x87 path uses GFTOI/GDTOI; the SSE2 path uses the truncating
-- CVTTSS2SIQ/CVTTSD2SIQ conversions.
2278 coerceFP2Int :: Width -> Width -> CmmExpr -> NatM Register
2279 coerceFP2Int from to x = if_sse2 coerceFP2Int_sse2 coerceFP2Int_x87
2281 coerceFP2Int_x87 = do
2282 (x_reg, x_code) <- getSomeReg x
2284 opc = case from of W32 -> GFTOI; W64 -> GDTOI
2285 n -> panic $ "coerceFP2Int.x87: unhandled width ("
2287 code dst = x_code `snocOL` opc x_reg dst
2288 -- ToDo: works for non-II32 reps?
2290 return (Any (intSize to) code)
2292 coerceFP2Int_sse2 = do
2293 (x_op, x_code) <- getOperand x -- ToDo: could be a safe operand
2295 opc = case from of W32 -> CVTTSS2SIQ; W64 -> CVTTSD2SIQ;
-- NOTE(review): "coerceFP2Init.sse" in the panic message below looks
-- like a typo for "coerceFP2Int.sse" (diagnostic text only, no effect
-- on generated code).
2296 n -> panic $ "coerceFP2Init.sse: unhandled width ("
2298 code dst = x_code `snocOL` opc (intSize to) x_op dst
2300 return (Any (intSize to) code)
2301 -- works even if the destination rep is <II32
2304 --------------------------------------------------------------------------------
-- | Convert between float widths (W32 <-> W64).  With SSE2 this emits
-- CVTSD2SS/CVTSS2SD and reports the requested float size; on the x87
-- path the result size is reported as FF80 instead.
-- NOTE(review): the x87 alternative of the `opc` guard is not visible
-- in this excerpt -- confirm against the full source.
2305 coerceFP2FP :: Width -> CmmExpr -> NatM Register
2306 coerceFP2FP to x = do
2307 use_sse2 <- sse2Enabled
2308 (x_reg, x_code) <- getSomeReg x
2310 opc | use_sse2 = case to of W32 -> CVTSD2SS; W64 -> CVTSS2SD;
2311 n -> panic $ "coerceFP2FP: unhandled width ("
2314 code dst = x_code `snocOL` opc x_reg dst
2316 return (Any (if use_sse2 then floatSize to else FF80) code)
2318 --------------------------------------------------------------------------------
-- | Negate a float using SSE2: XOR the value with a sign-bit mask
-- (0x80000000 for single precision, 0x8000000000000000 otherwise)
-- loaded from a memory constant.
2320 sse2NegCode :: Width -> CmmExpr -> NatM Register
2321 sse2NegCode w x = do
2322 let sz = floatSize w
2323 x_code <- getAnyReg x
2324 -- This is how gcc does it, so it can't be that bad:
2326 const | FF32 <- sz = CmmInt 0x80000000 W32
2327 | otherwise = CmmInt 0x8000000000000000 W64
2328 Amode amode amode_code <- memConstant (widthInBytes w) const
2329 tmp <- getNewRegNat sz
-- Load the mask into a temporary, then XOR it into the destination.
2331 code dst = x_code dst `appOL` amode_code `appOL` toOL [
2332 MOV sz (OpAddr amode) (OpReg tmp),
2333 XOR sz (OpReg tmp) (OpReg dst)
2336 return (Any sz code)