2 -- The above warning supression flag is a temporary kludge.
3 -- While working on this module you are encouraged to remove it and fix
4 -- any warnings in the module. See
5 -- http://hackage.haskell.org/trac/ghc/wiki/Commentary/CodingStyle#Warnings
8 -----------------------------------------------------------------------------
10 -- Generating machine code (instruction selection)
12 -- (c) The University of Glasgow 1996-2004
14 -----------------------------------------------------------------------------
16 -- This is a big module, but, if you pay attention to
17 -- (a) the sectioning, (b) the type signatures, and
18 -- (c) the #if blah_TARGET_ARCH} things, the
19 -- structure should not be too overwhelming.
23 generateJumpTableForInstr,
29 #include "HsVersions.h"
30 #include "nativeGen/NCG.h"
31 #include "../includes/MachDeps.h"
47 -- Our intermediate code:
50 import PprCmm ( pprExpr )
54 import ClosureInfo ( C_SRT(..) )
57 import StaticFlags ( opt_PIC )
58 import ForeignCall ( CCallConv(..) )
61 import qualified Outputable as O
65 import FastBool ( isFastTrue )
66 import Constants ( wORD_SIZE )
69 import Debug.Trace ( trace )
71 import Control.Monad ( mapAndUnzipM )
72 import Data.Maybe ( fromJust, catMaybes )
77 sse2Enabled :: NatM Bool
78 #if x86_64_TARGET_ARCH
79 -- SSE2 is fixed on for x86_64. It would be possible to make it optional,
80 -- but we'd need to fix at least the foreign call code where the calling
81 -- convention specifies the use of xmm regs, and possibly other places.
82 sse2Enabled = return True
85 dflags <- getDynFlagsNat
86 return (dopt Opt_SSE2 dflags)
89 if_sse2 :: NatM a -> NatM a -> NatM a
92 if b then sse2 else x87
97 -> NatM [NatCmmTop Instr]
99 cmmTopCodeGen dynflags (CmmProc info lab (ListGraph blocks)) = do
100 (nat_blocks,statics) <- mapAndUnzipM basicBlockCodeGen blocks
101 picBaseMb <- getPicBaseMaybeNat
102 let proc = CmmProc info lab (ListGraph $ concat nat_blocks)
103 tops = proc : concat statics
104 os = platformOS $ targetPlatform dynflags
107 Just picBase -> initializePicBase_x86 ArchX86 os picBase tops
108 Nothing -> return tops
110 cmmTopCodeGen _ (CmmData sec dat) = do
111 return [CmmData sec dat] -- no translation, we just use CmmStatic
116 -> NatM ( [NatBasicBlock Instr]
119 basicBlockCodeGen (BasicBlock id stmts) = do
120 instrs <- stmtsToInstrs stmts
121 -- code generation may introduce new basic block boundaries, which
122 -- are indicated by the NEWBLOCK instruction. We must split up the
123 -- instruction stream into basic blocks again. Also, we extract
126 (top,other_blocks,statics) = foldrOL mkBlocks ([],[],[]) instrs
128 mkBlocks (NEWBLOCK id) (instrs,blocks,statics)
129 = ([], BasicBlock id instrs : blocks, statics)
130 mkBlocks (LDATA sec dat) (instrs,blocks,statics)
131 = (instrs, blocks, CmmData sec dat:statics)
132 mkBlocks instr (instrs,blocks,statics)
133 = (instr:instrs, blocks, statics)
135 return (BasicBlock id top : other_blocks, statics)
138 stmtsToInstrs :: [CmmStmt] -> NatM InstrBlock
140 = do instrss <- mapM stmtToInstrs stmts
141 return (concatOL instrss)
144 stmtToInstrs :: CmmStmt -> NatM InstrBlock
145 stmtToInstrs stmt = case stmt of
146 CmmNop -> return nilOL
147 CmmComment s -> return (unitOL (COMMENT s))
150 | isFloatType ty -> assignReg_FltCode size reg src
151 #if WORD_SIZE_IN_BITS==32
152 | isWord64 ty -> assignReg_I64Code reg src
154 | otherwise -> assignReg_IntCode size reg src
155 where ty = cmmRegType reg
156 size = cmmTypeSize ty
159 | isFloatType ty -> assignMem_FltCode size addr src
160 #if WORD_SIZE_IN_BITS==32
161 | isWord64 ty -> assignMem_I64Code addr src
163 | otherwise -> assignMem_IntCode size addr src
164 where ty = cmmExprType src
165 size = cmmTypeSize ty
167 CmmCall target result_regs args _ _
168 -> genCCall target result_regs args
170 CmmBranch id -> genBranch id
171 CmmCondBranch arg id -> genCondJump id arg
172 CmmSwitch arg ids -> genSwitch arg ids
173 CmmJump arg params -> genJump arg
175 panic "stmtToInstrs: return statement should have been cps'd away"
178 --------------------------------------------------------------------------------
179 -- | 'InstrBlock's are the insn sequences generated by the insn selectors.
180 -- They are really trees of insns to facilitate fast appending, where a
181 -- left-to-right traversal yields the insns in the correct order.
187 -- | Condition codes passed up the tree.
190 = CondCode Bool Cond InstrBlock
193 -- | a.k.a "Register64"
194 -- Reg is the lower 32-bit temporary which contains the result.
195 -- Use getHiVRegFromLo to find the other VRegUnique.
197 -- Rules of this simplified insn selection game are therefore that
198 -- the returned Reg may be modified
206 -- | Register's passed up the tree. If the stix code forces the register
207 -- to live in a pre-decided machine register, it comes out as @Fixed@;
208 -- otherwise, it comes out as @Any@, and the parent can decide which
209 -- register to put it in.
212 = Fixed Size Reg InstrBlock
213 | Any Size (Reg -> InstrBlock)
216 swizzleRegisterRep :: Register -> Size -> Register
217 swizzleRegisterRep (Fixed _ reg code) size = Fixed size reg code
218 swizzleRegisterRep (Any _ codefn) size = Any size codefn
221 -- | Grab the Reg for a CmmReg
222 getRegisterReg :: Bool -> CmmReg -> Reg
224 getRegisterReg use_sse2 (CmmLocal (LocalReg u pk))
225 = let sz = cmmTypeSize pk in
226 if isFloatSize sz && not use_sse2
227 then RegVirtual (mkVirtualReg u FF80)
228 else RegVirtual (mkVirtualReg u sz)
230 getRegisterReg _ (CmmGlobal mid)
231 = case globalRegMaybe mid of
232 Just reg -> RegReal $ reg
233 Nothing -> pprPanic "getRegisterReg-memory" (ppr $ CmmGlobal mid)
234 -- By this stage, the only MagicIds remaining should be the
235 -- ones which map to a real machine register on this
236 -- platform. Hence ...
239 -- | Memory addressing modes passed up the tree.
241 = Amode AddrMode InstrBlock
244 Now, given a tree (the argument to an CmmLoad) that references memory,
245 produce a suitable addressing mode.
247 A Rule of the Game (tm) for Amodes: use of the addr bit must
248 immediately follow use of the code part, since the code part puts
249 values in registers which the addr then refers to. So you can't put
250 anything in between, lest it overwrite some of those registers. If
251 you need to do some other computation between the code part and use of
252 the addr bit, first store the effective address from the amode in a
253 temporary, then do the other computation, and then use the temporary:
257 ... other computation ...
262 -- | Check whether an integer will fit in 32 bits.
263 -- A CmmInt is intended to be truncated to the appropriate
264 -- number of bits, so here we truncate it to Int64. This is
265 -- important because e.g. -1 as a CmmInt might be either
266 -- -1 or 18446744073709551615.
268 is32BitInteger :: Integer -> Bool
269 is32BitInteger i = i64 <= 0x7fffffff && i64 >= -0x80000000
270 where i64 = fromIntegral i :: Int64
273 -- | Convert a BlockId to some CmmStatic data
274 jumpTableEntry :: Maybe BlockId -> CmmStatic
275 jumpTableEntry Nothing = CmmStaticLit (CmmInt 0 wordWidth)
276 jumpTableEntry (Just blockid) = CmmStaticLit (CmmLabel blockLabel)
277 where blockLabel = mkAsmTempLabel (getUnique blockid)
280 -- -----------------------------------------------------------------------------
281 -- General things for putting together code sequences
283 -- Expand CmmRegOff. ToDo: should we do it this way around, or convert
284 -- CmmExprs into CmmRegOff?
285 mangleIndexTree :: CmmExpr -> CmmExpr
286 mangleIndexTree (CmmRegOff reg off)
287 = CmmMachOp (MO_Add width) [CmmReg reg, CmmLit (CmmInt (fromIntegral off) width)]
288 where width = typeWidth (cmmRegType reg)
290 -- | The dual to getAnyReg: compute an expression into a register, but
291 -- we don't mind which one it is.
292 getSomeReg :: CmmExpr -> NatM (Reg, InstrBlock)
294 r <- getRegister expr
297 tmp <- getNewRegNat rep
298 return (tmp, code tmp)
306 assignMem_I64Code :: CmmExpr -> CmmExpr -> NatM InstrBlock
307 assignMem_I64Code addrTree valueTree = do
308 Amode addr addr_code <- getAmode addrTree
309 ChildCode64 vcode rlo <- iselExpr64 valueTree
311 rhi = getHiVRegFromLo rlo
313 -- Little-endian store
314 mov_lo = MOV II32 (OpReg rlo) (OpAddr addr)
315 mov_hi = MOV II32 (OpReg rhi) (OpAddr (fromJust (addrOffset addr 4)))
317 return (vcode `appOL` addr_code `snocOL` mov_lo `snocOL` mov_hi)
320 assignReg_I64Code :: CmmReg -> CmmExpr -> NatM InstrBlock
321 assignReg_I64Code (CmmLocal (LocalReg u_dst pk)) valueTree = do
322 ChildCode64 vcode r_src_lo <- iselExpr64 valueTree
324 r_dst_lo = RegVirtual $ mkVirtualReg u_dst II32
325 r_dst_hi = getHiVRegFromLo r_dst_lo
326 r_src_hi = getHiVRegFromLo r_src_lo
327 mov_lo = MOV II32 (OpReg r_src_lo) (OpReg r_dst_lo)
328 mov_hi = MOV II32 (OpReg r_src_hi) (OpReg r_dst_hi)
331 vcode `snocOL` mov_lo `snocOL` mov_hi
334 assignReg_I64Code lvalue valueTree
335 = panic "assignReg_I64Code(i386): invalid lvalue"
340 iselExpr64 :: CmmExpr -> NatM ChildCode64
341 iselExpr64 (CmmLit (CmmInt i _)) = do
342 (rlo,rhi) <- getNewRegPairNat II32
344 r = fromIntegral (fromIntegral i :: Word32)
345 q = fromIntegral ((fromIntegral i `shiftR` 32) :: Word32)
347 MOV II32 (OpImm (ImmInteger r)) (OpReg rlo),
348 MOV II32 (OpImm (ImmInteger q)) (OpReg rhi)
351 return (ChildCode64 code rlo)
353 iselExpr64 (CmmLoad addrTree ty) | isWord64 ty = do
354 Amode addr addr_code <- getAmode addrTree
355 (rlo,rhi) <- getNewRegPairNat II32
357 mov_lo = MOV II32 (OpAddr addr) (OpReg rlo)
358 mov_hi = MOV II32 (OpAddr (fromJust (addrOffset addr 4))) (OpReg rhi)
361 ChildCode64 (addr_code `snocOL` mov_lo `snocOL` mov_hi)
365 iselExpr64 (CmmReg (CmmLocal (LocalReg vu ty))) | isWord64 ty
366 = return (ChildCode64 nilOL (RegVirtual $ mkVirtualReg vu II32))
368 -- we handle addition, but rather badly
369 iselExpr64 (CmmMachOp (MO_Add _) [e1, CmmLit (CmmInt i _)]) = do
370 ChildCode64 code1 r1lo <- iselExpr64 e1
371 (rlo,rhi) <- getNewRegPairNat II32
373 r = fromIntegral (fromIntegral i :: Word32)
374 q = fromIntegral ((fromIntegral i `shiftR` 32) :: Word32)
375 r1hi = getHiVRegFromLo r1lo
377 toOL [ MOV II32 (OpReg r1lo) (OpReg rlo),
378 ADD II32 (OpImm (ImmInteger r)) (OpReg rlo),
379 MOV II32 (OpReg r1hi) (OpReg rhi),
380 ADC II32 (OpImm (ImmInteger q)) (OpReg rhi) ]
382 return (ChildCode64 code rlo)
384 iselExpr64 (CmmMachOp (MO_Add _) [e1,e2]) = do
385 ChildCode64 code1 r1lo <- iselExpr64 e1
386 ChildCode64 code2 r2lo <- iselExpr64 e2
387 (rlo,rhi) <- getNewRegPairNat II32
389 r1hi = getHiVRegFromLo r1lo
390 r2hi = getHiVRegFromLo r2lo
393 toOL [ MOV II32 (OpReg r1lo) (OpReg rlo),
394 ADD II32 (OpReg r2lo) (OpReg rlo),
395 MOV II32 (OpReg r1hi) (OpReg rhi),
396 ADC II32 (OpReg r2hi) (OpReg rhi) ]
398 return (ChildCode64 code rlo)
400 iselExpr64 (CmmMachOp (MO_UU_Conv _ W64) [expr]) = do
402 r_dst_lo <- getNewRegNat II32
403 let r_dst_hi = getHiVRegFromLo r_dst_lo
406 ChildCode64 (code `snocOL`
407 MOV II32 (OpImm (ImmInt 0)) (OpReg r_dst_hi))
412 = pprPanic "iselExpr64(i386)" (ppr expr)
416 --------------------------------------------------------------------------------
417 getRegister :: CmmExpr -> NatM Register
419 #if !x86_64_TARGET_ARCH
420 -- on x86_64, we have %rip for PicBaseReg, but it's not a full-featured
421 -- register, it can only be used for rip-relative addressing.
422 getRegister (CmmReg (CmmGlobal PicBaseReg))
424 reg <- getPicBaseNat archWordSize
425 return (Fixed archWordSize reg nilOL)
428 getRegister (CmmReg reg)
429 = do use_sse2 <- sse2Enabled
431 sz = cmmTypeSize (cmmRegType reg)
432 size | not use_sse2 && isFloatSize sz = FF80
435 return (Fixed size (getRegisterReg use_sse2 reg) nilOL)
438 getRegister tree@(CmmRegOff _ _)
439 = getRegister (mangleIndexTree tree)
442 #if WORD_SIZE_IN_BITS==32
443 -- for 32-bit architectuers, support some 64 -> 32 bit conversions:
444 -- TO_W_(x), TO_W_(x >> 32)
446 getRegister (CmmMachOp (MO_UU_Conv W64 W32)
447 [CmmMachOp (MO_U_Shr W64) [x,CmmLit (CmmInt 32 _)]]) = do
448 ChildCode64 code rlo <- iselExpr64 x
449 return $ Fixed II32 (getHiVRegFromLo rlo) code
451 getRegister (CmmMachOp (MO_SS_Conv W64 W32)
452 [CmmMachOp (MO_U_Shr W64) [x,CmmLit (CmmInt 32 _)]]) = do
453 ChildCode64 code rlo <- iselExpr64 x
454 return $ Fixed II32 (getHiVRegFromLo rlo) code
456 getRegister (CmmMachOp (MO_UU_Conv W64 W32) [x]) = do
457 ChildCode64 code rlo <- iselExpr64 x
458 return $ Fixed II32 rlo code
460 getRegister (CmmMachOp (MO_SS_Conv W64 W32) [x]) = do
461 ChildCode64 code rlo <- iselExpr64 x
462 return $ Fixed II32 rlo code
467 getRegister (CmmLit lit@(CmmFloat f w)) =
468 if_sse2 float_const_sse2 float_const_x87
474 code dst = unitOL (XOR size (OpReg dst) (OpReg dst))
475 -- I don't know why there are xorpd, xorps, and pxor instructions.
476 -- They all appear to do the same thing --SDM
477 return (Any size code)
480 Amode addr code <- memConstant (widthInBytes w) lit
481 loadFloatAmode True w addr code
483 float_const_x87 = case w of
486 let code dst = unitOL (GLDZ dst)
487 in return (Any FF80 code)
490 let code dst = unitOL (GLD1 dst)
491 in return (Any FF80 code)
494 Amode addr code <- memConstant (widthInBytes w) lit
495 loadFloatAmode False w addr code
497 -- catch simple cases of zero- or sign-extended load
498 getRegister (CmmMachOp (MO_UU_Conv W8 W32) [CmmLoad addr _]) = do
499 code <- intLoadCode (MOVZxL II8) addr
500 return (Any II32 code)
502 getRegister (CmmMachOp (MO_SS_Conv W8 W32) [CmmLoad addr _]) = do
503 code <- intLoadCode (MOVSxL II8) addr
504 return (Any II32 code)
506 getRegister (CmmMachOp (MO_UU_Conv W16 W32) [CmmLoad addr _]) = do
507 code <- intLoadCode (MOVZxL II16) addr
508 return (Any II32 code)
510 getRegister (CmmMachOp (MO_SS_Conv W16 W32) [CmmLoad addr _]) = do
511 code <- intLoadCode (MOVSxL II16) addr
512 return (Any II32 code)
515 #if x86_64_TARGET_ARCH
517 -- catch simple cases of zero- or sign-extended load
518 getRegister (CmmMachOp (MO_UU_Conv W8 W64) [CmmLoad addr _]) = do
519 code <- intLoadCode (MOVZxL II8) addr
520 return (Any II64 code)
522 getRegister (CmmMachOp (MO_SS_Conv W8 W64) [CmmLoad addr _]) = do
523 code <- intLoadCode (MOVSxL II8) addr
524 return (Any II64 code)
526 getRegister (CmmMachOp (MO_UU_Conv W16 W64) [CmmLoad addr _]) = do
527 code <- intLoadCode (MOVZxL II16) addr
528 return (Any II64 code)
530 getRegister (CmmMachOp (MO_SS_Conv W16 W64) [CmmLoad addr _]) = do
531 code <- intLoadCode (MOVSxL II16) addr
532 return (Any II64 code)
534 getRegister (CmmMachOp (MO_UU_Conv W32 W64) [CmmLoad addr _]) = do
535 code <- intLoadCode (MOV II32) addr -- 32-bit loads zero-extend
536 return (Any II64 code)
538 getRegister (CmmMachOp (MO_SS_Conv W32 W64) [CmmLoad addr _]) = do
539 code <- intLoadCode (MOVSxL II32) addr
540 return (Any II64 code)
542 getRegister (CmmMachOp (MO_Add W64) [CmmReg (CmmGlobal PicBaseReg),
543 CmmLit displacement])
544 = return $ Any II64 (\dst -> unitOL $
545 LEA II64 (OpAddr (ripRel (litToImm displacement))) (OpReg dst))
547 #endif /* x86_64_TARGET_ARCH */
553 getRegister (CmmMachOp mop [x]) = do -- unary MachOps
557 | sse2 -> sse2NegCode w x
558 | otherwise -> trivialUFCode FF80 (GNEG FF80) x
560 MO_S_Neg w -> triv_ucode NEGI (intSize w)
561 MO_Not w -> triv_ucode NOT (intSize w)
564 MO_UU_Conv W32 W8 -> toI8Reg W32 x
565 MO_SS_Conv W32 W8 -> toI8Reg W32 x
566 MO_UU_Conv W16 W8 -> toI8Reg W16 x
567 MO_SS_Conv W16 W8 -> toI8Reg W16 x
568 MO_UU_Conv W32 W16 -> toI16Reg W32 x
569 MO_SS_Conv W32 W16 -> toI16Reg W32 x
571 #if x86_64_TARGET_ARCH
572 MO_UU_Conv W64 W32 -> conversionNop II64 x
573 MO_SS_Conv W64 W32 -> conversionNop II64 x
574 MO_UU_Conv W64 W16 -> toI16Reg W64 x
575 MO_SS_Conv W64 W16 -> toI16Reg W64 x
576 MO_UU_Conv W64 W8 -> toI8Reg W64 x
577 MO_SS_Conv W64 W8 -> toI8Reg W64 x
580 MO_UU_Conv rep1 rep2 | rep1 == rep2 -> conversionNop (intSize rep1) x
581 MO_SS_Conv rep1 rep2 | rep1 == rep2 -> conversionNop (intSize rep1) x
584 MO_UU_Conv W8 W32 -> integerExtend W8 W32 MOVZxL x
585 MO_UU_Conv W16 W32 -> integerExtend W16 W32 MOVZxL x
586 MO_UU_Conv W8 W16 -> integerExtend W8 W16 MOVZxL x
588 MO_SS_Conv W8 W32 -> integerExtend W8 W32 MOVSxL x
589 MO_SS_Conv W16 W32 -> integerExtend W16 W32 MOVSxL x
590 MO_SS_Conv W8 W16 -> integerExtend W8 W16 MOVSxL x
592 #if x86_64_TARGET_ARCH
593 MO_UU_Conv W8 W64 -> integerExtend W8 W64 MOVZxL x
594 MO_UU_Conv W16 W64 -> integerExtend W16 W64 MOVZxL x
595 MO_UU_Conv W32 W64 -> integerExtend W32 W64 MOVZxL x
596 MO_SS_Conv W8 W64 -> integerExtend W8 W64 MOVSxL x
597 MO_SS_Conv W16 W64 -> integerExtend W16 W64 MOVSxL x
598 MO_SS_Conv W32 W64 -> integerExtend W32 W64 MOVSxL x
599 -- for 32-to-64 bit zero extension, amd64 uses an ordinary movl.
600 -- However, we don't want the register allocator to throw it
601 -- away as an unnecessary reg-to-reg move, so we keep it in
602 -- the form of a movzl and print it as a movl later.
606 | sse2 -> coerceFP2FP W64 x
607 | otherwise -> conversionNop FF80 x
609 MO_FF_Conv W64 W32 -> coerceFP2FP W32 x
611 MO_FS_Conv from to -> coerceFP2Int from to x
612 MO_SF_Conv from to -> coerceInt2FP from to x
614 other -> pprPanic "getRegister" (pprMachOp mop)
616 triv_ucode :: (Size -> Operand -> Instr) -> Size -> NatM Register
617 triv_ucode instr size = trivialUCode size (instr size) x
619 -- signed or unsigned extension.
620 integerExtend :: Width -> Width
621 -> (Size -> Operand -> Operand -> Instr)
622 -> CmmExpr -> NatM Register
623 integerExtend from to instr expr = do
624 (reg,e_code) <- if from == W8 then getByteReg expr
629 instr (intSize from) (OpReg reg) (OpReg dst)
630 return (Any (intSize to) code)
632 toI8Reg :: Width -> CmmExpr -> NatM Register
634 = do codefn <- getAnyReg expr
635 return (Any (intSize new_rep) codefn)
636 -- HACK: use getAnyReg to get a byte-addressable register.
637 -- If the source was a Fixed register, this will add the
638 -- mov instruction to put it into the desired destination.
639 -- We're assuming that the destination won't be a fixed
640 -- non-byte-addressable register; it won't be, because all
641 -- fixed registers are word-sized.
643 toI16Reg = toI8Reg -- for now
645 conversionNop :: Size -> CmmExpr -> NatM Register
646 conversionNop new_size expr
647 = do e_code <- getRegister expr
648 return (swizzleRegisterRep e_code new_size)
651 getRegister e@(CmmMachOp mop [x, y]) = do -- dyadic MachOps
654 MO_F_Eq w -> condFltReg EQQ x y
655 MO_F_Ne w -> condFltReg NE x y
656 MO_F_Gt w -> condFltReg GTT x y
657 MO_F_Ge w -> condFltReg GE x y
658 MO_F_Lt w -> condFltReg LTT x y
659 MO_F_Le w -> condFltReg LE x y
661 MO_Eq rep -> condIntReg EQQ x y
662 MO_Ne rep -> condIntReg NE x y
664 MO_S_Gt rep -> condIntReg GTT x y
665 MO_S_Ge rep -> condIntReg GE x y
666 MO_S_Lt rep -> condIntReg LTT x y
667 MO_S_Le rep -> condIntReg LE x y
669 MO_U_Gt rep -> condIntReg GU x y
670 MO_U_Ge rep -> condIntReg GEU x y
671 MO_U_Lt rep -> condIntReg LU x y
672 MO_U_Le rep -> condIntReg LEU x y
674 MO_F_Add w | sse2 -> trivialFCode_sse2 w ADD x y
675 | otherwise -> trivialFCode_x87 w GADD x y
676 MO_F_Sub w | sse2 -> trivialFCode_sse2 w SUB x y
677 | otherwise -> trivialFCode_x87 w GSUB x y
678 MO_F_Quot w | sse2 -> trivialFCode_sse2 w FDIV x y
679 | otherwise -> trivialFCode_x87 w GDIV x y
680 MO_F_Mul w | sse2 -> trivialFCode_sse2 w MUL x y
681 | otherwise -> trivialFCode_x87 w GMUL x y
683 MO_Add rep -> add_code rep x y
684 MO_Sub rep -> sub_code rep x y
686 MO_S_Quot rep -> div_code rep True True x y
687 MO_S_Rem rep -> div_code rep True False x y
688 MO_U_Quot rep -> div_code rep False True x y
689 MO_U_Rem rep -> div_code rep False False x y
691 MO_S_MulMayOflo rep -> imulMayOflo rep x y
693 MO_Mul rep -> triv_op rep IMUL
694 MO_And rep -> triv_op rep AND
695 MO_Or rep -> triv_op rep OR
696 MO_Xor rep -> triv_op rep XOR
698 {- Shift ops on x86s have constraints on their source, it
699 either has to be Imm, CL or 1
700 => trivialCode is not restrictive enough (sigh.)
702 MO_Shl rep -> shift_code rep SHL x y {-False-}
703 MO_U_Shr rep -> shift_code rep SHR x y {-False-}
704 MO_S_Shr rep -> shift_code rep SAR x y {-False-}
706 other -> pprPanic "getRegister(x86) - binary CmmMachOp (1)" (pprMachOp mop)
709 triv_op width instr = trivialCode width op (Just op) x y
710 where op = instr (intSize width)
712 imulMayOflo :: Width -> CmmExpr -> CmmExpr -> NatM Register
713 imulMayOflo rep a b = do
714 (a_reg, a_code) <- getNonClobberedReg a
715 b_code <- getAnyReg b
717 shift_amt = case rep of
720 _ -> panic "shift_amt"
723 code = a_code `appOL` b_code eax `appOL`
725 IMUL2 size (OpReg a_reg), -- result in %edx:%eax
726 SAR size (OpImm (ImmInt shift_amt)) (OpReg eax),
727 -- sign extend lower part
728 SUB size (OpReg edx) (OpReg eax)
729 -- compare against upper
730 -- eax==0 if high part == sign extended low part
733 return (Fixed size eax code)
737 -> (Size -> Operand -> Operand -> Instr)
742 {- Case1: shift length as immediate -}
743 shift_code width instr x y@(CmmLit lit) = do
744 x_code <- getAnyReg x
748 = x_code dst `snocOL`
749 instr size (OpImm (litToImm lit)) (OpReg dst)
751 return (Any size code)
753 {- Case2: shift length is complex (non-immediate)
755 * we cannot do y first *and* put its result in %ecx, because
756 %ecx might be clobbered by x.
757 * if we do y second, then x cannot be
758 in a clobbered reg. Also, we cannot clobber x's reg
759 with the instruction itself.
761 - do y first, put its result in a fresh tmp, then copy it to %ecx later
762 - do y second and put its result into %ecx. x gets placed in a fresh
763 tmp. This is likely to be better, becuase the reg alloc can
764 eliminate this reg->reg move here (it won't eliminate the other one,
765 because the move is into the fixed %ecx).
767 shift_code width instr x y{-amount-} = do
768 x_code <- getAnyReg x
769 let size = intSize width
770 tmp <- getNewRegNat size
771 y_code <- getAnyReg y
773 code = x_code tmp `appOL`
775 instr size (OpReg ecx) (OpReg tmp)
777 return (Fixed size tmp code)
780 add_code :: Width -> CmmExpr -> CmmExpr -> NatM Register
781 add_code rep x (CmmLit (CmmInt y _))
782 | is32BitInteger y = add_int rep x y
783 add_code rep x y = trivialCode rep (ADD size) (Just (ADD size)) x y
784 where size = intSize rep
787 sub_code :: Width -> CmmExpr -> CmmExpr -> NatM Register
788 sub_code rep x (CmmLit (CmmInt y _))
789 | is32BitInteger (-y) = add_int rep x (-y)
790 sub_code rep x y = trivialCode rep (SUB (intSize rep)) Nothing x y
792 -- our three-operand add instruction:
793 add_int width x y = do
794 (x_reg, x_code) <- getSomeReg x
797 imm = ImmInt (fromInteger y)
801 (OpAddr (AddrBaseIndex (EABaseReg x_reg) EAIndexNone imm))
804 return (Any size code)
806 ----------------------
807 div_code width signed quotient x y = do
808 (y_op, y_code) <- getRegOrMem y -- cannot be clobbered
809 x_code <- getAnyReg x
812 widen | signed = CLTD size
813 | otherwise = XOR size (OpReg edx) (OpReg edx)
815 instr | signed = IDIV
818 code = y_code `appOL`
820 toOL [widen, instr size y_op]
822 result | quotient = eax
826 return (Fixed size result code)
829 getRegister (CmmLoad mem pk)
832 Amode addr mem_code <- getAmode mem
833 use_sse2 <- sse2Enabled
834 loadFloatAmode use_sse2 (typeWidth pk) addr mem_code
837 getRegister (CmmLoad mem pk)
840 code <- intLoadCode instr mem
841 return (Any size code)
845 instr = case width of
848 -- We always zero-extend 8-bit loads, if we
849 -- can't think of anything better. This is because
850 -- we can't guarantee access to an 8-bit variant of every register
851 -- (esi and edi don't have 8-bit variants), so to make things
852 -- simpler we do our 8-bit arithmetic with full 32-bit registers.
855 #if x86_64_TARGET_ARCH
856 -- Simpler memory load code on x86_64
857 getRegister (CmmLoad mem pk)
859 code <- intLoadCode (MOV size) mem
860 return (Any size code)
861 where size = intSize $ typeWidth pk
864 getRegister (CmmLit (CmmInt 0 width))
868 -- x86_64: 32-bit xor is one byte shorter, and zero-extends to 64 bits
869 adj_size = case size of II64 -> II32; _ -> size
870 size1 = IF_ARCH_i386( size, adj_size )
872 = unitOL (XOR size1 (OpReg dst) (OpReg dst))
874 return (Any size code)
876 #if x86_64_TARGET_ARCH
877 -- optimisation for loading small literals on x86_64: take advantage
878 -- of the automatic zero-extension from 32 to 64 bits, because the 32-bit
879 -- instruction forms are shorter.
880 getRegister (CmmLit lit)
881 | isWord64 (cmmLitType lit), not (isBigLit lit)
884 code dst = unitOL (MOV II32 (OpImm imm) (OpReg dst))
886 return (Any II64 code)
888 isBigLit (CmmInt i _) = i < 0 || i > 0xffffffff
890 -- note1: not the same as (not.is32BitLit), because that checks for
891 -- signed literals that fit in 32 bits, but we want unsigned
893 -- note2: all labels are small, because we're assuming the
894 -- small memory model (see gcc docs, -mcmodel=small).
897 getRegister (CmmLit lit)
899 size = cmmTypeSize (cmmLitType lit)
901 code dst = unitOL (MOV size (OpImm imm) (OpReg dst))
903 return (Any size code)
905 getRegister other = pprPanic "getRegister(x86)" (ppr other)
908 intLoadCode :: (Operand -> Operand -> Instr) -> CmmExpr
909 -> NatM (Reg -> InstrBlock)
910 intLoadCode instr mem = do
911 Amode src mem_code <- getAmode mem
912 return (\dst -> mem_code `snocOL` instr (OpAddr src) (OpReg dst))
914 -- Compute an expression into *any* register, adding the appropriate
915 -- move instruction if necessary.
916 getAnyReg :: CmmExpr -> NatM (Reg -> InstrBlock)
918 r <- getRegister expr
921 anyReg :: Register -> NatM (Reg -> InstrBlock)
922 anyReg (Any _ code) = return code
923 anyReg (Fixed rep reg fcode) = return (\dst -> fcode `snocOL` reg2reg rep reg dst)
925 -- A bit like getSomeReg, but we want a reg that can be byte-addressed.
926 -- Fixed registers might not be byte-addressable, so we make sure we've
927 -- got a temporary, inserting an extra reg copy if necessary.
928 getByteReg :: CmmExpr -> NatM (Reg, InstrBlock)
929 #if x86_64_TARGET_ARCH
930 getByteReg = getSomeReg -- all regs are byte-addressable on x86_64
933 r <- getRegister expr
936 tmp <- getNewRegNat rep
937 return (tmp, code tmp)
939 | isVirtualReg reg -> return (reg,code)
941 tmp <- getNewRegNat rep
942 return (tmp, code `snocOL` reg2reg rep reg tmp)
943 -- ToDo: could optimise slightly by checking for byte-addressable
944 -- real registers, but that will happen very rarely if at all.
947 -- Another variant: this time we want the result in a register that cannot
948 -- be modified by code to evaluate an arbitrary expression.
949 getNonClobberedReg :: CmmExpr -> NatM (Reg, InstrBlock)
950 getNonClobberedReg expr = do
951 r <- getRegister expr
954 tmp <- getNewRegNat rep
955 return (tmp, code tmp)
957 -- only free regs can be clobbered
958 | RegReal (RealRegSingle rr) <- reg
959 , isFastTrue (freeReg rr)
961 tmp <- getNewRegNat rep
962 return (tmp, code `snocOL` reg2reg rep reg tmp)
966 reg2reg :: Size -> Reg -> Reg -> Instr
968 | size == FF80 = GMOV src dst
969 | otherwise = MOV size (OpReg src) (OpReg dst)
972 --------------------------------------------------------------------------------
973 getAmode :: CmmExpr -> NatM Amode
974 getAmode tree@(CmmRegOff _ _) = getAmode (mangleIndexTree tree)
976 #if x86_64_TARGET_ARCH
978 getAmode (CmmMachOp (MO_Add W64) [CmmReg (CmmGlobal PicBaseReg),
979 CmmLit displacement])
980 = return $ Amode (ripRel (litToImm displacement)) nilOL
985 -- This is all just ridiculous, since it carefully undoes
986 -- what mangleIndexTree has just done.
987 getAmode (CmmMachOp (MO_Sub rep) [x, CmmLit lit@(CmmInt i _)])
989 -- ASSERT(rep == II32)???
990 = do (x_reg, x_code) <- getSomeReg x
991 let off = ImmInt (-(fromInteger i))
992 return (Amode (AddrBaseIndex (EABaseReg x_reg) EAIndexNone off) x_code)
994 getAmode (CmmMachOp (MO_Add rep) [x, CmmLit lit])
996 -- ASSERT(rep == II32)???
997 = do (x_reg, x_code) <- getSomeReg x
998 let off = litToImm lit
999 return (Amode (AddrBaseIndex (EABaseReg x_reg) EAIndexNone off) x_code)
1001 -- Turn (lit1 << n + lit2) into (lit2 + lit1 << n) so it will be
1002 -- recognised by the next rule.
1003 getAmode (CmmMachOp (MO_Add rep) [a@(CmmMachOp (MO_Shl _) _),
1005 = getAmode (CmmMachOp (MO_Add rep) [b,a])
1007 getAmode (CmmMachOp (MO_Add rep) [x, CmmMachOp (MO_Shl _)
1008 [y, CmmLit (CmmInt shift _)]])
1009 | shift == 0 || shift == 1 || shift == 2 || shift == 3
1010 = x86_complex_amode x y shift 0
1012 getAmode (CmmMachOp (MO_Add rep)
1013 [x, CmmMachOp (MO_Add _)
1014 [CmmMachOp (MO_Shl _) [y, CmmLit (CmmInt shift _)],
1015 CmmLit (CmmInt offset _)]])
1016 | shift == 0 || shift == 1 || shift == 2 || shift == 3
1017 && is32BitInteger offset
1018 = x86_complex_amode x y shift offset
1020 getAmode (CmmMachOp (MO_Add rep) [x,y])
1021 = x86_complex_amode x y 0 0
1023 getAmode (CmmLit lit) | is32BitLit lit
1024 = return (Amode (ImmAddr (litToImm lit) 0) nilOL)
1027 (reg,code) <- getSomeReg expr
1028 return (Amode (AddrBaseIndex (EABaseReg reg) EAIndexNone (ImmInt 0)) code)
1031 x86_complex_amode :: CmmExpr -> CmmExpr -> Integer -> Integer -> NatM Amode
1032 x86_complex_amode base index shift offset
1033 = do (x_reg, x_code) <- getNonClobberedReg base
1034 -- x must be in a temp, because it has to stay live over y_code
1035 -- we could compre x_reg and y_reg and do something better here...
1036 (y_reg, y_code) <- getSomeReg index
1038 code = x_code `appOL` y_code
1039 base = case shift of 0 -> 1; 1 -> 2; 2 -> 4; 3 -> 8
1040 return (Amode (AddrBaseIndex (EABaseReg x_reg) (EAIndex y_reg base) (ImmInt (fromIntegral offset)))
1046 -- -----------------------------------------------------------------------------
1047 -- getOperand: sometimes any operand will do.
1049 -- getNonClobberedOperand: the value of the operand will remain valid across
1050 -- the computation of an arbitrary expression, unless the expression
1051 -- is computed directly into a register which the operand refers to
1052 -- (see trivialCode where this function is used for an example).
1054 getNonClobberedOperand :: CmmExpr -> NatM (Operand, InstrBlock)
1055 getNonClobberedOperand (CmmLit lit) = do
1056 use_sse2 <- sse2Enabled
1057 if use_sse2 && isSuitableFloatingPointLit lit
1059 let CmmFloat _ w = lit
1060 Amode addr code <- memConstant (widthInBytes w) lit
1061 return (OpAddr addr, code)
1064 if is32BitLit lit && not (isFloatType (cmmLitType lit))
1065 then return (OpImm (litToImm lit), nilOL)
1066 else getNonClobberedOperand_generic (CmmLit lit)
1068 getNonClobberedOperand (CmmLoad mem pk) = do
1069 use_sse2 <- sse2Enabled
1070 if (not (isFloatType pk) || use_sse2)
1071 && IF_ARCH_i386(not (isWord64 pk), True)
1073 Amode src mem_code <- getAmode mem
1075 if (amodeCouldBeClobbered src)
1077 tmp <- getNewRegNat archWordSize
1078 return (AddrBaseIndex (EABaseReg tmp) EAIndexNone (ImmInt 0),
1079 unitOL (LEA II32 (OpAddr src) (OpReg tmp)))
1082 return (OpAddr src', save_code `appOL` mem_code)
1084 getNonClobberedOperand_generic (CmmLoad mem pk)
1086 getNonClobberedOperand e = getNonClobberedOperand_generic e
1088 getNonClobberedOperand_generic :: CmmExpr -> NatM (Operand, InstrBlock)
1089 getNonClobberedOperand_generic e = do
1090 (reg, code) <- getNonClobberedReg e
1091 return (OpReg reg, code)
1093 amodeCouldBeClobbered :: AddrMode -> Bool
1094 amodeCouldBeClobbered amode = any regClobbered (addrModeRegs amode)
1096 regClobbered (RegReal (RealRegSingle rr)) = isFastTrue (freeReg rr)
1097 regClobbered _ = False
1099 -- getOperand: the operand is not required to remain valid across the
1100 -- computation of an arbitrary expression.
1101 getOperand :: CmmExpr -> NatM (Operand, InstrBlock)
1103 getOperand (CmmLit lit) = do
1104 use_sse2 <- sse2Enabled
1105 if (use_sse2 && isSuitableFloatingPointLit lit)
1107 let CmmFloat _ w = lit
1108 Amode addr code <- memConstant (widthInBytes w) lit
1109 return (OpAddr addr, code)
1112 if is32BitLit lit && not (isFloatType (cmmLitType lit))
1113 then return (OpImm (litToImm lit), nilOL)
1114 else getOperand_generic (CmmLit lit)
1116 getOperand (CmmLoad mem pk) = do
1117 use_sse2 <- sse2Enabled
1118 if (not (isFloatType pk) || use_sse2) && IF_ARCH_i386(not (isWord64 pk), True)
1120 Amode src mem_code <- getAmode mem
1121 return (OpAddr src, mem_code)
1123 getOperand_generic (CmmLoad mem pk)
1125 getOperand e = getOperand_generic e
1127 getOperand_generic e = do
1128 (reg, code) <- getSomeReg e
1129 return (OpReg reg, code)
1131 isOperand :: CmmExpr -> Bool
1132 isOperand (CmmLoad _ _) = True
1133 isOperand (CmmLit lit) = is32BitLit lit
1134 || isSuitableFloatingPointLit lit
1137 memConstant :: Int -> CmmLit -> NatM Amode
1138 memConstant align lit = do
1139 #ifdef x86_64_TARGET_ARCH
1140 lbl <- getNewLabelNat
1141 let addr = ripRel (ImmCLbl lbl)
1144 lbl <- getNewLabelNat
1145 dflags <- getDynFlagsNat
1146 dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl
1147 Amode addr addr_code <- getAmode dynRef
1155 return (Amode addr code)
1158 loadFloatAmode :: Bool -> Width -> AddrMode -> InstrBlock -> NatM Register
1159 loadFloatAmode use_sse2 w addr addr_code = do
1160 let size = floatSize w
1161 code dst = addr_code `snocOL`
1163 then MOV size (OpAddr addr) (OpReg dst)
1164 else GLD size addr dst
1166 return (Any (if use_sse2 then size else FF80) code)
1169 -- if we want a floating-point literal as an operand, we can
1170 -- use it directly from memory. However, if the literal is
1171 -- zero, we're better off generating it into a register using
1173 isSuitableFloatingPointLit (CmmFloat f _) = f /= 0.0
1174 isSuitableFloatingPointLit _ = False
1176 getRegOrMem :: CmmExpr -> NatM (Operand, InstrBlock)
1177 getRegOrMem e@(CmmLoad mem pk) = do
1178 use_sse2 <- sse2Enabled
1179 if (not (isFloatType pk) || use_sse2) && IF_ARCH_i386(not (isWord64 pk), True)
1181 Amode src mem_code <- getAmode mem
1182 return (OpAddr src, mem_code)
1184 (reg, code) <- getNonClobberedReg e
1185 return (OpReg reg, code)
1187 (reg, code) <- getNonClobberedReg e
1188 return (OpReg reg, code)
1190 #if x86_64_TARGET_ARCH
1191 is32BitLit (CmmInt i W64) = is32BitInteger i
1192 -- assume that labels are in the range 0-2^31-1: this assumes the
1193 -- small memory model (see gcc docs, -mcmodel=small).
1200 -- Set up a condition code for a conditional branch.
1202 getCondCode :: CmmExpr -> NatM CondCode
1204 -- yes, they really do seem to want exactly the same!
1206 getCondCode (CmmMachOp mop [x, y])
1209 MO_F_Eq W32 -> condFltCode EQQ x y
1210 MO_F_Ne W32 -> condFltCode NE x y
1211 MO_F_Gt W32 -> condFltCode GTT x y
1212 MO_F_Ge W32 -> condFltCode GE x y
1213 MO_F_Lt W32 -> condFltCode LTT x y
1214 MO_F_Le W32 -> condFltCode LE x y
1216 MO_F_Eq W64 -> condFltCode EQQ x y
1217 MO_F_Ne W64 -> condFltCode NE x y
1218 MO_F_Gt W64 -> condFltCode GTT x y
1219 MO_F_Ge W64 -> condFltCode GE x y
1220 MO_F_Lt W64 -> condFltCode LTT x y
1221 MO_F_Le W64 -> condFltCode LE x y
1223 MO_Eq rep -> condIntCode EQQ x y
1224 MO_Ne rep -> condIntCode NE x y
1226 MO_S_Gt rep -> condIntCode GTT x y
1227 MO_S_Ge rep -> condIntCode GE x y
1228 MO_S_Lt rep -> condIntCode LTT x y
1229 MO_S_Le rep -> condIntCode LE x y
1231 MO_U_Gt rep -> condIntCode GU x y
1232 MO_U_Ge rep -> condIntCode GEU x y
1233 MO_U_Lt rep -> condIntCode LU x y
1234 MO_U_Le rep -> condIntCode LEU x y
1236 other -> pprPanic "getCondCode(x86,x86_64,sparc)" (ppr (CmmMachOp mop [x,y]))
1238 getCondCode other = pprPanic "getCondCode(2)(x86,sparc)" (ppr other)
1243 -- @cond(Int|Flt)Code@: Turn a boolean expression into a condition, to be
1244 -- passed back up the tree.
1246 condIntCode :: Cond -> CmmExpr -> CmmExpr -> NatM CondCode
1248 -- memory vs immediate
1249 condIntCode cond (CmmLoad x pk) (CmmLit lit) | is32BitLit lit = do
1250 Amode x_addr x_code <- getAmode x
1253 code = x_code `snocOL`
1254 CMP (cmmTypeSize pk) (OpImm imm) (OpAddr x_addr)
1256 return (CondCode False cond code)
1258 -- anything vs zero, using a mask
1259 -- TODO: Add some sanity checking!!!!
1260 condIntCode cond (CmmMachOp (MO_And rep) [x,o2]) (CmmLit (CmmInt 0 pk))
1261 | (CmmLit lit@(CmmInt mask pk2)) <- o2, is32BitLit lit
1263 (x_reg, x_code) <- getSomeReg x
1265 code = x_code `snocOL`
1266 TEST (intSize pk) (OpImm (ImmInteger mask)) (OpReg x_reg)
1268 return (CondCode False cond code)
1271 condIntCode cond x (CmmLit (CmmInt 0 pk)) = do
1272 (x_reg, x_code) <- getSomeReg x
1274 code = x_code `snocOL`
1275 TEST (intSize pk) (OpReg x_reg) (OpReg x_reg)
1277 return (CondCode False cond code)
1279 -- anything vs operand
1280 condIntCode cond x y | isOperand y = do
1281 (x_reg, x_code) <- getNonClobberedReg x
1282 (y_op, y_code) <- getOperand y
1284 code = x_code `appOL` y_code `snocOL`
1285 CMP (cmmTypeSize (cmmExprType x)) y_op (OpReg x_reg)
1287 return (CondCode False cond code)
1289 -- anything vs anything
1290 condIntCode cond x y = do
1291 (y_reg, y_code) <- getNonClobberedReg y
1292 (x_op, x_code) <- getRegOrMem x
1294 code = y_code `appOL`
1296 CMP (cmmTypeSize (cmmExprType x)) (OpReg y_reg) x_op
1298 return (CondCode False cond code)
1302 --------------------------------------------------------------------------------
1303 condFltCode :: Cond -> CmmExpr -> CmmExpr -> NatM CondCode
1305 condFltCode cond x y
1306 = if_sse2 condFltCode_sse2 condFltCode_x87
1310 = ASSERT(cond `elem` ([EQQ, NE, LE, LTT, GE, GTT])) do
1311 (x_reg, x_code) <- getNonClobberedReg x
1312 (y_reg, y_code) <- getSomeReg y
1313 use_sse2 <- sse2Enabled
1315 code = x_code `appOL` y_code `snocOL`
1316 GCMP cond x_reg y_reg
1317 -- The GCMP insn does the test and sets the zero flag if comparable
1318 -- and true. Hence we always supply EQQ as the condition to test.
1319 return (CondCode True EQQ code)
1321 -- in the SSE2 comparison ops (ucomiss, ucomisd) the left arg may be
1322 -- an operand, but the right must be a reg. We can probably do better
1323 -- than this general case...
1324 condFltCode_sse2 = do
1325 (x_reg, x_code) <- getNonClobberedReg x
1326 (y_op, y_code) <- getOperand y
1328 code = x_code `appOL`
1330 CMP (floatSize $ cmmExprWidth x) y_op (OpReg x_reg)
1331 -- NB(1): we need to use the unsigned comparison operators on the
1332 -- result of this comparison.
1334 return (CondCode True (condToUnsigned cond) code)
1336 -- -----------------------------------------------------------------------------
1337 -- Generating assignments
1339 -- Assignments are really at the heart of the whole code generation
1340 -- business. Almost all top-level nodes of any real importance are
1341 -- assignments, which correspond to loads, stores, or register
1342 -- transfers. If we're really lucky, some of the register transfers
1343 -- will go away, because we can use the destination register to
1344 -- complete the code generation for the right hand side. This only
1345 -- fails when the right hand side is forced into a fixed register
1346 -- (e.g. the result of a call).
1348 assignMem_IntCode :: Size -> CmmExpr -> CmmExpr -> NatM InstrBlock
1349 assignReg_IntCode :: Size -> CmmReg -> CmmExpr -> NatM InstrBlock
1351 assignMem_FltCode :: Size -> CmmExpr -> CmmExpr -> NatM InstrBlock
1352 assignReg_FltCode :: Size -> CmmReg -> CmmExpr -> NatM InstrBlock
1355 -- integer assignment to memory
1357 -- specific case of adding/subtracting an integer to a particular address.
1358 -- ToDo: catch other cases where we can use an operation directly on a memory
1360 assignMem_IntCode pk addr (CmmMachOp op [CmmLoad addr2 _,
1361 CmmLit (CmmInt i _)])
1362 | addr == addr2, pk /= II64 || is32BitInteger i,
1363 Just instr <- check op
1364 = do Amode amode code_addr <- getAmode addr
1365 let code = code_addr `snocOL`
1366 instr pk (OpImm (ImmInt (fromIntegral i))) (OpAddr amode)
1369 check (MO_Add _) = Just ADD
1370 check (MO_Sub _) = Just SUB
1375 assignMem_IntCode pk addr src = do
1376 Amode addr code_addr <- getAmode addr
1377 (code_src, op_src) <- get_op_RI src
1379 code = code_src `appOL`
1381 MOV pk op_src (OpAddr addr)
1382 -- NOTE: op_src is stable, so it will still be valid
1383 -- after code_addr. This may involve the introduction
1384 -- of an extra MOV to a temporary register, but we hope
1385 -- the register allocator will get rid of it.
1389 get_op_RI :: CmmExpr -> NatM (InstrBlock,Operand) -- code, operator
1390 get_op_RI (CmmLit lit) | is32BitLit lit
1391 = return (nilOL, OpImm (litToImm lit))
1393 = do (reg,code) <- getNonClobberedReg op
1394 return (code, OpReg reg)
1397 -- Assign; dst is a reg, rhs is mem
1398 assignReg_IntCode pk reg (CmmLoad src _) = do
1399 load_code <- intLoadCode (MOV pk) src
1400 return (load_code (getRegisterReg False{-no sse2-} reg))
1402 -- dst is a reg, but src could be anything
1403 assignReg_IntCode pk reg src = do
1404 code <- getAnyReg src
1405 return (code (getRegisterReg False{-no sse2-} reg))
1408 -- Floating point assignment to memory
1409 assignMem_FltCode pk addr src = do
1410 (src_reg, src_code) <- getNonClobberedReg src
1411 Amode addr addr_code <- getAmode addr
1412 use_sse2 <- sse2Enabled
1414 code = src_code `appOL`
1416 if use_sse2 then MOV pk (OpReg src_reg) (OpAddr addr)
1417 else GST pk src_reg addr
1420 -- Floating point assignment to a register/temporary
1421 assignReg_FltCode pk reg src = do
1422 use_sse2 <- sse2Enabled
1423 src_code <- getAnyReg src
1424 return (src_code (getRegisterReg use_sse2 reg))
1427 genJump :: CmmExpr{-the branch target-} -> NatM InstrBlock
1429 genJump (CmmLoad mem pk) = do
1430 Amode target code <- getAmode mem
1431 return (code `snocOL` JMP (OpAddr target))
1433 genJump (CmmLit lit) = do
1434 return (unitOL (JMP (OpImm (litToImm lit))))
1437 (reg,code) <- getSomeReg expr
1438 return (code `snocOL` JMP (OpReg reg))
1441 -- -----------------------------------------------------------------------------
1442 -- Unconditional branches
1444 genBranch :: BlockId -> NatM InstrBlock
1445 genBranch = return . toOL . mkJumpInstr
1449 -- -----------------------------------------------------------------------------
1450 -- Conditional jumps
1453 Conditional jumps are always to local labels, so we can use branch
1454 instructions. We peek at the arguments to decide what kind of
1457 I386: First, we have to ensure that the condition
1458 codes are set according to the supplied comparison operation.
1462 :: BlockId -- the branch target
1463 -> CmmExpr -- the condition on which to branch
1466 genCondJump id bool = do
1467 CondCode is_float cond cond_code <- getCondCode bool
1468 use_sse2 <- sse2Enabled
1469 if not is_float || not use_sse2
1471 return (cond_code `snocOL` JXX cond id)
1473 lbl <- getBlockIdNat
1475 -- see comment with condFltReg
1476 let code = case cond of
1482 plain_test = unitOL (
1485 or_unordered = toOL [
1489 and_ordered = toOL [
1495 return (cond_code `appOL` code)
1498 -- -----------------------------------------------------------------------------
1499 -- Generating C calls
1501 -- Now the biggest nightmare---calls. Most of the nastiness is buried in
1502 -- @get_arg@, which moves the arguments to the correct registers/stack
1503 -- locations. Apart from that, the code is easy.
1505 -- (If applicable) Do not fill the delay slots here; you will confuse the
1506 -- register allocator.
1509 :: CmmCallTarget -- function to call
1510 -> HintedCmmFormals -- where to put the result
1511 -> HintedCmmActuals -- arguments (of mixed type)
1514 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1516 #if i386_TARGET_ARCH
1518 genCCall (CmmPrim MO_WriteBarrier) _ _ = return nilOL
1519 -- write barrier compiles to no code on x86/x86-64;
1520 -- we keep it this long in order to prevent earlier optimisations.
1522 -- void return type prim op
1523 genCCall (CmmPrim op) [] args =
1524 outOfLineCmmOp op Nothing args
1526 -- we only cope with a single result for foreign calls
1527 genCCall (CmmPrim op) [r_hinted@(CmmHinted r _)] args = do
1528 l1 <- getNewLabelNat
1529 l2 <- getNewLabelNat
1533 outOfLineCmmOp op (Just r_hinted) args
1535 MO_F32_Sqrt -> actuallyInlineFloatOp GSQRT FF32 args
1536 MO_F64_Sqrt -> actuallyInlineFloatOp GSQRT FF64 args
1538 MO_F32_Sin -> actuallyInlineFloatOp (\s -> GSIN s l1 l2) FF32 args
1539 MO_F64_Sin -> actuallyInlineFloatOp (\s -> GSIN s l1 l2) FF64 args
1541 MO_F32_Cos -> actuallyInlineFloatOp (\s -> GCOS s l1 l2) FF32 args
1542 MO_F64_Cos -> actuallyInlineFloatOp (\s -> GCOS s l1 l2) FF64 args
1544 MO_F32_Tan -> actuallyInlineFloatOp (\s -> GTAN s l1 l2) FF32 args
1545 MO_F64_Tan -> actuallyInlineFloatOp (\s -> GTAN s l1 l2) FF64 args
1547 other_op -> outOfLineCmmOp op (Just r_hinted) args
1550 actuallyInlineFloatOp instr size [CmmHinted x _]
1551 = do res <- trivialUFCode size (instr size) x
1553 return (any (getRegisterReg False (CmmLocal r)))
1555 genCCall target dest_regs args = do
1557 sizes = map (arg_size . cmmExprType . hintlessCmm) (reverse args)
1558 #if !darwin_TARGET_OS
1559 tot_arg_size = sum sizes
1561 raw_arg_size = sum sizes
1562 tot_arg_size = roundTo 16 raw_arg_size
1563 arg_pad_size = tot_arg_size - raw_arg_size
1564 delta0 <- getDeltaNat
1565 setDeltaNat (delta0 - arg_pad_size)
1568 use_sse2 <- sse2Enabled
1569 push_codes <- mapM (push_arg use_sse2) (reverse args)
1570 delta <- getDeltaNat
1573 -- deal with static vs dynamic call targets
1574 (callinsns,cconv) <-
1576 CmmCallee (CmmLit (CmmLabel lbl)) conv
1577 -> -- ToDo: stdcall arg sizes
1578 return (unitOL (CALL (Left fn_imm) []), conv)
1579 where fn_imm = ImmCLbl lbl
1581 -> do { (dyn_r, dyn_c) <- getSomeReg expr
1582 ; ASSERT( isWord32 (cmmExprType expr) )
1583 return (dyn_c `snocOL` CALL (Right dyn_r) [], conv) }
1585 -> panic $ "genCCall: Can't handle CmmPrim call type here, error "
1586 ++ "probably because too many return values."
1589 #if darwin_TARGET_OS
1591 = toOL [SUB II32 (OpImm (ImmInt arg_pad_size)) (OpReg esp),
1592 DELTA (delta0 - arg_pad_size)]
1593 `appOL` concatOL push_codes
1596 = concatOL push_codes
1598 -- Deallocate parameters after call for ccall;
1599 -- but not for stdcall (callee does it)
1601 -- We have to pop any stack padding we added
1602 -- on Darwin even if we are doing stdcall, though (#5052)
1603 pop_size | cconv /= StdCallConv = tot_arg_size
1605 #if darwin_TARGET_OS
1611 call = callinsns `appOL`
1613 (if pop_size==0 then [] else
1614 [ADD II32 (OpImm (ImmInt pop_size)) (OpReg esp)])
1616 [DELTA (delta + tot_arg_size)]
1619 setDeltaNat (delta + tot_arg_size)
1622 -- assign the results, if necessary
1623 assign_code [] = nilOL
1624 assign_code [CmmHinted dest _hint]
1627 then let tmp_amode = AddrBaseIndex (EABaseReg esp)
1631 in toOL [ SUB II32 (OpImm (ImmInt b)) (OpReg esp),
1632 GST sz fake0 tmp_amode,
1633 MOV sz (OpAddr tmp_amode) (OpReg r_dest),
1634 ADD II32 (OpImm (ImmInt b)) (OpReg esp)]
1635 else unitOL (GMOV fake0 r_dest)
1636 | isWord64 ty = toOL [MOV II32 (OpReg eax) (OpReg r_dest),
1637 MOV II32 (OpReg edx) (OpReg r_dest_hi)]
1638 | otherwise = unitOL (MOV (intSize w) (OpReg eax) (OpReg r_dest))
1640 ty = localRegType dest
1643 r_dest_hi = getHiVRegFromLo r_dest
1644 r_dest = getRegisterReg use_sse2 (CmmLocal dest)
1645 assign_code many = pprPanic "genCCall.assign_code - too many return values:" (ppr many)
1647 return (push_code `appOL`
1649 assign_code dest_regs)
1652 arg_size :: CmmType -> Int -- Width in bytes
1653 arg_size ty = widthInBytes (typeWidth ty)
1655 roundTo a x | x `mod` a == 0 = x
1656 | otherwise = x + a - (x `mod` a)
1658 push_arg :: Bool -> HintedCmmActual {-current argument-}
1659 -> NatM InstrBlock -- code
1661 push_arg use_sse2 (CmmHinted arg _hint) -- we don't need the hints on x86
1662 | isWord64 arg_ty = do
1663 ChildCode64 code r_lo <- iselExpr64 arg
1664 delta <- getDeltaNat
1665 setDeltaNat (delta - 8)
1667 r_hi = getHiVRegFromLo r_lo
1669 return ( code `appOL`
1670 toOL [PUSH II32 (OpReg r_hi), DELTA (delta - 4),
1671 PUSH II32 (OpReg r_lo), DELTA (delta - 8),
1675 | isFloatType arg_ty = do
1676 (reg, code) <- getSomeReg arg
1677 delta <- getDeltaNat
1678 setDeltaNat (delta-size)
1679 return (code `appOL`
1680 toOL [SUB II32 (OpImm (ImmInt size)) (OpReg esp),
1682 let addr = AddrBaseIndex (EABaseReg esp)
1685 size = floatSize (typeWidth arg_ty)
1688 then MOV size (OpReg reg) (OpAddr addr)
1689 else GST size reg addr
1694 (operand, code) <- getOperand arg
1695 delta <- getDeltaNat
1696 setDeltaNat (delta-size)
1697 return (code `snocOL`
1698 PUSH II32 operand `snocOL`
1702 arg_ty = cmmExprType arg
1703 size = arg_size arg_ty -- Byte size
1705 #elif x86_64_TARGET_ARCH
1707 genCCall (CmmPrim MO_WriteBarrier) _ _ = return nilOL
1708 -- write barrier compiles to no code on x86/x86-64;
1709 -- we keep it this long in order to prevent earlier optimisations.
1711 -- void return type prim op
1712 genCCall (CmmPrim op) [] args =
1713 outOfLineCmmOp op Nothing args
1715 -- we only cope with a single result for foreign calls
1716 genCCall (CmmPrim op) [res] args =
1717 outOfLineCmmOp op (Just res) args
1719 genCCall target dest_regs args = do
1721 -- load up the register arguments
1722 (stack_args, aregs, fregs, load_args_code)
1723 <- load_args args allArgRegs allFPArgRegs nilOL
1726 fp_regs_used = reverse (drop (length fregs) (reverse allFPArgRegs))
1727 int_regs_used = reverse (drop (length aregs) (reverse allArgRegs))
1728 arg_regs = [eax] ++ int_regs_used ++ fp_regs_used
1729 -- for annotating the call instruction with
1731 sse_regs = length fp_regs_used
1733 tot_arg_size = arg_size * length stack_args
1735 -- On entry to the called function, %rsp should be aligned
1736 -- on a 16-byte boundary +8 (i.e. the first stack arg after
1737 -- the return address is 16-byte aligned). In STG land
1738 -- %rsp is kept 16-byte aligned (see StgCRun.c), so we just
1739 -- need to make sure we push a multiple of 16-bytes of args,
1740 -- plus the return address, to get the correct alignment.
1741 -- Urg, this is hard. We need to feed the delta back into
1742 -- the arg pushing code.
1743 (real_size, adjust_rsp) <-
1744 if tot_arg_size `rem` 16 == 0
1745 then return (tot_arg_size, nilOL)
1746 else do -- we need to adjust...
1747 delta <- getDeltaNat
1748 setDeltaNat (delta-8)
1749 return (tot_arg_size+8, toOL [
1750 SUB II64 (OpImm (ImmInt 8)) (OpReg rsp),
1754 -- push the stack args, right to left
1755 push_code <- push_args (reverse stack_args) nilOL
1756 delta <- getDeltaNat
1758 -- deal with static vs dynamic call targets
1759 (callinsns,cconv) <-
1761 CmmCallee (CmmLit (CmmLabel lbl)) conv
1762 -> -- ToDo: stdcall arg sizes
1763 return (unitOL (CALL (Left fn_imm) arg_regs), conv)
1764 where fn_imm = ImmCLbl lbl
1766 -> do (dyn_r, dyn_c) <- getSomeReg expr
1767 return (dyn_c `snocOL` CALL (Right dyn_r) arg_regs, conv)
1769 -> panic $ "genCCall: Can't handle CmmPrim call type here, error "
1770 ++ "probably because too many return values."
1773 -- The x86_64 ABI requires us to set %al to the number of SSE2
1774 -- registers that contain arguments, if the called routine
1775 -- is a varargs function. We don't know whether it's a
1776 -- varargs function or not, so we have to assume it is.
1778 -- It's not safe to omit this assignment, even if the number
1779 -- of SSE2 regs in use is zero. If %al is larger than 8
1780 -- on entry to a varargs function, seg faults ensue.
1781 assign_eax n = unitOL (MOV II32 (OpImm (ImmInt n)) (OpReg eax))
1783 let call = callinsns `appOL`
1785 -- Deallocate parameters after call for ccall;
1786 -- but not for stdcall (callee does it)
1787 (if cconv == StdCallConv || real_size==0 then [] else
1788 [ADD (intSize wordWidth) (OpImm (ImmInt real_size)) (OpReg esp)])
1790 [DELTA (delta + real_size)]
1793 setDeltaNat (delta + real_size)
1796 -- assign the results, if necessary
1797 assign_code [] = nilOL
1798 assign_code [CmmHinted dest _hint] =
1799 case typeWidth rep of
1800 W32 | isFloatType rep -> unitOL (MOV (floatSize W32) (OpReg xmm0) (OpReg r_dest))
1801 W64 | isFloatType rep -> unitOL (MOV (floatSize W64) (OpReg xmm0) (OpReg r_dest))
1802 _ -> unitOL (MOV (cmmTypeSize rep) (OpReg rax) (OpReg r_dest))
1804 rep = localRegType dest
1805 r_dest = getRegisterReg True (CmmLocal dest)
1806 assign_code many = panic "genCCall.assign_code many"
1808 return (load_args_code `appOL`
1811 assign_eax sse_regs `appOL`
1813 assign_code dest_regs)
1816 arg_size = 8 -- always, at the mo
1818 load_args :: [CmmHinted CmmExpr]
1819 -> [Reg] -- int regs avail for args
1820 -> [Reg] -- FP regs avail for args
1822 -> NatM ([CmmHinted CmmExpr],[Reg],[Reg],InstrBlock)
1823 load_args args [] [] code = return (args, [], [], code)
1824 -- no more regs to use
1825 load_args [] aregs fregs code = return ([], aregs, fregs, code)
1826 -- no more args to push
1827 load_args ((CmmHinted arg hint) : rest) aregs fregs code
1828 | isFloatType arg_rep =
1832 arg_code <- getAnyReg arg
1833 load_args rest aregs rs (code `appOL` arg_code r)
1838 arg_code <- getAnyReg arg
1839 load_args rest rs fregs (code `appOL` arg_code r)
1841 arg_rep = cmmExprType arg
1844 (args',ars,frs,code') <- load_args rest aregs fregs code
1845 return ((CmmHinted arg hint):args', ars, frs, code')
1847 push_args [] code = return code
1848 push_args ((CmmHinted arg hint):rest) code
1849 | isFloatType arg_rep = do
1850 (arg_reg, arg_code) <- getSomeReg arg
1851 delta <- getDeltaNat
1852 setDeltaNat (delta-arg_size)
1853 let code' = code `appOL` arg_code `appOL` toOL [
1854 SUB (intSize wordWidth) (OpImm (ImmInt arg_size)) (OpReg rsp) ,
1855 DELTA (delta-arg_size),
1856 MOV (floatSize width) (OpReg arg_reg) (OpAddr (spRel 0))]
1857 push_args rest code'
1860 -- we only ever generate word-sized function arguments. Promotion
1861 -- has already happened: our Int8# type is kept sign-extended
1862 -- in an Int#, for example.
1863 ASSERT(width == W64) return ()
1864 (arg_op, arg_code) <- getOperand arg
1865 delta <- getDeltaNat
1866 setDeltaNat (delta-arg_size)
1867 let code' = code `appOL` arg_code `appOL` toOL [
1869 DELTA (delta-arg_size)]
1870 push_args rest code'
1872 arg_rep = cmmExprType arg
1873 width = typeWidth arg_rep
1876 genCCall = panic "X86.genCCAll: not defined"
1878 #endif /* x86_64_TARGET_ARCH */
1881 outOfLineCmmOp :: CallishMachOp -> Maybe HintedCmmFormal -> HintedCmmActuals -> NatM InstrBlock
1882 outOfLineCmmOp mop res args
1884 dflags <- getDynFlagsNat
1885 targetExpr <- cmmMakeDynamicReference dflags addImportNat CallReference lbl
1886 let target = CmmCallee targetExpr CCallConv
1888 stmtToInstrs (CmmCall target (catMaybes [res]) args' CmmUnsafe CmmMayReturn)
1890 -- Assume we can call these functions directly, and that they're not in a dynamic library.
1891 -- TODO: Why is this ok? Under linux this code will be in libm.so
1892 -- Is is because they're really implemented as a primitive instruction by the assembler?? -- BL 2009/12/31
1893 lbl = mkForeignLabel fn Nothing ForeignLabelInThisPackage IsFunction
1896 MO_Memcpy -> init args
1897 MO_Memset -> init args
1898 MO_Memmove -> init args
1902 MO_F32_Sqrt -> fsLit "sqrtf"
1903 MO_F32_Sin -> fsLit "sinf"
1904 MO_F32_Cos -> fsLit "cosf"
1905 MO_F32_Tan -> fsLit "tanf"
1906 MO_F32_Exp -> fsLit "expf"
1907 MO_F32_Log -> fsLit "logf"
1909 MO_F32_Asin -> fsLit "asinf"
1910 MO_F32_Acos -> fsLit "acosf"
1911 MO_F32_Atan -> fsLit "atanf"
1913 MO_F32_Sinh -> fsLit "sinhf"
1914 MO_F32_Cosh -> fsLit "coshf"
1915 MO_F32_Tanh -> fsLit "tanhf"
1916 MO_F32_Pwr -> fsLit "powf"
1918 MO_F64_Sqrt -> fsLit "sqrt"
1919 MO_F64_Sin -> fsLit "sin"
1920 MO_F64_Cos -> fsLit "cos"
1921 MO_F64_Tan -> fsLit "tan"
1922 MO_F64_Exp -> fsLit "exp"
1923 MO_F64_Log -> fsLit "log"
1925 MO_F64_Asin -> fsLit "asin"
1926 MO_F64_Acos -> fsLit "acos"
1927 MO_F64_Atan -> fsLit "atan"
1929 MO_F64_Sinh -> fsLit "sinh"
1930 MO_F64_Cosh -> fsLit "cosh"
1931 MO_F64_Tanh -> fsLit "tanh"
1932 MO_F64_Pwr -> fsLit "pow"
1934 MO_Memcpy -> fsLit "memcpy"
1935 MO_Memset -> fsLit "memset"
1936 MO_Memmove -> fsLit "memmove"
1939 -- -----------------------------------------------------------------------------
1940 -- Generating a table-branch
1942 genSwitch :: CmmExpr -> [Maybe BlockId] -> NatM InstrBlock
1947 (reg,e_code) <- getSomeReg expr
1948 lbl <- getNewLabelNat
1949 dflags <- getDynFlagsNat
1950 dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl
1951 (tableReg,t_code) <- getSomeReg $ dynRef
1952 let op = OpAddr (AddrBaseIndex (EABaseReg tableReg)
1953 (EAIndex reg wORD_SIZE) (ImmInt 0))
1955 #if x86_64_TARGET_ARCH
1956 #if darwin_TARGET_OS
1957 -- on Mac OS X/x86_64, put the jump table in the text section
1958 -- to work around a limitation of the linker.
1959 -- ld64 is unable to handle the relocations for
1961 -- if L0 is not preceded by a non-anonymous label in its section.
1963 code = e_code `appOL` t_code `appOL` toOL [
1964 ADD (intSize wordWidth) op (OpReg tableReg),
1965 JMP_TBL (OpReg tableReg) ids Text lbl
1968 -- HACK: On x86_64 binutils<2.17 is only able to generate PC32
1969 -- relocations, hence we only get 32-bit offsets in the jump
1970 -- table. As these offsets are always negative we need to properly
1971 -- sign extend them to 64-bit. This hack should be removed in
1972 -- conjunction with the hack in PprMach.hs/pprDataItem once
1973 -- binutils 2.17 is standard.
1974 code = e_code `appOL` t_code `appOL` toOL [
1976 (OpAddr (AddrBaseIndex (EABaseReg tableReg)
1977 (EAIndex reg wORD_SIZE) (ImmInt 0)))
1979 ADD (intSize wordWidth) (OpReg reg) (OpReg tableReg),
1980 JMP_TBL (OpReg tableReg) ids ReadOnlyData lbl
1984 code = e_code `appOL` t_code `appOL` toOL [
1985 ADD (intSize wordWidth) op (OpReg tableReg),
1986 JMP_TBL (OpReg tableReg) ids ReadOnlyData lbl
1992 (reg,e_code) <- getSomeReg expr
1993 lbl <- getNewLabelNat
1995 op = OpAddr (AddrBaseIndex EABaseNone (EAIndex reg wORD_SIZE) (ImmCLbl lbl))
1996 code = e_code `appOL` toOL [
1997 JMP_TBL op ids ReadOnlyData lbl
2002 generateJumpTableForInstr :: Instr -> Maybe (NatCmmTop Instr)
2003 generateJumpTableForInstr (JMP_TBL _ ids section lbl) = Just (createJumpTable ids section lbl)
2004 generateJumpTableForInstr _ = Nothing
2006 createJumpTable ids section lbl
2009 let jumpTableEntryRel Nothing
2010 = CmmStaticLit (CmmInt 0 wordWidth)
2011 jumpTableEntryRel (Just blockid)
2012 = CmmStaticLit (CmmLabelDiffOff blockLabel lbl 0)
2013 where blockLabel = mkAsmTempLabel (getUnique blockid)
2014 in map jumpTableEntryRel ids
2015 | otherwise = map jumpTableEntry ids
2016 in CmmData section (CmmDataLabel lbl : jumpTable)
2018 -- -----------------------------------------------------------------------------
2019 -- 'condIntReg' and 'condFltReg': condition codes into registers
2021 -- Turn those condition codes into integers now (when they appear on
2022 -- the right hand side of an assignment).
2024 -- (If applicable) Do not fill the delay slots here; you will confuse the
2025 -- register allocator.
2027 condIntReg :: Cond -> CmmExpr -> CmmExpr -> NatM Register
2029 condIntReg cond x y = do
2030 CondCode _ cond cond_code <- condIntCode cond x y
2031 tmp <- getNewRegNat II8
2033 code dst = cond_code `appOL` toOL [
2034 SETCC cond (OpReg tmp),
2035 MOVZxL II8 (OpReg tmp) (OpReg dst)
2038 return (Any II32 code)
2042 condFltReg :: Cond -> CmmExpr -> CmmExpr -> NatM Register
2043 condFltReg cond x y = if_sse2 condFltReg_sse2 condFltReg_x87
2046 CondCode _ cond cond_code <- condFltCode cond x y
2047 tmp <- getNewRegNat II8
2049 code dst = cond_code `appOL` toOL [
2050 SETCC cond (OpReg tmp),
2051 MOVZxL II8 (OpReg tmp) (OpReg dst)
2054 return (Any II32 code)
2056 condFltReg_sse2 = do
2057 CondCode _ cond cond_code <- condFltCode cond x y
2058 tmp1 <- getNewRegNat archWordSize
2059 tmp2 <- getNewRegNat archWordSize
2061 -- We have to worry about unordered operands (eg. comparisons
2062 -- against NaN). If the operands are unordered, the comparison
2063 -- sets the parity flag, carry flag and zero flag.
2064 -- All comparisons are supposed to return false for unordered
2065 -- operands except for !=, which returns true.
2067 -- Optimisation: we don't have to test the parity flag if we
2068 -- know the test has already excluded the unordered case: eg >
2069 -- and >= test for a zero carry flag, which can only occur for
2070 -- ordered operands.
2072 -- ToDo: by reversing comparisons we could avoid testing the
2073 -- parity flag in more cases.
2078 NE -> or_unordered dst
2079 GU -> plain_test dst
2080 GEU -> plain_test dst
2081 _ -> and_ordered dst)
2083 plain_test dst = toOL [
2084 SETCC cond (OpReg tmp1),
2085 MOVZxL II8 (OpReg tmp1) (OpReg dst)
2087 or_unordered dst = toOL [
2088 SETCC cond (OpReg tmp1),
2089 SETCC PARITY (OpReg tmp2),
2090 OR II8 (OpReg tmp1) (OpReg tmp2),
2091 MOVZxL II8 (OpReg tmp2) (OpReg dst)
2093 and_ordered dst = toOL [
2094 SETCC cond (OpReg tmp1),
2095 SETCC NOTPARITY (OpReg tmp2),
2096 AND II8 (OpReg tmp1) (OpReg tmp2),
2097 MOVZxL II8 (OpReg tmp2) (OpReg dst)
2100 return (Any II32 code)
2103 -- -----------------------------------------------------------------------------
2104 -- 'trivial*Code': deal with trivial instructions
2106 -- Trivial (dyadic: 'trivialCode', floating-point: 'trivialFCode',
2107 -- unary: 'trivialUCode', unary fl-pt:'trivialUFCode') instructions.
2108 -- Only look for constants on the right hand side, because that's
2109 -- where the generic optimizer will have put them.
2111 -- Similarly, for unary instructions, we don't have to worry about
2112 -- matching an StInt as the argument, because genericOpt will already
2113 -- have handled the constant-folding.
2117 The Rules of the Game are:
2119 * You cannot assume anything about the destination register dst;
2120 it may be anything, including a fixed reg.
2122 * You may compute an operand into a fixed reg, but you may not
2123 subsequently change the contents of that fixed reg. If you
2124 want to do so, first copy the value either to a temporary
2125 or into dst. You are free to modify dst even if it happens
2126 to be a fixed reg -- that's not your problem.
2128 * You cannot assume that a fixed reg will stay live over an
2129 arbitrary computation. The same applies to the dst reg.
2131 * Temporary regs obtained from getNewRegNat are distinct from
2132 each other and from all other regs, and stay live over
2133 arbitrary computations.
2135 --------------------
2137 SDM's version of The Rules:
2139 * If getRegister returns Any, that means it can generate correct
2140 code which places the result in any register, period. Even if that
2141 register happens to be read during the computation.
2143 Corollary #1: this means that if you are generating code for an
2144 operation with two arbitrary operands, you cannot assign the result
2145 of the first operand into the destination register before computing
2146 the second operand. The second operand might require the old value
2147 of the destination register.
2149 Corollary #2: A function might be able to generate more efficient
2150 code if it knows the destination register is a new temporary (and
2151 therefore not read by any of the sub-computations).
2153 * If getRegister returns Any, then the code it generates may modify only:
2154 (a) fresh temporaries
2155 (b) the destination register
2156 (c) known registers (eg. %ecx is used by shifts)
2157 In particular, it may *not* modify global registers, unless the global
2158 register happens to be the destination register.
2161 trivialCode width instr (Just revinstr) (CmmLit lit_a) b
2162 | is32BitLit lit_a = do
2163 b_code <- getAnyReg b
2166 = b_code dst `snocOL`
2167 revinstr (OpImm (litToImm lit_a)) (OpReg dst)
2169 return (Any (intSize width) code)
2171 trivialCode width instr maybe_revinstr a b
2172 = genTrivialCode (intSize width) instr a b
2174 -- This is re-used for floating pt instructions too.
2175 genTrivialCode rep instr a b = do
2176 (b_op, b_code) <- getNonClobberedOperand b
2177 a_code <- getAnyReg a
2178 tmp <- getNewRegNat rep
2180 -- We want the value of b to stay alive across the computation of a.
2181 -- But, we want to calculate a straight into the destination register,
2182 -- because the instruction only has two operands (dst := dst `op` src).
2183 -- The troublesome case is when the result of b is in the same register
2184 -- as the destination reg. In this case, we have to save b in a
2185 -- new temporary across the computation of a.
2187 | dst `regClashesWithOp` b_op =
2189 unitOL (MOV rep b_op (OpReg tmp)) `appOL`
2191 instr (OpReg tmp) (OpReg dst)
2195 instr b_op (OpReg dst)
2197 return (Any rep code)
2199 reg `regClashesWithOp` OpReg reg2 = reg == reg2
2200 reg `regClashesWithOp` OpAddr amode = any (==reg) (addrModeRegs amode)
2201 reg `regClashesWithOp` _ = False
2205 trivialUCode rep instr x = do
2206 x_code <- getAnyReg x
2211 return (Any rep code)
2215 trivialFCode_x87 width instr x y = do
2216 (x_reg, x_code) <- getNonClobberedReg x -- these work for float regs too
2217 (y_reg, y_code) <- getSomeReg y
2219 size = FF80 -- always, on x87
2223 instr size x_reg y_reg dst
2224 return (Any size code)
2226 trivialFCode_sse2 pk instr x y
2227 = genTrivialCode size (instr size) x y
2228 where size = floatSize pk
2231 trivialUFCode size instr x = do
2232 (x_reg, x_code) <- getSomeReg x
2238 return (Any size code)
2241 --------------------------------------------------------------------------------
2242 coerceInt2FP :: Width -> Width -> CmmExpr -> NatM Register
2243 coerceInt2FP from to x = if_sse2 coerce_sse2 coerce_x87
2246 (x_reg, x_code) <- getSomeReg x
2248 opc = case to of W32 -> GITOF; W64 -> GITOD
2249 code dst = x_code `snocOL` opc x_reg dst
2250 -- ToDo: works for non-II32 reps?
2251 return (Any FF80 code)
2254 (x_op, x_code) <- getOperand x -- ToDo: could be a safe operand
2256 opc = case to of W32 -> CVTSI2SS; W64 -> CVTSI2SD
2257 code dst = x_code `snocOL` opc (intSize from) x_op dst
2259 return (Any (floatSize to) code)
2260 -- works even if the destination rep is <II32
2262 --------------------------------------------------------------------------------
2263 coerceFP2Int :: Width -> Width -> CmmExpr -> NatM Register
2264 coerceFP2Int from to x = if_sse2 coerceFP2Int_sse2 coerceFP2Int_x87
2266 coerceFP2Int_x87 = do
2267 (x_reg, x_code) <- getSomeReg x
2269 opc = case from of W32 -> GFTOI; W64 -> GDTOI
2270 code dst = x_code `snocOL` opc x_reg dst
2271 -- ToDo: works for non-II32 reps?
2273 return (Any (intSize to) code)
2275 coerceFP2Int_sse2 = do
2276 (x_op, x_code) <- getOperand x -- ToDo: could be a safe operand
2278 opc = case from of W32 -> CVTTSS2SIQ; W64 -> CVTTSD2SIQ
2279 code dst = x_code `snocOL` opc (intSize to) x_op dst
2281 return (Any (intSize to) code)
2282 -- works even if the destination rep is <II32
2285 --------------------------------------------------------------------------------
2286 coerceFP2FP :: Width -> CmmExpr -> NatM Register
2287 coerceFP2FP to x = do
2288 use_sse2 <- sse2Enabled
2289 (x_reg, x_code) <- getSomeReg x
2291 opc | use_sse2 = case to of W32 -> CVTSD2SS; W64 -> CVTSS2SD
2293 code dst = x_code `snocOL` opc x_reg dst
2295 return (Any (if use_sse2 then floatSize to else FF80) code)
2297 --------------------------------------------------------------------------------
2299 sse2NegCode :: Width -> CmmExpr -> NatM Register
2300 sse2NegCode w x = do
2301 let sz = floatSize w
2302 x_code <- getAnyReg x
2303 -- This is how gcc does it, so it can't be that bad:
2305 const | FF32 <- sz = CmmInt 0x80000000 W32
2306 | otherwise = CmmInt 0x8000000000000000 W64
2307 Amode amode amode_code <- memConstant (widthInBytes w) const
2308 tmp <- getNewRegNat sz
2310 code dst = x_code dst `appOL` amode_code `appOL` toOL [
2311 MOV sz (OpAddr amode) (OpReg tmp),
2312 XOR sz (OpReg tmp) (OpReg dst)
2315 return (Any sz code)