-- The above warning suppression flag is a temporary kludge.
3 -- While working on this module you are encouraged to remove it and fix
4 -- any warnings in the module. See
5 -- http://hackage.haskell.org/trac/ghc/wiki/Commentary/CodingStyle#Warnings
8 -----------------------------------------------------------------------------
10 -- Generating machine code (instruction selection)
12 -- (c) The University of Glasgow 1996-2004
14 -----------------------------------------------------------------------------
16 -- This is a big module, but, if you pay attention to
17 -- (a) the sectioning, (b) the type signatures, and
-- (c) the #if blah_TARGET_ARCH things, the
19 -- structure should not be too overwhelming.
28 #include "HsVersions.h"
29 #include "nativeGen/NCG.h"
30 #include "../includes/MachDeps.h"
46 -- Our intermediate code:
49 import PprCmm ( pprExpr )
53 import ClosureInfo ( C_SRT(..) )
56 import StaticFlags ( opt_PIC )
57 import ForeignCall ( CCallConv(..) )
60 import qualified Outputable as O
64 import FastBool ( isFastTrue )
65 import Constants ( wORD_SIZE )
68 import Debug.Trace ( trace )
70 import Control.Monad ( mapAndUnzipM )
71 import Data.Maybe ( fromJust )
-- | May the code generator use SSE2 floating-point instructions,
-- or must it fall back on the x87 FPU?
sse2Enabled :: NatM Bool
#if x86_64_TARGET_ARCH
-- SSE2 is fixed on for x86_64. It would be possible to make it optional,
-- but we'd need to fix at least the foreign call code where the calling
-- convention specifies the use of xmm regs, and possibly other places.
sse2Enabled = return True
-- On other targets (branch below) SSE2 is driven by the Opt_SSE2 dynamic flag.
  dflags <- getDynFlagsNat
  return (dopt Opt_SSE2 dflags)
-- | Choose between two code paths: run the first action when SSE2 is
-- enabled, otherwise the second (x87) alternative.
if_sse2 :: NatM a -> NatM a -> NatM a
    if b then sse2 else x87
        -> NatM [NatCmmTop Instr]

-- Code generation for a procedure: generate each basic block, splice the
-- results back together, and — when position-independent code is being
-- generated — initialise the PIC base register as well.
cmmTopCodeGen dynflags (CmmProc info lab (ListGraph blocks)) = do
  (nat_blocks,statics) <- mapAndUnzipM basicBlockCodeGen blocks
  picBaseMb <- getPicBaseMaybeNat
  let proc = CmmProc info lab (ListGraph $ concat nat_blocks)
      tops = proc : concat statics
      os   = platformOS $ targetPlatform dynflags
      Just picBase -> initializePicBase_x86 ArchX86 os picBase tops
      Nothing -> return tops

cmmTopCodeGen _ (CmmData sec dat) = do
  return [CmmData sec dat]  -- no translation, we just use CmmStatic
                  -> NatM ( [NatBasicBlock Instr]

-- | Generate native code for one Cmm basic block, splitting the result
-- back into proper basic blocks and floating out any literal data.
basicBlockCodeGen (BasicBlock id stmts) = do
  instrs <- stmtsToInstrs stmts
  -- code generation may introduce new basic block boundaries, which
  -- are indicated by the NEWBLOCK instruction. We must split up the
  -- instruction stream into basic blocks again. Also, we extract
        (top,other_blocks,statics) = foldrOL mkBlocks ([],[],[]) instrs

        -- a NEWBLOCK pseudo-instruction starts a fresh block
        mkBlocks (NEWBLOCK id) (instrs,blocks,statics)
          = ([], BasicBlock id instrs : blocks, statics)
        -- LDATA is removed from the instruction stream entirely
        mkBlocks (LDATA sec dat) (instrs,blocks,statics)
          = (instrs, blocks, CmmData sec dat:statics)
        mkBlocks instr (instrs,blocks,statics)
          = (instr:instrs, blocks, statics)

  return (BasicBlock id top : other_blocks, statics)
-- | Translate a list of Cmm statements, concatenating the generated
-- instruction blocks in order.
stmtsToInstrs :: [CmmStmt] -> NatM InstrBlock
  = do instrss <- mapM stmtToInstrs stmts
       return (concatOL instrss)
-- | Translate a single Cmm statement into native instructions.
stmtToInstrs :: CmmStmt -> NatM InstrBlock
stmtToInstrs stmt = case stmt of
    CmmNop       -> return nilOL
    CmmComment s -> return (unitOL (COMMENT s))

      -- register assignment: dispatch on the type of the assigned value
      | isFloatType ty -> assignReg_FltCode size reg src
#if WORD_SIZE_IN_BITS==32
      | isWord64 ty    -> assignReg_I64Code      reg src
      | otherwise      -> assignReg_IntCode size reg src
        where ty = cmmRegType reg
              size = cmmTypeSize ty

      -- memory store: likewise dispatched on the type of the stored value
      | isFloatType ty -> assignMem_FltCode size addr src
#if WORD_SIZE_IN_BITS==32
      | isWord64 ty    -> assignMem_I64Code      addr src
      | otherwise      -> assignMem_IntCode size addr src
        where ty = cmmExprType src
              size = cmmTypeSize ty

    CmmCall target result_regs args _ _
       -> genCCall target result_regs args

    CmmBranch id          -> genBranch id
    CmmCondBranch arg id  -> genCondJump id arg
    CmmSwitch arg ids     -> genSwitch arg ids
    CmmJump arg params    -> genJump arg
    -- CmmReturn: removed earlier by the CPS conversion, hence a panic here
       panic "stmtToInstrs: return statement should have been cps'd away"
--------------------------------------------------------------------------------
-- | 'InstrBlock's are the insn sequences generated by the insn selectors.
-- They are really trees of insns to facilitate fast appending, where a
-- left-to-right traversal yields the insns in the correct order.

-- | Condition codes passed up the tree.
  = CondCode Bool Cond InstrBlock

-- | a.k.a "Register64"
-- Reg is the lower 32-bit temporary which contains the result.
-- Use getHiVRegFromLo to find the other VRegUnique.

-- Rules of this simplified insn selection game are therefore that
-- the returned Reg may be modified

-- | Register's passed up the tree.  If the stix code forces the register
-- to live in a pre-decided machine register, it comes out as @Fixed@;
-- otherwise, it comes out as @Any@, and the parent can decide which
-- register to put it in.
  = Fixed Size Reg InstrBlock
  | Any   Size (Reg -> InstrBlock)
-- | Put a 'Register' at a different 'Size', keeping its code (and, for
-- 'Fixed', its register) unchanged.
swizzleRegisterRep :: Register -> Size -> Register
swizzleRegisterRep register size =
    case register of
        Fixed _ reg code -> Fixed size reg code
        Any   _ codefn   -> Any   size codefn
220 -- | Grab the Reg for a CmmReg
getRegisterReg :: Bool -> CmmReg -> Reg
getRegisterReg use_sse2 (CmmLocal (LocalReg u pk))
    -- without SSE2, floating-point locals live in 80-bit x87 registers
  | isFloatSize sz && not use_sse2 = RegVirtual (mkVirtualReg u FF80)
  | otherwise                      = RegVirtual (mkVirtualReg u sz)
  where
    sz = cmmTypeSize pk
getRegisterReg _ (CmmGlobal mid)
    -- By this stage, the only MagicIds remaining should be the
    -- ones which map to a real machine register on this
    -- platform.  Hence the panic for the others.
  = case globalRegMaybe mid of
        Just reg -> RegReal reg
        Nothing  -> pprPanic "getRegisterReg-memory" (ppr $ CmmGlobal mid)
238 -- | Memory addressing modes passed up the tree.
240 = Amode AddrMode InstrBlock
243 Now, given a tree (the argument to an CmmLoad) that references memory,
244 produce a suitable addressing mode.
246 A Rule of the Game (tm) for Amodes: use of the addr bit must
247 immediately follow use of the code part, since the code part puts
248 values in registers which the addr then refers to. So you can't put
249 anything in between, lest it overwrite some of those registers. If
250 you need to do some other computation between the code part and use of
251 the addr bit, first store the effective address from the amode in a
252 temporary, then do the other computation, and then use the temporary:
256 ... other computation ...
-- | Check whether an integer will fit in 32 bits.
-- A CmmInt is intended to be truncated to the appropriate
-- number of bits, so here we truncate it to Int64.  This is
-- important because e.g. -1 as a CmmInt might be either
-- -1 or 18446744073709551615.
is32BitInteger :: Integer -> Bool
is32BitInteger i = lo <= i64 && i64 <= hi
  where
    i64 = fromIntegral i :: Int64
    lo  = -0x80000000
    hi  = 0x7fffffff
-- | Convert a BlockId to some CmmStatic data: a zero word for a missing
-- entry, otherwise a reference to the block's assembly-temp label.
jumpTableEntry :: Maybe BlockId -> CmmStatic
jumpTableEntry mbBlockId =
    case mbBlockId of
        Nothing      -> CmmStaticLit (CmmInt 0 wordWidth)
        Just blockid -> CmmStaticLit (CmmLabel (mkAsmTempLabel (getUnique blockid)))
-- -----------------------------------------------------------------------------
-- General things for putting together code sequences

-- Expand CmmRegOff.  ToDo: should we do it this way around, or convert
-- CmmExprs into CmmRegOff?
-- NB. deliberately partial: only ever called on CmmRegOff trees.
mangleIndexTree :: CmmExpr -> CmmExpr
mangleIndexTree (CmmRegOff reg off) =
    let width = typeWidth (cmmRegType reg)
    in  CmmMachOp (MO_Add width)
            [ CmmReg reg
            , CmmLit (CmmInt (fromIntegral off) width) ]
-- | The dual to getAnyReg: compute an expression into a register, but
-- we don't mind which one it is.
getSomeReg :: CmmExpr -> NatM (Reg, InstrBlock)
    r <- getRegister expr
      -- an 'Any' result is materialised into a fresh temporary
      tmp <- getNewRegNat rep
      return (tmp, code tmp)
-- | Store a 64-bit value to memory on a 32-bit target: two 32-bit moves,
-- low word at the address and high word at address+4 (little-endian).
assignMem_I64Code :: CmmExpr -> CmmExpr -> NatM InstrBlock
assignMem_I64Code addrTree valueTree = do
  Amode addr addr_code <- getAmode addrTree
  ChildCode64 vcode rlo <- iselExpr64 valueTree
        -- the high half lives in the paired virtual register
        rhi = getHiVRegFromLo rlo

        -- Little-endian store
        mov_lo = MOV II32 (OpReg rlo) (OpAddr addr)
        mov_hi = MOV II32 (OpReg rhi) (OpAddr (fromJust (addrOffset addr 4)))
  return (vcode `appOL` addr_code `snocOL` mov_lo `snocOL` mov_hi)
-- | Assign a 64-bit value to a local register on a 32-bit target:
-- a pair of 32-bit register-to-register moves.
assignReg_I64Code :: CmmReg -> CmmExpr -> NatM InstrBlock
assignReg_I64Code (CmmLocal (LocalReg u_dst pk)) valueTree = do
   ChildCode64 vcode r_src_lo <- iselExpr64 valueTree
         r_dst_lo = RegVirtual $ mkVirtualReg u_dst II32
         r_dst_hi = getHiVRegFromLo r_dst_lo
         r_src_hi = getHiVRegFromLo r_src_lo
         mov_lo = MOV II32 (OpReg r_src_lo) (OpReg r_dst_lo)
         mov_hi = MOV II32 (OpReg r_src_hi) (OpReg r_dst_hi)
         vcode `snocOL` mov_lo `snocOL` mov_hi

-- only local registers can hold 64-bit values here
assignReg_I64Code lvalue valueTree
   = panic "assignReg_I64Code(i386): invalid lvalue"
-- | Instruction selection for a 64-bit expression on a 32-bit target.
-- The result names the virtual register holding the low word; the high
-- word lives in the paired register found via getHiVRegFromLo.
iselExpr64 :: CmmExpr -> NatM ChildCode64

-- a 64-bit literal: load the two 32-bit halves into a fresh reg pair
iselExpr64 (CmmLit (CmmInt i _)) = do
  (rlo,rhi) <- getNewRegPairNat II32
        -- low and high words of the literal, each truncated to 32 bits
        r = fromIntegral (fromIntegral i :: Word32)
        q = fromIntegral ((fromIntegral i `shiftR` 32) :: Word32)
           MOV II32 (OpImm (ImmInteger r)) (OpReg rlo),
           MOV II32 (OpImm (ImmInteger q)) (OpReg rhi)
  return (ChildCode64 code rlo)

-- a 64-bit load: two 32-bit loads, low word first (little-endian)
iselExpr64 (CmmLoad addrTree ty) | isWord64 ty = do
   Amode addr addr_code <- getAmode addrTree
   (rlo,rhi) <- getNewRegPairNat II32
        mov_lo = MOV II32 (OpAddr addr) (OpReg rlo)
        mov_hi = MOV II32 (OpAddr (fromJust (addrOffset addr 4))) (OpReg rhi)
        ChildCode64 (addr_code `snocOL` mov_lo `snocOL` mov_hi)

iselExpr64 (CmmReg (CmmLocal (LocalReg vu ty))) | isWord64 ty
   = return (ChildCode64 nilOL (RegVirtual $ mkVirtualReg vu II32))

-- we handle addition, but rather badly
iselExpr64 (CmmMachOp (MO_Add _) [e1, CmmLit (CmmInt i _)]) = do
   ChildCode64 code1 r1lo <- iselExpr64 e1
   (rlo,rhi) <- getNewRegPairNat II32
        r = fromIntegral (fromIntegral i :: Word32)
        q = fromIntegral ((fromIntegral i `shiftR` 32) :: Word32)
        r1hi = getHiVRegFromLo r1lo
        -- add-with-carry: the ADD sets the carry that the ADC consumes
           toOL [ MOV II32 (OpReg r1lo) (OpReg rlo),
                  ADD II32 (OpImm (ImmInteger r)) (OpReg rlo),
                  MOV II32 (OpReg r1hi) (OpReg rhi),
                  ADC II32 (OpImm (ImmInteger q)) (OpReg rhi) ]
   return (ChildCode64 code rlo)

iselExpr64 (CmmMachOp (MO_Add _) [e1,e2]) = do
   ChildCode64 code1 r1lo <- iselExpr64 e1
   ChildCode64 code2 r2lo <- iselExpr64 e2
   (rlo,rhi) <- getNewRegPairNat II32
        r1hi = getHiVRegFromLo r1lo
        r2hi = getHiVRegFromLo r2lo
        toOL [ MOV II32 (OpReg r1lo) (OpReg rlo),
               ADD II32 (OpReg r2lo) (OpReg rlo),
               MOV II32 (OpReg r1hi) (OpReg rhi),
               ADC II32 (OpReg r2hi) (OpReg rhi) ]
   return (ChildCode64 code rlo)

-- unsigned widening to 64 bits: the high word is simply zeroed
iselExpr64 (CmmMachOp (MO_UU_Conv _ W64) [expr]) = do
     r_dst_lo <- getNewRegNat II32
     let r_dst_hi = getHiVRegFromLo r_dst_lo
          ChildCode64 (code `snocOL`
                       MOV II32 (OpImm (ImmInt 0)) (OpReg r_dst_hi))

   = pprPanic "iselExpr64(i386)" (ppr expr)
--------------------------------------------------------------------------------
-- | Select instructions to compute a CmmExpr into a 'Register'.
getRegister :: CmmExpr -> NatM Register

#if !x86_64_TARGET_ARCH
    -- on x86_64, we have %rip for PicBaseReg, but it's not a full-featured
    -- register, it can only be used for rip-relative addressing.
getRegister (CmmReg (CmmGlobal PicBaseReg))
      reg <- getPicBaseNat archWordSize
      return (Fixed archWordSize reg nilOL)

getRegister (CmmReg reg)
  = do use_sse2 <- sse2Enabled
           sz = cmmTypeSize (cmmRegType reg)
           -- without SSE2 a floating-point reg is an 80-bit x87 register
           size | not use_sse2 && isFloatSize sz = FF80
       return (Fixed size (getRegisterReg use_sse2 reg) nilOL)

getRegister tree@(CmmRegOff _ _)
  = getRegister (mangleIndexTree tree)
#if WORD_SIZE_IN_BITS==32
    -- for 32-bit architectures, support some 64 -> 32 bit conversions:
    -- TO_W_(x), TO_W_(x >> 32)

getRegister (CmmMachOp (MO_UU_Conv W64 W32)
             [CmmMachOp (MO_U_Shr W64) [x,CmmLit (CmmInt 32 _)]]) = do
  ChildCode64 code rlo <- iselExpr64 x
  -- (x >> 32) narrowed to 32 bits is just the high word of the pair
  return $ Fixed II32 (getHiVRegFromLo rlo) code

getRegister (CmmMachOp (MO_SS_Conv W64 W32)
             [CmmMachOp (MO_U_Shr W64) [x,CmmLit (CmmInt 32 _)]]) = do
  ChildCode64 code rlo <- iselExpr64 x
  return $ Fixed II32 (getHiVRegFromLo rlo) code

getRegister (CmmMachOp (MO_UU_Conv W64 W32) [x]) = do
  ChildCode64 code rlo <- iselExpr64 x
  -- plain truncation: take the low word
  return $ Fixed II32 rlo code

getRegister (CmmMachOp (MO_SS_Conv W64 W32) [x]) = do
  ChildCode64 code rlo <- iselExpr64 x
  return $ Fixed II32 rlo code
-- floating-point literals: SSE2 and x87 have different cheap cases
getRegister (CmmLit lit@(CmmFloat f w)) =
  if_sse2 float_const_sse2 float_const_x87
      -- a floating-point zero is cheapest as a self-xor
      code dst = unitOL (XOR size (OpReg dst) (OpReg dst))
      -- I don't know why there are xorpd, xorps, and pxor instructions.
      -- They all appear to do the same thing --SDM
      return (Any size code)

      -- other constants are loaded from a memory literal
      Amode addr code <- memConstant (widthInBytes w) lit
      loadFloatAmode True w addr code

    float_const_x87 = case w of
        -- x87 has dedicated load-zero (GLDZ) and load-one (GLD1) forms
        let code dst = unitOL (GLDZ dst)
        in  return (Any FF80 code)

        let code dst = unitOL (GLD1 dst)
        in  return (Any FF80 code)

        Amode addr code <- memConstant (widthInBytes w) lit
        loadFloatAmode False w addr code
-- catch simple cases of zero- or sign-extended load
getRegister (CmmMachOp (MO_UU_Conv W8 W32) [CmmLoad addr _]) = do
  code <- intLoadCode (MOVZxL II8) addr
  return (Any II32 code)

getRegister (CmmMachOp (MO_SS_Conv W8 W32) [CmmLoad addr _]) = do
  code <- intLoadCode (MOVSxL II8) addr
  return (Any II32 code)

getRegister (CmmMachOp (MO_UU_Conv W16 W32) [CmmLoad addr _]) = do
  code <- intLoadCode (MOVZxL II16) addr
  return (Any II32 code)

getRegister (CmmMachOp (MO_SS_Conv W16 W32) [CmmLoad addr _]) = do
  code <- intLoadCode (MOVSxL II16) addr
  return (Any II32 code)

#if x86_64_TARGET_ARCH

-- catch simple cases of zero- or sign-extended load
getRegister (CmmMachOp (MO_UU_Conv W8 W64) [CmmLoad addr _]) = do
  code <- intLoadCode (MOVZxL II8) addr
  return (Any II64 code)

getRegister (CmmMachOp (MO_SS_Conv W8 W64) [CmmLoad addr _]) = do
  code <- intLoadCode (MOVSxL II8) addr
  return (Any II64 code)

getRegister (CmmMachOp (MO_UU_Conv W16 W64) [CmmLoad addr _]) = do
  code <- intLoadCode (MOVZxL II16) addr
  return (Any II64 code)

getRegister (CmmMachOp (MO_SS_Conv W16 W64) [CmmLoad addr _]) = do
  code <- intLoadCode (MOVSxL II16) addr
  return (Any II64 code)

getRegister (CmmMachOp (MO_UU_Conv W32 W64) [CmmLoad addr _]) = do
  code <- intLoadCode (MOV II32) addr -- 32-bit loads zero-extend
  return (Any II64 code)

getRegister (CmmMachOp (MO_SS_Conv W32 W64) [CmmLoad addr _]) = do
  code <- intLoadCode (MOVSxL II32) addr
  return (Any II64 code)

-- PIC base plus literal displacement: fold into one rip-relative LEA
getRegister (CmmMachOp (MO_Add W64) [CmmReg (CmmGlobal PicBaseReg),
                                     CmmLit displacement])
  = return $ Any II64 (\dst -> unitOL $
        LEA II64 (OpAddr (ripRel (litToImm displacement))) (OpReg dst))

#endif /* x86_64_TARGET_ARCH */
getRegister (CmmMachOp mop [x]) = do -- unary MachOps
        -- float negation: SSE2 needs special-case code, x87 has GNEG
        | sse2      -> sse2NegCode w x
        | otherwise -> trivialUFCode FF80 (GNEG FF80) x

      MO_S_Neg w -> triv_ucode NEGI (intSize w)
      MO_Not w   -> triv_ucode NOT  (intSize w)

      -- narrowing conversions: route the value through a register that
      -- is byte- (resp. word-) addressable
      MO_UU_Conv W32 W8  -> toI8Reg  W32 x
      MO_SS_Conv W32 W8  -> toI8Reg  W32 x
      MO_UU_Conv W16 W8  -> toI8Reg  W16 x
      MO_SS_Conv W16 W8  -> toI8Reg  W16 x
      MO_UU_Conv W32 W16 -> toI16Reg W32 x
      MO_SS_Conv W32 W16 -> toI16Reg W32 x

#if x86_64_TARGET_ARCH
      MO_UU_Conv W64 W32 -> conversionNop II64 x
      MO_SS_Conv W64 W32 -> conversionNop II64 x
      MO_UU_Conv W64 W16 -> toI16Reg W64 x
      MO_SS_Conv W64 W16 -> toI16Reg W64 x
      MO_UU_Conv W64 W8  -> toI8Reg  W64 x
      MO_SS_Conv W64 W8  -> toI8Reg  W64 x

      -- same-width conversions are no-ops
      MO_UU_Conv rep1 rep2 | rep1 == rep2 -> conversionNop (intSize rep1) x
      MO_SS_Conv rep1 rep2 | rep1 == rep2 -> conversionNop (intSize rep1) x

      -- widening conversions: movzx / movsx
      MO_UU_Conv W8  W32 -> integerExtend W8  W32 MOVZxL x
      MO_UU_Conv W16 W32 -> integerExtend W16 W32 MOVZxL x
      MO_UU_Conv W8  W16 -> integerExtend W8  W16 MOVZxL x

      MO_SS_Conv W8  W32 -> integerExtend W8  W32 MOVSxL x
      MO_SS_Conv W16 W32 -> integerExtend W16 W32 MOVSxL x
      MO_SS_Conv W8  W16 -> integerExtend W8  W16 MOVSxL x

#if x86_64_TARGET_ARCH
      MO_UU_Conv W8  W64 -> integerExtend W8  W64 MOVZxL x
      MO_UU_Conv W16 W64 -> integerExtend W16 W64 MOVZxL x
      MO_UU_Conv W32 W64 -> integerExtend W32 W64 MOVZxL x
      MO_SS_Conv W8  W64 -> integerExtend W8  W64 MOVSxL x
      MO_SS_Conv W16 W64 -> integerExtend W16 W64 MOVSxL x
      MO_SS_Conv W32 W64 -> integerExtend W32 W64 MOVSxL x
        -- for 32-to-64 bit zero extension, amd64 uses an ordinary movl.
        -- However, we don't want the register allocator to throw it
        -- away as an unnecessary reg-to-reg move, so we keep it in
        -- the form of a movzl and print it as a movl later.

        | sse2      -> coerceFP2FP W64 x
        | otherwise -> conversionNop FF80 x

      MO_FF_Conv W64 W32 -> coerceFP2FP W32 x

      MO_FS_Conv from to -> coerceFP2Int from to x
      MO_SF_Conv from to -> coerceInt2FP from to x

      other -> pprPanic "getRegister" (pprMachOp mop)

    -- generic unary instruction applied to a freshly computed operand
    triv_ucode :: (Size -> Operand -> Instr) -> Size -> NatM Register
    triv_ucode instr size = trivialUCode size (instr size) x

    -- signed or unsigned extension.
    integerExtend :: Width -> Width
                  -> (Size -> Operand -> Operand -> Instr)
                  -> CmmExpr -> NatM Register
    integerExtend from to instr expr = do
        (reg,e_code) <- if from == W8 then getByteReg expr
            instr (intSize from) (OpReg reg) (OpReg dst)
        return (Any (intSize to) code)

    toI8Reg :: Width -> CmmExpr -> NatM Register
      = do codefn <- getAnyReg expr
           return (Any (intSize new_rep) codefn)
        -- HACK: use getAnyReg to get a byte-addressable register.
        -- If the source was a Fixed register, this will add the
        -- mov instruction to put it into the desired destination.
        -- We're assuming that the destination won't be a fixed
        -- non-byte-addressable register; it won't be, because all
        -- fixed registers are word-sized.

    toI16Reg = toI8Reg -- for now

    conversionNop :: Size -> CmmExpr -> NatM Register
    conversionNop new_size expr
        = do e_code <- getRegister expr
             return (swizzleRegisterRep e_code new_size)
getRegister e@(CmmMachOp mop [x, y]) = do -- dyadic MachOps
      -- floating-point comparisons, yielding a condition value
      MO_F_Eq w -> condFltReg EQQ x y
      MO_F_Ne w -> condFltReg NE  x y
      MO_F_Gt w -> condFltReg GTT x y
      MO_F_Ge w -> condFltReg GE  x y
      MO_F_Lt w -> condFltReg LTT x y
      MO_F_Le w -> condFltReg LE  x y

      MO_Eq rep   -> condIntReg EQQ x y
      MO_Ne rep   -> condIntReg NE  x y

      -- signed integer comparisons
      MO_S_Gt rep -> condIntReg GTT x y
      MO_S_Ge rep -> condIntReg GE  x y
      MO_S_Lt rep -> condIntReg LTT x y
      MO_S_Le rep -> condIntReg LE  x y

      -- unsigned comparisons use the unsigned condition codes
      MO_U_Gt rep -> condIntReg GU  x y
      MO_U_Ge rep -> condIntReg GEU x y
      MO_U_Lt rep -> condIntReg LU  x y
      MO_U_Le rep -> condIntReg LEU x y

      -- float arithmetic: SSE2 or x87 depending on availability
      MO_F_Add w  | sse2      -> trivialFCode_sse2 w ADD  x y
                  | otherwise -> trivialFCode_x87  w GADD x y
      MO_F_Sub w  | sse2      -> trivialFCode_sse2 w SUB  x y
                  | otherwise -> trivialFCode_x87  w GSUB x y
      MO_F_Quot w | sse2      -> trivialFCode_sse2 w FDIV x y
                  | otherwise -> trivialFCode_x87  w GDIV x y
      MO_F_Mul w  | sse2      -> trivialFCode_sse2 w MUL  x y
                  | otherwise -> trivialFCode_x87  w GMUL x y

      MO_Add rep -> add_code rep x y
      MO_Sub rep -> sub_code rep x y

      MO_S_Quot rep -> div_code rep True  True  x y
      MO_S_Rem  rep -> div_code rep True  False x y
      MO_U_Quot rep -> div_code rep False True  x y
      MO_U_Rem  rep -> div_code rep False False x y

      MO_S_MulMayOflo rep -> imulMayOflo rep x y

      MO_Mul rep -> triv_op rep IMUL
      MO_And rep -> triv_op rep AND
      MO_Or  rep -> triv_op rep OR
      MO_Xor rep -> triv_op rep XOR

        {- Shift ops on x86s have constraints on their source, it
           either has to be Imm, CL or 1
            => trivialCode is not restrictive enough (sigh.)
      MO_Shl rep   -> shift_code rep SHL x y {-False-}
      MO_U_Shr rep -> shift_code rep SHR x y {-False-}
      MO_S_Shr rep -> shift_code rep SAR x y {-False-}

      other -> pprPanic "getRegister(x86) - binary CmmMachOp (1)" (pprMachOp mop)

      -- helper: plain two-operand ALU instruction
      triv_op width instr = trivialCode width op (Just op) x y
                    where op = instr (intSize width)

      -- signed multiply with overflow check: multiply into %edx:%eax,
      -- then compare the high part with the sign-extension of the low part
      imulMayOflo :: Width -> CmmExpr -> CmmExpr -> NatM Register
      imulMayOflo rep a b = do
         (a_reg, a_code) <- getNonClobberedReg a
         b_code <- getAnyReg b
             shift_amt  = case rep of
                           _ -> panic "shift_amt"

             code = a_code `appOL` b_code eax `appOL`
                        IMUL2 size (OpReg a_reg),   -- result in %edx:%eax
                        SAR size (OpImm (ImmInt shift_amt)) (OpReg eax),
                                -- sign extend lower part
                        SUB size (OpReg edx) (OpReg eax)
                                -- compare against upper
                                -- eax==0 if high part == sign extended low part
         return (Fixed size eax code)

                    -> (Size -> Operand -> Operand -> Instr)

        {- Case1: shift length as immediate -}
      shift_code width instr x y@(CmmLit lit) = do
           x_code <- getAnyReg x
                  = x_code dst `snocOL`
                    instr size (OpImm (litToImm lit)) (OpReg dst)
           return (Any size code)

        {- Case2: shift length is complex (non-immediate)
          * y must go in %ecx.
          * we cannot do y first *and* put its result in %ecx, because
            %ecx might be clobbered by x.
          * if we do y second, then x cannot be
            in a clobbered reg.  Also, we cannot clobber x's reg
            with the instruction itself.
          So the safe thing to do is:
          - do y first, put its result in a fresh tmp, then copy it to %ecx later
          - do y second and put its result into %ecx.  x gets placed in a fresh
            tmp.  This is likely to be better, because the reg alloc can
            eliminate this reg->reg move here (it won't eliminate the other one,
            because the move is into the fixed %ecx).
      shift_code width instr x y{-amount-} = do
           x_code <- getAnyReg x
           let size = intSize width
           tmp <- getNewRegNat size
           y_code <- getAnyReg y
              code = x_code tmp `appOL`
                        instr size (OpReg ecx) (OpReg tmp)
           return (Fixed size tmp code)

      ----------------------
      add_code :: Width -> CmmExpr -> CmmExpr -> NatM Register
      -- adding a small literal: use the three-operand LEA form
      add_code rep x (CmmLit (CmmInt y _))
          | is32BitInteger y = add_int rep x y
      add_code rep x y = trivialCode rep (ADD size) (Just (ADD size)) x y
        where size = intSize rep

      ----------------------
      sub_code :: Width -> CmmExpr -> CmmExpr -> NatM Register
      -- subtracting a small literal: add its negation instead
      sub_code rep x (CmmLit (CmmInt y _))
          | is32BitInteger (-y) = add_int rep x (-y)
      sub_code rep x y = trivialCode rep (SUB (intSize rep)) Nothing x y

      -- our three-operand add instruction:
      add_int width x y = do
          (x_reg, x_code) <- getSomeReg x
              imm = ImmInt (fromInteger y)
                      (OpAddr (AddrBaseIndex (EABaseReg x_reg) EAIndexNone imm))
          return (Any size code)

      ----------------------
      -- hardware divide: dividend in %edx:%eax; quotient ends up in
      -- %eax and remainder in %edx
      div_code width signed quotient x y = do
           (y_op, y_code) <- getRegOrMem y -- cannot be clobbered
           x_code <- getAnyReg x
              -- widen the dividend into %edx: sign-extend when signed,
              -- zero %edx when unsigned
              widen | signed    = CLTD size
                    | otherwise = XOR size (OpReg edx) (OpReg edx)

              instr | signed    = IDIV

              code = y_code `appOL`
                     toOL [widen, instr size y_op]

              result | quotient  = eax

           return (Fixed size result code)
-- floating-point loads
getRegister (CmmLoad mem pk)
     Amode addr mem_code <- getAmode mem
     use_sse2 <- sse2Enabled
     loadFloatAmode use_sse2 (typeWidth pk) addr mem_code

-- narrow integer loads (i386 path)
getRegister (CmmLoad mem pk)
      code <- intLoadCode instr mem
      return (Any size code)
        instr = case width of
        -- We always zero-extend 8-bit loads, if we
        -- can't think of anything better.  This is because
        -- we can't guarantee access to an 8-bit variant of every register
        -- (esi and edi don't have 8-bit variants), so to make things
        -- simpler we do our 8-bit arithmetic with full 32-bit registers.

#if x86_64_TARGET_ARCH
-- Simpler memory load code on x86_64
getRegister (CmmLoad mem pk)
     code <- intLoadCode (MOV size) mem
     return (Any size code)
  where size = intSize $ typeWidth pk

-- a literal zero is cheapest as a self-xor
getRegister (CmmLit (CmmInt 0 width))
      -- x86_64: 32-bit xor is one byte shorter, and zero-extends to 64 bits
      adj_size = case size of II64 -> II32; _ -> size
      size1 = IF_ARCH_i386( size, adj_size )
        = unitOL (XOR size1 (OpReg dst) (OpReg dst))
    return (Any size code)

#if x86_64_TARGET_ARCH
  -- optimisation for loading small literals on x86_64: take advantage
  -- of the automatic zero-extension from 32 to 64 bits, because the 32-bit
  -- instruction forms are shorter.
getRegister (CmmLit lit)
  | isWord64 (cmmLitType lit), not (isBigLit lit)
        code dst = unitOL (MOV II32 (OpImm imm) (OpReg dst))
    return (Any II64 code)
        isBigLit (CmmInt i _) = i < 0 || i > 0xffffffff
        -- note1: not the same as (not.is32BitLit), because that checks for
        -- signed literals that fit in 32 bits, but we want unsigned
        -- note2: all labels are small, because we're assuming the
        -- small memory model (see gcc docs, -mcmodel=small).

-- generic literal: a plain move-immediate into any register
getRegister (CmmLit lit)
       size = cmmTypeSize (cmmLitType lit)
       code dst = unitOL (MOV size (OpImm imm) (OpReg dst))
    return (Any size code)

getRegister other = pprPanic "getRegister(x86)" (ppr other)
-- | Load the contents of a memory location into any register, using the
-- supplied load instruction; the destination is filled in later.
intLoadCode :: (Operand -> Operand -> Instr) -> CmmExpr
            -> NatM (Reg -> InstrBlock)
intLoadCode instr mem = do
    Amode src mem_code <- getAmode mem
    let load dst = instr (OpAddr src) (OpReg dst)
    return (\dst -> mem_code `snocOL` load dst)
-- Compute an expression into *any* register, adding the appropriate
-- move instruction if necessary.
getAnyReg :: CmmExpr -> NatM (Reg -> InstrBlock)
    r <- getRegister expr
-- | Turn a 'Register' into destination-parameterised code: 'Any' is
-- already in that form; 'Fixed' gets an extra reg-to-reg move appended.
anyReg :: Register -> NatM (Reg -> InstrBlock)
anyReg register =
    case register of
        Any _ code -> return code
        Fixed rep reg fcode ->
            return (\dst -> fcode `snocOL` reg2reg rep reg dst)
-- A bit like getSomeReg, but we want a reg that can be byte-addressed.
-- Fixed registers might not be byte-addressable, so we make sure we've
-- got a temporary, inserting an extra reg copy if necessary.
getByteReg :: CmmExpr -> NatM (Reg, InstrBlock)
#if x86_64_TARGET_ARCH
getByteReg = getSomeReg -- all regs are byte-addressable on x86_64
    r <- getRegister expr
      tmp <- getNewRegNat rep
      return (tmp, code tmp)
      -- virtual registers are always byte-addressable
      | isVirtualReg reg -> return (reg,code)
        tmp <- getNewRegNat rep
        return (tmp, code `snocOL` reg2reg rep reg tmp)
        -- ToDo: could optimise slightly by checking for byte-addressable
        -- real registers, but that will happen very rarely if at all.
-- Another variant: this time we want the result in a register that cannot
-- be modified by code to evaluate an arbitrary expression.
getNonClobberedReg :: CmmExpr -> NatM (Reg, InstrBlock)
getNonClobberedReg expr = do
  r <- getRegister expr
      tmp <- getNewRegNat rep
      return (tmp, code tmp)
      -- only free regs can be clobbered; copy such a result into a
      -- fresh temporary
      | RegReal (RealRegSingle rr) <- reg
      , isFastTrue (freeReg rr)
      tmp <- getNewRegNat rep
      return (tmp, code `snocOL` reg2reg rep reg tmp)
-- | A register-to-register move at the given size; 80-bit x87 values
-- need the special GMOV instruction.
reg2reg :: Size -> Reg -> Reg -> Instr
  | size == FF80 = GMOV src dst
  | otherwise    = MOV size (OpReg src) (OpReg dst)
--------------------------------------------------------------------------------
-- | Select an addressing mode ('Amode') for the given address expression.
getAmode :: CmmExpr -> NatM Amode
getAmode tree@(CmmRegOff _ _) = getAmode (mangleIndexTree tree)

#if x86_64_TARGET_ARCH
-- PIC base plus literal displacement is rip-relative addressing
getAmode (CmmMachOp (MO_Add W64) [CmmReg (CmmGlobal PicBaseReg),
                                  CmmLit displacement])
    = return $ Amode (ripRel (litToImm displacement)) nilOL

-- This is all just ridiculous, since it carefully undoes
-- what mangleIndexTree has just done.
getAmode (CmmMachOp (MO_Sub rep) [x, CmmLit lit@(CmmInt i _)])
  -- ASSERT(rep == II32)???
  = do (x_reg, x_code) <- getSomeReg x
       let off = ImmInt (-(fromInteger i))
       return (Amode (AddrBaseIndex (EABaseReg x_reg) EAIndexNone off) x_code)

getAmode (CmmMachOp (MO_Add rep) [x, CmmLit lit])
  -- ASSERT(rep == II32)???
  = do (x_reg, x_code) <- getSomeReg x
       let off = litToImm lit
       return (Amode (AddrBaseIndex (EABaseReg x_reg) EAIndexNone off) x_code)

-- Turn (lit1 << n + lit2) into (lit2 + lit1 << n) so it will be
-- recognised by the next rule.
getAmode (CmmMachOp (MO_Add rep) [a@(CmmMachOp (MO_Shl _) _),
    = getAmode (CmmMachOp (MO_Add rep) [b,a])

-- base + (index << scale), for hardware scales 1,2,4,8
getAmode (CmmMachOp (MO_Add rep) [x, CmmMachOp (MO_Shl _)
                                        [y, CmmLit (CmmInt shift _)]])
  | shift == 0 || shift == 1 || shift == 2 || shift == 3
  = x86_complex_amode x y shift 0
-- base + (index << scale) + offset, for hardware scales 1,2,4,8.
-- NB. the parentheses around the shift tests matter: (&&) binds tighter
-- than (||), so without them the is32BitInteger check would apply only
-- to shift == 3, and an over-32-bit offset with shift 0, 1 or 2 would
-- slip through the guard and be silently truncated by the ImmInt
-- conversion in x86_complex_amode.
getAmode (CmmMachOp (MO_Add rep)
                [x, CmmMachOp (MO_Add _)
                        [CmmMachOp (MO_Shl _) [y, CmmLit (CmmInt shift _)],
                         CmmLit (CmmInt offset _)]])
  | (shift == 0 || shift == 1 || shift == 2 || shift == 3)
    && is32BitInteger offset
  = x86_complex_amode x y shift offset
-- plain sum of two expressions: base + index at scale 1
getAmode (CmmMachOp (MO_Add rep) [x,y])
  = x86_complex_amode x y 0 0

-- a small literal can be an absolute address
getAmode (CmmLit lit) | is32BitLit lit
  = return (Amode (ImmAddr (litToImm lit) 0) nilOL)

-- fallback: compute the address into a register
     (reg,code) <- getSomeReg expr
     return (Amode (AddrBaseIndex (EABaseReg reg) EAIndexNone (ImmInt 0)) code)
-- | Build a base + index*2^shift + offset addressing mode.
x86_complex_amode :: CmmExpr -> CmmExpr -> Integer -> Integer -> NatM Amode
x86_complex_amode base index shift offset
  = do (x_reg, x_code) <- getNonClobberedReg base
        -- x must be in a temp, because it has to stay live over y_code
        -- we could compare x_reg and y_reg and do something better here...
       (y_reg, y_code) <- getSomeReg index
           code = x_code `appOL` y_code
           -- hardware scale factor 2^shift (NB. this 'base' shadows the parameter)
           base = case shift of 0 -> 1; 1 -> 2; 2 -> 4; 3 -> 8
       return (Amode (AddrBaseIndex (EABaseReg x_reg) (EAIndex y_reg base) (ImmInt (fromIntegral offset)))
-- -----------------------------------------------------------------------------
-- getOperand: sometimes any operand will do.

-- getNonClobberedOperand: the value of the operand will remain valid across
-- the computation of an arbitrary expression, unless the expression
-- is computed directly into a register which the operand refers to
-- (see trivialCode where this function is used for an example).

getNonClobberedOperand :: CmmExpr -> NatM (Operand, InstrBlock)
-- a suitable SSE2 float literal can be used straight from memory
getNonClobberedOperand (CmmLit lit) = do
  use_sse2 <- sse2Enabled
  if use_sse2 && isSuitableFloatingPointLit lit
      let CmmFloat _ w = lit
      Amode addr code <- memConstant (widthInBytes w) lit
      return (OpAddr addr, code)

   -- a small non-float literal can be an immediate operand
   if is32BitLit lit && not (isFloatType (cmmLitType lit))
    then return (OpImm (litToImm lit), nilOL)
    else getNonClobberedOperand_generic (CmmLit lit)

getNonClobberedOperand (CmmLoad mem pk) = do
  use_sse2 <- sse2Enabled
  if (not (isFloatType pk) || use_sse2)
      && IF_ARCH_i386(not (isWord64 pk), True)
     Amode src mem_code <- getAmode mem
     -- if the amode mentions a clobberable register, LEA the effective
     -- address into a fresh temporary first
     if (amodeCouldBeClobbered src)
         tmp <- getNewRegNat archWordSize
         return (AddrBaseIndex (EABaseReg tmp) EAIndexNone (ImmInt 0),
                 unitOL (LEA II32 (OpAddr src) (OpReg tmp)))
        return (OpAddr src', save_code `appOL` mem_code)
  getNonClobberedOperand_generic (CmmLoad mem pk)

getNonClobberedOperand e = getNonClobberedOperand_generic e
-- | Fallback: compute the expression into a non-clobberable register
-- and use that register as the operand.
getNonClobberedOperand_generic :: CmmExpr -> NatM (Operand, InstrBlock)
getNonClobberedOperand_generic expr = do
    (reg, instrs) <- getNonClobberedReg expr
    return (OpReg reg, instrs)
-- | True if any register mentioned in the addressing mode could be
-- clobbered by the evaluation of another expression.
amodeCouldBeClobbered :: AddrMode -> Bool
amodeCouldBeClobbered amode = any regClobbered (addrModeRegs amode)

-- only allocatable ("free") real registers can be clobbered
regClobbered (RegReal (RealRegSingle rr)) = isFastTrue (freeReg rr)
regClobbered _ = False
-- getOperand: the operand is not required to remain valid across the
-- computation of an arbitrary expression.
getOperand :: CmmExpr -> NatM (Operand, InstrBlock)

-- a suitable SSE2 float literal can be used straight from memory
getOperand (CmmLit lit) = do
  use_sse2 <- sse2Enabled
  if (use_sse2 && isSuitableFloatingPointLit lit)
      let CmmFloat _ w = lit
      Amode addr code <- memConstant (widthInBytes w) lit
      return (OpAddr addr, code)

   -- a small non-float literal can be an immediate operand
   if is32BitLit lit && not (isFloatType (cmmLitType lit))
     then return (OpImm (litToImm lit), nilOL)
     else getOperand_generic (CmmLit lit)

getOperand (CmmLoad mem pk) = do
  use_sse2 <- sse2Enabled
  if (not (isFloatType pk) || use_sse2) && IF_ARCH_i386(not (isWord64 pk), True)
     Amode src mem_code <- getAmode mem
     return (OpAddr src, mem_code)
   getOperand_generic (CmmLoad mem pk)

getOperand e = getOperand_generic e
-- | Generic fallback: evaluate the expression into some register and
-- use that register as the operand.
getOperand_generic expr = do
  (r, instrs) <- getSomeReg expr
  return (OpReg r, instrs)
-- | Can this expression be used directly as an instruction operand,
-- i.e. a memory reference or a suitable literal?
isOperand :: CmmExpr -> Bool
isOperand (CmmLoad _ _) = True
isOperand (CmmLit lit)  = is32BitLit lit
                          || isSuitableFloatingPointLit lit
-- anything else must first be computed into a register
isOperand _             = False
-- | Place a literal into a static data block and return an amode that
-- addresses it.  On x86_64 the constant is addressed RIP-relative;
-- otherwise the address goes through a dynamic (PIC-safe) reference.
memConstant :: Int -> CmmLit -> NatM Amode
memConstant align lit = do
#ifdef x86_64_TARGET_ARCH
  lbl <- getNewLabelNat
  let addr = ripRel (ImmCLbl lbl)
  lbl <- getNewLabelNat
  dflags <- getDynFlagsNat
  dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl
  Amode addr addr_code <- getAmode dynRef
  return (Amode addr code)
-- | Load a floating-point value of the given width from the given
-- address.  With SSE2 we emit a MOV at the natural float size;
-- otherwise we use the x87 GLD, whose result lives at FF80 (the x87
-- 80-bit internal format).
loadFloatAmode :: Bool -> Width -> AddrMode -> InstrBlock -> NatM Register
loadFloatAmode use_sse2 w addr addr_code = do
  let size = floatSize w
      code dst = addr_code `snocOL`
                 if use_sse2
                    then MOV size (OpAddr addr) (OpReg dst)
                    else GLD size addr dst
  return (Any (if use_sse2 then size else FF80) code)
-- if we want a floating-point literal as an operand, we can
-- use it directly from memory.  However, if the literal is
-- zero, we're better off generating it into a register using
-- xor.
-- | A float literal qualifies for direct use from memory unless it is
-- zero (zero is cheaper to materialise in a register).
isSuitableFloatingPointLit lit = case lit of
  CmmFloat f _ -> f /= 0.0
  _            -> False
-- | Like getOperand, but the operand must stay valid while other
-- expressions are computed, so register results come from
-- getNonClobberedReg.
getRegOrMem :: CmmExpr -> NatM (Operand, InstrBlock)
getRegOrMem e@(CmmLoad mem pk) = do
  use_sse2 <- sse2Enabled
  -- a natively-handled load can be a memory operand directly
  if (not (isFloatType pk) || use_sse2) && IF_ARCH_i386(not (isWord64 pk), True)
    Amode src mem_code <- getAmode mem
    return (OpAddr src, mem_code)
    (reg, code) <- getNonClobberedReg e
    return (OpReg reg, code)
  (reg, code) <- getNonClobberedReg e
  return (OpReg reg, code)
#if x86_64_TARGET_ARCH
-- On x86_64 only a subset of 64-bit integers fit the sign-extended
-- 32-bit immediate field of most instructions.
is32BitLit (CmmInt i W64) = is32BitInteger i
-- assume that labels are in the range 0-2^31-1: this assumes the
-- small memory model (see gcc docs, -mcmodel=small).
-- Set up a condition code for a conditional branch.

getCondCode :: CmmExpr -> NatM CondCode

-- yes, they really do seem to want exactly the same!
getCondCode (CmmMachOp mop [x, y])
    -- float comparisons, 32- and 64-bit, go via condFltCode
    MO_F_Eq W32 -> condFltCode EQQ x y
    MO_F_Ne W32 -> condFltCode NE x y
    MO_F_Gt W32 -> condFltCode GTT x y
    MO_F_Ge W32 -> condFltCode GE x y
    MO_F_Lt W32 -> condFltCode LTT x y
    MO_F_Le W32 -> condFltCode LE x y

    MO_F_Eq W64 -> condFltCode EQQ x y
    MO_F_Ne W64 -> condFltCode NE x y
    MO_F_Gt W64 -> condFltCode GTT x y
    MO_F_Ge W64 -> condFltCode GE x y
    MO_F_Lt W64 -> condFltCode LTT x y
    MO_F_Le W64 -> condFltCode LE x y

    -- integer comparisons: note signed and unsigned variants map to
    -- different condition codes (GTT/GE/... vs GU/GEU/...)
    MO_Eq rep -> condIntCode EQQ x y
    MO_Ne rep -> condIntCode NE x y

    MO_S_Gt rep -> condIntCode GTT x y
    MO_S_Ge rep -> condIntCode GE x y
    MO_S_Lt rep -> condIntCode LTT x y
    MO_S_Le rep -> condIntCode LE x y

    MO_U_Gt rep -> condIntCode GU x y
    MO_U_Ge rep -> condIntCode GEU x y
    MO_U_Lt rep -> condIntCode LU x y
    MO_U_Le rep -> condIntCode LEU x y

    other -> pprPanic "getCondCode(x86,x86_64,sparc)" (ppr (CmmMachOp mop [x,y]))

getCondCode other = pprPanic "getCondCode(2)(x86,sparc)" (ppr other)
-- @cond(Int|Flt)Code@: Turn a boolean expression into a condition, to be
-- passed back up the tree.

condIntCode :: Cond -> CmmExpr -> CmmExpr -> NatM CondCode

-- memory vs immediate: compare the loaded word against the literal
-- directly, no intermediate register required
condIntCode cond (CmmLoad x pk) (CmmLit lit) | is32BitLit lit = do
  Amode x_addr x_code <- getAmode x
  code = x_code `snocOL`
         CMP (cmmTypeSize pk) (OpImm imm) (OpAddr x_addr)
  return (CondCode False cond code)

-- anything vs zero, using a mask: TEST sets the flags from
-- (x .&. mask) without materialising the AND result
-- TODO: Add some sanity checking!!!!
condIntCode cond (CmmMachOp (MO_And rep) [x,o2]) (CmmLit (CmmInt 0 pk))
  | (CmmLit lit@(CmmInt mask pk2)) <- o2, is32BitLit lit
  (x_reg, x_code) <- getSomeReg x
  code = x_code `snocOL`
         TEST (intSize pk) (OpImm (ImmInteger mask)) (OpReg x_reg)
  return (CondCode False cond code)

-- anything vs zero: TEST x,x is shorter than CMP $0,x
condIntCode cond x (CmmLit (CmmInt 0 pk)) = do
  (x_reg, x_code) <- getSomeReg x
  code = x_code `snocOL`
         TEST (intSize pk) (OpReg x_reg) (OpReg x_reg)
  return (CondCode False cond code)

-- anything vs operand
condIntCode cond x y | isOperand y = do
  (x_reg, x_code) <- getNonClobberedReg x
  (y_op, y_code) <- getOperand y
  code = x_code `appOL` y_code `snocOL`
         CMP (cmmTypeSize (cmmExprType x)) y_op (OpReg x_reg)
  return (CondCode False cond code)

-- anything vs anything
condIntCode cond x y = do
  (y_reg, y_code) <- getNonClobberedReg y
  (x_op, x_code) <- getRegOrMem x
  code = y_code `appOL`
         CMP (cmmTypeSize (cmmExprType x)) (OpReg y_reg) x_op
  return (CondCode False cond code)
--------------------------------------------------------------------------------
condFltCode :: Cond -> CmmExpr -> CmmExpr -> NatM CondCode

condFltCode cond x y
  = if_sse2 condFltCode_sse2 condFltCode_x87
  -- x87 path: GCMP supports only the conditions asserted below
  = ASSERT(cond `elem` ([EQQ, NE, LE, LTT, GE, GTT])) do
  (x_reg, x_code) <- getNonClobberedReg x
  (y_reg, y_code) <- getSomeReg y
  use_sse2 <- sse2Enabled
  code = x_code `appOL` y_code `snocOL`
         GCMP cond x_reg y_reg
  -- The GCMP insn does the test and sets the zero flag if comparable
  -- and true. Hence we always supply EQQ as the condition to test.
  return (CondCode True EQQ code)

  -- in the SSE2 comparison ops (ucomiss, ucomisd) the left arg may be
  -- an operand, but the right must be a reg. We can probably do better
  -- than this general case...
  condFltCode_sse2 = do
    (x_reg, x_code) <- getNonClobberedReg x
    (y_op, y_code) <- getOperand y
    code = x_code `appOL`
           CMP (floatSize $ cmmExprWidth x) y_op (OpReg x_reg)
    -- NB(1): we need to use the unsigned comparison operators on the
    -- result of this comparison.
    return (CondCode True (condToUnsigned cond) code)
1335 -- -----------------------------------------------------------------------------
1336 -- Generating assignments
1338 -- Assignments are really at the heart of the whole code generation
1339 -- business. Almost all top-level nodes of any real importance are
1340 -- assignments, which correspond to loads, stores, or register
1341 -- transfers. If we're really lucky, some of the register transfers
1342 -- will go away, because we can use the destination register to
1343 -- complete the code generation for the right hand side. This only
1344 -- fails when the right hand side is forced into a fixed register
1345 -- (e.g. the result of a call).
-- Type signatures for the four assignment code generators below:
-- integer/float RHS, assigned either to memory or to a register.
assignMem_IntCode :: Size -> CmmExpr -> CmmExpr -> NatM InstrBlock
assignReg_IntCode :: Size -> CmmReg -> CmmExpr -> NatM InstrBlock

assignMem_FltCode :: Size -> CmmExpr -> CmmExpr -> NatM InstrBlock
assignReg_FltCode :: Size -> CmmReg -> CmmExpr -> NatM InstrBlock
-- integer assignment to memory

-- specific case of adding/subtracting an integer to a particular address.
-- ToDo: catch other cases where we can use an operation directly on a memory
-- address.
assignMem_IntCode pk addr (CmmMachOp op [CmmLoad addr2 _,
                                         CmmLit (CmmInt i _)])
  -- only when both sides address the same location, the immediate
  -- fits (on 64-bit), and the operator maps to a single instruction
  | addr == addr2, pk /= II64 || is32BitInteger i,
    Just instr <- check op
  = do Amode amode code_addr <- getAmode addr
       let code = code_addr `snocOL`
                  instr pk (OpImm (ImmInt (fromIntegral i))) (OpAddr amode)
       check (MO_Add _) = Just ADD
       check (MO_Sub _) = Just SUB

-- general case: evaluate RHS to a register or immediate, then store
assignMem_IntCode pk addr src = do
  Amode addr code_addr <- getAmode addr
  (code_src, op_src) <- get_op_RI src
  code = code_src `appOL`
         MOV pk op_src (OpAddr addr)
  -- NOTE: op_src is stable, so it will still be valid
  -- after code_addr. This may involve the introduction
  -- of an extra MOV to a temporary register, but we hope
  -- the register allocator will get rid of it.

get_op_RI :: CmmExpr -> NatM (InstrBlock,Operand) -- code, operator
get_op_RI (CmmLit lit) | is32BitLit lit
  = return (nilOL, OpImm (litToImm lit))
  = do (reg,code) <- getNonClobberedReg op
       return (code, OpReg reg)
-- | Integer assignment to a register.  When the RHS is a memory load
-- we can move straight from memory; otherwise the RHS is computed
-- directly into the destination register.
assignReg_IntCode pk reg (CmmLoad src _) = do
  mk_code <- intLoadCode (MOV pk) src
  return (mk_code (getRegisterReg False{-no sse2-} reg))

assignReg_IntCode pk reg src = do
  mk_code <- getAnyReg src
  return (mk_code (getRegisterReg False{-no sse2-} reg))
-- Floating point assignment to memory
assignMem_FltCode pk addr src = do
  (src_reg, src_code) <- getNonClobberedReg src
  Amode addr addr_code <- getAmode addr
  use_sse2 <- sse2Enabled
  code = src_code `appOL`
         -- SSE2 stores with a plain MOV; the x87 path uses GST
         if use_sse2 then MOV pk (OpReg src_reg) (OpAddr addr)
                     else GST pk src_reg addr
-- | Floating point assignment to a register/temporary.  The RHS is
-- computed directly into the destination; SSE2 availability decides
-- which register class the destination maps to.
assignReg_FltCode pk reg src = do
  use_sse2 <- sse2Enabled
  mk_code  <- getAnyReg src
  return (mk_code (getRegisterReg use_sse2 reg))
genJump :: CmmExpr{-the branch target-} -> NatM InstrBlock

-- Jump through a memory location: JMP takes a memory operand, so no
-- register is needed for the target.
genJump (CmmLoad mem pk) = do
  Amode target code <- getAmode mem
  return (code `snocOL` JMP (OpAddr target))

-- Jump to a literal (label) target.
genJump (CmmLit lit) = do
  return (unitOL (JMP (OpImm (litToImm lit))))

-- Anything else: compute the target into a register, jump through it.
genJump expr = do
  (reg,code) <- getSomeReg expr
  return (code `snocOL` JMP (OpReg reg))
-- -----------------------------------------------------------------------------
-- Unconditional branches

-- | Emit an unconditional jump to the given basic block.
genBranch :: BlockId -> NatM InstrBlock
genBranch bid = return (toOL (mkJumpInstr bid))
-- -----------------------------------------------------------------------------
-- Conditional jumps

-- Conditional jumps are always to local labels, so we can use branch
-- instructions. We peek at the arguments to decide what kind of
-- comparison to do.

-- I386: First, we have to ensure that the condition
-- codes are set according to the supplied comparison operation.

    :: BlockId      -- the branch target
    -> CmmExpr      -- the condition on which to branch

genCondJump id bool = do
  CondCode is_float cond cond_code <- getCondCode bool
  use_sse2 <- sse2Enabled
  if not is_float || not use_sse2
    -- integer (or x87 float) condition: a single conditional jump
    return (cond_code `snocOL` JXX cond id)
    -- SSE2 float condition: unordered operands (NaN) set the parity
    -- flag, which must be folded into the result
    lbl <- getBlockIdNat
    -- see comment with condFltReg
    let code = case cond of
          plain_test = unitOL (
          or_unordered = toOL [
          and_ordered = toOL [
    return (cond_code `appOL` code)
-- -----------------------------------------------------------------------------
-- Generating C calls

-- Now the biggest nightmare---calls. Most of the nastiness is buried in
-- @get_arg@, which moves the arguments to the correct registers/stack
-- locations. Apart from that, the code is easy.

-- (If applicable) Do not fill the delay slots here; you will confuse the
-- register allocator.

    :: CmmCallTarget            -- function to call
    -> HintedCmmFormals         -- where to put the result
    -> HintedCmmActuals         -- arguments (of mixed type)

-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

#if i386_TARGET_ARCH

genCCall (CmmPrim MO_WriteBarrier) _ _ = return nilOL
-- write barrier compiles to no code on x86/x86-64;
-- we keep it this long in order to prevent earlier optimisations.
-- we only cope with a single result for foreign calls
genCCall (CmmPrim op) [CmmHinted r _] args = do
  l1 <- getNewLabelNat
  l2 <- getNewLabelNat
  -- sqrt/trig ops are inlined as x87 instructions on the non-SSE2
  -- path; otherwise they go out of line
  outOfLineFloatOp op r args
    MO_F32_Sqrt -> actuallyInlineFloatOp GSQRT FF32 args
    MO_F64_Sqrt -> actuallyInlineFloatOp GSQRT FF64 args

    MO_F32_Sin -> actuallyInlineFloatOp (\s -> GSIN s l1 l2) FF32 args
    MO_F64_Sin -> actuallyInlineFloatOp (\s -> GSIN s l1 l2) FF64 args

    MO_F32_Cos -> actuallyInlineFloatOp (\s -> GCOS s l1 l2) FF32 args
    MO_F64_Cos -> actuallyInlineFloatOp (\s -> GCOS s l1 l2) FF64 args

    MO_F32_Tan -> actuallyInlineFloatOp (\s -> GTAN s l1 l2) FF32 args
    MO_F64_Tan -> actuallyInlineFloatOp (\s -> GTAN s l1 l2) FF64 args

    other_op -> outOfLineFloatOp op r args

  actuallyInlineFloatOp instr size [CmmHinted x _]
    = do res <- trivialUFCode size (instr size) x
         return (any (getRegisterReg False (CmmLocal r)))
genCCall target dest_regs args = do
    -- byte sizes of the arguments, innermost (pushed last) first
    sizes = map (arg_size . cmmExprType . hintlessCmm) (reverse args)
#if !darwin_TARGET_OS
    tot_arg_size = sum sizes
    -- Darwin requires 16-byte stack alignment at call sites, so the
    -- argument area is padded up to a multiple of 16
    raw_arg_size = sum sizes
    tot_arg_size = roundTo 16 raw_arg_size
    arg_pad_size = tot_arg_size - raw_arg_size
  delta0 <- getDeltaNat
  setDeltaNat (delta0 - arg_pad_size)

  use_sse2 <- sse2Enabled
  push_codes <- mapM (push_arg use_sse2) (reverse args)
  delta <- getDeltaNat

  -- deal with static vs dynamic call targets
  (callinsns,cconv) <-
    CmmCallee (CmmLit (CmmLabel lbl)) conv
       -> -- ToDo: stdcall arg sizes
          return (unitOL (CALL (Left fn_imm) []), conv)
       where fn_imm = ImmCLbl lbl
       -> do { (dyn_r, dyn_c) <- getSomeReg expr
             ; ASSERT( isWord32 (cmmExprType expr) )
               return (dyn_c `snocOL` CALL (Right dyn_r) [], conv) }

#if darwin_TARGET_OS
    -- push the alignment padding before the arguments proper
    = toOL [SUB II32 (OpImm (ImmInt arg_pad_size)) (OpReg esp),
            DELTA (delta0 - arg_pad_size)]
      `appOL` concatOL push_codes
    = concatOL push_codes

    -- Deallocate parameters after call for ccall;
    -- but not for stdcall (callee does it)
    --
    -- We have to pop any stack padding we added
    -- on Darwin even if we are doing stdcall, though (#5052)
    pop_size | cconv /= StdCallConv = tot_arg_size
#if darwin_TARGET_OS

    call = callinsns `appOL`
             (if pop_size==0 then [] else
              [ADD II32 (OpImm (ImmInt pop_size)) (OpReg esp)])
             [DELTA (delta + tot_arg_size)]

  setDeltaNat (delta + tot_arg_size)

    -- assign the results, if necessary
    assign_code [] = nilOL
    assign_code [CmmHinted dest _hint]
      -- x87 float result arrives in %st(0) ("fake0"); with SSE2 it is
      -- bounced through the stack into the destination register
      then let tmp_amode = AddrBaseIndex (EABaseReg esp)
           in toOL [ SUB II32 (OpImm (ImmInt b)) (OpReg esp),
                     GST sz fake0 tmp_amode,
                     MOV sz (OpAddr tmp_amode) (OpReg r_dest),
                     ADD II32 (OpImm (ImmInt b)) (OpReg esp)]
      else unitOL (GMOV fake0 r_dest)
      -- 64-bit integer results come back in edx:eax
      | isWord64 ty = toOL [MOV II32 (OpReg eax) (OpReg r_dest),
                            MOV II32 (OpReg edx) (OpReg r_dest_hi)]
      | otherwise = unitOL (MOV (intSize w) (OpReg eax) (OpReg r_dest))
      ty = localRegType dest
      r_dest_hi = getHiVRegFromLo r_dest
      r_dest = getRegisterReg use_sse2 (CmmLocal dest)
    assign_code many = pprPanic "genCCall.assign_code - too many return values:" (ppr many)

  return (push_code `appOL`
          assign_code dest_regs)

  arg_size :: CmmType -> Int  -- Width in bytes
  arg_size ty = widthInBytes (typeWidth ty)

  roundTo a x | x `mod` a == 0 = x
              | otherwise = x + a - (x `mod` a)

  push_arg :: Bool -> HintedCmmActual {-current argument-}
                   -> NatM InstrBlock  -- code

  push_arg use_sse2 (CmmHinted arg _hint) -- we don't need the hints on x86
    -- 64-bit words are pushed as two 32-bit halves, high word first
    | isWord64 arg_ty = do
      ChildCode64 code r_lo <- iselExpr64 arg
      delta <- getDeltaNat
      setDeltaNat (delta - 8)
      r_hi = getHiVRegFromLo r_lo
      return ( code `appOL`
               toOL [PUSH II32 (OpReg r_hi), DELTA (delta - 4),
                     PUSH II32 (OpReg r_lo), DELTA (delta - 8),

    -- floats are stored to the stack slot explicitly (no float PUSH)
    | isFloatType arg_ty = do
      (reg, code) <- getSomeReg arg
      delta <- getDeltaNat
      setDeltaNat (delta-size)
      return (code `appOL`
              toOL [SUB II32 (OpImm (ImmInt size)) (OpReg esp),
                    let addr = AddrBaseIndex (EABaseReg esp)
                    size = floatSize (typeWidth arg_ty)
                    then MOV size (OpReg reg) (OpAddr addr)
                    else GST size reg addr

      (operand, code) <- getOperand arg
      delta <- getDeltaNat
      setDeltaNat (delta-size)
      return (code `snocOL`
              PUSH II32 operand `snocOL`
       arg_ty = cmmExprType arg
       size = arg_size arg_ty -- Byte size
#elif x86_64_TARGET_ARCH

genCCall (CmmPrim MO_WriteBarrier) _ _ = return nilOL
-- write barrier compiles to no code on x86/x86-64;
-- we keep it this long in order to prevent earlier optimisations.

-- on x86_64 all the float primitives are done out of line
genCCall (CmmPrim op) [CmmHinted r _] args =
  outOfLineFloatOp op r args
genCCall target dest_regs args = do
    -- load up the register arguments
    (stack_args, aregs, fregs, load_args_code)
       <- load_args args allArgRegs allFPArgRegs nilOL

    -- which argument registers were actually consumed
    fp_regs_used  = reverse (drop (length fregs) (reverse allFPArgRegs))
    int_regs_used = reverse (drop (length aregs) (reverse allArgRegs))
    arg_regs = [eax] ++ int_regs_used ++ fp_regs_used
            -- for annotating the call instruction with
    sse_regs = length fp_regs_used
    tot_arg_size = arg_size * length stack_args

    -- On entry to the called function, %rsp should be aligned
    -- on a 16-byte boundary +8 (i.e. the first stack arg after
    -- the return address is 16-byte aligned). In STG land
    -- %rsp is kept 16-byte aligned (see StgCRun.c), so we just
    -- need to make sure we push a multiple of 16-bytes of args,
    -- plus the return address, to get the correct alignment.
    -- Urg, this is hard. We need to feed the delta back into
    -- the arg pushing code.
    (real_size, adjust_rsp) <-
        if tot_arg_size `rem` 16 == 0
            then return (tot_arg_size, nilOL)
            else do -- we need to adjust...
                delta <- getDeltaNat
                setDeltaNat (delta-8)
                return (tot_arg_size+8, toOL [
                    SUB II64 (OpImm (ImmInt 8)) (OpReg rsp),

    -- push the stack args, right to left
    push_code <- push_args (reverse stack_args) nilOL
    delta <- getDeltaNat

    -- deal with static vs dynamic call targets
    (callinsns,cconv) <-
        CmmCallee (CmmLit (CmmLabel lbl)) conv
           -> -- ToDo: stdcall arg sizes
              return (unitOL (CALL (Left fn_imm) arg_regs), conv)
           where fn_imm = ImmCLbl lbl
           -> do (dyn_r, dyn_c) <- getSomeReg expr
                 return (dyn_c `snocOL` CALL (Right dyn_r) arg_regs, conv)

    -- The x86_64 ABI requires us to set %al to the number of SSE2
    -- registers that contain arguments, if the called routine
    -- is a varargs function. We don't know whether it's a
    -- varargs function or not, so we have to assume it is.
    --
    -- It's not safe to omit this assignment, even if the number
    -- of SSE2 regs in use is zero. If %al is larger than 8
    -- on entry to a varargs function, seg faults ensue.
    assign_eax n = unitOL (MOV II32 (OpImm (ImmInt n)) (OpReg eax))

    let call = callinsns `appOL`
               -- Deallocate parameters after call for ccall;
               -- but not for stdcall (callee does it)
               (if cconv == StdCallConv || real_size==0 then [] else
                [ADD (intSize wordWidth) (OpImm (ImmInt real_size)) (OpReg esp)])
               [DELTA (delta + real_size)]

    setDeltaNat (delta + real_size)

    -- assign the results, if necessary
    assign_code [] = nilOL
    assign_code [CmmHinted dest _hint] =
        -- float results come back in %xmm0, integers in %rax
        case typeWidth rep of
          W32 | isFloatType rep -> unitOL (MOV (floatSize W32) (OpReg xmm0) (OpReg r_dest))
          W64 | isFloatType rep -> unitOL (MOV (floatSize W64) (OpReg xmm0) (OpReg r_dest))
          _ -> unitOL (MOV (cmmTypeSize rep) (OpReg rax) (OpReg r_dest))
      rep = localRegType dest
      r_dest = getRegisterReg True (CmmLocal dest)
    assign_code many = panic "genCCall.assign_code many"

    return (load_args_code `appOL`
            assign_eax sse_regs `appOL`
            assign_code dest_regs)

    arg_size = 8 -- always, at the mo
    load_args :: [CmmHinted CmmExpr]
              -> [Reg]          -- int regs avail for args
              -> [Reg]          -- FP regs avail for args
              -> NatM ([CmmHinted CmmExpr],[Reg],[Reg],InstrBlock)
    load_args args [] [] code = return (args, [], [], code)
        -- no more regs to use
    load_args [] aregs fregs code = return ([], aregs, fregs, code)
        -- no more args to push
    load_args ((CmmHinted arg hint) : rest) aregs fregs code
        -- float args consume FP argument registers, others int ones
        | isFloatType arg_rep =
            arg_code <- getAnyReg arg
            load_args rest aregs rs (code `appOL` arg_code r)
            arg_code <- getAnyReg arg
            load_args rest rs fregs (code `appOL` arg_code r)
        arg_rep = cmmExprType arg
          (args',ars,frs,code') <- load_args rest aregs fregs code
          return ((CmmHinted arg hint):args', ars, frs, code')

    push_args [] code = return code
    push_args ((CmmHinted arg hint):rest) code
        -- there is no float PUSH: make room and store explicitly
        | isFloatType arg_rep = do
          (arg_reg, arg_code) <- getSomeReg arg
          delta <- getDeltaNat
          setDeltaNat (delta-arg_size)
          let code' = code `appOL` arg_code `appOL` toOL [
                        SUB (intSize wordWidth) (OpImm (ImmInt arg_size)) (OpReg rsp) ,
                        DELTA (delta-arg_size),
                        MOV (floatSize width) (OpReg arg_reg) (OpAddr (spRel 0))]
          push_args rest code'

          -- we only ever generate word-sized function arguments.  Promotion
          -- has already happened: our Int8# type is kept sign-extended
          -- in an Int#, for example.
          ASSERT(width == W64) return ()
          (arg_op, arg_code) <- getOperand arg
          delta <- getDeltaNat
          setDeltaNat (delta-arg_size)
          let code' = code `appOL` arg_code `appOL` toOL [
                        DELTA (delta-arg_size)]
          push_args rest code'
        arg_rep = cmmExprType arg
        width = typeWidth arg_rep

-- fallback for architectures this module does not handle
genCCall = panic "X86.genCCAll: not defined"

#endif /* x86_64_TARGET_ARCH */
-- | Implement a floating-point CallishMachOp by an unsafe foreign
-- call to the corresponding libm routine ("sinf", "sqrt", ...).
outOfLineFloatOp :: CallishMachOp -> CmmFormal -> HintedCmmActuals -> NatM InstrBlock
outOfLineFloatOp mop res args
    dflags <- getDynFlagsNat
    targetExpr <- cmmMakeDynamicReference dflags addImportNat CallReference lbl
    let target = CmmCallee targetExpr CCallConv

    stmtToInstrs (CmmCall target [CmmHinted res NoHint] args CmmUnsafe CmmMayReturn)

    -- Assume we can call these functions directly, and that they're not in a dynamic library.
    -- TODO: Why is this ok? Under linux this code will be in libm.so
    --       Is it because they're really implemented as a primitive instruction by the assembler?? -- BL 2009/12/31
    lbl = mkForeignLabel fn Nothing ForeignLabelInThisPackage IsFunction

        -- map each machop to the name of its libm implementation
        MO_F32_Sqrt -> fsLit "sqrtf"
        MO_F32_Sin  -> fsLit "sinf"
        MO_F32_Cos  -> fsLit "cosf"
        MO_F32_Tan  -> fsLit "tanf"
        MO_F32_Exp  -> fsLit "expf"
        MO_F32_Log  -> fsLit "logf"

        MO_F32_Asin -> fsLit "asinf"
        MO_F32_Acos -> fsLit "acosf"
        MO_F32_Atan -> fsLit "atanf"

        MO_F32_Sinh -> fsLit "sinhf"
        MO_F32_Cosh -> fsLit "coshf"
        MO_F32_Tanh -> fsLit "tanhf"
        MO_F32_Pwr  -> fsLit "powf"

        MO_F64_Sqrt -> fsLit "sqrt"
        MO_F64_Sin  -> fsLit "sin"
        MO_F64_Cos  -> fsLit "cos"
        MO_F64_Tan  -> fsLit "tan"
        MO_F64_Exp  -> fsLit "exp"
        MO_F64_Log  -> fsLit "log"

        MO_F64_Asin -> fsLit "asin"
        MO_F64_Acos -> fsLit "acos"
        MO_F64_Atan -> fsLit "atan"

        MO_F64_Sinh -> fsLit "sinh"
        MO_F64_Cosh -> fsLit "cosh"
        MO_F64_Tanh -> fsLit "tanh"
        MO_F64_Pwr  -> fsLit "pow"
-- -----------------------------------------------------------------------------
-- Generating a table-branch

genSwitch :: CmmExpr -> [Maybe BlockId] -> NatM InstrBlock

    -- PIC case: the jump table holds label differences relative to
    -- the table's own label, which are resolved at run time
    (reg,e_code) <- getSomeReg expr
    lbl <- getNewLabelNat
    dflags <- getDynFlagsNat
    dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl
    (tableReg,t_code) <- getSomeReg $ dynRef
    jumpTable = map jumpTableEntryRel ids

    jumpTableEntryRel Nothing
        = CmmStaticLit (CmmInt 0 wordWidth)
    jumpTableEntryRel (Just blockid)
        = CmmStaticLit (CmmLabelDiffOff blockLabel lbl 0)
        where blockLabel = mkAsmTempLabel (getUnique blockid)

    -- index into the table: tableReg + reg * word size
    op = OpAddr (AddrBaseIndex (EABaseReg tableReg)
                               (EAIndex reg wORD_SIZE) (ImmInt 0))

#if x86_64_TARGET_ARCH
#if darwin_TARGET_OS
    -- on Mac OS X/x86_64, put the jump table in the text section
    -- to work around a limitation of the linker.
    -- ld64 is unable to handle the relocations for
    -- if L0 is not preceded by a non-anonymous label in its section.
    code = e_code `appOL` t_code `appOL` toOL [
            ADD (intSize wordWidth) op (OpReg tableReg),
            JMP_TBL (OpReg tableReg) [ id | Just id <- ids ],
            LDATA Text (CmmDataLabel lbl : jumpTable)

    -- HACK: On x86_64 binutils<2.17 is only able to generate PC32
    -- relocations, hence we only get 32-bit offsets in the jump
    -- table. As these offsets are always negative we need to properly
    -- sign extend them to 64-bit. This hack should be removed in
    -- conjunction with the hack in PprMach.hs/pprDataItem once
    -- binutils 2.17 is standard.
    code = e_code `appOL` t_code `appOL` toOL [
            LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
            (OpAddr (AddrBaseIndex (EABaseReg tableReg)
                                   (EAIndex reg wORD_SIZE) (ImmInt 0)))
            ADD (intSize wordWidth) (OpReg reg) (OpReg tableReg),
            JMP_TBL (OpReg tableReg) [ id | Just id <- ids ]

    code = e_code `appOL` t_code `appOL` toOL [
            LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
            ADD (intSize wordWidth) op (OpReg tableReg),
            JMP_TBL (OpReg tableReg) [ id | Just id <- ids ]

    -- non-PIC case: jump table holds absolute addresses; index it
    -- directly with the table label as displacement
    (reg,e_code) <- getSomeReg expr
    lbl <- getNewLabelNat
    jumpTable = map jumpTableEntry ids
    op = OpAddr (AddrBaseIndex EABaseNone (EAIndex reg wORD_SIZE) (ImmCLbl lbl))
    code = e_code `appOL` toOL [
            LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
            JMP_TBL op [ id | Just id <- ids ]
-- -----------------------------------------------------------------------------
-- 'condIntReg' and 'condFltReg': condition codes into registers

-- Turn those condition codes into integers now (when they appear on
-- the right hand side of an assignment).

-- (If applicable) Do not fill the delay slots here; you will confuse the
-- register allocator.

condIntReg :: Cond -> CmmExpr -> CmmExpr -> NatM Register

condIntReg cond x y = do
  CondCode _ cond cond_code <- condIntCode cond x y
  tmp <- getNewRegNat II8
  -- SETcc only writes a byte, so zero-extend into the destination
  code dst = cond_code `appOL` toOL [
              SETCC cond (OpReg tmp),
              MOVZxL II8 (OpReg tmp) (OpReg dst)
  return (Any II32 code)
condFltReg :: Cond -> CmmExpr -> CmmExpr -> NatM Register
condFltReg cond x y = if_sse2 condFltReg_sse2 condFltReg_x87

  -- x87: same shape as condIntReg (SETcc then zero-extend)
  CondCode _ cond cond_code <- condFltCode cond x y
  tmp <- getNewRegNat II8
  code dst = cond_code `appOL` toOL [
              SETCC cond (OpReg tmp),
              MOVZxL II8 (OpReg tmp) (OpReg dst)
  return (Any II32 code)

  condFltReg_sse2 = do
    CondCode _ cond cond_code <- condFltCode cond x y
    tmp1 <- getNewRegNat archWordSize
    tmp2 <- getNewRegNat archWordSize

    -- We have to worry about unordered operands (eg. comparisons
    -- against NaN). If the operands are unordered, the comparison
    -- sets the parity flag, carry flag and zero flag.
    -- All comparisons are supposed to return false for unordered
    -- operands except for !=, which returns true.
    --
    -- Optimisation: we don't have to test the parity flag if we
    -- know the test has already excluded the unordered case: eg >
    -- and >= test for a zero carry flag, which can only occur for
    -- ordered operands.
    --
    -- ToDo: by reversing comparisons we could avoid testing the
    -- parity flag in more cases.

                NE  -> or_unordered dst
                GU  -> plain_test dst
                GEU -> plain_test dst
                _   -> and_ordered dst)

    plain_test dst = toOL [
        SETCC cond (OpReg tmp1),
        MOVZxL II8 (OpReg tmp1) (OpReg dst)
    or_unordered dst = toOL [
        SETCC cond (OpReg tmp1),
        SETCC PARITY (OpReg tmp2),
        OR II8 (OpReg tmp1) (OpReg tmp2),
        MOVZxL II8 (OpReg tmp2) (OpReg dst)
    and_ordered dst = toOL [
        SETCC cond (OpReg tmp1),
        SETCC NOTPARITY (OpReg tmp2),
        AND II8 (OpReg tmp1) (OpReg tmp2),
        MOVZxL II8 (OpReg tmp2) (OpReg dst)
    return (Any II32 code)
2085 -- -----------------------------------------------------------------------------
2086 -- 'trivial*Code': deal with trivial instructions
2088 -- Trivial (dyadic: 'trivialCode', floating-point: 'trivialFCode',
2089 -- unary: 'trivialUCode', unary fl-pt:'trivialUFCode') instructions.
2090 -- Only look for constants on the right hand side, because that's
2091 -- where the generic optimizer will have put them.
2093 -- Similarly, for unary instructions, we don't have to worry about
2094 -- matching an StInt as the argument, because genericOpt will already
2095 -- have handled the constant-folding.
2099 The Rules of the Game are:
2101 * You cannot assume anything about the destination register dst;
2102 it may be anything, including a fixed reg.
2104 * You may compute an operand into a fixed reg, but you may not
2105 subsequently change the contents of that fixed reg. If you
2106 want to do so, first copy the value either to a temporary
2107 or into dst. You are free to modify dst even if it happens
2108 to be a fixed reg -- that's not your problem.
2110 * You cannot assume that a fixed reg will stay live over an
2111 arbitrary computation. The same applies to the dst reg.
2113 * Temporary regs obtained from getNewRegNat are distinct from
2114 each other and from all other regs, and stay live over
2115 arbitrary computations.
2117 --------------------
2119 SDM's version of The Rules:
2121 * If getRegister returns Any, that means it can generate correct
2122 code which places the result in any register, period. Even if that
2123 register happens to be read during the computation.
2125 Corollary #1: this means that if you are generating code for an
2126 operation with two arbitrary operands, you cannot assign the result
2127 of the first operand into the destination register before computing
2128 the second operand. The second operand might require the old value
2129 of the destination register.
2131 Corollary #2: A function might be able to generate more efficient
2132 code if it knows the destination register is a new temporary (and
2133 therefore not read by any of the sub-computations).
2135 * If getRegister returns Any, then the code it generates may modify only:
2136 (a) fresh temporaries
2137 (b) the destination register
2138 (c) known registers (eg. %ecx is used by shifts)
2139 In particular, it may *not* modify global registers, unless the global
2140 register happens to be the destination register.
-- literal on the LEFT and a reversed instruction available: swap the
-- operands so the literal can be an immediate
trivialCode width instr (Just revinstr) (CmmLit lit_a) b
  | is32BitLit lit_a = do
  b_code <- getAnyReg b
    = b_code dst `snocOL`
      revinstr (OpImm (litToImm lit_a)) (OpReg dst)
  return (Any (intSize width) code)

-- general case
trivialCode width instr maybe_revinstr a b
  = genTrivialCode (intSize width) instr a b
-- This is re-used for floating pt instructions too.
genTrivialCode rep instr a b = do
  (b_op, b_code) <- getNonClobberedOperand b
  a_code <- getAnyReg a
  tmp <- getNewRegNat rep

  -- We want the value of b to stay alive across the computation of a.
  -- But, we want to calculate a straight into the destination register,
  -- because the instruction only has two operands (dst := dst `op` src).
  -- The troublesome case is when the result of b is in the same register
  -- as the destination reg. In this case, we have to save b in a
  -- new temporary across the computation of a.
     | dst `regClashesWithOp` b_op =
         unitOL (MOV rep b_op (OpReg tmp)) `appOL`
         instr (OpReg tmp) (OpReg dst)
         instr b_op (OpReg dst)
  return (Any rep code)

  reg `regClashesWithOp` OpReg reg2   = reg == reg2
  reg `regClashesWithOp` OpAddr amode = any (==reg) (addrModeRegs amode)
  reg `regClashesWithOp` _            = False
-- unary op: compute the operand straight into the destination
trivialUCode rep instr x = do
  x_code <- getAnyReg x
  return (Any rep code)
-- x87 binary float op; everything is done at FF80 precision
trivialFCode_x87 width instr x y = do
  (x_reg, x_code) <- getNonClobberedReg x -- these work for float regs too
  (y_reg, y_code) <- getSomeReg y
  size = FF80 -- always, on x87
         instr size x_reg y_reg dst
  return (Any size code)
-- | SSE2 binary float op: reuse the generic two-operand code
-- generator, specialised at the float size for the given width.
trivialFCode_sse2 pk instr x y = genTrivialCode sz (instr sz) x y
  where sz = floatSize pk
-- unary float op
trivialUFCode size instr x = do
  (x_reg, x_code) <- getSomeReg x
  return (Any size code)
2223 --------------------------------------------------------------------------------
2224 coerceInt2FP :: Width -> Width -> CmmExpr -> NatM Register
-- | Integer-to-float conversion, dispatching on whether SSE2 is enabled.
2225 coerceInt2FP from to x = if_sse2 coerce_sse2 coerce_x87
-- x87 path: GITOF/GITOD produce an 80-bit (FF80) result.
-- NOTE(review): the case on 'to' is non-exhaustive — widths other than
-- W32/W64 would crash; presumably unreachable here, but worth confirming.
2228 (x_reg, x_code) <- getSomeReg x
2230 opc = case to of W32 -> GITOF; W64 -> GITOD
2231 code dst = x_code `snocOL` opc x_reg dst
2232 -- ToDo: works for non-II32 reps?
2233 return (Any FF80 code)
-- SSE2 path: CVTSI2SS/CVTSI2SD convert an integer operand of the source
-- width straight to the requested float size.
2236 (x_op, x_code) <- getOperand x -- ToDo: could be a safe operand
2238 opc = case to of W32 -> CVTSI2SS; W64 -> CVTSI2SD
2239 code dst = x_code `snocOL` opc (intSize from) x_op dst
2241 return (Any (floatSize to) code)
2242 -- works even if the destination rep is <II32
2242 -- works even if the destination rep is <II32
2244 --------------------------------------------------------------------------------
2245 coerceFP2Int :: Width -> Width -> CmmExpr -> NatM Register
-- | Float-to-integer conversion, dispatching on whether SSE2 is enabled.
2246 coerceFP2Int from to x = if_sse2 coerceFP2Int_sse2 coerceFP2Int_x87
-- x87 path: GFTOI/GDTOI, selected by the *source* float width.
-- NOTE(review): case on 'from' is non-exhaustive for widths other than
-- W32/W64 — presumably unreachable; confirm at call sites.
2248 coerceFP2Int_x87 = do
2249 (x_reg, x_code) <- getSomeReg x
2251 opc = case from of W32 -> GFTOI; W64 -> GDTOI
2252 code dst = x_code `snocOL` opc x_reg dst
2253 -- ToDo: works for non-II32 reps?
2255 return (Any (intSize to) code)
-- SSE2 path: truncating converts (CVTTSS2SIQ/CVTTSD2SIQ), writing an
-- integer of the destination width.
2257 coerceFP2Int_sse2 = do
2258 (x_op, x_code) <- getOperand x -- ToDo: could be a safe operand
2260 opc = case from of W32 -> CVTTSS2SIQ; W64 -> CVTTSD2SIQ
2261 code dst = x_code `snocOL` opc (intSize to) x_op dst
2263 return (Any (intSize to) code)
2264 -- works even if the destination rep is <II32
2267 --------------------------------------------------------------------------------
2268 coerceFP2FP :: Width -> CmmExpr -> NatM Register
-- | Float-to-float width conversion. Under SSE2 this is a real
-- CVTSD2SS/CVTSS2SD; without SSE2 the result stays at the x87 80-bit
-- width, hence the FF80 fallback in the returned size below.
2269 coerceFP2FP to x = do
2270 use_sse2 <- sse2Enabled
2271 (x_reg, x_code) <- getSomeReg x
2273 opc | use_sse2 = case to of W32 -> CVTSD2SS; W64 -> CVTSS2SD
2275 code dst = x_code `snocOL` opc x_reg dst
2277 return (Any (if use_sse2 then floatSize to else FF80) code)
2279 --------------------------------------------------------------------------------
2281 sse2NegCode :: Width -> CmmExpr -> NatM Register
-- | Negate a floating-point value under SSE2 by XORing its sign bit with
-- a mask loaded from a memory constant (0x80000000 / 0x8000000000000000).
2282 sse2NegCode w x = do
2283 let sz = floatSize w
2284 x_code <- getAnyReg x
2285 -- This is how gcc does it, so it can't be that bad:
-- Sign-bit mask: the top bit of a 32- or 64-bit word, matching the size.
2287 const | FF32 <- sz = CmmInt 0x80000000 W32
2288 | otherwise = CmmInt 0x8000000000000000 W64
-- Put the constant in memory and obtain its address.
2289 Amode amode amode_code <- memConstant (widthInBytes w) const
2290 tmp <- getNewRegNat sz
-- Compute x into dst, load the mask into tmp, then flip dst's sign bit.
2292 code dst = x_code dst `appOL` amode_code `appOL` toOL [
2293 MOV sz (OpAddr amode) (OpReg tmp),
2294 XOR sz (OpReg tmp) (OpReg dst)
2297 return (Any sz code)