1 -----------------------------------------------------------------------------
3 -- Generating machine code (instruction selection)
5 -- (c) The University of Glasgow 1996-2004
7 -----------------------------------------------------------------------------
9 -- This is a big module, but, if you pay attention to
10 -- (a) the sectioning, (b) the type signatures, and
-- (c) the #if blah_TARGET_ARCH things, the
12 -- structure should not be too overwhelming.
16 generateJumpTableForInstr,
22 #include "HsVersions.h"
23 #include "nativeGen/NCG.h"
24 #include "../includes/MachDeps.h"
38 -- Our intermediate code:
47 import StaticFlags ( opt_PIC )
48 import ForeignCall ( CCallConv(..) )
53 import FastBool ( isFastTrue )
54 import Constants ( wORD_SIZE )
57 import Control.Monad ( mapAndUnzipM )
58 import Data.Maybe ( catMaybes )
61 #if WORD_SIZE_IN_BITS==32
62 import Data.Maybe ( fromJust )
-- | Is code generation allowed to use SSE2 instructions for floating point?
sse2Enabled :: NatM Bool
#if x86_64_TARGET_ARCH
-- SSE2 is fixed on for x86_64. It would be possible to make it optional,
-- but we'd need to fix at least the foreign call code where the calling
-- convention specifies the use of xmm regs, and possibly other places.
sse2Enabled = return True
-- i386 case (branch structure elided in this view): consult the dynamic
-- flags, so SSE2 is only used when -msse2 (Opt_SSE2) was given.
  dflags <- getDynFlagsNat
  return (dopt Opt_SSE2 dflags)
-- | Run the first action when SSE2 is enabled, otherwise the second
-- (typically an x87 fallback).  Defining equation elided in this view.
if_sse2 :: NatM a -> NatM a -> NatM a
   if b then sse2 else x87
        -> NatM [NatCmmTop Instr]

-- Code-generate a whole Cmm top-level entity.  For a proc: generate
-- native code for each basic block, append any static data the blocks
-- produced, and (when a PIC base was requested) thread in the PIC
-- initialisation code.
cmmTopCodeGen (CmmProc info lab (ListGraph blocks)) = do
  (nat_blocks,statics) <- mapAndUnzipM basicBlockCodeGen blocks
  picBaseMb <- getPicBaseMaybeNat
  dflags <- getDynFlagsNat
  let proc = CmmProc info lab (ListGraph $ concat nat_blocks)
      tops = proc : concat statics
      os   = platformOS $ targetPlatform dflags
  -- (dispatch on picBaseMb; scrutinee line elided in this view)
        Just picBase -> initializePicBase_x86 ArchX86 os picBase tops
        Nothing -> return tops

cmmTopCodeGen (CmmData sec dat) = do
  return [CmmData sec dat]  -- no translation, we just use CmmStatic
        -> NatM ( [NatBasicBlock Instr]

basicBlockCodeGen (BasicBlock id stmts) = do
  instrs <- stmtsToInstrs stmts
  -- code generation may introduce new basic block boundaries, which
  -- are indicated by the NEWBLOCK instruction. We must split up the
  -- instruction stream into basic blocks again. Also, we extract
  -- LDATA pseudo-instructions into a separate list of statics.
        (top,other_blocks,statics) = foldrOL mkBlocks ([],[],[]) instrs
        -- right fold: NEWBLOCK starts a fresh block, LDATA is hoisted
        -- out as static data, everything else is an ordinary instr.
        mkBlocks (NEWBLOCK id) (instrs,blocks,statics)
          = ([], BasicBlock id instrs : blocks, statics)
        mkBlocks (LDATA sec dat) (instrs,blocks,statics)
          = (instrs, blocks, CmmData sec dat:statics)
        mkBlocks instr (instrs,blocks,statics)
          = (instr:instrs, blocks, statics)
  return (BasicBlock id top : other_blocks, statics)
-- | Generate instructions for a list of statements, concatenating the
-- per-statement instruction sequences in order.
stmtsToInstrs :: [CmmStmt] -> NatM InstrBlock
   = do instrss <- mapM stmtToInstrs stmts
        return (concatOL instrss)
-- | Generate instructions for a single Cmm statement.  Assignments are
-- dispatched on the type of the assigned value: float, 64-bit (on a
-- 32-bit target), or ordinary integer.  (The CmmAssign/CmmStore
-- scrutinee lines are elided in this view.)
stmtToInstrs :: CmmStmt -> NatM InstrBlock
stmtToInstrs stmt = case stmt of
    CmmNop -> return nilOL
    CmmComment s -> return (unitOL (COMMENT s))
      | isFloatType ty -> assignReg_FltCode size reg src
#if WORD_SIZE_IN_BITS==32
      | isWord64 ty -> assignReg_I64Code reg src
      | otherwise -> assignReg_IntCode size reg src
        where ty = cmmRegType reg
              size = cmmTypeSize ty
      | isFloatType ty -> assignMem_FltCode size addr src
#if WORD_SIZE_IN_BITS==32
      | isWord64 ty -> assignMem_I64Code addr src
      | otherwise -> assignMem_IntCode size addr src
        where ty = cmmExprType src
              size = cmmTypeSize ty
    CmmCall target result_regs args _ _
       -> genCCall target result_regs args
    CmmBranch id -> genBranch id
    CmmCondBranch arg id -> genCondJump id arg
    CmmSwitch arg ids -> genSwitch arg ids
    CmmJump arg _ -> genJump arg
    -- CmmReturn should not survive to this stage:
        panic "stmtToInstrs: return statement should have been cps'd away"
168 --------------------------------------------------------------------------------
169 -- | 'InstrBlock's are the insn sequences generated by the insn selectors.
170 -- They are really trees of insns to facilitate fast appending, where a
171 -- left-to-right traversal yields the insns in the correct order.
-- | Condition codes passed up the tree.
  = CondCode Bool Cond InstrBlock

#if WORD_SIZE_IN_BITS==32
-- | a.k.a "Register64"
-- Reg is the lower 32-bit temporary which contains the result.
-- Use getHiVRegFromLo to find the other VRegUnique.
-- Rules of this simplified insn selection game are therefore that
-- the returned Reg may be modified

-- | Registers passed up the tree. If the stix code forces the register
-- to live in a pre-decided machine register, it comes out as @Fixed@;
-- otherwise, it comes out as @Any@, and the parent can decide which
-- register to put it in.
  = Fixed Size Reg InstrBlock
  | Any Size (Reg -> InstrBlock)
-- | Change the 'Size' annotation on a 'Register' without touching the
-- code (or, for 'Fixed', the register) it carries.
swizzleRegisterRep :: Register -> Size -> Register
swizzleRegisterRep register size =
  case register of
    Fixed _ reg code -> Fixed size reg code
    Any   _ codefn   -> Any   size codefn
213 -- | Grab the Reg for a CmmReg
getRegisterReg :: Bool -> CmmReg -> Reg

getRegisterReg use_sse2 (CmmLocal (LocalReg u pk))
  -- Without SSE2, all float locals live in 80-bit x87 registers.
  | isFloatSize sz && not use_sse2 = RegVirtual (mkVirtualReg u FF80)
  | otherwise                      = RegVirtual (mkVirtualReg u sz)
  where sz = cmmTypeSize pk

getRegisterReg _ (CmmGlobal mid)
  = case globalRegMaybe mid of
        Just reg -> RegReal reg
        Nothing  -> pprPanic "getRegisterReg-memory" (ppr (CmmGlobal mid))
        -- By this stage, the only MagicIds remaining should be the
        -- ones which map to a real machine register on this
        -- platform.  Hence ...
231 -- | Memory addressing modes passed up the tree.
233 = Amode AddrMode InstrBlock
Now, given a tree (the argument to a CmmLoad) that references memory,
237 produce a suitable addressing mode.
239 A Rule of the Game (tm) for Amodes: use of the addr bit must
240 immediately follow use of the code part, since the code part puts
241 values in registers which the addr then refers to. So you can't put
242 anything in between, lest it overwrite some of those registers. If
243 you need to do some other computation between the code part and use of
244 the addr bit, first store the effective address from the amode in a
245 temporary, then do the other computation, and then use the temporary:
249 ... other computation ...
254 -- | Check whether an integer will fit in 32 bits.
255 -- A CmmInt is intended to be truncated to the appropriate
256 -- number of bits, so here we truncate it to Int64. This is
257 -- important because e.g. -1 as a CmmInt might be either
258 -- -1 or 18446744073709551615.
is32BitInteger :: Integer -> Bool
is32BitInteger i = minInt32 <= truncated && truncated <= maxInt32
  where
    -- Truncate to Int64 first so that, e.g., 2^64-1 is treated as -1.
    truncated = fromIntegral i :: Int64
    minInt32  = -0x80000000
    maxInt32  =  0x7fffffff
265 -- | Convert a BlockId to some CmmStatic data
jumpTableEntry :: Maybe BlockId -> CmmStatic
jumpTableEntry mbBlockId =
  case mbBlockId of
    -- An absent branch becomes a word-sized zero entry.
    Nothing      -> CmmStaticLit (CmmInt 0 wordWidth)
    Just blockid -> CmmStaticLit (CmmLabel (mkAsmTempLabel (getUnique blockid)))
272 -- -----------------------------------------------------------------------------
273 -- General things for putting together code sequences
275 -- Expand CmmRegOff. ToDo: should we do it this way around, or convert
276 -- CmmExprs into CmmRegOff?
mangleIndexTree :: CmmReg -> Int -> CmmExpr
mangleIndexTree reg off = CmmMachOp (MO_Add width) [CmmReg reg, offsetLit]
  where
    width     = typeWidth (cmmRegType reg)
    offsetLit = CmmLit (CmmInt (fromIntegral off) width)
-- | The dual to getAnyReg: compute an expression into a register, but
-- we don't mind which one it is.
getSomeReg :: CmmExpr -> NatM (Reg, InstrBlock)
  r <- getRegister expr
  -- (dispatch on r elided in this view; this arm handles the 'Any'
  -- case by allocating a fresh virtual register)
      tmp <- getNewRegNat rep
      return (tmp, code tmp)
#if WORD_SIZE_IN_BITS==32
-- | Store a 64-bit value to memory on a 32-bit target, as two 32-bit
-- little-endian stores (low word at the amode, high word at offset +4).
assignMem_I64Code :: CmmExpr -> CmmExpr -> NatM InstrBlock
assignMem_I64Code addrTree valueTree = do
  Amode addr addr_code <- getAmode addrTree
  ChildCode64 vcode rlo <- iselExpr64 valueTree
      -- the paired high register of the 64-bit virtual pair
      rhi = getHiVRegFromLo rlo

      -- Little-endian store
      mov_lo = MOV II32 (OpReg rlo) (OpAddr addr)
      mov_hi = MOV II32 (OpReg rhi) (OpAddr (fromJust (addrOffset addr 4)))
  return (vcode `appOL` addr_code `snocOL` mov_lo `snocOL` mov_hi)
-- | Assign a 64-bit value to a local register on a 32-bit target:
-- move both halves of the virtual register pair.
assignReg_I64Code :: CmmReg -> CmmExpr -> NatM InstrBlock
assignReg_I64Code (CmmLocal (LocalReg u_dst _)) valueTree = do
  ChildCode64 vcode r_src_lo <- iselExpr64 valueTree
      r_dst_lo = RegVirtual $ mkVirtualReg u_dst II32
      r_dst_hi = getHiVRegFromLo r_dst_lo
      r_src_hi = getHiVRegFromLo r_src_lo
      mov_lo = MOV II32 (OpReg r_src_lo) (OpReg r_dst_lo)
      mov_hi = MOV II32 (OpReg r_src_hi) (OpReg r_dst_hi)
      vcode `snocOL` mov_lo `snocOL` mov_hi

-- only local registers can hold 64-bit values here
assignReg_I64Code _ _
   = panic "assignReg_I64Code(i386): invalid lvalue"
-- | Select instructions for a 64-bit expression on a 32-bit target.
-- The result is a 'ChildCode64': code plus the low half of a virtual
-- register pair (the high half is reachable via getHiVRegFromLo).
iselExpr64 :: CmmExpr -> NatM ChildCode64

-- 64-bit literal: load low and high 32-bit words separately.
iselExpr64 (CmmLit (CmmInt i _)) = do
  (rlo,rhi) <- getNewRegPairNat II32
      r = fromIntegral (fromIntegral i :: Word32)
      q = fromIntegral ((fromIntegral i `shiftR` 32) :: Word32)
        MOV II32 (OpImm (ImmInteger r)) (OpReg rlo),
        MOV II32 (OpImm (ImmInteger q)) (OpReg rhi)
  return (ChildCode64 code rlo)

-- 64-bit load: two 32-bit loads, low word first.
iselExpr64 (CmmLoad addrTree ty) | isWord64 ty = do
   Amode addr addr_code <- getAmode addrTree
   (rlo,rhi) <- getNewRegPairNat II32
      mov_lo = MOV II32 (OpAddr addr) (OpReg rlo)
      mov_hi = MOV II32 (OpAddr (fromJust (addrOffset addr 4))) (OpReg rhi)
      ChildCode64 (addr_code `snocOL` mov_lo `snocOL` mov_hi)

iselExpr64 (CmmReg (CmmLocal (LocalReg vu ty))) | isWord64 ty
   = return (ChildCode64 nilOL (RegVirtual $ mkVirtualReg vu II32))

-- we handle addition, but rather badly
-- 64-bit add of a constant: 32-bit ADD of the low words, then ADC of
-- the high words to propagate the carry.
iselExpr64 (CmmMachOp (MO_Add _) [e1, CmmLit (CmmInt i _)]) = do
   ChildCode64 code1 r1lo <- iselExpr64 e1
   (rlo,rhi) <- getNewRegPairNat II32
      r = fromIntegral (fromIntegral i :: Word32)
      q = fromIntegral ((fromIntegral i `shiftR` 32) :: Word32)
      r1hi = getHiVRegFromLo r1lo
      toOL [ MOV II32 (OpReg r1lo) (OpReg rlo),
             ADD II32 (OpImm (ImmInteger r)) (OpReg rlo),
             MOV II32 (OpReg r1hi) (OpReg rhi),
             ADC II32 (OpImm (ImmInteger q)) (OpReg rhi) ]
   return (ChildCode64 code rlo)

-- general 64-bit add: same ADD/ADC pattern with a register operand.
iselExpr64 (CmmMachOp (MO_Add _) [e1,e2]) = do
   ChildCode64 code1 r1lo <- iselExpr64 e1
   ChildCode64 code2 r2lo <- iselExpr64 e2
   (rlo,rhi) <- getNewRegPairNat II32
      r1hi = getHiVRegFromLo r1lo
      r2hi = getHiVRegFromLo r2lo
      toOL [ MOV II32 (OpReg r1lo) (OpReg rlo),
             ADD II32 (OpReg r2lo) (OpReg rlo),
             MOV II32 (OpReg r1hi) (OpReg rhi),
             ADC II32 (OpReg r2hi) (OpReg rhi) ]
   return (ChildCode64 code rlo)

-- zero-extension to 64 bits: compute the narrow value into the low
-- register and clear the high register.
iselExpr64 (CmmMachOp (MO_UU_Conv _ W64) [expr]) = do
     r_dst_lo <- getNewRegNat II32
     let r_dst_hi = getHiVRegFromLo r_dst_lo
     ChildCode64 (code `snocOL`
                  MOV II32 (OpImm (ImmInt 0)) (OpReg r_dst_hi))

-- anything else is unsupported on i386
   = pprPanic "iselExpr64(i386)" (ppr expr)
404 --------------------------------------------------------------------------------
-- | Generate code to compute an expression, returning a 'Register'
-- describing where the result lives (a fixed register, or code
-- parameterised on a caller-chosen destination).
getRegister :: CmmExpr -> NatM Register

#if !x86_64_TARGET_ARCH
    -- on x86_64, we have %rip for PicBaseReg, but it's not a full-featured
    -- register, it can only be used for rip-relative addressing.
getRegister (CmmReg (CmmGlobal PicBaseReg))
      reg <- getPicBaseNat archWordSize
      return (Fixed archWordSize reg nilOL)

getRegister (CmmReg reg)
  = do use_sse2 <- sse2Enabled
           sz = cmmTypeSize (cmmRegType reg)
           -- x87 float locals live at FF80 regardless of their Cmm size
           size | not use_sse2 && isFloatSize sz = FF80
       return (Fixed size (getRegisterReg use_sse2 reg) nilOL)

getRegister (CmmRegOff r n)
  = getRegister $ mangleIndexTree r n
#if WORD_SIZE_IN_BITS==32
    -- for 32-bit architectures, support some 64 -> 32 bit conversions:
    -- TO_W_(x), TO_W_(x >> 32)

-- high word of a 64-bit value: the paired high virtual register.
getRegister (CmmMachOp (MO_UU_Conv W64 W32)
             [CmmMachOp (MO_U_Shr W64) [x,CmmLit (CmmInt 32 _)]]) = do
  ChildCode64 code rlo <- iselExpr64 x
  return $ Fixed II32 (getHiVRegFromLo rlo) code

getRegister (CmmMachOp (MO_SS_Conv W64 W32)
             [CmmMachOp (MO_U_Shr W64) [x,CmmLit (CmmInt 32 _)]]) = do
  ChildCode64 code rlo <- iselExpr64 x
  return $ Fixed II32 (getHiVRegFromLo rlo) code

-- low word of a 64-bit value: just the low register.
getRegister (CmmMachOp (MO_UU_Conv W64 W32) [x]) = do
  ChildCode64 code rlo <- iselExpr64 x
  return $ Fixed II32 rlo code

getRegister (CmmMachOp (MO_SS_Conv W64 W32) [x]) = do
  ChildCode64 code rlo <- iselExpr64 x
  return $ Fixed II32 rlo code
-- Float literals: dispatch to the SSE2 or x87 strategy.
getRegister (CmmLit lit@(CmmFloat f w)) =
  if_sse2 float_const_sse2 float_const_x87
      -- SSE2 zero: xor the destination with itself.
      code dst = unitOL (XOR size (OpReg dst) (OpReg dst))
        -- I don't know why there are xorpd, xorps, and pxor instructions.
        -- They all appear to do the same thing --SDM
      return (Any size code)

      -- general SSE2 case: load the literal from a memory constant
      Amode addr code <- memConstant (widthInBytes w) lit
      loadFloatAmode True w addr code

  float_const_x87 = case w of
        -- NOTE(review): the case arms are elided here; GLDZ/GLD1
        -- presumably handle the 0.0 and 1.0 special cases — confirm.
        let code dst = unitOL (GLDZ dst)
        in  return (Any FF80 code)

        let code dst = unitOL (GLD1 dst)
        in  return (Any FF80 code)

      Amode addr code <- memConstant (widthInBytes w) lit
      loadFloatAmode False w addr code
-- catch simple cases of zero- or sign-extended load
getRegister (CmmMachOp (MO_UU_Conv W8 W32) [CmmLoad addr _]) = do
  code <- intLoadCode (MOVZxL II8) addr
  return (Any II32 code)

getRegister (CmmMachOp (MO_SS_Conv W8 W32) [CmmLoad addr _]) = do
  code <- intLoadCode (MOVSxL II8) addr
  return (Any II32 code)

getRegister (CmmMachOp (MO_UU_Conv W16 W32) [CmmLoad addr _]) = do
  code <- intLoadCode (MOVZxL II16) addr
  return (Any II32 code)

getRegister (CmmMachOp (MO_SS_Conv W16 W32) [CmmLoad addr _]) = do
  code <- intLoadCode (MOVSxL II16) addr
  return (Any II32 code)
#if x86_64_TARGET_ARCH

-- catch simple cases of zero- or sign-extended load
getRegister (CmmMachOp (MO_UU_Conv W8 W64) [CmmLoad addr _]) = do
  code <- intLoadCode (MOVZxL II8) addr
  return (Any II64 code)

getRegister (CmmMachOp (MO_SS_Conv W8 W64) [CmmLoad addr _]) = do
  code <- intLoadCode (MOVSxL II8) addr
  return (Any II64 code)

getRegister (CmmMachOp (MO_UU_Conv W16 W64) [CmmLoad addr _]) = do
  code <- intLoadCode (MOVZxL II16) addr
  return (Any II64 code)

getRegister (CmmMachOp (MO_SS_Conv W16 W64) [CmmLoad addr _]) = do
  code <- intLoadCode (MOVSxL II16) addr
  return (Any II64 code)

getRegister (CmmMachOp (MO_UU_Conv W32 W64) [CmmLoad addr _]) = do
  code <- intLoadCode (MOV II32) addr -- 32-bit loads zero-extend
  return (Any II64 code)

getRegister (CmmMachOp (MO_SS_Conv W32 W64) [CmmLoad addr _]) = do
  code <- intLoadCode (MOVSxL II32) addr
  return (Any II64 code)

-- PIC-base-relative address: a single rip-relative LEA.
getRegister (CmmMachOp (MO_Add W64) [CmmReg (CmmGlobal PicBaseReg),
                                     CmmLit displacement])
  = return $ Any II64 (\dst -> unitOL $
        LEA II64 (OpAddr (ripRel (litToImm displacement))) (OpReg dst))

#endif /* x86_64_TARGET_ARCH */
getRegister (CmmMachOp mop [x]) = do -- unary MachOps
    -- float negation: SSE2 has its own sequence, x87 uses GNEG
        | sse2 -> sse2NegCode w x
        | otherwise -> trivialUFCode FF80 (GNEG FF80) x

    MO_S_Neg w -> triv_ucode NEGI (intSize w)
    MO_Not w -> triv_ucode NOT (intSize w)

    -- narrowing conversions
    MO_UU_Conv W32 W8 -> toI8Reg W32 x
    MO_SS_Conv W32 W8 -> toI8Reg W32 x
    MO_UU_Conv W16 W8 -> toI8Reg W16 x
    MO_SS_Conv W16 W8 -> toI8Reg W16 x
    MO_UU_Conv W32 W16 -> toI16Reg W32 x
    MO_SS_Conv W32 W16 -> toI16Reg W32 x

#if x86_64_TARGET_ARCH
    MO_UU_Conv W64 W32 -> conversionNop II64 x
    MO_SS_Conv W64 W32 -> conversionNop II64 x
    MO_UU_Conv W64 W16 -> toI16Reg W64 x
    MO_SS_Conv W64 W16 -> toI16Reg W64 x
    MO_UU_Conv W64 W8 -> toI8Reg W64 x
    MO_SS_Conv W64 W8 -> toI8Reg W64 x

    MO_UU_Conv rep1 rep2 | rep1 == rep2 -> conversionNop (intSize rep1) x
    MO_SS_Conv rep1 rep2 | rep1 == rep2 -> conversionNop (intSize rep1) x

    -- widening conversions
    MO_UU_Conv W8 W32 -> integerExtend W8 W32 MOVZxL x
    MO_UU_Conv W16 W32 -> integerExtend W16 W32 MOVZxL x
    MO_UU_Conv W8 W16 -> integerExtend W8 W16 MOVZxL x

    MO_SS_Conv W8 W32 -> integerExtend W8 W32 MOVSxL x
    MO_SS_Conv W16 W32 -> integerExtend W16 W32 MOVSxL x
    MO_SS_Conv W8 W16 -> integerExtend W8 W16 MOVSxL x

#if x86_64_TARGET_ARCH
    MO_UU_Conv W8 W64 -> integerExtend W8 W64 MOVZxL x
    MO_UU_Conv W16 W64 -> integerExtend W16 W64 MOVZxL x
    MO_UU_Conv W32 W64 -> integerExtend W32 W64 MOVZxL x
    MO_SS_Conv W8 W64 -> integerExtend W8 W64 MOVSxL x
    MO_SS_Conv W16 W64 -> integerExtend W16 W64 MOVSxL x
    MO_SS_Conv W32 W64 -> integerExtend W32 W64 MOVSxL x
        -- for 32-to-64 bit zero extension, amd64 uses an ordinary movl.
        -- However, we don't want the register allocator to throw it
        -- away as an unnecessary reg-to-reg move, so we keep it in
        -- the form of a movzl and print it as a movl later.

        | sse2 -> coerceFP2FP W64 x
        | otherwise -> conversionNop FF80 x

    MO_FF_Conv W64 W32 -> coerceFP2FP W32 x

    MO_FS_Conv from to -> coerceFP2Int from to x
    MO_SF_Conv from to -> coerceInt2FP from to x

    _other -> pprMachOp mop `seq` pprPanic "getRegister" (pprMachOp mop)

    -- a unary op applied via the generic trivialUCode helper
    triv_ucode :: (Size -> Operand -> Instr) -> Size -> NatM Register
    triv_ucode instr size = trivialUCode size (instr size) x

    -- signed or unsigned extension.
    integerExtend :: Width -> Width
                  -> (Size -> Operand -> Operand -> Instr)
                  -> CmmExpr -> NatM Register
    integerExtend from to instr expr = do
        (reg,e_code) <- if from == W8 then getByteReg expr
            instr (intSize from) (OpReg reg) (OpReg dst)
        return (Any (intSize to) code)

    toI8Reg :: Width -> CmmExpr -> NatM Register
      = do codefn <- getAnyReg expr
           return (Any (intSize new_rep) codefn)
        -- HACK: use getAnyReg to get a byte-addressable register.
        -- If the source was a Fixed register, this will add the
        -- mov instruction to put it into the desired destination.
        -- We're assuming that the destination won't be a fixed
        -- non-byte-addressable register; it won't be, because all
        -- fixed registers are word-sized.

    toI16Reg = toI8Reg -- for now

    conversionNop :: Size -> CmmExpr -> NatM Register
    conversionNop new_size expr
        = do e_code <- getRegister expr
             return (swizzleRegisterRep e_code new_size)
getRegister (CmmMachOp mop [x, y]) = do -- dyadic MachOps
    -- float comparisons
    MO_F_Eq _ -> condFltReg EQQ x y
    MO_F_Ne _ -> condFltReg NE x y
    MO_F_Gt _ -> condFltReg GTT x y
    MO_F_Ge _ -> condFltReg GE x y
    MO_F_Lt _ -> condFltReg LTT x y
    MO_F_Le _ -> condFltReg LE x y

    -- integer comparisons, signed and unsigned
    MO_Eq _ -> condIntReg EQQ x y
    MO_Ne _ -> condIntReg NE x y

    MO_S_Gt _ -> condIntReg GTT x y
    MO_S_Ge _ -> condIntReg GE x y
    MO_S_Lt _ -> condIntReg LTT x y
    MO_S_Le _ -> condIntReg LE x y

    MO_U_Gt _ -> condIntReg GU x y
    MO_U_Ge _ -> condIntReg GEU x y
    MO_U_Lt _ -> condIntReg LU x y
    MO_U_Le _ -> condIntReg LEU x y

    -- float arithmetic: SSE2 vs x87 instruction selection
    MO_F_Add w | sse2 -> trivialFCode_sse2 w ADD x y
               | otherwise -> trivialFCode_x87 GADD x y
    MO_F_Sub w | sse2 -> trivialFCode_sse2 w SUB x y
               | otherwise -> trivialFCode_x87 GSUB x y
    MO_F_Quot w | sse2 -> trivialFCode_sse2 w FDIV x y
                | otherwise -> trivialFCode_x87 GDIV x y
    MO_F_Mul w | sse2 -> trivialFCode_sse2 w MUL x y
               | otherwise -> trivialFCode_x87 GMUL x y

    MO_Add rep -> add_code rep x y
    MO_Sub rep -> sub_code rep x y

    MO_S_Quot rep -> div_code rep True True x y
    MO_S_Rem rep -> div_code rep True False x y
    MO_U_Quot rep -> div_code rep False True x y
    MO_U_Rem rep -> div_code rep False False x y

    MO_S_MulMayOflo rep -> imulMayOflo rep x y

    MO_Mul rep -> triv_op rep IMUL
    MO_And rep -> triv_op rep AND
    MO_Or rep -> triv_op rep OR
    MO_Xor rep -> triv_op rep XOR

        {- Shift ops on x86s have constraints on their source, it
           either has to be Imm, CL or 1
           => trivialCode is not restrictive enough (sigh.)
    MO_Shl rep -> shift_code rep SHL x y {-False-}
    MO_U_Shr rep -> shift_code rep SHR x y {-False-}
    MO_S_Shr rep -> shift_code rep SAR x y {-False-}

    _other -> pprPanic "getRegister(x86) - binary CmmMachOp (1)" (pprMachOp mop)

    triv_op width instr = trivialCode width op (Just op) x y
        where op = instr (intSize width)

    -- multiply and check for overflow via the high half of the result
    imulMayOflo :: Width -> CmmExpr -> CmmExpr -> NatM Register
    imulMayOflo rep a b = do
         (a_reg, a_code) <- getNonClobberedReg a
         b_code <- getAnyReg b
             shift_amt = case rep of
                _ -> panic "shift_amt"
             code = a_code `appOL` b_code eax `appOL`
                        IMUL2 size (OpReg a_reg), -- result in %edx:%eax
                        SAR size (OpImm (ImmInt shift_amt)) (OpReg eax),
                                -- sign extend lower part
                        SUB size (OpReg edx) (OpReg eax)
                                -- compare against upper
                                -- eax==0 if high part == sign extended low part
         return (Fixed size eax code)

        -> (Size -> Operand -> Operand -> Instr)

    {- Case1: shift length as immediate -}
    shift_code width instr x (CmmLit lit) = do
          x_code <- getAnyReg x
              = x_code dst `snocOL`
                instr size (OpImm (litToImm lit)) (OpReg dst)
          return (Any size code)

    {- Case2: shift length is complex (non-immediate)
       * we cannot do y first *and* put its result in %ecx, because
         %ecx might be clobbered by x.
       * if we do y second, then x cannot be
         in a clobbered reg. Also, we cannot clobber x's reg
         with the instruction itself.
       - do y first, put its result in a fresh tmp, then copy it to %ecx later
       - do y second and put its result into %ecx. x gets placed in a fresh
         tmp. This is likely to be better, because the reg alloc can
         eliminate this reg->reg move here (it won't eliminate the other one,
         because the move is into the fixed %ecx).
    shift_code width instr x y{-amount-} = do
        x_code <- getAnyReg x
        let size = intSize width
        tmp <- getNewRegNat size
        y_code <- getAnyReg y
           code = x_code tmp `appOL`
                  instr size (OpReg ecx) (OpReg tmp)
        return (Fixed size tmp code)

    add_code :: Width -> CmmExpr -> CmmExpr -> NatM Register
    add_code rep x (CmmLit (CmmInt y _))
        | is32BitInteger y = add_int rep x y
    add_code rep x y = trivialCode rep (ADD size) (Just (ADD size)) x y
      where size = intSize rep

    sub_code :: Width -> CmmExpr -> CmmExpr -> NatM Register
    sub_code rep x (CmmLit (CmmInt y _))
        | is32BitInteger (-y) = add_int rep x (-y)
    sub_code rep x y = trivialCode rep (SUB (intSize rep)) Nothing x y

    -- our three-operand add instruction:
    add_int width x y = do
        (x_reg, x_code) <- getSomeReg x
            imm = ImmInt (fromInteger y)
              (OpAddr (AddrBaseIndex (EABaseReg x_reg) EAIndexNone imm))
        return (Any size code)

    ----------------------
    -- x86 DIV/IDIV put quotient in %eax and remainder in %edx
    div_code width signed quotient x y = do
           (y_op, y_code) <- getRegOrMem y -- cannot be clobbered
           x_code <- getAnyReg x
             widen | signed = CLTD size
                   | otherwise = XOR size (OpReg edx) (OpReg edx)
             instr | signed = IDIV
             code = y_code `appOL`
                    toOL [widen, instr size y_op]
             result | quotient = eax
           return (Fixed size result code)
-- float load (guard elided in this view)
getRegister (CmmLoad mem pk)
      Amode addr mem_code <- getAmode mem
      use_sse2 <- sse2Enabled
      loadFloatAmode use_sse2 (typeWidth pk) addr mem_code

-- integer load (i386)
getRegister (CmmLoad mem pk)
      code <- intLoadCode instr mem
      return (Any size code)
        instr = case width of
        -- We always zero-extend 8-bit loads, if we
        -- can't think of anything better.  This is because
        -- we can't guarantee access to an 8-bit variant of every register
        -- (esi and edi don't have 8-bit variants), so to make things
        -- simpler we do our 8-bit arithmetic with full 32-bit registers.

#if x86_64_TARGET_ARCH
-- Simpler memory load code on x86_64
getRegister (CmmLoad mem pk)
      code <- intLoadCode (MOV size) mem
      return (Any size code)
  where size = intSize $ typeWidth pk

-- zero literal: xor the destination with itself
getRegister (CmmLit (CmmInt 0 width))
        -- x86_64: 32-bit xor is one byte shorter, and zero-extends to 64 bits
        size1 = IF_ARCH_i386( size, case size of II64 -> II32; _ -> size )
          = unitOL (XOR size1 (OpReg dst) (OpReg dst))
      return (Any size code)

#if x86_64_TARGET_ARCH
  -- optimisation for loading small literals on x86_64: take advantage
  -- of the automatic zero-extension from 32 to 64 bits, because the 32-bit
  -- instruction forms are shorter.
getRegister (CmmLit lit)
  | isWord64 (cmmLitType lit), not (isBigLit lit)
        code dst = unitOL (MOV II32 (OpImm imm) (OpReg dst))
      return (Any II64 code)
    isBigLit (CmmInt i _) = i < 0 || i > 0xffffffff
        -- note1: not the same as (not.is32BitLit), because that checks for
        -- signed literals that fit in 32 bits, but we want unsigned
        -- note2: all labels are small, because we're assuming the
        -- small memory model (see gcc docs, -mcmodel=small).

-- generic literal: a plain immediate MOV
getRegister (CmmLit lit)
      size = cmmTypeSize (cmmLitType lit)
      code dst = unitOL (MOV size (OpImm imm) (OpReg dst))
      return (Any size code)

getRegister other = pprPanic "getRegister(x86)" (ppr other)
-- | Build code that loads from a memory expression into a caller-chosen
-- destination register, using the supplied load instruction.
intLoadCode :: (Operand -> Operand -> Instr) -> CmmExpr
            -> NatM (Reg -> InstrBlock)
intLoadCode instr mem = do
    Amode src mem_code <- getAmode mem
    let load dst = instr (OpAddr src) (OpReg dst)
    return (\dst -> mem_code `snocOL` load dst)
-- Compute an expression into *any* register, adding the appropriate
-- move instruction if necessary.
getAnyReg :: CmmExpr -> NatM (Reg -> InstrBlock)
  -- (dispatch on the Register result elided in this view)
  r <- getRegister expr
-- | Turn a 'Register' into a function that puts its value into a
-- caller-chosen destination, adding a reg-to-reg move for 'Fixed'.
anyReg :: Register -> NatM (Reg -> InstrBlock)
anyReg register =
  case register of
    Any _ code          -> return code
    Fixed rep reg fcode -> return (\dst -> fcode `snocOL` reg2reg rep reg dst)
-- A bit like getSomeReg, but we want a reg that can be byte-addressed.
-- Fixed registers might not be byte-addressable, so we make sure we've
-- got a temporary, inserting an extra reg copy if necessary.
getByteReg :: CmmExpr -> NatM (Reg, InstrBlock)
#if x86_64_TARGET_ARCH
getByteReg = getSomeReg -- all regs are byte-addressable on x86_64
  -- i386 case (dispatch lines elided in this view):
  r <- getRegister expr
      -- Any: allocate a fresh (virtual, hence byte-addressable) temp
      tmp <- getNewRegNat rep
      return (tmp, code tmp)
      -- virtual Fixed regs are fine as-is
      | isVirtualReg reg -> return (reg,code)
      -- real Fixed regs get copied into a fresh virtual temp
          tmp <- getNewRegNat rep
          return (tmp, code `snocOL` reg2reg rep reg tmp)
  -- ToDo: could optimise slightly by checking for byte-addressable
  -- real registers, but that will happen very rarely if at all.
-- Another variant: this time we want the result in a register that cannot
-- be modified by code to evaluate an arbitrary expression.
getNonClobberedReg :: CmmExpr -> NatM (Reg, InstrBlock)
getNonClobberedReg expr = do
  r <- getRegister expr
  -- (dispatch on r elided in this view)
      tmp <- getNewRegNat rep
      return (tmp, code tmp)
      -- only free regs can be clobbered
      | RegReal (RealRegSingle rr) <- reg
      , isFastTrue (freeReg rr)
          tmp <- getNewRegNat rep
          return (tmp, code `snocOL` reg2reg rep reg tmp)
-- | A register-to-register move, using the x87 GMOV for 80-bit floats
-- and an ordinary MOV otherwise.  (Defining-equation head elided.)
reg2reg :: Size -> Reg -> Reg -> Instr
  | size == FF80 = GMOV src dst
  | otherwise    = MOV size (OpReg src) (OpReg dst)
959 --------------------------------------------------------------------------------
-- | Produce an addressing mode for a memory-referencing expression,
-- recognising base+offset and base+index*scale+offset shapes.
getAmode :: CmmExpr -> NatM Amode
getAmode (CmmRegOff r n) = getAmode $ mangleIndexTree r n

#if x86_64_TARGET_ARCH

getAmode (CmmMachOp (MO_Add W64) [CmmReg (CmmGlobal PicBaseReg),
                                  CmmLit displacement])
    = return $ Amode (ripRel (litToImm displacement)) nilOL

-- This is all just ridiculous, since it carefully undoes
-- what mangleIndexTree has just done.
getAmode (CmmMachOp (MO_Sub _rep) [x, CmmLit lit@(CmmInt i _)])
  -- ASSERT(rep == II32)???
  = do (x_reg, x_code) <- getSomeReg x
       let off = ImmInt (-(fromInteger i))
       return (Amode (AddrBaseIndex (EABaseReg x_reg) EAIndexNone off) x_code)

getAmode (CmmMachOp (MO_Add _rep) [x, CmmLit lit])
  -- ASSERT(rep == II32)???
  = do (x_reg, x_code) <- getSomeReg x
       let off = litToImm lit
       return (Amode (AddrBaseIndex (EABaseReg x_reg) EAIndexNone off) x_code)

-- Turn (lit1 << n + lit2) into (lit2 + lit1 << n) so it will be
-- recognised by the next rule.
getAmode (CmmMachOp (MO_Add rep) [a@(CmmMachOp (MO_Shl _) _),
  = getAmode (CmmMachOp (MO_Add rep) [b,a])

-- base + (index << shift): a scaled-index amode for shifts 0..3
getAmode (CmmMachOp (MO_Add _) [x, CmmMachOp (MO_Shl _)
                                      [y, CmmLit (CmmInt shift _)]])
  | shift == 0 || shift == 1 || shift == 2 || shift == 3
  = x86_complex_amode x y shift 0

getAmode (CmmMachOp (MO_Add _)
                [x, CmmMachOp (MO_Add _)
                        [CmmMachOp (MO_Shl _) [y, CmmLit (CmmInt shift _)],
                         CmmLit (CmmInt offset _)]])
  | shift == 0 || shift == 1 || shift == 2 || shift == 3
  && is32BitInteger offset
  = x86_complex_amode x y shift offset

getAmode (CmmMachOp (MO_Add _) [x,y])
  = x86_complex_amode x y 0 0

getAmode (CmmLit lit) | is32BitLit lit
  = return (Amode (ImmAddr (litToImm lit) 0) nilOL)

-- fallback (head elided): compute the address into a register
  (reg,code) <- getSomeReg expr
  return (Amode (AddrBaseIndex (EABaseReg reg) EAIndexNone (ImmInt 0)) code)
-- | Build a base + index*scale + offset addressing mode.
x86_complex_amode :: CmmExpr -> CmmExpr -> Integer -> Integer -> NatM Amode
x86_complex_amode base index shift offset
  = do (x_reg, x_code) <- getNonClobberedReg base
        -- x must be in a temp, because it has to stay live over y_code
        -- we could compare x_reg and y_reg and do something better here...
       (y_reg, y_code) <- getSomeReg index
          code = x_code `appOL` y_code
          -- translate the shift amount (0..3) into an x86 scale factor
          base = case shift of 0 -> 1; 1 -> 2; 2 -> 4; 3 -> 8;
                               n -> panic $ "x86_complex_amode: unhandled shift! (" ++ show n ++ ")"
       return (Amode (AddrBaseIndex (EABaseReg x_reg) (EAIndex y_reg base) (ImmInt (fromIntegral offset)))
1034 -- -----------------------------------------------------------------------------
1035 -- getOperand: sometimes any operand will do.
1037 -- getNonClobberedOperand: the value of the operand will remain valid across
1038 -- the computation of an arbitrary expression, unless the expression
1039 -- is computed directly into a register which the operand refers to
1040 -- (see trivialCode where this function is used for an example).
-- | Get an operand whose value survives the evaluation of an arbitrary
-- later expression (see the comment above trivialCode).
getNonClobberedOperand :: CmmExpr -> NatM (Operand, InstrBlock)
getNonClobberedOperand (CmmLit lit) = do
  use_sse2 <- sse2Enabled
  if use_sse2 && isSuitableFloatingPointLit lit
    -- SSE2 float literal: reference it directly from memory
    let CmmFloat _ w = lit
    Amode addr code <- memConstant (widthInBytes w) lit
    return (OpAddr addr, code)
    -- small non-float literal: use an immediate operand
    if is32BitLit lit && not (isFloatType (cmmLitType lit))
      then return (OpImm (litToImm lit), nilOL)
      else getNonClobberedOperand_generic (CmmLit lit)

getNonClobberedOperand (CmmLoad mem pk) = do
  use_sse2 <- sse2Enabled
  if (not (isFloatType pk) || use_sse2)
      && IF_ARCH_i386(not (isWord64 pk), True)
    Amode src mem_code <- getAmode mem
    if (amodeCouldBeClobbered src)
      -- materialise the effective address into a fresh temp so later
      -- code cannot clobber the registers the amode refers to
      tmp <- getNewRegNat archWordSize
      return (AddrBaseIndex (EABaseReg tmp) EAIndexNone (ImmInt 0),
              unitOL (LEA II32 (OpAddr src) (OpReg tmp)))
    return (OpAddr src', save_code `appOL` mem_code)
    getNonClobberedOperand_generic (CmmLoad mem pk)

getNonClobberedOperand e = getNonClobberedOperand_generic e
-- | Fallback: evaluate into a non-clobberable register and wrap it as
-- a register operand.
getNonClobberedOperand_generic :: CmmExpr -> NatM (Operand, InstrBlock)
getNonClobberedOperand_generic expr = do
  (r, instrs) <- getNonClobberedReg expr
  return (OpReg r, instrs)
-- | True if any register mentioned by the addressing mode might be
-- overwritten by intervening code.
amodeCouldBeClobbered :: AddrMode -> Bool
amodeCouldBeClobbered = any regClobbered . addrModeRegs
-- | Only allocatable (free) real registers can be clobbered; virtual
-- registers and pinned real registers are safe.
regClobbered :: Reg -> Bool
regClobbered reg =
  case reg of
    RegReal (RealRegSingle rr) -> isFastTrue (freeReg rr)
    _                          -> False
1088 -- getOperand: the operand is not required to remain valid across the
1089 -- computation of an arbitrary expression.
-- getOperand: the operand is not required to remain valid across the
-- computation of an arbitrary expression.
getOperand :: CmmExpr -> NatM (Operand, InstrBlock)

getOperand (CmmLit lit) = do
  use_sse2 <- sse2Enabled
  if (use_sse2 && isSuitableFloatingPointLit lit)
    -- SSE2 float literal: reference it directly from memory
    let CmmFloat _ w = lit
    Amode addr code <- memConstant (widthInBytes w) lit
    return (OpAddr addr, code)
    -- small non-float literal: immediate operand
    if is32BitLit lit && not (isFloatType (cmmLitType lit))
      then return (OpImm (litToImm lit), nilOL)
      else getOperand_generic (CmmLit lit)

getOperand (CmmLoad mem pk) = do
  use_sse2 <- sse2Enabled
  if (not (isFloatType pk) || use_sse2) && IF_ARCH_i386(not (isWord64 pk), True)
    Amode src mem_code <- getAmode mem
    return (OpAddr src, mem_code)
    getOperand_generic (CmmLoad mem pk)

getOperand e = getOperand_generic e
-- | Fallback: evaluate into some register and wrap it as a register
-- operand.
getOperand_generic :: CmmExpr -> NatM (Operand, InstrBlock)
getOperand_generic expr = do
  (r, instrs) <- getSomeReg expr
  return (OpReg r, instrs)
-- Can this expression be used directly as an instruction operand
-- (memory load, small integer immediate, or usable float literal)?
-- NOTE(review): the catch-all False equation is missing from this
-- extract; confirm against the full source.
1121 isOperand :: CmmExpr -> Bool
1122 isOperand (CmmLoad _ _) = True
1123 isOperand (CmmLit lit) = is32BitLit lit
1124 || isSuitableFloatingPointLit lit
-- Emit a literal into a fresh data block and return an amode addressing
-- it.  On x86_64 the label is addressed RIP-relative; on other targets it
-- goes through cmmMakeDynamicReference so PIC references work.
-- NOTE(review): the #else/#endif and the construction of `code` are
-- missing from this extract.
1127 memConstant :: Int -> CmmLit -> NatM Amode
1128 memConstant align lit = do
1129 #ifdef x86_64_TARGET_ARCH
1130 lbl <- getNewLabelNat
1131 let addr = ripRel (ImmCLbl lbl)
1134 lbl <- getNewLabelNat
1135 dflags <- getDynFlagsNat
1136 dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl
1137 Amode addr addr_code <- getAmode dynRef
1145 return (Amode addr code)
-- Load a float of the given width from the given address: plain MOV at
-- the natural float size under SSE2, otherwise an x87 GLD into an
-- 80-bit register (FF80).
-- NOTE(review): the `if use_sse2` line between 1151 and 1153 is missing
-- from this extract.
1148 loadFloatAmode :: Bool -> Width -> AddrMode -> InstrBlock -> NatM Register
1149 loadFloatAmode use_sse2 w addr addr_code = do
1150 let size = floatSize w
1151 code dst = addr_code `snocOL`
1153 then MOV size (OpAddr addr) (OpReg dst)
1154 else GLD size addr dst
-- Result register size follows the instruction choice: natural size for
-- SSE2, FF80 for x87.
1156 return (Any (if use_sse2 then size else FF80) code)
1159 -- if we want a floating-point literal as an operand, we can
1160 -- use it directly from memory. However, if the literal is
1161 -- zero, we're better off generating it into a register using
-- | A floating-point literal may be used directly from memory unless it
-- is zero, which (per the comment above) is better generated into a
-- register.
isSuitableFloatingPointLit :: CmmLit -> Bool
isSuitableFloatingPointLit lit = case lit of
  CmmFloat value _ -> value /= 0.0
  _                -> False
-- Like getOperand, but prefers a memory operand for loads and otherwise
-- produces a non-clobberable register.
-- NOTE(review): the then/else keywords and the final equation head are
-- missing from this extract.
1167 getRegOrMem :: CmmExpr -> NatM (Operand, InstrBlock)
1168 getRegOrMem e@(CmmLoad mem pk) = do
1169 use_sse2 <- sse2Enabled
1170 if (not (isFloatType pk) || use_sse2) && IF_ARCH_i386(not (isWord64 pk), True)
1172 Amode src mem_code <- getAmode mem
1173 return (OpAddr src, mem_code)
1175 (reg, code) <- getNonClobberedReg e
1176 return (OpReg reg, code)
1178 (reg, code) <- getNonClobberedReg e
1179 return (OpReg reg, code)
-- Can this literal be encoded as a 32-bit immediate?  On x86_64 a
-- 64-bit integer literal qualifies only if it fits in 32 bits.
-- NOTE(review): the remaining equations and #endif are outside this
-- extract.
1181 is32BitLit :: CmmLit -> Bool
1182 #if x86_64_TARGET_ARCH
1183 is32BitLit (CmmInt i W64) = is32BitInteger i
1184 -- assume that labels are in the range 0-2^31-1: this assumes the
1185 -- small memory model (see gcc docs, -mcmodel=small).
1192 -- Set up a condition code for a conditional branch.
1194 getCondCode :: CmmExpr -> NatM CondCode
1196 -- yes, they really do seem to want exactly the same!
-- Dispatch on the comparison MachOp: float comparisons go through
-- condFltCode, integer ones through condIntCode (signed vs unsigned
-- picked by the operator).  NOTE(review): the `= case mop of` line is
-- missing from this extract.
1198 getCondCode (CmmMachOp mop [x, y])
1201 MO_F_Eq W32 -> condFltCode EQQ x y
1202 MO_F_Ne W32 -> condFltCode NE x y
1203 MO_F_Gt W32 -> condFltCode GTT x y
1204 MO_F_Ge W32 -> condFltCode GE x y
1205 MO_F_Lt W32 -> condFltCode LTT x y
1206 MO_F_Le W32 -> condFltCode LE x y
1208 MO_F_Eq W64 -> condFltCode EQQ x y
1209 MO_F_Ne W64 -> condFltCode NE x y
1210 MO_F_Gt W64 -> condFltCode GTT x y
1211 MO_F_Ge W64 -> condFltCode GE x y
1212 MO_F_Lt W64 -> condFltCode LTT x y
1213 MO_F_Le W64 -> condFltCode LE x y
1215 MO_Eq _ -> condIntCode EQQ x y
1216 MO_Ne _ -> condIntCode NE x y
-- signed integer comparisons
1218 MO_S_Gt _ -> condIntCode GTT x y
1219 MO_S_Ge _ -> condIntCode GE x y
1220 MO_S_Lt _ -> condIntCode LTT x y
1221 MO_S_Le _ -> condIntCode LE x y
-- unsigned integer comparisons
1223 MO_U_Gt _ -> condIntCode GU x y
1224 MO_U_Ge _ -> condIntCode GEU x y
1225 MO_U_Lt _ -> condIntCode LU x y
1226 MO_U_Le _ -> condIntCode LEU x y
1228 _other -> pprPanic "getCondCode(x86,x86_64,sparc)" (ppr (CmmMachOp mop [x,y]))
-- Anything that is not a two-argument comparison MachOp is a bug in the
-- caller.
1230 getCondCode other = pprPanic "getCondCode(2)(x86,sparc)" (ppr other)
1235 -- @cond(Int|Flt)Code@: Turn a boolean expression into a condition, to be
1236 -- passed back up the tree.
1238 condIntCode :: Cond -> CmmExpr -> CmmExpr -> NatM CondCode
1240 -- memory vs immediate
-- CMP directly against memory when comparing a load with a small
-- literal.  NOTE(review): the `let imm = ...` line is missing from this
-- extract.
1241 condIntCode cond (CmmLoad x pk) (CmmLit lit) | is32BitLit lit = do
1242 Amode x_addr x_code <- getAmode x
1245 code = x_code `snocOL`
1246 CMP (cmmTypeSize pk) (OpImm imm) (OpAddr x_addr)
1248 return (CondCode False cond code)
1250 -- anything vs zero, using a mask
1251 -- TODO: Add some sanity checking!!!!
-- (x .&. mask) `cmp` 0  ==>  TEST mask, x — avoids materialising the AND.
1252 condIntCode cond (CmmMachOp (MO_And _) [x,o2]) (CmmLit (CmmInt 0 pk))
1253 | (CmmLit lit@(CmmInt mask _)) <- o2, is32BitLit lit
1255 (x_reg, x_code) <- getSomeReg x
1257 code = x_code `snocOL`
1258 TEST (intSize pk) (OpImm (ImmInteger mask)) (OpReg x_reg)
1260 return (CondCode False cond code)
-- anything vs zero: TEST reg,reg sets the flags without an immediate.
1263 condIntCode cond x (CmmLit (CmmInt 0 pk)) = do
1264 (x_reg, x_code) <- getSomeReg x
1266 code = x_code `snocOL`
1267 TEST (intSize pk) (OpReg x_reg) (OpReg x_reg)
1269 return (CondCode False cond code)
1271 -- anything vs operand
-- x must survive the computation of y, hence getNonClobberedReg.
1272 condIntCode cond x y | isOperand y = do
1273 (x_reg, x_code) <- getNonClobberedReg x
1274 (y_op, y_code) <- getOperand y
1276 code = x_code `appOL` y_code `snocOL`
1277 CMP (cmmTypeSize (cmmExprType x)) y_op (OpReg x_reg)
1279 return (CondCode False cond code)
1281 -- anything vs anything
-- General case: y into a stable register, x as register-or-memory.
1282 condIntCode cond x y = do
1283 (y_reg, y_code) <- getNonClobberedReg y
1284 (x_op, x_code) <- getRegOrMem x
1286 code = y_code `appOL`
1288 CMP (cmmTypeSize (cmmExprType x)) (OpReg y_reg) x_op
1290 return (CondCode False cond code)
1294 --------------------------------------------------------------------------------
1295 condFltCode :: Cond -> CmmExpr -> CmmExpr -> NatM CondCode
-- Float comparisons: pick x87 or SSE2 at runtime via if_sse2.
1297 condFltCode cond x y
1298 = if_sse2 condFltCode_sse2 condFltCode_x87
-- NOTE(review): the `condFltCode_x87 = do` equation head is missing
-- from this extract.
1302 = ASSERT(cond `elem` ([EQQ, NE, LE, LTT, GE, GTT])) do
1303 (x_reg, x_code) <- getNonClobberedReg x
1304 (y_reg, y_code) <- getSomeReg y
1306 code = x_code `appOL` y_code `snocOL`
1307 GCMP cond x_reg y_reg
1308 -- The GCMP insn does the test and sets the zero flag if comparable
1309 -- and true. Hence we always supply EQQ as the condition to test.
1310 return (CondCode True EQQ code)
1312 -- in the SSE2 comparison ops (ucomiss, ucomisd) the left arg may be
1313 -- an operand, but the right must be a reg. We can probably do better
1314 -- than this general case...
1315 condFltCode_sse2 = do
1316 (x_reg, x_code) <- getNonClobberedReg x
1317 (y_op, y_code) <- getOperand y
1319 code = x_code `appOL`
1321 CMP (floatSize $ cmmExprWidth x) y_op (OpReg x_reg)
1322 -- NB(1): we need to use the unsigned comparison operators on the
1323 -- result of this comparison.
1325 return (CondCode True (condToUnsigned cond) code)
1327 -- -----------------------------------------------------------------------------
1328 -- Generating assignments
1330 -- Assignments are really at the heart of the whole code generation
1331 -- business. Almost all top-level nodes of any real importance are
1332 -- assignments, which correspond to loads, stores, or register
1333 -- transfers. If we're really lucky, some of the register transfers
1334 -- will go away, because we can use the destination register to
1335 -- complete the code generation for the right hand side. This only
1336 -- fails when the right hand side is forced into a fixed register
1337 -- (e.g. the result of a call).
-- Type signatures for the four assignment code generators defined below:
-- integer/float source into memory or into a register destination.
1339 assignMem_IntCode :: Size -> CmmExpr -> CmmExpr -> NatM InstrBlock
1340 assignReg_IntCode :: Size -> CmmReg -> CmmExpr -> NatM InstrBlock
1342 assignMem_FltCode :: Size -> CmmExpr -> CmmExpr -> NatM InstrBlock
1343 assignReg_FltCode :: Size -> CmmReg -> CmmExpr -> NatM InstrBlock
1346 -- integer assignment to memory
1348 -- specific case of adding/subtracting an integer to a particular address.
1349 -- ToDo: catch other cases where we can use an operation directly on a memory
-- mem[addr] := mem[addr] +/- imm is fused into one ADD/SUB-to-memory
-- instruction when the literal fits (32-bit, unless the store is not
-- 64-bit wide).  NOTE(review): the `return code`/`where` lines between
-- 1357 and 1360 are missing from this extract.
1351 assignMem_IntCode pk addr (CmmMachOp op [CmmLoad addr2 _,
1352 CmmLit (CmmInt i _)])
1353 | addr == addr2, pk /= II64 || is32BitInteger i,
1354 Just instr <- check op
1355 = do Amode amode code_addr <- getAmode addr
1356 let code = code_addr `snocOL`
1357 instr pk (OpImm (ImmInt (fromIntegral i))) (OpAddr amode)
-- Only add and subtract are fused; anything else falls through.
1360 check (MO_Add _) = Just ADD
1361 check (MO_Sub _) = Just SUB
-- General case: compute the source as a stable reg-or-immediate, then
-- store it with a single MOV.
1366 assignMem_IntCode pk addr src = do
1367 Amode addr code_addr <- getAmode addr
1368 (code_src, op_src) <- get_op_RI src
1370 code = code_src `appOL`
1372 MOV pk op_src (OpAddr addr)
1373 -- NOTE: op_src is stable, so it will still be valid
1374 -- after code_addr. This may involve the introduction
1375 -- of an extra MOV to a temporary register, but we hope
1376 -- the register allocator will get rid of it.
-- Produce a stable register-or-immediate operand: small literals become
-- immediates, everything else goes into a non-clobberable register.
-- NOTE(review): the second equation's head (`get_op_RI op`) is missing
-- from this extract.
1380 get_op_RI :: CmmExpr -> NatM (InstrBlock,Operand) -- code, operator
1381 get_op_RI (CmmLit lit) | is32BitLit lit
1382 = return (nilOL, OpImm (litToImm lit))
1384 = do (reg,code) <- getNonClobberedReg op
1385 return (code, OpReg reg)
1388 -- Assign; dst is a reg, rhs is mem
-- Loads go through intLoadCode (which can fold the addressing mode into
-- the MOV); the register-hint argument False means "not an SSE2 reg".
1389 assignReg_IntCode pk reg (CmmLoad src _) = do
1390 load_code <- intLoadCode (MOV pk) src
1391 return (load_code (getRegisterReg False{-no sse2-} reg))
1393 -- dst is a reg, but src could be anything
1394 assignReg_IntCode _ reg src = do
1395 code <- getAnyReg src
1396 return (code (getRegisterReg False{-no sse2-} reg))
1399 -- Floating point assignment to memory
-- SSE2 stores with a plain MOV; x87 uses GST.  NOTE(review): the `let`
-- and final `return code` lines are missing from this extract.
1400 assignMem_FltCode pk addr src = do
1401 (src_reg, src_code) <- getNonClobberedReg src
1402 Amode addr addr_code <- getAmode addr
1403 use_sse2 <- sse2Enabled
1405 code = src_code `appOL`
1407 if use_sse2 then MOV pk (OpReg src_reg) (OpAddr addr)
1408 else GST pk src_reg addr
1411 -- Floating point assignment to a register/temporary
-- The destination register class depends on whether SSE2 is in use.
1412 assignReg_FltCode _ reg src = do
1413 use_sse2 <- sse2Enabled
1414 src_code <- getAnyReg src
1415 return (src_code (getRegisterReg use_sse2 reg))
-- Unconditional computed jumps: through memory, to an immediate label,
-- or via a register.  NOTE(review): the head of the final equation
-- (`genJump expr = do`) is missing from this extract.
1418 genJump :: CmmExpr{-the branch target-} -> NatM InstrBlock
1420 genJump (CmmLoad mem _) = do
1421 Amode target code <- getAmode mem
1422 return (code `snocOL` JMP (OpAddr target))
1424 genJump (CmmLit lit) = do
1425 return (unitOL (JMP (OpImm (litToImm lit))))
1428 (reg,code) <- getSomeReg expr
1429 return (code `snocOL` JMP (OpReg reg))
1432 -- -----------------------------------------------------------------------------
1433 -- Unconditional branches
-- | An unconditional branch to a local label: just the target-specific
-- jump instruction sequence.
genBranch :: BlockId -> NatM InstrBlock
genBranch dest = return (toOL (mkJumpInstr dest))
1440 -- -----------------------------------------------------------------------------
1441 -- Conditional jumps
1444 Conditional jumps are always to local labels, so we can use branch
1445 instructions. We peek at the arguments to decide what kind of
1448 I386: First, we have to ensure that the condition
1449 codes are set according to the supplied comparison operation.
-- Conditional jump: evaluate the condition into flags, then JXX.
-- Non-float (or x87-float) conditions are a single JXX; SSE2 float
-- conditions need extra parity handling for unordered (NaN) operands.
-- NOTE(review): large parts of the float path (the case alternatives
-- and instruction lists) are missing from this extract.
1453 :: BlockId -- the branch target
1454 -> CmmExpr -- the condition on which to branch
1457 genCondJump id bool = do
1458 CondCode is_float cond cond_code <- getCondCode bool
1459 use_sse2 <- sse2Enabled
1460 if not is_float || not use_sse2
1462 return (cond_code `snocOL` JXX cond id)
1464 lbl <- getBlockIdNat
1466 -- see comment with condFltReg
1467 let code = case cond of
1473 plain_test = unitOL (
1476 or_unordered = toOL [
1480 and_ordered = toOL [
1486 return (cond_code `appOL` code)
1489 -- -----------------------------------------------------------------------------
1490 -- Generating C calls
1492 -- Now the biggest nightmare---calls. Most of the nastiness is buried in
1493 -- @get_arg@, which moves the arguments to the correct registers/stack
1494 -- locations. Apart from that, the code is easy.
1496 -- (If applicable) Do not fill the delay slots here; you will confuse the
1497 -- register allocator.
-- genCCall type and the i386 CmmPrim equations: write barriers are
-- no-ops, void prim ops go out of line, and a handful of float ops
-- (sqrt/sin/cos/tan) are inlined as x87 instructions.
1500 :: CmmCallTarget -- function to call
1501 -> [HintedCmmFormal] -- where to put the result
1502 -> [HintedCmmActual] -- arguments (of mixed type)
1505 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1507 #if i386_TARGET_ARCH
1509 genCCall (CmmPrim MO_WriteBarrier) _ _ = return nilOL
1510 -- write barrier compiles to no code on x86/x86-64;
1511 -- we keep it this long in order to prevent earlier optimisations.
1513 -- void return type prim op
1514 genCCall (CmmPrim op) [] args =
1515 outOfLineCmmOp op Nothing args
1517 -- we only cope with a single result for foreign calls
-- NOTE(review): the `case op of` / sse2 guard lines between 1520 and
-- 1524 are missing from this extract.
1518 genCCall (CmmPrim op) [r_hinted@(CmmHinted r _)] args = do
1519 l1 <- getNewLabelNat
1520 l2 <- getNewLabelNat
1524 outOfLineCmmOp op (Just r_hinted) args
1526 MO_F32_Sqrt -> actuallyInlineFloatOp GSQRT FF32 args
1527 MO_F64_Sqrt -> actuallyInlineFloatOp GSQRT FF64 args
-- GSIN/GCOS/GTAN take two fresh labels (used by their expansions).
1529 MO_F32_Sin -> actuallyInlineFloatOp (\s -> GSIN s l1 l2) FF32 args
1530 MO_F64_Sin -> actuallyInlineFloatOp (\s -> GSIN s l1 l2) FF64 args
1532 MO_F32_Cos -> actuallyInlineFloatOp (\s -> GCOS s l1 l2) FF32 args
1533 MO_F64_Cos -> actuallyInlineFloatOp (\s -> GCOS s l1 l2) FF64 args
1535 MO_F32_Tan -> actuallyInlineFloatOp (\s -> GTAN s l1 l2) FF32 args
1536 MO_F64_Tan -> actuallyInlineFloatOp (\s -> GTAN s l1 l2) FF64 args
1538 _other_op -> outOfLineCmmOp op (Just r_hinted) args
-- Inline a unary x87 float op and move the result into r.
1541 actuallyInlineFloatOp instr size [CmmHinted x _]
1542 = do res <- trivialUFCode size (instr size) x
1544 return (any (getRegisterReg False (CmmLocal r)))
1546 actuallyInlineFloatOp _ _ args
1547 = panic $ "genCCall.actuallyInlineFloatOp: bad number of arguments! ("
1548 ++ show (length args) ++ ")"
-- i386 C calls: push args right-to-left, CALL, pop args (ccall) and
-- fetch results from st(0)/eax:edx.  On Darwin the stack is first
-- padded so the total argument area is 16-byte aligned.
-- NOTE(review): this extract is missing many physical lines (#else /
-- #endif lines, `let` heads, `where` keywords); line structure below is
-- incomplete.
1550 genCCall target dest_regs args = do
1552 sizes = map (arg_size . cmmExprType . hintlessCmm) (reverse args)
1553 #if !darwin_TARGET_OS
1554 tot_arg_size = sum sizes
-- Darwin: round the argument area up to 16 bytes and remember the pad.
1556 raw_arg_size = sum sizes
1557 tot_arg_size = roundTo 16 raw_arg_size
1558 arg_pad_size = tot_arg_size - raw_arg_size
1559 delta0 <- getDeltaNat
1560 setDeltaNat (delta0 - arg_pad_size)
1563 use_sse2 <- sse2Enabled
-- Arguments are pushed in reverse so the first arg ends up lowest.
1564 push_codes <- mapM (push_arg use_sse2) (reverse args)
1565 delta <- getDeltaNat
1568 -- deal with static vs dynamic call targets
1569 (callinsns,cconv) <-
1571 CmmCallee (CmmLit (CmmLabel lbl)) conv
1572 -> -- ToDo: stdcall arg sizes
1573 return (unitOL (CALL (Left fn_imm) []), conv)
1574 where fn_imm = ImmCLbl lbl
1576 -> do { (dyn_r, dyn_c) <- getSomeReg expr
1577 ; ASSERT( isWord32 (cmmExprType expr) )
1578 return (dyn_c `snocOL` CALL (Right dyn_r) [], conv) }
1580 -> panic $ "genCCall: Can't handle CmmPrim call type here, error "
1581 ++ "probably because too many return values."
1584 #if darwin_TARGET_OS
-- Emit the alignment padding before the pushed arguments.
1586 = toOL [SUB II32 (OpImm (ImmInt arg_pad_size)) (OpReg esp),
1587 DELTA (delta0 - arg_pad_size)]
1588 `appOL` concatOL push_codes
1591 = concatOL push_codes
1593 -- Deallocate parameters after call for ccall;
1594 -- but not for stdcall (callee does it)
1596 -- We have to pop any stack padding we added
1597 -- on Darwin even if we are doing stdcall, though (#5052)
1598 pop_size | cconv /= StdCallConv = tot_arg_size
1600 #if darwin_TARGET_OS
1606 call = callinsns `appOL`
1608 (if pop_size==0 then [] else
1609 [ADD II32 (OpImm (ImmInt pop_size)) (OpReg esp)])
1611 [DELTA (delta + tot_arg_size)]
1614 setDeltaNat (delta + tot_arg_size)
1617 -- assign the results, if necessary
1618 assign_code [] = nilOL
1619 assign_code [CmmHinted dest _hint]
-- Float result: comes back in st(0) (fake0).  Without SSE2 a GMOV
-- suffices; with SSE2 the value is bounced through a stack slot so it
-- can be MOVed into an xmm register.
1622 then let tmp_amode = AddrBaseIndex (EABaseReg esp)
1626 in toOL [ SUB II32 (OpImm (ImmInt b)) (OpReg esp),
1627 GST sz fake0 tmp_amode,
1628 MOV sz (OpAddr tmp_amode) (OpReg r_dest),
1629 ADD II32 (OpImm (ImmInt b)) (OpReg esp)]
1630 else unitOL (GMOV fake0 r_dest)
-- 64-bit integer results come back in eax (low) and edx (high).
1631 | isWord64 ty = toOL [MOV II32 (OpReg eax) (OpReg r_dest),
1632 MOV II32 (OpReg edx) (OpReg r_dest_hi)]
1633 | otherwise = unitOL (MOV (intSize w) (OpReg eax) (OpReg r_dest))
1635 ty = localRegType dest
1638 r_dest_hi = getHiVRegFromLo r_dest
1639 r_dest = getRegisterReg use_sse2 (CmmLocal dest)
1640 assign_code many = pprPanic "genCCall.assign_code - too many return values:" (ppr many)
1642 return (push_code `appOL`
1644 assign_code dest_regs)
1647 arg_size :: CmmType -> Int -- Width in bytes
1648 arg_size ty = widthInBytes (typeWidth ty)
1650 #if darwin_TARGET_OS
-- Round x up to the nearest multiple of a.
1651 roundTo a x | x `mod` a == 0 = x
1652 | otherwise = x + a - (x `mod` a)
1655 push_arg :: Bool -> HintedCmmActual {-current argument-}
1656 -> NatM InstrBlock -- code
1658 push_arg use_sse2 (CmmHinted arg _hint) -- we don't need the hints on x86
-- 64-bit args: push high word, then low word (two 4-byte pushes).
1659 | isWord64 arg_ty = do
1660 ChildCode64 code r_lo <- iselExpr64 arg
1661 delta <- getDeltaNat
1662 setDeltaNat (delta - 8)
1664 r_hi = getHiVRegFromLo r_lo
1666 return ( code `appOL`
1667 toOL [PUSH II32 (OpReg r_hi), DELTA (delta - 4),
1668 PUSH II32 (OpReg r_lo), DELTA (delta - 8),
-- Float args: make room with SUB, then store (MOV with SSE2, GST x87).
1672 | isFloatType arg_ty = do
1673 (reg, code) <- getSomeReg arg
1674 delta <- getDeltaNat
1675 setDeltaNat (delta-size)
1676 return (code `appOL`
1677 toOL [SUB II32 (OpImm (ImmInt size)) (OpReg esp),
1679 let addr = AddrBaseIndex (EABaseReg esp)
1682 size = floatSize (typeWidth arg_ty)
1685 then MOV size (OpReg reg) (OpAddr addr)
1686 else GST size reg addr
-- Everything else: a single PUSH of the operand.
1691 (operand, code) <- getOperand arg
1692 delta <- getDeltaNat
1693 setDeltaNat (delta-size)
1694 return (code `snocOL`
1695 PUSH II32 operand `snocOL`
1699 arg_ty = cmmExprType arg
1700 size = arg_size arg_ty -- Byte size
1702 #elif x86_64_TARGET_ARCH
-- x86_64 C calls: the first arguments travel in registers (allArgRegs /
-- allFPArgRegs), the rest on the stack; %al carries the SSE-register
-- count for varargs callees.
-- NOTE(review): this extract is missing many physical lines (`let`
-- heads, `case target of` lines, `where` keywords); structure below is
-- incomplete.
1704 genCCall (CmmPrim MO_WriteBarrier) _ _ = return nilOL
1705 -- write barrier compiles to no code on x86/x86-64;
1706 -- we keep it this long in order to prevent earlier optimisations.
1708 -- void return type prim op
1709 genCCall (CmmPrim op) [] args =
1710 outOfLineCmmOp op Nothing args
1712 -- we only cope with a single result for foreign calls
1713 genCCall (CmmPrim op) [res] args =
1714 outOfLineCmmOp op (Just res) args
1716 genCCall target dest_regs args = do
1718 -- load up the register arguments
1719 (stack_args, aregs, fregs, load_args_code)
1720 <- load_args args allArgRegs allFPArgRegs nilOL
-- The used register lists are the prefixes of the available lists that
-- load_args consumed.
1723 fp_regs_used = reverse (drop (length fregs) (reverse allFPArgRegs))
1724 int_regs_used = reverse (drop (length aregs) (reverse allArgRegs))
1725 arg_regs = [eax] ++ int_regs_used ++ fp_regs_used
1726 -- for annotating the call instruction with
1728 sse_regs = length fp_regs_used
1730 tot_arg_size = arg_size * length stack_args
1732 -- On entry to the called function, %rsp should be aligned
1733 -- on a 16-byte boundary +8 (i.e. the first stack arg after
1734 -- the return address is 16-byte aligned). In STG land
1735 -- %rsp is kept 16-byte aligned (see StgCRun.c), so we just
1736 -- need to make sure we push a multiple of 16-bytes of args,
1737 -- plus the return address, to get the correct alignment.
1738 -- Urg, this is hard. We need to feed the delta back into
1739 -- the arg pushing code.
1740 (real_size, adjust_rsp) <-
1741 if tot_arg_size `rem` 16 == 0
1742 then return (tot_arg_size, nilOL)
1743 else do -- we need to adjust...
1744 delta <- getDeltaNat
1745 setDeltaNat (delta-8)
1746 return (tot_arg_size+8, toOL [
1747 SUB II64 (OpImm (ImmInt 8)) (OpReg rsp),
1751 -- push the stack args, right to left
1752 push_code <- push_args (reverse stack_args) nilOL
1753 delta <- getDeltaNat
1755 -- deal with static vs dynamic call targets
1756 (callinsns,cconv) <-
1758 CmmCallee (CmmLit (CmmLabel lbl)) conv
1759 -> -- ToDo: stdcall arg sizes
1760 return (unitOL (CALL (Left fn_imm) arg_regs), conv)
1761 where fn_imm = ImmCLbl lbl
1763 -> do (dyn_r, dyn_c) <- getSomeReg expr
1764 return (dyn_c `snocOL` CALL (Right dyn_r) arg_regs, conv)
1766 -> panic $ "genCCall: Can't handle CmmPrim call type here, error "
1767 ++ "probably because too many return values."
1770 -- The x86_64 ABI requires us to set %al to the number of SSE2
1771 -- registers that contain arguments, if the called routine
1772 -- is a varargs function. We don't know whether it's a
1773 -- varargs function or not, so we have to assume it is.
1775 -- It's not safe to omit this assignment, even if the number
1776 -- of SSE2 regs in use is zero. If %al is larger than 8
1777 -- on entry to a varargs function, seg faults ensue.
1778 assign_eax n = unitOL (MOV II32 (OpImm (ImmInt n)) (OpReg eax))
1780 let call = callinsns `appOL`
1782 -- Deallocate parameters after call for ccall;
1783 -- but not for stdcall (callee does it)
1784 (if cconv == StdCallConv || real_size==0 then [] else
1785 [ADD (intSize wordWidth) (OpImm (ImmInt real_size)) (OpReg esp)])
1787 [DELTA (delta + real_size)]
1790 setDeltaNat (delta + real_size)
1793 -- assign the results, if necessary
-- Results: floats come back in %xmm0, integers in %rax.
1794 assign_code [] = nilOL
1795 assign_code [CmmHinted dest _hint] =
1796 case typeWidth rep of
1797 W32 | isFloatType rep -> unitOL (MOV (floatSize W32) (OpReg xmm0) (OpReg r_dest))
1798 W64 | isFloatType rep -> unitOL (MOV (floatSize W64) (OpReg xmm0) (OpReg r_dest))
1799 _ -> unitOL (MOV (cmmTypeSize rep) (OpReg rax) (OpReg r_dest))
1801 rep = localRegType dest
1802 r_dest = getRegisterReg True (CmmLocal dest)
1803 assign_code _many = panic "genCCall.assign_code many"
1805 return (load_args_code `appOL`
1808 assign_eax sse_regs `appOL`
1810 assign_code dest_regs)
1813 arg_size = 8 -- always, at the mo
-- load_args walks the argument list handing out registers until one of
-- the pools runs dry; leftovers become stack arguments.
1815 load_args :: [CmmHinted CmmExpr]
1816 -> [Reg] -- int regs avail for args
1817 -> [Reg] -- FP regs avail for args
1819 -> NatM ([CmmHinted CmmExpr],[Reg],[Reg],InstrBlock)
1820 load_args args [] [] code = return (args, [], [], code)
1821 -- no more regs to use
1822 load_args [] aregs fregs code = return ([], aregs, fregs, code)
1823 -- no more args to push
1824 load_args ((CmmHinted arg hint) : rest) aregs fregs code
1825 | isFloatType arg_rep =
1829 arg_code <- getAnyReg arg
1830 load_args rest aregs rs (code `appOL` arg_code r)
1835 arg_code <- getAnyReg arg
1836 load_args rest rs fregs (code `appOL` arg_code r)
1838 arg_rep = cmmExprType arg
1841 (args',ars,frs,code') <- load_args rest aregs fregs code
1842 return ((CmmHinted arg hint):args', ars, frs, code')
-- push_args stores each remaining argument into a fresh 8-byte slot.
1844 push_args [] code = return code
1845 push_args ((CmmHinted arg _):rest) code
1846 | isFloatType arg_rep = do
1847 (arg_reg, arg_code) <- getSomeReg arg
1848 delta <- getDeltaNat
1849 setDeltaNat (delta-arg_size)
1850 let code' = code `appOL` arg_code `appOL` toOL [
1851 SUB (intSize wordWidth) (OpImm (ImmInt arg_size)) (OpReg rsp) ,
1852 DELTA (delta-arg_size),
1853 MOV (floatSize width) (OpReg arg_reg) (OpAddr (spRel 0))]
1854 push_args rest code'
1857 -- we only ever generate word-sized function arguments. Promotion
1858 -- has already happened: our Int8# type is kept sign-extended
1859 -- in an Int#, for example.
1860 ASSERT(width == W64) return ()
1861 (arg_op, arg_code) <- getOperand arg
1862 delta <- getDeltaNat
1863 setDeltaNat (delta-arg_size)
1864 let code' = code `appOL` arg_code `appOL` toOL [
1866 DELTA (delta-arg_size)]
1867 push_args rest code'
1869 arg_rep = cmmExprType arg
1870 width = typeWidth arg_rep
-- Fallback for targets other than i386/x86_64: this module cannot
-- generate C calls for them.  (Fixed typo: the panic previously said
-- "genCCAll".)
genCCall = panic "X86.genCCall: not defined"
1875 #endif /* x86_64_TARGET_ARCH */
-- Compile a CallishMachOp as an ordinary unsafe foreign call to a named
-- C function (sqrt, sin, memcpy, ...).
-- NOTE(review): the `do`, `args'`/`fn` bindings and `case mop of` heads
-- are missing from this extract.
1878 outOfLineCmmOp :: CallishMachOp -> Maybe HintedCmmFormal -> [HintedCmmActual] -> NatM InstrBlock
1879 outOfLineCmmOp mop res args
1881 dflags <- getDynFlagsNat
1882 targetExpr <- cmmMakeDynamicReference dflags addImportNat CallReference lbl
1883 let target = CmmCallee targetExpr CCallConv
1885 stmtToInstrs (CmmCall target (catMaybes [res]) args' CmmUnsafe CmmMayReturn)
1887 -- Assume we can call these functions directly, and that they're not in a dynamic library.
1888 -- TODO: Why is this ok? Under linux this code will be in libm.so
1889 -- Is it because they're really implemented as a primitive instruction by the assembler?? -- BL 2009/12/31
1890 lbl = mkForeignLabel fn Nothing ForeignLabelInThisPackage IsFunction
-- The mem* ops drop their last argument before the call.
1893 MO_Memcpy -> init args
1894 MO_Memset -> init args
1895 MO_Memmove -> init args
-- Map each MachOp to the corresponding libm/libc symbol name.
1899 MO_F32_Sqrt -> fsLit "sqrtf"
1900 MO_F32_Sin -> fsLit "sinf"
1901 MO_F32_Cos -> fsLit "cosf"
1902 MO_F32_Tan -> fsLit "tanf"
1903 MO_F32_Exp -> fsLit "expf"
1904 MO_F32_Log -> fsLit "logf"
1906 MO_F32_Asin -> fsLit "asinf"
1907 MO_F32_Acos -> fsLit "acosf"
1908 MO_F32_Atan -> fsLit "atanf"
1910 MO_F32_Sinh -> fsLit "sinhf"
1911 MO_F32_Cosh -> fsLit "coshf"
1912 MO_F32_Tanh -> fsLit "tanhf"
1913 MO_F32_Pwr -> fsLit "powf"
1915 MO_F64_Sqrt -> fsLit "sqrt"
1916 MO_F64_Sin -> fsLit "sin"
1917 MO_F64_Cos -> fsLit "cos"
1918 MO_F64_Tan -> fsLit "tan"
1919 MO_F64_Exp -> fsLit "exp"
1920 MO_F64_Log -> fsLit "log"
1922 MO_F64_Asin -> fsLit "asin"
1923 MO_F64_Acos -> fsLit "acos"
1924 MO_F64_Atan -> fsLit "atan"
1926 MO_F64_Sinh -> fsLit "sinh"
1927 MO_F64_Cosh -> fsLit "cosh"
1928 MO_F64_Tanh -> fsLit "tanh"
1929 MO_F64_Pwr -> fsLit "pow"
1931 MO_Memcpy -> fsLit "memcpy"
1932 MO_Memset -> fsLit "memset"
1933 MO_Memmove -> fsLit "memmove"
1935 other -> panic $ "outOfLineCmmOp: unmatched op! (" ++ show other ++ ")"
1938 -- -----------------------------------------------------------------------------
1939 -- Generating a table-branch
-- Table-based switch: index a jump table by the scrutinee and jump.
-- PIC/x86_64 variants load the table address dynamically and use
-- label-relative entries; the plain case indexes an absolute table.
-- NOTE(review): the `| opt_PIC = do` guard head, #else/#endif lines and
-- the final `return code` lines are missing from this extract.
1941 genSwitch :: CmmExpr -> [Maybe BlockId] -> NatM InstrBlock
1946 (reg,e_code) <- getSomeReg expr
1947 lbl <- getNewLabelNat
1948 dflags <- getDynFlagsNat
1949 dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl
1950 (tableReg,t_code) <- getSomeReg $ dynRef
1951 let op = OpAddr (AddrBaseIndex (EABaseReg tableReg)
1952 (EAIndex reg wORD_SIZE) (ImmInt 0))
1954 #if x86_64_TARGET_ARCH
1955 #if darwin_TARGET_OS
1956 -- on Mac OS X/x86_64, put the jump table in the text section
1957 -- to work around a limitation of the linker.
1958 -- ld64 is unable to handle the relocations for
1960 -- if L0 is not preceded by a non-anonymous label in its section.
1962 code = e_code `appOL` t_code `appOL` toOL [
1963 ADD (intSize wordWidth) op (OpReg tableReg),
1964 JMP_TBL (OpReg tableReg) ids Text lbl
1967 -- HACK: On x86_64 binutils<2.17 is only able to generate PC32
1968 -- relocations, hence we only get 32-bit offsets in the jump
1969 -- table. As these offsets are always negative we need to properly
1970 -- sign extend them to 64-bit. This hack should be removed in
1971 -- conjunction with the hack in PprMach.hs/pprDataItem once
1972 -- binutils 2.17 is standard.
1973 code = e_code `appOL` t_code `appOL` toOL [
1974 MOVSxL II32 op (OpReg reg),
1975 ADD (intSize wordWidth) (OpReg reg) (OpReg tableReg),
1976 JMP_TBL (OpReg tableReg) ids ReadOnlyData lbl
1980 code = e_code `appOL` t_code `appOL` toOL [
1981 ADD (intSize wordWidth) op (OpReg tableReg),
1982 JMP_TBL (OpReg tableReg) ids ReadOnlyData lbl
-- Non-PIC case: index an absolute jump table directly via the label.
1988 (reg,e_code) <- getSomeReg expr
1989 lbl <- getNewLabelNat
1990 let op = OpAddr (AddrBaseIndex EABaseNone (EAIndex reg wORD_SIZE) (ImmCLbl lbl))
1991 code = e_code `appOL` toOL [
1992 JMP_TBL op ids ReadOnlyData lbl
-- | If the instruction is a JMP_TBL, build the jump-table data block it
-- refers to; other instructions carry no table.
generateJumpTableForInstr :: Instr -> Maybe (NatCmmTop Instr)
generateJumpTableForInstr instr = case instr of
  JMP_TBL _ ids section lbl -> Just (createJumpTable ids section lbl)
  _                         -> Nothing
-- Build the static data block for a jump table: label-relative entries
-- under PIC (missing targets become 0), absolute entries otherwise.
-- NOTE(review): the `| opt_PIC =` guard head between 2002 and 2005 is
-- missing from this extract.
2001 createJumpTable :: [Maybe BlockId] -> Section -> CLabel -> GenCmmTop CmmStatic h g
2002 createJumpTable ids section lbl
2005 let jumpTableEntryRel Nothing
2006 = CmmStaticLit (CmmInt 0 wordWidth)
2007 jumpTableEntryRel (Just blockid)
2008 = CmmStaticLit (CmmLabelDiffOff blockLabel lbl 0)
2009 where blockLabel = mkAsmTempLabel (getUnique blockid)
2010 in map jumpTableEntryRel ids
2011 | otherwise = map jumpTableEntry ids
2012 in CmmData section (CmmDataLabel lbl : jumpTable)
2014 -- -----------------------------------------------------------------------------
2015 -- 'condIntReg' and 'condFltReg': condition codes into registers
2017 -- Turn those condition codes into integers now (when they appear on
2018 -- the right hand side of an assignment).
2020 -- (If applicable) Do not fill the delay slots here; you will confuse the
2021 -- register allocator.
-- Materialise an integer comparison result as 0/1 in a register:
-- SETcc into a byte temp, then zero-extend to 32 bits.
-- NOTE(review): the `let` head and closing bracket lines are missing
-- from this extract.
2023 condIntReg :: Cond -> CmmExpr -> CmmExpr -> NatM Register
2025 condIntReg cond x y = do
2026 CondCode _ cond cond_code <- condIntCode cond x y
2027 tmp <- getNewRegNat II8
2029 code dst = cond_code `appOL` toOL [
2030 SETCC cond (OpReg tmp),
2031 MOVZxL II8 (OpReg tmp) (OpReg dst)
2034 return (Any II32 code)
-- Materialise a float comparison result as 0/1 in a register.
-- NOTE(review): equation heads (condFltReg_x87), `let` lines and some
-- list brackets are missing from this extract.
2038 condFltReg :: Cond -> CmmExpr -> CmmExpr -> NatM Register
2039 condFltReg cond x y = if_sse2 condFltReg_sse2 condFltReg_x87
-- x87 path: same SETcc/zero-extend scheme as condIntReg.
2042 CondCode _ cond cond_code <- condFltCode cond x y
2043 tmp <- getNewRegNat II8
2045 code dst = cond_code `appOL` toOL [
2046 SETCC cond (OpReg tmp),
2047 MOVZxL II8 (OpReg tmp) (OpReg dst)
2050 return (Any II32 code)
2052 condFltReg_sse2 = do
2053 CondCode _ cond cond_code <- condFltCode cond x y
2054 tmp1 <- getNewRegNat archWordSize
2055 tmp2 <- getNewRegNat archWordSize
2057 -- We have to worry about unordered operands (eg. comparisons
2058 -- against NaN). If the operands are unordered, the comparison
2059 -- sets the parity flag, carry flag and zero flag.
2060 -- All comparisons are supposed to return false for unordered
2061 -- operands except for !=, which returns true.
2063 -- Optimisation: we don't have to test the parity flag if we
2064 -- know the test has already excluded the unordered case: eg >
2065 -- and >= test for a zero carry flag, which can only occur for
2066 -- ordered operands.
2068 -- ToDo: by reversing comparisons we could avoid testing the
2069 -- parity flag in more cases.
2074 NE -> or_unordered dst
2075 GU -> plain_test dst
2076 GEU -> plain_test dst
2077 _ -> and_ordered dst)
2079 plain_test dst = toOL [
2080 SETCC cond (OpReg tmp1),
2081 MOVZxL II8 (OpReg tmp1) (OpReg dst)
-- NE must also be true when operands are unordered: OR in the parity bit.
2083 or_unordered dst = toOL [
2084 SETCC cond (OpReg tmp1),
2085 SETCC PARITY (OpReg tmp2),
2086 OR II8 (OpReg tmp1) (OpReg tmp2),
2087 MOVZxL II8 (OpReg tmp2) (OpReg dst)
-- Other conditions must be false when unordered: AND with NOT parity.
2089 and_ordered dst = toOL [
2090 SETCC cond (OpReg tmp1),
2091 SETCC NOTPARITY (OpReg tmp2),
2092 AND II8 (OpReg tmp1) (OpReg tmp2),
2093 MOVZxL II8 (OpReg tmp2) (OpReg dst)
2096 return (Any II32 code)
2099 -- -----------------------------------------------------------------------------
2100 -- 'trivial*Code': deal with trivial instructions
2102 -- Trivial (dyadic: 'trivialCode', floating-point: 'trivialFCode',
2103 -- unary: 'trivialUCode', unary fl-pt:'trivialUFCode') instructions.
2104 -- Only look for constants on the right hand side, because that's
2105 -- where the generic optimizer will have put them.
2107 -- Similarly, for unary instructions, we don't have to worry about
2108 -- matching an StInt as the argument, because genericOpt will already
2109 -- have handled the constant-folding.
2113 The Rules of the Game are:
2115 * You cannot assume anything about the destination register dst;
2116 it may be anything, including a fixed reg.
2118 * You may compute an operand into a fixed reg, but you may not
2119 subsequently change the contents of that fixed reg. If you
2120 want to do so, first copy the value either to a temporary
2121 or into dst. You are free to modify dst even if it happens
2122 to be a fixed reg -- that's not your problem.
2124 * You cannot assume that a fixed reg will stay live over an
2125 arbitrary computation. The same applies to the dst reg.
2127 * Temporary regs obtained from getNewRegNat are distinct from
2128 each other and from all other regs, and stay live over
2129 arbitrary computations.
2131 --------------------
2133 SDM's version of The Rules:
2135 * If getRegister returns Any, that means it can generate correct
2136 code which places the result in any register, period. Even if that
2137 register happens to be read during the computation.
2139 Corollary #1: this means that if you are generating code for an
2140 operation with two arbitrary operands, you cannot assign the result
2141 of the first operand into the destination register before computing
2142 the second operand. The second operand might require the old value
2143 of the destination register.
2145 Corollary #2: A function might be able to generate more efficient
2146 code if it knows the destination register is a new temporary (and
2147 therefore not read by any of the sub-computations).
2149 * If getRegister returns Any, then the code it generates may modify only:
2150 (a) fresh temporaries
2151 (b) the destination register
2152 (c) known registers (eg. %ecx is used by shifts)
2153 In particular, it may *not* modify global registers, unless the global
2154 register happens to be the destination register.
-- Two-operand integer instruction.  If a commuted form is available and
-- the left argument is a small literal, emit `dst := b; dst `rev` imm`;
-- otherwise defer to genTrivialCode.
-- NOTE(review): the `let code dst` head between 2162 and 2165 is
-- missing from this extract.
2157 trivialCode :: Width -> (Operand -> Operand -> Instr)
2158 -> Maybe (Operand -> Operand -> Instr)
2159 -> CmmExpr -> CmmExpr -> NatM Register
2160 trivialCode width _ (Just revinstr) (CmmLit lit_a) b
2161 | is32BitLit lit_a = do
2162 b_code <- getAnyReg b
2165 = b_code dst `snocOL`
2166 revinstr (OpImm (litToImm lit_a)) (OpReg dst)
2168 return (Any (intSize width) code)
2170 trivialCode width instr _ a b
2171 = genTrivialCode (intSize width) instr a b
2173 -- This is re-used for floating pt instructions too.
-- Generic two-operand scheme: operand b must stay alive while a is
-- computed straight into dst; if b's register is dst itself, b is first
-- saved in a fresh temporary.
-- NOTE(review): the `code dst` guard heads and `| otherwise` line are
-- missing from this extract.
2174 genTrivialCode :: Size -> (Operand -> Operand -> Instr)
2175 -> CmmExpr -> CmmExpr -> NatM Register
2176 genTrivialCode rep instr a b = do
2177 (b_op, b_code) <- getNonClobberedOperand b
2178 a_code <- getAnyReg a
2179 tmp <- getNewRegNat rep
2181 -- We want the value of b to stay alive across the computation of a.
2182 -- But, we want to calculate a straight into the destination register,
2183 -- because the instruction only has two operands (dst := dst `op` src).
2184 -- The troublesome case is when the result of b is in the same register
2185 -- as the destination reg. In this case, we have to save b in a
2186 -- new temporary across the computation of a.
2188 | dst `regClashesWithOp` b_op =
2190 unitOL (MOV rep b_op (OpReg tmp)) `appOL`
2192 instr (OpReg tmp) (OpReg dst)
2196 instr b_op (OpReg dst)
2198 return (Any rep code)
-- | Does the given register appear in the operand -- either as the
-- operand's register itself, or as one of the registers used by an
-- address mode?  Immediates can never clash.
regClashesWithOp :: Reg -> Operand -> Bool
regClashesWithOp reg op =
  case op of
    OpReg reg2   -> reg == reg2
    OpAddr amode -> reg `elem` addrModeRegs amode
    _            -> False
-- | Emit a unary (one-operand) instruction: compute the argument straight
-- into the destination register, then apply 'instr' to it in place.
2207 trivialUCode :: Size -> (Operand -> Instr)
2208 -> CmmExpr -> NatM Register
2209 trivialUCode rep instr x = do
2210 x_code <- getAnyReg x
-- NOTE(review): original lines 2211-2214 (the 'code dst' binding that
-- applies 'instr' to the destination) are elided from this listing --
-- confirm against the full source.
2215 return (Any rep code)
-- | Two-operand floating-point instruction on the x87 stack.
-- All x87 arithmetic here is performed at the full 80-bit register
-- size (FF80), regardless of the source widths.
2219 trivialFCode_x87 :: (Size -> Reg -> Reg -> Reg -> Instr)
2220 -> CmmExpr -> CmmExpr -> NatM Register
2221 trivialFCode_x87 instr x y = do
2222 (x_reg, x_code) <- getNonClobberedReg x -- these work for float regs too
2223 (y_reg, y_code) <- getSomeReg y
2225 size = FF80 -- always, on x87
-- NOTE(review): the 'let' head and the 'code dst' binding (orig. lines
-- 2224, 2226-2228) are elided from this listing -- confirm against the
-- full source.
2229 instr size x_reg y_reg dst
2230 return (Any size code)
-- | Two-operand floating-point instruction via SSE2: choose the SSE
-- float size for the given width and defer to the generic two-operand
-- code generator.
trivialFCode_sse2 :: Width -> (Size -> Operand -> Operand -> Instr)
                  -> CmmExpr -> CmmExpr -> NatM Register
trivialFCode_sse2 pk instr x y =
  let sz = floatSize pk
  in  genTrivialCode sz (instr sz) x y
-- | Unary floating-point instruction in register-to-register form.
2239 trivialUFCode :: Size -> (Reg -> Reg -> Instr) -> CmmExpr -> NatM Register
2240 trivialUFCode size instr x = do
2241 (x_reg, x_code) <- getSomeReg x
-- NOTE(review): original lines 2242-2246 (the 'code dst' binding) are
-- elided from this listing -- confirm against the full source.
2247 return (Any size code)
2250 --------------------------------------------------------------------------------
-- | Convert an integer of width 'from' to a float of width 'to'.
-- Dispatches on SSE2 availability: the SSE2 path uses CVTSI2SS/CVTSI2SD
-- and yields a register of the real target size, whereas the x87 path
-- uses GITOF/GITOD and always yields the 80-bit FF80 size.
2251 coerceInt2FP :: Width -> Width -> CmmExpr -> NatM Register
2252 coerceInt2FP from to x = if_sse2 coerce_sse2 coerce_x87
-- x87 path:
2255 (x_reg, x_code) <- getSomeReg x
2257 opc = case to of W32 -> GITOF; W64 -> GITOD;
2258 n -> panic $ "coerceInt2FP.x87: unhandled width ("
2260 code dst = x_code `snocOL` opc x_reg dst
2261 -- ToDo: works for non-II32 reps?
2262 return (Any FF80 code)
-- SSE2 path: the conversion is parameterised on the *source* integer
-- size ('intSize from').
-- NOTE(review): the 'where'/'let' heads and the '++ show n' continuation
-- lines of both panic messages are elided from this listing -- confirm
-- against the full source.
2265 (x_op, x_code) <- getOperand x -- ToDo: could be a safe operand
2267 opc = case to of W32 -> CVTSI2SS; W64 -> CVTSI2SD
2268 n -> panic $ "coerceInt2FP.sse: unhandled width ("
2270 code dst = x_code `snocOL` opc (intSize from) x_op dst
2272 return (Any (floatSize to) code)
2273 -- works even if the destination rep is <II32
--------------------------------------------------------------------------------
-- | Truncate a floating-point value of width @from@ to an integer of
-- width @to@.
--
-- Dispatches on SSE2 availability:
--
--   * SSE2: CVTTSS2SIQ / CVTTSD2SIQ (truncating convert to signed int).
--   * x87:  GFTOI / GDTOI.
--
-- Panics on float widths other than W32/W64, which the code generator
-- never produces.
coerceFP2Int :: Width -> Width -> CmmExpr -> NatM Register
coerceFP2Int from to x = if_sse2 coerceFP2Int_sse2 coerceFP2Int_x87
 where
   coerceFP2Int_x87 = do
     (x_reg, x_code) <- getSomeReg x
     let
           opc  = case from of W32 -> GFTOI; W64 -> GDTOI
                               n -> panic $ "coerceFP2Int.x87: unhandled width ("
                                         ++ show n ++ ")"
           code dst = x_code `snocOL` opc x_reg dst
        -- ToDo: works for non-II32 reps?
     return (Any (intSize to) code)

   coerceFP2Int_sse2 = do
     (x_op, x_code) <- getOperand x  -- ToDo: could be a safe operand
     let
           -- Fixed panic-message typo: was "coerceFP2Init.sse".
           opc  = case from of W32 -> CVTTSS2SIQ; W64 -> CVTTSD2SIQ
                               n -> panic $ "coerceFP2Int.sse2: unhandled width ("
                                         ++ show n ++ ")"
           code dst = x_code `snocOL` opc (intSize to) x_op dst
     return (Any (intSize to) code)
     -- works even if the destination rep is <II32
2302 --------------------------------------------------------------------------------
-- | Convert between float widths (W32 <-> W64).
-- With SSE2 this is CVTSD2SS / CVTSS2SD and the result has the real
-- target size; on x87 the value simply stays at the 80-bit register
-- size FF80.
2303 coerceFP2FP :: Width -> CmmExpr -> NatM Register
2304 coerceFP2FP to x = do
2305 use_sse2 <- sse2Enabled
2306 (x_reg, x_code) <- getSomeReg x
-- NOTE(review): the 'let' head, the '++ show n' continuation of the
-- panic message, and the x87 '| otherwise' alternative of 'opc'
-- (orig. lines 2307, 2310-2311) are elided from this listing --
-- confirm against the full source.
2308 opc | use_sse2 = case to of W32 -> CVTSD2SS; W64 -> CVTSS2SD;
2309 n -> panic $ "coerceFP2FP: unhandled width ("
2312 code dst = x_code `snocOL` opc x_reg dst
2314 return (Any (if use_sse2 then floatSize to else FF80) code)
2316 --------------------------------------------------------------------------------
-- | Negate a float held in an SSE register by XORing its sign bit with a
-- mask constant loaded from memory (0x80000000 for W32, the 64-bit
-- analogue for W64).
2318 sse2NegCode :: Width -> CmmExpr -> NatM Register
2319 sse2NegCode w x = do
2320 let sz = floatSize w
2321 x_code <- getAnyReg x
2322 -- This is how gcc does it, so it can't be that bad:
2324 const | FF32 <- sz = CmmInt 0x80000000 W32
2325 | otherwise = CmmInt 0x8000000000000000 W64
-- The sign-bit mask is emitted as a memory constant sized/aligned to the
-- operand width; 'amode' addresses it.
2326 Amode amode amode_code <- memConstant (widthInBytes w) const
2327 tmp <- getNewRegNat sz
-- NOTE(review): the 'let' heads and the closing bracket of the 'toOL'
-- list (orig. lines 2323, 2328, 2332-2333) are elided from this
-- listing -- confirm against the full source.
2329 code dst = x_code dst `appOL` amode_code `appOL` toOL [
2330 MOV sz (OpAddr amode) (OpReg tmp),
2331 XOR sz (OpReg tmp) (OpReg dst)
2334 return (Any sz code)