-{-# OPTIONS -w #-}
--- The above warning supression flag is a temporary kludge.
--- While working on this module you are encouraged to remove it and fix
--- any warnings in the module. See
--- http://hackage.haskell.org/trac/ghc/wiki/Commentary/CodingStyle#Warnings
--- for details
-
-----------------------------------------------------------------------------
--
-- Generating machine code (instruction selection)
module X86.CodeGen (
cmmTopCodeGen,
+ generateJumpTableForInstr,
InstrBlock
)
#include "HsVersions.h"
#include "nativeGen/NCG.h"
-#include "MachDeps.h"
+#include "../includes/MachDeps.h"
-- NCG stuff:
import X86.Instr
import X86.Cond
import X86.Regs
import X86.RegInfo
-import X86.Ppr
import Instruction
import PIC
import NCGMonad
import Size
import Reg
-import RegClass
import Platform
-- Our intermediate code:
import BasicTypes
import BlockId
-import PprCmm ( pprExpr )
-import Cmm
+import PprCmm ()
+import OldCmm
+import OldPprCmm ()
import CLabel
-import ClosureInfo ( C_SRT(..) )
-- The rest:
import StaticFlags ( opt_PIC )
import ForeignCall ( CCallConv(..) )
import OrdList
-import Pretty
-import qualified Outputable as O
import Outputable
+import Unique
import FastString
import FastBool ( isFastTrue )
import Constants ( wORD_SIZE )
import DynFlags
-import Debug.Trace ( trace )
+import Control.Monad ( mapAndUnzipM )
+import Data.Maybe ( catMaybes )
+import Data.Int
-import Control.Monad ( mapAndUnzipM )
-import Data.Maybe ( fromJust )
-import Data.Bits
+#if WORD_SIZE_IN_BITS==32
+import Data.Maybe ( fromJust )
import Data.Word
-import Data.Int
+import Data.Bits
+#endif
+-- | Is SSE2 floating-point code generation enabled?  Decides whether
+-- float operations target the SSE2 xmm registers or the legacy x87
+-- FPU stack throughout this module.
+sse2Enabled :: NatM Bool
+#if x86_64_TARGET_ARCH
+-- SSE2 is fixed on for x86_64. It would be possible to make it optional,
+-- but we'd need to fix at least the foreign call code where the calling
+-- convention specifies the use of xmm regs, and possibly other places.
+sse2Enabled = return True
+#else
+sse2Enabled = do
+ dflags <- getDynFlagsNat
+ return (dopt Opt_SSE2 dflags)
+#endif
+
+-- | @if_sse2 sse2 x87@ runs the @sse2@ action when SSE2 code generation
+-- is enabled, and the @x87@ action otherwise.
+if_sse2 :: NatM a -> NatM a -> NatM a
+if_sse2 sse2 x87 = do
+ b <- sse2Enabled
+ if b then sse2 else x87
cmmTopCodeGen
- :: DynFlags
- -> RawCmmTop
+ :: RawCmmTop
-> NatM [NatCmmTop Instr]
-cmmTopCodeGen dynflags
- (CmmProc info lab params (ListGraph blocks)) = do
+cmmTopCodeGen (CmmProc info lab (ListGraph blocks)) = do
(nat_blocks,statics) <- mapAndUnzipM basicBlockCodeGen blocks
picBaseMb <- getPicBaseMaybeNat
- let proc = CmmProc info lab params (ListGraph $ concat nat_blocks)
+ dflags <- getDynFlagsNat
+ let proc = CmmProc info lab (ListGraph $ concat nat_blocks)
tops = proc : concat statics
- os = platformOS $ targetPlatform dynflags
+ os = platformOS $ targetPlatform dflags
case picBaseMb of
Just picBase -> initializePicBase_x86 ArchX86 os picBase tops
Nothing -> return tops
-cmmTopCodeGen _ (CmmData sec dat) = do
+cmmTopCodeGen (CmmData sec dat) = do
return [CmmData sec dat] -- no translation, we just use CmmStatic
CmmBranch id -> genBranch id
CmmCondBranch arg id -> genCondJump id arg
CmmSwitch arg ids -> genSwitch arg ids
- CmmJump arg params -> genJump arg
- CmmReturn params ->
+ CmmJump arg _ -> genJump arg
+ CmmReturn _ ->
panic "stmtToInstrs: return statement should have been cps'd away"
= CondCode Bool Cond InstrBlock
+#if WORD_SIZE_IN_BITS==32
-- | a.k.a "Register64"
-- Reg is the lower 32-bit temporary which contains the result.
-- Use getHiVRegFromLo to find the other VRegUnique.
= ChildCode64
InstrBlock
Reg
+#endif
-- | Register's passed up the tree. If the stix code forces the register
-- | Grab the Reg for a CmmReg
-getRegisterReg :: CmmReg -> Reg
+getRegisterReg :: Bool -> CmmReg -> Reg
-getRegisterReg (CmmLocal (LocalReg u pk))
- = RegVirtual $ mkVirtualReg u (cmmTypeSize pk)
+getRegisterReg use_sse2 (CmmLocal (LocalReg u pk))
+ = let sz = cmmTypeSize pk in
+ if isFloatSize sz && not use_sse2
+ then RegVirtual (mkVirtualReg u FF80)
+ else RegVirtual (mkVirtualReg u sz)
-getRegisterReg (CmmGlobal mid)
- = case get_GlobalReg_reg_or_addr mid of
- Left reg -> RegReal $ reg
- _other -> pprPanic "getRegisterReg-memory" (ppr $ CmmGlobal mid)
- -- By this stage, the only MagicIds remaining should be the
- -- ones which map to a real machine register on this
- -- platform. Hence ...
+getRegisterReg _ (CmmGlobal mid)
+ = case globalRegMaybe mid of
+ Just reg -> RegReal $ reg
+ Nothing -> pprPanic "getRegisterReg-memory" (ppr $ CmmGlobal mid)
+ -- By this stage, the only MagicIds remaining should be the
+ -- ones which map to a real machine register on this
+ -- platform. Hence ...
-- | Memory addressing modes passed up the tree.
-- | Convert a BlockId to some CmmStatic data
jumpTableEntry :: Maybe BlockId -> CmmStatic
jumpTableEntry Nothing = CmmStaticLit (CmmInt 0 wordWidth)
-jumpTableEntry (Just (BlockId id)) = CmmStaticLit (CmmLabel blockLabel)
- where blockLabel = mkAsmTempLabel id
+jumpTableEntry (Just blockid) = CmmStaticLit (CmmLabel blockLabel)
+ where blockLabel = mkAsmTempLabel (getUnique blockid)
-- -----------------------------------------------------------------------------
-- Expand CmmRegOff. ToDo: should we do it this way around, or convert
-- CmmExprs into CmmRegOff?
-mangleIndexTree :: CmmExpr -> CmmExpr
-mangleIndexTree (CmmRegOff reg off)
+mangleIndexTree :: CmmReg -> Int -> CmmExpr
+mangleIndexTree reg off
= CmmMachOp (MO_Add width) [CmmReg reg, CmmLit (CmmInt (fromIntegral off) width)]
where width = typeWidth (cmmRegType reg)
return (reg, code)
-
-
-
+#if WORD_SIZE_IN_BITS==32
assignMem_I64Code :: CmmExpr -> CmmExpr -> NatM InstrBlock
assignMem_I64Code addrTree valueTree = do
Amode addr addr_code <- getAmode addrTree
assignReg_I64Code :: CmmReg -> CmmExpr -> NatM InstrBlock
-assignReg_I64Code (CmmLocal (LocalReg u_dst pk)) valueTree = do
+assignReg_I64Code (CmmLocal (LocalReg u_dst _)) valueTree = do
ChildCode64 vcode r_src_lo <- iselExpr64 valueTree
let
r_dst_lo = RegVirtual $ mkVirtualReg u_dst II32
vcode `snocOL` mov_lo `snocOL` mov_hi
)
-assignReg_I64Code lvalue valueTree
+assignReg_I64Code _ _
= panic "assignReg_I64Code(i386): invalid lvalue"
-
-
iselExpr64 :: CmmExpr -> NatM ChildCode64
iselExpr64 (CmmLit (CmmInt i _)) = do
(rlo,rhi) <- getNewRegPairNat II32
iselExpr64 expr
= pprPanic "iselExpr64(i386)" (ppr expr)
-
+#endif
--------------------------------------------------------------------------------
#endif
getRegister (CmmReg reg)
- = return (Fixed (cmmTypeSize (cmmRegType reg))
- (getRegisterReg reg) nilOL)
+ = do use_sse2 <- sse2Enabled
+ let
+ sz = cmmTypeSize (cmmRegType reg)
+ size | not use_sse2 && isFloatSize sz = FF80
+ | otherwise = sz
+ --
+ return (Fixed size (getRegisterReg use_sse2 reg) nilOL)
+
-getRegister tree@(CmmRegOff _ _)
- = getRegister (mangleIndexTree tree)
+getRegister (CmmRegOff r n)
+ = getRegister $ mangleIndexTree r n
#if WORD_SIZE_IN_BITS==32
#endif
-
-
-#if i386_TARGET_ARCH
-
-getRegister (CmmLit (CmmFloat f W32)) = do
- lbl <- getNewLabelNat
- dflags <- getDynFlagsNat
- dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl
- Amode addr addr_code <- getAmode dynRef
- let code dst =
- LDATA ReadOnlyData
- [CmmDataLabel lbl,
- CmmStaticLit (CmmFloat f W32)]
- `consOL` (addr_code `snocOL`
- GLD FF32 addr dst)
- -- in
- return (Any FF32 code)
-
-
-getRegister (CmmLit (CmmFloat d W64))
- | d == 0.0
- = let code dst = unitOL (GLDZ dst)
- in return (Any FF64 code)
-
- | d == 1.0
- = let code dst = unitOL (GLD1 dst)
- in return (Any FF64 code)
-
- | otherwise = do
- lbl <- getNewLabelNat
- dflags <- getDynFlagsNat
- dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl
- Amode addr addr_code <- getAmode dynRef
- let code dst =
- LDATA ReadOnlyData
- [CmmDataLabel lbl,
- CmmStaticLit (CmmFloat d W64)]
- `consOL` (addr_code `snocOL`
- GLD FF64 addr dst)
- -- in
- return (Any FF64 code)
-
-#endif /* i386_TARGET_ARCH */
-
-
-
-
-#if x86_64_TARGET_ARCH
-getRegister (CmmLit (CmmFloat 0.0 w)) = do
- let size = floatSize w
- code dst = unitOL (XOR size (OpReg dst) (OpReg dst))
- -- I don't know why there are xorpd, xorps, and pxor instructions.
- -- They all appear to do the same thing --SDM
- return (Any size code)
-
-getRegister (CmmLit (CmmFloat f w)) = do
- lbl <- getNewLabelNat
- let code dst = toOL [
- LDATA ReadOnlyData
- [CmmDataLabel lbl,
- CmmStaticLit (CmmFloat f w)],
- MOV size (OpAddr (ripRel (ImmCLbl lbl))) (OpReg dst)
- ]
- -- in
- return (Any size code)
- where size = floatSize w
-
-#endif /* x86_64_TARGET_ARCH */
-
-
-
-
+-- Floating-point literal.  With SSE2, 0.0 is synthesised by XORing a
+-- register with itself; any other value is loaded from a read-only
+-- memory constant.  On x87, 0.0 and 1.0 at W64 use the dedicated
+-- GLDZ/GLD1 instructions; everything else also goes via a memory
+-- constant (and always at the 80-bit FF80 size).
+getRegister (CmmLit lit@(CmmFloat f w)) =
+ if_sse2 float_const_sse2 float_const_x87
+ where
+ float_const_sse2
+ | f == 0.0 = do
+ let
+ size = floatSize w
+ code dst = unitOL (XOR size (OpReg dst) (OpReg dst))
+ -- I don't know why there are xorpd, xorps, and pxor instructions.
+ -- They all appear to do the same thing --SDM
+ return (Any size code)
+
+ | otherwise = do
+ Amode addr code <- memConstant (widthInBytes w) lit
+ loadFloatAmode True w addr code
+
+ float_const_x87 = case w of
+ W64
+ | f == 0.0 ->
+ let code dst = unitOL (GLDZ dst)
+ in return (Any FF80 code)
+
+ | f == 1.0 ->
+ let code dst = unitOL (GLD1 dst)
+ in return (Any FF80 code)
+
+ _otherwise -> do
+ Amode addr code <- memConstant (widthInBytes w) lit
+ loadFloatAmode False w addr code
-- catch simple cases of zero- or sign-extended load
getRegister (CmmMachOp (MO_UU_Conv W8 W32) [CmmLoad addr _]) = do
= return $ Any II64 (\dst -> unitOL $
LEA II64 (OpAddr (ripRel (litToImm displacement))) (OpReg dst))
-getRegister (CmmMachOp (MO_F_Neg W32) [x]) = do
- x_code <- getAnyReg x
- lbl <- getNewLabelNat
- let
- code dst = x_code dst `appOL` toOL [
- -- This is how gcc does it, so it can't be that bad:
- LDATA ReadOnlyData16 [
- CmmAlign 16,
- CmmDataLabel lbl,
- CmmStaticLit (CmmInt 0x80000000 W32),
- CmmStaticLit (CmmInt 0 W32),
- CmmStaticLit (CmmInt 0 W32),
- CmmStaticLit (CmmInt 0 W32)
- ],
- XOR FF32 (OpAddr (ripRel (ImmCLbl lbl))) (OpReg dst)
- -- xorps, so we need the 128-bit constant
- -- ToDo: rip-relative
- ]
- --
- return (Any FF32 code)
-
-getRegister (CmmMachOp (MO_F_Neg W64) [x]) = do
- x_code <- getAnyReg x
- lbl <- getNewLabelNat
- let
- -- This is how gcc does it, so it can't be that bad:
- code dst = x_code dst `appOL` toOL [
- LDATA ReadOnlyData16 [
- CmmAlign 16,
- CmmDataLabel lbl,
- CmmStaticLit (CmmInt 0x8000000000000000 W64),
- CmmStaticLit (CmmInt 0 W64)
- ],
- -- gcc puts an unpck here. Wonder if we need it.
- XOR FF64 (OpAddr (ripRel (ImmCLbl lbl))) (OpReg dst)
- -- xorpd, so we need the 128-bit constant
- ]
- --
- return (Any FF64 code)
-
#endif /* x86_64_TARGET_ARCH */
-getRegister (CmmMachOp mop [x]) -- unary MachOps
- = case mop of
-#if i386_TARGET_ARCH
- MO_F_Neg W32 -> trivialUFCode FF32 (GNEG FF32) x
- MO_F_Neg W64 -> trivialUFCode FF64 (GNEG FF64) x
-#endif
+getRegister (CmmMachOp mop [x]) = do -- unary MachOps
+ sse2 <- sse2Enabled
+ case mop of
+ MO_F_Neg w
+ | sse2 -> sse2NegCode w x
+ | otherwise -> trivialUFCode FF80 (GNEG FF80) x
MO_S_Neg w -> triv_ucode NEGI (intSize w)
- MO_F_Neg w -> triv_ucode NEGI (floatSize w)
MO_Not w -> triv_ucode NOT (intSize w)
-- Nop conversions
-- the form of a movzl and print it as a movl later.
#endif
-#if i386_TARGET_ARCH
- MO_FF_Conv W32 W64 -> conversionNop FF64 x
- MO_FF_Conv W64 W32 -> conversionNop FF32 x
-#else
- MO_FF_Conv W32 W64 -> coerceFP2FP W64 x
+ MO_FF_Conv W32 W64
+ | sse2 -> coerceFP2FP W64 x
+ | otherwise -> conversionNop FF80 x
+
MO_FF_Conv W64 W32 -> coerceFP2FP W32 x
-#endif
MO_FS_Conv from to -> coerceFP2Int from to x
MO_SF_Conv from to -> coerceInt2FP from to x
- other -> pprPanic "getRegister" (pprMachOp mop)
+ _other -> pprPanic "getRegister" (pprMachOp mop)
where
triv_ucode :: (Size -> Operand -> Instr) -> Size -> NatM Register
triv_ucode instr size = trivialUCode size (instr size) x
return (swizzleRegisterRep e_code new_size)
-getRegister e@(CmmMachOp mop [x, y]) -- dyadic MachOps
- = case mop of
- MO_F_Eq w -> condFltReg EQQ x y
- MO_F_Ne w -> condFltReg NE x y
- MO_F_Gt w -> condFltReg GTT x y
- MO_F_Ge w -> condFltReg GE x y
- MO_F_Lt w -> condFltReg LTT x y
- MO_F_Le w -> condFltReg LE x y
-
- MO_Eq rep -> condIntReg EQQ x y
- MO_Ne rep -> condIntReg NE x y
-
- MO_S_Gt rep -> condIntReg GTT x y
- MO_S_Ge rep -> condIntReg GE x y
- MO_S_Lt rep -> condIntReg LTT x y
- MO_S_Le rep -> condIntReg LE x y
-
- MO_U_Gt rep -> condIntReg GU x y
- MO_U_Ge rep -> condIntReg GEU x y
- MO_U_Lt rep -> condIntReg LU x y
- MO_U_Le rep -> condIntReg LEU x y
-
-#if i386_TARGET_ARCH
- MO_F_Add w -> trivialFCode w GADD x y
- MO_F_Sub w -> trivialFCode w GSUB x y
- MO_F_Quot w -> trivialFCode w GDIV x y
- MO_F_Mul w -> trivialFCode w GMUL x y
-#endif
-
-#if x86_64_TARGET_ARCH
- MO_F_Add w -> trivialFCode w ADD x y
- MO_F_Sub w -> trivialFCode w SUB x y
- MO_F_Quot w -> trivialFCode w FDIV x y
- MO_F_Mul w -> trivialFCode w MUL x y
-#endif
+getRegister (CmmMachOp mop [x, y]) = do -- dyadic MachOps
+ sse2 <- sse2Enabled
+ case mop of
+ MO_F_Eq _ -> condFltReg EQQ x y
+ MO_F_Ne _ -> condFltReg NE x y
+ MO_F_Gt _ -> condFltReg GTT x y
+ MO_F_Ge _ -> condFltReg GE x y
+ MO_F_Lt _ -> condFltReg LTT x y
+ MO_F_Le _ -> condFltReg LE x y
+
+ MO_Eq _ -> condIntReg EQQ x y
+ MO_Ne _ -> condIntReg NE x y
+
+ MO_S_Gt _ -> condIntReg GTT x y
+ MO_S_Ge _ -> condIntReg GE x y
+ MO_S_Lt _ -> condIntReg LTT x y
+ MO_S_Le _ -> condIntReg LE x y
+
+ MO_U_Gt _ -> condIntReg GU x y
+ MO_U_Ge _ -> condIntReg GEU x y
+ MO_U_Lt _ -> condIntReg LU x y
+ MO_U_Le _ -> condIntReg LEU x y
+
+ MO_F_Add w | sse2 -> trivialFCode_sse2 w ADD x y
+ | otherwise -> trivialFCode_x87 GADD x y
+ MO_F_Sub w | sse2 -> trivialFCode_sse2 w SUB x y
+ | otherwise -> trivialFCode_x87 GSUB x y
+ MO_F_Quot w | sse2 -> trivialFCode_sse2 w FDIV x y
+ | otherwise -> trivialFCode_x87 GDIV x y
+ MO_F_Mul w | sse2 -> trivialFCode_sse2 w MUL x y
+ | otherwise -> trivialFCode_x87 GMUL x y
MO_Add rep -> add_code rep x y
MO_Sub rep -> sub_code rep x y
MO_U_Shr rep -> shift_code rep SHR x y {-False-}
MO_S_Shr rep -> shift_code rep SAR x y {-False-}
- other -> pprPanic "getRegister(x86) - binary CmmMachOp (1)" (pprMachOp mop)
+ _other -> pprPanic "getRegister(x86) - binary CmmMachOp (1)" (pprMachOp mop)
where
--------------------
triv_op width instr = trivialCode width op (Just op) x y
-> NatM Register
{- Case1: shift length as immediate -}
- shift_code width instr x y@(CmmLit lit) = do
+ shift_code width instr x (CmmLit lit) = do
x_code <- getAnyReg x
let
size = intSize width
getRegister (CmmLoad mem pk)
| isFloatType pk
= do
- Amode src mem_code <- getAmode mem
- let
- size = cmmTypeSize pk
- code dst = mem_code `snocOL`
- IF_ARCH_i386(GLD size src dst,
- MOV size (OpAddr src) (OpReg dst))
- return (Any size code)
+ Amode addr mem_code <- getAmode mem
+ use_sse2 <- sse2Enabled
+ loadFloatAmode use_sse2 (typeWidth pk) addr mem_code
#if i386_TARGET_ARCH
getRegister (CmmLoad mem pk)
size = intSize width
-- x86_64: 32-bit xor is one byte shorter, and zero-extends to 64 bits
- adj_size = case size of II64 -> II32; _ -> size
- size1 = IF_ARCH_i386( size, adj_size )
+ size1 = IF_ARCH_i386( size, case size of II64 -> II32; _ -> size )
code dst
= unitOL (XOR size1 (OpReg dst) (OpReg dst))
in
reg2reg :: Size -> Reg -> Reg -> Instr
reg2reg size src dst
-#if i386_TARGET_ARCH
- | isFloatSize size = GMOV src dst
-#endif
- | otherwise = MOV size (OpReg src) (OpReg dst)
-
+ | size == FF80 = GMOV src dst
+ | otherwise = MOV size (OpReg src) (OpReg dst)
--------------------------------------------------------------------------------
getAmode :: CmmExpr -> NatM Amode
-getAmode tree@(CmmRegOff _ _) = getAmode (mangleIndexTree tree)
+getAmode (CmmRegOff r n) = getAmode $ mangleIndexTree r n
#if x86_64_TARGET_ARCH
-- This is all just ridiculous, since it carefully undoes
-- what mangleIndexTree has just done.
-getAmode (CmmMachOp (MO_Sub rep) [x, CmmLit lit@(CmmInt i _)])
+getAmode (CmmMachOp (MO_Sub _rep) [x, CmmLit lit@(CmmInt i _)])
| is32BitLit lit
-- ASSERT(rep == II32)???
= do (x_reg, x_code) <- getSomeReg x
let off = ImmInt (-(fromInteger i))
return (Amode (AddrBaseIndex (EABaseReg x_reg) EAIndexNone off) x_code)
-getAmode (CmmMachOp (MO_Add rep) [x, CmmLit lit@(CmmInt i _)])
+getAmode (CmmMachOp (MO_Add _rep) [x, CmmLit lit])
| is32BitLit lit
-- ASSERT(rep == II32)???
= do (x_reg, x_code) <- getSomeReg x
- let off = ImmInt (fromInteger i)
+ let off = litToImm lit
return (Amode (AddrBaseIndex (EABaseReg x_reg) EAIndexNone off) x_code)
-- Turn (lit1 << n + lit2) into (lit2 + lit1 << n) so it will be
b@(CmmLit _)])
= getAmode (CmmMachOp (MO_Add rep) [b,a])
-getAmode (CmmMachOp (MO_Add rep) [x, CmmMachOp (MO_Shl _)
+getAmode (CmmMachOp (MO_Add _) [x, CmmMachOp (MO_Shl _)
[y, CmmLit (CmmInt shift _)]])
| shift == 0 || shift == 1 || shift == 2 || shift == 3
= x86_complex_amode x y shift 0
-getAmode (CmmMachOp (MO_Add rep)
+getAmode (CmmMachOp (MO_Add _)
[x, CmmMachOp (MO_Add _)
[CmmMachOp (MO_Shl _) [y, CmmLit (CmmInt shift _)],
CmmLit (CmmInt offset _)]])
&& is32BitInteger offset
= x86_complex_amode x y shift offset
-getAmode (CmmMachOp (MO_Add rep) [x,y])
+getAmode (CmmMachOp (MO_Add _) [x,y])
= x86_complex_amode x y 0 0
getAmode (CmmLit lit) | is32BitLit lit
(y_reg, y_code) <- getSomeReg index
let
code = x_code `appOL` y_code
- base = case shift of 0 -> 1; 1 -> 2; 2 -> 4; 3 -> 8
+ base = case shift of 0 -> 1; 1 -> 2; 2 -> 4; 3 -> 8;
+ n -> panic $ "x86_complex_amode: unhandled shift! (" ++ show n ++ ")"
return (Amode (AddrBaseIndex (EABaseReg x_reg) (EAIndex y_reg base) (ImmInt (fromIntegral offset)))
code)
-- (see trivialCode where this function is used for an example).
getNonClobberedOperand :: CmmExpr -> NatM (Operand, InstrBlock)
-#if x86_64_TARGET_ARCH
-getNonClobberedOperand (CmmLit lit)
- | isSuitableFloatingPointLit lit = do
- lbl <- getNewLabelNat
- let code = unitOL (LDATA ReadOnlyData [CmmDataLabel lbl,
- CmmStaticLit lit])
- return (OpAddr (ripRel (ImmCLbl lbl)), code)
-#endif
-getNonClobberedOperand (CmmLit lit)
- | is32BitLit lit && not (isFloatType (cmmLitType lit)) =
- return (OpImm (litToImm lit), nilOL)
-getNonClobberedOperand (CmmLoad mem pk)
- | IF_ARCH_i386(not (isFloatType pk) && not (isWord64 pk), True) = do
- Amode src mem_code <- getAmode mem
- (src',save_code) <-
- if (amodeCouldBeClobbered src)
- then do
- tmp <- getNewRegNat archWordSize
- return (AddrBaseIndex (EABaseReg tmp) EAIndexNone (ImmInt 0),
- unitOL (LEA II32 (OpAddr src) (OpReg tmp)))
- else
- return (src, nilOL)
- return (OpAddr src', save_code `appOL` mem_code)
-getNonClobberedOperand e = do
+getNonClobberedOperand (CmmLit lit) = do
+ use_sse2 <- sse2Enabled
+ if use_sse2 && isSuitableFloatingPointLit lit
+ then do
+ let CmmFloat _ w = lit
+ Amode addr code <- memConstant (widthInBytes w) lit
+ return (OpAddr addr, code)
+ else do
+
+ if is32BitLit lit && not (isFloatType (cmmLitType lit))
+ then return (OpImm (litToImm lit), nilOL)
+ else getNonClobberedOperand_generic (CmmLit lit)
+
+getNonClobberedOperand (CmmLoad mem pk) = do
+ use_sse2 <- sse2Enabled
+ if (not (isFloatType pk) || use_sse2)
+ && IF_ARCH_i386(not (isWord64 pk), True)
+ then do
+ Amode src mem_code <- getAmode mem
+ (src',save_code) <-
+ if (amodeCouldBeClobbered src)
+ then do
+ tmp <- getNewRegNat archWordSize
+ return (AddrBaseIndex (EABaseReg tmp) EAIndexNone (ImmInt 0),
+ unitOL (LEA II32 (OpAddr src) (OpReg tmp)))
+ else
+ return (src, nilOL)
+ return (OpAddr src', save_code `appOL` mem_code)
+ else do
+ getNonClobberedOperand_generic (CmmLoad mem pk)
+
+getNonClobberedOperand e = getNonClobberedOperand_generic e
+
+getNonClobberedOperand_generic :: CmmExpr -> NatM (Operand, InstrBlock)
+getNonClobberedOperand_generic e = do
(reg, code) <- getNonClobberedReg e
return (OpReg reg, code)
amodeCouldBeClobbered :: AddrMode -> Bool
amodeCouldBeClobbered amode = any regClobbered (addrModeRegs amode)
+regClobbered :: Reg -> Bool
regClobbered (RegReal (RealRegSingle rr)) = isFastTrue (freeReg rr)
regClobbered _ = False
-- getOperand: the operand is not required to remain valid across the
-- computation of an arbitrary expression.
getOperand :: CmmExpr -> NatM (Operand, InstrBlock)
-#if x86_64_TARGET_ARCH
-getOperand (CmmLit lit)
- | isSuitableFloatingPointLit lit = do
- lbl <- getNewLabelNat
- let code = unitOL (LDATA ReadOnlyData [CmmDataLabel lbl,
- CmmStaticLit lit])
- return (OpAddr (ripRel (ImmCLbl lbl)), code)
-#endif
-getOperand (CmmLit lit)
- | is32BitLit lit && not (isFloatType (cmmLitType lit)) = do
- return (OpImm (litToImm lit), nilOL)
-getOperand (CmmLoad mem pk)
- | IF_ARCH_i386(not (isFloatType pk) && not (isWord64 pk), True) = do
- Amode src mem_code <- getAmode mem
- return (OpAddr src, mem_code)
-getOperand e = do
+
+getOperand (CmmLit lit) = do
+ use_sse2 <- sse2Enabled
+ if (use_sse2 && isSuitableFloatingPointLit lit)
+ then do
+ let CmmFloat _ w = lit
+ Amode addr code <- memConstant (widthInBytes w) lit
+ return (OpAddr addr, code)
+ else do
+
+ if is32BitLit lit && not (isFloatType (cmmLitType lit))
+ then return (OpImm (litToImm lit), nilOL)
+ else getOperand_generic (CmmLit lit)
+
+getOperand (CmmLoad mem pk) = do
+ use_sse2 <- sse2Enabled
+ if (not (isFloatType pk) || use_sse2) && IF_ARCH_i386(not (isWord64 pk), True)
+ then do
+ Amode src mem_code <- getAmode mem
+ return (OpAddr src, mem_code)
+ else
+ getOperand_generic (CmmLoad mem pk)
+
+getOperand e = getOperand_generic e
+
+getOperand_generic :: CmmExpr -> NatM (Operand, InstrBlock)
+getOperand_generic e = do
(reg, code) <- getSomeReg e
return (OpReg reg, code)
|| isSuitableFloatingPointLit lit
isOperand _ = False
+-- | Emit a read-only data block holding @lit@, aligned to @align@ bytes,
+-- and return an addressing mode referring to it: RIP-relative on x86_64,
+-- or via a dynamic (possibly PIC) reference on 32-bit x86.
+memConstant :: Int -> CmmLit -> NatM Amode
+memConstant align lit = do
+#ifdef x86_64_TARGET_ARCH
+ lbl <- getNewLabelNat
+ let addr = ripRel (ImmCLbl lbl)
+ addr_code = nilOL
+#else
+ lbl <- getNewLabelNat
+ dflags <- getDynFlagsNat
+ dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl
+ Amode addr addr_code <- getAmode dynRef
+#endif
+ let code =
+ LDATA ReadOnlyData
+ [CmmAlign align,
+ CmmDataLabel lbl,
+ CmmStaticLit lit]
+ `consOL` addr_code
+ return (Amode addr code)
+
+
+-- | Load a floating-point value of width @w@ from memory at @addr@
+-- (with @addr_code@ computing the address).  With SSE2 this is a plain
+-- MOV into an xmm register; on x87 it is a GLD onto the FPU stack, and
+-- the result register is then always the 80-bit FF80 size.
+loadFloatAmode :: Bool -> Width -> AddrMode -> InstrBlock -> NatM Register
+loadFloatAmode use_sse2 w addr addr_code = do
+ let size = floatSize w
+ code dst = addr_code `snocOL`
+ if use_sse2
+ then MOV size (OpAddr addr) (OpReg dst)
+ else GLD size addr dst
+ -- in
+ return (Any (if use_sse2 then size else FF80) code)
+
+
-- if we want a floating-point literal as an operand, we can
-- use it directly from memory. However, if the literal is
-- zero, we're better off generating it into a register using
-- xor.
+isSuitableFloatingPointLit :: CmmLit -> Bool
isSuitableFloatingPointLit (CmmFloat f _) = f /= 0.0
isSuitableFloatingPointLit _ = False
getRegOrMem :: CmmExpr -> NatM (Operand, InstrBlock)
-getRegOrMem (CmmLoad mem pk)
- | IF_ARCH_i386(not (isFloatType pk) && not (isWord64 pk), True) = do
- Amode src mem_code <- getAmode mem
- return (OpAddr src, mem_code)
+getRegOrMem e@(CmmLoad mem pk) = do
+ use_sse2 <- sse2Enabled
+ if (not (isFloatType pk) || use_sse2) && IF_ARCH_i386(not (isWord64 pk), True)
+ then do
+ Amode src mem_code <- getAmode mem
+ return (OpAddr src, mem_code)
+ else do
+ (reg, code) <- getNonClobberedReg e
+ return (OpReg reg, code)
getRegOrMem e = do
(reg, code) <- getNonClobberedReg e
return (OpReg reg, code)
+is32BitLit :: CmmLit -> Bool
#if x86_64_TARGET_ARCH
is32BitLit (CmmInt i W64) = is32BitInteger i
-- assume that labels are in the range 0-2^31-1: this assumes the
-- small memory model (see gcc docs, -mcmodel=small).
#endif
-is32BitLit x = True
+is32BitLit _ = True
MO_F_Lt W64 -> condFltCode LTT x y
MO_F_Le W64 -> condFltCode LE x y
- MO_Eq rep -> condIntCode EQQ x y
- MO_Ne rep -> condIntCode NE x y
+ MO_Eq _ -> condIntCode EQQ x y
+ MO_Ne _ -> condIntCode NE x y
- MO_S_Gt rep -> condIntCode GTT x y
- MO_S_Ge rep -> condIntCode GE x y
- MO_S_Lt rep -> condIntCode LTT x y
- MO_S_Le rep -> condIntCode LE x y
+ MO_S_Gt _ -> condIntCode GTT x y
+ MO_S_Ge _ -> condIntCode GE x y
+ MO_S_Lt _ -> condIntCode LTT x y
+ MO_S_Le _ -> condIntCode LE x y
- MO_U_Gt rep -> condIntCode GU x y
- MO_U_Ge rep -> condIntCode GEU x y
- MO_U_Lt rep -> condIntCode LU x y
- MO_U_Le rep -> condIntCode LEU x y
+ MO_U_Gt _ -> condIntCode GU x y
+ MO_U_Ge _ -> condIntCode GEU x y
+ MO_U_Lt _ -> condIntCode LU x y
+ MO_U_Le _ -> condIntCode LEU x y
- other -> pprPanic "getCondCode(x86,x86_64,sparc)" (ppr (CmmMachOp mop [x,y]))
+ _other -> pprPanic "getCondCode(x86,x86_64,sparc)" (ppr (CmmMachOp mop [x,y]))
getCondCode other = pprPanic "getCondCode(2)(x86,sparc)" (ppr other)
-- anything vs zero, using a mask
-- TODO: Add some sanity checking!!!!
-condIntCode cond (CmmMachOp (MO_And rep) [x,o2]) (CmmLit (CmmInt 0 pk))
- | (CmmLit lit@(CmmInt mask pk2)) <- o2, is32BitLit lit
+condIntCode cond (CmmMachOp (MO_And _) [x,o2]) (CmmLit (CmmInt 0 pk))
+ | (CmmLit lit@(CmmInt mask _)) <- o2, is32BitLit lit
= do
(x_reg, x_code) <- getSomeReg x
let
--------------------------------------------------------------------------------
condFltCode :: Cond -> CmmExpr -> CmmExpr -> NatM CondCode
-#if i386_TARGET_ARCH
condFltCode cond x y
- = ASSERT(cond `elem` ([EQQ, NE, LE, LTT, GE, GTT])) do
- (x_reg, x_code) <- getNonClobberedReg x
- (y_reg, y_code) <- getSomeReg y
- let
- code = x_code `appOL` y_code `snocOL`
- GCMP cond x_reg y_reg
- -- The GCMP insn does the test and sets the zero flag if comparable
- -- and true. Hence we always supply EQQ as the condition to test.
- return (CondCode True EQQ code)
-
-#elif x86_64_TARGET_ARCH
--- in the SSE2 comparison ops (ucomiss, ucomisd) the left arg may be
--- an operand, but the right must be a reg. We can probably do better
--- than this general case...
-condFltCode cond x y = do
- (x_reg, x_code) <- getNonClobberedReg x
- (y_op, y_code) <- getOperand y
- let
- code = x_code `appOL`
- y_code `snocOL`
- CMP (floatSize $ cmmExprWidth x) y_op (OpReg x_reg)
- -- NB(1): we need to use the unsigned comparison operators on the
- -- result of this comparison.
- -- in
- return (CondCode True (condToUnsigned cond) code)
-
-#else
-condFltCode = panic "X86.condFltCode: not defined"
-
-#endif
-
+ = if_sse2 condFltCode_sse2 condFltCode_x87
+ where
+ condFltCode_x87
+ = ASSERT(cond `elem` ([EQQ, NE, LE, LTT, GE, GTT])) do
+ (x_reg, x_code) <- getNonClobberedReg x
+ (y_reg, y_code) <- getSomeReg y
+ let
+ code = x_code `appOL` y_code `snocOL`
+ GCMP cond x_reg y_reg
+ -- The GCMP insn does the test and sets the zero flag if comparable
+ -- and true. Hence we always supply EQQ as the condition to test.
+ return (CondCode True EQQ code)
+
+ -- in the SSE2 comparison ops (ucomiss, ucomisd) the left arg may be
+ -- an operand, but the right must be a reg. We can probably do better
+ -- than this general case...
+ condFltCode_sse2 = do
+ (x_reg, x_code) <- getNonClobberedReg x
+ (y_op, y_code) <- getOperand y
+ let
+ code = x_code `appOL`
+ y_code `snocOL`
+ CMP (floatSize $ cmmExprWidth x) y_op (OpReg x_reg)
+ -- NB(1): we need to use the unsigned comparison operators on the
+ -- result of this comparison.
+ -- in
+ return (CondCode True (condToUnsigned cond) code)
-- -----------------------------------------------------------------------------
-- Generating assignments
-- Assign; dst is a reg, rhs is mem
assignReg_IntCode pk reg (CmmLoad src _) = do
load_code <- intLoadCode (MOV pk) src
- return (load_code (getRegisterReg reg))
+ return (load_code (getRegisterReg False{-no sse2-} reg))
-- dst is a reg, but src could be anything
-assignReg_IntCode pk reg src = do
+assignReg_IntCode _ reg src = do
code <- getAnyReg src
- return (code (getRegisterReg reg))
+ return (code (getRegisterReg False{-no sse2-} reg))
-- Floating point assignment to memory
assignMem_FltCode pk addr src = do
(src_reg, src_code) <- getNonClobberedReg src
Amode addr addr_code <- getAmode addr
+ use_sse2 <- sse2Enabled
let
code = src_code `appOL`
addr_code `snocOL`
- IF_ARCH_i386(GST pk src_reg addr,
- MOV pk (OpReg src_reg) (OpAddr addr))
+ if use_sse2 then MOV pk (OpReg src_reg) (OpAddr addr)
+ else GST pk src_reg addr
return code
-- Floating point assignment to a register/temporary
-assignReg_FltCode pk reg src = do
+assignReg_FltCode _ reg src = do
+ use_sse2 <- sse2Enabled
src_code <- getAnyReg src
- return (src_code (getRegisterReg reg))
+ return (src_code (getRegisterReg use_sse2 reg))
genJump :: CmmExpr{-the branch target-} -> NatM InstrBlock
-genJump (CmmLoad mem pk) = do
+genJump (CmmLoad mem _) = do
Amode target code <- getAmode mem
return (code `snocOL` JMP (OpAddr target))
-> CmmExpr -- the condition on which to branch
-> NatM InstrBlock
-#if i386_TARGET_ARCH
-genCondJump id bool = do
- CondCode _ cond code <- getCondCode bool
- return (code `snocOL` JXX cond id)
-
-#elif x86_64_TARGET_ARCH
genCondJump id bool = do
CondCode is_float cond cond_code <- getCondCode bool
- if not is_float
+ use_sse2 <- sse2Enabled
+ if not is_float || not use_sse2
then
return (cond_code `snocOL` JXX cond id)
else do
]
return (cond_code `appOL` code)
-#else
-genCondJump = panic "X86.genCondJump: not defined"
-
-#endif
-
-
-
-- -----------------------------------------------------------------------------
-- Generating C calls
-- write barrier compiles to no code on x86/x86-64;
-- we keep it this long in order to prevent earlier optimisations.
+-- void return type prim op
+genCCall (CmmPrim op) [] args =
+ outOfLineCmmOp op Nothing args
+
-- we only cope with a single result for foreign calls
-genCCall (CmmPrim op) [CmmHinted r _] args = do
+genCCall (CmmPrim op) [r_hinted@(CmmHinted r _)] args = do
l1 <- getNewLabelNat
l2 <- getNewLabelNat
- case op of
+ sse2 <- sse2Enabled
+ if sse2
+ then
+ outOfLineCmmOp op (Just r_hinted) args
+ else case op of
MO_F32_Sqrt -> actuallyInlineFloatOp GSQRT FF32 args
MO_F64_Sqrt -> actuallyInlineFloatOp GSQRT FF64 args
MO_F32_Tan -> actuallyInlineFloatOp (\s -> GTAN s l1 l2) FF32 args
MO_F64_Tan -> actuallyInlineFloatOp (\s -> GTAN s l1 l2) FF64 args
- other_op -> outOfLineFloatOp op r args
+ _other_op -> outOfLineCmmOp op (Just r_hinted) args
+
where
actuallyInlineFloatOp instr size [CmmHinted x _]
- = do res <- trivialUFCode size (instr size) x
+ = do res <- trivialUFCode size (instr size) x
any <- anyReg res
- return (any (getRegisterReg (CmmLocal r)))
+ return (any (getRegisterReg False (CmmLocal r)))
+
+ actuallyInlineFloatOp _ _ args
+ = panic $ "genCCall.actuallyInlineFloatOp: bad number of arguments! ("
+ ++ show (length args) ++ ")"
genCCall target dest_regs args = do
let
setDeltaNat (delta0 - arg_pad_size)
#endif
- push_codes <- mapM push_arg (reverse args)
+ use_sse2 <- sse2Enabled
+ push_codes <- mapM (push_arg use_sse2) (reverse args)
delta <- getDeltaNat
-- in
-- deal with static vs dynamic call targets
(callinsns,cconv) <-
case target of
- -- CmmPrim -> ...
CmmCallee (CmmLit (CmmLabel lbl)) conv
-> -- ToDo: stdcall arg sizes
return (unitOL (CALL (Left fn_imm) []), conv)
where fn_imm = ImmCLbl lbl
CmmCallee expr conv
- -> do { (dyn_c, dyn_r) <- get_op expr
+ -> do { (dyn_r, dyn_c) <- getSomeReg expr
; ASSERT( isWord32 (cmmExprType expr) )
return (dyn_c `snocOL` CALL (Right dyn_r) [], conv) }
+ CmmPrim _
+ -> panic $ "genCCall: Can't handle CmmPrim call type here, error "
+ ++ "probably because too many return values."
let push_code
#if darwin_TARGET_OS
| otherwise
#endif
= concatOL push_codes
+
+ -- Deallocate parameters after call for ccall;
+ -- but not for stdcall (callee does it)
+ --
+ -- We have to pop any stack padding we added
+ -- on Darwin even if we are doing stdcall, though (#5052)
+ pop_size | cconv /= StdCallConv = tot_arg_size
+ | otherwise
+#if darwin_TARGET_OS
+ = arg_pad_size
+#else
+ = 0
+#endif
+
call = callinsns `appOL`
toOL (
- -- Deallocate parameters after call for ccall;
- -- but not for stdcall (callee does it)
- (if cconv == StdCallConv || tot_arg_size==0 then [] else
- [ADD II32 (OpImm (ImmInt tot_arg_size)) (OpReg esp)])
+ (if pop_size==0 then [] else
+ [ADD II32 (OpImm (ImmInt pop_size)) (OpReg esp)])
++
[DELTA (delta + tot_arg_size)]
)
-- assign the results, if necessary
assign_code [] = nilOL
assign_code [CmmHinted dest _hint]
- | isFloatType ty = unitOL (GMOV fake0 r_dest)
+ | isFloatType ty =
+ if use_sse2
+ then let tmp_amode = AddrBaseIndex (EABaseReg esp)
+ EAIndexNone
+ (ImmInt 0)
+ sz = floatSize w
+ in toOL [ SUB II32 (OpImm (ImmInt b)) (OpReg esp),
+ GST sz fake0 tmp_amode,
+ MOV sz (OpAddr tmp_amode) (OpReg r_dest),
+ ADD II32 (OpImm (ImmInt b)) (OpReg esp)]
+ else unitOL (GMOV fake0 r_dest)
| isWord64 ty = toOL [MOV II32 (OpReg eax) (OpReg r_dest),
MOV II32 (OpReg edx) (OpReg r_dest_hi)]
| otherwise = unitOL (MOV (intSize w) (OpReg eax) (OpReg r_dest))
where
ty = localRegType dest
w = typeWidth ty
+ b = widthInBytes w
r_dest_hi = getHiVRegFromLo r_dest
- r_dest = getRegisterReg (CmmLocal dest)
- assign_code many = panic "genCCall.assign_code many"
+ r_dest = getRegisterReg use_sse2 (CmmLocal dest)
+ assign_code many = pprPanic "genCCall.assign_code - too many return values:" (ppr many)
return (push_code `appOL`
call `appOL`
arg_size :: CmmType -> Int -- Width in bytes
arg_size ty = widthInBytes (typeWidth ty)
+#if darwin_TARGET_OS
roundTo a x | x `mod` a == 0 = x
| otherwise = x + a - (x `mod` a)
+#endif
-
- push_arg :: HintedCmmActual {-current argument-}
+ push_arg :: Bool -> HintedCmmActual {-current argument-}
-> NatM InstrBlock -- code
- push_arg (CmmHinted arg _hint) -- we don't need the hints on x86
+ push_arg use_sse2 (CmmHinted arg _hint) -- we don't need the hints on x86
| isWord64 arg_ty = do
ChildCode64 code r_lo <- iselExpr64 arg
delta <- getDeltaNat
DELTA (delta-8)]
)
- | otherwise = do
- (code, reg) <- get_op arg
+ | isFloatType arg_ty = do
+ (reg, code) <- getSomeReg arg
delta <- getDeltaNat
- let size = arg_size arg_ty -- Byte size
setDeltaNat (delta-size)
- if (isFloatType arg_ty)
- then return (code `appOL`
+ return (code `appOL`
toOL [SUB II32 (OpImm (ImmInt size)) (OpReg esp),
DELTA (delta-size),
- GST (floatSize (typeWidth arg_ty))
- reg (AddrBaseIndex (EABaseReg esp)
+ let addr = AddrBaseIndex (EABaseReg esp)
EAIndexNone
- (ImmInt 0))]
- )
- else return (code `snocOL`
- PUSH II32 (OpReg reg) `snocOL`
- DELTA (delta-size)
+ (ImmInt 0)
+ size = floatSize (typeWidth arg_ty)
+ in
+ if use_sse2
+ then MOV size (OpReg reg) (OpAddr addr)
+ else GST size reg addr
+ ]
)
+
+ | otherwise = do
+ (operand, code) <- getOperand arg
+ delta <- getDeltaNat
+ setDeltaNat (delta-size)
+ return (code `snocOL`
+ PUSH II32 operand `snocOL`
+ DELTA (delta-size))
+
where
arg_ty = cmmExprType arg
-
- ------------
- get_op :: CmmExpr -> NatM (InstrBlock, Reg) -- code, reg
- get_op op = do
- (reg,code) <- getSomeReg op
- return (code, reg)
+ size = arg_size arg_ty -- Byte size
#elif x86_64_TARGET_ARCH
-- write barrier compiles to no code on x86/x86-64;
-- we keep it this long in order to prevent earlier optimisations.
+-- void return type prim op
+genCCall (CmmPrim op) [] args =
+ outOfLineCmmOp op Nothing args
-genCCall (CmmPrim op) [CmmHinted r _] args =
- outOfLineFloatOp op r args
+-- we only cope with a single result for foreign calls
+genCCall (CmmPrim op) [res] args =
+ outOfLineCmmOp op (Just res) args
genCCall target dest_regs args = do
-- deal with static vs dynamic call targets
(callinsns,cconv) <-
case target of
- -- CmmPrim -> ...
CmmCallee (CmmLit (CmmLabel lbl)) conv
-> -- ToDo: stdcall arg sizes
return (unitOL (CALL (Left fn_imm) arg_regs), conv)
CmmCallee expr conv
-> do (dyn_r, dyn_c) <- getSomeReg expr
return (dyn_c `snocOL` CALL (Right dyn_r) arg_regs, conv)
+ CmmPrim _
+ -> panic $ "genCCall: Can't handle CmmPrim call type here, error "
+ ++ "probably because too many return values."
let
- -- The x86_64 ABI requires us to set %al to the number of SSE
+ -- The x86_64 ABI requires us to set %al to the number of SSE2
-- registers that contain arguments, if the called routine
-- is a varargs function. We don't know whether it's a
-- varargs function or not, so we have to assume it is.
--
-- It's not safe to omit this assignment, even if the number
- -- of SSE regs in use is zero. If %al is larger than 8
+ -- of SSE2 regs in use is zero. If %al is larger than 8
-- on entry to a varargs function, seg faults ensue.
assign_eax n = unitOL (MOV II32 (OpImm (ImmInt n)) (OpReg eax))
_ -> unitOL (MOV (cmmTypeSize rep) (OpReg rax) (OpReg r_dest))
where
rep = localRegType dest
- r_dest = getRegisterReg (CmmLocal dest)
- assign_code many = panic "genCCall.assign_code many"
+ r_dest = getRegisterReg True (CmmLocal dest)
+ assign_code _many = panic "genCCall.assign_code many"
return (load_args_code `appOL`
adjust_rsp `appOL`
return ((CmmHinted arg hint):args', ars, frs, code')
push_args [] code = return code
- push_args ((CmmHinted arg hint):rest) code
+ push_args ((CmmHinted arg _):rest) code
| isFloatType arg_rep = do
(arg_reg, arg_code) <- getSomeReg arg
delta <- getDeltaNat
#endif /* x86_64_TARGET_ARCH */
-
-
-outOfLineFloatOp :: CallishMachOp -> CmmFormal -> HintedCmmActuals -> NatM InstrBlock
-outOfLineFloatOp mop res args
+outOfLineCmmOp :: CallishMachOp -> Maybe HintedCmmFormal -> HintedCmmActuals -> NatM InstrBlock
+outOfLineCmmOp mop res args
= do
dflags <- getDynFlagsNat
targetExpr <- cmmMakeDynamicReference dflags addImportNat CallReference lbl
let target = CmmCallee targetExpr CCallConv
- if isFloat64 (localRegType res)
- then
- stmtToInstrs (CmmCall target [CmmHinted res NoHint] args CmmUnsafe CmmMayReturn)
- else do
- uq <- getUniqueNat
- let
- tmp = LocalReg uq f64
- -- in
- code1 <- stmtToInstrs (CmmCall target [CmmHinted tmp NoHint] args CmmUnsafe CmmMayReturn)
- code2 <- stmtToInstrs (CmmAssign (CmmLocal res) (CmmReg (CmmLocal tmp)))
- return (code1 `appOL` code2)
+ stmtToInstrs (CmmCall target (catMaybes [res]) args' CmmUnsafe CmmMayReturn)
where
- lbl = mkForeignLabel fn Nothing False IsFunction
+ -- Assume we can call these functions directly, and that they're not in a dynamic library.
+ -- TODO: Why is this ok? Under linux this code will be in libm.so
+ -- Is it because they're really implemented as a primitive instruction by the assembler?? -- BL 2009/12/31
+ lbl = mkForeignLabel fn Nothing ForeignLabelInThisPackage IsFunction
+
+ args' = case mop of
+ MO_Memcpy -> init args
+ MO_Memset -> init args
+ MO_Memmove -> init args
+ _ -> args
fn = case mop of
MO_F32_Sqrt -> fsLit "sqrtf"
MO_F64_Tanh -> fsLit "tanh"
MO_F64_Pwr -> fsLit "pow"
+ MO_Memcpy -> fsLit "memcpy"
+ MO_Memset -> fsLit "memset"
+ MO_Memmove -> fsLit "memmove"
-
+ other -> panic $ "outOfLineCmmOp: unmatched op! (" ++ show other ++ ")"
-- -----------------------------------------------------------------------------
dflags <- getDynFlagsNat
dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl
(tableReg,t_code) <- getSomeReg $ dynRef
- let
- jumpTable = map jumpTableEntryRel ids
-
- jumpTableEntryRel Nothing
- = CmmStaticLit (CmmInt 0 wordWidth)
- jumpTableEntryRel (Just (BlockId id))
- = CmmStaticLit (CmmLabelDiffOff blockLabel lbl 0)
- where blockLabel = mkAsmTempLabel id
-
- op = OpAddr (AddrBaseIndex (EABaseReg tableReg)
+ let op = OpAddr (AddrBaseIndex (EABaseReg tableReg)
(EAIndex reg wORD_SIZE) (ImmInt 0))
#if x86_64_TARGET_ARCH
code = e_code `appOL` t_code `appOL` toOL [
ADD (intSize wordWidth) op (OpReg tableReg),
- JMP_TBL (OpReg tableReg) [ id | Just id <- ids ],
- LDATA Text (CmmDataLabel lbl : jumpTable)
+ JMP_TBL (OpReg tableReg) ids Text lbl
]
#else
-- HACK: On x86_64 binutils<2.17 is only able to generate PC32
-- conjunction with the hack in PprMach.hs/pprDataItem once
-- binutils 2.17 is standard.
code = e_code `appOL` t_code `appOL` toOL [
- LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
- MOVSxL II32
- (OpAddr (AddrBaseIndex (EABaseReg tableReg)
- (EAIndex reg wORD_SIZE) (ImmInt 0)))
- (OpReg reg),
+ MOVSxL II32 op (OpReg reg),
ADD (intSize wordWidth) (OpReg reg) (OpReg tableReg),
- JMP_TBL (OpReg tableReg) [ id | Just id <- ids ]
+ JMP_TBL (OpReg tableReg) ids ReadOnlyData lbl
]
#endif
#else
code = e_code `appOL` t_code `appOL` toOL [
- LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
ADD (intSize wordWidth) op (OpReg tableReg),
- JMP_TBL (OpReg tableReg) [ id | Just id <- ids ]
+ JMP_TBL (OpReg tableReg) ids ReadOnlyData lbl
]
#endif
return code
= do
(reg,e_code) <- getSomeReg expr
lbl <- getNewLabelNat
- let
- jumpTable = map jumpTableEntry ids
- op = OpAddr (AddrBaseIndex EABaseNone (EAIndex reg wORD_SIZE) (ImmCLbl lbl))
+ let op = OpAddr (AddrBaseIndex EABaseNone (EAIndex reg wORD_SIZE) (ImmCLbl lbl))
code = e_code `appOL` toOL [
- LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
- JMP_TBL op [ id | Just id <- ids ]
+ JMP_TBL op ids ReadOnlyData lbl
]
-- in
return code
+generateJumpTableForInstr :: Instr -> Maybe (NatCmmTop Instr)
+generateJumpTableForInstr (JMP_TBL _ ids section lbl) = Just (createJumpTable ids section lbl)
+generateJumpTableForInstr _ = Nothing
+
+createJumpTable :: [Maybe BlockId] -> Section -> CLabel -> GenCmmTop CmmStatic h g
+createJumpTable ids section lbl
+ = let jumpTable
+ | opt_PIC =
+ let jumpTableEntryRel Nothing
+ = CmmStaticLit (CmmInt 0 wordWidth)
+ jumpTableEntryRel (Just blockid)
+ = CmmStaticLit (CmmLabelDiffOff blockLabel lbl 0)
+ where blockLabel = mkAsmTempLabel (getUnique blockid)
+ in map jumpTableEntryRel ids
+ | otherwise = map jumpTableEntry ids
+ in CmmData section (CmmDataLabel lbl : jumpTable)
-- -----------------------------------------------------------------------------
-- 'condIntReg' and 'condFltReg': condition codes into registers
condFltReg :: Cond -> CmmExpr -> CmmExpr -> NatM Register
-
-#if i386_TARGET_ARCH
-condFltReg cond x y = do
- CondCode _ cond cond_code <- condFltCode cond x y
- tmp <- getNewRegNat II8
- let
- code dst = cond_code `appOL` toOL [
- SETCC cond (OpReg tmp),
- MOVZxL II8 (OpReg tmp) (OpReg dst)
- ]
- -- in
- return (Any II32 code)
-
-#elif x86_64_TARGET_ARCH
-condFltReg cond x y = do
- CondCode _ cond cond_code <- condFltCode cond x y
- tmp1 <- getNewRegNat archWordSize
- tmp2 <- getNewRegNat archWordSize
- let
- -- We have to worry about unordered operands (eg. comparisons
- -- against NaN). If the operands are unordered, the comparison
- -- sets the parity flag, carry flag and zero flag.
- -- All comparisons are supposed to return false for unordered
- -- operands except for !=, which returns true.
- --
- -- Optimisation: we don't have to test the parity flag if we
- -- know the test has already excluded the unordered case: eg >
- -- and >= test for a zero carry flag, which can only occur for
- -- ordered operands.
- --
- -- ToDo: by reversing comparisons we could avoid testing the
- -- parity flag in more cases.
-
- code dst =
- cond_code `appOL`
- (case cond of
- NE -> or_unordered dst
- GU -> plain_test dst
- GEU -> plain_test dst
- _ -> and_ordered dst)
-
- plain_test dst = toOL [
- SETCC cond (OpReg tmp1),
- MOVZxL II8 (OpReg tmp1) (OpReg dst)
- ]
- or_unordered dst = toOL [
- SETCC cond (OpReg tmp1),
- SETCC PARITY (OpReg tmp2),
- OR II8 (OpReg tmp1) (OpReg tmp2),
- MOVZxL II8 (OpReg tmp2) (OpReg dst)
- ]
- and_ordered dst = toOL [
- SETCC cond (OpReg tmp1),
- SETCC NOTPARITY (OpReg tmp2),
- AND II8 (OpReg tmp1) (OpReg tmp2),
- MOVZxL II8 (OpReg tmp2) (OpReg dst)
- ]
- -- in
- return (Any II32 code)
-
-#else
-condFltReg = panic "X86.condFltReg: not defined"
-
-#endif
-
-
+condFltReg cond x y = if_sse2 condFltReg_sse2 condFltReg_x87
+ where
+ condFltReg_x87 = do
+ CondCode _ cond cond_code <- condFltCode cond x y
+ tmp <- getNewRegNat II8
+ let
+ code dst = cond_code `appOL` toOL [
+ SETCC cond (OpReg tmp),
+ MOVZxL II8 (OpReg tmp) (OpReg dst)
+ ]
+ -- in
+ return (Any II32 code)
+
+ condFltReg_sse2 = do
+ CondCode _ cond cond_code <- condFltCode cond x y
+ tmp1 <- getNewRegNat archWordSize
+ tmp2 <- getNewRegNat archWordSize
+ let
+ -- We have to worry about unordered operands (eg. comparisons
+ -- against NaN). If the operands are unordered, the comparison
+ -- sets the parity flag, carry flag and zero flag.
+ -- All comparisons are supposed to return false for unordered
+ -- operands except for !=, which returns true.
+ --
+ -- Optimisation: we don't have to test the parity flag if we
+ -- know the test has already excluded the unordered case: eg >
+ -- and >= test for a zero carry flag, which can only occur for
+ -- ordered operands.
+ --
+ -- ToDo: by reversing comparisons we could avoid testing the
+ -- parity flag in more cases.
+
+ code dst =
+ cond_code `appOL`
+ (case cond of
+ NE -> or_unordered dst
+ GU -> plain_test dst
+ GEU -> plain_test dst
+ _ -> and_ordered dst)
+
+ plain_test dst = toOL [
+ SETCC cond (OpReg tmp1),
+ MOVZxL II8 (OpReg tmp1) (OpReg dst)
+ ]
+ or_unordered dst = toOL [
+ SETCC cond (OpReg tmp1),
+ SETCC PARITY (OpReg tmp2),
+ OR II8 (OpReg tmp1) (OpReg tmp2),
+ MOVZxL II8 (OpReg tmp2) (OpReg dst)
+ ]
+ and_ordered dst = toOL [
+ SETCC cond (OpReg tmp1),
+ SETCC NOTPARITY (OpReg tmp2),
+ AND II8 (OpReg tmp1) (OpReg tmp2),
+ MOVZxL II8 (OpReg tmp2) (OpReg dst)
+ ]
+ -- in
+ return (Any II32 code)
-- -----------------------------------------------------------------------------
register happens to be the destination register.
-}
-trivialCode width instr (Just revinstr) (CmmLit lit_a) b
+trivialCode :: Width -> (Operand -> Operand -> Instr)
+ -> Maybe (Operand -> Operand -> Instr)
+ -> CmmExpr -> CmmExpr -> NatM Register
+trivialCode width _ (Just revinstr) (CmmLit lit_a) b
| is32BitLit lit_a = do
b_code <- getAnyReg b
let
-- in
return (Any (intSize width) code)
-trivialCode width instr maybe_revinstr a b
+trivialCode width instr _ a b
= genTrivialCode (intSize width) instr a b
-- This is re-used for floating pt instructions too.
+genTrivialCode :: Size -> (Operand -> Operand -> Instr)
+ -> CmmExpr -> CmmExpr -> NatM Register
genTrivialCode rep instr a b = do
(b_op, b_code) <- getNonClobberedOperand b
a_code <- getAnyReg a
-- in
return (Any rep code)
+regClashesWithOp :: Reg -> Operand -> Bool
reg `regClashesWithOp` OpReg reg2 = reg == reg2
reg `regClashesWithOp` OpAddr amode = any (==reg) (addrModeRegs amode)
-reg `regClashesWithOp` _ = False
+_ `regClashesWithOp` _ = False
-----------
+trivialUCode :: Size -> (Operand -> Instr)
+ -> CmmExpr -> NatM Register
trivialUCode rep instr x = do
x_code <- getAnyReg x
let
-----------
-#if i386_TARGET_ARCH
-
-trivialFCode width instr x y = do
+trivialFCode_x87 :: (Size -> Reg -> Reg -> Reg -> Instr)
+ -> CmmExpr -> CmmExpr -> NatM Register
+trivialFCode_x87 instr x y = do
(x_reg, x_code) <- getNonClobberedReg x -- these work for float regs too
(y_reg, y_code) <- getSomeReg y
let
- size = floatSize width
+ size = FF80 -- always, on x87
code dst =
x_code `appOL`
y_code `snocOL`
instr size x_reg y_reg dst
return (Any size code)
-#endif
+trivialFCode_sse2 :: Width -> (Size -> Operand -> Operand -> Instr)
+ -> CmmExpr -> CmmExpr -> NatM Register
+trivialFCode_sse2 pk instr x y
+ = genTrivialCode size (instr size) x y
+ where size = floatSize pk
-#if x86_64_TARGET_ARCH
-trivialFCode pk instr x y
- = genTrivialCode size (instr size) x y
- where size = floatSize pk
-#endif
+trivialUFCode :: Size -> (Reg -> Reg -> Instr) -> CmmExpr -> NatM Register
trivialUFCode size instr x = do
(x_reg, x_code) <- getSomeReg x
let
--------------------------------------------------------------------------------
coerceInt2FP :: Width -> Width -> CmmExpr -> NatM Register
-
-#if i386_TARGET_ARCH
-coerceInt2FP from to x = do
- (x_reg, x_code) <- getSomeReg x
- let
- opc = case to of W32 -> GITOF; W64 -> GITOD
- code dst = x_code `snocOL` opc x_reg dst
- -- ToDo: works for non-II32 reps?
- return (Any (floatSize to) code)
-
-#elif x86_64_TARGET_ARCH
-coerceInt2FP from to x = do
- (x_op, x_code) <- getOperand x -- ToDo: could be a safe operand
- let
- opc = case to of W32 -> CVTSI2SS; W64 -> CVTSI2SD
- code dst = x_code `snocOL` opc x_op dst
- -- in
- return (Any (floatSize to) code) -- works even if the destination rep is <II32
-
-#else
-coerceInt2FP = panic "X86.coerceInt2FP: not defined"
-
-#endif
-
-
-
+coerceInt2FP from to x = if_sse2 coerce_sse2 coerce_x87
+ where
+ coerce_x87 = do
+ (x_reg, x_code) <- getSomeReg x
+ let
+ opc = case to of W32 -> GITOF; W64 -> GITOD;
+ n -> panic $ "coerceInt2FP.x87: unhandled width ("
+ ++ show n ++ ")"
+ code dst = x_code `snocOL` opc x_reg dst
+ -- ToDo: works for non-II32 reps?
+ return (Any FF80 code)
+
+ coerce_sse2 = do
+ (x_op, x_code) <- getOperand x -- ToDo: could be a safe operand
+ let
+ opc = case to of W32 -> CVTSI2SS; W64 -> CVTSI2SD
+ n -> panic $ "coerceInt2FP.sse: unhandled width ("
+ ++ show n ++ ")"
+ code dst = x_code `snocOL` opc (intSize from) x_op dst
+ -- in
+ return (Any (floatSize to) code)
+ -- works even if the destination rep is <II32
--------------------------------------------------------------------------------
coerceFP2Int :: Width -> Width -> CmmExpr -> NatM Register
-
-#if i386_TARGET_ARCH
-coerceFP2Int from to x = do
- (x_reg, x_code) <- getSomeReg x
- let
- opc = case from of W32 -> GFTOI; W64 -> GDTOI
- code dst = x_code `snocOL` opc x_reg dst
- -- ToDo: works for non-II32 reps?
- -- in
- return (Any (intSize to) code)
-
-#elif x86_64_TARGET_ARCH
-coerceFP2Int from to x = do
- (x_op, x_code) <- getOperand x -- ToDo: could be a safe operand
- let
- opc = case from of W32 -> CVTTSS2SIQ; W64 -> CVTTSD2SIQ
- code dst = x_code `snocOL` opc x_op dst
- -- in
- return (Any (intSize to) code) -- works even if the destination rep is <II32
-
-#else
-coerceFP2Int = panic "X86.coerceFP2Int: not defined"
-
-#endif
-
-
+coerceFP2Int from to x = if_sse2 coerceFP2Int_sse2 coerceFP2Int_x87
+ where
+ coerceFP2Int_x87 = do
+ (x_reg, x_code) <- getSomeReg x
+ let
+ opc = case from of W32 -> GFTOI; W64 -> GDTOI
+ n -> panic $ "coerceFP2Int.x87: unhandled width ("
+ ++ show n ++ ")"
+ code dst = x_code `snocOL` opc x_reg dst
+ -- ToDo: works for non-II32 reps?
+ -- in
+ return (Any (intSize to) code)
+
+ coerceFP2Int_sse2 = do
+ (x_op, x_code) <- getOperand x -- ToDo: could be a safe operand
+ let
+ opc = case from of W32 -> CVTTSS2SIQ; W64 -> CVTTSD2SIQ;
+ n -> panic $ "coerceFP2Int.sse: unhandled width ("
+ ++ show n ++ ")"
+ code dst = x_code `snocOL` opc (intSize to) x_op dst
+ -- in
+ return (Any (intSize to) code)
+ -- works even if the destination rep is <II32
--------------------------------------------------------------------------------
coerceFP2FP :: Width -> CmmExpr -> NatM Register
-
-#if x86_64_TARGET_ARCH
coerceFP2FP to x = do
+ use_sse2 <- sse2Enabled
(x_reg, x_code) <- getSomeReg x
let
- opc = case to of W32 -> CVTSD2SS; W64 -> CVTSS2SD
+ opc | use_sse2 = case to of W32 -> CVTSD2SS; W64 -> CVTSS2SD;
+ n -> panic $ "coerceFP2FP: unhandled width ("
+ ++ show n ++ ")"
+ | otherwise = GDTOF
code dst = x_code `snocOL` opc x_reg dst
-- in
- return (Any (floatSize to) code)
-
-#else
-coerceFP2FP = panic "X86.coerceFP2FP: not defined"
-
-#endif
-
+ return (Any (if use_sse2 then floatSize to else FF80) code)
+--------------------------------------------------------------------------------
+sse2NegCode :: Width -> CmmExpr -> NatM Register
+sse2NegCode w x = do
+ let sz = floatSize w
+ x_code <- getAnyReg x
+ -- This is how gcc does it, so it can't be that bad:
+ let
+ const | FF32 <- sz = CmmInt 0x80000000 W32
+ | otherwise = CmmInt 0x8000000000000000 W64
+ Amode amode amode_code <- memConstant (widthInBytes w) const
+ tmp <- getNewRegNat sz
+ let
+ code dst = x_code dst `appOL` amode_code `appOL` toOL [
+ MOV sz (OpAddr amode) (OpReg tmp),
+ XOR sz (OpReg tmp) (OpReg dst)
+ ]
+ --
+ return (Any sz code)