import Data.Word
import Data.Int
+sse2Enabled :: NatM Bool
+#if x86_64_TARGET_ARCH
+-- SSE2 is always enabled on x86_64. It would be possible to make it
+-- optional, but we'd need to fix at least the foreign call code, where
+-- the calling convention specifies the use of xmm regs, and possibly
+-- other places.
+sse2Enabled = return True
+#else
+sse2Enabled = do
+ dflags <- getDynFlagsNat
+ return (dopt Opt_SSE2 dflags)
+#endif
+
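+-- | Run the first action when SSE2 code generation is enabled, and the
+-- second (x87) action otherwise; e.g. @if_sse2 sse2Code x87Code@.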
+if_sse2 :: NatM a -> NatM a -> NatM a
+if_sse2 sse2 x87 = do
+ b <- sse2Enabled
+ if b then sse2 else x87
cmmTopCodeGen
:: DynFlags
-- | Grab the Reg for a CmmReg
-getRegisterReg :: CmmReg -> Reg
+getRegisterReg :: Bool -> CmmReg -> Reg
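+-- The Bool says whether SSE2 is in use: without SSE2, floating-point
+-- locals must live in x87 registers, so they get size FF80 regardless
+-- of their Cmm type.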
-getRegisterReg (CmmLocal (LocalReg u pk))
- = RegVirtual $ mkVirtualReg u (cmmTypeSize pk)
+getRegisterReg use_sse2 (CmmLocal (LocalReg u pk))
+ = let sz = cmmTypeSize pk in
+ if isFloatSize sz && not use_sse2
+ then RegVirtual (mkVirtualReg u FF80)
+ else RegVirtual (mkVirtualReg u sz)
-getRegisterReg (CmmGlobal mid)
+getRegisterReg _ (CmmGlobal mid)
= case get_GlobalReg_reg_or_addr mid of
Left reg -> RegReal $ reg
_other -> pprPanic "getRegisterReg-memory" (ppr $ CmmGlobal mid)
#endif
getRegister (CmmReg reg)
- = return (Fixed (cmmTypeSize (cmmRegType reg))
- (getRegisterReg reg) nilOL)
+ = do use_sse2 <- sse2Enabled
+ let
+ sz = cmmTypeSize (cmmRegType reg)
+ size | not use_sse2 && isFloatSize sz = FF80
+ | otherwise = sz
+ --
+ return (Fixed sz (getRegisterReg use_sse2 reg) nilOL)
+
getRegister tree@(CmmRegOff _ _)
= getRegister (mangleIndexTree tree)
#endif
-
-
-#if i386_TARGET_ARCH
-
-getRegister (CmmLit (CmmFloat f W32)) = do
- lbl <- getNewLabelNat
- dflags <- getDynFlagsNat
- dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl
- Amode addr addr_code <- getAmode dynRef
- let code dst =
- LDATA ReadOnlyData
- [CmmDataLabel lbl,
- CmmStaticLit (CmmFloat f W32)]
- `consOL` (addr_code `snocOL`
- GLD FF32 addr dst)
- -- in
- return (Any FF32 code)
-
-
-getRegister (CmmLit (CmmFloat d W64))
- | d == 0.0
- = let code dst = unitOL (GLDZ dst)
- in return (Any FF64 code)
-
- | d == 1.0
- = let code dst = unitOL (GLD1 dst)
- in return (Any FF64 code)
-
- | otherwise = do
- lbl <- getNewLabelNat
- dflags <- getDynFlagsNat
- dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl
- Amode addr addr_code <- getAmode dynRef
- let code dst =
- LDATA ReadOnlyData
- [CmmDataLabel lbl,
- CmmStaticLit (CmmFloat d W64)]
- `consOL` (addr_code `snocOL`
- GLD FF64 addr dst)
- -- in
- return (Any FF64 code)
-
-#endif /* i386_TARGET_ARCH */
-
-
-
-
-#if x86_64_TARGET_ARCH
-getRegister (CmmLit (CmmFloat 0.0 w)) = do
- let size = floatSize w
- code dst = unitOL (XOR size (OpReg dst) (OpReg dst))
- -- I don't know why there are xorpd, xorps, and pxor instructions.
- -- They all appear to do the same thing --SDM
- return (Any size code)
-
-getRegister (CmmLit (CmmFloat f w)) = do
- lbl <- getNewLabelNat
- let code dst = toOL [
- LDATA ReadOnlyData
- [CmmDataLabel lbl,
- CmmStaticLit (CmmFloat f w)],
- MOV size (OpAddr (ripRel (ImmCLbl lbl))) (OpReg dst)
- ]
- -- in
- return (Any size code)
- where size = floatSize w
-
-#endif /* x86_64_TARGET_ARCH */
-
-
-
-
+getRegister (CmmLit lit@(CmmFloat f w)) =
+ if_sse2 float_const_sse2 float_const_x87
+ where
+ float_const_sse2
+ | f == 0.0 = do
+ let
+ size = floatSize w
+ code dst = unitOL (XOR size (OpReg dst) (OpReg dst))
+ -- I don't know why there are xorpd, xorps, and pxor instructions.
+ -- They all appear to do the same thing --SDM
+ return (Any size code)
+
+ | otherwise = do
+ Amode addr code <- memConstant (widthInBytes w) lit
+ loadFloatAmode True w addr code
+
+ float_const_x87 = case w of
+ W64
+ | f == 0.0 ->
+ let code dst = unitOL (GLDZ dst)
+ in return (Any FF80 code)
+
+ | f == 1.0 ->
+ let code dst = unitOL (GLD1 dst)
+ in return (Any FF80 code)
+
+ _otherwise -> do
+ Amode addr code <- memConstant (widthInBytes w) lit
+ loadFloatAmode False w addr code
-- catch simple cases of zero- or sign-extended load
getRegister (CmmMachOp (MO_UU_Conv W8 W32) [CmmLoad addr _]) = do
= return $ Any II64 (\dst -> unitOL $
LEA II64 (OpAddr (ripRel (litToImm displacement))) (OpReg dst))
-getRegister (CmmMachOp (MO_F_Neg W32) [x]) = do
- x_code <- getAnyReg x
- lbl <- getNewLabelNat
- let
- code dst = x_code dst `appOL` toOL [
- -- This is how gcc does it, so it can't be that bad:
- LDATA ReadOnlyData16 [
- CmmAlign 16,
- CmmDataLabel lbl,
- CmmStaticLit (CmmInt 0x80000000 W32),
- CmmStaticLit (CmmInt 0 W32),
- CmmStaticLit (CmmInt 0 W32),
- CmmStaticLit (CmmInt 0 W32)
- ],
- XOR FF32 (OpAddr (ripRel (ImmCLbl lbl))) (OpReg dst)
- -- xorps, so we need the 128-bit constant
- -- ToDo: rip-relative
- ]
- --
- return (Any FF32 code)
-
-getRegister (CmmMachOp (MO_F_Neg W64) [x]) = do
- x_code <- getAnyReg x
- lbl <- getNewLabelNat
- let
- -- This is how gcc does it, so it can't be that bad:
- code dst = x_code dst `appOL` toOL [
- LDATA ReadOnlyData16 [
- CmmAlign 16,
- CmmDataLabel lbl,
- CmmStaticLit (CmmInt 0x8000000000000000 W64),
- CmmStaticLit (CmmInt 0 W64)
- ],
- -- gcc puts an unpck here. Wonder if we need it.
- XOR FF64 (OpAddr (ripRel (ImmCLbl lbl))) (OpReg dst)
- -- xorpd, so we need the 128-bit constant
- ]
- --
- return (Any FF64 code)
-
#endif /* x86_64_TARGET_ARCH */
-getRegister (CmmMachOp mop [x]) -- unary MachOps
- = case mop of
-#if i386_TARGET_ARCH
- MO_F_Neg W32 -> trivialUFCode FF32 (GNEG FF32) x
- MO_F_Neg W64 -> trivialUFCode FF64 (GNEG FF64) x
-#endif
+getRegister (CmmMachOp mop [x]) = do -- unary MachOps
+ sse2 <- sse2Enabled
+ case mop of
+ MO_F_Neg w
+ | sse2 -> sse2NegCode w x
+ | otherwise -> trivialUFCode FF80 (GNEG FF80) x
MO_S_Neg w -> triv_ucode NEGI (intSize w)
- MO_F_Neg w -> triv_ucode NEGI (floatSize w)
MO_Not w -> triv_ucode NOT (intSize w)
-- Nop conversions
-- the form of a movzl and print it as a movl later.
#endif
-#if i386_TARGET_ARCH
- MO_FF_Conv W32 W64 -> conversionNop FF64 x
- MO_FF_Conv W64 W32 -> conversionNop FF32 x
-#else
- MO_FF_Conv W32 W64 -> coerceFP2FP W64 x
- MO_FF_Conv W64 W32 -> coerceFP2FP W32 x
-#endif
+ MO_FF_Conv W32 W64
+ | sse2 -> coerceFP2FP W64 x
+ | otherwise -> conversionNop FF80 x
+
+ MO_FF_Conv W64 W32
+ | sse2 -> coerceFP2FP W32 x
+ | otherwise -> conversionNop FF80 x
MO_FS_Conv from to -> coerceFP2Int from to x
MO_SF_Conv from to -> coerceInt2FP from to x
return (swizzleRegisterRep e_code new_size)
-getRegister e@(CmmMachOp mop [x, y]) -- dyadic MachOps
- = case mop of
+getRegister e@(CmmMachOp mop [x, y]) = do -- dyadic MachOps
+ sse2 <- sse2Enabled
+ case mop of
MO_F_Eq w -> condFltReg EQQ x y
MO_F_Ne w -> condFltReg NE x y
MO_F_Gt w -> condFltReg GTT x y
MO_U_Lt rep -> condIntReg LU x y
MO_U_Le rep -> condIntReg LEU x y
-#if i386_TARGET_ARCH
- MO_F_Add w -> trivialFCode w GADD x y
- MO_F_Sub w -> trivialFCode w GSUB x y
- MO_F_Quot w -> trivialFCode w GDIV x y
- MO_F_Mul w -> trivialFCode w GMUL x y
-#endif
-
-#if x86_64_TARGET_ARCH
- MO_F_Add w -> trivialFCode w ADD x y
- MO_F_Sub w -> trivialFCode w SUB x y
- MO_F_Quot w -> trivialFCode w FDIV x y
- MO_F_Mul w -> trivialFCode w MUL x y
-#endif
+ MO_F_Add w | sse2 -> trivialFCode_sse2 w ADD x y
+ | otherwise -> trivialFCode_x87 w GADD x y
+ MO_F_Sub w | sse2 -> trivialFCode_sse2 w SUB x y
+ | otherwise -> trivialFCode_x87 w GSUB x y
+ MO_F_Quot w | sse2 -> trivialFCode_sse2 w FDIV x y
+ | otherwise -> trivialFCode_x87 w GDIV x y
+ MO_F_Mul w | sse2 -> trivialFCode_sse2 w MUL x y
+ | otherwise -> trivialFCode_x87 w GMUL x y
MO_Add rep -> add_code rep x y
MO_Sub rep -> sub_code rep x y
getRegister (CmmLoad mem pk)
| isFloatType pk
= do
- Amode src mem_code <- getAmode mem
- let
- size = cmmTypeSize pk
- code dst = mem_code `snocOL`
- IF_ARCH_i386(GLD size src dst,
- MOV size (OpAddr src) (OpReg dst))
- return (Any size code)
+ Amode addr mem_code <- getAmode mem
+ use_sse2 <- sse2Enabled
+ loadFloatAmode use_sse2 (typeWidth pk) addr mem_code
#if i386_TARGET_ARCH
getRegister (CmmLoad mem pk)
reg2reg :: Size -> Reg -> Reg -> Instr
reg2reg size src dst
-#if i386_TARGET_ARCH
- | isFloatSize size = GMOV src dst
-#endif
- | otherwise = MOV size (OpReg src) (OpReg dst)
-
+ | size == FF80 = GMOV src dst
+ | otherwise = MOV size (OpReg src) (OpReg dst)
--------------------------------------------------------------------------------
-- (see trivialCode where this function is used for an example).
getNonClobberedOperand :: CmmExpr -> NatM (Operand, InstrBlock)
-#if x86_64_TARGET_ARCH
-getNonClobberedOperand (CmmLit lit)
- | isSuitableFloatingPointLit lit = do
- lbl <- getNewLabelNat
- let code = unitOL (LDATA ReadOnlyData [CmmDataLabel lbl,
- CmmStaticLit lit])
- return (OpAddr (ripRel (ImmCLbl lbl)), code)
-#endif
-getNonClobberedOperand (CmmLit lit)
- | is32BitLit lit && not (isFloatType (cmmLitType lit)) =
- return (OpImm (litToImm lit), nilOL)
-getNonClobberedOperand (CmmLoad mem pk)
- | IF_ARCH_i386(not (isFloatType pk) && not (isWord64 pk), True) = do
- Amode src mem_code <- getAmode mem
- (src',save_code) <-
- if (amodeCouldBeClobbered src)
- then do
- tmp <- getNewRegNat archWordSize
- return (AddrBaseIndex (EABaseReg tmp) EAIndexNone (ImmInt 0),
- unitOL (LEA II32 (OpAddr src) (OpReg tmp)))
- else
- return (src, nilOL)
- return (OpAddr src', save_code `appOL` mem_code)
-getNonClobberedOperand e = do
+getNonClobberedOperand (CmmLit lit) = do
+ use_sse2 <- sse2Enabled
+ if use_sse2 && isSuitableFloatingPointLit lit
+ then do
+ let CmmFloat _ w = lit
+ Amode addr code <- memConstant (widthInBytes w) lit
+ return (OpAddr addr, code)
+    else if is32BitLit lit && not (isFloatType (cmmLitType lit))
+         then return (OpImm (litToImm lit), nilOL)
+         else getNonClobberedOperand_generic (CmmLit lit)
+
+getNonClobberedOperand (CmmLoad mem pk) = do
+ use_sse2 <- sse2Enabled
+ if (not (isFloatType pk) || use_sse2)
+ && IF_ARCH_i386(not (isWord64 pk), True)
+ then do
+ Amode src mem_code <- getAmode mem
+ (src',save_code) <-
+ if (amodeCouldBeClobbered src)
+ then do
+ tmp <- getNewRegNat archWordSize
+ return (AddrBaseIndex (EABaseReg tmp) EAIndexNone (ImmInt 0),
+ unitOL (LEA II32 (OpAddr src) (OpReg tmp)))
+ else
+ return (src, nilOL)
+ return (OpAddr src', save_code `appOL` mem_code)
+ else do
+ getNonClobberedOperand_generic (CmmLoad mem pk)
+
+getNonClobberedOperand e = getNonClobberedOperand_generic e
+
+getNonClobberedOperand_generic :: CmmExpr -> NatM (Operand, InstrBlock)
+getNonClobberedOperand_generic e = do
(reg, code) <- getNonClobberedReg e
return (OpReg reg, code)
-- getOperand: the operand is not required to remain valid across the
-- computation of an arbitrary expression.
getOperand :: CmmExpr -> NatM (Operand, InstrBlock)
-#if x86_64_TARGET_ARCH
-getOperand (CmmLit lit)
- | isSuitableFloatingPointLit lit = do
- lbl <- getNewLabelNat
- let code = unitOL (LDATA ReadOnlyData [CmmDataLabel lbl,
- CmmStaticLit lit])
- return (OpAddr (ripRel (ImmCLbl lbl)), code)
-#endif
-getOperand (CmmLit lit)
- | is32BitLit lit && not (isFloatType (cmmLitType lit)) = do
- return (OpImm (litToImm lit), nilOL)
-getOperand (CmmLoad mem pk)
- | IF_ARCH_i386(not (isFloatType pk) && not (isWord64 pk), True) = do
- Amode src mem_code <- getAmode mem
- return (OpAddr src, mem_code)
-getOperand e = do
+
+getOperand (CmmLit lit) = do
+ use_sse2 <- sse2Enabled
+ if (use_sse2 && isSuitableFloatingPointLit lit)
+ then do
+ let CmmFloat _ w = lit
+ Amode addr code <- memConstant (widthInBytes w) lit
+ return (OpAddr addr, code)
+    else if is32BitLit lit && not (isFloatType (cmmLitType lit))
+         then return (OpImm (litToImm lit), nilOL)
+         else getOperand_generic (CmmLit lit)
+
+getOperand (CmmLoad mem pk) = do
+ use_sse2 <- sse2Enabled
+ if (not (isFloatType pk) || use_sse2) && IF_ARCH_i386(not (isWord64 pk), True)
+ then do
+ Amode src mem_code <- getAmode mem
+ return (OpAddr src, mem_code)
+ else
+ getOperand_generic (CmmLoad mem pk)
+
+getOperand e = getOperand_generic e
+
+getOperand_generic :: CmmExpr -> NatM (Operand, InstrBlock)
+getOperand_generic e = do
(reg, code) <- getSomeReg e
return (OpReg reg, code)
|| isSuitableFloatingPointLit lit
isOperand _ = False
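+-- | Grab an Amode for a literal placed in a read-only data section,
+-- aligned to the given number of bytes: addressed rip-relative on
+-- x86_64, and via a dynamic reference on other platforms.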
+memConstant :: Int -> CmmLit -> NatM Amode
+memConstant align lit = do
+  lbl <- getNewLabelNat
+#if x86_64_TARGET_ARCH
+  let addr = ripRel (ImmCLbl lbl)
+      addr_code = nilOL
+#else
+ dflags <- getDynFlagsNat
+ dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl
+ Amode addr addr_code <- getAmode dynRef
+#endif
+ let code =
+ LDATA ReadOnlyData
+ [CmmAlign align,
+ CmmDataLabel lbl,
+ CmmStaticLit lit]
+ `consOL` addr_code
+ return (Amode addr code)
+
+
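+-- | Load a float from the given address: a plain MOV into an xmm
+-- register under SSE2, otherwise an x87 GLD yielding an FF80 result.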
+loadFloatAmode :: Bool -> Width -> AddrMode -> InstrBlock -> NatM Register
+loadFloatAmode use_sse2 w addr addr_code = do
+ let size = floatSize w
+ code dst = addr_code `snocOL`
+ if use_sse2
+ then MOV size (OpAddr addr) (OpReg dst)
+ else GLD size addr dst
+ -- in
+ return (Any (if use_sse2 then size else FF80) code)
+
+
-- if we want a floating-point literal as an operand, we can
-- use it directly from memory. However, if the literal is
-- zero, we're better off generating it into a register using
isSuitableFloatingPointLit _ = False
getRegOrMem :: CmmExpr -> NatM (Operand, InstrBlock)
-getRegOrMem (CmmLoad mem pk)
- | IF_ARCH_i386(not (isFloatType pk) && not (isWord64 pk), True) = do
- Amode src mem_code <- getAmode mem
- return (OpAddr src, mem_code)
+getRegOrMem e@(CmmLoad mem pk) = do
+ use_sse2 <- sse2Enabled
+ if (not (isFloatType pk) || use_sse2) && IF_ARCH_i386(not (isWord64 pk), True)
+ then do
+ Amode src mem_code <- getAmode mem
+ return (OpAddr src, mem_code)
+ else do
+ (reg, code) <- getNonClobberedReg e
+ return (OpReg reg, code)
getRegOrMem e = do
(reg, code) <- getNonClobberedReg e
return (OpReg reg, code)
--------------------------------------------------------------------------------
condFltCode :: Cond -> CmmExpr -> CmmExpr -> NatM CondCode
-#if i386_TARGET_ARCH
condFltCode cond x y
- = ASSERT(cond `elem` ([EQQ, NE, LE, LTT, GE, GTT])) do
- (x_reg, x_code) <- getNonClobberedReg x
- (y_reg, y_code) <- getSomeReg y
- let
- code = x_code `appOL` y_code `snocOL`
- GCMP cond x_reg y_reg
- -- The GCMP insn does the test and sets the zero flag if comparable
- -- and true. Hence we always supply EQQ as the condition to test.
- return (CondCode True EQQ code)
-
-#elif x86_64_TARGET_ARCH
--- in the SSE2 comparison ops (ucomiss, ucomisd) the left arg may be
--- an operand, but the right must be a reg. We can probably do better
--- than this general case...
-condFltCode cond x y = do
- (x_reg, x_code) <- getNonClobberedReg x
- (y_op, y_code) <- getOperand y
- let
- code = x_code `appOL`
- y_code `snocOL`
- CMP (floatSize $ cmmExprWidth x) y_op (OpReg x_reg)
- -- NB(1): we need to use the unsigned comparison operators on the
- -- result of this comparison.
- -- in
- return (CondCode True (condToUnsigned cond) code)
-
-#else
-condFltCode = panic "X86.condFltCode: not defined"
-
-#endif
-
+ = if_sse2 condFltCode_sse2 condFltCode_x87
+ where
+ condFltCode_x87
+      = ASSERT(cond `elem` [EQQ, NE, LE, LTT, GE, GTT]) do
+      (x_reg, x_code) <- getNonClobberedReg x
+      (y_reg, y_code) <- getSomeReg y
+ let
+ code = x_code `appOL` y_code `snocOL`
+ GCMP cond x_reg y_reg
+ -- The GCMP insn does the test and sets the zero flag if comparable
+ -- and true. Hence we always supply EQQ as the condition to test.
+ return (CondCode True EQQ code)
+
+ -- in the SSE2 comparison ops (ucomiss, ucomisd) the left arg may be
+ -- an operand, but the right must be a reg. We can probably do better
+ -- than this general case...
+ condFltCode_sse2 = do
+ (x_reg, x_code) <- getNonClobberedReg x
+ (y_op, y_code) <- getOperand y
+ let
+ code = x_code `appOL`
+ y_code `snocOL`
+ CMP (floatSize $ cmmExprWidth x) y_op (OpReg x_reg)
+ -- NB(1): we need to use the unsigned comparison operators on the
+ -- result of this comparison.
+ -- in
+ return (CondCode True (condToUnsigned cond) code)
-- -----------------------------------------------------------------------------
-- Generating assignments
-- Assign; dst is a reg, rhs is mem
assignReg_IntCode pk reg (CmmLoad src _) = do
load_code <- intLoadCode (MOV pk) src
- return (load_code (getRegisterReg reg))
+ return (load_code (getRegisterReg False{-no sse2-} reg))
-- dst is a reg, but src could be anything
assignReg_IntCode pk reg src = do
code <- getAnyReg src
- return (code (getRegisterReg reg))
+ return (code (getRegisterReg False{-no sse2-} reg))
-- Floating point assignment to memory
assignMem_FltCode pk addr src = do
(src_reg, src_code) <- getNonClobberedReg src
Amode addr addr_code <- getAmode addr
+ use_sse2 <- sse2Enabled
let
code = src_code `appOL`
addr_code `snocOL`
- IF_ARCH_i386(GST pk src_reg addr,
- MOV pk (OpReg src_reg) (OpAddr addr))
+ if use_sse2 then MOV pk (OpReg src_reg) (OpAddr addr)
+ else GST pk src_reg addr
return code
-- Floating point assignment to a register/temporary
assignReg_FltCode pk reg src = do
+ use_sse2 <- sse2Enabled
src_code <- getAnyReg src
- return (src_code (getRegisterReg reg))
+ return (src_code (getRegisterReg use_sse2 reg))
genJump :: CmmExpr{-the branch target-} -> NatM InstrBlock
-> CmmExpr -- the condition on which to branch
-> NatM InstrBlock
-#if i386_TARGET_ARCH
-genCondJump id bool = do
- CondCode _ cond code <- getCondCode bool
- return (code `snocOL` JXX cond id)
-
-#elif x86_64_TARGET_ARCH
genCondJump id bool = do
CondCode is_float cond cond_code <- getCondCode bool
- if not is_float
+ use_sse2 <- sse2Enabled
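+    -- x87 comparisons (GCMP) fold the result into a single condition,
+    -- so a plain conditional jump suffices; only the SSE2 comparisons
+    -- need the extra unordered (parity) handling below.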
+ if not is_float || not use_sse2
then
return (cond_code `snocOL` JXX cond id)
else do
]
return (cond_code `appOL` code)
-#else
-genCondJump = panic "X86.genCondJump: not defined"
-
-#endif
-
-
-
-- -----------------------------------------------------------------------------
-- Generating C calls
genCCall (CmmPrim op) [CmmHinted r _] args = do
l1 <- getNewLabelNat
l2 <- getNewLabelNat
- case op of
+ sse2 <- sse2Enabled
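+    -- The inline implementations (GSQRT, GTAN, ...) are x87-only, so
+    -- with SSE2 we always generate an out-of-line call.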
+ if sse2
+ then
+ outOfLineFloatOp op r args
+ else case op of
MO_F32_Sqrt -> actuallyInlineFloatOp GSQRT FF32 args
MO_F64_Sqrt -> actuallyInlineFloatOp GSQRT FF64 args
MO_F64_Tan -> actuallyInlineFloatOp (\s -> GTAN s l1 l2) FF64 args
other_op -> outOfLineFloatOp op r args
+
where
actuallyInlineFloatOp instr size [CmmHinted x _]
= do res <- trivialUFCode size (instr size) x
any <- anyReg res
- return (any (getRegisterReg (CmmLocal r)))
+ return (any (getRegisterReg False (CmmLocal r)))
genCCall target dest_regs args = do
let
setDeltaNat (delta0 - arg_pad_size)
#endif
- push_codes <- mapM push_arg (reverse args)
+ use_sse2 <- sse2Enabled
+ push_codes <- mapM (push_arg use_sse2) (reverse args)
delta <- getDeltaNat
-- in
-- assign the results, if necessary
assign_code [] = nilOL
assign_code [CmmHinted dest _hint]
- | isFloatType ty = unitOL (GMOV fake0 r_dest)
+ | isFloatType ty =
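+          -- The C calling convention returns floats in %st(0) even
+          -- when we generate SSE2 code, so bounce the result through
+          -- memory: x87 store (GST), then SSE2 load (MOV).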
+ if use_sse2
+ then let tmp_amode = AddrBaseIndex (EABaseReg esp)
+ EAIndexNone
+ (ImmInt 0)
+ sz = floatSize w
+ in toOL [ SUB II32 (OpImm (ImmInt b)) (OpReg esp),
+ GST sz fake0 tmp_amode,
+ MOV sz (OpAddr tmp_amode) (OpReg r_dest),
+ ADD II32 (OpImm (ImmInt b)) (OpReg esp)]
+ else unitOL (GMOV fake0 r_dest)
| isWord64 ty = toOL [MOV II32 (OpReg eax) (OpReg r_dest),
MOV II32 (OpReg edx) (OpReg r_dest_hi)]
| otherwise = unitOL (MOV (intSize w) (OpReg eax) (OpReg r_dest))
where
ty = localRegType dest
w = typeWidth ty
+ b = widthInBytes w
r_dest_hi = getHiVRegFromLo r_dest
- r_dest = getRegisterReg (CmmLocal dest)
+ r_dest = getRegisterReg use_sse2 (CmmLocal dest)
assign_code many = pprPanic "genCCall.assign_code - too many return values:" (ppr many)
return (push_code `appOL`
| otherwise = x + a - (x `mod` a)
- push_arg :: HintedCmmActual {-current argument-}
+ push_arg :: Bool -> HintedCmmActual {-current argument-}
-> NatM InstrBlock -- code
- push_arg (CmmHinted arg _hint) -- we don't need the hints on x86
+ push_arg use_sse2 (CmmHinted arg _hint) -- we don't need the hints on x86
| isWord64 arg_ty = do
ChildCode64 code r_lo <- iselExpr64 arg
delta <- getDeltaNat
then return (code `appOL`
toOL [SUB II32 (OpImm (ImmInt size)) (OpReg esp),
DELTA (delta-size),
- GST (floatSize (typeWidth arg_ty))
- reg (AddrBaseIndex (EABaseReg esp)
+ let addr = AddrBaseIndex (EABaseReg esp)
EAIndexNone
- (ImmInt 0))]
+ (ImmInt 0)
+                             sz = floatSize (typeWidth arg_ty)
+                         in
+                         if use_sse2
+                            then MOV sz (OpReg reg) (OpAddr addr)
+                            else GST sz reg addr
+ ]
)
else return (code `snocOL`
PUSH II32 (OpReg reg) `snocOL`
return (dyn_c `snocOL` CALL (Right dyn_r) arg_regs, conv)
let
- -- The x86_64 ABI requires us to set %al to the number of SSE
+ -- The x86_64 ABI requires us to set %al to the number of SSE2
-- registers that contain arguments, if the called routine
-- is a varargs function. We don't know whether it's a
-- varargs function or not, so we have to assume it is.
--
-- It's not safe to omit this assignment, even if the number
- -- of SSE regs in use is zero. If %al is larger than 8
+ -- of SSE2 regs in use is zero. If %al is larger than 8
-- on entry to a varargs function, seg faults ensue.
assign_eax n = unitOL (MOV II32 (OpImm (ImmInt n)) (OpReg eax))
_ -> unitOL (MOV (cmmTypeSize rep) (OpReg rax) (OpReg r_dest))
where
rep = localRegType dest
- r_dest = getRegisterReg (CmmLocal dest)
+ r_dest = getRegisterReg True (CmmLocal dest)
assign_code many = panic "genCCall.assign_code many"
return (load_args_code `appOL`
targetExpr <- cmmMakeDynamicReference dflags addImportNat CallReference lbl
let target = CmmCallee targetExpr CCallConv
- if isFloat64 (localRegType res)
- then
- stmtToInstrs (CmmCall target [CmmHinted res NoHint] args CmmUnsafe CmmMayReturn)
- else do
- uq <- getUniqueNat
- let
- tmp = LocalReg uq f64
- -- in
- code1 <- stmtToInstrs (CmmCall target [CmmHinted tmp NoHint] args CmmUnsafe CmmMayReturn)
- code2 <- stmtToInstrs (CmmAssign (CmmLocal res) (CmmReg (CmmLocal tmp)))
- return (code1 `appOL` code2)
+ stmtToInstrs (CmmCall target [CmmHinted res NoHint] args CmmUnsafe CmmMayReturn)
where
-- Assume we can call these functions directly, and that they're not in a dynamic library.
-- TODO: Why is this ok? Under linux this code will be in libm.so
condFltReg :: Cond -> CmmExpr -> CmmExpr -> NatM Register
-
-#if i386_TARGET_ARCH
-condFltReg cond x y = do
- CondCode _ cond cond_code <- condFltCode cond x y
- tmp <- getNewRegNat II8
- let
- code dst = cond_code `appOL` toOL [
- SETCC cond (OpReg tmp),
- MOVZxL II8 (OpReg tmp) (OpReg dst)
- ]
- -- in
- return (Any II32 code)
-
-#elif x86_64_TARGET_ARCH
-condFltReg cond x y = do
- CondCode _ cond cond_code <- condFltCode cond x y
- tmp1 <- getNewRegNat archWordSize
- tmp2 <- getNewRegNat archWordSize
- let
- -- We have to worry about unordered operands (eg. comparisons
- -- against NaN). If the operands are unordered, the comparison
- -- sets the parity flag, carry flag and zero flag.
- -- All comparisons are supposed to return false for unordered
- -- operands except for !=, which returns true.
- --
- -- Optimisation: we don't have to test the parity flag if we
- -- know the test has already excluded the unordered case: eg >
- -- and >= test for a zero carry flag, which can only occur for
- -- ordered operands.
- --
- -- ToDo: by reversing comparisons we could avoid testing the
- -- parity flag in more cases.
-
- code dst =
- cond_code `appOL`
- (case cond of
- NE -> or_unordered dst
- GU -> plain_test dst
- GEU -> plain_test dst
- _ -> and_ordered dst)
-
- plain_test dst = toOL [
- SETCC cond (OpReg tmp1),
- MOVZxL II8 (OpReg tmp1) (OpReg dst)
- ]
- or_unordered dst = toOL [
- SETCC cond (OpReg tmp1),
- SETCC PARITY (OpReg tmp2),
- OR II8 (OpReg tmp1) (OpReg tmp2),
- MOVZxL II8 (OpReg tmp2) (OpReg dst)
- ]
- and_ordered dst = toOL [
- SETCC cond (OpReg tmp1),
- SETCC NOTPARITY (OpReg tmp2),
- AND II8 (OpReg tmp1) (OpReg tmp2),
- MOVZxL II8 (OpReg tmp2) (OpReg dst)
- ]
- -- in
- return (Any II32 code)
-
-#else
-condFltReg = panic "X86.condFltReg: not defined"
-
-#endif
-
-
+condFltReg cond x y = if_sse2 condFltReg_sse2 condFltReg_x87
+ where
+ condFltReg_x87 = do
+ CondCode _ cond cond_code <- condFltCode cond x y
+ tmp <- getNewRegNat II8
+ let
+ code dst = cond_code `appOL` toOL [
+ SETCC cond (OpReg tmp),
+ MOVZxL II8 (OpReg tmp) (OpReg dst)
+ ]
+ -- in
+ return (Any II32 code)
+
+ condFltReg_sse2 = do
+ CondCode _ cond cond_code <- condFltCode cond x y
+ tmp1 <- getNewRegNat archWordSize
+ tmp2 <- getNewRegNat archWordSize
+ let
+ -- We have to worry about unordered operands (eg. comparisons
+ -- against NaN). If the operands are unordered, the comparison
+ -- sets the parity flag, carry flag and zero flag.
+ -- All comparisons are supposed to return false for unordered
+ -- operands except for !=, which returns true.
+ --
+ -- Optimisation: we don't have to test the parity flag if we
+ -- know the test has already excluded the unordered case: eg >
+ -- and >= test for a zero carry flag, which can only occur for
+ -- ordered operands.
+ --
+ -- ToDo: by reversing comparisons we could avoid testing the
+ -- parity flag in more cases.
+
+ code dst =
+ cond_code `appOL`
+ (case cond of
+ NE -> or_unordered dst
+ GU -> plain_test dst
+ GEU -> plain_test dst
+ _ -> and_ordered dst)
+
+ plain_test dst = toOL [
+ SETCC cond (OpReg tmp1),
+ MOVZxL II8 (OpReg tmp1) (OpReg dst)
+ ]
+ or_unordered dst = toOL [
+ SETCC cond (OpReg tmp1),
+ SETCC PARITY (OpReg tmp2),
+ OR II8 (OpReg tmp1) (OpReg tmp2),
+ MOVZxL II8 (OpReg tmp2) (OpReg dst)
+ ]
+ and_ordered dst = toOL [
+ SETCC cond (OpReg tmp1),
+ SETCC NOTPARITY (OpReg tmp2),
+ AND II8 (OpReg tmp1) (OpReg tmp2),
+ MOVZxL II8 (OpReg tmp2) (OpReg dst)
+ ]
+ -- in
+ return (Any II32 code)
-- -----------------------------------------------------------------------------
-----------
-#if i386_TARGET_ARCH
-
-trivialFCode width instr x y = do
+trivialFCode_x87 _width instr x y = do
(x_reg, x_code) <- getNonClobberedReg x -- these work for float regs too
(y_reg, y_code) <- getSomeReg y
let
- size = floatSize width
+ size = FF80 -- always, on x87
code dst =
x_code `appOL`
y_code `snocOL`
instr size x_reg y_reg dst
return (Any size code)
-#endif
+trivialFCode_sse2 pk instr x y
+ = genTrivialCode size (instr size) x y
+ where size = floatSize pk
-#if x86_64_TARGET_ARCH
-trivialFCode pk instr x y
- = genTrivialCode size (instr size) x y
- where size = floatSize pk
-#endif
trivialUFCode size instr x = do
(x_reg, x_code) <- getSomeReg x
--------------------------------------------------------------------------------
coerceInt2FP :: Width -> Width -> CmmExpr -> NatM Register
-
-#if i386_TARGET_ARCH
-coerceInt2FP from to x = do
- (x_reg, x_code) <- getSomeReg x
- let
- opc = case to of W32 -> GITOF; W64 -> GITOD
- code dst = x_code `snocOL` opc x_reg dst
- -- ToDo: works for non-II32 reps?
- return (Any (floatSize to) code)
-
-#elif x86_64_TARGET_ARCH
-coerceInt2FP from to x = do
- (x_op, x_code) <- getOperand x -- ToDo: could be a safe operand
- let
- opc = case to of W32 -> CVTSI2SS; W64 -> CVTSI2SD
- code dst = x_code `snocOL` opc x_op dst
- -- in
- return (Any (floatSize to) code) -- works even if the destination rep is <II32
-
-#else
-coerceInt2FP = panic "X86.coerceInt2FP: not defined"
-
-#endif
-
-
-
+coerceInt2FP from to x = if_sse2 coerce_sse2 coerce_x87
+ where
+ coerce_x87 = do
+ (x_reg, x_code) <- getSomeReg x
+ let
+ opc = case to of W32 -> GITOF; W64 -> GITOD
+ code dst = x_code `snocOL` opc x_reg dst
+ -- ToDo: works for non-II32 reps?
+ return (Any FF80 code)
+
+ coerce_sse2 = do
+ (x_op, x_code) <- getOperand x -- ToDo: could be a safe operand
+ let
+ opc = case to of W32 -> CVTSI2SS; W64 -> CVTSI2SD
+ code dst = x_code `snocOL` opc (intSize from) x_op dst
+ -- in
+ return (Any (floatSize to) code)
+ -- works even if the destination rep is <II32
--------------------------------------------------------------------------------
coerceFP2Int :: Width -> Width -> CmmExpr -> NatM Register
-
-#if i386_TARGET_ARCH
-coerceFP2Int from to x = do
- (x_reg, x_code) <- getSomeReg x
- let
- opc = case from of W32 -> GFTOI; W64 -> GDTOI
- code dst = x_code `snocOL` opc x_reg dst
- -- ToDo: works for non-II32 reps?
- -- in
- return (Any (intSize to) code)
-
-#elif x86_64_TARGET_ARCH
-coerceFP2Int from to x = do
- (x_op, x_code) <- getOperand x -- ToDo: could be a safe operand
- let
- opc = case from of W32 -> CVTTSS2SIQ; W64 -> CVTTSD2SIQ
- code dst = x_code `snocOL` opc x_op dst
- -- in
- return (Any (intSize to) code) -- works even if the destination rep is <II32
-
-#else
-coerceFP2Int = panic "X86.coerceFP2Int: not defined"
-
-#endif
-
-
+coerceFP2Int from to x = if_sse2 coerceFP2Int_sse2 coerceFP2Int_x87
+ where
+ coerceFP2Int_x87 = do
+ (x_reg, x_code) <- getSomeReg x
+ let
+ opc = case from of W32 -> GFTOI; W64 -> GDTOI
+ code dst = x_code `snocOL` opc x_reg dst
+ -- ToDo: works for non-II32 reps?
+ -- in
+ return (Any (intSize to) code)
+
+ coerceFP2Int_sse2 = do
+ (x_op, x_code) <- getOperand x -- ToDo: could be a safe operand
+ let
+ opc = case from of W32 -> CVTTSS2SIQ; W64 -> CVTTSD2SIQ
+ code dst = x_code `snocOL` opc (intSize to) x_op dst
+ -- in
+ return (Any (intSize to) code)
+ -- works even if the destination rep is <II32
--------------------------------------------------------------------------------
coerceFP2FP :: Width -> CmmExpr -> NatM Register
-
-#if x86_64_TARGET_ARCH
coerceFP2FP to x = do
(x_reg, x_code) <- getSomeReg x
let
-- in
return (Any (floatSize to) code)
-#else
-coerceFP2FP = panic "X86.coerceFP2FP: not defined"
-
-#endif
-
-
+--------------------------------------------------------------------------------
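+-- Negate a float in an xmm register by flipping just its sign bit:
+-- XOR with a constant that has only the sign bit of the width set.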
+sse2NegCode :: Width -> CmmExpr -> NatM Register
+sse2NegCode w x = do
+ let sz = floatSize w
+ x_code <- getAnyReg x
+ -- This is how gcc does it, so it can't be that bad:
+ let
+ const | FF32 <- sz = CmmInt 0x80000000 W32
+ | otherwise = CmmInt 0x8000000000000000 W64
+ Amode amode amode_code <- memConstant (widthInBytes w) const
+ tmp <- getNewRegNat sz
+ let
+ code dst = x_code dst `appOL` amode_code `appOL` toOL [
+ MOV sz (OpAddr amode) (OpReg tmp),
+ XOR sz (OpReg tmp) (OpReg dst)
+ ]
+ --
+ return (Any sz code)
RegVirtual (VirtualRegHi u) -> text "%vHi_" <> asmSDoc (pprUnique u)
RegVirtual (VirtualRegF u) -> text "%vF_" <> asmSDoc (pprUnique u)
RegVirtual (VirtualRegD u) -> text "%vD_" <> asmSDoc (pprUnique u)
+ RegVirtual (VirtualRegSSE u) -> text "%vSSE_" <> asmSDoc (pprUnique u)
where
#if i386_TARGET_ARCH
ppr_reg_no :: Size -> Int -> Doc
2 -> sLit "%ecx"; 3 -> sLit "%edx";
4 -> sLit "%esi"; 5 -> sLit "%edi";
6 -> sLit "%ebp"; 7 -> sLit "%esp";
- 8 -> sLit "%fake0"; 9 -> sLit "%fake1";
- 10 -> sLit "%fake2"; 11 -> sLit "%fake3";
- 12 -> sLit "%fake4"; 13 -> sLit "%fake5";
- _ -> sLit "very naughty I386 register"
+ _ -> ppr_reg_float i
})
#elif x86_64_TARGET_ARCH
ppr_reg_no :: Size -> Int -> Doc
10 -> sLit "%r10"; 11 -> sLit "%r11";
12 -> sLit "%r12"; 13 -> sLit "%r13";
14 -> sLit "%r14"; 15 -> sLit "%r15";
- 16 -> sLit "%xmm0"; 17 -> sLit "%xmm1";
- 18 -> sLit "%xmm2"; 19 -> sLit "%xmm3";
- 20 -> sLit "%xmm4"; 21 -> sLit "%xmm5";
- 22 -> sLit "%xmm6"; 23 -> sLit "%xmm7";
- 24 -> sLit "%xmm8"; 25 -> sLit "%xmm9";
- 26 -> sLit "%xmm10"; 27 -> sLit "%xmm11";
- 28 -> sLit "%xmm12"; 29 -> sLit "%xmm13";
- 30 -> sLit "%xmm14"; 31 -> sLit "%xmm15";
- _ -> sLit "very naughty x86_64 register"
+ _ -> ppr_reg_float i
})
#else
ppr_reg_no _ = panic "X86.Ppr.ppr_reg_no: no match"
#endif
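+-- Float-register names in the unified numbering: 16..21 are the x87
+-- "fake" registers, 24..39 are %xmm0..%xmm15.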
+ppr_reg_float :: Int -> LitString
+ppr_reg_float i = case i of
+ 16 -> sLit "%fake0"; 17 -> sLit "%fake1"
+ 18 -> sLit "%fake2"; 19 -> sLit "%fake3"
+ 20 -> sLit "%fake4"; 21 -> sLit "%fake5"
+ 24 -> sLit "%xmm0"; 25 -> sLit "%xmm1"
+ 26 -> sLit "%xmm2"; 27 -> sLit "%xmm3"
+ 28 -> sLit "%xmm4"; 29 -> sLit "%xmm5"
+ 30 -> sLit "%xmm6"; 31 -> sLit "%xmm7"
+ 32 -> sLit "%xmm8"; 33 -> sLit "%xmm9"
+ 34 -> sLit "%xmm10"; 35 -> sLit "%xmm11"
+ 36 -> sLit "%xmm12"; 37 -> sLit "%xmm13"
+ 38 -> sLit "%xmm14"; 39 -> sLit "%xmm15"
+ _ -> sLit "very naughty x86 register"
pprSize :: Size -> Doc
pprSize x
II16 -> sLit "w"
II32 -> sLit "l"
II64 -> sLit "q"
-#if i386_TARGET_ARCH
- FF32 -> sLit "s"
- FF64 -> sLit "l"
- FF80 -> sLit "t"
-#elif x86_64_TARGET_ARCH
FF32 -> sLit "ss" -- "scalar single-precision float" (SSE2)
FF64 -> sLit "sd" -- "scalar double-precision float" (SSE2)
- _ -> panic "X86.Ppr.pprSize: no match"
-#else
- _ -> panic "X86.Ppr.pprSize: no match"
-#endif
+ FF80 -> sLit "t"
)
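+-- x87 memory operands take the AT&T single-letter suffixes:
+-- s = 32-bit, l = 64-bit, t = 80-bit extended.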
+pprSize_x87 :: Size -> Doc
+pprSize_x87 x
+ = ptext $ case x of
+ FF32 -> sLit "s"
+ FF64 -> sLit "l"
+ FF80 -> sLit "t"
+ _ -> panic "X86.Ppr.pprSize_x87"
+
pprCond :: Cond -> Doc
pprCond c
= ptext (case c of {
pprInstr (FDIV size op1 op2) = pprSizeOpOp (sLit "div") size op1 op2
-pprInstr (CVTSS2SD from to) = pprRegReg (sLit "cvtss2sd") from to
-pprInstr (CVTSD2SS from to) = pprRegReg (sLit "cvtsd2ss") from to
-pprInstr (CVTTSS2SIQ from to) = pprOpReg (sLit "cvttss2siq") from to
-pprInstr (CVTTSD2SIQ from to) = pprOpReg (sLit "cvttsd2siq") from to
-pprInstr (CVTSI2SS from to) = pprOpReg (sLit "cvtsi2ssq") from to
-pprInstr (CVTSI2SD from to) = pprOpReg (sLit "cvtsi2sdq") from to
+pprInstr (CVTSS2SD from to) = pprRegReg (sLit "cvtss2sd") from to
+pprInstr (CVTSD2SS from to) = pprRegReg (sLit "cvtsd2ss") from to
+pprInstr (CVTTSS2SIQ sz from to) = pprSizeOpReg (sLit "cvttss2si") sz from to
+pprInstr (CVTTSD2SIQ sz from to) = pprSizeOpReg (sLit "cvttsd2si") sz from to
+pprInstr (CVTSI2SS sz from to) = pprSizeOpReg (sLit "cvtsi2ss") sz from to
+pprInstr (CVTSI2SD sz from to) = pprSizeOpReg (sLit "cvtsi2sd") sz from to
-- FETCHGOT for PIC on ELF platforms
pprInstr (FETCHGOT reg)
| otherwise
= pprG g (hcat [gtab, gpush src 0, gsemi, gpop dst 1])
--- GLD sz addr dst ==> FFREE %st(7) ; FLDsz addr ; FSTP (dst+1)
+-- GLD sz addr dst ==> FLDsz addr ; FSTP (dst+1)
pprInstr g@(GLD sz addr dst)
- = pprG g (hcat [gtab, text "ffree %st(7) ; fld", pprSize sz, gsp,
+ = pprG g (hcat [gtab, text "fld", pprSize_x87 sz, gsp,
pprAddr addr, gsemi, gpop dst 1])
--- GST sz src addr ==> FFREE %st(7) ; FLD dst ; FSTPsz addr
+-- GST sz src addr ==> FLD dst ; FSTPsz addr
pprInstr g@(GST sz src addr)
+ | src == fake0 && sz /= FF80 -- fstt instruction doesn't exist
+ = pprG g (hcat [gtab,
+ text "fst", pprSize_x87 sz, gsp, pprAddr addr])
+ | otherwise
= pprG g (hcat [gtab, gpush src 0, gsemi,
- text "fstp", pprSize sz, gsp, pprAddr addr])
+ text "fstp", pprSize_x87 sz, gsp, pprAddr addr])
pprInstr g@(GLDZ dst)
- = pprG g (hcat [gtab, text "ffree %st(7) ; fldz ; ", gpop dst 1])
+ = pprG g (hcat [gtab, text "fldz ; ", gpop dst 1])
pprInstr g@(GLD1 dst)
- = pprG g (hcat [gtab, text "ffree %st(7) ; fld1 ; ", gpop dst 1])
+ = pprG g (hcat [gtab, text "fld1 ; ", gpop dst 1])
pprInstr (GFTOI src dst)
= pprInstr (GDTOI src dst)
pprInstr g@(GITOD src dst)
= pprG g (hcat [gtab, text "pushl ", pprReg II32 src,
- text " ; ffree %st(7); fildl (%esp) ; ",
+ text " ; fildl (%esp) ; ",
gpop dst 1, text " ; addl $4,%esp"])
{- Gruesome swamp follows. If you're unfortunate enough to have ventured
pprInstr GFREE
= vcat [ ptext (sLit "\tffree %st(0) ;ffree %st(1) ;ffree %st(2) ;ffree %st(3)"),
- ptext (sLit "\tffree %st(4) ;ffree %st(5) ;ffree %st(6) ;ffree %st(7)")
+ ptext (sLit "\tffree %st(4) ;ffree %st(5)")
]
pprInstr _
gpush :: Reg -> RegNo -> Doc
gpush reg offset
- = hcat [text "ffree %st(7) ; fld ", greg reg offset]
-
+ = hcat [text "fld ", greg reg offset]
gpop :: Reg -> RegNo -> Doc
gpop reg offset
= hcat [text "fstp ", greg reg offset]
greg :: Reg -> RegNo -> Doc
-greg reg offset = text "%st(" <> int (gregno reg - 8+offset) <> char ')'
+greg reg offset = text "%st(" <> int (gregno reg - 16 + offset) <> char ')'
gsemi :: Doc
gsemi = text " ; "
]
-pprOpReg :: LitString -> Operand -> Reg -> Doc
-pprOpReg name op1 reg2
+pprSizeOpReg :: LitString -> Size -> Operand -> Reg -> Doc
+pprSizeOpReg name size op1 reg2
= hcat [
- pprMnemonic_ name,
- pprOperand archWordSize op1,
+ pprMnemonic name size,
+ pprOperand size op1,
comma,
pprReg archWordSize reg2
]
#include "nativeGen/NCG.h"
#include "HsVersions.h"
-#if i386_TARGET_ARCH
-# define STOLEN_X86_REGS 4
--- HACK: go for the max
-#endif
-
#include "../includes/stg/MachRegs.h"
import Reg
-> case vr of
VirtualRegI{} -> _ILIT(1)
VirtualRegHi{} -> _ILIT(1)
- VirtualRegD{} -> _ILIT(0)
- VirtualRegF{} -> _ILIT(0)
-
- -- We don't use floats on this arch, but we can't
- -- return error because the return type is unboxed...
- RcFloat
- -> case vr of
- VirtualRegI{} -> _ILIT(0)
- VirtualRegHi{} -> _ILIT(0)
- VirtualRegD{} -> _ILIT(0)
- VirtualRegF{} -> _ILIT(0)
+ _other -> _ILIT(0)
RcDouble
-> case vr of
- VirtualRegI{} -> _ILIT(0)
- VirtualRegHi{} -> _ILIT(0)
VirtualRegD{} -> _ILIT(1)
VirtualRegF{} -> _ILIT(0)
+ _other -> _ILIT(0)
-{-# INLINE realRegSqueeze #-}
-realRegSqueeze :: RegClass -> RealReg -> FastInt
-
-#if defined(i386_TARGET_ARCH)
-realRegSqueeze cls rr
- = case cls of
- RcInteger
- -> case rr of
- RealRegSingle regNo
- | regNo < 8 -> _ILIT(1) -- first fp reg is 8
- | otherwise -> _ILIT(0)
-
- RealRegPair{} -> _ILIT(0)
-
- -- We don't use floats on this arch, but we can't
- -- return error because the return type is unboxed...
- RcFloat
- -> case rr of
- RealRegSingle regNo
- | regNo < 8 -> _ILIT(0)
- | otherwise -> _ILIT(0)
-
- RealRegPair{} -> _ILIT(0)
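+        -- RcDoubleSSE is the new class for VirtualRegSSE (xmm) values.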
+ RcDoubleSSE
+ -> case vr of
+ VirtualRegSSE{} -> _ILIT(1)
+ _other -> _ILIT(0)
- RcDouble
- -> case rr of
- RealRegSingle regNo
- | regNo < 8 -> _ILIT(0)
- | otherwise -> _ILIT(1)
-
- RealRegPair{} -> _ILIT(0)
+ _other -> _ILIT(0)
-#elif defined(x86_64_TARGET_ARCH)
+{-# INLINE realRegSqueeze #-}
+realRegSqueeze :: RegClass -> RealReg -> FastInt
realRegSqueeze cls rr
= case cls of
RcInteger
RealRegPair{} -> _ILIT(0)
- -- We don't use floats on this arch, but we can't
- -- return error because the return type is unboxed...
- RcFloat
+ RcDouble
-> case rr of
RealRegSingle regNo
- | regNo < 16 -> _ILIT(0)
+ | regNo >= 16 && regNo < 24 -> _ILIT(1)
| otherwise -> _ILIT(0)
RealRegPair{} -> _ILIT(0)
- RcDouble
+ RcDoubleSSE
-> case rr of
- RealRegSingle regNo
- | regNo < 16 -> _ILIT(0)
- | otherwise -> _ILIT(1)
-
- RealRegPair{} -> _ILIT(0)
-
-#else
-realRegSqueeze _ _ = _ILIT(0)
-#endif
-
+ RealRegSingle regNo | regNo >= 24 -> _ILIT(1)
+ _otherwise -> _ILIT(0)
+ _other -> _ILIT(0)
-- -----------------------------------------------------------------------------
-- Immediates
argRegs :: RegNo -> [Reg]
argRegs _ = panic "MachRegs.argRegs(x86): should not be used!"
-
-
-
-
-- | The complete set of machine registers.
allMachRegNos :: [RegNo]
-
-#if i386_TARGET_ARCH
-allMachRegNos = [0..13]
-
-#elif x86_64_TARGET_ARCH
-allMachRegNos = [0..31]
-
+#if i386_TARGET_ARCH
+allMachRegNos = [0..7] ++ floatregs  -- i386 has no %r8..%r15
#else
-allMachRegNos = panic "X86.Regs.callClobberedRegs: not defined for this architecture"
-
+allMachRegNos = [0..15] ++ floatregs
#endif
-
+ where floatregs = fakes ++ xmms; fakes = [16..21]; xmms = [24..39]
-- | Take the class of a register.
{-# INLINE classOfRealReg #-}
classOfRealReg :: RealReg -> RegClass
-
-#if i386_TARGET_ARCH
-- On x86, we might want to have an 8-bit RegClass, which would
-- contain just regs 1-4 (the others don't have 8-bit versions).
-- However, we can get away without this at the moment because the
-- only allocatable integer regs are also 8-bit compatible (1, 3, 4).
classOfRealReg reg
= case reg of
- RealRegSingle i -> if i < 8 then RcInteger else RcDouble
- RealRegPair{} -> panic "X86.Regs.classOfRealReg: RegPairs on this arch"
+ RealRegSingle i
+ | i < 16 -> RcInteger
+ | i < 24 -> RcDouble
+ | otherwise -> RcDoubleSSE
-#elif x86_64_TARGET_ARCH
--- On x86, we might want to have an 8-bit RegClass, which would
--- contain just regs 1-4 (the others don't have 8-bit versions).
--- However, we can get away without this at the moment because the
--- only allocatable integer regs are also 8-bit compatible (1, 3, 4).
-classOfRealReg reg
- = case reg of
- RealRegSingle i -> if i < 16 then RcInteger else RcDouble
RealRegPair{} -> panic "X86.Regs.classOfRealReg: RegPairs on this arch"
-#else
-classOfRealReg _ = panic "X86.Regs.regClass: not defined for this architecture"
-
-#endif
-
-
-- | Get the name of the register with this number.
showReg :: RegNo -> String
-
-#if i386_TARGET_ARCH
-showReg n
- = if n >= 0 && n < 14
- then regNames !! n
- else "%unknown_x86_real_reg_" ++ show n
-
-regNames :: [String]
-regNames
- = ["%eax", "%ebx", "%ecx", "%edx", "%esi", "%edi", "%ebp", "%esp",
- "%fake0", "%fake1", "%fake2", "%fake3", "%fake4", "%fake5", "%fake6"]
-
-#elif x86_64_TARGET_ARCH
showReg n
- | n >= 16 = "%xmm" ++ show (n-16)
+ | n >= 24 = "%xmm" ++ show (n-24)
+ | n >= 16 = "%fake" ++ show (n-16)
| n >= 8 = "%r" ++ show n
| otherwise = regNames !! n
regNames :: [String]
regNames
- = ["%rax", "%rbx", "%rcx", "%rdx", "%rsi", "%rdi", "%rbp", "%rsp" ]
-
-#else
-showReg _ = panic "X86.Regs.showReg: not defined for this architecture"
-
+#if i386_TARGET_ARCH
+ = ["%eax", "%ebx", "%ecx", "%edx", "%esi", "%edi", "%ebp", "%esp"]
+#elif x86_64_TARGET_ARCH
+ = ["%rax", "%rbx", "%rcx", "%rdx", "%rsi", "%rdi", "%rbp", "%rsp" ]
#endif
-
-
-- machine specific ------------------------------------------------------------
edi = regSingle 5
ebp = regSingle 6
esp = regSingle 7
-fake0 = regSingle 8
-fake1 = regSingle 9
-fake2 = regSingle 10
-fake3 = regSingle 11
-fake4 = regSingle 12
-fake5 = regSingle 13
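+-- Renumbered so that i386 and x86_64 share one map: 0..15 integer
+-- registers, 16..23 x87 fakes (16..21 used), 24..39 xmm0..xmm15.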
+fake0 = regSingle 16
+fake1 = regSingle 17
+fake2 = regSingle 18
+fake3 = regSingle 19
+fake4 = regSingle 20
+fake5 = regSingle 21
r13 = regSingle 13
r14 = regSingle 14
r15 = regSingle 15
-xmm0 = regSingle 16
-xmm1 = regSingle 17
-xmm2 = regSingle 18
-xmm3 = regSingle 19
-xmm4 = regSingle 20
-xmm5 = regSingle 21
-xmm6 = regSingle 22
-xmm7 = regSingle 23
-xmm8 = regSingle 24
-xmm9 = regSingle 25
-xmm10 = regSingle 26
-xmm11 = regSingle 27
-xmm12 = regSingle 28
-xmm13 = regSingle 29
-xmm14 = regSingle 30
-xmm15 = regSingle 31
+xmm0 = regSingle 24
+xmm1 = regSingle 25
+xmm2 = regSingle 26
+xmm3 = regSingle 27
+xmm4 = regSingle 28
+xmm5 = regSingle 29
+xmm6 = regSingle 30
+xmm7 = regSingle 31
+xmm8 = regSingle 32
+xmm9 = regSingle 33
+xmm10 = regSingle 34
+xmm11 = regSingle 35
+xmm12 = regSingle 36
+xmm13 = regSingle 37
+xmm14 = regSingle 38
+xmm15 = regSingle 39
allFPArgRegs :: [Reg]
-allFPArgRegs = map regSingle [16 .. 23]
+allFPArgRegs = map regSingle [24 .. 31]
ripRel :: Displacement -> AddrMode
ripRel imm = AddrBaseIndex EABaseRip EAIndexNone imm
-}
xmm :: RegNo -> Reg
-xmm n = regSingle (16+n)
+xmm n = regSingle (24+n)
#define edi 5
#define ebp 6
#define esp 7
-#define fake0 8
-#define fake1 9
-#define fake2 10
-#define fake3 11
-#define fake4 12
-#define fake5 13
#endif
#if x86_64_TARGET_ARCH
#define r13 13
#define r14 14
#define r15 15
-#define xmm0 16
-#define xmm1 17
-#define xmm2 18
-#define xmm3 19
-#define xmm4 20
-#define xmm5 21
-#define xmm6 22
-#define xmm7 23
-#define xmm8 24
-#define xmm9 25
-#define xmm10 26
-#define xmm11 27
-#define xmm12 28
-#define xmm13 29
-#define xmm14 30
-#define xmm15 31
#endif
+#define fake0 16
+#define fake1 17
+#define fake2 18
+#define fake3 19
+#define fake4 20
+#define fake5 21
+
+#define xmm0 24
+#define xmm1 25
+#define xmm2 26
+#define xmm3 27
+#define xmm4 28
+#define xmm5 29
+#define xmm6 30
+#define xmm7 31
+#define xmm8 32
+#define xmm9 33
+#define xmm10 34
+#define xmm11 35
+#define xmm12 36
+#define xmm13 37
+#define xmm14 38
+#define xmm15 39
#if i386_TARGET_ARCH
#if i386_TARGET_ARCH
-- caller-saves registers
callClobberedRegs
- = map regSingle [eax,ecx,edx,fake0,fake1,fake2,fake3,fake4,fake5]
+ = map regSingle ([eax,ecx,edx] ++ [16..39])
#elif x86_64_TARGET_ARCH
-- all xmm regs are caller-saves
-- caller-saves registers
callClobberedRegs
- = map regSingle ([rax,rcx,rdx,rsi,rdi,r8,r9,r10,r11] ++ [16..31])
+ = map regSingle ([rax,rcx,rdx,rsi,rdi,r8,r9,r10,r11] ++ [16..39])
#else
callClobberedRegs