getRegister :: CmmExpr -> NatM Register
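+-- The PIC base register is materialised by getPicBaseNat; the PIC
+-- scheme pins it to one particular register, so we return a Fixed
+-- result rather than an Any.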
+getRegister (CmmReg (CmmGlobal PicBaseReg))
+ = do
+ reg <- getPicBaseNat wordRep
+ return (Fixed wordRep reg nilOL)
+
getRegister (CmmReg reg)
= return (Fixed (cmmRegRep reg) (getRegisterReg reg) nilOL)
getRegister tree@(CmmRegOff _ _)
= getRegister (mangleIndexTree tree)
-getRegister CmmPicBaseReg
- = do
- reg <- getPicBaseNat wordRep
- return (Fixed wordRep reg nilOL)
-
 -- end of the machine-"independent" bit; the rest is machine-specific...
#if alpha_TARGET_ARCH
LDATA ReadOnlyData
[CmmDataLabel lbl,
CmmStaticLit (CmmFloat f rep)],
- MOV rep (OpAddr (ImmAddr (ImmCLbl lbl) 0)) (OpReg dst)
- -- ToDo: should use %rip-relative
+ MOV rep (OpAddr (ripRel (ImmCLbl lbl))) (OpReg dst)
]
-- in
return (Any rep code)
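+-- Note: ripRel builds a %rip-relative address.  On x86-64 a plain
+-- 32-bit absolute displacement cannot reach data placed anywhere in
+-- the 64-bit address space, and rip-relative addressing also keeps
+-- the code position-independent.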
#if x86_64_TARGET_ARCH
getRegister (CmmMachOp (MO_S_Neg F32) [x]) = do
+ x_code <- getAnyReg x
lbl <- getNewLabelNat
let
- code dst = toOL [
+ code dst = x_code dst `appOL` toOL [
-- This is how gcc does it, so it can't be that bad:
LDATA ReadOnlyData16 [
		CmmAlign 16,
		CmmDataLabel lbl,
		CmmStaticLit (CmmInt 0x80000000 I32), -- IEEE single sign-bit mask
		CmmStaticLit (CmmInt 0 I32),
		CmmStaticLit (CmmInt 0 I32),
		CmmStaticLit (CmmInt 0 I32)
],
- XOR F32 (OpAddr (ImmAddr (ImmCLbl lbl) 0)) (OpReg dst)
+ XOR F32 (OpAddr (ripRel (ImmCLbl lbl))) (OpReg dst)
-- xorps, so we need the 128-bit constant
-	-- ToDo: rip-relative
]
return (Any F32 code)
getRegister (CmmMachOp (MO_S_Neg F64) [x]) = do
+ x_code <- getAnyReg x
lbl <- getNewLabelNat
let
-- This is how gcc does it, so it can't be that bad:
- code dst = toOL [
+ code dst = x_code dst `appOL` toOL [
LDATA ReadOnlyData16 [
CmmAlign 16,
CmmDataLabel lbl,
		CmmStaticLit (CmmInt 0x8000000000000000 I64), -- IEEE double sign-bit mask
		CmmStaticLit (CmmInt 0 I64)
],
-- gcc puts an unpck here. Wonder if we need it.
- XOR F64 (OpAddr (ImmAddr (ImmCLbl lbl) 0)) (OpReg dst)
+ XOR F64 (OpAddr (ripRel (ImmCLbl lbl))) (OpReg dst)
-- xorpd, so we need the 128-bit constant
- -- ToDo: rip-relative
]
--
return (Any F64 code)
code dst
= x_code `snocOL`
LEA rep
- (OpAddr (AddrBaseIndex (Just x_reg) Nothing imm))
+ (OpAddr (AddrBaseIndex (EABaseReg x_reg) EAIndexNone imm))
(OpReg dst)
--
return (Any rep code)
-- ASSERT(rep == I32)???
= do (x_reg, x_code) <- getSomeReg x
let off = ImmInt (-(fromInteger i))
- return (Amode (AddrBaseIndex (Just x_reg) Nothing off) x_code)
+ return (Amode (AddrBaseIndex (EABaseReg x_reg) EAIndexNone off) x_code)
getAmode (CmmMachOp (MO_Add rep) [x, CmmLit lit@(CmmInt i _)])
| not (is64BitLit lit)
-- ASSERT(rep == I32)???
= do (x_reg, x_code) <- getSomeReg x
let off = ImmInt (fromInteger i)
- return (Amode (AddrBaseIndex (Just x_reg) Nothing off) x_code)
+ return (Amode (AddrBaseIndex (EABaseReg x_reg) EAIndexNone off) x_code)
-- Turn (lit1 << n + lit2) into (lit2 + lit1 << n) so it will be
-- recognised by the next rule.
let
code = x_code `appOL` y_code
base = case shift of 0 -> 1; 1 -> 2; 2 -> 4; 3 -> 8
- return (Amode (AddrBaseIndex (Just x_reg) (Just (y_reg,base)) (ImmInt 0))
+ return (Amode (AddrBaseIndex (EABaseReg x_reg) (EAIndex y_reg base) (ImmInt 0))
code)
getAmode (CmmLit lit) | not (is64BitLit lit)
getAmode expr = do
(reg,code) <- getSomeReg expr
- return (Amode (AddrBaseIndex (Just reg) Nothing (ImmInt 0)) code)
+ return (Amode (AddrBaseIndex (EABaseReg reg) EAIndexNone (ImmInt 0)) code)
#endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */
#if i386_TARGET_ARCH || x86_64_TARGET_ARCH
getNonClobberedOperand :: CmmExpr -> NatM (Operand, InstrBlock)
+#if x86_64_TARGET_ARCH
+getNonClobberedOperand (CmmLit lit)
+ | isSuitableFloatingPointLit lit = do
+ lbl <- getNewLabelNat
+ let code = unitOL (LDATA ReadOnlyData [CmmDataLabel lbl,
+ CmmStaticLit lit])
+ return (OpAddr (ripRel (ImmCLbl lbl)), code)
+#endif
getNonClobberedOperand (CmmLit lit)
| not (is64BitLit lit) && not (isFloatingRep (cmmLitRep lit)) =
return (OpImm (litToImm lit), nilOL)
if (amodeCouldBeClobbered src)
then do
tmp <- getNewRegNat wordRep
- return (AddrBaseIndex (Just tmp) Nothing (ImmInt 0),
+ return (AddrBaseIndex (EABaseReg tmp) EAIndexNone (ImmInt 0),
unitOL (LEA I32 (OpAddr src) (OpReg tmp)))
else
return (src, nilOL)
-- getOperand: the operand is not required to remain valid across the
-- computation of an arbitrary expression.
getOperand :: CmmExpr -> NatM (Operand, InstrBlock)
+#if x86_64_TARGET_ARCH
getOperand (CmmLit lit)
- | not (is64BitLit lit) && not (isFloatingRep (cmmLitRep lit)) =
+ | isSuitableFloatingPointLit lit = do
+ lbl <- getNewLabelNat
+ let code = unitOL (LDATA ReadOnlyData [CmmDataLabel lbl,
+ CmmStaticLit lit])
+ return (OpAddr (ripRel (ImmCLbl lbl)), code)
+#endif
+getOperand (CmmLit lit)
+ | not (is64BitLit lit) && not (isFloatingRep (cmmLitRep lit)) = do
return (OpImm (litToImm lit), nilOL)
getOperand (CmmLoad mem pk)
| IF_ARCH_i386(not (isFloatingRep pk) && pk /= I64, True) = do
Amode src mem_code <- getAmode mem
return (OpAddr src, mem_code)
getOperand e = do
- (reg, code) <- getNonClobberedReg e
+ (reg, code) <- getSomeReg e
return (OpReg reg, code)
isOperand :: CmmExpr -> Bool
isOperand (CmmLoad _ _) = True
-isOperand (CmmLit lit) = not (is64BitLit lit) &&
- not (isFloatingRep (cmmLitRep lit))
+isOperand (CmmLit lit) = not (is64BitLit lit)
+ || isSuitableFloatingPointLit lit
isOperand _ = False
+-- if we want a floating-point literal as an operand, we can
+-- use it directly from memory. However, if the literal is
+-- zero, we're better off generating it into a register using
+-- xor.
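+isSuitableFloatingPointLit :: CmmLit -> Bool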
+isSuitableFloatingPointLit (CmmFloat f _) = f /= 0.0
+isSuitableFloatingPointLit _ = False
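+-- (The zero case is handled elsewhere; a minimal sketch, assuming the
+-- usual self-xor trick, would be
+--	code dst = unitOL (XOR rep (OpReg dst) (OpReg dst))
+-- i.e. xorps/xorpd dst,dst, which zeroes dst with no memory access.)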
+
getRegOrMem :: CmmExpr -> NatM (Operand, InstrBlock)
getRegOrMem (CmmLoad mem pk)
| IF_ARCH_i386(not (isFloatingRep pk) && pk /= I64, True) = do
code = x_code `appOL`
y_code `snocOL`
CMP (cmmExprRep x) y_op (OpReg x_reg)
- -- in
- return (CondCode False (condToUnsigned cond) code)
- -- we need to use the unsigned comparison operators on the
+ -- NB(1): we need to use the unsigned comparison operators on the
-- result of this comparison.
+ -- in
+ return (CondCode True (condToUnsigned cond) code)
#endif
-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-#if i386_TARGET_ARCH || x86_64_TARGET_ARCH
+#if i386_TARGET_ARCH
genCondJump id bool = do
CondCode _ cond code <- getCondCode bool
return (code `snocOL` JXX cond id)
-#endif /* i386_TARGET_ARCH */
+#endif
+-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+#if x86_64_TARGET_ARCH
+
+genCondJump id bool = do
+ CondCode is_float cond cond_code <- getCondCode bool
+ if not is_float
+ then
+ return (cond_code `snocOL` JXX cond id)
+ else do
+ lbl <- getBlockIdNat
+
+ -- see comment with condFltReg
+ let code = case cond of
+ NE -> or_unordered
+ GU -> plain_test
+ GEU -> plain_test
+ _ -> and_ordered
+
+ plain_test = unitOL (
+ JXX cond id
+ )
+ or_unordered = toOL [
+ JXX cond id,
+ JXX PARITY id
+ ]
+ and_ordered = toOL [
+ JXX PARITY lbl,
+ JXX cond id,
+ JXX ALWAYS lbl,
+ NEWBLOCK lbl
+ ]
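+        -- and_ordered: e.g. for "<" the branch must not be taken when
+        -- the operands are unordered, so we jump past the conditional
+        -- jump whenever the parity flag is set.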
+ return (cond_code `appOL` code)
+
+#endif
+
+-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
#if sparc_TARGET_ARCH
-- CmmPrim -> ...
CmmForeignCall (CmmLit (CmmLabel lbl)) conv
-> -- ToDo: stdcall arg sizes
- return (unitOL (CALL (Left fn_imm)), conv)
+ return (unitOL (CALL (Left fn_imm) []), conv)
where fn_imm = ImmCLbl lbl
CmmForeignCall expr conv
-> do (dyn_c, dyn_r, dyn_rep) <- get_op expr
ASSERT(dyn_rep == I32)
- return (dyn_c `snocOL` CALL (Right dyn_r), conv)
+ return (dyn_c `snocOL` CALL (Right dyn_r) [], conv)
let push_code = concatOL push_codes
call = callinsns `appOL`
code `appOL`
toOL [SUB I32 (OpImm (ImmInt size)) (OpReg esp),
DELTA (delta-size),
- GST sz reg (AddrBaseIndex (Just esp)
- Nothing
+ GST sz reg (AddrBaseIndex (EABaseReg esp)
+ EAIndexNone
(ImmInt 0))]
)
else return (size,
(reg,code) <- getSomeReg op
return (code, reg, cmmExprRep op)
+#endif /* i386_TARGET_ARCH */
+
+#if i386_TARGET_ARCH || x86_64_TARGET_ARCH
outOfLineFloatOp :: CallishMachOp -> CmmReg -> [(CmmExpr,MachHint)]
-> Maybe [GlobalReg] -> NatM InstrBlock
lbl = CmmLabel (mkForeignLabel fn Nothing False)
fn = case mop of
+ MO_F32_Sqrt -> FSLIT("sqrt")
+ MO_F32_Sin -> FSLIT("sin")
+ MO_F32_Cos -> FSLIT("cos")
+ MO_F32_Tan -> FSLIT("tan")
MO_F32_Exp -> FSLIT("exp")
MO_F32_Log -> FSLIT("log")
MO_F32_Tanh -> FSLIT("tanh")
MO_F32_Pwr -> FSLIT("pow")
+ MO_F64_Sqrt -> FSLIT("sqrt")
+ MO_F64_Sin -> FSLIT("sin")
+ MO_F64_Cos -> FSLIT("cos")
+ MO_F64_Tan -> FSLIT("tan")
MO_F64_Exp -> FSLIT("exp")
MO_F64_Log -> FSLIT("log")
other -> pprPanic "outOfLineFloatOp" (pprCallishMachOp mop)
-#endif /* i386_TARGET_ARCH */
+#endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */
-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
#if x86_64_TARGET_ARCH
genCCall (CmmPrim op) [(r,_)] args vols =
- panic "genCCall(CmmPrim)(x86_64)"
+ outOfLineFloatOp op r args vols
genCCall target dest_regs args vols = do
-- load up the register arguments
- (stack_args, sse_regs, load_args_code)
- <- load_args args allArgRegs allFPArgRegs 0 nilOL
+ (stack_args, aregs, fregs, load_args_code)
+ <- load_args args allArgRegs allFPArgRegs nilOL
let
+ fp_regs_used = reverse (drop (length fregs) (reverse allFPArgRegs))
+ int_regs_used = reverse (drop (length aregs) (reverse allArgRegs))
+ arg_regs = int_regs_used ++ fp_regs_used
+        -- for annotating the call instruction with the argument
+        -- registers actually used
+
+ sse_regs = length fp_regs_used
+
tot_arg_size = arg_size * length stack_args
	-- On entry to the called function, %rsp should be aligned
	-- on a 16-byte boundary +8 (i.e. the first stack argument
	-- after the pushed return address is 16-byte aligned).
-- CmmPrim -> ...
CmmForeignCall (CmmLit (CmmLabel lbl)) conv
-> -- ToDo: stdcall arg sizes
- return (unitOL (CALL (Left fn_imm)), conv)
+ return (unitOL (CALL (Left fn_imm) arg_regs), conv)
where fn_imm = ImmCLbl lbl
CmmForeignCall expr conv
-> do (dyn_r, dyn_c) <- getSomeReg expr
- return (dyn_c `snocOL` CALL (Right dyn_r), conv)
+ return (dyn_c `snocOL` CALL (Right dyn_r) arg_regs, conv)
let
	-- The x86_64 ABI requires us to set %al to the number of SSE
	-- registers that contain arguments, if the called routine is
	-- a varargs function.
load_args :: [(CmmExpr,MachHint)]
-> [Reg] -- int regs avail for args
-> [Reg] -- FP regs avail for args
- -> Int -> InstrBlock
- -> NatM ([(CmmExpr,MachHint)],Int,InstrBlock)
- load_args args [] [] sse_regs code = return (args, sse_regs, code)
+ -> InstrBlock
+ -> NatM ([(CmmExpr,MachHint)],[Reg],[Reg],InstrBlock)
+ load_args args [] [] code = return (args, [], [], code)
-- no more regs to use
- load_args [] aregs fregs sse_regs code = return ([],sse_regs,code)
+ load_args [] aregs fregs code = return ([], aregs, fregs, code)
-- no more args to push
- load_args ((arg,hint) : rest) aregs fregs sse_regs code
+ load_args ((arg,hint) : rest) aregs fregs code
| isFloatingRep arg_rep =
case fregs of
[] -> push_this_arg
(r:rs) -> do
arg_code <- getAnyReg arg
- load_args rest aregs rs (sse_regs+1) (code `appOL` arg_code r)
+ load_args rest aregs rs (code `appOL` arg_code r)
| otherwise =
case aregs of
[] -> push_this_arg
(r:rs) -> do
arg_code <- getAnyReg arg
- load_args rest rs fregs sse_regs (code `appOL` arg_code r)
+ load_args rest rs fregs (code `appOL` arg_code r)
where
arg_rep = cmmExprRep arg
push_this_arg = do
- (args',sse',code') <- load_args rest aregs fregs sse_regs code
- return ((arg,hint):args', sse', code')
+ (args',ars,frs,code') <- load_args rest aregs fregs code
+ return ((arg,hint):args', ars, frs, code')
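+        -- NB. the caller recovers the registers actually used by
+        -- comparing the leftover aregs/fregs against allArgRegs and
+        -- allFPArgRegs (see int_regs_used/fp_regs_used above).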
push_args [] code = return code
push_args ((arg,hint):rest) code
lbl <- getNewLabelNat
let
jumpTable = map jumpTableEntry ids
- op = OpAddr (AddrBaseIndex Nothing (Just (reg,wORD_SIZE)) (ImmCLbl lbl))
+ op = OpAddr (AddrBaseIndex EABaseNone (EAIndex reg wORD_SIZE) (ImmCLbl lbl))
code = e_code `appOL` toOL [
LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
JMP_TBL op [ id | Just id <- ids ]
let
code dst = cond_code `appOL` toOL [
SETCC cond (OpReg tmp),
- MOV I32 (OpReg tmp) (OpReg dst),
- AND I32 (OpImm (ImmInt 1)) (OpReg dst)
+ MOVZxL I8 (OpReg tmp) (OpReg dst)
]
- -- NB. (1) Tha AND is needed here because the x86 only
- -- sets the low byte in the SETCC instruction.
- -- NB. (2) The extra temporary register is a hack to
- -- work around the fact that the setcc instructions only
- -- accept byte registers. dst might not be a byte-able reg,
- -- but currently all free registers are byte-able, so we're
- -- guaranteed that a new temporary is byte-able.
-- in
return (Any I32 code)
+#endif
+
+#if i386_TARGET_ARCH
condFltReg cond x y = do
- lbl1 <- getBlockIdNat
- lbl2 <- getBlockIdNat
CondCode _ cond cond_code <- condFltCode cond x y
- let
- code dst = cond_code `appOL` toOL [
- JXX cond lbl1,
- MOV I32 (OpImm (ImmInt 0)) (OpReg dst),
- JXX ALWAYS lbl2,
- NEWBLOCK lbl1,
- MOV I32 (OpImm (ImmInt 1)) (OpReg dst),
- JXX ALWAYS lbl2,
- NEWBLOCK lbl2]
- -- SIGH, have to split up this block somehow...
+ tmp <- getNewRegNat I8
+ let
+ code dst = cond_code `appOL` toOL [
+ SETCC cond (OpReg tmp),
+ MOVZxL I8 (OpReg tmp) (OpReg dst)
+ ]
-- in
return (Any I32 code)
-#endif /* i386_TARGET_ARCH */
+#endif
+
+#if x86_64_TARGET_ARCH
+
+condFltReg cond x y = do
+ CondCode _ cond cond_code <- condFltCode cond x y
+ tmp1 <- getNewRegNat wordRep
+ tmp2 <- getNewRegNat wordRep
+ let
+ -- We have to worry about unordered operands (eg. comparisons
+ -- against NaN). If the operands are unordered, the comparison
+ -- sets the parity flag, carry flag and zero flag.
+ -- All comparisons are supposed to return false for unordered
+ -- operands except for !=, which returns true.
+ --
+ -- Optimisation: we don't have to test the parity flag if we
+ -- know the test has already excluded the unordered case: eg >
+ -- and >= test for a zero carry flag, which can only occur for
+ -- ordered operands.
+ --
+ -- ToDo: by reversing comparisons we could avoid testing the
+ -- parity flag in more cases.
+
+ code dst =
+ cond_code `appOL`
+ (case cond of
+ NE -> or_unordered dst
+ GU -> plain_test dst
+ GEU -> plain_test dst
+ _ -> and_ordered dst)
+
+ plain_test dst = toOL [
+ SETCC cond (OpReg tmp1),
+ MOVZxL I8 (OpReg tmp1) (OpReg dst)
+ ]
+ or_unordered dst = toOL [
+ SETCC cond (OpReg tmp1),
+ SETCC PARITY (OpReg tmp2),
+ OR I8 (OpReg tmp1) (OpReg tmp2),
+ MOVZxL I8 (OpReg tmp2) (OpReg dst)
+ ]
+ and_ordered dst = toOL [
+ SETCC cond (OpReg tmp1),
+ SETCC NOTPARITY (OpReg tmp2),
+ AND I8 (OpReg tmp1) (OpReg tmp2),
+ MOVZxL I8 (OpReg tmp2) (OpReg dst)
+ ]
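+        -- e.g. or_unordered on (/=) with a NaN operand: the comparison
+        -- sets ZF and PF, so SETcc NE yields 0 but SETcc PARITY yields 1,
+        -- and OR-ing them gives the required True.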
+ -- in
+ return (Any I32 code)
+
+#endif
-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-- in
return (Any rep code)
-trivialCode rep instr maybe_revinstr a b = do
+trivialCode rep instr maybe_revinstr a b = genTrivialCode rep instr a b
+
+-- This is re-used for floating pt instructions too.
+genTrivialCode rep instr a b = do
(b_op, b_code) <- getNonClobberedOperand b
a_code <- getAnyReg a
tmp <- getNewRegNat rep
-- as the destination reg. In this case, we have to save b in a
-- new temporary across the computation of a.
code dst
- | dst `clashesWith` b_op =
+ | dst `regClashesWithOp` b_op =
b_code `appOL`
unitOL (MOV rep b_op (OpReg tmp)) `appOL`
a_code dst `snocOL`
		  instr (OpReg tmp) (OpReg dst)
-- in
return (Any rep code)
- where
- reg `clashesWith` OpReg reg2 = reg == reg2
- reg `clashesWith` OpAddr amode = any (==reg) (addrModeRegs amode)
- reg `clashesWith` _ = False
+
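+regClashesWithOp :: Reg -> Operand -> Bool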
+reg `regClashesWithOp` OpReg reg2 = reg == reg2
+reg `regClashesWithOp` OpAddr amode = any (==reg) (addrModeRegs amode)
+reg `regClashesWithOp` _ = False
-----------
#if x86_64_TARGET_ARCH
--- We use the 2-operand SSE2 floating pt instructions. ToDo: improve on
--- this by using some of the special cases in trivialCode above.
-trivialFCode pk instr x y = do
- (y_reg, y_code) <- getNonClobberedReg y -- these work for float regs too
- x_code <- getAnyReg x
- let
- code dst =
- y_code `appOL`
- x_code dst `snocOL`
- instr pk (IF_ARCH_x86_64(OpReg,) y_reg)
- (IF_ARCH_x86_64(OpReg,) dst)
- -- in
- return (Any pk code)
+trivialFCode pk instr x y = genTrivialCode pk (instr pk) x y
#endif
coerceFP2Int from to x = do
(x_op, x_code) <- getOperand x -- ToDo: could be a safe operand
let
- opc = case to of F32 -> CVTSS2SI; F64 -> CVTSD2SI
+ opc = case from of F32 -> CVTSS2SI; F64 -> CVTSD2SI
code dst = x_code `snocOL` opc x_op dst
-- in
return (Any to code) -- works even if the destination rep is <I32