#include "HsVersions.h"
#include "nativeGen/NCG.h"
+#include "MachDeps.h"
-- NCG stuff:
import MachInstrs
import CLabel
-- The rest:
-import CmdLineOpts ( opt_PIC )
+import StaticFlags ( opt_PIC )
import ForeignCall ( CCallConv(..) )
import OrdList
import Pretty
import Outputable
-import qualified Outputable
import FastString
import FastTypes ( isFastTrue )
+import Constants ( wORD_SIZE )
#ifdef DEBUG
import Outputable ( assertPanic )
CmmAssign reg src
| isFloatingRep kind -> assignReg_FltCode kind reg src
- | wordRep == I32 && kind == I64
- -> assignReg_I64Code reg src
+#if WORD_SIZE_IN_BITS==32
+ | kind == I64 -> assignReg_I64Code reg src
+#endif
| otherwise -> assignReg_IntCode kind reg src
where kind = cmmRegRep reg
CmmStore addr src
| isFloatingRep kind -> assignMem_FltCode kind addr src
- | wordRep == I32 && kind == I64
- -> assignMem_I64Code addr src
+#if WORD_SIZE_IN_BITS==32
+ | kind == I64 -> assignMem_I64Code addr src
+#endif
| otherwise -> assignMem_IntCode kind addr src
where kind = cmmExprRep src
-- selection game are therefore that the returned
-- Reg may be modified
+#if WORD_SIZE_IN_BITS==32
assignMem_I64Code :: CmmExpr -> CmmExpr -> NatM InstrBlock
assignReg_I64Code :: CmmReg -> CmmExpr -> NatM InstrBlock
+#endif
+
+#ifndef x86_64_TARGET_ARCH
iselExpr64 :: CmmExpr -> NatM ChildCode64
+#endif
-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-- -----------------------------------------------------------------------------
+-- Utils based on getRegister, below
+
+-- The dual to getAnyReg: compute an expression into a register, but
+-- we don't mind which one it is.
+-- An Any result is materialised into a fresh virtual register of the
+-- appropriate MachRep; a Fixed result hands back its register as-is,
+-- so the caller must not clobber it.
+getSomeReg :: CmmExpr -> NatM (Reg, InstrBlock)
+getSomeReg expr = do
+  r <- getRegister expr
+  case r of
+    Any rep code -> do
+	tmp <- getNewRegNat rep
+	return (tmp, code tmp)
+    Fixed _ reg code ->
+	return (reg, code)
+
+-- -----------------------------------------------------------------------------
-- Grab the Reg for a CmmReg
getRegisterReg :: CmmReg -> Reg
getRegister :: CmmExpr -> NatM Register
+getRegister (CmmReg (CmmGlobal PicBaseReg))
+ = do
+ reg <- getPicBaseNat wordRep
+ return (Fixed wordRep reg nilOL)
+
getRegister (CmmReg reg)
= return (Fixed (cmmRegRep reg) (getRegisterReg reg) nilOL)
getRegister tree@(CmmRegOff _ _)
= getRegister (mangleIndexTree tree)
-getRegister CmmPicBaseReg
- = do
- reg <- getPicBaseNat wordRep
- return (Fixed wordRep reg nilOL)
-
-- end of machine-"independent" bit; here we go on the rest...
#if alpha_TARGET_ARCH
-- in
return (Any F64 code)
+#endif /* i386_TARGET_ARCH */
+
+#if x86_64_TARGET_ARCH
+
+-- Floating-point literals on x86_64: a zero literal is generated as a
+-- self-XOR of the destination register; any other float literal is
+-- placed in a read-only data section and loaded rip-relative.
+getRegister (CmmLit (CmmFloat 0.0 rep)) = do
+   let code dst = unitOL (XOR rep (OpReg dst) (OpReg dst))
+	-- I don't know why there are xorpd, xorps, and pxor instructions.
+	-- They all appear to do the same thing --SDM
+   return (Any rep code)
+
+getRegister (CmmLit (CmmFloat f rep)) = do
+    lbl <- getNewLabelNat
+    let code dst = toOL [
+	    LDATA ReadOnlyData
+			[CmmDataLabel lbl,
+			 CmmStaticLit (CmmFloat f rep)],
+	    MOV rep (OpAddr (ripRel (ImmCLbl lbl))) (OpReg dst)
+	    ]
+    -- in
+    return (Any rep code)
+
+#endif /* x86_64_TARGET_ARCH */
+
+#if i386_TARGET_ARCH || x86_64_TARGET_ARCH
-- catch simple cases of zero- or sign-extended load
getRegister (CmmMachOp (MO_U_Conv I8 I32) [CmmLoad addr _]) = do
code <- intLoadCode (MOVSxL I16) addr
return (Any I32 code)
+#endif
+
+#if x86_64_TARGET_ARCH
+
+-- catch simple cases of zero- or sign-extended load
+-- (x86_64): fuse the load with the widening conversion by emitting a
+-- single movzx/movsx (or plain movl for 32->64 zero-extension), instead
+-- of a load followed by a separate extend instruction.
+getRegister (CmmMachOp (MO_U_Conv I8 I64) [CmmLoad addr _]) = do
+  code <- intLoadCode (MOVZxL I8) addr
+  return (Any I64 code)
+
+getRegister (CmmMachOp (MO_S_Conv I8 I64) [CmmLoad addr _]) = do
+  code <- intLoadCode (MOVSxL I8) addr
+  return (Any I64 code)
+
+getRegister (CmmMachOp (MO_U_Conv I16 I64) [CmmLoad addr _]) = do
+  code <- intLoadCode (MOVZxL I16) addr
+  return (Any I64 code)
+
+getRegister (CmmMachOp (MO_S_Conv I16 I64) [CmmLoad addr _]) = do
+  code <- intLoadCode (MOVSxL I16) addr
+  return (Any I64 code)
+
+getRegister (CmmMachOp (MO_U_Conv I32 I64) [CmmLoad addr _]) = do
+  code <- intLoadCode (MOV I32) addr -- 32-bit loads zero-extend
+  return (Any I64 code)
+
+getRegister (CmmMachOp (MO_S_Conv I32 I64) [CmmLoad addr _]) = do
+  code <- intLoadCode (MOVSxL I32) addr
+  return (Any I64 code)
+
+#endif
+
+#if x86_64_TARGET_ARCH
+-- Floating-point negation on x86_64 (SSE): flip the IEEE sign bit by
+-- XORing the value with a sign-bit mask loaded from a 16-byte-aligned
+-- read-only constant (xorps/xorpd operate on the full 128-bit register,
+-- hence the padded constant).
+getRegister (CmmMachOp (MO_S_Neg F32) [x]) = do
+  x_code <- getAnyReg x
+  lbl <- getNewLabelNat
+  let
+    code dst = x_code dst `appOL` toOL [
+	-- This is how gcc does it, so it can't be that bad:
+	LDATA ReadOnlyData16 [
+		CmmAlign 16,
+		CmmDataLabel lbl,
+		CmmStaticLit (CmmInt 0x80000000 I32),
+		CmmStaticLit (CmmInt 0 I32),
+		CmmStaticLit (CmmInt 0 I32),
+		CmmStaticLit (CmmInt 0 I32)
+	],
+	XOR F32 (OpAddr (ripRel (ImmCLbl lbl))) (OpReg dst)
+	-- xorps, so we need the 128-bit constant
+	-- ToDo: rip-relative
+	]
+  --
+  return (Any F32 code)
+
+getRegister (CmmMachOp (MO_S_Neg F64) [x]) = do
+  x_code <- getAnyReg x
+  lbl <- getNewLabelNat
+  let
+	-- This is how gcc does it, so it can't be that bad:
+    code dst = x_code dst `appOL` toOL [
+	LDATA ReadOnlyData16 [
+		CmmAlign 16,
+		CmmDataLabel lbl,
+		CmmStaticLit (CmmInt 0x8000000000000000 I64),
+		CmmStaticLit (CmmInt 0 I64)
+	],
+	-- gcc puts an unpck here.  Wonder if we need it.
+	XOR F64 (OpAddr (ripRel (ImmCLbl lbl))) (OpReg dst)
+	-- xorpd, so we need the 128-bit constant
+	]
+  --
+  return (Any F64 code)
+#endif
+
+#if i386_TARGET_ARCH || x86_64_TARGET_ARCH
getRegister (CmmMachOp mop [x]) -- unary MachOps
= case mop of
+#if i386_TARGET_ARCH
MO_S_Neg F32 -> trivialUFCode F32 (GNEG F32) x
MO_S_Neg F64 -> trivialUFCode F64 (GNEG F64) x
+#endif
MO_S_Neg rep -> trivialUCode rep (NEGI rep) x
MO_Not rep -> trivialUCode rep (NOT rep) x
MO_S_Conv I16 I8 -> conversionNop I16 x
MO_U_Conv I32 I16 -> conversionNop I32 x
MO_S_Conv I32 I16 -> conversionNop I32 x
+#if x86_64_TARGET_ARCH
+ MO_U_Conv I64 I32 -> conversionNop I64 x
+ MO_S_Conv I64 I32 -> conversionNop I64 x
+ MO_U_Conv I64 I16 -> conversionNop I64 x
+ MO_S_Conv I64 I16 -> conversionNop I64 x
+ MO_U_Conv I64 I8 -> conversionNop I64 x
+ MO_S_Conv I64 I8 -> conversionNop I64 x
+#endif
+
MO_U_Conv rep1 rep2 | rep1 == rep2 -> conversionNop rep1 x
MO_S_Conv rep1 rep2 | rep1 == rep2 -> conversionNop rep1 x
MO_S_Conv I16 I32 -> integerExtend I16 I32 MOVSxL x
MO_S_Conv I8 I16 -> integerExtend I8 I16 MOVSxL x
+#if x86_64_TARGET_ARCH
+ MO_U_Conv I8 I64 -> integerExtend I8 I64 MOVZxL x
+ MO_U_Conv I16 I64 -> integerExtend I16 I64 MOVZxL x
+ MO_U_Conv I32 I64 -> integerExtend I32 I64 MOVZxL x
+ MO_S_Conv I8 I64 -> integerExtend I8 I64 MOVSxL x
+ MO_S_Conv I16 I64 -> integerExtend I16 I64 MOVSxL x
+ MO_S_Conv I32 I64 -> integerExtend I32 I64 MOVSxL x
+ -- for 32-to-64 bit zero extension, amd64 uses an ordinary movl.
+ -- However, we don't want the register allocator to throw it
+ -- away as an unnecessary reg-to-reg move, so we keep it in
+ -- the form of a movzl and print it as a movl later.
+#endif
+
+#if i386_TARGET_ARCH
MO_S_Conv F32 F64 -> conversionNop F64 x
MO_S_Conv F64 F32 -> conversionNop F32 x
+#else
+ MO_S_Conv F32 F64 -> coerceFP2FP F64 x
+ MO_S_Conv F64 F32 -> coerceFP2FP F32 x
+#endif
+
MO_S_Conv from to
| isFloatingRep from -> coerceFP2Int from to x
| isFloatingRep to -> coerceInt2FP from to x
+ other -> pprPanic "getRegister" (pprMachOp mop)
where
-- signed or unsigned extension.
integerExtend from to instr expr = do
MO_U_Lt rep -> condIntReg LU x y
MO_U_Le rep -> condIntReg LEU x y
- MO_Add F32 -> trivialFCode F32 GADD x y
- MO_Sub F32 -> trivialFCode F32 GSUB x y
+#if i386_TARGET_ARCH
+ MO_Add F32 -> trivialFCode F32 GADD x y
+ MO_Sub F32 -> trivialFCode F32 GSUB x y
MO_Add F64 -> trivialFCode F64 GADD x y
MO_Sub F64 -> trivialFCode F64 GSUB x y
- MO_S_Quot F32 -> trivialFCode F32 GDIV x y
+ MO_S_Quot F32 -> trivialFCode F32 GDIV x y
MO_S_Quot F64 -> trivialFCode F64 GDIV x y
+#endif
+
+#if x86_64_TARGET_ARCH
+ MO_Add F32 -> trivialFCode F32 ADD x y
+ MO_Sub F32 -> trivialFCode F32 SUB x y
+
+ MO_Add F64 -> trivialFCode F64 ADD x y
+ MO_Sub F64 -> trivialFCode F64 SUB x y
+
+ MO_S_Quot F32 -> trivialFCode F32 FDIV x y
+ MO_S_Quot F64 -> trivialFCode F64 FDIV x y
+#endif
MO_Add rep -> add_code rep x y
MO_Sub rep -> sub_code rep x y
MO_U_Quot rep -> div_code rep False True x y
MO_U_Rem rep -> div_code rep False False x y
+#if i386_TARGET_ARCH
MO_Mul F32 -> trivialFCode F32 GMUL x y
MO_Mul F64 -> trivialFCode F64 GMUL x y
+#endif
+
+#if x86_64_TARGET_ARCH
+ MO_Mul F32 -> trivialFCode F32 MUL x y
+ MO_Mul F64 -> trivialFCode F64 MUL x y
+#endif
+
MO_Mul rep -> let op = IMUL rep in
trivialCode rep op (Just op) x y
where
--------------------
imulMayOflo :: MachRep -> CmmExpr -> CmmExpr -> NatM Register
- imulMayOflo I32 a b = do
- res_lo <- getNewRegNat I32
- res_hi <- getNewRegNat I32
+ imulMayOflo rep a b = do
(a_reg, a_code) <- getNonClobberedReg a
- (b_reg, b_code) <- getSomeReg b
+ b_code <- getAnyReg b
let
- code dst = a_code `appOL` b_code `appOL`
+ shift_amt = case rep of
+ I32 -> 31
+ I64 -> 63
+ _ -> panic "shift_amt"
+
+ code = a_code `appOL` b_code eax `appOL`
toOL [
- MOV I32 (OpReg a_reg) (OpReg res_hi),
- MOV I32 (OpReg b_reg) (OpReg res_lo),
- IMUL64 res_hi res_lo, -- result in res_hi:res_lo
- SAR I32 (OpImm (ImmInt 31)) (OpReg res_lo), -- sign extend lower part
- SUB I32 (OpReg res_hi) (OpReg res_lo), -- compare against upper
- MOV I32 (OpReg res_lo) (OpReg dst)
- -- dst==0 if high part == sign extended low part
+ IMUL2 rep (OpReg a_reg), -- result in %edx:%eax
+ SAR rep (OpImm (ImmInt shift_amt)) (OpReg eax),
+ -- sign extend lower part
+ SUB rep (OpReg edx) (OpReg eax)
+ -- compare against upper
+ -- eax==0 if high part == sign extended low part
]
-- in
- return (Any I32 code)
+ return (Fixed rep eax code)
--------------------
shift_code :: MachRep
code dst
= x_code `snocOL`
LEA rep
- (OpAddr (AddrBaseIndex (Just x_reg) Nothing imm))
+ (OpAddr (AddrBaseIndex (EABaseReg x_reg) EAIndexNone imm))
(OpReg dst)
--
return (Any rep code)
----------------------
div_code rep signed quotient x y = do
- (y_op, y_code) <- getOperand y -- cannot be clobbered
+ (y_op, y_code) <- getRegOrMem y -- cannot be clobbered
x_code <- getAnyReg x
let
- widen | signed = CLTD
+ widen | signed = CLTD rep
| otherwise = XOR rep (OpReg edx) (OpReg edx)
instr | signed = IDIV
return (Fixed rep result code)
-
getRegister (CmmLoad mem pk)
| isFloatingRep pk
= do
Amode src mem_code <- getAmode mem
let
code dst = mem_code `snocOL`
- GLD pk src dst
+ IF_ARCH_i386(GLD pk src dst,
+ MOV pk (OpAddr src) (OpReg dst))
--
return (Any pk code)
+#if i386_TARGET_ARCH
getRegister (CmmLoad mem pk)
| pk /= I64
= do
-- we can't guarantee access to an 8-bit variant of every register
-- (esi and edi don't have 8-bit variants), so to make things
-- simpler we do our 8-bit arithmetic with full 32-bit registers.
+#endif
+
+#if x86_64_TARGET_ARCH
+-- Simpler memory load code on x86_64
+getRegister (CmmLoad mem pk)
+ = do
+ code <- intLoadCode (MOV pk) mem
+ return (Any pk code)
+#endif
getRegister (CmmLit (CmmInt 0 rep))
= let
+ -- x86_64: 32-bit xor is one byte shorter, and zero-extends to 64 bits
+ adj_rep = case rep of I64 -> I32; _ -> rep
+ rep1 = IF_ARCH_i386( rep, adj_rep )
code dst
- = unitOL (XOR rep (OpReg dst) (OpReg dst))
+ = unitOL (XOR rep1 (OpReg dst) (OpReg dst))
in
return (Any rep code)
+#if x86_64_TARGET_ARCH
+ -- optimisation for loading small literals on x86_64: take advantage
+ -- of the automatic zero-extension from 32 to 64 bits, because the 32-bit
+ -- instruction forms are shorter.
+getRegister (CmmLit lit)
+ | I64 <- cmmLitRep lit, not (isBigLit lit)
+ = let
+ imm = litToImm lit
+ code dst = unitOL (MOV I32 (OpImm imm) (OpReg dst))
+ in
+ return (Any I64 code)
+ where
+ isBigLit (CmmInt i I64) = i < 0 || i > 0xffffffff
+ isBigLit _ = False
+ -- note1: not the same as is64BitLit, because that checks for
+ -- signed literals that fit in 32 bits, but we want unsigned
+ -- literals here.
+ -- note2: all labels are small, because we're assuming the
+ -- small memory model (see gcc docs, -mcmodel=small).
+#endif
+
getRegister (CmmLit lit)
= let
rep = cmmLitRep lit
in
return (Any rep code)
-getRegister other = panic "getRegister(x86)"
+getRegister other = pprPanic "getRegister(x86)" (ppr other)
intLoadCode :: (Operand -> Operand -> Instr) -> CmmExpr
anyReg (Any _ code) = return code
anyReg (Fixed rep reg fcode) = return (\dst -> fcode `snocOL` reg2reg rep reg dst)
--- The dual to getAnyReg: compute an expression into a register, but
--- we don't mind which one it is.
-getSomeReg :: CmmExpr -> NatM (Reg, InstrBlock)
-getSomeReg expr = do
- r <- getRegister expr
- case r of
- Any rep code -> do
- tmp <- getNewRegNat rep
- return (tmp, code tmp)
- Fixed _ reg code ->
- return (reg, code)
-
-- A bit like getSomeReg, but we want a reg that can be byte-addressed.
-- Fixed registers might not be byte-addressable, so we make sure we've
-- got a temporary, inserting an extra reg copy if necessary.
getByteReg :: CmmExpr -> NatM (Reg, InstrBlock)
+#if x86_64_TARGET_ARCH
+getByteReg = getSomeReg -- all regs are byte-addressable on x86_64
+#else
getByteReg expr = do
r <- getRegister expr
case r of
return (tmp, code `snocOL` reg2reg rep reg tmp)
-- ToDo: could optimise slightly by checking for byte-addressable
-- real registers, but that will happen very rarely if at all.
+#endif
-- Another variant: this time we want the result in a register that cannot
-- be modified by code to evaluate an arbitrary expression.
reg2reg :: MachRep -> Reg -> Reg -> Instr
reg2reg rep src dst
+#if i386_TARGET_ARCH
| isFloatingRep rep = GMOV src dst
+#endif
| otherwise = MOV rep (OpReg src) (OpReg dst)
-#endif /* i386_TARGET_ARCH */
+#endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */
-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
extendUExpr I32 x = x
extendUExpr rep x = CmmMachOp (MO_U_Conv rep I32) [x]
--- ###FIXME: exact code duplication from x86 case
--- The dual to getAnyReg: compute an expression into a register, but
--- we don't mind which one it is.
-getSomeReg :: CmmExpr -> NatM (Reg, InstrBlock)
-getSomeReg expr = do
- r <- getRegister expr
- case r of
- Any rep code -> do
- tmp <- getNewRegNat rep
- return (tmp, code tmp)
- Fixed _ reg code ->
- return (reg, code)
-
#endif /* powerpc_TARGET_ARCH */
-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-#if i386_TARGET_ARCH
+#if i386_TARGET_ARCH || x86_64_TARGET_ARCH
-- This is all just ridiculous, since it carefully undoes
-- what mangleIndexTree has just done.
-getAmode (CmmMachOp (MO_Sub rep) [x, CmmLit (CmmInt i _)])
+getAmode (CmmMachOp (MO_Sub rep) [x, CmmLit lit@(CmmInt i _)])
+ | not (is64BitLit lit)
-- ASSERT(rep == I32)???
= do (x_reg, x_code) <- getSomeReg x
let off = ImmInt (-(fromInteger i))
- return (Amode (AddrBaseIndex (Just x_reg) Nothing off) x_code)
+ return (Amode (AddrBaseIndex (EABaseReg x_reg) EAIndexNone off) x_code)
-getAmode (CmmMachOp (MO_Add rep) [x, CmmLit (CmmInt i _)])
+getAmode (CmmMachOp (MO_Add rep) [x, CmmLit lit@(CmmInt i _)])
+ | not (is64BitLit lit)
-- ASSERT(rep == I32)???
= do (x_reg, x_code) <- getSomeReg x
let off = ImmInt (fromInteger i)
- return (Amode (AddrBaseIndex (Just x_reg) Nothing off) x_code)
+ return (Amode (AddrBaseIndex (EABaseReg x_reg) EAIndexNone off) x_code)
-- Turn (lit1 << n + lit2) into (lit2 + lit1 << n) so it will be
-- recognised by the next rule.
let
code = x_code `appOL` y_code
base = case shift of 0 -> 1; 1 -> 2; 2 -> 4; 3 -> 8
- return (Amode (AddrBaseIndex (Just x_reg) (Just (y_reg,base)) (ImmInt 0))
+ return (Amode (AddrBaseIndex (EABaseReg x_reg) (EAIndex y_reg base) (ImmInt 0))
code)
-getAmode (CmmLit lit)
+getAmode (CmmLit lit) | not (is64BitLit lit)
= return (Amode (ImmAddr (litToImm lit) 0) nilOL)
getAmode expr = do
(reg,code) <- getSomeReg expr
- return (Amode (AddrBaseIndex (Just reg) Nothing (ImmInt 0)) code)
+ return (Amode (AddrBaseIndex (EABaseReg reg) EAIndexNone (ImmInt 0)) code)
-#endif /* i386_TARGET_ARCH */
+#endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */
-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-- -----------------------------------------------------------------------------
-- getOperand: sometimes any operand will do.
--- getOperand gets a *safe* operand; that is, the value of the operand
--- will remain valid across the computation of an arbitrary expression,
--- unless the expression is computed directly into a register which
--- the operand refers to (see trivialCode where this function is used
--- for an example).
+-- getNonClobberedOperand: the value of the operand will remain valid across
+-- the computation of an arbitrary expression, unless the expression
+-- is computed directly into a register which the operand refers to
+-- (see trivialCode where this function is used for an example).
-#ifdef i386_TARGET_ARCH
+#if i386_TARGET_ARCH || x86_64_TARGET_ARCH
-getOperand :: CmmExpr -> NatM (Operand, InstrBlock)
-getOperand (CmmLoad mem pk)
- | not (isFloatingRep pk) && pk /= I64 = do
+-- Returns an operand whose value survives the evaluation of an
+-- arbitrary expression: immediates and rip-relative constants are
+-- always safe; a memory operand whose address uses a caller-saved
+-- register is re-based onto a fresh temporary via LEA; anything else
+-- falls through to a non-clobbered register.
+getNonClobberedOperand :: CmmExpr -> NatM (Operand, InstrBlock)
+#if x86_64_TARGET_ARCH
+getNonClobberedOperand (CmmLit lit)
+  | isSuitableFloatingPointLit lit = do
+    lbl <- getNewLabelNat
+    let code = unitOL (LDATA ReadOnlyData [CmmDataLabel lbl,
+					   CmmStaticLit lit])
+    return (OpAddr (ripRel (ImmCLbl lbl)), code)
+#endif
+getNonClobberedOperand (CmmLit lit)
+  | not (is64BitLit lit) && not (isFloatingRep (cmmLitRep lit)) =
+    return (OpImm (litToImm lit), nilOL)
+getNonClobberedOperand (CmmLoad mem pk)
+  | IF_ARCH_i386(not (isFloatingRep pk) && pk /= I64, True) = do
Amode src mem_code <- getAmode mem
(src',save_code) <-
if (amodeCouldBeClobbered src)
then do
tmp <- getNewRegNat wordRep
-	   return (AddrBaseIndex (Just tmp) Nothing (ImmInt 0),
+	   return (AddrBaseIndex (EABaseReg tmp) EAIndexNone (ImmInt 0),
unitOL (LEA I32 (OpAddr src) (OpReg tmp)))
else
return (src, nilOL)
return (OpAddr src', save_code `appOL` mem_code)
-
-getOperand e = do
+getNonClobberedOperand e = do
(reg, code) <- getNonClobberedReg e
return (OpReg reg, code)
regClobbered (RealReg rr) = isFastTrue (freeReg rr)
regClobbered _ = False
+-- getOperand: the operand is not required to remain valid across the
+-- computation of an arbitrary expression.
+-- Order of preference: rip-relative FP constant (x86_64 only),
+-- immediate, memory operand, register.
+getOperand :: CmmExpr -> NatM (Operand, InstrBlock)
+#if x86_64_TARGET_ARCH
+getOperand (CmmLit lit)
+  | isSuitableFloatingPointLit lit = do
+    lbl <- getNewLabelNat
+    let code = unitOL (LDATA ReadOnlyData [CmmDataLabel lbl,
+					   CmmStaticLit lit])
+    return (OpAddr (ripRel (ImmCLbl lbl)), code)
+#endif
+getOperand (CmmLit lit)
+  | not (is64BitLit lit) && not (isFloatingRep (cmmLitRep lit)) = do
+    return (OpImm (litToImm lit), nilOL)
+getOperand (CmmLoad mem pk)
+  -- on i386 a memory operand is unusable for floats (x87) and for I64
+  | IF_ARCH_i386(not (isFloatingRep pk) && pk /= I64, True) = do
+    Amode src mem_code <- getAmode mem
+    return (OpAddr src, mem_code)
+getOperand e = do
+    (reg, code) <- getSomeReg e
+    return (OpReg reg, code)
+
+-- Can this expression be used directly as an instruction operand,
+-- i.e. without first being computed into a register?
+isOperand :: CmmExpr -> Bool
+isOperand (CmmLoad _ _) = True
+isOperand (CmmLit lit)  = not (is64BitLit lit)
+			  || isSuitableFloatingPointLit lit
+isOperand _             = False
+
+-- if we want a floating-point literal as an operand, we can
+-- use it directly from memory.  However, if the literal is
+-- zero, we're better off generating it into a register using
+-- xor (see the CmmFloat 0.0 case of getRegister).
+isSuitableFloatingPointLit (CmmFloat f _) = f /= 0.0
+isSuitableFloatingPointLit _              = False
+
+-- Like getOperand, but never produces an immediate: the result is
+-- either a memory operand or a register.  A register result comes from
+-- getNonClobberedReg, so it survives the evaluation of other operands
+-- (this matters for callers such as div_code).
+getRegOrMem :: CmmExpr -> NatM (Operand, InstrBlock)
+getRegOrMem (CmmLoad mem pk)
+  | IF_ARCH_i386(not (isFloatingRep pk) && pk /= I64, True) = do
+    Amode src mem_code <- getAmode mem
+    return (OpAddr src, mem_code)
+getRegOrMem e = do
+    (reg, code) <- getNonClobberedReg e
+    return (OpReg reg, code)
+
+-- Does this literal need a full 64-bit immediate, i.e. does it fall
+-- outside the range representable as a sign-extended 32-bit operand?
+-- Always False on 32-bit targets.
+#if x86_64_TARGET_ARCH
+is64BitLit (CmmInt i I64) = i > 0x7fffffff || i < -0x80000000
+   -- assume that labels are in the range 0-2^31-1:  this assumes the
+   -- small memory model (see gcc docs, -mcmodel=small).
+#endif
+is64BitLit x = False
#endif
-- -----------------------------------------------------------------------------
-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-#if i386_TARGET_ARCH || sparc_TARGET_ARCH
+#if i386_TARGET_ARCH || x86_64_TARGET_ARCH || sparc_TARGET_ARCH
-- yes, they really do seem to want exactly the same!
getCondCode (CmmMachOp mop [x, y])
#endif /* alpha_TARGET_ARCH */
-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-#if i386_TARGET_ARCH
+#if i386_TARGET_ARCH || x86_64_TARGET_ARCH
-- memory vs immediate
-condIntCode cond (CmmLoad x pk) (CmmLit lit) = do
+condIntCode cond (CmmLoad x pk) (CmmLit lit) | not (is64BitLit lit) = do
Amode x_addr x_code <- getAmode x
let
imm = litToImm lit
--
return (CondCode False cond code)
--- anything vs immediate
-condIntCode cond x (CmmLit lit) = do
- (x_reg, x_code) <- getSomeReg x
+-- anything vs operand
+condIntCode cond x y | isOperand y = do
+ (x_reg, x_code) <- getNonClobberedReg x
+ (y_op, y_code) <- getOperand y
let
- imm = litToImm lit
- code = x_code `snocOL`
- CMP (cmmLitRep lit) (OpImm imm) (OpReg x_reg)
- -- in
- return (CondCode False cond code)
-
--- memory vs anything
-condIntCode cond (CmmLoad x pk) y = do
- (y_reg, y_code) <- getNonClobberedReg y
- Amode x_addr x_code <- getAmode x
- let
- code = y_code `appOL`
- x_code `snocOL`
- CMP pk (OpReg y_reg) (OpAddr x_addr)
- -- in
- return (CondCode False cond code)
-
--- anything vs memory
-condIntCode cond y (CmmLoad x pk) = do
- (y_reg, y_code) <- getNonClobberedReg y
- Amode x_addr x_code <- getAmode x
- let
- code = y_code `appOL`
- x_code `snocOL`
- CMP pk (OpAddr x_addr) (OpReg y_reg)
+ code = x_code `appOL` y_code `snocOL`
+ CMP (cmmExprRep x) y_op (OpReg x_reg)
-- in
return (CondCode False cond code)
-- anything vs anything
condIntCode cond x y = do
- (x_op, x_code) <- getOperand x
- (y_reg, y_code) <- getSomeReg y
+ (y_reg, y_code) <- getNonClobberedReg y
+ (x_op, x_code) <- getRegOrMem x
let
- code = x_code `appOL`
- y_code `snocOL`
+ code = y_code `appOL`
+ x_code `snocOL`
CMP (cmmExprRep x) (OpReg y_reg) x_op
-- in
return (CondCode False cond code)
+#endif
------------
+#if i386_TARGET_ARCH
condFltCode cond x y
= ASSERT(cond `elem` ([EQQ, NE, LE, LTT, GE, GTT])) do
(x_reg, x_code) <- getNonClobberedReg x
-- The GCMP insn does the test and sets the zero flag if comparable
-- and true. Hence we always supply EQQ as the condition to test.
return (CondCode True EQQ code)
-
#endif /* i386_TARGET_ARCH */
+#if x86_64_TARGET_ARCH
+-- in the SSE2 comparison ops (ucomiss, ucomisd) the left arg may be
+-- an operand, but the right must be a reg. We can probably do better
+-- than this general case...
+condFltCode cond x y = do
+ (x_reg, x_code) <- getNonClobberedReg x
+ (y_op, y_code) <- getOperand y
+ let
+ code = x_code `appOL`
+ y_code `snocOL`
+ CMP (cmmExprRep x) y_op (OpReg x_reg)
+ -- NB(1): we need to use the unsigned comparison operators on the
+ -- result of this comparison.
+ -- in
+ return (CondCode True (condToUnsigned cond) code)
+#endif
+
-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
#if sparc_TARGET_ARCH
#endif /* sparc_TARGET_ARCH */
#if powerpc_TARGET_ARCH
--- ###FIXME: I16 and I8!
+-- ###FIXME: I16 and I8!
condIntCode cond x (CmmLit (CmmInt y rep))
| Just src2 <- makeImmediate rep (not $ condUnsigned cond) y
= do
-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-#if i386_TARGET_ARCH
+#if i386_TARGET_ARCH || x86_64_TARGET_ARCH
-- integer assignment to memory
assignMem_IntCode pk addr src = do
return code
where
get_op_RI :: CmmExpr -> NatM (InstrBlock,Operand) -- code, operator
- get_op_RI (CmmLit lit)
+ get_op_RI (CmmLit lit) | not (is64BitLit lit)
= return (nilOL, OpImm (litToImm lit))
get_op_RI op
= do (reg,code) <- getNonClobberedReg op
-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-#if i386_TARGET_ARCH
+#if i386_TARGET_ARCH || x86_64_TARGET_ARCH
-- Floating point assignment to memory
assignMem_FltCode pk addr src = do
let
code = src_code `appOL`
addr_code `snocOL`
- GST pk src_reg addr
+ IF_ARCH_i386(GST pk src_reg addr,
+ MOV pk (OpReg src_reg) (OpAddr addr))
return code
-- Floating point assignment to a register/temporary
-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-#if i386_TARGET_ARCH
+#if i386_TARGET_ARCH || x86_64_TARGET_ARCH
genJump (CmmLoad mem pk) = do
Amode target code <- getAmode mem
genBranch id = return (unitOL (BR id))
#endif
-#if i386_TARGET_ARCH
+#if i386_TARGET_ARCH || x86_64_TARGET_ARCH
genBranch id = return (unitOL (JXX ALWAYS id))
#endif
CondCode _ cond code <- getCondCode bool
return (code `snocOL` JXX cond id)
-#endif /* i386_TARGET_ARCH */
+#endif
+
+-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+#if x86_64_TARGET_ARCH
+
+-- Conditional jump for x86_64.  Integer conditions map directly to a
+-- single JXX.  Float comparisons need extra care: the SSE compares
+-- (ucomiss/ucomisd) set the parity flag when either operand is NaN
+-- (unordered), so NE must additionally jump when PF is set
+-- (or_unordered), while ordered conditions must first branch away on
+-- PF (and_ordered).  GU/GEU are safe as-is (plain_test).
+genCondJump id bool = do
+  CondCode is_float cond cond_code <- getCondCode bool
+  if not is_float
+    then
+	return (cond_code `snocOL` JXX cond id)
+    else do
+	lbl <- getBlockIdNat
+
+	-- see comment with condFltReg
+	let code = case cond of
+			NE  -> or_unordered
+			GU  -> plain_test
+			GEU -> plain_test
+			_   -> and_ordered
+
+	    plain_test = unitOL (
+		  JXX cond id
+		)
+	    or_unordered = toOL [
+		  JXX cond id,
+		  JXX PARITY id
+		]
+	    and_ordered = toOL [
+		  JXX PARITY lbl,
+		  JXX cond id,
+		  JXX ALWAYS lbl,
+		  NEWBLOCK lbl
+		]
+	return (cond_code `appOL` code)
+
+#endif
+
+-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
#if sparc_TARGET_ARCH
-- CmmPrim -> ...
CmmForeignCall (CmmLit (CmmLabel lbl)) conv
-> -- ToDo: stdcall arg sizes
- return (unitOL (CALL (Left fn_imm)), conv)
+ return (unitOL (CALL (Left fn_imm) []), conv)
where fn_imm = ImmCLbl lbl
CmmForeignCall expr conv
-> do (dyn_c, dyn_r, dyn_rep) <- get_op expr
ASSERT(dyn_rep == I32)
- return (dyn_c `snocOL` CALL (Right dyn_r), conv)
+ return (dyn_c `snocOL` CALL (Right dyn_r) [], conv)
let push_code = concatOL push_codes
call = callinsns `appOL`
toOL (
-- Deallocate parameters after call for ccall;
-- but not for stdcall (callee does it)
- (if cconv == StdCallConv then [] else
+ (if cconv == StdCallConv || tot_arg_size==0 then [] else
[ADD I32 (OpImm (ImmInt tot_arg_size)) (OpReg esp)])
++
[DELTA (delta + tot_arg_size)]
code `appOL`
toOL [SUB I32 (OpImm (ImmInt size)) (OpReg esp),
DELTA (delta-size),
- GST sz reg (AddrBaseIndex (Just esp)
- Nothing
+ GST sz reg (AddrBaseIndex (EABaseReg esp)
+ EAIndexNone
(ImmInt 0))]
)
else return (size,
(reg,code) <- getSomeReg op
return (code, reg, cmmExprRep op)
+#endif /* i386_TARGET_ARCH */
+
+#if i386_TARGET_ARCH || x86_64_TARGET_ARCH
outOfLineFloatOp :: CallishMachOp -> CmmReg -> [(CmmExpr,MachHint)]
-> Maybe [GlobalReg] -> NatM InstrBlock
lbl = CmmLabel (mkForeignLabel fn Nothing False)
fn = case mop of
+ MO_F32_Sqrt -> FSLIT("sqrt")
+ MO_F32_Sin -> FSLIT("sin")
+ MO_F32_Cos -> FSLIT("cos")
+ MO_F32_Tan -> FSLIT("tan")
MO_F32_Exp -> FSLIT("exp")
MO_F32_Log -> FSLIT("log")
MO_F32_Tanh -> FSLIT("tanh")
MO_F32_Pwr -> FSLIT("pow")
+ MO_F64_Sqrt -> FSLIT("sqrt")
+ MO_F64_Sin -> FSLIT("sin")
+ MO_F64_Cos -> FSLIT("cos")
+ MO_F64_Tan -> FSLIT("tan")
MO_F64_Exp -> FSLIT("exp")
MO_F64_Log -> FSLIT("log")
MO_F64_Tanh -> FSLIT("tanh")
MO_F64_Pwr -> FSLIT("pow")
- other -> pprPanic "outOfLineFloatOp" (pprCallishMachOp mop)
+#endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */
-#endif /* i386_TARGET_ARCH */
+-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+#if x86_64_TARGET_ARCH
+
+-- genCCall for x86_64.  Integer arguments go in allArgRegs and FP
+-- arguments in allFPArgRegs (load_args); any leftovers are pushed on
+-- the stack right-to-left (push_args).  The stack is kept 16-byte
+-- aligned at the call, and %eax is set to the number of SSE registers
+-- used, as the varargs convention requires.
+genCCall (CmmPrim op) [(r,_)] args vols =
+  outOfLineFloatOp op r args vols
+
+genCCall target dest_regs args vols = do
+
+    -- load up the register arguments
+    (stack_args, aregs, fregs, load_args_code)
+	 <- load_args args allArgRegs allFPArgRegs nilOL
+
+    let
+	fp_regs_used  = reverse (drop (length fregs) (reverse allFPArgRegs))
+	int_regs_used = reverse (drop (length aregs) (reverse allArgRegs))
+	arg_regs = int_regs_used ++ fp_regs_used
+		-- for annotating the call instruction with
+
+	sse_regs = length fp_regs_used
+
+	tot_arg_size = arg_size * length stack_args
+
+	-- On entry to the called function, %rsp should be aligned
+	-- on a 16-byte boundary +8 (i.e. the first stack arg after
+	-- the return address is 16-byte aligned).  In STG land
+	-- %rsp is kept 16-byte aligned (see StgCRun.c), so we just
+	-- need to make sure we push a multiple of 16-bytes of args,
+	-- plus the return address, to get the correct alignment.
+	-- Urg, this is hard.  We need to feed the delta back into
+	-- the arg pushing code.
+    (real_size, adjust_rsp) <-
+	if tot_arg_size `rem` 16 == 0
+	    then return (tot_arg_size, nilOL)
+	    else do -- we need to adjust...
+		delta <- getDeltaNat
+		setDeltaNat (delta-8)
+		return (tot_arg_size+8, toOL [
+				SUB I64 (OpImm (ImmInt 8)) (OpReg rsp),
+				DELTA (delta-8)
+			])
+
+    -- push the stack args, right to left
+    push_code <- push_args (reverse stack_args) nilOL
+    delta <- getDeltaNat
+
+    -- deal with static vs dynamic call targets
+    (callinsns,cconv) <-
+      case target of
+	-- CmmPrim -> ...
+        CmmForeignCall (CmmLit (CmmLabel lbl)) conv
+           -> -- ToDo: stdcall arg sizes
+	      return (unitOL (CALL (Left fn_imm) arg_regs), conv)
+	   where fn_imm = ImmCLbl lbl
+        CmmForeignCall expr conv
+           -> do (dyn_r, dyn_c) <- getSomeReg expr
+		 return (dyn_c `snocOL` CALL (Right dyn_r) arg_regs, conv)
+
+    let
+	-- The x86_64 ABI requires us to set %al to the number of SSE
+	-- registers that contain arguments, if the called routine
+	-- is a varargs function.  We don't know whether it's a
+	-- varargs function or not, so we have to assume it is.
+	--
+	-- It's not safe to omit this assignment, even if the number
+	-- of SSE regs in use is zero.  If %al is larger than 8
+	-- on entry to a varargs function, seg faults ensue.
+	-- (a 32-bit mov to %eax writes %al as its low byte)
+	assign_eax n = unitOL (MOV I32 (OpImm (ImmInt n)) (OpReg eax))
+
+    let call = callinsns `appOL`
+	       toOL (
+			-- Deallocate parameters after call for ccall;
+			-- but not for stdcall (callee does it)
+			-- NOTE(review): esp is assumed to denote %rsp
+			-- on x86_64 -- confirm in MachRegs
+		  (if cconv == StdCallConv || real_size==0 then [] else
+		   [ADD wordRep (OpImm (ImmInt real_size)) (OpReg esp)])
+		  ++
+		  [DELTA (delta + real_size)]
+	       )
+    -- in
+    setDeltaNat (delta + real_size)
+
+    let
+	-- assign the results, if necessary
+	-- NOTE(review): MOV with an F32/F64 rep is presumably printed
+	-- as an SSE move for xmm registers -- confirm in the
+	-- pretty-printer
+	assign_code [] = nilOL
+	assign_code [(dest,_hint)] =
+	  case rep of
+		F32 -> unitOL (MOV rep (OpReg xmm0) (OpReg r_dest))
+		F64 -> unitOL (MOV rep (OpReg xmm0) (OpReg r_dest))
+		rep -> unitOL (MOV rep (OpReg rax) (OpReg r_dest))
+	  where
+		rep = cmmRegRep dest
+		r_dest = getRegisterReg dest
+	assign_code many = panic "genCCall.assign_code many"
+
+    return (load_args_code	`appOL`
+	    adjust_rsp		`appOL`
+	    push_code		`appOL`
+	    assign_eax sse_regs `appOL`
+	    call		`appOL`
+	    assign_code dest_regs)
+
+  where
+    arg_size = 8 -- always, at the mo
+
+    load_args :: [(CmmExpr,MachHint)]
+	      -> [Reg] 			-- int regs avail for args
+	      -> [Reg] 			-- FP regs avail for args
+	      -> InstrBlock
+	      -> NatM ([(CmmExpr,MachHint)],[Reg],[Reg],InstrBlock)
+    load_args args [] [] code     =  return (args, [], [], code)
+	-- no more regs to use
+    load_args [] aregs fregs code =  return ([], aregs, fregs, code)
+	-- no more args to push
+    load_args ((arg,hint) : rest) aregs fregs code
+	| isFloatingRep arg_rep =
+	case fregs of
+	  [] -> push_this_arg
+	  (r:rs) -> do
+	     arg_code <- getAnyReg arg
+	     load_args rest aregs rs (code `appOL` arg_code r)
+	| otherwise =
+	case aregs of
+	  [] -> push_this_arg
+	  (r:rs) -> do
+	     arg_code <- getAnyReg arg
+	     load_args rest rs fregs (code `appOL` arg_code r)
+	where
+	  arg_rep = cmmExprRep arg
+
+	  push_this_arg = do
+	    (args',ars,frs,code') <- load_args rest aregs fregs code
+	    return ((arg,hint):args', ars, frs, code')
+
+    push_args [] code = return code
+    push_args ((arg,hint):rest) code
+	| isFloatingRep arg_rep = do
+	  (arg_reg, arg_code) <- getSomeReg arg
+	  delta <- getDeltaNat
+	  setDeltaNat (delta-arg_size)
+	  let code' = code `appOL` toOL [
+			MOV arg_rep (OpReg arg_reg) (OpAddr (spRel 0)),
+			SUB wordRep (OpImm (ImmInt arg_size)) (OpReg rsp) ,
+			DELTA (delta-arg_size)]
+	  push_args rest code'
+
+	| otherwise = do
+	  -- we only ever generate word-sized function arguments.  Promotion
+	  -- has already happened: our Int8# type is kept sign-extended
+	  -- in an Int#, for example.
+	  ASSERT(arg_rep == I64) return ()
+	  (arg_op, arg_code) <- getOperand arg
+	  delta <- getDeltaNat
+	  setDeltaNat (delta-arg_size)
+	  let code' = code `appOL` toOL [PUSH I64 arg_op,
+					 DELTA (delta-arg_size)]
+	  push_args rest code'
+	where
+	  arg_rep = cmmExprRep arg
+#endif
-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
genSwitch :: CmmExpr -> [Maybe BlockId] -> NatM InstrBlock
-#if i386_TARGET_ARCH
+#if i386_TARGET_ARCH || x86_64_TARGET_ARCH
genSwitch expr ids = do
(reg,e_code) <- getSomeReg expr
lbl <- getNewLabelNat
let
jumpTable = map jumpTableEntry ids
- op = OpAddr (AddrBaseIndex Nothing (Just (reg,4)) (ImmCLbl lbl))
+ -- Scale the table index by the target word size (from Constants)
+ -- instead of the old hard-coded 4, so this one implementation
+ -- serves both i386 and x86_64 table-entry widths.
+ op = OpAddr (AddrBaseIndex EABaseNone (EAIndex reg wORD_SIZE) (ImmCLbl lbl))
code = e_code `appOL` toOL [
LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
JMP_TBL op [ id | Just id <- ids ]
dynRef <- cmmMakeDynamicReference addImportNat False lbl
(tableReg,t_code) <- getSomeReg $ dynRef
let
- jumpTable = map jumpTableEntry ids
-
+ -- Position-independent code must not embed absolute block
+ -- addresses in the jump table, so each entry is stored as an
+ -- offset relative to the table label itself (CmmLabelDiffOff),
+ -- with 0 marking a missing branch target.
+ jumpTable = map jumpTableEntryRel ids
+
+ jumpTableEntryRel Nothing
+ = CmmStaticLit (CmmInt 0 wordRep)
+ jumpTableEntryRel (Just (BlockId id))
+ = CmmStaticLit (CmmLabelDiffOff blockLabel lbl 0)
+ where blockLabel = mkAsmTempLabel id
+
code = e_code `appOL` t_code `appOL` toOL [
LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
SLW tmp reg (RIImm (ImmInt 2)),
LD I32 tmp (AddrRegReg tableReg tmp),
+ -- the loaded entry is now table-relative, so add the table
+ -- base register back in to recover the absolute jump target
+ ADD tmp tmp (RIReg tableReg),
MTCTR tmp,
BCTR [ id | Just id <- ids ]
]
-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-#if i386_TARGET_ARCH
+#if i386_TARGET_ARCH || x86_64_TARGET_ARCH
condIntReg cond x y = do
CondCode _ cond cond_code <- condIntCode cond x y
let
code dst = cond_code `appOL` toOL [
SETCC cond (OpReg tmp),
- MOV I32 (OpReg tmp) (OpReg dst),
- AND I32 (OpImm (ImmInt 1)) (OpReg dst)
+ -- SETCC writes only the low byte of tmp (see the NB comments
+ -- being deleted below); MOVZxL zero-extends that byte into dst
+ -- in a single instruction, replacing the old MOV + AND pair.
+ MOVZxL I8 (OpReg tmp) (OpReg dst)
]
- -- NB. (1) Tha AND is needed here because the x86 only
- -- sets the low byte in the SETCC instruction.
- -- NB. (2) The extra temporary register is a hack to
- -- work around the fact that the setcc instructions only
- -- accept byte registers. dst might not be a byte-able reg,
- -- but currently all free registers are byte-able, so we're
- -- guaranteed that a new temporary is byte-able.
-- in
return (Any I32 code)
+#endif
+
+#if i386_TARGET_ARCH
condFltReg cond x y = do
- lbl1 <- getBlockIdNat
- lbl2 <- getBlockIdNat
CondCode _ cond cond_code <- condFltCode cond x y
- let
- code dst = cond_code `appOL` toOL [
- JXX cond lbl1,
- MOV I32 (OpImm (ImmInt 0)) (OpReg dst),
- JXX ALWAYS lbl2,
- NEWBLOCK lbl1,
- MOV I32 (OpImm (ImmInt 1)) (OpReg dst),
- JXX ALWAYS lbl2,
- NEWBLOCK lbl2]
- -- SIGH, have to split up this block somehow...
+ -- Branch-free replacement for the old jump/NEWBLOCK scheme:
+ -- materialise the condition with SETCC into a fresh byte-sized
+ -- temporary (SETCC only accepts byte registers), then zero-extend
+ -- it so dst ends up holding exactly 0 or 1.
+ tmp <- getNewRegNat I8
+ let
+ code dst = cond_code `appOL` toOL [
+ SETCC cond (OpReg tmp),
+ MOVZxL I8 (OpReg tmp) (OpReg dst)
+ ]
-- in
return (Any I32 code)
-#endif /* i386_TARGET_ARCH */
+#endif
+
+#if x86_64_TARGET_ARCH
+
+condFltReg cond x y = do
+ CondCode _ cond cond_code <- condFltCode cond x y
+ tmp1 <- getNewRegNat wordRep
+ tmp2 <- getNewRegNat wordRep
+ let
+ -- We have to worry about unordered operands (eg. comparisons
+ -- against NaN). If the operands are unordered, the comparison
+ -- sets the parity flag, carry flag and zero flag.
+ -- All comparisons are supposed to return false for unordered
+ -- operands except for !=, which returns true.
+ --
+ -- Optimisation: we don't have to test the parity flag if we
+ -- know the test has already excluded the unordered case: eg >
+ -- and >= test for a zero carry flag, which can only occur for
+ -- ordered operands.
+ --
+ -- ToDo: by reversing comparisons we could avoid testing the
+ -- parity flag in more cases.
+
+ code dst =
+ cond_code `appOL`
+ (case cond of
+ NE -> or_unordered dst
+ GU -> plain_test dst
+ GEU -> plain_test dst
+ _ -> and_ordered dst)
+
+ -- condition already excludes the unordered case: one SETCC will do
+ plain_test dst = toOL [
+ SETCC cond (OpReg tmp1),
+ MOVZxL I8 (OpReg tmp1) (OpReg dst)
+ ]
+ -- true when the condition holds OR the comparison was unordered
+ -- (parity flag set) -- the required semantics for !=
+ or_unordered dst = toOL [
+ SETCC cond (OpReg tmp1),
+ SETCC PARITY (OpReg tmp2),
+ OR I8 (OpReg tmp1) (OpReg tmp2),
+ MOVZxL I8 (OpReg tmp2) (OpReg dst)
+ ]
+ -- true only when the condition holds AND the comparison was
+ -- ordered (parity flag clear)
+ and_ordered dst = toOL [
+ SETCC cond (OpReg tmp1),
+ SETCC NOTPARITY (OpReg tmp2),
+ AND I8 (OpReg tmp1) (OpReg tmp2),
+ MOVZxL I8 (OpReg tmp2) (OpReg dst)
+ ]
+ -- in
+ return (Any I32 code)
+
+#endif
-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-> IF_ARCH_alpha((Reg -> RI -> Reg -> Instr)
,IF_ARCH_i386 ((Operand -> Operand -> Instr)
-> Maybe (Operand -> Operand -> Instr)
+ ,IF_ARCH_x86_64 ((Operand -> Operand -> Instr)
+ -> Maybe (Operand -> Operand -> Instr)
,IF_ARCH_sparc((Reg -> RI -> Reg -> Instr)
,IF_ARCH_powerpc(Bool -> (Reg -> Reg -> RI -> Instr)
- ,))))
+ ,)))))
-> CmmExpr -> CmmExpr -- the two arguments
-> NatM Register
-> IF_ARCH_alpha((Reg -> Reg -> Reg -> Instr)
,IF_ARCH_sparc((Size -> Reg -> Reg -> Reg -> Instr)
,IF_ARCH_i386 ((MachRep -> Reg -> Reg -> Reg -> Instr)
- ,)))
+ ,IF_ARCH_x86_64 ((MachRep -> Operand -> Operand -> Instr)
+ ,))))
-> CmmExpr -> CmmExpr -- the two arguments
-> NatM Register
#endif
:: MachRep
-> IF_ARCH_alpha((RI -> Reg -> Instr)
,IF_ARCH_i386 ((Operand -> Instr)
+ ,IF_ARCH_x86_64 ((Operand -> Instr)
,IF_ARCH_sparc((RI -> Reg -> Instr)
,IF_ARCH_powerpc((Reg -> Reg -> Instr)
- ,))))
+ ,)))))
-> CmmExpr -- the one argument
-> NatM Register
:: MachRep
-> IF_ARCH_alpha((Reg -> Reg -> Instr)
,IF_ARCH_i386 ((Reg -> Reg -> Instr)
+ ,IF_ARCH_x86_64 ((Reg -> Reg -> Instr)
,IF_ARCH_sparc((Reg -> Reg -> Instr)
- ,)))
+ ,))))
-> CmmExpr -- the one argument
-> NatM Register
#endif
-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-#if i386_TARGET_ARCH
+#if i386_TARGET_ARCH || x86_64_TARGET_ARCH
{-
The Rules of the Game are:
register happens to be the destination register.
-}
-trivialCode rep instr maybe_revinstr a (CmmLit lit_b) = do
- a_code <- getAnyReg a
- let
- code dst
- = a_code dst `snocOL`
- instr (OpImm (litToImm lit_b)) (OpReg dst)
- -- in
- return (Any rep code)
-
-trivialCode rep instr (Just revinstr) (CmmLit lit_a) b = do
+-- NOTE(review): the literal clause now only fires for literals that
+-- pass the is64BitLit check -- presumably because x86_64 immediate
+-- operand fields cannot hold a full 64-bit literal; confirm against
+-- the definition of is64BitLit.
+trivialCode rep instr (Just revinstr) (CmmLit lit_a) b
+ | not (is64BitLit lit_a) = do
b_code <- getAnyReg b
let
code dst
-- in
return (Any rep code)
-trivialCode rep instr maybe_revinstr a b = do
- (b_op, b_code) <- getOperand b
+-- General case: delegate to genTrivialCode (shared with the FP path).
+trivialCode rep instr maybe_revinstr a b = genTrivialCode rep instr a b
+
+-- This is re-used for floating pt instructions too.
+genTrivialCode rep instr a b = do
+ (b_op, b_code) <- getNonClobberedOperand b
+ a_code <- getAnyReg a
+ tmp <- getNewRegNat rep
+ let
-- as the destination reg. In this case, we have to save b in a
-- new temporary across the computation of a.
code dst
- | dst `clashesWith` b_op =
+ | dst `regClashesWithOp` b_op =
b_code `appOL`
unitOL (MOV rep b_op (OpReg tmp)) `appOL`
a_code dst `snocOL`
instr b_op (OpReg dst)
-- in
return (Any rep code)
- where
- reg `clashesWith` OpReg reg2 = reg == reg2
- reg `clashesWith` OpAddr amode = any (==reg) (addrModeRegs amode)
+
+-- Hoisted to the top level from the old local 'clashesWith', and made
+-- total: the local version had no clause for operands that are neither
+-- a register nor an address (e.g. an immediate), which can never clash
+-- with a register -- hence the catch-all returning False.
+reg `regClashesWithOp` OpReg reg2 = reg == reg2
+reg `regClashesWithOp` OpAddr amode = any (==reg) (addrModeRegs amode)
+reg `regClashesWithOp` _ = False
-----------
-----------
+#if i386_TARGET_ARCH
+
trivialFCode pk instr x y = do
(x_reg, x_code) <- getNonClobberedReg x -- these work for float regs too
(y_reg, y_code) <- getSomeReg y
-- in
return (Any pk code)
+#endif
+
+#if x86_64_TARGET_ARCH
+
+-- On x86_64, FP binops follow the same two-operand scheme as integer
+-- ones, so delegate to genTrivialCode (documented above as "re-used
+-- for floating pt instructions too"), sizing the instruction by pk.
+trivialFCode pk instr x y = genTrivialCode pk (instr pk) x y
+
+#endif
+
-------------
trivialUFCode rep instr x = do
coerceInt2FP :: MachRep -> MachRep -> CmmExpr -> NatM Register
coerceFP2Int :: MachRep -> MachRep -> CmmExpr -> NatM Register
-#ifdef sparc_TARGET_ARCH
+#if sparc_TARGET_ARCH
coerceDbl2Flt :: CmmExpr -> NatM Register
coerceFlt2Dbl :: CmmExpr -> NatM Register
#endif
-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+#if x86_64_TARGET_ARCH
+
+-- Convert a floating-point value (F32/F64) into an integer register.
+coerceFP2Int from to x = do
+ (x_op, x_code) <- getOperand x -- ToDo: could be a safe operand
+ let
+ -- NOTE(review): Cmm float->int conversion is expected to truncate
+ -- towards zero; confirm that CVTSS2SI/CVTSD2SI here are emitted as
+ -- the truncating cvttss2si/cvttsd2si forms rather than the
+ -- MXCSR-rounding ones.
+ opc = case from of F32 -> CVTSS2SI; F64 -> CVTSD2SI
+ code dst = x_code `snocOL` opc x_op dst
+ -- in
+ return (Any to code) -- works even if the destination rep is <I32
+
+-- Convert an integer value into a floating-point register (F32/F64).
+coerceInt2FP from to x = do
+ (x_op, x_code) <- getOperand x -- ToDo: could be a safe operand
+ let
+ opc = case to of F32 -> CVTSI2SS; F64 -> CVTSI2SD
+ code dst = x_code `snocOL` opc x_op dst
+ -- in
+ return (Any to code) -- works even if the destination rep is <I32
+
+-- Convert between the two floating-point widths (F32 <-> F64),
+-- choosing the SSE conversion by the *target* rep.
+coerceFP2FP :: MachRep -> CmmExpr -> NatM Register
+coerceFP2FP to x = do
+ (x_reg, x_code) <- getSomeReg x
+ let
+ opc = case to of F32 -> CVTSD2SS; F64 -> CVTSS2SD
+ code dst = x_code `snocOL` opc x_reg dst
+ -- in
+ return (Any to code)
+
+#endif
+
+-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
#if sparc_TARGET_ARCH
coerceInt2FP pk x