#include "HsVersions.h"
#include "nativeGen/NCG.h"
+#include "MachDeps.h"
-- NCG stuff:
import MachInstrs
import CLabel
-- The rest:
-import CmdLineOpts ( opt_PIC )
+import StaticFlags ( opt_PIC )
import ForeignCall ( CCallConv(..) )
import OrdList
import Pretty
import Outputable
-import qualified Outputable
import FastString
import FastTypes ( isFastTrue )
+import Constants ( wORD_SIZE )
#ifdef DEBUG
import Outputable ( assertPanic )
CmmAssign reg src
| isFloatingRep kind -> assignReg_FltCode kind reg src
- | wordRep == I32 && kind == I64
- -> assignReg_I64Code reg src
+#if WORD_SIZE_IN_BITS==32
+ | kind == I64 -> assignReg_I64Code reg src
+#endif
| otherwise -> assignReg_IntCode kind reg src
where kind = cmmRegRep reg
CmmStore addr src
| isFloatingRep kind -> assignMem_FltCode kind addr src
- | wordRep == I32 && kind == I64
- -> assignMem_I64Code addr src
+#if WORD_SIZE_IN_BITS==32
+ | kind == I64 -> assignMem_I64Code addr src
+#endif
| otherwise -> assignMem_IntCode kind addr src
where kind = cmmExprRep src
-- selection game are therefore that the returned
-- Reg may be modified
+#if WORD_SIZE_IN_BITS==32
assignMem_I64Code :: CmmExpr -> CmmExpr -> NatM InstrBlock
assignReg_I64Code :: CmmReg -> CmmExpr -> NatM InstrBlock
+#endif
+
+#ifndef x86_64_TARGET_ARCH
iselExpr64 :: CmmExpr -> NatM ChildCode64
+#endif
-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-- -----------------------------------------------------------------------------
+-- Utils based on getRegister, below
+
+-- The dual to getAnyReg: compute an expression into a register, but
+-- we don't mind which one it is.
+getSomeReg :: CmmExpr -> NatM (Reg, InstrBlock)
+getSomeReg expr = do
+  r <- getRegister expr
+  case r of
+    Any rep code -> do
+	-- result can live anywhere: allocate a fresh virtual register
+	-- of the right rep and generate the code into it
+	tmp <- getNewRegNat rep
+	return (tmp, code tmp)
+    Fixed _ reg code ->
+	-- result is pinned to a specific register already; use it as-is
+	return (reg, code)
+
+-- -----------------------------------------------------------------------------
-- Grab the Reg for a CmmReg
getRegisterReg :: CmmReg -> Reg
getRegister :: CmmExpr -> NatM Register
+-- Position-independent code: the PIC base register is provided by the
+-- monad (getPicBaseNat), with no instructions needed at this use site.
+getRegister (CmmReg (CmmGlobal PicBaseReg))
+  = do
+      reg <- getPicBaseNat wordRep
+      return (Fixed wordRep reg nilOL)
+
getRegister (CmmReg reg)
= return (Fixed (cmmRegRep reg) (getRegisterReg reg) nilOL)
getRegister tree@(CmmRegOff _ _)
= getRegister (mangleIndexTree tree)
-getRegister CmmPicBaseReg
- = do
- reg <- getPicBaseNat wordRep
- return (Fixed wordRep reg nilOL)
-
-- end of machine-"independent" bit; here we go on the rest...
#if alpha_TARGET_ARCH
-- in
return (Any F64 code)
+#endif /* i386_TARGET_ARCH */
+
+#if x86_64_TARGET_ARCH
+
+-- Floating-point literal 0.0: clear the destination with a self-XOR
+-- instead of loading a constant from memory.
+getRegister (CmmLit (CmmFloat 0.0 rep)) = do
+   let code dst = unitOL  (XOR rep (OpReg dst) (OpReg dst))
+	-- I don't know why there are xorpd, xorps, and pxor instructions.
+	-- They all appear to do the same thing --SDM
+   return (Any rep code)
+
+-- Any other float literal: emit it into a read-only data section and
+-- load it with a RIP-relative addressing mode.
+getRegister (CmmLit (CmmFloat f rep)) = do
+    lbl <- getNewLabelNat
+    let code dst = toOL [
+	    LDATA ReadOnlyData
+			[CmmDataLabel lbl,
+			 CmmStaticLit (CmmFloat f rep)],
+	    MOV rep (OpAddr (ripRel (ImmCLbl lbl))) (OpReg dst)
+	    ]
+    -- in
+    return (Any rep code)
+
+#endif /* x86_64_TARGET_ARCH */
+
+#if i386_TARGET_ARCH || x86_64_TARGET_ARCH
-- catch simple cases of zero- or sign-extended load
getRegister (CmmMachOp (MO_U_Conv I8 I32) [CmmLoad addr _]) = do
code <- intLoadCode (MOVSxL I16) addr
return (Any I32 code)
+#endif
+
+#if x86_64_TARGET_ARCH
+
+-- catch simple cases of zero- or sign-extended load
+-- (fold the widening conversion into the load instruction itself,
+-- rather than loading and then converting separately)
+getRegister (CmmMachOp (MO_U_Conv I8 I64) [CmmLoad addr _]) = do
+  code <- intLoadCode (MOVZxL I8) addr
+  return (Any I64 code)
+
+getRegister (CmmMachOp (MO_S_Conv I8 I64) [CmmLoad addr _]) = do
+  code <- intLoadCode (MOVSxL I8) addr
+  return (Any I64 code)
+
+getRegister (CmmMachOp (MO_U_Conv I16 I64) [CmmLoad addr _]) = do
+  code <- intLoadCode (MOVZxL I16) addr
+  return (Any I64 code)
+
+getRegister (CmmMachOp (MO_S_Conv I16 I64) [CmmLoad addr _]) = do
+  code <- intLoadCode (MOVSxL I16) addr
+  return (Any I64 code)
+
+getRegister (CmmMachOp (MO_U_Conv I32 I64) [CmmLoad addr _]) = do
+  code <- intLoadCode (MOV I32) addr -- 32-bit loads zero-extend
+  return (Any I64 code)
+
+getRegister (CmmMachOp (MO_S_Conv I32 I64) [CmmLoad addr _]) = do
+  code <- intLoadCode (MOVSxL I32) addr
+  return (Any I64 code)
+
+#endif
+
+#if x86_64_TARGET_ARCH
+-- SSE floating-point negation: XOR the value against a 16-byte-aligned
+-- constant with only the sign bit of the low lane set.
+getRegister (CmmMachOp (MO_S_Neg F32) [x]) = do
+  x_code <- getAnyReg x
+  lbl <- getNewLabelNat
+  let
+    code dst = x_code dst `appOL` toOL [
+	-- This is how gcc does it, so it can't be that bad:
+	LDATA ReadOnlyData16 [
+		CmmAlign 16,
+		CmmDataLabel lbl,
+		CmmStaticLit (CmmInt 0x80000000 I32),
+		CmmStaticLit (CmmInt 0 I32),
+		CmmStaticLit (CmmInt 0 I32),
+		CmmStaticLit (CmmInt 0 I32)
+	],
+	XOR F32 (OpAddr (ripRel (ImmCLbl lbl))) (OpReg dst)
+	-- xorps, so we need the 128-bit constant
+	-- ToDo: rip-relative
+	]
+  --
+  return (Any F32 code)
+
+getRegister (CmmMachOp (MO_S_Neg F64) [x]) = do
+  x_code <- getAnyReg x
+  lbl <- getNewLabelNat
+  let
+	-- This is how gcc does it, so it can't be that bad:
+    code dst = x_code dst `appOL` toOL [
+	LDATA ReadOnlyData16 [
+		CmmAlign 16,
+		CmmDataLabel lbl,
+		CmmStaticLit (CmmInt 0x8000000000000000 I64),
+		CmmStaticLit (CmmInt 0 I64)
+	],
+	-- gcc puts an unpck here.  Wonder if we need it.
+	XOR F64 (OpAddr (ripRel (ImmCLbl lbl))) (OpReg dst)
+	-- xorpd, so we need the 128-bit constant
+	]
+  --
+  return (Any F64 code)
+#endif
+
+#if i386_TARGET_ARCH || x86_64_TARGET_ARCH
getRegister (CmmMachOp mop [x]) -- unary MachOps
= case mop of
+#if i386_TARGET_ARCH
MO_S_Neg F32 -> trivialUFCode F32 (GNEG F32) x
MO_S_Neg F64 -> trivialUFCode F64 (GNEG F64) x
+#endif
MO_S_Neg rep -> trivialUCode rep (NEGI rep) x
MO_Not rep -> trivialUCode rep (NOT rep) x
MO_S_Conv I16 I8 -> conversionNop I16 x
MO_U_Conv I32 I16 -> conversionNop I32 x
MO_S_Conv I32 I16 -> conversionNop I32 x
+#if x86_64_TARGET_ARCH
+ MO_U_Conv I64 I32 -> conversionNop I64 x
+ MO_S_Conv I64 I32 -> conversionNop I64 x
+ MO_U_Conv I64 I16 -> conversionNop I64 x
+ MO_S_Conv I64 I16 -> conversionNop I64 x
+ MO_U_Conv I64 I8 -> conversionNop I64 x
+ MO_S_Conv I64 I8 -> conversionNop I64 x
+#endif
+
MO_U_Conv rep1 rep2 | rep1 == rep2 -> conversionNop rep1 x
MO_S_Conv rep1 rep2 | rep1 == rep2 -> conversionNop rep1 x
MO_S_Conv I16 I32 -> integerExtend I16 I32 MOVSxL x
MO_S_Conv I8 I16 -> integerExtend I8 I16 MOVSxL x
+#if x86_64_TARGET_ARCH
+ MO_U_Conv I8 I64 -> integerExtend I8 I64 MOVZxL x
+ MO_U_Conv I16 I64 -> integerExtend I16 I64 MOVZxL x
+ MO_U_Conv I32 I64 -> integerExtend I32 I64 MOVZxL x
+ MO_S_Conv I8 I64 -> integerExtend I8 I64 MOVSxL x
+ MO_S_Conv I16 I64 -> integerExtend I16 I64 MOVSxL x
+ MO_S_Conv I32 I64 -> integerExtend I32 I64 MOVSxL x
+ -- for 32-to-64 bit zero extension, amd64 uses an ordinary movl.
+ -- However, we don't want the register allocator to throw it
+ -- away as an unnecessary reg-to-reg move, so we keep it in
+ -- the form of a movzl and print it as a movl later.
+#endif
+
+#if i386_TARGET_ARCH
MO_S_Conv F32 F64 -> conversionNop F64 x
MO_S_Conv F64 F32 -> conversionNop F32 x
+#else
+ MO_S_Conv F32 F64 -> coerceFP2FP F64 x
+ MO_S_Conv F64 F32 -> coerceFP2FP F32 x
+#endif
+
MO_S_Conv from to
| isFloatingRep from -> coerceFP2Int from to x
| isFloatingRep to -> coerceInt2FP from to x
+ other -> pprPanic "getRegister" (pprMachOp mop)
where
-- signed or unsigned extension.
integerExtend from to instr expr = do
MO_U_Lt rep -> condIntReg LU x y
MO_U_Le rep -> condIntReg LEU x y
- MO_Add F32 -> trivialFCode F32 GADD x y
- MO_Sub F32 -> trivialFCode F32 GSUB x y
+#if i386_TARGET_ARCH
+ MO_Add F32 -> trivialFCode F32 GADD x y
+ MO_Sub F32 -> trivialFCode F32 GSUB x y
MO_Add F64 -> trivialFCode F64 GADD x y
MO_Sub F64 -> trivialFCode F64 GSUB x y
- MO_S_Quot F32 -> trivialFCode F32 GDIV x y
+ MO_S_Quot F32 -> trivialFCode F32 GDIV x y
MO_S_Quot F64 -> trivialFCode F64 GDIV x y
+#endif
+
+#if x86_64_TARGET_ARCH
+ MO_Add F32 -> trivialFCode F32 ADD x y
+ MO_Sub F32 -> trivialFCode F32 SUB x y
+
+ MO_Add F64 -> trivialFCode F64 ADD x y
+ MO_Sub F64 -> trivialFCode F64 SUB x y
+
+ MO_S_Quot F32 -> trivialFCode F32 FDIV x y
+ MO_S_Quot F64 -> trivialFCode F64 FDIV x y
+#endif
MO_Add rep -> add_code rep x y
MO_Sub rep -> sub_code rep x y
MO_U_Quot rep -> div_code rep False True x y
MO_U_Rem rep -> div_code rep False False x y
+#if i386_TARGET_ARCH
MO_Mul F32 -> trivialFCode F32 GMUL x y
MO_Mul F64 -> trivialFCode F64 GMUL x y
+#endif
+
+#if x86_64_TARGET_ARCH
+ MO_Mul F32 -> trivialFCode F32 MUL x y
+ MO_Mul F64 -> trivialFCode F64 MUL x y
+#endif
+
MO_Mul rep -> let op = IMUL rep in
trivialCode rep op (Just op) x y
where
--------------------
imulMayOflo :: MachRep -> CmmExpr -> CmmExpr -> NatM Register
- imulMayOflo I32 a b = do
- res_lo <- getNewRegNat I32
- res_hi <- getNewRegNat I32
+ imulMayOflo rep a b = do
(a_reg, a_code) <- getNonClobberedReg a
- (b_reg, b_code) <- getSomeReg b
+ b_code <- getAnyReg b
let
- code dst = a_code `appOL` b_code `appOL`
+ shift_amt = case rep of
+ I32 -> 31
+ I64 -> 63
+ _ -> panic "shift_amt"
+
+ code = a_code `appOL` b_code eax `appOL`
toOL [
- MOV I32 (OpReg a_reg) (OpReg res_hi),
- MOV I32 (OpReg b_reg) (OpReg res_lo),
- IMUL64 res_hi res_lo, -- result in res_hi:res_lo
- SAR I32 (OpImm (ImmInt 31)) (OpReg res_lo), -- sign extend lower part
- SUB I32 (OpReg res_hi) (OpReg res_lo), -- compare against upper
- MOV I32 (OpReg res_lo) (OpReg dst)
- -- dst==0 if high part == sign extended low part
+ IMUL2 rep (OpReg a_reg), -- result in %edx:%eax
+ SAR rep (OpImm (ImmInt shift_amt)) (OpReg eax),
+ -- sign extend lower part
+ SUB rep (OpReg edx) (OpReg eax)
+ -- compare against upper
+ -- eax==0 if high part == sign extended low part
]
-- in
- return (Any I32 code)
+ return (Fixed rep eax code)
--------------------
shift_code :: MachRep
code dst
= x_code `snocOL`
LEA rep
- (OpAddr (AddrBaseIndex (Just x_reg) Nothing imm))
+ (OpAddr (AddrBaseIndex (EABaseReg x_reg) EAIndexNone imm))
(OpReg dst)
--
return (Any rep code)
----------------------
div_code rep signed quotient x y = do
- (y_op, y_code) <- getOperand y -- cannot be clobbered
+ (y_op, y_code) <- getRegOrMem y -- cannot be clobbered
x_code <- getAnyReg x
let
- widen | signed = CLTD
+ widen | signed = CLTD rep
| otherwise = XOR rep (OpReg edx) (OpReg edx)
instr | signed = IDIV
return (Fixed rep result code)
-
getRegister (CmmLoad mem pk)
| isFloatingRep pk
= do
Amode src mem_code <- getAmode mem
let
code dst = mem_code `snocOL`
- GLD pk src dst
+ IF_ARCH_i386(GLD pk src dst,
+ MOV pk (OpAddr src) (OpReg dst))
--
return (Any pk code)
+#if i386_TARGET_ARCH
getRegister (CmmLoad mem pk)
| pk /= I64
= do
-- we can't guarantee access to an 8-bit variant of every register
-- (esi and edi don't have 8-bit variants), so to make things
-- simpler we do our 8-bit arithmetic with full 32-bit registers.
+#endif
+
+#if x86_64_TARGET_ARCH
+-- Simpler memory load code on x86_64
+-- (no I64 special case needed here: 64-bit loads are native, unlike
+-- the i386 clause above which excludes pk == I64)
+getRegister (CmmLoad mem pk)
+  = do
+	code <- intLoadCode (MOV pk) mem
+	return (Any pk code)
+#endif
getRegister (CmmLit (CmmInt 0 rep))
= let
+ -- x86_64: 32-bit xor is one byte shorter, and zero-extends to 64 bits
+ adj_rep = case rep of I64 -> I32; _ -> rep
+ rep1 = IF_ARCH_i386( rep, adj_rep )
code dst
- = unitOL (XOR rep (OpReg dst) (OpReg dst))
+ = unitOL (XOR rep1 (OpReg dst) (OpReg dst))
in
return (Any rep code)
+#if x86_64_TARGET_ARCH
+  -- optimisation for loading small literals on x86_64: take advantage
+  -- of the automatic zero-extension from 32 to 64 bits, because the 32-bit
+  -- instruction forms are shorter.
+getRegister (CmmLit lit)
+  | I64 <- cmmLitRep lit, not (isBigLit lit)
+  = let
+	imm = litToImm lit
+	code dst = unitOL (MOV I32 (OpImm imm) (OpReg dst))
+    in
+	return (Any I64 code)
+  where
+   -- "big" = does not fit in an unsigned 32-bit immediate
+   isBigLit (CmmInt i I64) = i < 0 || i > 0xffffffff
+   isBigLit _ = False
+	-- note1: not the same as is64BitLit, because that checks for
+	-- signed literals that fit in 32 bits, but we want unsigned
+	-- literals here.
+	-- note2: all labels are small, because we're assuming the
+	-- small memory model (see gcc docs, -mcmodel=small).
+#endif
+
getRegister (CmmLit lit)
= let
rep = cmmLitRep lit
in
return (Any rep code)
-getRegister other = panic "getRegister(x86)"
+getRegister other = pprPanic "getRegister(x86)" (ppr other)
intLoadCode :: (Operand -> Operand -> Instr) -> CmmExpr
anyReg (Any _ code) = return code
anyReg (Fixed rep reg fcode) = return (\dst -> fcode `snocOL` reg2reg rep reg dst)
--- The dual to getAnyReg: compute an expression into a register, but
--- we don't mind which one it is.
-getSomeReg :: CmmExpr -> NatM (Reg, InstrBlock)
-getSomeReg expr = do
- r <- getRegister expr
- case r of
- Any rep code -> do
- tmp <- getNewRegNat rep
- return (tmp, code tmp)
- Fixed _ reg code ->
- return (reg, code)
-
-- A bit like getSomeReg, but we want a reg that can be byte-addressed.
-- Fixed registers might not be byte-addressable, so we make sure we've
-- got a temporary, inserting an extra reg copy if necessary.
getByteReg :: CmmExpr -> NatM (Reg, InstrBlock)
+#if x86_64_TARGET_ARCH
+getByteReg = getSomeReg -- all regs are byte-addressable on x86_64
+#else
getByteReg expr = do
r <- getRegister expr
case r of
return (tmp, code `snocOL` reg2reg rep reg tmp)
-- ToDo: could optimise slightly by checking for byte-addressable
-- real registers, but that will happen very rarely if at all.
+#endif
-- Another variant: this time we want the result in a register that cannot
-- be modified by code to evaluate an arbitrary expression.
reg2reg :: MachRep -> Reg -> Reg -> Instr
reg2reg rep src dst
+#if i386_TARGET_ARCH
| isFloatingRep rep = GMOV src dst
+#endif
| otherwise = MOV rep (OpReg src) (OpReg dst)
-#endif /* i386_TARGET_ARCH */
+#endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */
-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
extendUExpr I32 x = x
extendUExpr rep x = CmmMachOp (MO_U_Conv rep I32) [x]
--- ###FIXME: exact code duplication from x86 case
--- The dual to getAnyReg: compute an expression into a register, but
--- we don't mind which one it is.
-getSomeReg :: CmmExpr -> NatM (Reg, InstrBlock)
-getSomeReg expr = do
- r <- getRegister expr
- case r of
- Any rep code -> do
- tmp <- getNewRegNat rep
- return (tmp, code tmp)
- Fixed _ reg code ->
- return (reg, code)
-
#endif /* powerpc_TARGET_ARCH */
-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-#if i386_TARGET_ARCH
+#if i386_TARGET_ARCH || x86_64_TARGET_ARCH
-- This is all just ridiculous, since it carefully undoes
-- what mangleIndexTree has just done.
-getAmode (CmmMachOp (MO_Sub rep) [x, CmmLit (CmmInt i _)])
+getAmode (CmmMachOp (MO_Sub rep) [x, CmmLit lit@(CmmInt i _)])
+ | not (is64BitLit lit)
-- ASSERT(rep == I32)???
= do (x_reg, x_code) <- getSomeReg x
let off = ImmInt (-(fromInteger i))
- return (Amode (AddrBaseIndex (Just x_reg) Nothing off) x_code)
+ return (Amode (AddrBaseIndex (EABaseReg x_reg) EAIndexNone off) x_code)
-getAmode (CmmMachOp (MO_Add rep) [x, CmmLit (CmmInt i _)])
+getAmode (CmmMachOp (MO_Add rep) [x, CmmLit lit@(CmmInt i _)])
+ | not (is64BitLit lit)
-- ASSERT(rep == I32)???
= do (x_reg, x_code) <- getSomeReg x
let off = ImmInt (fromInteger i)
- return (Amode (AddrBaseIndex (Just x_reg) Nothing off) x_code)
+ return (Amode (AddrBaseIndex (EABaseReg x_reg) EAIndexNone off) x_code)
-- Turn (lit1 << n + lit2) into (lit2 + lit1 << n) so it will be
-- recognised by the next rule.
let
code = x_code `appOL` y_code
base = case shift of 0 -> 1; 1 -> 2; 2 -> 4; 3 -> 8
- return (Amode (AddrBaseIndex (Just x_reg) (Just (y_reg,base)) (ImmInt 0))
+ return (Amode (AddrBaseIndex (EABaseReg x_reg) (EAIndex y_reg base) (ImmInt 0))
code)
-getAmode (CmmLit lit)
+getAmode (CmmLit lit) | not (is64BitLit lit)
= return (Amode (ImmAddr (litToImm lit) 0) nilOL)
getAmode expr = do
(reg,code) <- getSomeReg expr
- return (Amode (AddrBaseIndex (Just reg) Nothing (ImmInt 0)) code)
+ return (Amode (AddrBaseIndex (EABaseReg reg) EAIndexNone (ImmInt 0)) code)
-#endif /* i386_TARGET_ARCH */
+#endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */
-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-- -----------------------------------------------------------------------------
-- getOperand: sometimes any operand will do.
--- getOperand gets a *safe* operand; that is, the value of the operand
--- will remain valid across the computation of an arbitrary expression,
--- unless the expression is computed directly into a register which
--- the operand refers to (see trivialCode where this function is used
--- for an example).
+-- getNonClobberedOperand: the value of the operand will remain valid across
+-- the computation of an arbitrary expression, unless the expression
+-- is computed directly into a register which the operand refers to
+-- (see trivialCode where this function is used for an example).
-#ifdef i386_TARGET_ARCH
+#if i386_TARGET_ARCH || x86_64_TARGET_ARCH
-getOperand :: CmmExpr -> NatM (Operand, InstrBlock)
-getOperand (CmmLoad mem pk)
- | not (isFloatingRep pk) && pk /= I64 = do
+getNonClobberedOperand :: CmmExpr -> NatM (Operand, InstrBlock)
+#if x86_64_TARGET_ARCH
+getNonClobberedOperand (CmmLit lit)
+ | isSuitableFloatingPointLit lit = do
+ lbl <- getNewLabelNat
+ let code = unitOL (LDATA ReadOnlyData [CmmDataLabel lbl,
+ CmmStaticLit lit])
+ return (OpAddr (ripRel (ImmCLbl lbl)), code)
+#endif
+getNonClobberedOperand (CmmLit lit)
+ | not (is64BitLit lit) && not (isFloatingRep (cmmLitRep lit)) =
+ return (OpImm (litToImm lit), nilOL)
+getNonClobberedOperand (CmmLoad mem pk)
+ | IF_ARCH_i386(not (isFloatingRep pk) && pk /= I64, True) = do
Amode src mem_code <- getAmode mem
(src',save_code) <-
if (amodeCouldBeClobbered src)
then do
tmp <- getNewRegNat wordRep
- return (AddrBaseIndex (Just tmp) Nothing (ImmInt 0),
+ return (AddrBaseIndex (EABaseReg tmp) EAIndexNone (ImmInt 0),
unitOL (LEA I32 (OpAddr src) (OpReg tmp)))
else
return (src, nilOL)
return (OpAddr src', save_code `appOL` mem_code)
-
-getOperand e = do
+getNonClobberedOperand e = do
(reg, code) <- getNonClobberedReg e
return (OpReg reg, code)
regClobbered (RealReg rr) = isFastTrue (freeReg rr)
regClobbered _ = False
+-- getOperand: the operand is not required to remain valid across the
+-- computation of an arbitrary expression.
+getOperand :: CmmExpr -> NatM (Operand, InstrBlock)
+#if x86_64_TARGET_ARCH
+-- non-zero float literals can be used directly from memory (RIP-relative)
+getOperand (CmmLit lit)
+  | isSuitableFloatingPointLit lit = do
+    lbl <- getNewLabelNat
+    let code = unitOL (LDATA ReadOnlyData [CmmDataLabel lbl,
+					   CmmStaticLit lit])
+    return (OpAddr (ripRel (ImmCLbl lbl)), code)
+#endif
+-- integer literals that fit in an immediate field
+getOperand (CmmLit lit)
+  | not (is64BitLit lit) && not (isFloatingRep (cmmLitRep lit)) = do
+    return (OpImm (litToImm lit), nilOL)
+-- loads can be used as memory operands directly
+getOperand (CmmLoad mem pk)
+  | IF_ARCH_i386(not (isFloatingRep pk) && pk /= I64, True) = do
+    Amode src mem_code <- getAmode mem
+    return (OpAddr src, mem_code)
+-- general case: compute into some register
+getOperand e = do
+    (reg, code) <- getSomeReg e
+    return (OpReg reg, code)
+
+-- would getOperand succeed without falling through to the register case?
+isOperand :: CmmExpr -> Bool
+isOperand (CmmLoad _ _) = True
+isOperand (CmmLit lit)  = not (is64BitLit lit)
+			  || isSuitableFloatingPointLit lit
+isOperand _             = False
+
+-- if we want a floating-point literal as an operand, we can
+-- use it directly from memory.  However, if the literal is
+-- zero, we're better off generating it into a register using
+-- xor.
+isSuitableFloatingPointLit (CmmFloat f _) = f /= 0.0
+isSuitableFloatingPointLit _ = False
+
+-- like getOperand, but the result is never an immediate
+getRegOrMem :: CmmExpr -> NatM (Operand, InstrBlock)
+getRegOrMem (CmmLoad mem pk)
+  | IF_ARCH_i386(not (isFloatingRep pk) && pk /= I64, True) = do
+    Amode src mem_code <- getAmode mem
+    return (OpAddr src, mem_code)
+getRegOrMem e = do
+    (reg, code) <- getNonClobberedReg e
+    return (OpReg reg, code)
+
+-- is64BitLit: True for integer literals that don't fit in a signed
+-- 32-bit immediate (only possible on x86_64; everything fits elsewhere)
+#if x86_64_TARGET_ARCH
+is64BitLit (CmmInt i I64) = i > 0x7fffffff || i < -0x80000000
+	-- assume that labels are in the range 0-2^31-1: this assumes the
+	-- small memory model (see gcc docs, -mcmodel=small).
+#endif
+is64BitLit x = False
#endif
-- -----------------------------------------------------------------------------
-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-#if i386_TARGET_ARCH || sparc_TARGET_ARCH
+#if i386_TARGET_ARCH || x86_64_TARGET_ARCH || sparc_TARGET_ARCH
-- yes, they really do seem to want exactly the same!
getCondCode (CmmMachOp mop [x, y])
#endif /* alpha_TARGET_ARCH */
-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-#if i386_TARGET_ARCH
+#if i386_TARGET_ARCH || x86_64_TARGET_ARCH
-- memory vs immediate
-condIntCode cond (CmmLoad x pk) (CmmLit lit) = do
+condIntCode cond (CmmLoad x pk) (CmmLit lit) | not (is64BitLit lit) = do
Amode x_addr x_code <- getAmode x
let
imm = litToImm lit
--
return (CondCode False cond code)
--- anything vs immediate
-condIntCode cond x (CmmLit lit) = do
- (x_reg, x_code) <- getSomeReg x
+-- anything vs operand
+condIntCode cond x y | isOperand y = do
+ (x_reg, x_code) <- getNonClobberedReg x
+ (y_op, y_code) <- getOperand y
let
- imm = litToImm lit
- code = x_code `snocOL`
- CMP (cmmLitRep lit) (OpImm imm) (OpReg x_reg)
- -- in
- return (CondCode False cond code)
-
--- memory vs anything
-condIntCode cond (CmmLoad x pk) y = do
- (y_reg, y_code) <- getNonClobberedReg y
- Amode x_addr x_code <- getAmode x
- let
- code = y_code `appOL`
- x_code `snocOL`
- CMP pk (OpReg y_reg) (OpAddr x_addr)
- -- in
- return (CondCode False cond code)
-
--- anything vs memory
-condIntCode cond y (CmmLoad x pk) = do
- (y_reg, y_code) <- getNonClobberedReg y
- Amode x_addr x_code <- getAmode x
- let
- code = y_code `appOL`
- x_code `snocOL`
- CMP pk (OpAddr x_addr) (OpReg y_reg)
+ code = x_code `appOL` y_code `snocOL`
+ CMP (cmmExprRep x) y_op (OpReg x_reg)
-- in
return (CondCode False cond code)
-- anything vs anything
condIntCode cond x y = do
- (x_op, x_code) <- getOperand x
- (y_reg, y_code) <- getSomeReg y
+ (y_reg, y_code) <- getNonClobberedReg y
+ (x_op, x_code) <- getRegOrMem x
let
- code = x_code `appOL`
- y_code `snocOL`
+ code = y_code `appOL`
+ x_code `snocOL`
CMP (cmmExprRep x) (OpReg y_reg) x_op
-- in
return (CondCode False cond code)
+#endif
------------
+#if i386_TARGET_ARCH
condFltCode cond x y
= ASSERT(cond `elem` ([EQQ, NE, LE, LTT, GE, GTT])) do
(x_reg, x_code) <- getNonClobberedReg x
-- The GCMP insn does the test and sets the zero flag if comparable
-- and true. Hence we always supply EQQ as the condition to test.
return (CondCode True EQQ code)
-
#endif /* i386_TARGET_ARCH */
+#if x86_64_TARGET_ARCH
+-- in the SSE2 comparison ops (ucomiss, ucomisd) the left arg may be
+-- an operand, but the right must be a reg.  We can probably do better
+-- than this general case...
+condFltCode cond x y = do
+  -- x must survive the computation of y, hence getNonClobberedReg
+  (x_reg, x_code) <- getNonClobberedReg x
+  (y_op, y_code) <- getOperand y
+  let
+	code = x_code `appOL`
+	       y_code `snocOL`
+		  CMP (cmmExprRep x) y_op (OpReg x_reg)
+	-- NB(1): we need to use the unsigned comparison operators on the
+	-- result of this comparison.
+  -- in
+  return (CondCode True (condToUnsigned cond) code)
+#endif
+
-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
#if sparc_TARGET_ARCH
#endif /* sparc_TARGET_ARCH */
#if powerpc_TARGET_ARCH
--- ###FIXME: I16 and I8!
+-- ###FIXME: I16 and I8!
condIntCode cond x (CmmLit (CmmInt y rep))
| Just src2 <- makeImmediate rep (not $ condUnsigned cond) y
= do
-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-#if i386_TARGET_ARCH
+#if i386_TARGET_ARCH || x86_64_TARGET_ARCH
-- integer assignment to memory
assignMem_IntCode pk addr src = do
return code
where
get_op_RI :: CmmExpr -> NatM (InstrBlock,Operand) -- code, operator
- get_op_RI (CmmLit lit)
+ get_op_RI (CmmLit lit) | not (is64BitLit lit)
= return (nilOL, OpImm (litToImm lit))
get_op_RI op
= do (reg,code) <- getNonClobberedReg op
-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-#if i386_TARGET_ARCH
+#if i386_TARGET_ARCH || x86_64_TARGET_ARCH
-- Floating point assignment to memory
assignMem_FltCode pk addr src = do
let
code = src_code `appOL`
addr_code `snocOL`
- GST pk src_reg addr
+ IF_ARCH_i386(GST pk src_reg addr,
+ MOV pk (OpReg src_reg) (OpAddr addr))
return code
-- Floating point assignment to a register/temporary
-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-#if i386_TARGET_ARCH
+#if i386_TARGET_ARCH || x86_64_TARGET_ARCH
genJump (CmmLoad mem pk) = do
Amode target code <- getAmode mem
genBranch id = return (unitOL (BR id))
#endif
-#if i386_TARGET_ARCH
+#if i386_TARGET_ARCH || x86_64_TARGET_ARCH
genBranch id = return (unitOL (JXX ALWAYS id))
#endif
CondCode _ cond code <- getCondCode bool
return (code `snocOL` JXX cond id)
-#endif /* i386_TARGET_ARCH */
+#endif
+
+-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+#if x86_64_TARGET_ARCH
+
+-- Conditional jump.  Integer conditions are a single JXX; float
+-- conditions need extra care because the SSE comparison leaves the
+-- parity flag set when the operands were unordered (a NaN was
+-- involved), hence the JXX PARITY branches below.
+genCondJump id bool = do
+  CondCode is_float cond cond_code <- getCondCode bool
+  if not is_float
+    then
+	return (cond_code `snocOL` JXX cond id)
+    else do
+	lbl <- getBlockIdNat
+
+	-- see comment with condFltReg
+	let code = case cond of
+			NE  -> or_unordered
+			GU  -> plain_test
+			GEU -> plain_test
+			_   -> and_ordered
+
+	    -- condition already true for unordered operands: test directly
+	    plain_test = unitOL (
+		  JXX cond id
+		)
+	    -- take the branch if the condition holds OR if unordered
+	    or_unordered = toOL [
+		  JXX cond id,
+		  JXX PARITY id
+		]
+	    -- take the branch only if ordered AND the condition holds
+	    and_ordered = toOL [
+		  JXX PARITY lbl,
+		  JXX cond id,
+		  JXX ALWAYS lbl,
+		  NEWBLOCK lbl
+		]
+	return (cond_code `appOL` code)
+
+#endif
+-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
#if sparc_TARGET_ARCH
-- CmmPrim -> ...
CmmForeignCall (CmmLit (CmmLabel lbl)) conv
-> -- ToDo: stdcall arg sizes
- return (unitOL (CALL (Left fn_imm)), conv)
+ return (unitOL (CALL (Left fn_imm) []), conv)
where fn_imm = ImmCLbl lbl
CmmForeignCall expr conv
-> do (dyn_c, dyn_r, dyn_rep) <- get_op expr
ASSERT(dyn_rep == I32)
- return (dyn_c `snocOL` CALL (Right dyn_r), conv)
+ return (dyn_c `snocOL` CALL (Right dyn_r) [], conv)
let push_code = concatOL push_codes
call = callinsns `appOL`
toOL (
-- Deallocate parameters after call for ccall;
-- but not for stdcall (callee does it)
- (if cconv == StdCallConv then [] else
+ (if cconv == StdCallConv || tot_arg_size==0 then [] else
[ADD I32 (OpImm (ImmInt tot_arg_size)) (OpReg esp)])
++
[DELTA (delta + tot_arg_size)]
code `appOL`
toOL [SUB I32 (OpImm (ImmInt size)) (OpReg esp),
DELTA (delta-size),
- GST sz reg (AddrBaseIndex (Just esp)
- Nothing
+ GST sz reg (AddrBaseIndex (EABaseReg esp)
+ EAIndexNone
(ImmInt 0))]
)
else return (size,
(reg,code) <- getSomeReg op
return (code, reg, cmmExprRep op)
+#endif /* i386_TARGET_ARCH */
+
+#if i386_TARGET_ARCH || x86_64_TARGET_ARCH
outOfLineFloatOp :: CallishMachOp -> CmmReg -> [(CmmExpr,MachHint)]
-> Maybe [GlobalReg] -> NatM InstrBlock
lbl = CmmLabel (mkForeignLabel fn Nothing False)
fn = case mop of
+ MO_F32_Sqrt -> FSLIT("sqrt")
+ MO_F32_Sin -> FSLIT("sin")
+ MO_F32_Cos -> FSLIT("cos")
+ MO_F32_Tan -> FSLIT("tan")
MO_F32_Exp -> FSLIT("exp")
MO_F32_Log -> FSLIT("log")
MO_F32_Tanh -> FSLIT("tanh")
MO_F32_Pwr -> FSLIT("pow")
+ MO_F64_Sqrt -> FSLIT("sqrt")
+ MO_F64_Sin -> FSLIT("sin")
+ MO_F64_Cos -> FSLIT("cos")
+ MO_F64_Tan -> FSLIT("tan")
MO_F64_Exp -> FSLIT("exp")
MO_F64_Log -> FSLIT("log")
MO_F64_Tanh -> FSLIT("tanh")
MO_F64_Pwr -> FSLIT("pow")
- other -> pprPanic "outOfLineFloatOp" (pprCallishMachOp mop)
+#endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */
-#endif /* i386_TARGET_ARCH */
+-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+#if x86_64_TARGET_ARCH
+
+genCCall (CmmPrim op) [(r,_)] args vols =
+  outOfLineFloatOp op r args vols
+
+-- genCCall for x86_64: pass leading arguments in registers
+-- (allArgRegs for integers, allFPArgRegs for floats), push the
+-- remainder on the stack right-to-left, keeping %rsp 16-byte
+-- aligned at the call, then move the result out of %rax / %xmm0.
+genCCall target dest_regs args vols = do
+
+	-- load up the register arguments
+    (stack_args, aregs, fregs, load_args_code)
+	 <- load_args args allArgRegs allFPArgRegs nilOL
+
+    let
+	-- the registers actually consumed are the prefixes of the
+	-- availability lists that load_args used up
+	fp_regs_used  = reverse (drop (length fregs) (reverse allFPArgRegs))
+	int_regs_used = reverse (drop (length aregs) (reverse allArgRegs))
+	arg_regs = int_regs_used ++ fp_regs_used
+		-- for annotating the call instruction with
+
+	sse_regs = length fp_regs_used
+
+	tot_arg_size = arg_size * length stack_args
+
+	-- On entry to the called function, %rsp should be aligned
+	-- on a 16-byte boundary +8 (i.e. the first stack arg after
+	-- the return address is 16-byte aligned).  In STG land
+	-- %rsp is kept 16-byte aligned (see StgCRun.c), so we just
+	-- need to make sure we push a multiple of 16-bytes of args,
+	-- plus the return address, to get the correct alignment.
+	-- Urg, this is hard.  We need to feed the delta back into
+	-- the arg pushing code.
+    (real_size, adjust_rsp) <-
+	if tot_arg_size `rem` 16 == 0
+	    then return (tot_arg_size, nilOL)
+	    else do -- we need to adjust...
+		delta <- getDeltaNat
+		setDeltaNat (delta-8)
+		return (tot_arg_size+8, toOL [
+				SUB I64 (OpImm (ImmInt 8)) (OpReg rsp),
+				DELTA (delta-8)
+			])
+
+    -- push the stack args, right to left
+    push_code <- push_args (reverse stack_args) nilOL
+    delta <- getDeltaNat
+
+    -- deal with static vs dynamic call targets
+    (callinsns,cconv) <-
+      case target of
+	-- CmmPrim -> ...
+	CmmForeignCall (CmmLit (CmmLabel lbl)) conv
+	   -> -- ToDo: stdcall arg sizes
+	      return (unitOL (CALL (Left fn_imm) arg_regs), conv)
+	   where fn_imm = ImmCLbl lbl
+	CmmForeignCall expr conv
+	   -> do (dyn_r, dyn_c) <- getSomeReg expr
+		 return (dyn_c `snocOL` CALL (Right dyn_r) arg_regs, conv)
+
+    let
+	-- The x86_64 ABI requires us to set %al to the number of SSE
+	-- registers that contain arguments, if the called routine
+	-- is a varargs function.  We don't know whether it's a
+	-- varargs function or not, so we have to assume it is.
+	--
+	-- It's not safe to omit this assignment, even if the number
+	-- of SSE regs in use is zero.  If %al is larger than 8
+	-- on entry to a varargs function, seg faults ensue.
+	assign_eax n = unitOL (MOV I32 (OpImm (ImmInt n)) (OpReg eax))
+
+    let call = callinsns `appOL`
+	       toOL (
+		  -- Deallocate parameters after call for ccall;
+		  -- but not for stdcall (callee does it)
+		  (if cconv == StdCallConv || real_size==0 then [] else
+		   [ADD wordRep (OpImm (ImmInt real_size)) (OpReg esp)])
+		  ++
+		  [DELTA (delta + real_size)]
+	       )
+    -- in
+    setDeltaNat (delta + real_size)
+
+    let
+	-- assign the results, if necessary: floats come back in %xmm0,
+	-- everything else in %rax
+	assign_code [] = nilOL
+	assign_code [(dest,_hint)] =
+	  case rep of
+		F32 -> unitOL (MOV rep (OpReg xmm0) (OpReg r_dest))
+		F64 -> unitOL (MOV rep (OpReg xmm0) (OpReg r_dest))
+		rep -> unitOL (MOV rep (OpReg rax) (OpReg r_dest))
+	  where
+		rep = cmmRegRep dest
+		r_dest = getRegisterReg dest
+	assign_code many = panic "genCCall.assign_code many"
+
+    return (load_args_code `appOL`
+	    adjust_rsp `appOL`
+	    push_code `appOL`
+	    assign_eax sse_regs `appOL`
+	    call `appOL`
+	    assign_code dest_regs)
+
+  where
+    arg_size = 8 -- always, at the mo
+
+    -- take arguments off the front of the list while argument
+    -- registers remain, returning the leftover (stack) arguments
+    -- and the unused register lists
+    load_args :: [(CmmExpr,MachHint)]
+	      -> [Reg] 			-- int regs avail for args
+	      -> [Reg] 			-- FP regs avail for args
+	      -> InstrBlock
+	      -> NatM ([(CmmExpr,MachHint)],[Reg],[Reg],InstrBlock)
+    load_args args [] [] code     =  return (args, [], [], code)
+	-- no more regs to use
+    load_args [] aregs fregs code =  return ([], aregs, fregs, code)
+	-- no more args to push
+    load_args ((arg,hint) : rest) aregs fregs code
+	| isFloatingRep arg_rep =
+	  case fregs of
+	    [] -> push_this_arg
+	    (r:rs) -> do
+	       arg_code <- getAnyReg arg
+	       load_args rest aregs rs (code `appOL` arg_code r)
+	| otherwise =
+	  case aregs of
+	    [] -> push_this_arg
+	    (r:rs) -> do
+	       arg_code <- getAnyReg arg
+	       load_args rest rs fregs (code `appOL` arg_code r)
+	where
+	  arg_rep = cmmExprRep arg
+
+	  push_this_arg = do
+	    (args',ars,frs,code') <- load_args rest aregs fregs code
+	    return ((arg,hint):args', ars, frs, code')
+
+    push_args [] code = return code
+    push_args ((arg,hint):rest) code
+       | isFloatingRep arg_rep = do
+	 (arg_reg, arg_code) <- getSomeReg arg
+	 delta <- getDeltaNat
+	 setDeltaNat (delta-arg_size)
+	 -- NB: arg_code (which computes the argument into arg_reg) must
+	 -- be included, and the stack slot must be allocated (SUB)
+	 -- *before* the store so the value lands at the new top of stack.
+	 let code' = code `appOL` arg_code `appOL` toOL [
+			SUB wordRep (OpImm (ImmInt arg_size)) (OpReg rsp),
+			DELTA (delta-arg_size),
+			MOV arg_rep (OpReg arg_reg) (OpAddr (spRel 0))]
+	 push_args rest code'
+
+       | otherwise = do
+	 -- we only ever generate word-sized function arguments.  Promotion
+	 -- has already happened: our Int8# type is kept sign-extended
+	 -- in an Int#, for example.
+	 ASSERT(arg_rep == I64) return ()
+	 (arg_op, arg_code) <- getOperand arg
+	 delta <- getDeltaNat
+	 setDeltaNat (delta-arg_size)
+	 let code' = code `appOL` toOL [PUSH I64 arg_op,
+					DELTA (delta-arg_size)]
+	 push_args rest code'
+	where
+	  arg_rep = cmmExprRep arg
+#endif
-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
* SysV insists on either passing I64 arguments on the stack, or in two GPRs,
starting with an odd-numbered GPR. It may skip a GPR to achieve this.
Darwin just treats an I64 like two separate I32s (high word first).
+ * I64 and F64 arguments are 8-byte aligned on the stack for SysV, but only
+ 4-byte aligned like everything else on Darwin.
+ * The SysV spec claims that F32 is represented as F64 on the stack. GCC on
+ PowerPC Linux does not agree, so neither do we.
According to both conventions, The parameter area should be part of the
caller's stack frame, allocated in the caller's prologue code (large enough
#if darwin_TARGET_OS
initialStackOffset = 24
-- size of linkage area + size of arguments, in bytes
- stackDelta _finalStack = roundTo16 $ (24 +) $ max 32 $ sum $
+ stackDelta _finalStack = roundTo 16 $ (24 +) $ max 32 $ sum $
map machRepByteWidth argReps
#elif linux_TARGET_OS
initialStackOffset = 8
- stackDelta finalStack = roundTo16 finalStack
+ stackDelta finalStack = roundTo 16 finalStack
#endif
args = map fst argsAndHints
argReps = map cmmExprRep args
- roundTo16 x | x `mod` 16 == 0 = x
- | otherwise = x + 16 - (x `mod` 16)
+ roundTo a x | x `mod` a == 0 = x
+ | otherwise = x + a - (x `mod` a)
move_sp_down finalStack
| delta > 64 =
storeWord vr [] offset = ST I32 vr (AddrRegImm sp (ImmInt offset))
#elif linux_TARGET_OS
- let stackCode = accumCode `appOL` code
- `snocOL` ST I32 vr_hi (AddrRegImm sp (ImmInt stackOffset))
- `snocOL` ST I32 vr_lo (AddrRegImm sp (ImmInt (stackOffset+4)))
+ let stackOffset' = roundTo 8 stackOffset
+ stackCode = accumCode `appOL` code
+ `snocOL` ST I32 vr_hi (AddrRegImm sp (ImmInt stackOffset'))
+ `snocOL` ST I32 vr_lo (AddrRegImm sp (ImmInt (stackOffset'+4)))
regCode hireg loreg =
accumCode `appOL` code
`snocOL` MR hireg vr_hi
passArguments args regs fprs stackOffset
(regCode hireg loreg) (hireg : loreg : accumUsed)
_ -> -- only one or no regs left
- passArguments args [] fprs (stackOffset+8)
+ passArguments args [] fprs (stackOffset'+8)
stackCode accumUsed
#endif
passArguments args
(drop nGprs gprs)
(drop nFprs fprs)
- (stackOffset + stackBytes)
+ (stackOffset' + stackBytes)
(accumCode `appOL` code `snocOL` ST rep vr stackSlot)
accumUsed
where
- stackSlot = AddrRegImm sp (ImmInt stackOffset)
+#if darwin_TARGET_OS
+ -- stackOffset is at least 4-byte aligned
+ -- The Darwin ABI is happy with that.
+ stackOffset' = stackOffset
+#else
+ -- ... the SysV ABI requires 8-byte alignment for doubles.
+ stackOffset' | rep == F64 = roundTo 8 stackOffset
+ | otherwise = stackOffset
+#endif
+ stackSlot = AddrRegImm sp (ImmInt stackOffset')
(nGprs, nFprs, stackBytes, regs) = case rep of
I32 -> (1, 0, 4, gprs)
#if darwin_TARGET_OS
genSwitch :: CmmExpr -> [Maybe BlockId] -> NatM InstrBlock
-#if i386_TARGET_ARCH
+#if i386_TARGET_ARCH || x86_64_TARGET_ARCH
genSwitch expr ids = do
(reg,e_code) <- getSomeReg expr
lbl <- getNewLabelNat
let
jumpTable = map jumpTableEntry ids
- op = OpAddr (AddrBaseIndex Nothing (Just (reg,4)) (ImmCLbl lbl))
+ op = OpAddr (AddrBaseIndex EABaseNone (EAIndex reg wORD_SIZE) (ImmCLbl lbl))
code = e_code `appOL` toOL [
LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
JMP_TBL op [ id | Just id <- ids ]
dynRef <- cmmMakeDynamicReference addImportNat False lbl
(tableReg,t_code) <- getSomeReg $ dynRef
let
- jumpTable = map jumpTableEntry ids
-
+ jumpTable = map jumpTableEntryRel ids
+
+ jumpTableEntryRel Nothing
+ = CmmStaticLit (CmmInt 0 wordRep)
+ jumpTableEntryRel (Just (BlockId id))
+ = CmmStaticLit (CmmLabelDiffOff blockLabel lbl 0)
+ where blockLabel = mkAsmTempLabel id
+
code = e_code `appOL` t_code `appOL` toOL [
LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
SLW tmp reg (RIImm (ImmInt 2)),
LD I32 tmp (AddrRegReg tableReg tmp),
+ ADD tmp tmp (RIReg tableReg),
MTCTR tmp,
BCTR [ id | Just id <- ids ]
]
-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-#if i386_TARGET_ARCH
+#if i386_TARGET_ARCH || x86_64_TARGET_ARCH
condIntReg cond x y = do
CondCode _ cond cond_code <- condIntCode cond x y
let
code dst = cond_code `appOL` toOL [
SETCC cond (OpReg tmp),
- MOV I32 (OpReg tmp) (OpReg dst),
- AND I32 (OpImm (ImmInt 1)) (OpReg dst)
+ MOVZxL I8 (OpReg tmp) (OpReg dst)
]
- -- NB. (1) Tha AND is needed here because the x86 only
- -- sets the low byte in the SETCC instruction.
- -- NB. (2) The extra temporary register is a hack to
- -- work around the fact that the setcc instructions only
- -- accept byte registers. dst might not be a byte-able reg,
- -- but currently all free registers are byte-able, so we're
- -- guaranteed that a new temporary is byte-able.
-- in
return (Any I32 code)
+#endif
+
+#if i386_TARGET_ARCH
condFltReg cond x y = do
- lbl1 <- getBlockIdNat
- lbl2 <- getBlockIdNat
CondCode _ cond cond_code <- condFltCode cond x y
- let
- code dst = cond_code `appOL` toOL [
- JXX cond lbl1,
- MOV I32 (OpImm (ImmInt 0)) (OpReg dst),
- JXX ALWAYS lbl2,
- NEWBLOCK lbl1,
- MOV I32 (OpImm (ImmInt 1)) (OpReg dst),
- JXX ALWAYS lbl2,
- NEWBLOCK lbl2]
- -- SIGH, have to split up this block somehow...
+ tmp <- getNewRegNat I8
+ let
+ code dst = cond_code `appOL` toOL [
+ SETCC cond (OpReg tmp),
+ MOVZxL I8 (OpReg tmp) (OpReg dst)
+ ]
-- in
return (Any I32 code)
-#endif /* i386_TARGET_ARCH */
+#endif
+
+#if x86_64_TARGET_ARCH
+
+condFltReg cond x y = do
+ CondCode _ cond cond_code <- condFltCode cond x y
+ tmp1 <- getNewRegNat wordRep
+ tmp2 <- getNewRegNat wordRep
+ let
+ -- We have to worry about unordered operands (eg. comparisons
+ -- against NaN). If the operands are unordered, the comparison
+ -- sets the parity flag, carry flag and zero flag.
+ -- All comparisons are supposed to return false for unordered
+ -- operands except for !=, which returns true.
+ --
+ -- Optimisation: we don't have to test the parity flag if we
+ -- know the test has already excluded the unordered case: eg >
+ -- and >= test for a zero carry flag, which can only occur for
+ -- ordered operands.
+ --
+ -- ToDo: by reversing comparisons we could avoid testing the
+ -- parity flag in more cases.
+
+ code dst =
+ cond_code `appOL`
+ (case cond of
+ NE -> or_unordered dst
+ GU -> plain_test dst
+ GEU -> plain_test dst
+ _ -> and_ordered dst)
+
+ plain_test dst = toOL [
+ SETCC cond (OpReg tmp1),
+ MOVZxL I8 (OpReg tmp1) (OpReg dst)
+ ]
+ or_unordered dst = toOL [
+ SETCC cond (OpReg tmp1),
+ SETCC PARITY (OpReg tmp2),
+ OR I8 (OpReg tmp1) (OpReg tmp2),
+ MOVZxL I8 (OpReg tmp2) (OpReg dst)
+ ]
+ and_ordered dst = toOL [
+ SETCC cond (OpReg tmp1),
+ SETCC NOTPARITY (OpReg tmp2),
+ AND I8 (OpReg tmp1) (OpReg tmp2),
+ MOVZxL I8 (OpReg tmp2) (OpReg dst)
+ ]
+ -- in
+ return (Any I32 code)
+
+#endif
-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-> IF_ARCH_alpha((Reg -> RI -> Reg -> Instr)
,IF_ARCH_i386 ((Operand -> Operand -> Instr)
-> Maybe (Operand -> Operand -> Instr)
+ ,IF_ARCH_x86_64 ((Operand -> Operand -> Instr)
+ -> Maybe (Operand -> Operand -> Instr)
,IF_ARCH_sparc((Reg -> RI -> Reg -> Instr)
,IF_ARCH_powerpc(Bool -> (Reg -> Reg -> RI -> Instr)
- ,))))
+ ,)))))
-> CmmExpr -> CmmExpr -- the two arguments
-> NatM Register
-> IF_ARCH_alpha((Reg -> Reg -> Reg -> Instr)
,IF_ARCH_sparc((Size -> Reg -> Reg -> Reg -> Instr)
,IF_ARCH_i386 ((MachRep -> Reg -> Reg -> Reg -> Instr)
- ,)))
+ ,IF_ARCH_x86_64 ((MachRep -> Operand -> Operand -> Instr)
+ ,))))
-> CmmExpr -> CmmExpr -- the two arguments
-> NatM Register
#endif
:: MachRep
-> IF_ARCH_alpha((RI -> Reg -> Instr)
,IF_ARCH_i386 ((Operand -> Instr)
+ ,IF_ARCH_x86_64 ((Operand -> Instr)
,IF_ARCH_sparc((RI -> Reg -> Instr)
,IF_ARCH_powerpc((Reg -> Reg -> Instr)
- ,))))
+ ,)))))
-> CmmExpr -- the one argument
-> NatM Register
:: MachRep
-> IF_ARCH_alpha((Reg -> Reg -> Instr)
,IF_ARCH_i386 ((Reg -> Reg -> Instr)
+ ,IF_ARCH_x86_64 ((Reg -> Reg -> Instr)
,IF_ARCH_sparc((Reg -> Reg -> Instr)
- ,)))
+ ,))))
-> CmmExpr -- the one argument
-> NatM Register
#endif
-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-#if i386_TARGET_ARCH
+#if i386_TARGET_ARCH || x86_64_TARGET_ARCH
{-
The Rules of the Game are:
register happens to be the destination register.
-}
-trivialCode rep instr maybe_revinstr a (CmmLit lit_b) = do
- a_code <- getAnyReg a
- let
- code dst
- = a_code dst `snocOL`
- instr (OpImm (litToImm lit_b)) (OpReg dst)
- -- in
- return (Any rep code)
-
-trivialCode rep instr (Just revinstr) (CmmLit lit_a) b = do
+trivialCode rep instr (Just revinstr) (CmmLit lit_a) b
+ | not (is64BitLit lit_a) = do
b_code <- getAnyReg b
let
code dst
-- in
return (Any rep code)
-trivialCode rep instr maybe_revinstr a b = do
- (b_op, b_code) <- getOperand b
+trivialCode rep instr maybe_revinstr a b = genTrivialCode rep instr a b
+
+-- This is re-used for floating pt instructions too.
+genTrivialCode rep instr a b = do
+ (b_op, b_code) <- getNonClobberedOperand b
a_code <- getAnyReg a
tmp <- getNewRegNat rep
let
-- as the destination reg. In this case, we have to save b in a
-- new temporary across the computation of a.
code dst
- | dst `clashesWith` b_op =
+ | dst `regClashesWithOp` b_op =
b_code `appOL`
unitOL (MOV rep b_op (OpReg tmp)) `appOL`
a_code dst `snocOL`
instr b_op (OpReg dst)
-- in
return (Any rep code)
- where
- reg `clashesWith` OpReg reg2 = reg == reg2
- reg `clashesWith` OpAddr amode = any (==reg) (addrModeRegs amode)
+
+reg `regClashesWithOp` OpReg reg2 = reg == reg2
+reg `regClashesWithOp` OpAddr amode = any (==reg) (addrModeRegs amode)
+reg `regClashesWithOp` _ = False
-----------
-----------
+#if i386_TARGET_ARCH
+
trivialFCode pk instr x y = do
(x_reg, x_code) <- getNonClobberedReg x -- these work for float regs too
(y_reg, y_code) <- getSomeReg y
-- in
return (Any pk code)
+#endif
+
+#if x86_64_TARGET_ARCH
+
+trivialFCode pk instr x y = genTrivialCode pk (instr pk) x y
+
+#endif
+
-------------
trivialUFCode rep instr x = do
coerceInt2FP :: MachRep -> MachRep -> CmmExpr -> NatM Register
coerceFP2Int :: MachRep -> MachRep -> CmmExpr -> NatM Register
-#ifdef sparc_TARGET_ARCH
+#if sparc_TARGET_ARCH
coerceDbl2Flt :: CmmExpr -> NatM Register
coerceFlt2Dbl :: CmmExpr -> NatM Register
#endif
-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+#if x86_64_TARGET_ARCH
+
+coerceFP2Int from to x = do
+ (x_op, x_code) <- getOperand x -- ToDo: could be a safe operand
+ let
+ opc = case from of F32 -> CVTSS2SI; F64 -> CVTSD2SI
+ code dst = x_code `snocOL` opc x_op dst
+ -- in
+ return (Any to code) -- works even if the destination rep is <I32
+
+coerceInt2FP from to x = do
+ (x_op, x_code) <- getOperand x -- ToDo: could be a safe operand
+ let
+ opc = case to of F32 -> CVTSI2SS; F64 -> CVTSI2SD
+ code dst = x_code `snocOL` opc x_op dst
+ -- in
+ return (Any to code) -- works even if the destination rep is <I32
+
+coerceFP2FP :: MachRep -> CmmExpr -> NatM Register
+coerceFP2FP to x = do
+ (x_reg, x_code) <- getSomeReg x
+ let
+ opc = case to of F32 -> CVTSD2SS; F64 -> CVTSS2SD
+ code dst = x_code `snocOL` opc x_reg dst
+ -- in
+ return (Any to code)
+
+#endif
+
+-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
#if sparc_TARGET_ARCH
coerceInt2FP pk x