| ReadOnlyData
| RelocatableReadOnlyData
| UninitialisedData
+ | ReadOnlyData16 -- .rodata.cst16 on x86_64, 16-byte aligned
| OtherSection String
data CmmStatic
cmmMachOpFold (MO_S_Conv rep1 rep2) [x] | rep1 == rep2 = x
cmmMachOpFold (MO_U_Conv rep1 rep2) [x] | rep1 == rep2 = x
--- ToDo: eliminate multiple conversions. Be careful though: can't remove
--- a narrowing, and can't remove conversions to/from floating point types.
-
--- ToDo: eliminate nested comparisons:
--- CmmMachOp MO_Lt [CmmMachOp MO_Eq [x,y], CmmLit (CmmInt 0 _)]
--- turns into a simple equality test.
+-- Eliminate nested conversions where possible
+-- rep1 -> rep2 is the inner conversion, rep2 -> rep3 the outer one
+-- (the outer conversion's source rep is ignored and assumed == rep2).
+cmmMachOpFold conv_outer args@[CmmMachOp conv_inner [x]]
+ | Just (rep1,rep2,signed1) <- isIntConversion conv_inner,
+ Just (_, rep3,signed2) <- isIntConversion conv_outer
+ = case () of
+ -- widen then narrow to the same size is a nop
+ _ | rep1 < rep2 && rep1 == rep3 -> x
+ -- Widen then narrow to different size: collapse to single conversion
+ -- but remember to use the signedness from the widening, just in case
+ -- the final conversion is a widen.
+ | rep1 < rep2 && rep2 > rep3 ->
+ cmmMachOpFold (intconv signed1 rep1 rep3) [x]
+ -- Nested widenings: collapse if the signedness is the same
+ | rep1 < rep2 && rep2 < rep3 && signed1 == signed2 ->
+ cmmMachOpFold (intconv signed1 rep1 rep3) [x]
+ -- Nested narrowings: collapse
+ -- (narrowing discards the high bits, so signedness is irrelevant
+ -- here and MO_U_Conv is always safe)
+ | rep1 > rep2 && rep2 > rep3 ->
+ cmmMachOpFold (MO_U_Conv rep1 rep3) [x]
+ | otherwise ->
+ CmmMachOp conv_outer args
+ where
+ -- view an int conversion as (source rep, target rep, is-signed)
+ isIntConversion (MO_U_Conv rep1 rep2) = Just (rep1,rep2,False)
+ isIntConversion (MO_S_Conv rep1 rep2) = Just (rep1,rep2,True)
+ isIntConversion _ = Nothing
+
+ -- rebuild a conversion op with the given signedness
+ intconv True = MO_S_Conv
+ intconv False = MO_U_Conv
+
+-- ToDo: a narrow of a load can be collapsed into a narrow load, right?
+-- but what if the architecture only supports word-sized loads, should
+-- we do the transformation anyway?
cmmMachOpFold mop args@[CmmLit (CmmInt x xrep), CmmLit (CmmInt y _)]
= case mop of
import qualified Outputable
import FastString
import FastTypes ( isFastTrue )
+import Constants ( wORD_SIZE )
#ifdef DEBUG
import Outputable ( assertPanic )
CmmAssign reg src
| isFloatingRep kind -> assignReg_FltCode kind reg src
- | wordRep == I32 && kind == I64
- -> assignReg_I64Code reg src
+#if WORD_SIZE_IN_BITS==32
+ | kind == I64 -> assignReg_I64Code reg src
+#endif
| otherwise -> assignReg_IntCode kind reg src
where kind = cmmRegRep reg
CmmStore addr src
| isFloatingRep kind -> assignMem_FltCode kind addr src
- | wordRep == I32 && kind == I64
- -> assignMem_I64Code addr src
+#if WORD_SIZE_IN_BITS==32
+ | kind == I64 -> assignMem_I64Code addr src
+#endif
| otherwise -> assignMem_IntCode kind addr src
where kind = cmmExprRep src
-- selection game are therefore that the returned
-- Reg may be modified
+#if WORD_SIZE_IN_BITS==32
assignMem_I64Code :: CmmExpr -> CmmExpr -> NatM InstrBlock
assignReg_I64Code :: CmmReg -> CmmExpr -> NatM InstrBlock
+#endif
+
+#ifndef x86_64_TARGET_ARCH
iselExpr64 :: CmmExpr -> NatM ChildCode64
+#endif
-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-- -----------------------------------------------------------------------------
+-- Utils based on getRegister, below
+
+-- The dual to getAnyReg: compute an expression into a register, but
+-- we don't mind which one it is.  A Fixed result is handed back in
+-- its required register; an Any result is materialised into a fresh
+-- temporary.
+getSomeReg :: CmmExpr -> NatM (Reg, InstrBlock)
+getSomeReg expr = do
+  reg_result <- getRegister expr
+  case reg_result of
+    Fixed _ reg code -> return (reg, code)
+    Any rep mk_code -> do
+        tmp <- getNewRegNat rep
+        return (tmp, mk_code tmp)
+
+-- -----------------------------------------------------------------------------
-- Grab the Reg for a CmmReg
getRegisterReg :: CmmReg -> Reg
-- in
return (Any F64 code)
+#endif /* i386_TARGET_ARCH */
+
+#if x86_64_TARGET_ARCH
+
+-- Floating-point zero: just xor the destination register with
+-- itself, no memory access needed.
+getRegister (CmmLit (CmmFloat 0.0 rep)) = do
+   let code dst = unitOL (XOR rep (OpReg dst) (OpReg dst))
+	-- I don't know why there are xorpd, xorps, and pxor instructions.
+	-- They all appear to do the same thing --SDM
+   return (Any rep code)
+
+-- Other floating-point literals: emit the literal into a read-only
+-- data section and load it from there.
+getRegister (CmmLit (CmmFloat f rep)) = do
+  lbl <- getNewLabelNat
+  let code dst = toOL [
+	LDATA ReadOnlyData
+		[CmmDataLabel lbl,
+		 CmmStaticLit (CmmFloat f rep)],
+	MOV rep (OpAddr (ImmAddr (ImmCLbl lbl) 0)) (OpReg dst)
+	-- ToDo: should use %rip-relative
+	]
+  -- in
+  return (Any rep code)
+
+#endif /* x86_64_TARGET_ARCH */
+
+#if i386_TARGET_ARCH || x86_64_TARGET_ARCH
-- catch simple cases of zero- or sign-extended load
getRegister (CmmMachOp (MO_U_Conv I8 I32) [CmmLoad addr _]) = do
  -- BUG FIX: a zero-extension (MO_U_Conv) of an I8 load must use the
  -- byte-sized zero-extending move (MOVZxL I8); the previous
  -- MOVSxL I16 read two bytes and sign-extended, giving wrong results.
  code <- intLoadCode (MOVZxL I8) addr
  return (Any I32 code)
+#endif
+
+#if x86_64_TARGET_ARCH
+
+-- catch simple cases of zero- or sign-extended load
+getRegister (CmmMachOp (MO_U_Conv I8 I64) [CmmLoad addr _]) = do
+  code <- intLoadCode (MOVZxL I8) addr
+  return (Any I64 code)
+
+getRegister (CmmMachOp (MO_S_Conv I8 I64) [CmmLoad addr _]) = do
+  code <- intLoadCode (MOVSxL I8) addr
+  return (Any I64 code)
+
+getRegister (CmmMachOp (MO_U_Conv I16 I64) [CmmLoad addr _]) = do
+  code <- intLoadCode (MOVZxL I16) addr
+  return (Any I64 code)
+
+getRegister (CmmMachOp (MO_S_Conv I16 I64) [CmmLoad addr _]) = do
+  code <- intLoadCode (MOVSxL I16) addr
+  return (Any I64 code)
+
+-- on x86_64 a 32-bit MOV implicitly zeroes the upper 32 bits of the
+-- destination, so a plain MOV I32 *is* the zero-extending load:
+getRegister (CmmMachOp (MO_U_Conv I32 I64) [CmmLoad addr _]) = do
+  code <- intLoadCode (MOV I32) addr -- 32-bit loads zero-extend
+  return (Any I64 code)
+
+getRegister (CmmMachOp (MO_S_Conv I32 I64) [CmmLoad addr _]) = do
+  code <- intLoadCode (MOVSxL I32) addr
+  return (Any I64 code)
+
+#endif
+
+#if x86_64_TARGET_ARCH
+-- SSE2 float negation: compute x into dst, then flip its sign bit by
+-- XORing with a 16-byte-aligned sign-bit mask.
+-- BUG FIX: the argument x was never evaluated into dst before the
+-- XOR, so the result was whatever happened to be in dst, not (-x).
+getRegister (CmmMachOp (MO_S_Neg F32) [x]) = do
+  x_code <- getAnyReg x
+  lbl <- getNewLabelNat
+  let
+    code dst = x_code dst `appOL` toOL [
+	-- This is how gcc does it, so it can't be that bad:
+	LDATA ReadOnlyData16 [
+		CmmAlign 16,
+		CmmDataLabel lbl,
+		CmmStaticLit (CmmInt 0x80000000 I32),
+		CmmStaticLit (CmmInt 0 I32),
+		CmmStaticLit (CmmInt 0 I32),
+		CmmStaticLit (CmmInt 0 I32)
+	],
+	XOR F32 (OpAddr (ImmAddr (ImmCLbl lbl) 0)) (OpReg dst)
+	-- xorps, so we need the 128-bit constant
+	-- ToDo: rip-relative
+	]
+  --
+  return (Any F32 code)
+
+-- As for F32 negation above, but flipping bit 63 (xorpd).
+-- BUG FIX: evaluate x into dst before XORing with the sign-bit mask;
+-- previously x was bound but never used.
+getRegister (CmmMachOp (MO_S_Neg F64) [x]) = do
+  x_code <- getAnyReg x
+  lbl <- getNewLabelNat
+  let
+	-- This is how gcc does it, so it can't be that bad:
+    code dst = x_code dst `appOL` toOL [
+	LDATA ReadOnlyData16 [
+		CmmAlign 16,
+		CmmDataLabel lbl,
+		CmmStaticLit (CmmInt 0x8000000000000000 I64),
+		CmmStaticLit (CmmInt 0 I64)
+	],
+	-- gcc puts an unpck here.  Wonder if we need it.
+	XOR F64 (OpAddr (ImmAddr (ImmCLbl lbl) 0)) (OpReg dst)
+	-- xorpd, so we need the 128-bit constant
+	-- ToDo: rip-relative
+	]
+  --
+  return (Any F64 code)
+#endif
+
+#if i386_TARGET_ARCH || x86_64_TARGET_ARCH
getRegister (CmmMachOp mop [x]) -- unary MachOps
= case mop of
+#if i386_TARGET_ARCH
MO_S_Neg F32 -> trivialUFCode F32 (GNEG F32) x
MO_S_Neg F64 -> trivialUFCode F64 (GNEG F64) x
+#endif
MO_S_Neg rep -> trivialUCode rep (NEGI rep) x
MO_Not rep -> trivialUCode rep (NOT rep) x
MO_S_Conv I16 I8 -> conversionNop I16 x
MO_U_Conv I32 I16 -> conversionNop I32 x
MO_S_Conv I32 I16 -> conversionNop I32 x
+#if x86_64_TARGET_ARCH
+ MO_U_Conv I64 I32 -> conversionNop I64 x
+ MO_S_Conv I64 I32 -> conversionNop I64 x
+ MO_U_Conv I64 I16 -> conversionNop I64 x
+ MO_S_Conv I64 I16 -> conversionNop I64 x
+ MO_U_Conv I64 I8 -> conversionNop I64 x
+ MO_S_Conv I64 I8 -> conversionNop I64 x
+#endif
+
MO_U_Conv rep1 rep2 | rep1 == rep2 -> conversionNop rep1 x
MO_S_Conv rep1 rep2 | rep1 == rep2 -> conversionNop rep1 x
MO_S_Conv I16 I32 -> integerExtend I16 I32 MOVSxL x
MO_S_Conv I8 I16 -> integerExtend I8 I16 MOVSxL x
+#if x86_64_TARGET_ARCH
+ MO_U_Conv I8 I64 -> integerExtend I8 I64 MOVZxL x
+ MO_U_Conv I16 I64 -> integerExtend I16 I64 MOVZxL x
+ MO_U_Conv I32 I64 -> integerExtend I32 I64 MOVZxL x
+ MO_S_Conv I8 I64 -> integerExtend I8 I64 MOVSxL x
+ MO_S_Conv I16 I64 -> integerExtend I16 I64 MOVSxL x
+ MO_S_Conv I32 I64 -> integerExtend I32 I64 MOVSxL x
+ -- for 32-to-64 bit zero extension, amd64 uses an ordinary movl.
+ -- However, we don't want the register allocator to throw it
+ -- away as an unnecessary reg-to-reg move, so we keep it in
+ -- the form of a movzl and print it as a movl later.
+#endif
+
+#if i386_TARGET_ARCH
MO_S_Conv F32 F64 -> conversionNop F64 x
MO_S_Conv F64 F32 -> conversionNop F32 x
+#else
+ MO_S_Conv F32 F64 -> coerceFP2FP F64 x
+ MO_S_Conv F64 F32 -> coerceFP2FP F32 x
+#endif
+
MO_S_Conv from to
| isFloatingRep from -> coerceFP2Int from to x
| isFloatingRep to -> coerceInt2FP from to x
+ other -> pprPanic "getRegister" (pprMachOp mop)
where
-- signed or unsigned extension.
integerExtend from to instr expr = do
MO_U_Lt rep -> condIntReg LU x y
MO_U_Le rep -> condIntReg LEU x y
- MO_Add F32 -> trivialFCode F32 GADD x y
- MO_Sub F32 -> trivialFCode F32 GSUB x y
+#if i386_TARGET_ARCH
+ MO_Add F32 -> trivialFCode F32 GADD x y
+ MO_Sub F32 -> trivialFCode F32 GSUB x y
MO_Add F64 -> trivialFCode F64 GADD x y
MO_Sub F64 -> trivialFCode F64 GSUB x y
- MO_S_Quot F32 -> trivialFCode F32 GDIV x y
+ MO_S_Quot F32 -> trivialFCode F32 GDIV x y
MO_S_Quot F64 -> trivialFCode F64 GDIV x y
+#endif
+
+#if x86_64_TARGET_ARCH
+ MO_Add F32 -> trivialFCode F32 ADD x y
+ MO_Sub F32 -> trivialFCode F32 SUB x y
+
+ MO_Add F64 -> trivialFCode F64 ADD x y
+ MO_Sub F64 -> trivialFCode F64 SUB x y
+
+ MO_S_Quot F32 -> trivialFCode F32 FDIV x y
+ MO_S_Quot F64 -> trivialFCode F64 FDIV x y
+#endif
MO_Add rep -> add_code rep x y
MO_Sub rep -> sub_code rep x y
MO_U_Quot rep -> div_code rep False True x y
MO_U_Rem rep -> div_code rep False False x y
+#if i386_TARGET_ARCH
MO_Mul F32 -> trivialFCode F32 GMUL x y
MO_Mul F64 -> trivialFCode F64 GMUL x y
+#endif
+
+#if x86_64_TARGET_ARCH
+ MO_Mul F32 -> trivialFCode F32 MUL x y
+ MO_Mul F64 -> trivialFCode F64 MUL x y
+#endif
+
MO_Mul rep -> let op = IMUL rep in
trivialCode rep op (Just op) x y
where
--------------------
-- Check for signed multiply overflow: multiply a and b, then compare
-- the high word of the result against the sign-extension of the low
-- word.  dst is zero iff the multiply did not overflow.
imulMayOflo :: MachRep -> CmmExpr -> CmmExpr -> NatM Register
-imulMayOflo I32 a b = do
-         res_lo <- getNewRegNat I32
-         res_hi <- getNewRegNat I32
+imulMayOflo rep a b = do
+         res_lo <- getNewRegNat rep
+         res_hi <- getNewRegNat rep
         (a_reg, a_code) <- getNonClobberedReg a
         (b_reg, b_code) <- getSomeReg b
         let
+            -- BUG FIX: when generalising from I32 to an arbitrary rep,
+            -- the sign-extending shift count must track the width:
+            -- 31 for I32 but 63 for I64 (it was left hard-coded at 31).
+            shift_amt = case rep of
+                          I32 -> 31
+                          I64 -> 63
+                          _   -> panic "imulMayOflo: Bad word rep"
             code dst = a_code `appOL` b_code `appOL`
                        toOL [
-                           MOV I32 (OpReg a_reg) (OpReg res_hi),
-                           MOV I32 (OpReg b_reg) (OpReg res_lo),
+                           MOV rep (OpReg a_reg) (OpReg res_hi),
+                           MOV rep (OpReg b_reg) (OpReg res_lo),
                            IMUL64 res_hi res_lo,         -- result in res_hi:res_lo
-                           SAR I32 (OpImm (ImmInt 31)) (OpReg res_lo),  -- sign extend lower part
-                           SUB I32 (OpReg res_hi) (OpReg res_lo),  -- compare against upper
-                           MOV I32 (OpReg res_lo) (OpReg dst)
+                           SAR rep (OpImm (ImmInt shift_amt)) (OpReg res_lo),  -- sign extend lower part
+                           SUB rep (OpReg res_hi) (OpReg res_lo),  -- compare against upper
+                           MOV rep (OpReg res_lo) (OpReg dst)
                            -- dst==0 if high part == sign extended low part
                        ]
        -- in
-        return (Any I32 code)
+        return (Any rep code)
--------------------
shift_code :: MachRep
(y_op, y_code) <- getOperand y -- cannot be clobbered
x_code <- getAnyReg x
let
- widen | signed = CLTD
+ widen | signed = CLTD rep
| otherwise = XOR rep (OpReg edx) (OpReg edx)
instr | signed = IDIV
return (Fixed rep result code)
-
-- Floating-point loads: the x87 GLD instruction on i386, a plain
-- (SSE) MOV on x86_64.
getRegister (CmmLoad mem pk)
  | isFloatingRep pk
  = do
    Amode src mem_code <- getAmode mem
    let
	code dst = mem_code `snocOL`
-		   GLD pk src dst
+		   IF_ARCH_i386(GLD pk src dst,
+			        MOV pk (OpAddr src) (OpReg dst))
    --
    return (Any pk code)
+#if i386_TARGET_ARCH
getRegister (CmmLoad mem pk)
| pk /= I64
= do
-- we can't guarantee access to an 8-bit variant of every register
-- (esi and edi don't have 8-bit variants), so to make things
-- simpler we do our 8-bit arithmetic with full 32-bit registers.
+#endif
+
+#if x86_64_TARGET_ARCH
+-- Simpler memory load code on x86_64: any integer load is a single
+-- MOV (unlike i386, I64 needs no special case on a 64-bit machine)
+getRegister (CmmLoad mem pk)
+  = do
+    code <- intLoadCode (MOV pk) mem
+    return (Any pk code)
+#endif
getRegister (CmmLit (CmmInt 0 rep))
= let
anyReg (Any _ code) = return code
anyReg (Fixed rep reg fcode) = return (\dst -> fcode `snocOL` reg2reg rep reg dst)
--- The dual to getAnyReg: compute an expression into a register, but
--- we don't mind which one it is.
-getSomeReg :: CmmExpr -> NatM (Reg, InstrBlock)
-getSomeReg expr = do
- r <- getRegister expr
- case r of
- Any rep code -> do
- tmp <- getNewRegNat rep
- return (tmp, code tmp)
- Fixed _ reg code ->
- return (reg, code)
-
-- A bit like getSomeReg, but we want a reg that can be byte-addressed.
-- Fixed registers might not be byte-addressable, so we make sure we've
-- got a temporary, inserting an extra reg copy if necessary.
getByteReg :: CmmExpr -> NatM (Reg, InstrBlock)
+#if x86_64_TARGET_ARCH
+getByteReg = getSomeReg -- all regs are byte-addressable on x86_64
+#else
getByteReg expr = do
r <- getRegister expr
case r of
return (tmp, code `snocOL` reg2reg rep reg tmp)
-- ToDo: could optimise slightly by checking for byte-addressable
-- real registers, but that will happen very rarely if at all.
+#endif
-- Another variant: this time we want the result in a register that cannot
-- be modified by code to evaluate an arbitrary expression.
-- Register-to-register move.  On i386, floating-point values live on
-- the x87 register stack and need GMOV; otherwise (and for everything
-- on x86_64) a plain MOV does the job.
reg2reg :: MachRep -> Reg -> Reg -> Instr
reg2reg rep src dst
+#if i386_TARGET_ARCH
  | isFloatingRep rep = GMOV src dst
+#endif
  | otherwise = MOV rep (OpReg src) (OpReg dst)
-#endif /* i386_TARGET_ARCH */
+#endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */
-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
extendUExpr I32 x = x
extendUExpr rep x = CmmMachOp (MO_U_Conv rep I32) [x]
--- ###FIXME: exact code duplication from x86 case
--- The dual to getAnyReg: compute an expression into a register, but
--- we don't mind which one it is.
-getSomeReg :: CmmExpr -> NatM (Reg, InstrBlock)
-getSomeReg expr = do
- r <- getRegister expr
- case r of
- Any rep code -> do
- tmp <- getNewRegNat rep
- return (tmp, code tmp)
- Fixed _ reg code ->
- return (reg, code)
-
#endif /* powerpc_TARGET_ARCH */
-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-#if i386_TARGET_ARCH
+#if i386_TARGET_ARCH || x86_64_TARGET_ARCH
-- This is all just ridiculous, since it carefully undoes
-- what mangleIndexTree has just done.
-getAmode (CmmMachOp (MO_Sub rep) [x, CmmLit (CmmInt i _)])
+getAmode (CmmMachOp (MO_Sub rep) [x, CmmLit lit@(CmmInt i _)])
+ | not (is64BitLit lit)
-- ASSERT(rep == I32)???
= do (x_reg, x_code) <- getSomeReg x
let off = ImmInt (-(fromInteger i))
return (Amode (AddrBaseIndex (Just x_reg) Nothing off) x_code)
-getAmode (CmmMachOp (MO_Add rep) [x, CmmLit (CmmInt i _)])
+getAmode (CmmMachOp (MO_Add rep) [x, CmmLit lit@(CmmInt i _)])
+ | not (is64BitLit lit)
-- ASSERT(rep == I32)???
= do (x_reg, x_code) <- getSomeReg x
let off = ImmInt (fromInteger i)
return (Amode (AddrBaseIndex (Just x_reg) (Just (y_reg,base)) (ImmInt 0))
code)
-getAmode (CmmLit lit)
+getAmode (CmmLit lit) | not (is64BitLit lit)
= return (Amode (ImmAddr (litToImm lit) 0) nilOL)
getAmode expr = do
(reg,code) <- getSomeReg expr
return (Amode (AddrBaseIndex (Just reg) Nothing (ImmInt 0)) code)
-#endif /* i386_TARGET_ARCH */
+#endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */
-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-- -----------------------------------------------------------------------------
-- getOperand: sometimes any operand will do.
--- getOperand gets a *safe* operand; that is, the value of the operand
--- will remain valid across the computation of an arbitrary expression,
--- unless the expression is computed directly into a register which
--- the operand refers to (see trivialCode where this function is used
--- for an example).
+-- getNonClobberedOperand: the value of the operand will remain valid across
+-- the computation of an arbitrary expression, unless the expression
+-- is computed directly into a register which the operand refers to
+-- (see trivialCode where this function is used for an example).
-#ifdef i386_TARGET_ARCH
+#if i386_TARGET_ARCH || x86_64_TARGET_ARCH
-getOperand :: CmmExpr -> NatM (Operand, InstrBlock)
-getOperand (CmmLoad mem pk)
- | not (isFloatingRep pk) && pk /= I64 = do
+getNonClobberedOperand :: CmmExpr -> NatM (Operand, InstrBlock)
+getNonClobberedOperand (CmmLit lit)
+ | not (is64BitLit lit) && not (isFloatingRep (cmmLitRep lit)) =
+ return (OpImm (litToImm lit), nilOL)
+getNonClobberedOperand (CmmLoad mem pk)
+ | IF_ARCH_i386(not (isFloatingRep pk) && pk /= I64, True) = do
Amode src mem_code <- getAmode mem
(src',save_code) <-
if (amodeCouldBeClobbered src)
else
return (src, nilOL)
return (OpAddr src', save_code `appOL` mem_code)
-
-getOperand e = do
+getNonClobberedOperand e = do
(reg, code) <- getNonClobberedReg e
return (OpReg reg, code)
regClobbered (RealReg rr) = isFastTrue (freeReg rr)
regClobbered _ = False
+-- getOperand: the operand is not required to remain valid across the
+-- computation of an arbitrary expression.
+getOperand :: CmmExpr -> NatM (Operand, InstrBlock)
+-- non-float literals that fit in an immediate field can be used directly
+getOperand (CmmLit lit)
+  | not (is64BitLit lit) && not (isFloatingRep (cmmLitRep lit)) =
+    return (OpImm (litToImm lit), nilOL)
+-- loads can be memory operands; on i386 only non-float, non-I64 loads
+-- qualify, on x86_64 any load does (second branch of IF_ARCH_i386)
+getOperand (CmmLoad mem pk)
+  | IF_ARCH_i386(not (isFloatingRep pk) && pk /= I64, True) = do
+    Amode src mem_code <- getAmode mem
+    return (OpAddr src, mem_code)
+-- anything else: compute it into a register
+getOperand e = do
+    (reg, code) <- getNonClobberedReg e
+    return (OpReg reg, code)
+
+-- Can this expression be used directly as an x86 instruction operand?
+-- Memory loads always can; literals can unless they are floating
+-- point or too wide to fit in a 32-bit immediate field.
+isOperand :: CmmExpr -> Bool
+isOperand expr = case expr of
+  CmmLoad _ _ -> True
+  CmmLit lit  -> not (is64BitLit lit || isFloatingRep (cmmLitRep lit))
+  _           -> False
+
+-- getRegOrMem: like getOperand, but never yields an immediate --
+-- the result is either a register or a memory operand.
+getRegOrMem :: CmmExpr -> NatM (Operand, InstrBlock)
+getRegOrMem (CmmLoad mem pk)
+  | IF_ARCH_i386(not (isFloatingRep pk) && pk /= I64, True) = do
+    Amode src mem_code <- getAmode mem
+    return (OpAddr src, mem_code)
+getRegOrMem e = do
+    (reg, code) <- getNonClobberedReg e
+    return (OpReg reg, code)
+
+-- Is this literal too wide to be used as an immediate?  x86_64
+-- instructions take at most a 32-bit sign-extended immediate, so an
+-- I64 literal outside [-2^31, 2^31) must go via a register.  On i386
+-- (and for all other literals) the answer is always False.
+#if x86_64_TARGET_ARCH
+is64BitLit (CmmInt i I64) = i > 0x7fffffff || i < -0x80000000
+#endif
+is64BitLit x = False
#endif
-- -----------------------------------------------------------------------------
-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-#if i386_TARGET_ARCH || sparc_TARGET_ARCH
+#if i386_TARGET_ARCH || x86_64_TARGET_ARCH || sparc_TARGET_ARCH
-- yes, they really do seem to want exactly the same!
getCondCode (CmmMachOp mop [x, y])
#endif /* alpha_TARGET_ARCH */
-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-#if i386_TARGET_ARCH
+#if i386_TARGET_ARCH || x86_64_TARGET_ARCH
-- memory vs immediate
-condIntCode cond (CmmLoad x pk) (CmmLit lit) = do
+condIntCode cond (CmmLoad x pk) (CmmLit lit) | not (is64BitLit lit) = do
Amode x_addr x_code <- getAmode x
let
imm = litToImm lit
--
return (CondCode False cond code)
--- anything vs immediate
-condIntCode cond x (CmmLit lit) = do
- (x_reg, x_code) <- getSomeReg x
+-- anything vs operand
+condIntCode cond x y | isOperand y = do
+ (x_reg, x_code) <- getNonClobberedReg x
+ (y_op, y_code) <- getOperand y
let
- imm = litToImm lit
- code = x_code `snocOL`
- CMP (cmmLitRep lit) (OpImm imm) (OpReg x_reg)
- -- in
- return (CondCode False cond code)
-
--- memory vs anything
-condIntCode cond (CmmLoad x pk) y = do
- (y_reg, y_code) <- getNonClobberedReg y
- Amode x_addr x_code <- getAmode x
- let
- code = y_code `appOL`
- x_code `snocOL`
- CMP pk (OpReg y_reg) (OpAddr x_addr)
- -- in
- return (CondCode False cond code)
-
--- anything vs memory
-condIntCode cond y (CmmLoad x pk) = do
- (y_reg, y_code) <- getNonClobberedReg y
- Amode x_addr x_code <- getAmode x
- let
- code = y_code `appOL`
- x_code `snocOL`
- CMP pk (OpAddr x_addr) (OpReg y_reg)
+ code = x_code `appOL` y_code `snocOL`
+ CMP (cmmExprRep x) y_op (OpReg x_reg)
-- in
return (CondCode False cond code)
-- anything vs anything
condIntCode cond x y = do
- (x_op, x_code) <- getOperand x
- (y_reg, y_code) <- getSomeReg y
+ (y_reg, y_code) <- getNonClobberedReg y
+ (x_op, x_code) <- getRegOrMem x
let
- code = x_code `appOL`
- y_code `snocOL`
+ code = y_code `appOL`
+ x_code `snocOL`
CMP (cmmExprRep x) (OpReg y_reg) x_op
-- in
return (CondCode False cond code)
+#endif
------------
+#if i386_TARGET_ARCH
condFltCode cond x y
= ASSERT(cond `elem` ([EQQ, NE, LE, LTT, GE, GTT])) do
(x_reg, x_code) <- getNonClobberedReg x
-- The GCMP insn does the test and sets the zero flag if comparable
-- and true. Hence we always supply EQQ as the condition to test.
return (CondCode True EQQ code)
-
#endif /* i386_TARGET_ARCH */
+#if x86_64_TARGET_ARCH
+-- in the SSE2 comparison ops (ucomiss, ucomisd) the left arg may be
+-- an operand, but the right must be a reg.  We can probably do better
+-- than this general case...
+condFltCode cond x y = do
+  (x_reg, x_code) <- getNonClobberedReg x
+  (y_op, y_code) <- getOperand y
+  let
+	-- x is computed first and must survive the computation of y,
+	-- hence the non-clobbered register for x
+	code = x_code `appOL`
+	       y_code `snocOL`
+		  CMP (cmmExprRep x) y_op (OpReg x_reg)
+  -- in
+  return (CondCode False (condToUnsigned cond) code)
+	-- we need to use the unsigned comparison operators on the
+	-- result of this comparison.
+#endif
+
-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
#if sparc_TARGET_ARCH
-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-#if i386_TARGET_ARCH
+#if i386_TARGET_ARCH || x86_64_TARGET_ARCH
-- integer assignment to memory
assignMem_IntCode pk addr src = do
return code
where
get_op_RI :: CmmExpr -> NatM (InstrBlock,Operand) -- code, operator
- get_op_RI (CmmLit lit)
+ get_op_RI (CmmLit lit) | not (is64BitLit lit)
= return (nilOL, OpImm (litToImm lit))
get_op_RI op
= do (reg,code) <- getNonClobberedReg op
-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-#if i386_TARGET_ARCH
+#if i386_TARGET_ARCH || x86_64_TARGET_ARCH
-- Floating point assignment to memory
assignMem_FltCode pk addr src = do
let
code = src_code `appOL`
addr_code `snocOL`
- GST pk src_reg addr
+ IF_ARCH_i386(GST pk src_reg addr,
+ MOV pk (OpReg src_reg) (OpAddr addr))
return code
-- Floating point assignment to a register/temporary
-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-#if i386_TARGET_ARCH
+#if i386_TARGET_ARCH || x86_64_TARGET_ARCH
genJump (CmmLoad mem pk) = do
Amode target code <- getAmode mem
genBranch id = return (unitOL (BR id))
#endif
-#if i386_TARGET_ARCH
+#if i386_TARGET_ARCH || x86_64_TARGET_ARCH
genBranch id = return (unitOL (JXX ALWAYS id))
#endif
-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-#if i386_TARGET_ARCH
+#if i386_TARGET_ARCH || x86_64_TARGET_ARCH
genCondJump id bool = do
CondCode _ cond code <- getCondCode bool
toOL (
-- Deallocate parameters after call for ccall;
-- but not for stdcall (callee does it)
- (if cconv == StdCallConv then [] else
+ (if cconv == StdCallConv || tot_arg_size==0 then [] else
[ADD I32 (OpImm (ImmInt tot_arg_size)) (OpReg esp)])
++
[DELTA (delta + tot_arg_size)]
-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+#if x86_64_TARGET_ARCH
+
+-- genCCall for x86_64: integer arguments go in the registers of
+-- allArgRegs and floating-point arguments in allFPArgRegs (both
+-- defined in MachRegs); any left over are pushed on the stack,
+-- right to left.  Results come back in rax (ints) or xmm0 (floats).
+genCCall (CmmPrim op) [(r,_)] args vols =
+  panic "genCCall(CmmPrim)(x86_64)"
+
+genCCall target dest_regs args vols = do
+
+    -- load up the register arguments
+    (stack_args, sse_regs, load_args_code)
+	 <- load_args args allArgRegs allFPArgRegs 0 nilOL
+
+    let
+	tot_arg_size = arg_size * length stack_args
+
+	-- On entry to the called function, %rsp should be aligned
+	-- on a 16-byte boundary +8 (i.e. the first stack arg after
+	-- the return address is 16-byte aligned).  In STG land
+	-- %rsp is kept 16-byte aligned (see StgCRun.c), so we just
+	-- need to make sure we push a multiple of 16-bytes of args,
+	-- plus the return address, to get the correct alignment.
+	-- Urg, this is hard.  We need to feed the delta back into
+	-- the arg pushing code.
+    (real_size, adjust_rsp) <-
+	if tot_arg_size `rem` 16 == 0
+	    then return (tot_arg_size, nilOL)
+	    else do -- we need to adjust...
+		delta <- getDeltaNat
+		setDeltaNat (delta-8)
+		return (tot_arg_size+8, toOL [
+				SUB I64 (OpImm (ImmInt 8)) (OpReg rsp),
+				DELTA (delta-8)
+			])
+
+    -- push the stack args, right to left
+    push_code <- push_args (reverse stack_args) nilOL
+    delta <- getDeltaNat
+
+    -- deal with static vs dynamic call targets
+    (callinsns,cconv) <-
+      case target of
+	-- CmmPrim -> ...
+	CmmForeignCall (CmmLit (CmmLabel lbl)) conv
+	   -> -- ToDo: stdcall arg sizes
+	      return (unitOL (CALL (Left fn_imm)), conv)
+	   where fn_imm = ImmCLbl lbl
+	CmmForeignCall expr conv
+	   -> do (dyn_r, dyn_c) <- getSomeReg expr
+		 return (dyn_c `snocOL` CALL (Right dyn_r), conv)
+
+    let
+	-- The x86_64 ABI requires us to set %al to the number of SSE
+	-- registers that contain arguments, if the called routine
+	-- is a varargs function.  We don't know whether it's a
+	-- varargs function or not, so we have to assume it is.
+	--
+	-- It's not safe to omit this assignment, even if the number
+	-- of SSE regs in use is zero.  If %al is larger than 8
+	-- on entry to a varargs function, seg faults ensue.
+	assign_eax n = unitOL (MOV I32 (OpImm (ImmInt n)) (OpReg eax))
+
+    let call = callinsns `appOL`
+	       toOL (
+			-- Deallocate parameters after call for ccall;
+			-- but not for stdcall (callee does it)
+			-- NOTE(review): this uses esp while the rest of
+			-- this function uses rsp -- confirm both name
+			-- the same machine register on x86_64.
+		  (if cconv == StdCallConv || real_size==0 then [] else
+		        [ADD wordRep (OpImm (ImmInt real_size)) (OpReg esp)])
+		  ++
+		  [DELTA (delta + real_size)]
+	       )
+    -- in
+    setDeltaNat (delta + real_size)
+
+    let
+	-- assign the results, if necessary; floats come back in xmm0,
+	-- everything else in rax.  Only a single result is supported.
+	assign_code [] = nilOL
+	assign_code [(dest,_hint)] =
+	  case rep of
+		F32 -> unitOL (MOV rep (OpReg xmm0) (OpReg r_dest))
+		F64 -> unitOL (MOV rep (OpReg xmm0) (OpReg r_dest))
+		rep -> unitOL (MOV rep (OpReg rax) (OpReg r_dest))
+	  where
+		rep = cmmRegRep dest
+		r_dest = getRegisterReg dest
+	assign_code many = panic "genCCall.assign_code many"
+
+    return (load_args_code	`appOL`
+	    adjust_rsp		`appOL`
+	    push_code		`appOL`
+	    assign_eax sse_regs	`appOL`
+	    call		`appOL`
+	    assign_code dest_regs)
+
+  where
+    arg_size = 8 -- always, at the mo
+
+    load_args :: [(CmmExpr,MachHint)]
+	      -> [Reg]			-- int regs avail for args
+	      -> [Reg]			-- FP regs avail for args
+	      -> Int -> InstrBlock
+	      -> NatM ([(CmmExpr,MachHint)],Int,InstrBlock)
+    load_args args [] [] sse_regs code = return (args, sse_regs, code)
+	-- no more regs to use
+    load_args [] aregs fregs sse_regs code = return ([],sse_regs,code)
+	-- no more args to push
+    load_args ((arg,hint) : rest) aregs fregs sse_regs code
+	| isFloatingRep arg_rep =
+	    case fregs of
+	      [] -> push_this_arg
+	      (r:rs) -> do
+		 arg_code <- getAnyReg arg
+		 load_args rest aregs rs (sse_regs+1) (code `appOL` arg_code r)
+	| otherwise =
+	    case aregs of
+	      [] -> push_this_arg
+	      (r:rs) -> do
+		 arg_code <- getAnyReg arg
+		 load_args rest rs fregs sse_regs (code `appOL` arg_code r)
+	where
+	  arg_rep = cmmExprRep arg
+
+	  push_this_arg = do
+	    (args',sse',code') <- load_args rest aregs fregs sse_regs code
+	    return ((arg,hint):args', sse', code')
+
+    push_args [] code = return code
+    push_args ((arg,hint):rest) code
+	| isFloatingRep arg_rep = do
+	  (arg_reg, arg_code) <- getSomeReg arg
+	  delta <- getDeltaNat
+	  setDeltaNat (delta-arg_size)
+	  -- BUG FIX: arg_code (the code that computes the argument) was
+	  -- being discarded, and the MOV stored to [rsp] *before* rsp
+	  -- was decremented, clobbering the previously pushed word.
+	  -- Make room first, then store into the fresh slot.
+	  let code' = code `appOL` arg_code `appOL` toOL [
+			SUB wordRep (OpImm (ImmInt arg_size)) (OpReg rsp),
+			DELTA (delta-arg_size),
+			MOV arg_rep (OpReg arg_reg) (OpAddr (spRel 0))]
+	  push_args rest code'
+
+	| otherwise = do
+	  -- we only ever generate word-sized function arguments.  Promotion
+	  -- has already happened: our Int8# type is kept sign-extended
+	  -- in an Int#, for example.
+	  ASSERT(arg_rep == I64) return ()
+	  (arg_op, arg_code) <- getOperand arg
+	  delta <- getDeltaNat
+	  setDeltaNat (delta-arg_size)
+	  -- BUG FIX: arg_code was also being discarded here.
+	  let code' = code `appOL` arg_code `appOL` toOL [PUSH I64 arg_op,
+							  DELTA (delta-arg_size)]
+	  push_args rest code'
+	where
+	  arg_rep = cmmExprRep arg
+#endif
+
+-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
#if sparc_TARGET_ARCH
{-
The SPARC calling convention is an absolute
genSwitch :: CmmExpr -> [Maybe BlockId] -> NatM InstrBlock
-#if i386_TARGET_ARCH
+#if i386_TARGET_ARCH || x86_64_TARGET_ARCH
genSwitch expr ids = do
(reg,e_code) <- getSomeReg expr
lbl <- getNewLabelNat
let
jumpTable = map jumpTableEntry ids
- op = OpAddr (AddrBaseIndex Nothing (Just (reg,4)) (ImmCLbl lbl))
+ op = OpAddr (AddrBaseIndex Nothing (Just (reg,wORD_SIZE)) (ImmCLbl lbl))
code = e_code `appOL` toOL [
LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
JMP_TBL op [ id | Just id <- ids ]
-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-#if i386_TARGET_ARCH
+#if i386_TARGET_ARCH || x86_64_TARGET_ARCH
condIntReg cond x y = do
CondCode _ cond cond_code <- condIntCode cond x y
-> IF_ARCH_alpha((Reg -> RI -> Reg -> Instr)
,IF_ARCH_i386 ((Operand -> Operand -> Instr)
-> Maybe (Operand -> Operand -> Instr)
+ ,IF_ARCH_x86_64 ((Operand -> Operand -> Instr)
+ -> Maybe (Operand -> Operand -> Instr)
,IF_ARCH_sparc((Reg -> RI -> Reg -> Instr)
,IF_ARCH_powerpc(Bool -> (Reg -> Reg -> RI -> Instr)
- ,))))
+ ,)))))
-> CmmExpr -> CmmExpr -- the two arguments
-> NatM Register
-> IF_ARCH_alpha((Reg -> Reg -> Reg -> Instr)
,IF_ARCH_sparc((Size -> Reg -> Reg -> Reg -> Instr)
,IF_ARCH_i386 ((MachRep -> Reg -> Reg -> Reg -> Instr)
- ,)))
+ ,IF_ARCH_x86_64 ((MachRep -> Operand -> Operand -> Instr)
+ ,))))
-> CmmExpr -> CmmExpr -- the two arguments
-> NatM Register
#endif
:: MachRep
-> IF_ARCH_alpha((RI -> Reg -> Instr)
,IF_ARCH_i386 ((Operand -> Instr)
+ ,IF_ARCH_x86_64 ((Operand -> Instr)
,IF_ARCH_sparc((RI -> Reg -> Instr)
,IF_ARCH_powerpc((Reg -> Reg -> Instr)
- ,))))
+ ,)))))
-> CmmExpr -- the one argument
-> NatM Register
:: MachRep
-> IF_ARCH_alpha((Reg -> Reg -> Instr)
,IF_ARCH_i386 ((Reg -> Reg -> Instr)
+ ,IF_ARCH_x86_64 ((Reg -> Reg -> Instr)
,IF_ARCH_sparc((Reg -> Reg -> Instr)
- ,)))
+ ,))))
-> CmmExpr -- the one argument
-> NatM Register
#endif
-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-#if i386_TARGET_ARCH
+#if i386_TARGET_ARCH || x86_64_TARGET_ARCH
{-
The Rules of the Game are:
register happens to be the destination register.
-}
-trivialCode rep instr maybe_revinstr a (CmmLit lit_b) = do
- a_code <- getAnyReg a
- let
- code dst
- = a_code dst `snocOL`
- instr (OpImm (litToImm lit_b)) (OpReg dst)
- -- in
- return (Any rep code)
-
-trivialCode rep instr (Just revinstr) (CmmLit lit_a) b = do
+trivialCode rep instr (Just revinstr) (CmmLit lit_a) b
+ | not (is64BitLit lit_a) = do
b_code <- getAnyReg b
let
code dst
return (Any rep code)
trivialCode rep instr maybe_revinstr a b = do
- (b_op, b_code) <- getOperand b
+ (b_op, b_code) <- getNonClobberedOperand b
a_code <- getAnyReg a
tmp <- getNewRegNat rep
let
where
reg `clashesWith` OpReg reg2 = reg == reg2
reg `clashesWith` OpAddr amode = any (==reg) (addrModeRegs amode)
+ reg `clashesWith` _ = False
-----------
-----------
+#if i386_TARGET_ARCH
+
trivialFCode pk instr x y = do
(x_reg, x_code) <- getNonClobberedReg x -- these work for float regs too
(y_reg, y_code) <- getSomeReg y
-- in
return (Any pk code)
+#endif
+
+#if x86_64_TARGET_ARCH
+
+-- We use the 2-operand SSE2 floating pt instructions.  ToDo: improve on
+-- this by using some of the special cases in trivialCode above.
+trivialFCode pk instr x y = do
+  (y_reg, y_code) <- getNonClobberedReg y -- these work for float regs too
+  x_code <- getAnyReg x
+  let
+     -- y is computed first and must survive the computation of x into
+     -- dst, hence the non-clobbered register for y
+     code dst =
+	y_code `appOL`
+	x_code dst `snocOL`
+	instr pk (IF_ARCH_x86_64(OpReg,) y_reg)
+	         (IF_ARCH_x86_64(OpReg,) dst)
+  -- in
+  return (Any pk code)
+
+#endif
+
-------------
trivialUFCode rep instr x = do
coerceInt2FP :: MachRep -> MachRep -> CmmExpr -> NatM Register
coerceFP2Int :: MachRep -> MachRep -> CmmExpr -> NatM Register
-#ifdef sparc_TARGET_ARCH
+#if sparc_TARGET_ARCH
coerceDbl2Flt :: CmmExpr -> NatM Register
coerceFlt2Dbl :: CmmExpr -> NatM Register
#endif
-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+#if x86_64_TARGET_ARCH
+
+-- Truncate a floating-point value to an integer (cvtss2si/cvtsd2si).
+-- BUG FIX: the instruction must be selected by the *source* (floating)
+-- rep, not the destination: 'to' is I32/I64 here, so matching it
+-- against F32/F64 could never succeed at runtime.
+coerceFP2Int from to x = do
+  (x_op, x_code) <- getOperand x  -- ToDo: could be a safe operand
+  let
+        opc  = case from of F32 -> CVTSS2SI; F64 -> CVTSD2SI
+        code dst = x_code `snocOL` opc x_op dst
+  -- in
+  return (Any to code) -- works even if the destination rep is <I32
+
+-- Convert an integer to floating point (cvtsi2ss/cvtsi2sd); the
+-- instruction is selected by the destination float rep.
+coerceInt2FP from to x = do
+  (x_op, x_code) <- getOperand x  -- ToDo: could be a safe operand
+  let
+        opc  = case to of F32 -> CVTSI2SS; F64 -> CVTSI2SD
+        code dst = x_code `snocOL` opc x_op dst
+  -- in
+  return (Any to code) -- the destination is always a float rep here
+
+-- Convert between the two floating-point widths: converting *to* F32
+-- uses cvtsd2ss, converting *to* F64 uses cvtss2sd.
+coerceFP2FP :: MachRep -> CmmExpr -> NatM Register
+coerceFP2FP to x = do
+  (x_reg, x_code) <- getSomeReg x
+  let
+        opc  = case to of F32 -> CVTSD2SS; F64 -> CVTSS2SD
+        code dst = x_code `snocOL` opc x_reg dst
+  -- in
+  return (Any to code)
+
+#endif
+
+-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
#if sparc_TARGET_ARCH
coerceInt2FP pk x
-- * Machine instructions
Instr(..),
- Cond(..),
-#if !powerpc_TARGET_ARCH && !i386_TARGET_ARCH
+ Cond(..), condUnsigned, condToSigned, condToUnsigned,
+
+#if !powerpc_TARGET_ARCH && !i386_TARGET_ARCH && !x86_64_TARGET_ARCH
Size(..), machRepSize,
#endif
RI(..),
-#if i386_TARGET_ARCH
+#if i386_TARGET_ARCH || x86_64_TARGET_ARCH
Operand(..),
+#endif
+#if i386_TARGET_ARCH
i386_insert_ffrees,
#endif
#if sparc_TARGET_ARCH
riZero, fpRelEA, moveSp, fPair,
#endif
-#if powerpc_TARGET_ARCH
- condUnsigned, condToSigned,
-#endif
- DestInfo(..), hasDestInfo, pprDests,
-
) where
#include "HsVersions.h"
import CLabel ( CLabel, pprCLabel )
import Panic ( panic )
import Outputable
-import Config ( cLeadingUnderscore )
import FastString
import GLAEXTS
| ULE -- For CMP only
| ULT -- For CMP only
#endif
-#if i386_TARGET_ARCH
+#if i386_TARGET_ARCH || x86_64_TARGET_ARCH
= ALWAYS -- What's really used? ToDo
| EQQ
| GE
#endif
deriving Eq -- to make an assertion work
+condUnsigned GU = True
+condUnsigned LU = True
+condUnsigned GEU = True
+condUnsigned LEU = True
+condUnsigned _ = False
+
+condToSigned GU = GTT
+condToSigned LU = LTT
+condToSigned GEU = GE
+condToSigned LEU = LE
+condToSigned x = x
+
+condToUnsigned GTT = GU
+condToUnsigned LTT = LU
+condToUnsigned GE = GEU
+condToUnsigned LE = LEU
+condToUnsigned x = x
-- -----------------------------------------------------------------------------
-- Sizes on this architecture
-- ToDo: it's not clear to me that we need separate signed-vs-unsigned sizes
-- here. I've removed them from the x86 version, we'll see what happens --SDM
-#if !powerpc_TARGET_ARCH && !i386_TARGET_ARCH
+#if !powerpc_TARGET_ARCH && !i386_TARGET_ARCH && !x86_64_TARGET_ARCH
data Size
#if alpha_TARGET_ARCH
= B -- byte
--SDM 1/2003
-}
-#if i386_TARGET_ARCH
+#if i386_TARGET_ARCH || x86_64_TARGET_ARCH
-- data Instr continues...
| MOV MachRep Operand Operand
| MOVZxL MachRep Operand Operand -- size is the size of operand 1
| MOVSxL MachRep Operand Operand -- size is the size of operand 1
+ -- x86_64 note: plain mov into a 32-bit register always zero-extends
+ -- into the 64-bit reg, in contrast to the 8 and 16-bit movs which
+ -- don't affect the high bits of the register.
-- Load effective address (also a very useful three-operand add instruction :-)
| LEA MachRep Operand Operand
| ADD MachRep Operand Operand
| ADC MachRep Operand Operand
| SUB MachRep Operand Operand
- | IMUL MachRep Operand Operand -- signed int mul
- | MUL MachRep Operand Operand -- unsigned int mul
+ | MUL MachRep Operand Operand
+ | IMUL MachRep Operand Operand -- signed int mul
| IMUL64 Reg Reg
-- operand1:operand2 := (operand1[31:0] *signed operand2[31:0])
| BT MachRep Imm Operand
| NOP
+#if i386_TARGET_ARCH
-- Float Arithmetic.
-- Note that we cheat by treating G{ABS,MOV,NEG} of doubles
| GTAN MachRep Reg Reg -- src, dst
| GFREE -- do ffree on all x86 regs; an ugly hack
+#endif
+
+#if x86_64_TARGET_ARCH
+-- SSE2 floating point: we use a restricted set of the available SSE2
+-- instructions for floating-point.
+
+ -- use MOV for moving (either movss or movsd (movlpd better?))
+
+ | CVTSS2SD Reg Reg -- F32 to F64
+ | CVTSD2SS Reg Reg -- F64 to F32
+ | CVTSS2SI Operand Reg -- F32 to I32/I64 (with rounding)
+ | CVTSD2SI Operand Reg -- F64 to I32/I64 (with rounding)
+ | CVTSI2SS Operand Reg -- I32/I64 to F32
+ | CVTSI2SD Operand Reg -- I32/I64 to F64
+
+ -- use ADD & SUB for arithmetic. In both cases, operands
+ -- are Operand Reg.
+
+ -- SSE2 floating-point division:
+ | FDIV MachRep Operand Operand -- divisor, dividend(dst)
+
+ -- use CMP for comparisons. ucomiss and ucomisd instructions
+ -- compare single/double prec floating point respectively.
+
+ | SQRT MachRep Operand Reg -- src, dst
+#endif
-- Comparison
| TEST MachRep Operand Operand
| CALL (Either Imm Reg)
-- Other things.
- | CLTD -- sign extend %eax into %edx:%eax
+ | CLTD MachRep -- sign extend %eax into %edx:%eax
| FETCHGOT Reg -- pseudo-insn for position-independent code
-- pretty-prints as
| OpImm Imm -- immediate value
| OpAddr AddrMode -- memory reference
+#endif /* i386 or x86_64 */
+#if i386_TARGET_ARCH
i386_insert_ffrees :: [Instr] -> [Instr]
i386_insert_ffrees insns
| any is_G_instr insns
GSIN _ _ _ -> True; GCOS _ _ _ -> True; GTAN _ _ _ -> True
GFREE -> panic "is_G_instr: GFREE (!)"
other -> False
-
#endif /* i386_TARGET_ARCH */
| FETCHPC Reg -- pseudo-instruction:
-- bcl to next insn, mflr reg
-condUnsigned GU = True
-condUnsigned LU = True
-condUnsigned GEU = True
-condUnsigned LEU = True
-condUnsigned _ = False
-
-condToSigned GU = GTT
-condToSigned LU = LTT
-condToSigned GEU = GE
-condToSigned LEU = LE
-condToSigned x = x
#endif /* powerpc_TARGET_ARCH */
-
-
--- -----------------------------------------------------------------------------
--- DestInfo
-
--- ToDo: might not be needed anymore --SDM
-
--- used by insnFuture in RegAllocInfo.lhs
-data DestInfo
- = NoDestInfo -- no supplied dests; infer from context
- | DestInfo [CLabel] -- precisely these dests and no others
-
-hasDestInfo NoDestInfo = False
-hasDestInfo (DestInfo _) = True
-
-pprDests :: DestInfo -> SDoc
-pprDests NoDestInfo = text "NoDestInfo"
-pprDests (DestInfo dsts) = brackets (hsep (map pprCLabel dsts))
fake0, fake1, fake2, fake3, fake4, fake5,
addrModeRegs,
#endif
+#if x86_64_TARGET_ARCH
+ rax, rbx, rcx, rdx, rsi, rdi, rbp, rsp,
+ eax, ebx, ecx, edx, esi, edi, ebp, esp,
+ r8, r9, r10, r11, r12, r13, r14, r15,
+ xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7,
+ xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15,
+ xmm, eax, edx,
+ addrModeRegs, allFPArgRegs,
+#endif
#if sparc_TARGET_ARCH
fits13Bits,
fpRel, gReg, iReg, lReg, oReg, largeOffsetError,
| AddrRegImm Reg Imm
#endif
-#if i386_TARGET_ARCH
+#if i386_TARGET_ARCH || x86_64_TARGET_ARCH
= AddrBaseIndex Base Index Displacement
| ImmAddr Imm Int
| AddrRegImm Reg Imm
#endif
-#if i386_TARGET_ARCH
+#if i386_TARGET_ARCH || x86_64_TARGET_ARCH
addrModeRegs :: AddrMode -> [Reg]
addrModeRegs (AddrBaseIndex b i _) = b_regs ++ i_regs
where
#if alpha_TARGET_ARCH
_ -> panic "MachMisc.addrOffset not defined for Alpha"
#endif
-#if i386_TARGET_ARCH
+#if i386_TARGET_ARCH || x86_64_TARGET_ARCH
ImmAddr i off0 -> Just (ImmAddr i (off0 + off))
AddrBaseIndex r i (ImmInt n) -> Just (AddrBaseIndex r i (ImmInt (n + off)))
-> AddrMode
spRel n
-#if i386_TARGET_ARCH
+#if defined(i386_TARGET_ARCH)
= AddrBaseIndex (Just esp) Nothing (ImmInt (n * wORD_SIZE))
+#elif defined(x86_64_TARGET_ARCH)
+ = AddrBaseIndex (Just rsp) Nothing (ImmInt (n * wORD_SIZE))
#else
= AddrRegImm sp (ImmInt (n * wORD_SIZE))
#endif
#endif
{-
+AMD x86_64 architecture:
+- Registers 0-16 have 32-bit counterparts (eax, ebx etc.)
+- Registers 0-7 have 16-bit counterparts (ax, bx etc.)
+- Registers 0-3 have 8 bit counterparts (ah, bh etc.)
+
+-}
+
+#if x86_64_TARGET_ARCH
+
+rax, rbx, rcx, rdx, rsp, rbp, rsi, rdi,
+ r8, r9, r10, r11, r12, r13, r14, r15,
+ xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7,
+ xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15 :: Reg
+
+rax = RealReg 0
+rbx = RealReg 1
+rcx = RealReg 2
+rdx = RealReg 3
+rsi = RealReg 4
+rdi = RealReg 5
+rbp = RealReg 6
+rsp = RealReg 7
+r8 = RealReg 8
+r9 = RealReg 9
+r10 = RealReg 10
+r11 = RealReg 11
+r12 = RealReg 12
+r13 = RealReg 13
+r14 = RealReg 14
+r15 = RealReg 15
+xmm0 = RealReg 16
+xmm1 = RealReg 17
+xmm2 = RealReg 18
+xmm3 = RealReg 19
+xmm4 = RealReg 20
+xmm5 = RealReg 21
+xmm6 = RealReg 22
+xmm7 = RealReg 23
+xmm8 = RealReg 24
+xmm9 = RealReg 25
+xmm10 = RealReg 26
+xmm11 = RealReg 27
+xmm12 = RealReg 28
+xmm13 = RealReg 29
+xmm14 = RealReg 30
+xmm15 = RealReg 31
+
+ -- so we can re-use some x86 code:
+eax = rax
+ebx = rbx
+ecx = rcx
+edx = rdx
+esi = rsi
+edi = rdi
+ebp = rbp
+esp = rsp
+
+xmm n = RealReg (16+n)
+
+-- On x86, we might want to have an 8-bit RegClass, which would
+-- contain just regs 1-4 (the others don't have 8-bit versions).
+-- However, we can get away without this at the moment because the
+-- only allocatable integer regs are also 8-bit compatible (1, 3, 4).
+regClass (RealReg i) = if i < 16 then RcInteger else RcDouble
+regClass (VirtualRegI u) = RcInteger
+regClass (VirtualRegHi u) = RcInteger
+regClass (VirtualRegD u) = RcDouble
+regClass (VirtualRegF u) = pprPanic "regClass(x86_64):VirtualRegF"
+ (ppr (VirtualRegF u))
+
+regNames
+ = ["%rax", "%rbx", "%rcx", "%rdx", "%rsi", "%rdi", "%rbp", "%rsp" ]
+
+showReg :: RegNo -> String
+showReg n
+  | n >= 16 = "%xmm" ++ show (n-16)
+ | n >= 8 = "%r" ++ show n
+ | otherwise = regNames !! n
+
+#endif
+
+{-
The SPARC has 64 registers of interest; 32 integer registers and 32
floating point registers. The mapping of STG registers to SPARC
machine registers is defined in StgRegs.h. We are, of course,
#define fake4 12
#define fake5 13
#endif
+
+#if x86_64_TARGET_ARCH
+#define rax 0
+#define rbx 1
+#define rcx 2
+#define rdx 3
+#define rsi 4
+#define rdi 5
+#define rbp 6
+#define rsp 7
+#define r8 8
+#define r9 9
+#define r10 10
+#define r11 11
+#define r12 12
+#define r13 13
+#define r14 14
+#define r15 15
+#define xmm0 16
+#define xmm1 17
+#define xmm2 18
+#define xmm3 19
+#define xmm4 20
+#define xmm5 21
+#define xmm6 22
+#define xmm7 23
+#define xmm8 24
+#define xmm9 25
+#define xmm10 26
+#define xmm11 27
+#define xmm12 28
+#define xmm13 29
+#define xmm14 30
+#define xmm15 31
+#endif
+
#if sparc_TARGET_ARCH
#define g0 0
#define g1 1
allMachRegNos
= IF_ARCH_alpha( [0..63],
IF_ARCH_i386( [0..13],
+ IF_ARCH_x86_64( [0..31],
IF_ARCH_sparc( ([0..31]
++ [f0,f2 .. nCG_FirstFloatReg-1]
++ [nCG_FirstFloatReg .. f31]),
IF_ARCH_powerpc([0..63],
- ))))
+ )))))
-- allocatableRegs is allMachRegNos with the fixed-use regs removed.
-- i.e., these are the regs for which we are prepared to allow the
-- caller-saves registers
map RealReg [eax,ecx,edx,fake0,fake1,fake2,fake3,fake4,fake5]
#endif /* i386_TARGET_ARCH */
+#if x86_64_TARGET_ARCH
+ -- caller-saves registers
+ map RealReg ([rax,rcx,rdx,rsi,rdi,r8,r9,r10,r11] ++ [16..31])
+ -- all xmm regs are caller-saves
+#endif /* x86_64_TARGET_ARCH */
#if sparc_TARGET_ARCH
map RealReg
( oReg 7 :
argRegs _ = panic "MachRegs.argRegs(x86): should not be used!"
#endif
+#if x86_64_TARGET_ARCH
+argRegs _ = panic "MachRegs.argRegs(x86_64): should not be used!"
+#endif
+
#if alpha_TARGET_ARCH
argRegs 0 = []
argRegs 1 = freeMappedRegs [16, fReg 16]
allArgRegs = panic "MachRegs.allArgRegs(x86): should not be used!"
#endif
+#if x86_64_TARGET_ARCH
+allArgRegs :: [Reg]
+allArgRegs = map RealReg [rdi,rsi,rdx,rcx,r8,r9]
+allFPArgRegs :: [Reg]
+allFPArgRegs = map RealReg [xmm0 .. xmm7]
+#endif
+
#if powerpc_TARGET_ARCH
allArgRegs :: [Reg]
allArgRegs = map RealReg [3..10]
freeReg esp = fastBool False -- %esp is the C stack pointer
#endif
+#if x86_64_TARGET_ARCH
+freeReg rsp = fastBool False -- %rsp is the C stack pointer
+#endif
+
#if sparc_TARGET_ARCH
freeReg g0 = fastBool False -- %g0 is always 0.
freeReg g5 = fastBool False -- %g5 is reserved (ABI).
# define IF_ARCH_i386(x,y) y
#endif
-- - - - - - - - - - - - - - - - - - - - - -
+#if x86_64_TARGET_ARCH
+# define IF_ARCH_x86_64(x,y) x
+#else
+# define IF_ARCH_x86_64(x,y) y
+#endif
+-- - - - - - - - - - - - - - - - - - - - - -
#if freebsd_TARGET_OS
# define IF_OS_freebsd(x,y) x
#else
--
-- Pretty-printing assembly language
--
- -- (c) The University of Glasgow 1993-2004
- --
+-- (c) The University of Glasgow 1993-2005
+--
-----------------------------------------------------------------------------
-- We start with the @pprXXX@s with some cross-platform commonality
#include "HsVersions.h"
import Cmm
-import MachOp ( MachRep(..) )
+import MachOp ( MachRep(..), wordRep, isFloatingRep )
import MachRegs -- may differ per-platform
import MachInstrs
-- on which bit of it we care about. Yurgh.
pprUserReg :: Reg -> Doc
-pprUserReg = pprReg IF_ARCH_i386(I32,)
+pprUserReg = pprReg IF_ARCH_i386(I32,) IF_ARCH_x86_64(I64,)
-pprReg :: IF_ARCH_i386(MachRep ->,) Reg -> Doc
+pprReg :: IF_ARCH_i386(MachRep ->, IF_ARCH_x86_64(MachRep ->,)) Reg -> Doc
-pprReg IF_ARCH_i386(s,) r
+pprReg IF_ARCH_i386(s, IF_ARCH_x86_64(s,)) r
= case r of
- RealReg i -> ppr_reg_no IF_ARCH_i386(s,) i
+ RealReg i -> ppr_reg_no IF_ARCH_i386(s, IF_ARCH_x86_64(s,)) i
VirtualRegI u -> text "%vI_" <> asmSDoc (pprUnique u)
VirtualRegHi u -> text "%vHi_" <> asmSDoc (pprUnique u)
VirtualRegF u -> text "%vF_" <> asmSDoc (pprUnique u)
_ -> SLIT("very naughty I386 register")
})
#endif
+
+#if x86_64_TARGET_ARCH
+ ppr_reg_no :: MachRep -> Int -> Doc
+ ppr_reg_no I8 = ppr_reg_byte
+ ppr_reg_no I16 = ppr_reg_word
+ ppr_reg_no I32 = ppr_reg_long
+ ppr_reg_no _ = ppr_reg_quad
+
+ ppr_reg_byte i = ptext
+ (case i of {
+ 0 -> SLIT("%al"); 1 -> SLIT("%bl");
+ 2 -> SLIT("%cl"); 3 -> SLIT("%dl");
+ 4 -> SLIT("%sil"); 5 -> SLIT("%dil"); -- new 8-bit regs!
+ 6 -> SLIT("%bpl"); 7 -> SLIT("%spl");
+ 8 -> SLIT("%r8b"); 9 -> SLIT("%r9b");
+ 10 -> SLIT("%r10b"); 11 -> SLIT("%r11b");
+ 12 -> SLIT("%r12b"); 13 -> SLIT("%r13b");
+ 14 -> SLIT("%r14b"); 15 -> SLIT("%r15b");
+ _ -> SLIT("very naughty x86_64 byte register")
+ })
+
+ ppr_reg_word i = ptext
+ (case i of {
+ 0 -> SLIT("%ax"); 1 -> SLIT("%bx");
+ 2 -> SLIT("%cx"); 3 -> SLIT("%dx");
+ 4 -> SLIT("%si"); 5 -> SLIT("%di");
+ 6 -> SLIT("%bp"); 7 -> SLIT("%sp");
+ 8 -> SLIT("%r8w"); 9 -> SLIT("%r9w");
+ 10 -> SLIT("%r10w"); 11 -> SLIT("%r11w");
+ 12 -> SLIT("%r12w"); 13 -> SLIT("%r13w");
+ 14 -> SLIT("%r14w"); 15 -> SLIT("%r15w");
+ _ -> SLIT("very naughty x86_64 word register")
+ })
+
+ ppr_reg_long i = ptext
+ (case i of {
+ 0 -> SLIT("%eax"); 1 -> SLIT("%ebx");
+ 2 -> SLIT("%ecx"); 3 -> SLIT("%edx");
+ 4 -> SLIT("%esi"); 5 -> SLIT("%edi");
+ 6 -> SLIT("%ebp"); 7 -> SLIT("%esp");
+ 8 -> SLIT("%r8d"); 9 -> SLIT("%r9d");
+ 10 -> SLIT("%r10d"); 11 -> SLIT("%r11d");
+ 12 -> SLIT("%r12d"); 13 -> SLIT("%r13d");
+ 14 -> SLIT("%r14d"); 15 -> SLIT("%r15d");
+ _ -> SLIT("very naughty x86_64 register")
+ })
+
+ ppr_reg_quad i = ptext
+ (case i of {
+ 0 -> SLIT("%rax"); 1 -> SLIT("%rbx");
+ 2 -> SLIT("%rcx"); 3 -> SLIT("%rdx");
+ 4 -> SLIT("%rsi"); 5 -> SLIT("%rdi");
+ 6 -> SLIT("%rbp"); 7 -> SLIT("%rsp");
+ 8 -> SLIT("%r8"); 9 -> SLIT("%r9");
+ 10 -> SLIT("%r10"); 11 -> SLIT("%r11");
+ 12 -> SLIT("%r12"); 13 -> SLIT("%r13");
+ 14 -> SLIT("%r14"); 15 -> SLIT("%r15");
+ 16 -> SLIT("%xmm0"); 17 -> SLIT("%xmm1");
+ 18 -> SLIT("%xmm2"); 19 -> SLIT("%xmm3");
+ 20 -> SLIT("%xmm4"); 21 -> SLIT("%xmm5");
+ 22 -> SLIT("%xmm6"); 23 -> SLIT("%xmm7");
+ 24 -> SLIT("%xmm8"); 25 -> SLIT("%xmm9");
+ 26 -> SLIT("%xmm10"); 27 -> SLIT("%xmm11");
+	28 -> SLIT("%xmm12");  29 -> SLIT("%xmm13");
+	30 -> SLIT("%xmm14");  31 -> SLIT("%xmm15")
+ })
+#endif
+
#if sparc_TARGET_ARCH
ppr_reg_no :: Int -> Doc
ppr_reg_no i = ptext
-- -----------------------------------------------------------------------------
-- pprSize: print a 'Size'
-#if powerpc_TARGET_ARCH || i386_TARGET_ARCH
+#if powerpc_TARGET_ARCH || i386_TARGET_ARCH || x86_64_TARGET_ARCH
pprSize :: MachRep -> Doc
#else
pprSize :: Size -> Doc
-- SF -> SLIT("s") UNUSED
TF -> SLIT("t")
#endif
-#if i386_TARGET_ARCH
+#if i386_TARGET_ARCH || x86_64_TARGET_ARCH
I8 -> SLIT("b")
I16 -> SLIT("w")
I32 -> SLIT("l")
- F32 -> SLIT("s")
- F64 -> SLIT("l")
+ I64 -> SLIT("q")
+#endif
+#if i386_TARGET_ARCH
+ F32 -> SLIT("l")
+ F64 -> SLIT("q")
F80 -> SLIT("t")
#endif
+#if x86_64_TARGET_ARCH
+ F32 -> SLIT("ss") -- "scalar single-precision float" (SSE2)
+ F64 -> SLIT("sd") -- "scalar double-precision float" (SSE2)
+#endif
#if sparc_TARGET_ARCH
B -> SLIT("sb")
Bu -> SLIT("ub")
GTT -> SLIT("gt");
GE -> SLIT("ge")
#endif
-#if i386_TARGET_ARCH
+#if i386_TARGET_ARCH || x86_64_TARGET_ARCH
GEU -> SLIT("ae"); LU -> SLIT("b");
- EQQ -> SLIT("e"); GTT -> SLIT("g");
+ EQQ -> SLIT("e"); GTT -> SLIT("g");
GE -> SLIT("ge"); GU -> SLIT("a");
LTT -> SLIT("l"); LE -> SLIT("le");
LEU -> SLIT("be"); NE -> SLIT("ne");
-------------------
-#if i386_TARGET_ARCH
+#if i386_TARGET_ARCH || x86_64_TARGET_ARCH
pprAddr (ImmAddr imm off)
= let pp_imm = pprImm imm
in
= let
pp_disp = ppr_disp displacement
pp_off p = pp_disp <> char '(' <> p <> char ')'
- pp_reg r = pprReg I32 r
+ pp_reg r = pprReg wordRep r
in
case (base,index) of
(Nothing, Nothing) -> pp_disp
IF_ARCH_alpha(SLIT("\t.text\n\t.align 3") {-word boundary-}
,IF_ARCH_sparc(SLIT(".text\n\t.align 4") {-word boundary-}
,IF_ARCH_i386(SLIT(".text\n\t.align 4,0x90") {-needs per-OS variation!-}
+ ,IF_ARCH_x86_64(SLIT(".text\n\t.align 8") {-needs per-OS variation!-}
,IF_ARCH_powerpc(SLIT(".text\n.align 2")
- ,))))
+ ,)))))
pprSectionHeader Data
= ptext
IF_ARCH_alpha(SLIT("\t.data\n\t.align 3")
,IF_ARCH_sparc(SLIT(".data\n\t.align 8") {-<8 will break double constants -}
,IF_ARCH_i386(SLIT(".data\n\t.align 4")
+ ,IF_ARCH_x86_64(SLIT(".data\n\t.align 8")
,IF_ARCH_powerpc(SLIT(".data\n.align 2")
- ,))))
+ ,)))))
pprSectionHeader ReadOnlyData
= ptext
IF_ARCH_alpha(SLIT("\t.data\n\t.align 3")
,IF_ARCH_sparc(SLIT(".data\n\t.align 8") {-<8 will break double constants -}
,IF_ARCH_i386(SLIT(".section .rodata\n\t.align 4")
+ ,IF_ARCH_x86_64(SLIT(".section .rodata\n\t.align 8")
,IF_ARCH_powerpc(IF_OS_darwin(SLIT(".const\n.align 2"),
SLIT(".section .rodata\n\t.align 2"))
- ,))))
+ ,)))))
pprSectionHeader RelocatableReadOnlyData
= ptext
IF_ARCH_alpha(SLIT("\t.data\n\t.align 3")
,IF_ARCH_sparc(SLIT(".data\n\t.align 8") {-<8 will break double constants -}
- ,IF_ARCH_i386(SLIT(".data\n\t.align 4")
+ ,IF_ARCH_i386(SLIT(".section .rodata\n\t.align 4")
+ ,IF_ARCH_x86_64(SLIT(".text\n\t.align 8")
,IF_ARCH_powerpc(IF_OS_darwin(SLIT(".const_data\n.align 2"),
SLIT(".data\n\t.align 2"))
- ,))))
+ ,)))))
+ -- the assembler on x86_64/Linux refuses to generate code for
+ -- .quad x - y
+ -- where x is in the text section and y in the rodata section.
+ -- It works if y is in the text section, though. This is probably
+ -- going to cause difficulties for PIC, I imagine.
pprSectionHeader UninitialisedData
= ptext
IF_ARCH_alpha(SLIT("\t.bss\n\t.align 3")
,IF_ARCH_sparc(SLIT(".bss\n\t.align 8") {-<8 will break double constants -}
,IF_ARCH_i386(SLIT(".section .bss\n\t.align 4")
+ ,IF_ARCH_x86_64(SLIT(".section .bss\n\t.align 8")
,IF_ARCH_powerpc(IF_OS_darwin(SLIT(".const_data\n.align 2"),
SLIT(".section .bss\n\t.align 2"))
- ,))))
+ ,)))))
+pprSectionHeader ReadOnlyData16
+ = ptext
+ IF_ARCH_alpha(SLIT("\t.data\n\t.align 4")
+ ,IF_ARCH_sparc(SLIT(".data\n\t.align 16")
+ ,IF_ARCH_i386(SLIT(".section .rodata\n\t.align 16")
+ ,IF_ARCH_x86_64(SLIT(".section .rodata.cst16\n\t.align 16")
+ ,IF_ARCH_powerpc(IF_OS_darwin(SLIT(".const\n.align 4"),
+ SLIT(".section .rodata\n\t.align 4"))
+ ,)))))
+
pprSectionHeader (OtherSection sec)
= panic "PprMach.pprSectionHeader: unknown section"
pprGloblDecl :: CLabel -> Doc
pprGloblDecl lbl
| not (externallyVisibleCLabel lbl) = empty
- | otherwise = ptext IF_ARCH_alpha(SLIT(".globl ")
- ,IF_ARCH_i386(SLIT(".globl ")
- ,IF_ARCH_sparc(SLIT(".global ")
- ,IF_ARCH_powerpc(SLIT(".globl ")
- ,)))) <>
+ | otherwise = ptext IF_ARCH_sparc(SLIT(".global "),
+ SLIT(".globl ")) <>
pprCLabel_asm lbl
pprLabel :: CLabel -> Doc
pprAlign bytes =
IF_ARCH_alpha(ptextSLIT(".align ") <> int pow2,
IF_ARCH_i386(ptext SLIT(".align ") <> int bytes,
+ IF_ARCH_x86_64(ptext SLIT(".align ") <> int bytes,
IF_ARCH_sparc(ptext SLIT(".align ") <> int bytes,
- IF_ARCH_powerpc(ptext SLIT(".align ") <> int pow2,))))
+ IF_ARCH_powerpc(ptext SLIT(".align ") <> int pow2,)))))
where
pow2 = log2 bytes
ppr_item I16 x = [ptext SLIT("\t.short\t") <> pprImm imm]
ppr_item I64 x = [ptext SLIT("\t.quad\t") <> pprImm imm]
#endif
-#if i386_TARGET_ARCH
+#if i386_TARGET_ARCH || x86_64_TARGET_ARCH
ppr_item I16 x = [ptext SLIT("\t.word\t") <> pprImm imm]
ppr_item I64 x = [ptext SLIT("\t.quad\t") <> pprImm imm]
#endif
= IF_ARCH_alpha( ((<>) (ptext SLIT("\t# ")) (ftext s))
,IF_ARCH_sparc( ((<>) (ptext SLIT("! ")) (ftext s))
,IF_ARCH_i386( ((<>) (ptext SLIT("# ")) (ftext s))
+ ,IF_ARCH_x86_64( ((<>) (ptext SLIT("# ")) (ftext s))
,IF_ARCH_powerpc( IF_OS_linux(
((<>) (ptext SLIT("# ")) (ftext s)),
((<>) (ptext SLIT("; ")) (ftext s)))
- ,))))
+ ,)))))
pprInstr (DELTA d)
= pprInstr (COMMENT (mkFastString ("\tdelta = " ++ show d)))
-- -----------------------------------------------------------------------------
-- pprInstr for an x86
-#if i386_TARGET_ARCH
+#if i386_TARGET_ARCH || x86_64_TARGET_ARCH
pprInstr v@(MOV size s@(OpReg src) d@(OpReg dst)) -- hack
| src == dst
#else
empty
#endif
+
pprInstr (MOV size src dst)
= pprSizeOpOp SLIT("mov") size src dst
-pprInstr (MOVZxL sizes src dst) = pprSizeOpOpCoerce SLIT("movz") sizes I32 src dst
-pprInstr (MOVSxL sizes src dst) = pprSizeOpOpCoerce SLIT("movs") sizes I32 src dst
+
+pprInstr (MOVZxL I32 src dst) = pprSizeOpOp SLIT("mov") I32 src dst
+ -- 32-to-64 bit zero extension on x86_64 is accomplished by a simple
+ -- movl. But we represent it as a MOVZxL instruction, because
+ -- the reg alloc would tend to throw away a plain reg-to-reg
+ -- move, and we still want it to do that.
+
+pprInstr (MOVZxL sizes src dst) = pprSizeOpOpCoerce SLIT("movz") sizes wordRep src dst
+pprInstr (MOVSxL sizes src dst) = pprSizeOpOpCoerce SLIT("movs") sizes wordRep src dst
-- here we do some patching, since the physical registers are only set late
-- in the code generation.
however, cannot be used to determine if the upper half of the
result is non-zero." So there.
-}
-pprInstr (MUL size op1 op2) = pprSizeOpOp SLIT("imul") size op1 op2
-
pprInstr (AND size src dst) = pprSizeOpOp SLIT("and") size src dst
pprInstr (OR size src dst) = pprSizeOpOp SLIT("or") size src dst
+
+pprInstr (XOR F32 src dst) = pprOpOp SLIT("xorps") F32 src dst
+pprInstr (XOR F64 src dst) = pprOpOp SLIT("xorpd") F64 src dst
pprInstr (XOR size src dst) = pprSizeOpOp SLIT("xor") size src dst
+
pprInstr (NOT size op) = pprSizeOp SLIT("not") size op
pprInstr (NEGI size op) = pprSizeOp SLIT("neg") size op
pprInstr (BT size imm src) = pprSizeImmOp SLIT("bt") size imm src
-pprInstr (CMP size src dst) = pprSizeOpOp SLIT("cmp") size src dst
+pprInstr (CMP size src dst)
+ | isFloatingRep size = pprSizeOpOp SLIT("ucomi") size src dst -- SSE2
+ | otherwise = pprSizeOpOp SLIT("cmp") size src dst
+
pprInstr (TEST size src dst) = pprSizeOpOp SLIT("test") size src dst
pprInstr (PUSH size op) = pprSizeOp SLIT("push") size op
pprInstr (POP size op) = pprSizeOp SLIT("pop") size op
-- pprInstr POPA = ptext SLIT("\tpopal")
pprInstr NOP = ptext SLIT("\tnop")
-pprInstr CLTD = ptext SLIT("\tcltd")
+pprInstr (CLTD I32) = ptext SLIT("\tcltd")
+pprInstr (CLTD I64) = ptext SLIT("\tcqto")
pprInstr (SETCC cond op) = pprCondInstr SLIT("set") cond (pprOperand I8 op)
where lab = mkAsmTempLabel id
pprInstr (JMP (OpImm imm)) = (<>) (ptext SLIT("\tjmp ")) (pprImm imm)
-pprInstr (JMP op) = (<>) (ptext SLIT("\tjmp *")) (pprOperand I32 op)
+pprInstr (JMP op) = (<>) (ptext SLIT("\tjmp *")) (pprOperand wordRep op)
pprInstr (JMP_TBL op ids) = pprInstr (JMP op)
pprInstr (CALL (Left imm)) = (<>) (ptext SLIT("\tcall ")) (pprImm imm)
-pprInstr (CALL (Right reg)) = (<>) (ptext SLIT("\tcall *")) (pprReg I32 reg)
+pprInstr (CALL (Right reg)) = (<>) (ptext SLIT("\tcall *")) (pprReg wordRep reg)
pprInstr (IDIV sz op) = pprSizeOp SLIT("idiv") sz op
pprInstr (DIV sz op) = pprSizeOp SLIT("div") sz op
pprInstr (IMUL64 sd_hi sd_lo) = pprInstr_imul64 sd_hi sd_lo
+#if x86_64_TARGET_ARCH
+pprInstr (MUL size op1 op2) = pprSizeOpOp SLIT("mul") size op1 op2
+
+pprInstr (FDIV size op1 op2) = pprSizeOpOp SLIT("div") size op1 op2
+
+pprInstr (CVTSS2SD from to) = pprRegReg SLIT("cvtss2sd") from to
+pprInstr (CVTSD2SS from to) = pprRegReg SLIT("cvtsd2ss") from to
+pprInstr (CVTSS2SI from to) = pprOpReg SLIT("cvtss2si") from to
+pprInstr (CVTSD2SI from to) = pprOpReg SLIT("cvtsd2si") from to
+pprInstr (CVTSI2SS from to) = pprOpReg SLIT("cvtsi2ss") from to
+pprInstr (CVTSI2SD from to) = pprOpReg SLIT("cvtsi2sd") from to
+#endif
+
+pprInstr (FETCHGOT reg)
+ = vcat [ ptext SLIT("\tcall 1f"),
+ hcat [ ptext SLIT("1:\tpopl\t"), pprReg I32 reg ],
+ hcat [ ptext SLIT("\taddl\t$_GLOBAL_OFFSET_TABLE_+(.-1b), "),
+ pprReg I32 reg ]
+ ]
+#endif
+
+-- -----------------------------------------------------------------------------
+-- i386 floating-point
+
+#if i386_TARGET_ARCH
-- Simulating a flat register set on the x86 FP stack is tricky.
-- you have to free %st(7) before pushing anything on the FP reg stack
-- so as to preclude the possibility of a FP stack overflow exception.
ptext SLIT("\tffree %st(4) ;ffree %st(5) ;ffree %st(6) ;ffree %st(7)")
]
-pprInstr (FETCHGOT reg)
- = vcat [ ptext SLIT("\tcall 1f"),
- hcat [ ptext SLIT("1:\tpopl\t"), pprReg I32 reg ],
- hcat [ ptext SLIT("\taddl\t$_GLOBAL_OFFSET_TABLE_+(.-1b), "),
- pprReg I32 reg ]
- ]
-
--- Emit code to make hi_reg:lo_reg be the 64-bit product of hi_reg and lo_reg
-pprInstr_imul64 hi_reg lo_reg
- = let fakeInsn = text "imul64" <+> pp_hi_reg <> comma <+> pp_lo_reg
- pp_hi_reg = pprReg I32 hi_reg
- pp_lo_reg = pprReg I32 lo_reg
- in
- vcat [
- text "\t# BEGIN " <> fakeInsn,
- text "\tpushl" <+> pp_hi_reg <> text" ; pushl" <+> pp_lo_reg,
- text "\tpushl %eax ; pushl %edx",
- text "\tmovl 12(%esp), %eax ; imull 8(%esp)",
- text "\tmovl %edx, 12(%esp) ; movl %eax, 8(%esp)",
- text "\tpopl %edx ; popl %eax",
- text "\tpopl" <+> pp_lo_reg <> text " ; popl" <+> pp_hi_reg,
- text "\t# END " <> fakeInsn
- ]
-
-
--------------------------
-- coerce %st(0) to the specified size
pprGInstr (GSUB sz src1 src2 dst) = pprSizeRegRegReg SLIT("gsub") sz src1 src2 dst
pprGInstr (GMUL sz src1 src2 dst) = pprSizeRegRegReg SLIT("gmul") sz src1 src2 dst
pprGInstr (GDIV sz src1 src2 dst) = pprSizeRegRegReg SLIT("gdiv") sz src1 src2 dst
+#endif
+
+#if i386_TARGET_ARCH || x86_64_TARGET_ARCH
+-- Emit code to make hi_reg:lo_reg be the 64-bit product of hi_reg and lo_reg
+pprInstr_imul64 hi_reg lo_reg
+ = let fakeInsn = text "imul64" <+> pp_hi_reg <> comma <+> pp_lo_reg
+ pp_hi_reg = pprReg wordRep hi_reg
+ pp_lo_reg = pprReg wordRep lo_reg
+ in
+ vcat [
+ text "\t# BEGIN " <> fakeInsn,
+ text "\tpushl" <+> pp_hi_reg <> text" ; pushl" <+> pp_lo_reg,
+ text "\tpushl %eax ; pushl %edx",
+ text "\tmovl 12(%esp), %eax ; imull 8(%esp)",
+ text "\tmovl %edx, 12(%esp) ; movl %eax, 8(%esp)",
+ text "\tpopl %edx ; popl %eax",
+ text "\tpopl" <+> pp_lo_reg <> text " ; popl" <+> pp_hi_reg,
+ text "\t# END " <> fakeInsn
+ ]
-- Continue with I386-only printing bits and bobs:
pprDollImm :: Imm -> Doc
pprOperand s (OpImm i) = pprDollImm i
pprOperand s (OpAddr ea) = pprAddr ea
+pprMnemonic_ :: LitString -> Doc
+pprMnemonic_ name =
+ char '\t' <> ptext name <> space
+
pprMnemonic :: LitString -> MachRep -> Doc
pprMnemonic name size =
char '\t' <> ptext name <> pprSize size <> space
pprOperand size op2
]
+pprOpOp :: LitString -> MachRep -> Operand -> Operand -> Doc
+pprOpOp name size op1 op2
+ = hcat [
+ pprMnemonic_ name,
+ pprOperand size op1,
+ comma,
+ pprOperand size op2
+ ]
+
pprSizeReg :: LitString -> MachRep -> Reg -> Doc
pprSizeReg name size reg1
= hcat [
pprReg size reg2
]
+pprRegReg :: LitString -> Reg -> Reg -> Doc
+pprRegReg name reg1 reg2
+ = hcat [
+ pprMnemonic_ name,
+ pprReg wordRep reg1,
+ comma,
+ pprReg wordRep reg2
+ ]
+
+pprOpReg :: LitString -> Operand -> Reg -> Doc
+pprOpReg name op1 reg2
+ = hcat [
+ pprMnemonic_ name,
+ pprOperand wordRep op1,
+ comma,
+ pprReg wordRep reg2
+ ]
+
pprCondRegReg :: LitString -> MachRep -> Cond -> Reg -> Reg -> Doc
pprCondRegReg name size cond reg1 reg2
= hcat [
#include "HsVersions.h"
import Cmm ( BlockId )
-#if powerpc_TARGET_ARCH || i386_TARGET_ARCH
+#if powerpc_TARGET_ARCH || i386_TARGET_ARCH || x86_64_TARGET_ARCH
import MachOp ( MachRep(..) )
#endif
import MachInstrs
#endif /* alpha_TARGET_ARCH */
-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-#if i386_TARGET_ARCH
+#if i386_TARGET_ARCH || x86_64_TARGET_ARCH
regUsage instr = case instr of
MOV sz src dst -> usageRW src dst
JMP_TBL op ids -> mkRU (use_R op) []
CALL (Left imm) -> mkRU [] callClobberedRegs
CALL (Right reg) -> mkRU [reg] callClobberedRegs
- CLTD -> mkRU [eax] [edx]
+ CLTD sz -> mkRU [eax] [edx]
NOP -> mkRU [] []
+#if i386_TARGET_ARCH
GMOV src dst -> mkRU [src] [dst]
GLD sz src dst -> mkRU (use_EA src) [dst]
GST sz src dst -> mkRU (src : use_EA dst) []
GSIN sz src dst -> mkRU [src] [dst]
GCOS sz src dst -> mkRU [src] [dst]
GTAN sz src dst -> mkRU [src] [dst]
+#endif
+
+#if x86_64_TARGET_ARCH
+ CVTSS2SD src dst -> mkRU [src] [dst]
+ CVTSD2SS src dst -> mkRU [src] [dst]
+ CVTSS2SI src dst -> mkRU (use_R src) [dst]
+ CVTSD2SI src dst -> mkRU (use_R src) [dst]
+ CVTSI2SS src dst -> mkRU (use_R src) [dst]
+ CVTSI2SD src dst -> mkRU (use_R src) [dst]
+ FDIV sz src dst -> usageRM src dst
+#endif
FETCHGOT reg -> mkRU [] [reg]
mkRU src dst = RU (filter interesting src)
(filter interesting dst)
-#endif /* i386_TARGET_ARCH */
+#endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */
-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
#if sparc_TARGET_ARCH
jumpDests :: Instr -> [BlockId] -> [BlockId]
jumpDests insn acc
= case insn of
-#if i386_TARGET_ARCH
+#if i386_TARGET_ARCH || x86_64_TARGET_ARCH
JXX _ id -> id : acc
JMP_TBL _ ids -> ids ++ acc
#elif powerpc_TARGET_ARCH
#endif /* alpha_TARGET_ARCH */
-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-#if i386_TARGET_ARCH
+#if i386_TARGET_ARCH || x86_64_TARGET_ARCH
patchRegs instr env = case instr of
MOV sz src dst -> patch2 (MOV sz) src dst
JMP op -> patch1 JMP op
JMP_TBL op ids -> patch1 JMP_TBL op $ ids
+#if i386_TARGET_ARCH
GMOV src dst -> GMOV (env src) (env dst)
GLD sz src dst -> GLD sz (lookupAddr src) (env dst)
GST sz src dst -> GST sz (env src) (lookupAddr dst)
GSIN sz src dst -> GSIN sz (env src) (env dst)
GCOS sz src dst -> GCOS sz (env src) (env dst)
GTAN sz src dst -> GTAN sz (env src) (env dst)
+#endif
+
+#if x86_64_TARGET_ARCH
+ CVTSS2SD src dst -> CVTSS2SD (env src) (env dst)
+ CVTSD2SS src dst -> CVTSD2SS (env src) (env dst)
+ CVTSS2SI src dst -> CVTSS2SI (patchOp src) (env dst)
+ CVTSD2SI src dst -> CVTSD2SI (patchOp src) (env dst)
+ CVTSI2SS src dst -> CVTSI2SS (patchOp src) (env dst)
+ CVTSI2SD src dst -> CVTSI2SD (patchOp src) (env dst)
+ FDIV sz src dst -> FDIV sz (patchOp src) (patchOp dst)
+#endif
CALL (Left imm) -> instr
CALL (Right reg) -> CALL (Right (env reg))
COMMENT _ -> instr
DELTA _ -> instr
JXX _ _ -> instr
- CLTD -> instr
+ CLTD _ -> instr
_other -> panic "patchRegs: unrecognised instr"
lookupIndex Nothing = Nothing
lookupIndex (Just (r,i)) = Just (env r, i)
-#endif /* i386_TARGET_ARCH */
+#endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH*/
-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
#if sparc_TARGET_ARCH
-- by assigning the src and dest temporaries to the same real register.
isRegRegMove :: Instr -> Maybe (Reg,Reg)
-#ifdef i386_TARGET_ARCH
+#if i386_TARGET_ARCH || x86_64_TARGET_ARCH
-- TMP:
isRegRegMove (MOV _ (OpReg r1) (OpReg r2)) = Just (r1,r2)
#elif powerpc_TARGET_ARCH
RcInteger -> MOV I32 (OpReg reg) (OpAddr (spRel off_w))
_ -> GST F80 reg (spRel off_w) {- RcFloat/RcDouble -}
#endif
+#ifdef x86_64_TARGET_ARCH
+ let off_w = (off-delta) `div` 8
+ in case regClass reg of
+ RcInteger -> MOV I64 (OpReg reg) (OpAddr (spRel off_w))
+ _ -> panic "mkSpillInstr: ToDo"
+#endif
#ifdef sparc_TARGET_ARCH
{-SPARC: spill below frame pointer leaving 2 words/spill-}
let{off_w = 1 + (off `div` 4);
let
off = spillSlotToOffset slot
in
-#ifdef alpha_TARGET_ARCH
+#if alpha_TARGET_ARCH
LD sz dyn (spRel (- (off `div` 8)))
#endif
-#ifdef i386_TARGET_ARCH
+#if i386_TARGET_ARCH
let off_w = (off-delta) `div` 4
in case regClass reg of {
RcInteger -> MOV I32 (OpAddr (spRel off_w)) (OpReg reg);
_ -> GLD F80 (spRel off_w) reg} {- RcFloat/RcDouble -}
#endif
-#ifdef sparc_TARGET_ARCH
+#if x86_64_TARGET_ARCH
+ let off_w = (off-delta) `div` 8
+ in case regClass reg of
+ RcInteger -> MOV I64 (OpAddr (spRel off_w)) (OpReg reg)
+ _ -> panic "mkLoadInstr: ToDo"
+#endif
+#if sparc_TARGET_ARCH
let{off_w = 1 + (off `div` 4);
sz = case regClass vreg of {
RcInteger -> W;
RcDouble -> DF}}
in LD sz (fpRel (- off_w)) dyn
#endif
-#ifdef powerpc_TARGET_ARCH
+#if powerpc_TARGET_ARCH
let sz = case regClass reg of
RcInteger -> I32
RcDouble -> F64