X-Git-Url: http://git.megacz.com/?p=ghc-hetmet.git;a=blobdiff_plain;f=compiler%2FnativeGen%2FX86%2FCodeGen.hs;h=39de19c412c124c4131eb3af010b15d5460c43c7;hp=43495a45a5e74cd2f08895a0f21f75885b82216a;hb=8133a9f47b99f4e65ed30551de32ad72c6b61b27;hpb=b04a210e26ca57242fd052f2aa91011a80b76299 diff --git a/compiler/nativeGen/X86/CodeGen.hs b/compiler/nativeGen/X86/CodeGen.hs index 43495a4..39de19c 100644 --- a/compiler/nativeGen/X86/CodeGen.hs +++ b/compiler/nativeGen/X86/CodeGen.hs @@ -1,10 +1,3 @@ -{-# OPTIONS -w #-} --- The above warning supression flag is a temporary kludge. --- While working on this module you are encouraged to remove it and fix --- any warnings in the module. See --- http://hackage.haskell.org/trac/ghc/wiki/Commentary/CodingStyle#Warnings --- for details - ----------------------------------------------------------------------------- -- -- Generating machine code (instruction selection) @@ -20,6 +13,7 @@ module X86.CodeGen ( cmmTopCodeGen, + generateJumpTableForInstr, InstrBlock ) @@ -27,69 +21,83 @@ where #include "HsVersions.h" #include "nativeGen/NCG.h" -#include "MachDeps.h" +#include "../includes/MachDeps.h" -- NCG stuff: import X86.Instr import X86.Cond import X86.Regs import X86.RegInfo -import X86.Ppr import Instruction import PIC import NCGMonad import Size import Reg -import RegClass import Platform -- Our intermediate code: import BasicTypes import BlockId -import PprCmm ( pprExpr ) -import Cmm +import PprCmm () +import OldCmm +import OldPprCmm () import CLabel -import ClosureInfo ( C_SRT(..) ) -- The rest: import StaticFlags ( opt_PIC ) import ForeignCall ( CCallConv(..) ) import OrdList -import Pretty -import qualified Outputable as O import Outputable +import Unique import FastString import FastBool ( isFastTrue ) import Constants ( wORD_SIZE ) import DynFlags -import Debug.Trace ( trace ) +import Control.Monad ( mapAndUnzipM ) +import Data.Maybe ( catMaybes ) +import Data.Int -import Control.Monad ( mapAndUnzipM ) -import Data.Maybe ( fromJust ) -import Data.Bits +#if WORD_SIZE_IN_BITS==32 +import Data.Maybe ( fromJust ) import Data.Word -import Data.Int +import Data.Bits +#endif +sse2Enabled :: NatM Bool +#if x86_64_TARGET_ARCH +-- SSE2 is fixed on for x86_64. It would be possible to make it optional, +-- but we'd need to fix at least the foreign call code where the calling +-- convention specifies the use of xmm regs, and possibly other places. 
+sse2Enabled = return True +#else +sse2Enabled = do + dflags <- getDynFlagsNat + return (dopt Opt_SSE2 dflags) +#endif + +if_sse2 :: NatM a -> NatM a -> NatM a +if_sse2 sse2 x87 = do + b <- sse2Enabled + if b then sse2 else x87 cmmTopCodeGen - :: DynFlags - -> RawCmmTop + :: RawCmmTop -> NatM [NatCmmTop Instr] -cmmTopCodeGen dynflags - (CmmProc info lab params (ListGraph blocks)) = do +cmmTopCodeGen (CmmProc info lab (ListGraph blocks)) = do (nat_blocks,statics) <- mapAndUnzipM basicBlockCodeGen blocks picBaseMb <- getPicBaseMaybeNat - let proc = CmmProc info lab params (ListGraph $ concat nat_blocks) + dflags <- getDynFlagsNat + let proc = CmmProc info lab (ListGraph $ concat nat_blocks) tops = proc : concat statics - os = platformOS $ targetPlatform dynflags + os = platformOS $ targetPlatform dflags case picBaseMb of Just picBase -> initializePicBase_x86 ArchX86 os picBase tops Nothing -> return tops -cmmTopCodeGen _ (CmmData sec dat) = do +cmmTopCodeGen (CmmData sec dat) = do return [CmmData sec dat] -- no translation, we just use CmmStatic @@ -152,8 +160,8 @@ stmtToInstrs stmt = case stmt of CmmBranch id -> genBranch id CmmCondBranch arg id -> genCondJump id arg CmmSwitch arg ids -> genSwitch arg ids - CmmJump arg params -> genJump arg - CmmReturn params -> + CmmJump arg _ -> genJump arg + CmmReturn _ -> panic "stmtToInstrs: return statement should have been cps'd away" @@ -172,6 +180,7 @@ data CondCode = CondCode Bool Cond InstrBlock +#if WORD_SIZE_IN_BITS==32 -- | a.k.a "Register64" -- Reg is the lower 32-bit temporary which contains the result. -- Use getHiVRegFromLo to find the other VRegUnique. @@ -183,6 +192,7 @@ data ChildCode64 = ChildCode64 InstrBlock Reg +#endif -- | Register's passed up the tree. If the stix code forces the register @@ -201,18 +211,21 @@ swizzleRegisterRep (Any _ codefn) size = Any size codefn -- | Grab the Reg for a CmmReg -getRegisterReg :: CmmReg -> Reg +getRegisterReg :: Bool -> CmmReg -> Reg -getRegisterReg (CmmLocal (LocalReg u pk)) - = mkVReg u (cmmTypeSize pk) +getRegisterReg use_sse2 (CmmLocal (LocalReg u pk)) + = let sz = cmmTypeSize pk in + if isFloatSize sz && not use_sse2 + then RegVirtual (mkVirtualReg u FF80) + else RegVirtual (mkVirtualReg u sz) -getRegisterReg (CmmGlobal mid) - = case get_GlobalReg_reg_or_addr mid of - Left (RealReg rrno) -> RealReg rrno - _other -> pprPanic "getRegisterReg-memory" (ppr $ CmmGlobal mid) - -- By this stage, the only MagicIds remaining should be the - -- ones which map to a real machine register on this - -- platform. Hence ... +getRegisterReg _ (CmmGlobal mid) + = case globalRegMaybe mid of + Just reg -> RegReal $ reg + Nothing -> pprPanic "getRegisterReg-memory" (ppr $ CmmGlobal mid) + -- By this stage, the only MagicIds remaining should be the + -- ones which map to a real machine register on this + -- platform. Hence ... -- | Memory addressing modes passed up the tree. 
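-- Editorial note (not part of the original patch): `if_sse2` above is the
-- central dispatch point of this change, selecting between the new SSE2
-- code and the legacy x87 code. A minimal sketch of the idiom, using a
-- hypothetical wrapper name (the real call sites are the MO_F_* cases in
-- getRegister further down):
--
--     genFloatSub :: Width -> CmmExpr -> CmmExpr -> NatM Register
--     genFloatSub w x y = if_sse2 (trivialFCode_sse2 w SUB x y)
--                                 (trivialFCode_x87 GSUB x y)
--
-- Each floating-point case consults sse2Enabled once and emits either an
-- xmm-register instruction (SUB) or its 80-bit x87 stack equivalent
-- (GSUB).  This is also why getRegisterReg grows the use_sse2 flag above:
-- float virtual registers must be FF80 on the x87 path but get their
-- natural SSE size otherwise.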
@@ -252,8 +265,8 @@ is32BitInteger i = i64 <= 0x7fffffff && i64 >= -0x80000000 -- | Convert a BlockId to some CmmStatic data jumpTableEntry :: Maybe BlockId -> CmmStatic jumpTableEntry Nothing = CmmStaticLit (CmmInt 0 wordWidth) -jumpTableEntry (Just (BlockId id)) = CmmStaticLit (CmmLabel blockLabel) - where blockLabel = mkAsmTempLabel id +jumpTableEntry (Just blockid) = CmmStaticLit (CmmLabel blockLabel) + where blockLabel = mkAsmTempLabel (getUnique blockid) -- ----------------------------------------------------------------------------- @@ -261,8 +274,8 @@ jumpTableEntry (Just (BlockId id)) = CmmStaticLit (CmmLabel blockLabel) -- Expand CmmRegOff. ToDo: should we do it this way around, or convert -- CmmExprs into CmmRegOff? -mangleIndexTree :: CmmExpr -> CmmExpr -mangleIndexTree (CmmRegOff reg off) +mangleIndexTree :: CmmReg -> Int -> CmmExpr +mangleIndexTree reg off = CmmMachOp (MO_Add width) [CmmReg reg, CmmLit (CmmInt (fromIntegral off) width)] where width = typeWidth (cmmRegType reg) @@ -279,9 +292,7 @@ getSomeReg expr = do return (reg, code) - - - +#if WORD_SIZE_IN_BITS==32 assignMem_I64Code :: CmmExpr -> CmmExpr -> NatM InstrBlock assignMem_I64Code addrTree valueTree = do Amode addr addr_code <- getAmode addrTree @@ -297,10 +308,10 @@ assignMem_I64Code addrTree valueTree = do assignReg_I64Code :: CmmReg -> CmmExpr -> NatM InstrBlock -assignReg_I64Code (CmmLocal (LocalReg u_dst pk)) valueTree = do +assignReg_I64Code (CmmLocal (LocalReg u_dst _)) valueTree = do ChildCode64 vcode r_src_lo <- iselExpr64 valueTree let - r_dst_lo = mkVReg u_dst II32 + r_dst_lo = RegVirtual $ mkVirtualReg u_dst II32 r_dst_hi = getHiVRegFromLo r_dst_lo r_src_hi = getHiVRegFromLo r_src_lo mov_lo = MOV II32 (OpReg r_src_lo) (OpReg r_dst_lo) @@ -310,12 +321,10 @@ assignReg_I64Code (CmmLocal (LocalReg u_dst pk)) valueTree = do vcode `snocOL` mov_lo `snocOL` mov_hi ) -assignReg_I64Code lvalue valueTree +assignReg_I64Code _ _ = panic "assignReg_I64Code(i386): invalid lvalue" - - iselExpr64 :: CmmExpr -> NatM ChildCode64 iselExpr64 (CmmLit (CmmInt i _)) = do (rlo,rhi) <- getNewRegPairNat II32 @@ -342,7 +351,7 @@ iselExpr64 (CmmLoad addrTree ty) | isWord64 ty = do ) iselExpr64 (CmmReg (CmmLocal (LocalReg vu ty))) | isWord64 ty - = return (ChildCode64 nilOL (mkVReg vu II32)) + = return (ChildCode64 nilOL (RegVirtual $ mkVirtualReg vu II32)) -- we handle addition, but rather badly iselExpr64 (CmmMachOp (MO_Add _) [e1, CmmLit (CmmInt i _)]) = do @@ -389,7 +398,7 @@ iselExpr64 (CmmMachOp (MO_UU_Conv _ W64) [expr]) = do iselExpr64 expr = pprPanic "iselExpr64(i386)" (ppr expr) - +#endif -------------------------------------------------------------------------------- @@ -405,11 +414,17 @@ getRegister (CmmReg (CmmGlobal PicBaseReg)) #endif getRegister (CmmReg reg) - = return (Fixed (cmmTypeSize (cmmRegType reg)) - (getRegisterReg reg) nilOL) + = do use_sse2 <- sse2Enabled + let + sz = cmmTypeSize (cmmRegType reg) + size | not use_sse2 && isFloatSize sz = FF80 + | otherwise = sz + -- + return (Fixed size (getRegisterReg use_sse2 reg) nilOL) + -getRegister tree@(CmmRegOff _ _) - = getRegister (mangleIndexTree tree) +getRegister (CmmRegOff r n) + = getRegister $ mangleIndexTree r n #if WORD_SIZE_IN_BITS==32 @@ -437,78 +452,35 @@ getRegister (CmmMachOp (MO_SS_Conv W64 W32) [x]) = do #endif - - -#if i386_TARGET_ARCH - -getRegister (CmmLit (CmmFloat f W32)) = do - lbl <- getNewLabelNat - dflags <- getDynFlagsNat - dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl - Amode addr addr_code <- getAmode dynRef 
- let code dst = - LDATA ReadOnlyData - [CmmDataLabel lbl, - CmmStaticLit (CmmFloat f W32)] - `consOL` (addr_code `snocOL` - GLD FF32 addr dst) - -- in - return (Any FF32 code) - - -getRegister (CmmLit (CmmFloat d W64)) - | d == 0.0 - = let code dst = unitOL (GLDZ dst) - in return (Any FF64 code) - - | d == 1.0 - = let code dst = unitOL (GLD1 dst) - in return (Any FF64 code) - - | otherwise = do - lbl <- getNewLabelNat - dflags <- getDynFlagsNat - dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl - Amode addr addr_code <- getAmode dynRef - let code dst = - LDATA ReadOnlyData - [CmmDataLabel lbl, - CmmStaticLit (CmmFloat d W64)] - `consOL` (addr_code `snocOL` - GLD FF64 addr dst) - -- in - return (Any FF64 code) - -#endif /* i386_TARGET_ARCH */ - - - - -#if x86_64_TARGET_ARCH -getRegister (CmmLit (CmmFloat 0.0 w)) = do - let size = floatSize w - code dst = unitOL (XOR size (OpReg dst) (OpReg dst)) - -- I don't know why there are xorpd, xorps, and pxor instructions. - -- They all appear to do the same thing --SDM - return (Any size code) - -getRegister (CmmLit (CmmFloat f w)) = do - lbl <- getNewLabelNat - let code dst = toOL [ - LDATA ReadOnlyData - [CmmDataLabel lbl, - CmmStaticLit (CmmFloat f w)], - MOV size (OpAddr (ripRel (ImmCLbl lbl))) (OpReg dst) - ] - -- in - return (Any size code) - where size = floatSize w - -#endif /* x86_64_TARGET_ARCH */ - - - - +getRegister (CmmLit lit@(CmmFloat f w)) = + if_sse2 float_const_sse2 float_const_x87 + where + float_const_sse2 + | f == 0.0 = do + let + size = floatSize w + code dst = unitOL (XOR size (OpReg dst) (OpReg dst)) + -- I don't know why there are xorpd, xorps, and pxor instructions. + -- They all appear to do the same thing --SDM + return (Any size code) + + | otherwise = do + Amode addr code <- memConstant (widthInBytes w) lit + loadFloatAmode True w addr code + + float_const_x87 = case w of + W64 + | f == 0.0 -> + let code dst = unitOL (GLDZ dst) + in return (Any FF80 code) + + | f == 1.0 -> + let code dst = unitOL (GLD1 dst) + in return (Any FF80 code) + + _otherwise -> do + Amode addr code <- memConstant (widthInBytes w) lit + loadFloatAmode False w addr code -- catch simple cases of zero- or sign-extended load getRegister (CmmMachOp (MO_UU_Conv W8 W32) [CmmLoad addr _]) = do @@ -560,61 +532,20 @@ getRegister (CmmMachOp (MO_Add W64) [CmmReg (CmmGlobal PicBaseReg), = return $ Any II64 (\dst -> unitOL $ LEA II64 (OpAddr (ripRel (litToImm displacement))) (OpReg dst)) -getRegister (CmmMachOp (MO_F_Neg W32) [x]) = do - x_code <- getAnyReg x - lbl <- getNewLabelNat - let - code dst = x_code dst `appOL` toOL [ - -- This is how gcc does it, so it can't be that bad: - LDATA ReadOnlyData16 [ - CmmAlign 16, - CmmDataLabel lbl, - CmmStaticLit (CmmInt 0x80000000 W32), - CmmStaticLit (CmmInt 0 W32), - CmmStaticLit (CmmInt 0 W32), - CmmStaticLit (CmmInt 0 W32) - ], - XOR FF32 (OpAddr (ripRel (ImmCLbl lbl))) (OpReg dst) - -- xorps, so we need the 128-bit constant - -- ToDo: rip-relative - ] - -- - return (Any FF32 code) - -getRegister (CmmMachOp (MO_F_Neg W64) [x]) = do - x_code <- getAnyReg x - lbl <- getNewLabelNat - let - -- This is how gcc does it, so it can't be that bad: - code dst = x_code dst `appOL` toOL [ - LDATA ReadOnlyData16 [ - CmmAlign 16, - CmmDataLabel lbl, - CmmStaticLit (CmmInt 0x8000000000000000 W64), - CmmStaticLit (CmmInt 0 W64) - ], - -- gcc puts an unpck here. Wonder if we need it. 
- XOR FF64 (OpAddr (ripRel (ImmCLbl lbl))) (OpReg dst) - -- xorpd, so we need the 128-bit constant - ] - -- - return (Any FF64 code) - #endif /* x86_64_TARGET_ARCH */ -getRegister (CmmMachOp mop [x]) -- unary MachOps - = case mop of -#if i386_TARGET_ARCH - MO_F_Neg W32 -> trivialUFCode FF32 (GNEG FF32) x - MO_F_Neg W64 -> trivialUFCode FF64 (GNEG FF64) x -#endif +getRegister (CmmMachOp mop [x]) = do -- unary MachOps + sse2 <- sse2Enabled + case mop of + MO_F_Neg w + | sse2 -> sse2NegCode w x + | otherwise -> trivialUFCode FF80 (GNEG FF80) x MO_S_Neg w -> triv_ucode NEGI (intSize w) - MO_F_Neg w -> triv_ucode NEGI (floatSize w) MO_Not w -> triv_ucode NOT (intSize w) -- Nop conversions @@ -659,18 +590,16 @@ getRegister (CmmMachOp mop [x]) -- unary MachOps -- the form of a movzl and print it as a movl later. #endif -#if i386_TARGET_ARCH - MO_FF_Conv W32 W64 -> conversionNop FF64 x - MO_FF_Conv W64 W32 -> conversionNop FF32 x -#else - MO_FF_Conv W32 W64 -> coerceFP2FP W64 x + MO_FF_Conv W32 W64 + | sse2 -> coerceFP2FP W64 x + | otherwise -> conversionNop FF80 x + MO_FF_Conv W64 W32 -> coerceFP2FP W32 x -#endif MO_FS_Conv from to -> coerceFP2Int from to x MO_SF_Conv from to -> coerceInt2FP from to x - other -> pprPanic "getRegister" (pprMachOp mop) + _other -> pprPanic "getRegister" (pprMachOp mop) where triv_ucode :: (Size -> Operand -> Instr) -> Size -> NatM Register triv_ucode instr size = trivialUCode size (instr size) x @@ -707,41 +636,37 @@ getRegister (CmmMachOp mop [x]) -- unary MachOps return (swizzleRegisterRep e_code new_size) -getRegister e@(CmmMachOp mop [x, y]) -- dyadic MachOps - = case mop of - MO_F_Eq w -> condFltReg EQQ x y - MO_F_Ne w -> condFltReg NE x y - MO_F_Gt w -> condFltReg GTT x y - MO_F_Ge w -> condFltReg GE x y - MO_F_Lt w -> condFltReg LTT x y - MO_F_Le w -> condFltReg LE x y - - MO_Eq rep -> condIntReg EQQ x y - MO_Ne rep -> condIntReg NE x y - - MO_S_Gt rep -> condIntReg GTT x y - MO_S_Ge rep -> condIntReg GE x y - MO_S_Lt rep -> condIntReg LTT x y - MO_S_Le rep -> condIntReg LE x y - - MO_U_Gt rep -> condIntReg GU x y - MO_U_Ge rep -> condIntReg GEU x y - MO_U_Lt rep -> condIntReg LU x y - MO_U_Le rep -> condIntReg LEU x y - -#if i386_TARGET_ARCH - MO_F_Add w -> trivialFCode w GADD x y - MO_F_Sub w -> trivialFCode w GSUB x y - MO_F_Quot w -> trivialFCode w GDIV x y - MO_F_Mul w -> trivialFCode w GMUL x y -#endif - -#if x86_64_TARGET_ARCH - MO_F_Add w -> trivialFCode w ADD x y - MO_F_Sub w -> trivialFCode w SUB x y - MO_F_Quot w -> trivialFCode w FDIV x y - MO_F_Mul w -> trivialFCode w MUL x y -#endif +getRegister (CmmMachOp mop [x, y]) = do -- dyadic MachOps + sse2 <- sse2Enabled + case mop of + MO_F_Eq _ -> condFltReg EQQ x y + MO_F_Ne _ -> condFltReg NE x y + MO_F_Gt _ -> condFltReg GTT x y + MO_F_Ge _ -> condFltReg GE x y + MO_F_Lt _ -> condFltReg LTT x y + MO_F_Le _ -> condFltReg LE x y + + MO_Eq _ -> condIntReg EQQ x y + MO_Ne _ -> condIntReg NE x y + + MO_S_Gt _ -> condIntReg GTT x y + MO_S_Ge _ -> condIntReg GE x y + MO_S_Lt _ -> condIntReg LTT x y + MO_S_Le _ -> condIntReg LE x y + + MO_U_Gt _ -> condIntReg GU x y + MO_U_Ge _ -> condIntReg GEU x y + MO_U_Lt _ -> condIntReg LU x y + MO_U_Le _ -> condIntReg LEU x y + + MO_F_Add w | sse2 -> trivialFCode_sse2 w ADD x y + | otherwise -> trivialFCode_x87 GADD x y + MO_F_Sub w | sse2 -> trivialFCode_sse2 w SUB x y + | otherwise -> trivialFCode_x87 GSUB x y + MO_F_Quot w | sse2 -> trivialFCode_sse2 w FDIV x y + | otherwise -> trivialFCode_x87 GDIV x y + MO_F_Mul w | sse2 -> trivialFCode_sse2 w MUL x y + | 
otherwise -> trivialFCode_x87 GMUL x y MO_Add rep -> add_code rep x y MO_Sub rep -> sub_code rep x y @@ -766,7 +691,7 @@ getRegister e@(CmmMachOp mop [x, y]) -- dyadic MachOps MO_U_Shr rep -> shift_code rep SHR x y {-False-} MO_S_Shr rep -> shift_code rep SAR x y {-False-} - other -> pprPanic "getRegister(x86) - binary CmmMachOp (1)" (pprMachOp mop) + _other -> pprPanic "getRegister(x86) - binary CmmMachOp (1)" (pprMachOp mop) where -------------------- triv_op width instr = trivialCode width op (Just op) x y @@ -803,7 +728,7 @@ getRegister e@(CmmMachOp mop [x, y]) -- dyadic MachOps -> NatM Register {- Case1: shift length as immediate -} - shift_code width instr x y@(CmmLit lit) = do + shift_code width instr x (CmmLit lit) = do x_code <- getAnyReg x let size = intSize width @@ -892,13 +817,9 @@ getRegister e@(CmmMachOp mop [x, y]) -- dyadic MachOps getRegister (CmmLoad mem pk) | isFloatType pk = do - Amode src mem_code <- getAmode mem - let - size = cmmTypeSize pk - code dst = mem_code `snocOL` - IF_ARCH_i386(GLD size src dst, - MOV size (OpAddr src) (OpReg dst)) - return (Any size code) + Amode addr mem_code <- getAmode mem + use_sse2 <- sse2Enabled + loadFloatAmode use_sse2 (typeWidth pk) addr mem_code #if i386_TARGET_ARCH getRegister (CmmLoad mem pk) @@ -933,8 +854,7 @@ getRegister (CmmLit (CmmInt 0 width)) size = intSize width -- x86_64: 32-bit xor is one byte shorter, and zero-extends to 64 bits - adj_size = case size of II64 -> II32; _ -> size - size1 = IF_ARCH_i386( size, adj_size ) + size1 = IF_ARCH_i386( size, case size of II64 -> II32; _ -> size ) code dst = unitOL (XOR size1 (OpReg dst) (OpReg dst)) in @@ -1022,7 +942,9 @@ getNonClobberedReg expr = do return (tmp, code tmp) Fixed rep reg code -- only free regs can be clobbered - | RealReg rr <- reg, isFastTrue (freeReg rr) -> do + | RegReal (RealRegSingle rr) <- reg + , isFastTrue (freeReg rr) + -> do tmp <- getNewRegNat rep return (tmp, code `snocOL` reg2reg rep reg tmp) | otherwise -> @@ -1030,16 +952,13 @@ getNonClobberedReg expr = do reg2reg :: Size -> Reg -> Reg -> Instr reg2reg size src dst -#if i386_TARGET_ARCH - | isFloatSize size = GMOV src dst -#endif - | otherwise = MOV size (OpReg src) (OpReg dst) - + | size == FF80 = GMOV src dst + | otherwise = MOV size (OpReg src) (OpReg dst) -------------------------------------------------------------------------------- getAmode :: CmmExpr -> NatM Amode -getAmode tree@(CmmRegOff _ _) = getAmode (mangleIndexTree tree) +getAmode (CmmRegOff r n) = getAmode $ mangleIndexTree r n #if x86_64_TARGET_ARCH @@ -1052,18 +971,18 @@ getAmode (CmmMachOp (MO_Add W64) [CmmReg (CmmGlobal PicBaseReg), -- This is all just ridiculous, since it carefully undoes -- what mangleIndexTree has just done. -getAmode (CmmMachOp (MO_Sub rep) [x, CmmLit lit@(CmmInt i _)]) +getAmode (CmmMachOp (MO_Sub _rep) [x, CmmLit lit@(CmmInt i _)]) | is32BitLit lit -- ASSERT(rep == II32)??? = do (x_reg, x_code) <- getSomeReg x let off = ImmInt (-(fromInteger i)) return (Amode (AddrBaseIndex (EABaseReg x_reg) EAIndexNone off) x_code) -getAmode (CmmMachOp (MO_Add rep) [x, CmmLit lit@(CmmInt i _)]) +getAmode (CmmMachOp (MO_Add _rep) [x, CmmLit lit]) | is32BitLit lit -- ASSERT(rep == II32)??? 
= do (x_reg, x_code) <- getSomeReg x - let off = ImmInt (fromInteger i) + let off = litToImm lit return (Amode (AddrBaseIndex (EABaseReg x_reg) EAIndexNone off) x_code) -- Turn (lit1 << n + lit2) into (lit2 + lit1 << n) so it will be @@ -1072,12 +991,12 @@ getAmode (CmmMachOp (MO_Add rep) [a@(CmmMachOp (MO_Shl _) _), b@(CmmLit _)]) = getAmode (CmmMachOp (MO_Add rep) [b,a]) -getAmode (CmmMachOp (MO_Add rep) [x, CmmMachOp (MO_Shl _) +getAmode (CmmMachOp (MO_Add _) [x, CmmMachOp (MO_Shl _) [y, CmmLit (CmmInt shift _)]]) | shift == 0 || shift == 1 || shift == 2 || shift == 3 = x86_complex_amode x y shift 0 -getAmode (CmmMachOp (MO_Add rep) +getAmode (CmmMachOp (MO_Add _) [x, CmmMachOp (MO_Add _) [CmmMachOp (MO_Shl _) [y, CmmLit (CmmInt shift _)], CmmLit (CmmInt offset _)]]) @@ -1085,7 +1004,7 @@ getAmode (CmmMachOp (MO_Add rep) && is32BitInteger offset = x86_complex_amode x y shift offset -getAmode (CmmMachOp (MO_Add rep) [x,y]) +getAmode (CmmMachOp (MO_Add _) [x,y]) = x86_complex_amode x y 0 0 getAmode (CmmLit lit) | is32BitLit lit @@ -1104,7 +1023,8 @@ x86_complex_amode base index shift offset (y_reg, y_code) <- getSomeReg index let code = x_code `appOL` y_code - base = case shift of 0 -> 1; 1 -> 2; 2 -> 4; 3 -> 8 + base = case shift of 0 -> 1; 1 -> 2; 2 -> 4; 3 -> 8; + n -> panic $ "x86_complex_amode: unhandled shift! (" ++ show n ++ ")" return (Amode (AddrBaseIndex (EABaseReg x_reg) (EAIndex y_reg base) (ImmInt (fromIntegral offset))) code) @@ -1120,58 +1040,81 @@ x86_complex_amode base index shift offset -- (see trivialCode where this function is used for an example). getNonClobberedOperand :: CmmExpr -> NatM (Operand, InstrBlock) -#if x86_64_TARGET_ARCH -getNonClobberedOperand (CmmLit lit) - | isSuitableFloatingPointLit lit = do - lbl <- getNewLabelNat - let code = unitOL (LDATA ReadOnlyData [CmmDataLabel lbl, - CmmStaticLit lit]) - return (OpAddr (ripRel (ImmCLbl lbl)), code) -#endif -getNonClobberedOperand (CmmLit lit) - | is32BitLit lit && not (isFloatType (cmmLitType lit)) = - return (OpImm (litToImm lit), nilOL) -getNonClobberedOperand (CmmLoad mem pk) - | IF_ARCH_i386(not (isFloatType pk) && not (isWord64 pk), True) = do - Amode src mem_code <- getAmode mem - (src',save_code) <- - if (amodeCouldBeClobbered src) - then do - tmp <- getNewRegNat archWordSize - return (AddrBaseIndex (EABaseReg tmp) EAIndexNone (ImmInt 0), - unitOL (LEA II32 (OpAddr src) (OpReg tmp))) - else - return (src, nilOL) - return (OpAddr src', save_code `appOL` mem_code) -getNonClobberedOperand e = do +getNonClobberedOperand (CmmLit lit) = do + use_sse2 <- sse2Enabled + if use_sse2 && isSuitableFloatingPointLit lit + then do + let CmmFloat _ w = lit + Amode addr code <- memConstant (widthInBytes w) lit + return (OpAddr addr, code) + else do + + if is32BitLit lit && not (isFloatType (cmmLitType lit)) + then return (OpImm (litToImm lit), nilOL) + else getNonClobberedOperand_generic (CmmLit lit) + +getNonClobberedOperand (CmmLoad mem pk) = do + use_sse2 <- sse2Enabled + if (not (isFloatType pk) || use_sse2) + && IF_ARCH_i386(not (isWord64 pk), True) + then do + Amode src mem_code <- getAmode mem + (src',save_code) <- + if (amodeCouldBeClobbered src) + then do + tmp <- getNewRegNat archWordSize + return (AddrBaseIndex (EABaseReg tmp) EAIndexNone (ImmInt 0), + unitOL (LEA II32 (OpAddr src) (OpReg tmp))) + else + return (src, nilOL) + return (OpAddr src', save_code `appOL` mem_code) + else do + getNonClobberedOperand_generic (CmmLoad mem pk) + +getNonClobberedOperand e = getNonClobberedOperand_generic e + 
+getNonClobberedOperand_generic :: CmmExpr -> NatM (Operand, InstrBlock) +getNonClobberedOperand_generic e = do (reg, code) <- getNonClobberedReg e return (OpReg reg, code) amodeCouldBeClobbered :: AddrMode -> Bool amodeCouldBeClobbered amode = any regClobbered (addrModeRegs amode) -regClobbered (RealReg rr) = isFastTrue (freeReg rr) +regClobbered :: Reg -> Bool +regClobbered (RegReal (RealRegSingle rr)) = isFastTrue (freeReg rr) regClobbered _ = False -- getOperand: the operand is not required to remain valid across the -- computation of an arbitrary expression. getOperand :: CmmExpr -> NatM (Operand, InstrBlock) -#if x86_64_TARGET_ARCH -getOperand (CmmLit lit) - | isSuitableFloatingPointLit lit = do - lbl <- getNewLabelNat - let code = unitOL (LDATA ReadOnlyData [CmmDataLabel lbl, - CmmStaticLit lit]) - return (OpAddr (ripRel (ImmCLbl lbl)), code) -#endif -getOperand (CmmLit lit) - | is32BitLit lit && not (isFloatType (cmmLitType lit)) = do - return (OpImm (litToImm lit), nilOL) -getOperand (CmmLoad mem pk) - | IF_ARCH_i386(not (isFloatType pk) && not (isWord64 pk), True) = do - Amode src mem_code <- getAmode mem - return (OpAddr src, mem_code) -getOperand e = do + +getOperand (CmmLit lit) = do + use_sse2 <- sse2Enabled + if (use_sse2 && isSuitableFloatingPointLit lit) + then do + let CmmFloat _ w = lit + Amode addr code <- memConstant (widthInBytes w) lit + return (OpAddr addr, code) + else do + + if is32BitLit lit && not (isFloatType (cmmLitType lit)) + then return (OpImm (litToImm lit), nilOL) + else getOperand_generic (CmmLit lit) + +getOperand (CmmLoad mem pk) = do + use_sse2 <- sse2Enabled + if (not (isFloatType pk) || use_sse2) && IF_ARCH_i386(not (isWord64 pk), True) + then do + Amode src mem_code <- getAmode mem + return (OpAddr src, mem_code) + else + getOperand_generic (CmmLoad mem pk) + +getOperand e = getOperand_generic e + +getOperand_generic :: CmmExpr -> NatM (Operand, InstrBlock) +getOperand_generic e = do (reg, code) <- getSomeReg e return (OpReg reg, code) @@ -1181,28 +1124,67 @@ isOperand (CmmLit lit) = is32BitLit lit || isSuitableFloatingPointLit lit isOperand _ = False +memConstant :: Int -> CmmLit -> NatM Amode +memConstant align lit = do +#ifdef x86_64_TARGET_ARCH + lbl <- getNewLabelNat + let addr = ripRel (ImmCLbl lbl) + addr_code = nilOL +#else + lbl <- getNewLabelNat + dflags <- getDynFlagsNat + dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl + Amode addr addr_code <- getAmode dynRef +#endif + let code = + LDATA ReadOnlyData + [CmmAlign align, + CmmDataLabel lbl, + CmmStaticLit lit] + `consOL` addr_code + return (Amode addr code) + + +loadFloatAmode :: Bool -> Width -> AddrMode -> InstrBlock -> NatM Register +loadFloatAmode use_sse2 w addr addr_code = do + let size = floatSize w + code dst = addr_code `snocOL` + if use_sse2 + then MOV size (OpAddr addr) (OpReg dst) + else GLD size addr dst + -- in + return (Any (if use_sse2 then size else FF80) code) + + -- if we want a floating-point literal as an operand, we can -- use it directly from memory. However, if the literal is -- zero, we're better off generating it into a register using -- xor. 
+isSuitableFloatingPointLit :: CmmLit -> Bool isSuitableFloatingPointLit (CmmFloat f _) = f /= 0.0 isSuitableFloatingPointLit _ = False getRegOrMem :: CmmExpr -> NatM (Operand, InstrBlock) -getRegOrMem (CmmLoad mem pk) - | IF_ARCH_i386(not (isFloatType pk) && not (isWord64 pk), True) = do - Amode src mem_code <- getAmode mem - return (OpAddr src, mem_code) +getRegOrMem e@(CmmLoad mem pk) = do + use_sse2 <- sse2Enabled + if (not (isFloatType pk) || use_sse2) && IF_ARCH_i386(not (isWord64 pk), True) + then do + Amode src mem_code <- getAmode mem + return (OpAddr src, mem_code) + else do + (reg, code) <- getNonClobberedReg e + return (OpReg reg, code) getRegOrMem e = do (reg, code) <- getNonClobberedReg e return (OpReg reg, code) +is32BitLit :: CmmLit -> Bool #if x86_64_TARGET_ARCH is32BitLit (CmmInt i W64) = is32BitInteger i -- assume that labels are in the range 0-2^31-1: this assumes the -- small memory model (see gcc docs, -mcmodel=small). #endif -is32BitLit x = True +is32BitLit _ = True @@ -1230,20 +1212,20 @@ getCondCode (CmmMachOp mop [x, y]) MO_F_Lt W64 -> condFltCode LTT x y MO_F_Le W64 -> condFltCode LE x y - MO_Eq rep -> condIntCode EQQ x y - MO_Ne rep -> condIntCode NE x y + MO_Eq _ -> condIntCode EQQ x y + MO_Ne _ -> condIntCode NE x y - MO_S_Gt rep -> condIntCode GTT x y - MO_S_Ge rep -> condIntCode GE x y - MO_S_Lt rep -> condIntCode LTT x y - MO_S_Le rep -> condIntCode LE x y + MO_S_Gt _ -> condIntCode GTT x y + MO_S_Ge _ -> condIntCode GE x y + MO_S_Lt _ -> condIntCode LTT x y + MO_S_Le _ -> condIntCode LE x y - MO_U_Gt rep -> condIntCode GU x y - MO_U_Ge rep -> condIntCode GEU x y - MO_U_Lt rep -> condIntCode LU x y - MO_U_Le rep -> condIntCode LEU x y + MO_U_Gt _ -> condIntCode GU x y + MO_U_Ge _ -> condIntCode GEU x y + MO_U_Lt _ -> condIntCode LU x y + MO_U_Le _ -> condIntCode LEU x y - other -> pprPanic "getCondCode(x86,x86_64,sparc)" (ppr (CmmMachOp mop [x,y])) + _other -> pprPanic "getCondCode(x86,x86_64,sparc)" (ppr (CmmMachOp mop [x,y])) getCondCode other = pprPanic "getCondCode(2)(x86,sparc)" (ppr other) @@ -1267,8 +1249,8 @@ condIntCode cond (CmmLoad x pk) (CmmLit lit) | is32BitLit lit = do -- anything vs zero, using a mask -- TODO: Add some sanity checking!!!! -condIntCode cond (CmmMachOp (MO_And rep) [x,o2]) (CmmLit (CmmInt 0 pk)) - | (CmmLit lit@(CmmInt mask pk2)) <- o2, is32BitLit lit +condIntCode cond (CmmMachOp (MO_And _) [x,o2]) (CmmLit (CmmInt 0 pk)) + | (CmmLit lit@(CmmInt mask _)) <- o2, is32BitLit lit = do (x_reg, x_code) <- getSomeReg x let @@ -1312,40 +1294,35 @@ condIntCode cond x y = do -------------------------------------------------------------------------------- condFltCode :: Cond -> CmmExpr -> CmmExpr -> NatM CondCode -#if i386_TARGET_ARCH condFltCode cond x y - = ASSERT(cond `elem` ([EQQ, NE, LE, LTT, GE, GTT])) do - (x_reg, x_code) <- getNonClobberedReg x - (y_reg, y_code) <- getSomeReg y - let - code = x_code `appOL` y_code `snocOL` - GCMP cond x_reg y_reg - -- The GCMP insn does the test and sets the zero flag if comparable - -- and true. Hence we always supply EQQ as the condition to test. - return (CondCode True EQQ code) - -#elif x86_64_TARGET_ARCH --- in the SSE2 comparison ops (ucomiss, ucomisd) the left arg may be --- an operand, but the right must be a reg. We can probably do better --- than this general case... 
-condFltCode cond x y = do - (x_reg, x_code) <- getNonClobberedReg x - (y_op, y_code) <- getOperand y - let - code = x_code `appOL` - y_code `snocOL` - CMP (floatSize $ cmmExprWidth x) y_op (OpReg x_reg) - -- NB(1): we need to use the unsigned comparison operators on the - -- result of this comparison. - -- in - return (CondCode True (condToUnsigned cond) code) - -#else -condFltCode = panic "X86.condFltCode: not defined" - -#endif - + = if_sse2 condFltCode_sse2 condFltCode_x87 + where + condFltCode_x87 + = ASSERT(cond `elem` ([EQQ, NE, LE, LTT, GE, GTT])) do + (x_reg, x_code) <- getNonClobberedReg x + (y_reg, y_code) <- getSomeReg y + let + code = x_code `appOL` y_code `snocOL` + GCMP cond x_reg y_reg + -- The GCMP insn does the test and sets the zero flag if comparable + -- and true. Hence we always supply EQQ as the condition to test. + return (CondCode True EQQ code) + + -- in the SSE2 comparison ops (ucomiss, ucomisd) the left arg may be + -- an operand, but the right must be a reg. We can probably do better + -- than this general case... + condFltCode_sse2 = do + (x_reg, x_code) <- getNonClobberedReg x + (y_op, y_code) <- getOperand y + let + code = x_code `appOL` + y_code `snocOL` + CMP (floatSize $ cmmExprWidth x) y_op (OpReg x_reg) + -- NB(1): we need to use the unsigned comparison operators on the + -- result of this comparison. + -- in + return (CondCode True (condToUnsigned cond) code) -- ----------------------------------------------------------------------------- -- Generating assignments @@ -1411,34 +1388,36 @@ assignMem_IntCode pk addr src = do -- Assign; dst is a reg, rhs is mem assignReg_IntCode pk reg (CmmLoad src _) = do load_code <- intLoadCode (MOV pk) src - return (load_code (getRegisterReg reg)) + return (load_code (getRegisterReg False{-no sse2-} reg)) -- dst is a reg, but src could be anything -assignReg_IntCode pk reg src = do +assignReg_IntCode _ reg src = do code <- getAnyReg src - return (code (getRegisterReg reg)) + return (code (getRegisterReg False{-no sse2-} reg)) -- Floating point assignment to memory assignMem_FltCode pk addr src = do (src_reg, src_code) <- getNonClobberedReg src Amode addr addr_code <- getAmode addr + use_sse2 <- sse2Enabled let code = src_code `appOL` addr_code `snocOL` - IF_ARCH_i386(GST pk src_reg addr, - MOV pk (OpReg src_reg) (OpAddr addr)) + if use_sse2 then MOV pk (OpReg src_reg) (OpAddr addr) + else GST pk src_reg addr return code -- Floating point assignment to a register/temporary -assignReg_FltCode pk reg src = do +assignReg_FltCode _ reg src = do + use_sse2 <- sse2Enabled src_code <- getAnyReg src - return (src_code (getRegisterReg reg)) + return (src_code (getRegisterReg use_sse2 reg)) genJump :: CmmExpr{-the branch target-} -> NatM InstrBlock -genJump (CmmLoad mem pk) = do +genJump (CmmLoad mem _) = do Amode target code <- getAmode mem return (code `snocOL` JMP (OpAddr target)) @@ -1475,15 +1454,10 @@ genCondJump -> CmmExpr -- the condition on which to branch -> NatM InstrBlock -#if i386_TARGET_ARCH -genCondJump id bool = do - CondCode _ cond code <- getCondCode bool - return (code `snocOL` JXX cond id) - -#elif x86_64_TARGET_ARCH genCondJump id bool = do CondCode is_float cond cond_code <- getCondCode bool - if not is_float + use_sse2 <- sse2Enabled + if not is_float || not use_sse2 then return (cond_code `snocOL` JXX cond id) else do @@ -1511,13 +1485,6 @@ genCondJump id bool = do ] return (cond_code `appOL` code) -#else -genCondJump = panic "X86.genCondJump: not defined" - -#endif - - - -- 
----------------------------------------------------------------------------- -- Generating C calls @@ -1543,11 +1510,19 @@ genCCall (CmmPrim MO_WriteBarrier) _ _ = return nilOL -- write barrier compiles to no code on x86/x86-64; -- we keep it this long in order to prevent earlier optimisations. +-- void return type prim op +genCCall (CmmPrim op) [] args = + outOfLineCmmOp op Nothing args + -- we only cope with a single result for foreign calls -genCCall (CmmPrim op) [CmmHinted r _] args = do +genCCall (CmmPrim op) [r_hinted@(CmmHinted r _)] args = do l1 <- getNewLabelNat l2 <- getNewLabelNat - case op of + sse2 <- sse2Enabled + if sse2 + then + outOfLineCmmOp op (Just r_hinted) args + else case op of MO_F32_Sqrt -> actuallyInlineFloatOp GSQRT FF32 args MO_F64_Sqrt -> actuallyInlineFloatOp GSQRT FF64 args @@ -1560,12 +1535,17 @@ genCCall (CmmPrim op) [CmmHinted r _] args = do MO_F32_Tan -> actuallyInlineFloatOp (\s -> GTAN s l1 l2) FF32 args MO_F64_Tan -> actuallyInlineFloatOp (\s -> GTAN s l1 l2) FF64 args - other_op -> outOfLineFloatOp op r args + _other_op -> outOfLineCmmOp op (Just r_hinted) args + where actuallyInlineFloatOp instr size [CmmHinted x _] - = do res <- trivialUFCode size (instr size) x + = do res <- trivialUFCode size (instr size) x any <- anyReg res - return (any (getRegisterReg (CmmLocal r))) + return (any (getRegisterReg False (CmmLocal r))) + + actuallyInlineFloatOp _ _ args + = panic $ "genCCall.actuallyInlineFloatOp: bad number of arguments! (" + ++ show (length args) ++ ")" genCCall target dest_regs args = do let @@ -1580,22 +1560,25 @@ genCCall target dest_regs args = do setDeltaNat (delta0 - arg_pad_size) #endif - push_codes <- mapM push_arg (reverse args) + use_sse2 <- sse2Enabled + push_codes <- mapM (push_arg use_sse2) (reverse args) delta <- getDeltaNat -- in -- deal with static vs dynamic call targets (callinsns,cconv) <- case target of - -- CmmPrim -> ... CmmCallee (CmmLit (CmmLabel lbl)) conv -> -- ToDo: stdcall arg sizes return (unitOL (CALL (Left fn_imm) []), conv) where fn_imm = ImmCLbl lbl CmmCallee expr conv - -> do { (dyn_c, dyn_r) <- get_op expr + -> do { (dyn_r, dyn_c) <- getSomeReg expr ; ASSERT( isWord32 (cmmExprType expr) ) return (dyn_c `snocOL` CALL (Right dyn_r) [], conv) } + CmmPrim _ + -> panic $ "genCCall: Can't handle CmmPrim call type here, error " + ++ "probably because too many return values." 
let push_code #if darwin_TARGET_OS @@ -1606,12 +1589,24 @@ genCCall target dest_regs args = do | otherwise #endif = concatOL push_codes + + -- Deallocate parameters after call for ccall; + -- but not for stdcall (callee does it) + -- + -- We have to pop any stack padding we added + -- on Darwin even if we are doing stdcall, though (#5052) + pop_size | cconv /= StdCallConv = tot_arg_size + | otherwise +#if darwin_TARGET_OS + = arg_pad_size +#else + = 0 +#endif + call = callinsns `appOL` toOL ( - -- Deallocate parameters after call for ccall; - -- but not for stdcall (callee does it) - (if cconv == StdCallConv || tot_arg_size==0 then [] else - [ADD II32 (OpImm (ImmInt tot_arg_size)) (OpReg esp)]) + (if pop_size==0 then [] else + [ADD II32 (OpImm (ImmInt pop_size)) (OpReg esp)]) ++ [DELTA (delta + tot_arg_size)] ) @@ -1622,16 +1617,27 @@ genCCall target dest_regs args = do -- assign the results, if necessary assign_code [] = nilOL assign_code [CmmHinted dest _hint] - | isFloatType ty = unitOL (GMOV fake0 r_dest) + | isFloatType ty = + if use_sse2 + then let tmp_amode = AddrBaseIndex (EABaseReg esp) + EAIndexNone + (ImmInt 0) + sz = floatSize w + in toOL [ SUB II32 (OpImm (ImmInt b)) (OpReg esp), + GST sz fake0 tmp_amode, + MOV sz (OpAddr tmp_amode) (OpReg r_dest), + ADD II32 (OpImm (ImmInt b)) (OpReg esp)] + else unitOL (GMOV fake0 r_dest) | isWord64 ty = toOL [MOV II32 (OpReg eax) (OpReg r_dest), MOV II32 (OpReg edx) (OpReg r_dest_hi)] | otherwise = unitOL (MOV (intSize w) (OpReg eax) (OpReg r_dest)) where ty = localRegType dest w = typeWidth ty + b = widthInBytes w r_dest_hi = getHiVRegFromLo r_dest - r_dest = getRegisterReg (CmmLocal dest) - assign_code many = panic "genCCall.assign_code many" + r_dest = getRegisterReg use_sse2 (CmmLocal dest) + assign_code many = pprPanic "genCCall.assign_code - too many return values:" (ppr many) return (push_code `appOL` call `appOL` @@ -1641,14 +1647,15 @@ genCCall target dest_regs args = do arg_size :: CmmType -> Int -- Width in bytes arg_size ty = widthInBytes (typeWidth ty) +#if darwin_TARGET_OS roundTo a x | x `mod` a == 0 = x | otherwise = x + a - (x `mod` a) +#endif - - push_arg :: HintedCmmActual {-current argument-} + push_arg :: Bool -> HintedCmmActual {-current argument-} -> NatM InstrBlock -- code - push_arg (CmmHinted arg _hint) -- we don't need the hints on x86 + push_arg use_sse2 (CmmHinted arg _hint) -- we don't need the hints on x86 | isWord64 arg_ty = do ChildCode64 code r_lo <- iselExpr64 arg delta <- getDeltaNat @@ -1662,32 +1669,35 @@ genCCall target dest_regs args = do DELTA (delta-8)] ) - | otherwise = do - (code, reg) <- get_op arg + | isFloatType arg_ty = do + (reg, code) <- getSomeReg arg delta <- getDeltaNat - let size = arg_size arg_ty -- Byte size setDeltaNat (delta-size) - if (isFloatType arg_ty) - then return (code `appOL` + return (code `appOL` toOL [SUB II32 (OpImm (ImmInt size)) (OpReg esp), DELTA (delta-size), - GST (floatSize (typeWidth arg_ty)) - reg (AddrBaseIndex (EABaseReg esp) + let addr = AddrBaseIndex (EABaseReg esp) EAIndexNone - (ImmInt 0))] - ) - else return (code `snocOL` - PUSH II32 (OpReg reg) `snocOL` - DELTA (delta-size) + (ImmInt 0) + size = floatSize (typeWidth arg_ty) + in + if use_sse2 + then MOV size (OpReg reg) (OpAddr addr) + else GST size reg addr + ] ) + + | otherwise = do + (operand, code) <- getOperand arg + delta <- getDeltaNat + setDeltaNat (delta-size) + return (code `snocOL` + PUSH II32 operand `snocOL` + DELTA (delta-size)) + where arg_ty = cmmExprType arg - - ------------ - get_op :: 
CmmExpr -> NatM (InstrBlock, Reg) -- code, reg - get_op op = do - (reg,code) <- getSomeReg op - return (code, reg) + size = arg_size arg_ty -- Byte size #elif x86_64_TARGET_ARCH @@ -1695,9 +1705,13 @@ genCCall (CmmPrim MO_WriteBarrier) _ _ = return nilOL -- write barrier compiles to no code on x86/x86-64; -- we keep it this long in order to prevent earlier optimisations. +-- void return type prim op +genCCall (CmmPrim op) [] args = + outOfLineCmmOp op Nothing args -genCCall (CmmPrim op) [CmmHinted r _] args = - outOfLineFloatOp op r args +-- we only cope with a single result for foreign calls +genCCall (CmmPrim op) [res] args = + outOfLineCmmOp op (Just res) args genCCall target dest_regs args = do @@ -1741,7 +1755,6 @@ genCCall target dest_regs args = do -- deal with static vs dynamic call targets (callinsns,cconv) <- case target of - -- CmmPrim -> ... CmmCallee (CmmLit (CmmLabel lbl)) conv -> -- ToDo: stdcall arg sizes return (unitOL (CALL (Left fn_imm) arg_regs), conv) @@ -1749,15 +1762,18 @@ genCCall target dest_regs args = do CmmCallee expr conv -> do (dyn_r, dyn_c) <- getSomeReg expr return (dyn_c `snocOL` CALL (Right dyn_r) arg_regs, conv) + CmmPrim _ + -> panic $ "genCCall: Can't handle CmmPrim call type here, error " + ++ "probably because too many return values." let - -- The x86_64 ABI requires us to set %al to the number of SSE + -- The x86_64 ABI requires us to set %al to the number of SSE2 -- registers that contain arguments, if the called routine -- is a varargs function. We don't know whether it's a -- varargs function or not, so we have to assume it is. -- -- It's not safe to omit this assignment, even if the number - -- of SSE regs in use is zero. If %al is larger than 8 + -- of SSE2 regs in use is zero. If %al is larger than 8 -- on entry to a varargs function, seg faults ensue. 
 	assign_eax n = unitOL (MOV II32 (OpImm (ImmInt n)) (OpReg eax))
 
@@ -1779,12 +1795,12 @@ genCCall target dest_regs args = do
     assign_code [CmmHinted dest _hint] = 
 	case typeWidth rep of
 	      W32 | isFloatType rep -> unitOL (MOV (floatSize W32) (OpReg xmm0) (OpReg r_dest))
-	      W64 | isFloatType rep -> unitOL (MOV (floatSize W32) (OpReg xmm0) (OpReg r_dest))
+	      W64 | isFloatType rep -> unitOL (MOV (floatSize W64) (OpReg xmm0) (OpReg r_dest))
	      _ -> unitOL (MOV (cmmTypeSize rep) (OpReg rax) (OpReg r_dest))
 	where 
 	      rep = localRegType dest
-	      r_dest = getRegisterReg (CmmLocal dest)
+	      r_dest = getRegisterReg True (CmmLocal dest)
-    assign_code many = panic "genCCall.assign_code many"
+    assign_code _many = panic "genCCall.assign_code many"
 
     return (load_args_code 	`appOL` 
 	    adjust_rsp 		`appOL`
@@ -1826,7 +1842,7 @@ genCCall target dest_regs args = do
 	     return ((CmmHinted arg hint):args', ars, frs, code')
 
    push_args [] code = return code
-    push_args ((CmmHinted arg hint):rest) code
+    push_args ((CmmHinted arg _):rest) code
       | isFloatType arg_rep = do
 	 (arg_reg, arg_code) <- getSomeReg arg
         delta <- getDeltaNat
@@ -1859,28 +1875,25 @@ genCCall = panic "X86.genCCAll: not defined"
 
 #endif /* x86_64_TARGET_ARCH */
 
-
-
-outOfLineFloatOp :: CallishMachOp -> CmmFormal -> HintedCmmActuals -> NatM InstrBlock
-outOfLineFloatOp mop res args
+outOfLineCmmOp :: CallishMachOp -> Maybe HintedCmmFormal -> HintedCmmActuals -> NatM InstrBlock
+outOfLineCmmOp mop res args
   = do
      dflags <- getDynFlagsNat
     targetExpr <- cmmMakeDynamicReference dflags addImportNat CallReference lbl
     let target = CmmCallee targetExpr CCallConv
-     
-     if isFloat64 (localRegType res)
-       then
-         stmtToInstrs (CmmCall target [CmmHinted res NoHint] args CmmUnsafe CmmMayReturn)
-       else do
-         uq <- getUniqueNat
-         let 
-           tmp = LocalReg uq f64
-         -- in
-         code1 <- stmtToInstrs (CmmCall target [CmmHinted tmp NoHint] args CmmUnsafe CmmMayReturn)
-         code2 <- stmtToInstrs (CmmAssign (CmmLocal res) (CmmReg (CmmLocal tmp)))
-         return (code1 `appOL` code2)
+
+     stmtToInstrs (CmmCall target (catMaybes [res]) args' CmmUnsafe CmmMayReturn)
  where
-	lbl = mkForeignLabel fn Nothing False IsFunction
+      -- Assume we can call these functions directly, and that they're not in a dynamic library.
+      -- TODO: Why is this ok? Under Linux this code will be in libm.so
+      --       Is it because they're really implemented as primitive instructions by the assembler??  -- BL 2009/12/31 
+      lbl = mkForeignLabel fn Nothing ForeignLabelInThisPackage IsFunction
+
+      args' = case mop of
+                  MO_Memcpy    -> init args
+                  MO_Memset    -> init args
+                  MO_Memmove   -> init args
+                  _            -> args
 
 	fn = case mop of
 	      MO_F32_Sqrt  -> fsLit "sqrtf"
@@ -1915,8 +1928,11 @@ outOfLineFloatOp mop res args
 	      MO_F64_Tanh  -> fsLit "tanh"
 	      MO_F64_Pwr   -> fsLit "pow"
 
+	      MO_Memcpy    -> fsLit "memcpy"
+	      MO_Memset    -> fsLit "memset"
+	      MO_Memmove   -> fsLit "memmove"
 
-
+	      other -> panic $ "outOfLineCmmOp: unmatched op! 
(" ++ show other ++ ")" -- ----------------------------------------------------------------------------- @@ -1932,16 +1948,7 @@ genSwitch expr ids dflags <- getDynFlagsNat dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl (tableReg,t_code) <- getSomeReg $ dynRef - let - jumpTable = map jumpTableEntryRel ids - - jumpTableEntryRel Nothing - = CmmStaticLit (CmmInt 0 wordWidth) - jumpTableEntryRel (Just (BlockId id)) - = CmmStaticLit (CmmLabelDiffOff blockLabel lbl 0) - where blockLabel = mkAsmTempLabel id - - op = OpAddr (AddrBaseIndex (EABaseReg tableReg) + let op = OpAddr (AddrBaseIndex (EABaseReg tableReg) (EAIndex reg wORD_SIZE) (ImmInt 0)) #if x86_64_TARGET_ARCH @@ -1954,8 +1961,7 @@ genSwitch expr ids code = e_code `appOL` t_code `appOL` toOL [ ADD (intSize wordWidth) op (OpReg tableReg), - JMP_TBL (OpReg tableReg) [ id | Just id <- ids ], - LDATA Text (CmmDataLabel lbl : jumpTable) + JMP_TBL (OpReg tableReg) ids Text lbl ] #else -- HACK: On x86_64 binutils<2.17 is only able to generate PC32 @@ -1965,20 +1971,15 @@ genSwitch expr ids -- conjunction with the hack in PprMach.hs/pprDataItem once -- binutils 2.17 is standard. code = e_code `appOL` t_code `appOL` toOL [ - LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable), - MOVSxL II32 - (OpAddr (AddrBaseIndex (EABaseReg tableReg) - (EAIndex reg wORD_SIZE) (ImmInt 0))) - (OpReg reg), + MOVSxL II32 op (OpReg reg), ADD (intSize wordWidth) (OpReg reg) (OpReg tableReg), - JMP_TBL (OpReg tableReg) [ id | Just id <- ids ] + JMP_TBL (OpReg tableReg) ids ReadOnlyData lbl ] #endif #else code = e_code `appOL` t_code `appOL` toOL [ - LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable), ADD (intSize wordWidth) op (OpReg tableReg), - JMP_TBL (OpReg tableReg) [ id | Just id <- ids ] + JMP_TBL (OpReg tableReg) ids ReadOnlyData lbl ] #endif return code @@ -1986,16 +1987,29 @@ genSwitch expr ids = do (reg,e_code) <- getSomeReg expr lbl <- getNewLabelNat - let - jumpTable = map jumpTableEntry ids - op = OpAddr (AddrBaseIndex EABaseNone (EAIndex reg wORD_SIZE) (ImmCLbl lbl)) + let op = OpAddr (AddrBaseIndex EABaseNone (EAIndex reg wORD_SIZE) (ImmCLbl lbl)) code = e_code `appOL` toOL [ - LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable), - JMP_TBL op [ id | Just id <- ids ] + JMP_TBL op ids ReadOnlyData lbl ] -- in return code +generateJumpTableForInstr :: Instr -> Maybe (NatCmmTop Instr) +generateJumpTableForInstr (JMP_TBL _ ids section lbl) = Just (createJumpTable ids section lbl) +generateJumpTableForInstr _ = Nothing + +createJumpTable :: [Maybe BlockId] -> Section -> CLabel -> GenCmmTop CmmStatic h g +createJumpTable ids section lbl + = let jumpTable + | opt_PIC = + let jumpTableEntryRel Nothing + = CmmStaticLit (CmmInt 0 wordWidth) + jumpTableEntryRel (Just blockid) + = CmmStaticLit (CmmLabelDiffOff blockLabel lbl 0) + where blockLabel = mkAsmTempLabel (getUnique blockid) + in map jumpTableEntryRel ids + | otherwise = map jumpTableEntry ids + in CmmData section (CmmDataLabel lbl : jumpTable) -- ----------------------------------------------------------------------------- -- 'condIntReg' and 'condFltReg': condition codes into registers @@ -2022,72 +2036,64 @@ condIntReg cond x y = do condFltReg :: Cond -> CmmExpr -> CmmExpr -> NatM Register - -#if i386_TARGET_ARCH -condFltReg cond x y = do - CondCode _ cond cond_code <- condFltCode cond x y - tmp <- getNewRegNat II8 - let - code dst = cond_code `appOL` toOL [ - SETCC cond (OpReg tmp), - MOVZxL II8 (OpReg tmp) (OpReg dst) - ] - -- in - return (Any II32 code) - -#elif x86_64_TARGET_ARCH 
-condFltReg cond x y = do - CondCode _ cond cond_code <- condFltCode cond x y - tmp1 <- getNewRegNat wordSize - tmp2 <- getNewRegNat wordSize - let - -- We have to worry about unordered operands (eg. comparisons - -- against NaN). If the operands are unordered, the comparison - -- sets the parity flag, carry flag and zero flag. - -- All comparisons are supposed to return false for unordered - -- operands except for !=, which returns true. - -- - -- Optimisation: we don't have to test the parity flag if we - -- know the test has already excluded the unordered case: eg > - -- and >= test for a zero carry flag, which can only occur for - -- ordered operands. - -- - -- ToDo: by reversing comparisons we could avoid testing the - -- parity flag in more cases. - - code dst = - cond_code `appOL` - (case cond of - NE -> or_unordered dst - GU -> plain_test dst - GEU -> plain_test dst - _ -> and_ordered dst) - - plain_test dst = toOL [ - SETCC cond (OpReg tmp1), - MOVZxL II8 (OpReg tmp1) (OpReg dst) - ] - or_unordered dst = toOL [ - SETCC cond (OpReg tmp1), - SETCC PARITY (OpReg tmp2), - OR II8 (OpReg tmp1) (OpReg tmp2), - MOVZxL II8 (OpReg tmp2) (OpReg dst) - ] - and_ordered dst = toOL [ - SETCC cond (OpReg tmp1), - SETCC NOTPARITY (OpReg tmp2), - AND II8 (OpReg tmp1) (OpReg tmp2), - MOVZxL II8 (OpReg tmp2) (OpReg dst) - ] - -- in - return (Any II32 code) - -#else -condFltReg = panic "X86.condFltReg: not defined" - -#endif - - +condFltReg cond x y = if_sse2 condFltReg_sse2 condFltReg_x87 + where + condFltReg_x87 = do + CondCode _ cond cond_code <- condFltCode cond x y + tmp <- getNewRegNat II8 + let + code dst = cond_code `appOL` toOL [ + SETCC cond (OpReg tmp), + MOVZxL II8 (OpReg tmp) (OpReg dst) + ] + -- in + return (Any II32 code) + + condFltReg_sse2 = do + CondCode _ cond cond_code <- condFltCode cond x y + tmp1 <- getNewRegNat archWordSize + tmp2 <- getNewRegNat archWordSize + let + -- We have to worry about unordered operands (eg. comparisons + -- against NaN). If the operands are unordered, the comparison + -- sets the parity flag, carry flag and zero flag. + -- All comparisons are supposed to return false for unordered + -- operands except for !=, which returns true. + -- + -- Optimisation: we don't have to test the parity flag if we + -- know the test has already excluded the unordered case: eg > + -- and >= test for a zero carry flag, which can only occur for + -- ordered operands. + -- + -- ToDo: by reversing comparisons we could avoid testing the + -- parity flag in more cases. + + code dst = + cond_code `appOL` + (case cond of + NE -> or_unordered dst + GU -> plain_test dst + GEU -> plain_test dst + _ -> and_ordered dst) + + plain_test dst = toOL [ + SETCC cond (OpReg tmp1), + MOVZxL II8 (OpReg tmp1) (OpReg dst) + ] + or_unordered dst = toOL [ + SETCC cond (OpReg tmp1), + SETCC PARITY (OpReg tmp2), + OR II8 (OpReg tmp1) (OpReg tmp2), + MOVZxL II8 (OpReg tmp2) (OpReg dst) + ] + and_ordered dst = toOL [ + SETCC cond (OpReg tmp1), + SETCC NOTPARITY (OpReg tmp2), + AND II8 (OpReg tmp1) (OpReg tmp2), + MOVZxL II8 (OpReg tmp2) (OpReg dst) + ] + -- in + return (Any II32 code) -- ----------------------------------------------------------------------------- @@ -2148,7 +2154,10 @@ SDM's version of The Rules: register happens to be the destination register. 
-}
 
-trivialCode width instr (Just revinstr) (CmmLit lit_a) b
+trivialCode :: Width -> (Operand -> Operand -> Instr)
+            -> Maybe (Operand -> Operand -> Instr)
+            -> CmmExpr -> CmmExpr -> NatM Register
+trivialCode width _ (Just revinstr) (CmmLit lit_a) b
   | is32BitLit lit_a = do
   b_code <- getAnyReg b
   let
@@ -2158,10 +2167,12 @@ trivialCode width instr (Just revinstr) (CmmLit lit_a) b
   -- in
   return (Any (intSize width) code)
 
-trivialCode width instr maybe_revinstr a b
+trivialCode width instr _ a b
   = genTrivialCode (intSize width) instr a b
 
 -- This is re-used for floating pt instructions too.
+genTrivialCode :: Size -> (Operand -> Operand -> Instr)
+               -> CmmExpr -> CmmExpr -> NatM Register
 genTrivialCode rep instr a b = do
   (b_op, b_code) <- getNonClobberedOperand b
   a_code <- getAnyReg a
@@ -2186,12 +2197,15 @@ genTrivialCode rep instr a b = do
   -- in
   return (Any rep code)
 
+regClashesWithOp :: Reg -> Operand -> Bool
 reg `regClashesWithOp` OpReg reg2   = reg == reg2
 reg `regClashesWithOp` OpAddr amode = any (==reg) (addrModeRegs amode)
-reg `regClashesWithOp` _            = False
+_   `regClashesWithOp` _            = False
 
 -----------
 
+trivialUCode :: Size -> (Operand -> Instr)
+             -> CmmExpr -> NatM Register
 trivialUCode rep instr x = do
  x_code <- getAnyReg x
  let
@@ -2202,27 +2216,27 @@ trivialUCode rep instr x = do
 
 -----------
 
-#if i386_TARGET_ARCH
-
-trivialFCode width instr x y = do
+trivialFCode_x87 :: (Size -> Reg -> Reg -> Reg -> Instr)
+                 -> CmmExpr -> CmmExpr -> NatM Register
+trivialFCode_x87 instr x y = do
  (x_reg, x_code) <- getNonClobberedReg x -- these work for float regs too
  (y_reg, y_code) <- getSomeReg y
  let
-     size = floatSize width
+     size = FF80 -- always, on x87
      code dst =
	x_code `appOL`
	y_code `snocOL`
	instr size x_reg y_reg dst
  return (Any size code)
 
-#endif
+trivialFCode_sse2 :: Width -> (Size -> Operand -> Operand -> Instr)
+                  -> CmmExpr -> CmmExpr -> NatM Register
+trivialFCode_sse2 pk instr x y
+    = genTrivialCode size (instr size) x y
+    where size = floatSize pk
 
-#if x86_64_TARGET_ARCH
-trivialFCode pk instr x y 
-  = genTrivialCode size (instr size) x y
-  where size = floatSize pk
-#endif
 
+trivialUFCode :: Size -> (Reg -> Reg -> Instr) -> CmmExpr -> NatM Register
 trivialUFCode size instr x = do
  (x_reg, x_code) <- getSomeReg x
  let
@@ -2235,79 +2249,86 @@ trivialUFCode size instr x = do
 
--------------------------------------------------------------------------------- 
 coerceInt2FP :: Width -> Width -> CmmExpr -> NatM Register
-
-#if i386_TARGET_ARCH
-coerceInt2FP from to x = do
-	(x_reg, x_code) <- getSomeReg x
-	let
-	      opc  = case to of W32 -> GITOF; W64 -> GITOD
-	      code dst = x_code `snocOL` opc x_reg dst
-	-- ToDo: works for non-II32 reps?
-	return (Any (floatSize to) code)
-
-#elif x86_64_TARGET_ARCH
-coerceInt2FP from to x = do
-    (x_op, x_code) <- getOperand x  -- ToDo: could be a safe operand
-    let
-        opc  = case to of W32 -> CVTSI2SS; W64 -> CVTSI2SD
-        code dst = x_code `snocOL` opc x_op dst
-    -- in
-    return (Any (floatSize to) code) -- works even if the destination rep is <II32
-
-#else
-coerceInt2FP = panic "X86.coerceInt2FP: not defined"
-
-#endif
+coerceInt2FP from to x = if_sse2 coerce_sse2 coerce_x87
+ where
+   coerce_x87 = do
+        (x_reg, x_code) <- getSomeReg x
+        let
+             opc  = case to of W32 -> GITOF; W64 -> GITOD;
+                               n -> panic $ "coerceInt2FP.x87: unhandled width ("
+                                         ++ show n ++ ")"
+             code dst = x_code `snocOL` opc x_reg dst
+        -- ToDo: works for non-II32 reps? 
+        return (Any FF80 code)
+
+   coerce_sse2 = do
+        (x_op, x_code) <- getOperand x  -- ToDo: could be a safe operand
+        let
+             opc  = case to of W32 -> CVTSI2SS; W64 -> CVTSI2SD
+                               n -> panic $ "coerceInt2FP.sse: unhandled width ("
+                                         ++ show n ++ ")"
+             code dst = x_code `snocOL` opc (intSize from) x_op dst
+        -- in
+        return (Any (floatSize to) code)
+        -- works even if the destination rep is <II32
 
 coerceFP2Int :: Width -> Width -> CmmExpr -> NatM Register
-
-#if i386_TARGET_ARCH
-coerceFP2Int from to x = do
-	(x_reg, x_code) <- getSomeReg x
-	let
-	      opc  = case from of W32 -> GFTOI; W64 -> GDTOI
-	      code dst = x_code `snocOL` opc x_reg dst
-	-- ToDo: works for non-II32 reps?
-	-- in
-	return (Any (intSize to) code)
-
-#elif x86_64_TARGET_ARCH
-coerceFP2Int from to x = do
-    (x_op, x_code) <- getOperand x -- ToDo: could be a safe operand
-    let
-        opc  = case from of W32 -> CVTTSS2SIQ; W64 -> CVTTSD2SIQ
-        code dst = x_code `snocOL` opc x_op dst
-    -- in
-    return (Any (intSize to) code) -- works even if the destination rep is <II32
-
-#else
-coerceFP2Int = panic "X86.coerceFP2Int: not defined"
-
-#endif
+coerceFP2Int from to x = if_sse2 coerceFP2Int_sse2 coerceFP2Int_x87
+ where
+   coerceFP2Int_x87 = do
+        (x_reg, x_code) <- getSomeReg x
+        let
+            opc  = case from of W32 -> GFTOI; W64 -> GDTOI
+                            n -> panic $ "coerceFP2Int.x87: unhandled width ("
+                                      ++ show n ++ ")"
+        code dst = x_code `snocOL` opc x_reg dst
+        -- ToDo: works for non-II32 reps?
+        -- in
+        return (Any (intSize to) code)
+
+    coerceFP2Int_sse2 = do
+        (x_op, x_code) <- getOperand x -- ToDo: could be a safe operand
+        let
+            opc  = case from of W32 -> CVTTSS2SIQ; W64 -> CVTTSD2SIQ;
+                                n -> panic $ "coerceFP2Int.sse: unhandled width ("
+                                          ++ show n ++ ")"
+            code dst = x_code `snocOL` opc (intSize to) x_op dst
+        -- in
+        return (Any (intSize to) code)
+        -- works even if the destination rep is <II32
 
 coerceFP2FP :: Width -> CmmExpr -> NatM Register
-
-#if x86_64_TARGET_ARCH
 coerceFP2FP to x = do
+    use_sse2 <- sse2Enabled
    (x_reg, x_code) <- getSomeReg x
    let
-         opc  = case to of W32 -> CVTSD2SS; W64 -> CVTSS2SD
+         opc | use_sse2  = case to of W32 -> CVTSD2SS; W64 -> CVTSS2SD;
+                                      n -> panic $ "coerceFP2FP: unhandled width ("
+                                                ++ show n ++ ")"
+             | otherwise = GDTOF
         code dst = x_code `snocOL` opc x_reg dst
   -- in
-  return (Any (floatSize to) code)
-
-#else
-coerceFP2FP = panic "X86.coerceFP2FP: not defined"
-
-#endif
+  return (Any (if use_sse2 then floatSize to else FF80) code)
 
+--------------------------------------------------------------------------------
+sse2NegCode :: Width -> CmmExpr -> NatM Register
+sse2NegCode w x = do
+  let sz = floatSize w
+  x_code <- getAnyReg x
+  -- This is how gcc does it, so it can't be that bad:
+  let
+    const | FF32 <- sz = CmmInt 0x80000000 W32
+          | otherwise  = CmmInt 0x8000000000000000 W64
+  Amode amode amode_code <- memConstant (widthInBytes w) const
+  tmp <- getNewRegNat sz
+  let
+    code dst = x_code dst `appOL` amode_code `appOL` toOL [
+        MOV sz (OpAddr amode) (OpReg tmp),
+        XOR sz (OpReg tmp) (OpReg dst)
        ]
+  --
+  return (Any sz code)
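-- Editorial note (not part of the original patch): sse2NegCode negates a
-- float by flipping its IEEE-754 sign bit with an XOR against an in-memory
-- constant (0x80000000 for W32, 0x8000000000000000 for W64), the same
-- trick gcc emits.  A self-contained illustration of the bit manipulation
-- it relies on (hypothetical helper, for exposition only):
--
--     import Data.Bits (xor)
--     import Data.Word (Word64)
--
--     negateDoubleBits :: Word64 -> Word64  -- bit pattern of a Double
--     negateDoubleBits bits = bits `xor` 0x8000000000000000
--
-- For example, 1.0 has the pattern 0x3FF0000000000000; flipping the top
-- bit gives 0xBFF0000000000000, which is -1.0.  Unlike subtracting from
-- zero, this maps +0.0 to -0.0 and leaves NaN payloads untouched, and
-- since the xorps/xorpd forms want their constant operand in memory, the
-- code above routes it through memConstant.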