From 41147ad2a9ca84ebe66386b3b0043cb7b48ddcd8 Mon Sep 17 00:00:00 2001
From: Simon Marlow
Date: Wed, 30 Mar 2011 10:18:09 +0000
Subject: [PATCH] implement double-to-float narrowing in the x86 NCG (#4441)

---
 compiler/nativeGen/X86/CodeGen.hs | 10 +++++-----
 compiler/nativeGen/X86/Instr.hs   | 11 +++++++++--
 compiler/nativeGen/X86/Ppr.hs     |  6 ++++++
 3 files changed, 20 insertions(+), 7 deletions(-)

diff --git a/compiler/nativeGen/X86/CodeGen.hs b/compiler/nativeGen/X86/CodeGen.hs
index 44311a4..e606e2c 100644
--- a/compiler/nativeGen/X86/CodeGen.hs
+++ b/compiler/nativeGen/X86/CodeGen.hs
@@ -605,9 +605,7 @@ getRegister (CmmMachOp mop [x]) = do -- unary MachOps
         | sse2 -> coerceFP2FP W64 x
         | otherwise -> conversionNop FF80 x
 
-      MO_FF_Conv W64 W32
-        | sse2 -> coerceFP2FP W32 x
-        | otherwise -> conversionNop FF80 x
+      MO_FF_Conv W64 W32 -> coerceFP2FP W32 x
 
       MO_FS_Conv from to -> coerceFP2Int from to x
       MO_SF_Conv from to -> coerceInt2FP from to x
@@ -2257,12 +2255,14 @@ coerceFP2Int from to x = if_sse2 coerceFP2Int_sse2 coerceFP2Int_x87
 --------------------------------------------------------------------------------
 coerceFP2FP :: Width -> CmmExpr -> NatM Register
 coerceFP2FP to x = do
+  use_sse2 <- sse2Enabled
   (x_reg, x_code) <- getSomeReg x
   let
-        opc = case to of W32 -> CVTSD2SS; W64 -> CVTSS2SD
+        opc | use_sse2 = case to of W32 -> CVTSD2SS; W64 -> CVTSS2SD
+            | otherwise = GDTOF
         code dst = x_code `snocOL` opc x_reg dst
   -- in
-  return (Any (floatSize to) code)
+  return (Any (if use_sse2 then floatSize to else FF80) code)
 
 --------------------------------------------------------------------------------
 
diff --git a/compiler/nativeGen/X86/Instr.hs b/compiler/nativeGen/X86/Instr.hs
index d05b08a..a96452b 100644
--- a/compiler/nativeGen/X86/Instr.hs
+++ b/compiler/nativeGen/X86/Instr.hs
@@ -228,6 +228,8 @@ data Instr
         | GITOF Reg Reg -- src(intreg), dst(fpreg)
         | GITOD Reg Reg -- src(intreg), dst(fpreg)
 
+        | GDTOF Reg Reg -- src(fpreg), dst(fpreg)
+
         | GADD Size Reg Reg Reg -- src1, src2, dst
         | GDIV Size Reg Reg Reg -- src1, src2, dst
         | GSUB Size Reg Reg Reg -- src1, src2, dst
@@ -367,6 +369,8 @@ x86_regUsageOfInstr instr
     GITOF src dst -> mkRU [src] [dst]
     GITOD src dst -> mkRU [src] [dst]
 
+    GDTOF src dst -> mkRU [src] [dst]
+
     GADD _ s1 s2 dst -> mkRU [s1,s2] [dst]
     GSUB _ s1 s2 dst -> mkRU [s1,s2] [dst]
     GMUL _ s1 s2 dst -> mkRU [s1,s2] [dst]
@@ -493,6 +497,8 @@ x86_patchRegsOfInstr instr env
     GITOF src dst -> GITOF (env src) (env dst)
     GITOD src dst -> GITOD (env src) (env dst)
 
+    GDTOF src dst -> GDTOF (env src) (env dst)
+
     GADD sz s1 s2 dst -> GADD sz (env s1) (env s2) (env dst)
     GSUB sz s1 s2 dst -> GSUB sz (env s1) (env s2) (env dst)
     GMUL sz s1 s2 dst -> GMUL sz (env s1) (env s2) (env dst)
@@ -750,8 +756,9 @@ is_G_instr instr
         GLD1{} -> True
         GFTOI{} -> True
         GDTOI{} -> True
-        GITOF{} -> True
-        GITOD{} -> True
+        GITOF{} -> True
+        GITOD{} -> True
+        GDTOF{} -> True
         GADD{} -> True
         GDIV{} -> True
         GSUB{} -> True
diff --git a/compiler/nativeGen/X86/Ppr.hs b/compiler/nativeGen/X86/Ppr.hs
index 7944a38..5fe78e1 100644
--- a/compiler/nativeGen/X86/Ppr.hs
+++ b/compiler/nativeGen/X86/Ppr.hs
@@ -720,6 +720,11 @@ pprInstr g@(GITOD src dst)
                    text " ; fildl (%esp) ; ",
                    gpop dst 1, text " ; addl $4,%esp"])
 
+pprInstr g@(GDTOF src dst)
+  = pprG g (vcat [gtab <> gpush src 0,
+                  gtab <> text "subl $4,%esp ; fstps (%esp) ; flds (%esp) ; addl $4,%esp ;",
+                  gtab <> gpop dst 1])
+
 {- Gruesome swamp follows. If you're unfortunate enough to have ventured
    this far into the jungle AND you give a Rat's Ass (tm) what's going
    on, here's the deal. Generate code to do a floating point comparison
@@ -975,6 +980,7 @@ pprGInstr (GDTOI src dst) = pprSizeSizeRegReg (sLit "gdtoi") FF64 II32 src dst
 
 pprGInstr (GITOF src dst) = pprSizeSizeRegReg (sLit "gitof") II32 FF32 src dst
 pprGInstr (GITOD src dst) = pprSizeSizeRegReg (sLit "gitod") II32 FF64 src dst
+pprGInstr (GDTOF src dst) = pprSizeSizeRegReg (sLit "gdtof") FF64 FF32 src dst
 
 pprGInstr (GCMP co src dst) = pprCondRegReg (sLit "gcmp_") FF64 co src dst
 pprGInstr (GABS sz src dst) = pprSizeRegReg (sLit "gabs") sz src dst
-- 
1.7.10.4