From 41147ad2a9ca84ebe66386b3b0043cb7b48ddcd8 Mon Sep 17 00:00:00 2001
From: Simon Marlow <marlowsd@gmail.com>
Date: Wed, 30 Mar 2011 10:18:09 +0000
Subject: [PATCH] implement double-to-float narrowing in the x86 NCG (#4441)

---
 compiler/nativeGen/X86/CodeGen.hs |   10 +++++-----
 compiler/nativeGen/X86/Instr.hs   |   11 +++++++++--
 compiler/nativeGen/X86/Ppr.hs     |    6 ++++++
 3 files changed, 20 insertions(+), 7 deletions(-)

diff --git a/compiler/nativeGen/X86/CodeGen.hs b/compiler/nativeGen/X86/CodeGen.hs
index 44311a4..e606e2c 100644
--- a/compiler/nativeGen/X86/CodeGen.hs
+++ b/compiler/nativeGen/X86/CodeGen.hs
@@ -605,9 +605,7 @@ getRegister (CmmMachOp mop [x]) = do -- unary MachOps
         | sse2      -> coerceFP2FP W64 x
         | otherwise -> conversionNop FF80 x 
 
-      MO_FF_Conv W64 W32
-        | sse2      -> coerceFP2FP W32 x
-        | otherwise -> conversionNop FF80 x 
+      MO_FF_Conv W64 W32 -> coerceFP2FP W32 x
 
       MO_FS_Conv from to -> coerceFP2Int from to x
       MO_SF_Conv from to -> coerceInt2FP from to x
@@ -2257,12 +2255,14 @@ coerceFP2Int from to x = if_sse2 coerceFP2Int_sse2 coerceFP2Int_x87
 --------------------------------------------------------------------------------
 coerceFP2FP :: Width -> CmmExpr -> NatM Register
 coerceFP2FP to x = do
+  use_sse2 <- sse2Enabled
   (x_reg, x_code) <- getSomeReg x
   let
-        opc  = case to of W32 -> CVTSD2SS; W64 -> CVTSS2SD
+        opc | use_sse2  = case to of W32 -> CVTSD2SS; W64 -> CVTSS2SD
+            | otherwise = GDTOF
         code dst = x_code `snocOL` opc x_reg dst
   -- in
-  return (Any (floatSize to) code)
+  return (Any (if use_sse2 then floatSize to else FF80) code)
 
 --------------------------------------------------------------------------------
 
diff --git a/compiler/nativeGen/X86/Instr.hs b/compiler/nativeGen/X86/Instr.hs
index d05b08a..a96452b 100644
--- a/compiler/nativeGen/X86/Instr.hs
+++ b/compiler/nativeGen/X86/Instr.hs
@@ -228,6 +228,8 @@ data Instr
         | GITOF       Reg Reg -- src(intreg), dst(fpreg)
         | GITOD       Reg Reg -- src(intreg), dst(fpreg)
 	
+        | GDTOF       Reg Reg -- src(fpreg), dst(fpreg)
+
 	| GADD	      Size Reg Reg Reg -- src1, src2, dst
 	| GDIV	      Size Reg Reg Reg -- src1, src2, dst
 	| GSUB	      Size Reg Reg Reg -- src1, src2, dst
@@ -367,6 +369,8 @@ x86_regUsageOfInstr instr
     GITOF  src dst	-> mkRU [src] [dst]
     GITOD  src dst	-> mkRU [src] [dst]
 
+    GDTOF  src dst	-> mkRU [src] [dst]
+
     GADD   _ s1 s2 dst	-> mkRU [s1,s2] [dst]
     GSUB   _ s1 s2 dst	-> mkRU [s1,s2] [dst]
     GMUL   _ s1 s2 dst	-> mkRU [s1,s2] [dst]
@@ -493,6 +497,8 @@ x86_patchRegsOfInstr instr env
     GITOF src dst	-> GITOF (env src) (env dst)
     GITOD src dst	-> GITOD (env src) (env dst)
 
+    GDTOF src dst	-> GDTOF (env src) (env dst)
+
     GADD sz s1 s2 dst	-> GADD sz (env s1) (env s2) (env dst)
     GSUB sz s1 s2 dst	-> GSUB sz (env s1) (env s2) (env dst)
     GMUL sz s1 s2 dst	-> GMUL sz (env s1) (env s2) (env dst)
@@ -750,8 +756,9 @@ is_G_instr instr
 	GLD1{}		-> True
         GFTOI{}		-> True
 	GDTOI{}	 	-> True
-        GITOF{} 	-> True
-	GITOD{} 	-> True
+        GITOF{}		-> True
+	GITOD{}	 	-> True
+        GDTOF{}		-> True
 	GADD{}	 	-> True
 	GDIV{}	 	-> True
 	GSUB{} 		-> True
diff --git a/compiler/nativeGen/X86/Ppr.hs b/compiler/nativeGen/X86/Ppr.hs
index 7944a38..5fe78e1 100644
--- a/compiler/nativeGen/X86/Ppr.hs
+++ b/compiler/nativeGen/X86/Ppr.hs
@@ -720,6 +720,11 @@ pprInstr g@(GITOD src dst)
                    text " ; fildl (%esp) ; ",
                    gpop dst 1, text " ; addl $4,%esp"])
 
+pprInstr g@(GDTOF src dst)
+  = pprG g (vcat [gtab <> gpush src 0,
+                  gtab <> text "subl $4,%esp ; fstps (%esp) ; flds (%esp) ; addl $4,%esp ;",
+                  gtab <> gpop dst 1])
+
 {- Gruesome swamp follows.  If you're unfortunate enough to have ventured
    this far into the jungle AND you give a Rat's Ass (tm) what's going
    on, here's the deal.  Generate code to do a floating point comparison
@@ -975,6 +980,7 @@ pprGInstr (GDTOI src dst) = pprSizeSizeRegReg (sLit "gdtoi") FF64 II32 src dst
 
 pprGInstr (GITOF src dst) = pprSizeSizeRegReg (sLit "gitof") II32 FF32  src dst
 pprGInstr (GITOD src dst) = pprSizeSizeRegReg (sLit "gitod") II32 FF64 src dst
+pprGInstr (GDTOF src dst) = pprSizeSizeRegReg (sLit "gdtof") FF64 FF32 src dst
 
 pprGInstr (GCMP co src dst) = pprCondRegReg (sLit "gcmp_") FF64 co src dst
 pprGInstr (GABS sz src dst) = pprSizeRegReg (sLit "gabs") sz src dst
-- 
1.7.10.4