Fix sin/cos/tan on x86; trac #2059

author Ian Lynagh <igloo@earth.li>

Sat, 3 May 2008 00:08:41 +0000 (00:08 +0000)

committer Ian Lynagh <igloo@earth.li>

Sat, 3 May 2008 00:08:41 +0000 (00:08 +0000)
author Ian Lynagh <igloo@earth.li>
Sat, 3 May 2008 00:08:41 +0000 (00:08 +0000)
committer Ian Lynagh <igloo@earth.li>
Sat, 3 May 2008 00:08:41 +0000 (00:08 +0000)
diff --git a/compiler/nativeGen/MachCodeGen.hs b/compiler/nativeGen/MachCodeGen.hs

index 3abe820..d86fe7a 100644 (file)
--- a/compiler/nativeGen/MachCodeGen.hs
+++ b/compiler/nativeGen/MachCodeGen.hs
@@ -3050,18 +3050,20 @@ genCCall (CmmPrim MO_WriteBarrier) _ _ = return nilOL
  
  -- we only cope with a single result for foreign calls
  genCCall (CmmPrim op) [CmmKinded r _] args = do
  
  -- we only cope with a single result for foreign calls
  genCCall (CmmPrim op) [CmmKinded r _] args = do
+  l1 <- getNewLabelNat
+  l2 <- getNewLabelNat
    case op of
         MO_F32_Sqrt -> actuallyInlineFloatOp F32  (GSQRT F32) args
         MO_F64_Sqrt -> actuallyInlineFloatOp F64 (GSQRT F64) args
         
    case op of
         MO_F32_Sqrt -> actuallyInlineFloatOp F32  (GSQRT F32) args
         MO_F64_Sqrt -> actuallyInlineFloatOp F64 (GSQRT F64) args
         
-       MO_F32_Sin  -> actuallyInlineFloatOp F32  (GSIN F32) args
-       MO_F64_Sin  -> actuallyInlineFloatOp F64 (GSIN F64) args
+       MO_F32_Sin  -> actuallyInlineFloatOp F32  (GSIN F32 l1 l2) args
+       MO_F64_Sin  -> actuallyInlineFloatOp F64 (GSIN F64 l1 l2) args
         
         
-       MO_F32_Cos  -> actuallyInlineFloatOp F32  (GCOS F32) args
-       MO_F64_Cos  -> actuallyInlineFloatOp F64 (GCOS F64) args
+       MO_F32_Cos  -> actuallyInlineFloatOp F32  (GCOS F32 l1 l2) args
+       MO_F64_Cos  -> actuallyInlineFloatOp F64 (GCOS F64 l1 l2) args
         
         
-       MO_F32_Tan  -> actuallyInlineFloatOp F32  (GTAN F32) args
-       MO_F64_Tan  -> actuallyInlineFloatOp F64 (GTAN F64) args
+       MO_F32_Tan  -> actuallyInlineFloatOp F32  (GTAN F32 l1 l2) args
+       MO_F64_Tan  -> actuallyInlineFloatOp F64 (GTAN F64 l1 l2) args
         
         other_op    -> outOfLineFloatOp op r args
   where
         
         other_op    -> outOfLineFloatOp op r args
   where
diff --git a/compiler/nativeGen/MachInstrs.hs b/compiler/nativeGen/MachInstrs.hs

index 5a8d3f1..716a521 100644 (file)
--- a/compiler/nativeGen/MachInstrs.hs
+++ b/compiler/nativeGen/MachInstrs.hs
@@ -483,9 +483,9 @@ bit or 64 bit precision.
         | GABS        MachRep Reg Reg -- src, dst
         | GNEG        MachRep Reg Reg -- src, dst
         | GSQRT       MachRep Reg Reg -- src, dst
         | GABS        MachRep Reg Reg -- src, dst
         | GNEG        MachRep Reg Reg -- src, dst
         | GSQRT       MachRep Reg Reg -- src, dst
-       | GSIN        MachRep Reg Reg -- src, dst
-       | GCOS        MachRep Reg Reg -- src, dst
-       | GTAN        MachRep Reg Reg -- src, dst
+       | GSIN        MachRep CLabel CLabel Reg Reg -- src, dst
+       | GCOS        MachRep CLabel CLabel Reg Reg -- src, dst
+       | GTAN        MachRep CLabel CLabel Reg Reg -- src, dst
         
          | GFREE         -- do ffree on all x86 regs; an ugly hack
  #endif
         
          | GFREE         -- do ffree on all x86 regs; an ugly hack
  #endif
@@ -583,7 +583,7 @@ is_G_instr instr
         GSUB _ _ _ _ -> True; GMUL _ _ _ _ -> True
         GCMP _ _ _ -> True; GABS _ _ _ -> True
         GNEG _ _ _ -> True; GSQRT _ _ _ -> True
         GSUB _ _ _ _ -> True; GMUL _ _ _ _ -> True
         GCMP _ _ _ -> True; GABS _ _ _ -> True
         GNEG _ _ _ -> True; GSQRT _ _ _ -> True
-        GSIN _ _ _ -> True; GCOS _ _ _ -> True; GTAN _ _ _ -> True
+        GSIN _ _ _ _ _ -> True; GCOS _ _ _ _ _ -> True; GTAN _ _ _ _ _ -> True
          GFREE -> panic "is_G_instr: GFREE (!)"
          other -> False
  #endif /* i386_TARGET_ARCH */
          GFREE -> panic "is_G_instr: GFREE (!)"
          other -> False
  #endif /* i386_TARGET_ARCH */
diff --git a/compiler/nativeGen/PprMach.hs b/compiler/nativeGen/PprMach.hs

index 38267d0..1995cd0 100644 (file)
--- a/compiler/nativeGen/PprMach.hs
+++ b/compiler/nativeGen/PprMach.hs
@@ -1504,17 +1504,12 @@ pprInstr g@(GNEG sz src dst)
  pprInstr g@(GSQRT sz src dst)
     = pprG g (hcat [gtab, gpush src 0, text " ; fsqrt"] $$ 
               hcat [gtab, gcoerceto sz, gpop dst 1])
  pprInstr g@(GSQRT sz src dst)
     = pprG g (hcat [gtab, gpush src 0, text " ; fsqrt"] $$ 
               hcat [gtab, gcoerceto sz, gpop dst 1])
-pprInstr g@(GSIN sz src dst)
-   = pprG g (hcat [gtab, gpush src 0, text " ; fsin"] $$ 
-             hcat [gtab, gcoerceto sz, gpop dst 1])
-pprInstr g@(GCOS sz src dst)
-   = pprG g (hcat [gtab, gpush src 0, text " ; fcos"] $$ 
-             hcat [gtab, gcoerceto sz, gpop dst 1])
-pprInstr g@(GTAN sz src dst)
-   = pprG g (hcat [gtab, text "ffree %st(6) ; ",
-                   gpush src 0, text " ; fptan ; ", 
-                   text " fstp %st(0)"] $$
-             hcat [gtab, gcoerceto sz, gpop dst 1])
+pprInstr g@(GSIN sz l1 l2 src dst)
+   = pprG g (pprTrigOp "fsin" False l1 l2 src dst sz)
+pprInstr g@(GCOS sz l1 l2 src dst)
+   = pprG g (pprTrigOp "fcos" False l1 l2 src dst sz)
+pprInstr g@(GTAN sz l1 l2 src dst)
+   = pprG g (pprTrigOp "fptan" True l1 l2 src dst sz)
  
  -- In the translations for GADD, GMUL, GSUB and GDIV,
  -- the first two cases are mere optimisations.  The otherwise clause
  
  -- In the translations for GADD, GMUL, GSUB and GDIV,
  -- the first two cases are mere optimisations.  The otherwise clause
@@ -1585,6 +1580,48 @@ pprInstr GFREE
              ptext (sLit "\tffree %st(4) ;ffree %st(5) ;ffree %st(6) ;ffree %st(7)") 
            ]
  
              ptext (sLit "\tffree %st(4) ;ffree %st(5) ;ffree %st(6) ;ffree %st(7)") 
            ]
  
+pprTrigOp :: String -> Bool -> CLabel -> CLabel -> Reg -> Reg -> MachRep -> Doc
+pprTrigOp op -- fsin, fcos or fptan
+          isTan -- we need a couple of extra steps if we're doing tan
+          l1 l2 -- internal labels for us to use
+          src dst sz
+    = -- We'll be needing %eax later on
+      hcat [gtab, text "pushl %eax;"] $$
+      -- tan is going to use an extra space on the FP stack
+      (if isTan then hcat [gtab, text "ffree %st(6)"] else empty) $$
+      -- First put the value in %st(0) and try to apply the op to it
+      hcat [gpush src 0, text ("; " ++ op)] $$
+      -- Now look to see if C2 was set (overflow, |value| >= 2^63)
+      hcat [gtab, text "fnstsw %ax"] $$
+      hcat [gtab, text "test   $0x400,%eax"] $$
+      -- If we were in bounds then jump to the end
+      hcat [gtab, text "je     " <> pprCLabel_asm l1] $$
+      -- Otherwise we need to shrink the value. Start by
+      -- loading pi, doubleing it (by adding it to itself),
+      -- and then swapping pi with the value, so the value we
+      -- want to apply op to is in %st(0) again
+      hcat [gtab, text "ffree %st(7); fldpi"] $$
+      hcat [gtab, text "fadd   %st(0),%st"] $$
+      hcat [gtab, text "fxch   %st(1)"] $$
+      -- Now we have a loop in which we make the value smaller,
+      -- see if it's small enough, and loop if not
+      (pprCLabel_asm l2 <> char ':') $$
+      hcat [gtab, text "fprem1"] $$
+      -- My Debian libc uses fstsw here for the tan code, but I can't
+      -- see any reason why it should need to be different for tan.
+      hcat [gtab, text "fnstsw %ax"] $$
+      hcat [gtab, text "test   $0x400,%eax"] $$
+      hcat [gtab, text "jne    " <> pprCLabel_asm l2] $$
+      hcat [gtab, text "fstp   %st(1)"] $$
+      hcat [gtab, text op] $$
+      (pprCLabel_asm l1 <> char ':') $$
+      -- Pop the 1.0 tan gave us
+      (if isTan then hcat [gtab, text "fstp %st(0)"] else empty) $$
+      -- Restore %eax
+      hcat [gtab, text "popl %eax;"] $$
+      -- And finally make the result the right size
+      hcat [gtab, gcoerceto sz, gpop dst 1]
+
  --------------------------
  
  -- coerce %st(0) to the specified size
  --------------------------
  
  -- coerce %st(0) to the specified size
@@ -1626,9 +1663,9 @@ pprGInstr (GCMP co src dst) = pprCondRegReg (sLit "gcmp_") F64 co src dst
  pprGInstr (GABS sz src dst) = pprSizeRegReg (sLit "gabs") sz src dst
  pprGInstr (GNEG sz src dst) = pprSizeRegReg (sLit "gneg") sz src dst
  pprGInstr (GSQRT sz src dst) = pprSizeRegReg (sLit "gsqrt") sz src dst
  pprGInstr (GABS sz src dst) = pprSizeRegReg (sLit "gabs") sz src dst
  pprGInstr (GNEG sz src dst) = pprSizeRegReg (sLit "gneg") sz src dst
  pprGInstr (GSQRT sz src dst) = pprSizeRegReg (sLit "gsqrt") sz src dst
-pprGInstr (GSIN sz src dst) = pprSizeRegReg (sLit "gsin") sz src dst
-pprGInstr (GCOS sz src dst) = pprSizeRegReg (sLit "gcos") sz src dst
-pprGInstr (GTAN sz src dst) = pprSizeRegReg (sLit "gtan") sz src dst
+pprGInstr (GSIN sz _ _ src dst) = pprSizeRegReg (sLit "gsin") sz src dst
+pprGInstr (GCOS sz _ _ src dst) = pprSizeRegReg (sLit "gcos") sz src dst
+pprGInstr (GTAN sz _ _ src dst) = pprSizeRegReg (sLit "gtan") sz src dst
  
  pprGInstr (GADD sz src1 src2 dst) = pprSizeRegRegReg (sLit "gadd") sz src1 src2 dst
  pprGInstr (GSUB sz src1 src2 dst) = pprSizeRegRegReg (sLit "gsub") sz src1 src2 dst
  
  pprGInstr (GADD sz src1 src2 dst) = pprSizeRegRegReg (sLit "gadd") sz src1 src2 dst
  pprGInstr (GSUB sz src1 src2 dst) = pprSizeRegRegReg (sLit "gsub") sz src1 src2 dst
diff --git a/compiler/nativeGen/RegAllocInfo.hs b/compiler/nativeGen/RegAllocInfo.hs

index 2361e77..da876c3 100644 (file)
--- a/compiler/nativeGen/RegAllocInfo.hs
+++ b/compiler/nativeGen/RegAllocInfo.hs
@@ -215,9 +215,9 @@ regUsage instr = case instr of
      GABS   sz src dst  -> mkRU [src] [dst]
      GNEG   sz src dst  -> mkRU [src] [dst]
      GSQRT  sz src dst  -> mkRU [src] [dst]
      GABS   sz src dst  -> mkRU [src] [dst]
      GNEG   sz src dst  -> mkRU [src] [dst]
      GSQRT  sz src dst  -> mkRU [src] [dst]
-    GSIN   sz src dst  -> mkRU [src] [dst]
-    GCOS   sz src dst  -> mkRU [src] [dst]
-    GTAN   sz src dst  -> mkRU [src] [dst]
+    GSIN   sz _ _ src dst      -> mkRU [src] [dst]
+    GCOS   sz _ _ src dst      -> mkRU [src] [dst]
+    GTAN   sz _ _ src dst      -> mkRU [src] [dst]
  #endif
  
  #if x86_64_TARGET_ARCH
  #endif
  
  #if x86_64_TARGET_ARCH
@@ -599,9 +599,9 @@ patchRegs instr env = case instr of
      GABS sz src dst    -> GABS sz (env src) (env dst)
      GNEG sz src dst    -> GNEG sz (env src) (env dst)
      GSQRT sz src dst   -> GSQRT sz (env src) (env dst)
      GABS sz src dst    -> GABS sz (env src) (env dst)
      GNEG sz src dst    -> GNEG sz (env src) (env dst)
      GSQRT sz src dst   -> GSQRT sz (env src) (env dst)
-    GSIN sz src dst    -> GSIN sz (env src) (env dst)
-    GCOS sz src dst    -> GCOS sz (env src) (env dst)
-    GTAN sz src dst    -> GTAN sz (env src) (env dst)
+    GSIN sz l1 l2 src dst      -> GSIN sz l1 l2 (env src) (env dst)
+    GCOS sz l1 l2 src dst      -> GCOS sz l1 l2 (env src) (env dst)
+    GTAN sz l1 l2 src dst      -> GTAN sz l1 l2 (env src) (env dst)
  #endif
  
  #if x86_64_TARGET_ARCH
  #endif
  
  #if x86_64_TARGET_ARCH
author	Ian Lynagh <igloo@earth.li>
	Sat, 3 May 2008 00:08:41 +0000 (00:08 +0000)
committer	Ian Lynagh <igloo@earth.li>
	Sat, 3 May 2008 00:08:41 +0000 (00:08 +0000)
compiler/nativeGen/MachCodeGen.hs		patch \| blob \| history
compiler/nativeGen/MachInstrs.hs		patch \| blob \| history
compiler/nativeGen/PprMach.hs		patch \| blob \| history
compiler/nativeGen/RegAllocInfo.hs		patch \| blob \| history