From 6ee9554a738c442719ded861504acb729fd3d431 Mon Sep 17 00:00:00 2001 From: Ian Lynagh Date: Sat, 3 May 2008 00:08:41 +0000 Subject: [PATCH] Fix sin/cos/tan on x86; trac #2059 If the value is > 2^63 then we need to work out its value mod 2pi, and apply the operation to that instead. --- compiler/nativeGen/MachCodeGen.hs | 14 ++++---- compiler/nativeGen/MachInstrs.hs | 8 ++--- compiler/nativeGen/PprMach.hs | 65 ++++++++++++++++++++++++++++-------- compiler/nativeGen/RegAllocInfo.hs | 12 +++---- 4 files changed, 69 insertions(+), 30 deletions(-) diff --git a/compiler/nativeGen/MachCodeGen.hs b/compiler/nativeGen/MachCodeGen.hs index 3abe820..d86fe7a 100644 --- a/compiler/nativeGen/MachCodeGen.hs +++ b/compiler/nativeGen/MachCodeGen.hs @@ -3050,18 +3050,20 @@ genCCall (CmmPrim MO_WriteBarrier) _ _ = return nilOL -- we only cope with a single result for foreign calls genCCall (CmmPrim op) [CmmKinded r _] args = do + l1 <- getNewLabelNat + l2 <- getNewLabelNat case op of MO_F32_Sqrt -> actuallyInlineFloatOp F32 (GSQRT F32) args MO_F64_Sqrt -> actuallyInlineFloatOp F64 (GSQRT F64) args - MO_F32_Sin -> actuallyInlineFloatOp F32 (GSIN F32) args - MO_F64_Sin -> actuallyInlineFloatOp F64 (GSIN F64) args + MO_F32_Sin -> actuallyInlineFloatOp F32 (GSIN F32 l1 l2) args + MO_F64_Sin -> actuallyInlineFloatOp F64 (GSIN F64 l1 l2) args - MO_F32_Cos -> actuallyInlineFloatOp F32 (GCOS F32) args - MO_F64_Cos -> actuallyInlineFloatOp F64 (GCOS F64) args + MO_F32_Cos -> actuallyInlineFloatOp F32 (GCOS F32 l1 l2) args + MO_F64_Cos -> actuallyInlineFloatOp F64 (GCOS F64 l1 l2) args - MO_F32_Tan -> actuallyInlineFloatOp F32 (GTAN F32) args - MO_F64_Tan -> actuallyInlineFloatOp F64 (GTAN F64) args + MO_F32_Tan -> actuallyInlineFloatOp F32 (GTAN F32 l1 l2) args + MO_F64_Tan -> actuallyInlineFloatOp F64 (GTAN F64 l1 l2) args other_op -> outOfLineFloatOp op r args where diff --git a/compiler/nativeGen/MachInstrs.hs b/compiler/nativeGen/MachInstrs.hs index 5a8d3f1..716a521 100644 --- a/compiler/nativeGen/MachInstrs.hs +++ b/compiler/nativeGen/MachInstrs.hs @@ -483,9 +483,9 @@ bit or 64 bit precision. | GABS MachRep Reg Reg -- src, dst | GNEG MachRep Reg Reg -- src, dst | GSQRT MachRep Reg Reg -- src, dst - | GSIN MachRep Reg Reg -- src, dst - | GCOS MachRep Reg Reg -- src, dst - | GTAN MachRep Reg Reg -- src, dst + | GSIN MachRep CLabel CLabel Reg Reg -- src, dst + | GCOS MachRep CLabel CLabel Reg Reg -- src, dst + | GTAN MachRep CLabel CLabel Reg Reg -- src, dst | GFREE -- do ffree on all x86 regs; an ugly hack #endif @@ -583,7 +583,7 @@ is_G_instr instr GSUB _ _ _ _ -> True; GMUL _ _ _ _ -> True GCMP _ _ _ -> True; GABS _ _ _ -> True GNEG _ _ _ -> True; GSQRT _ _ _ -> True - GSIN _ _ _ -> True; GCOS _ _ _ -> True; GTAN _ _ _ -> True + GSIN _ _ _ _ _ -> True; GCOS _ _ _ _ _ -> True; GTAN _ _ _ _ _ -> True GFREE -> panic "is_G_instr: GFREE (!)" other -> False #endif /* i386_TARGET_ARCH */ diff --git a/compiler/nativeGen/PprMach.hs b/compiler/nativeGen/PprMach.hs index 38267d0..1995cd0 100644 --- a/compiler/nativeGen/PprMach.hs +++ b/compiler/nativeGen/PprMach.hs @@ -1504,17 +1504,12 @@ pprInstr g@(GNEG sz src dst) pprInstr g@(GSQRT sz src dst) = pprG g (hcat [gtab, gpush src 0, text " ; fsqrt"] $$ hcat [gtab, gcoerceto sz, gpop dst 1]) -pprInstr g@(GSIN sz src dst) - = pprG g (hcat [gtab, gpush src 0, text " ; fsin"] $$ - hcat [gtab, gcoerceto sz, gpop dst 1]) -pprInstr g@(GCOS sz src dst) - = pprG g (hcat [gtab, gpush src 0, text " ; fcos"] $$ - hcat [gtab, gcoerceto sz, gpop dst 1]) -pprInstr g@(GTAN sz src dst) - = pprG g (hcat [gtab, text "ffree %st(6) ; ", - gpush src 0, text " ; fptan ; ", - text " fstp %st(0)"] $$ - hcat [gtab, gcoerceto sz, gpop dst 1]) +pprInstr g@(GSIN sz l1 l2 src dst) + = pprG g (pprTrigOp "fsin" False l1 l2 src dst sz) +pprInstr g@(GCOS sz l1 l2 src dst) + = pprG g (pprTrigOp "fcos" False l1 l2 src dst sz) +pprInstr g@(GTAN sz l1 l2 src dst) + = pprG g (pprTrigOp "fptan" True l1 l2 src dst sz) -- In the translations for GADD, GMUL, GSUB and GDIV, -- the first two cases are mere optimisations. The otherwise clause @@ -1585,6 +1580,48 @@ pprInstr GFREE ptext (sLit "\tffree %st(4) ;ffree %st(5) ;ffree %st(6) ;ffree %st(7)") ] +pprTrigOp :: String -> Bool -> CLabel -> CLabel -> Reg -> Reg -> MachRep -> Doc +pprTrigOp op -- fsin, fcos or fptan + isTan -- we need a couple of extra steps if we're doing tan + l1 l2 -- internal labels for us to use + src dst sz + = -- We'll be needing %eax later on + hcat [gtab, text "pushl %eax;"] $$ + -- tan is going to use an extra space on the FP stack + (if isTan then hcat [gtab, text "ffree %st(6)"] else empty) $$ + -- First put the value in %st(0) and try to apply the op to it + hcat [gpush src 0, text ("; " ++ op)] $$ + -- Now look to see if C2 was set (overflow, |value| >= 2^63) + hcat [gtab, text "fnstsw %ax"] $$ + hcat [gtab, text "test $0x400,%eax"] $$ + -- If we were in bounds then jump to the end + hcat [gtab, text "je " <> pprCLabel_asm l1] $$ + -- Otherwise we need to shrink the value. Start by + -- loading pi, doubleing it (by adding it to itself), + -- and then swapping pi with the value, so the value we + -- want to apply op to is in %st(0) again + hcat [gtab, text "ffree %st(7); fldpi"] $$ + hcat [gtab, text "fadd %st(0),%st"] $$ + hcat [gtab, text "fxch %st(1)"] $$ + -- Now we have a loop in which we make the value smaller, + -- see if it's small enough, and loop if not + (pprCLabel_asm l2 <> char ':') $$ + hcat [gtab, text "fprem1"] $$ + -- My Debian libc uses fstsw here for the tan code, but I can't + -- see any reason why it should need to be different for tan. + hcat [gtab, text "fnstsw %ax"] $$ + hcat [gtab, text "test $0x400,%eax"] $$ + hcat [gtab, text "jne " <> pprCLabel_asm l2] $$ + hcat [gtab, text "fstp %st(1)"] $$ + hcat [gtab, text op] $$ + (pprCLabel_asm l1 <> char ':') $$ + -- Pop the 1.0 tan gave us + (if isTan then hcat [gtab, text "fstp %st(0)"] else empty) $$ + -- Restore %eax + hcat [gtab, text "popl %eax;"] $$ + -- And finally make the result the right size + hcat [gtab, gcoerceto sz, gpop dst 1] + -------------------------- -- coerce %st(0) to the specified size @@ -1626,9 +1663,9 @@ pprGInstr (GCMP co src dst) = pprCondRegReg (sLit "gcmp_") F64 co src dst pprGInstr (GABS sz src dst) = pprSizeRegReg (sLit "gabs") sz src dst pprGInstr (GNEG sz src dst) = pprSizeRegReg (sLit "gneg") sz src dst pprGInstr (GSQRT sz src dst) = pprSizeRegReg (sLit "gsqrt") sz src dst -pprGInstr (GSIN sz src dst) = pprSizeRegReg (sLit "gsin") sz src dst -pprGInstr (GCOS sz src dst) = pprSizeRegReg (sLit "gcos") sz src dst -pprGInstr (GTAN sz src dst) = pprSizeRegReg (sLit "gtan") sz src dst +pprGInstr (GSIN sz _ _ src dst) = pprSizeRegReg (sLit "gsin") sz src dst +pprGInstr (GCOS sz _ _ src dst) = pprSizeRegReg (sLit "gcos") sz src dst +pprGInstr (GTAN sz _ _ src dst) = pprSizeRegReg (sLit "gtan") sz src dst pprGInstr (GADD sz src1 src2 dst) = pprSizeRegRegReg (sLit "gadd") sz src1 src2 dst pprGInstr (GSUB sz src1 src2 dst) = pprSizeRegRegReg (sLit "gsub") sz src1 src2 dst diff --git a/compiler/nativeGen/RegAllocInfo.hs b/compiler/nativeGen/RegAllocInfo.hs index 2361e77..da876c3 100644 --- a/compiler/nativeGen/RegAllocInfo.hs +++ b/compiler/nativeGen/RegAllocInfo.hs @@ -215,9 +215,9 @@ regUsage instr = case instr of GABS sz src dst -> mkRU [src] [dst] GNEG sz src dst -> mkRU [src] [dst] GSQRT sz src dst -> mkRU [src] [dst] - GSIN sz src dst -> mkRU [src] [dst] - GCOS sz src dst -> mkRU [src] [dst] - GTAN sz src dst -> mkRU [src] [dst] + GSIN sz _ _ src dst -> mkRU [src] [dst] + GCOS sz _ _ src dst -> mkRU [src] [dst] + GTAN sz _ _ src dst -> mkRU [src] [dst] #endif #if x86_64_TARGET_ARCH @@ -599,9 +599,9 @@ patchRegs instr env = case instr of GABS sz src dst -> GABS sz (env src) (env dst) GNEG sz src dst -> GNEG sz (env src) (env dst) GSQRT sz src dst -> GSQRT sz (env src) (env dst) - GSIN sz src dst -> GSIN sz (env src) (env dst) - GCOS sz src dst -> GCOS sz (env src) (env dst) - GTAN sz src dst -> GTAN sz (env src) (env dst) + GSIN sz l1 l2 src dst -> GSIN sz l1 l2 (env src) (env dst) + GCOS sz l1 l2 src dst -> GCOS sz l1 l2 (env src) (env dst) + GTAN sz l1 l2 src dst -> GTAN sz l1 l2 (env src) (env dst) #endif #if x86_64_TARGET_ARCH -- 1.7.10.4