X-Git-Url: http://git.megacz.com/?a=blobdiff_plain;f=compiler%2FnativeGen%2FX86%2FInstr.hs;h=a96452b9f16f152fc39b2eb5cd4924a23351e026;hb=6cec61d14a324285dbb8ce73d4c7215f1f8d6766;hp=f856313e00e70464d5e5cb766752e27781591cac;hpb=335b9f366ac440259318777c4c07e4fa42fbbec6;p=ghc-hetmet.git

diff --git a/compiler/nativeGen/X86/Instr.hs b/compiler/nativeGen/X86/Instr.hs
index f856313..a96452b 100644
--- a/compiler/nativeGen/X86/Instr.hs
+++ b/compiler/nativeGen/X86/Instr.hs
@@ -21,7 +21,7 @@ import Reg
 import TargetReg
 
 import BlockId
-import Cmm
+import OldCmm
 import FastString
 import FastBool
 import Outputable
@@ -102,7 +102,7 @@ Hence GLDZ and GLD1.  Bwahahahahahahaha!
 -}
 
 {-
-MORE FLOATING POINT MUSINGS...
+Note [x86 Floating point precision]
 
 Intel's internal floating point registers are by default 80 bit
 extended precision.  This means that all operations done on values in
@@ -141,11 +141,12 @@ This is what gcc does.  Spilling at 80 bits requires taking up a full
 128 bit slot (so we get alignment).  We spill at 80-bits and ignore
 the alignment problems.
 
-In the future, we'll use the SSE registers for floating point.  This
-requires a CPU that supports SSE2 (ordinary SSE only supports 32 bit
-precision float ops), which means P4 or Xeon and above.  Using SSE
-will solve all these problems, because the SSE registers use fixed 32
-bit or 64 bit precision.
+In the future [edit: now available in GHC 7.0.1, with the -msse2
+flag], we'll use the SSE registers for floating point.  This requires
+a CPU that supports SSE2 (ordinary SSE only supports 32 bit precision
+float ops), which means P4 or Xeon and above.  Using SSE will solve
+all these problems, because the SSE registers use fixed 32 bit or 64
+bit precision.
 
 --SDM 1/2003
 -}
@@ -227,6 +228,8 @@ data Instr
         | GITOF       Reg Reg -- src(intreg), dst(fpreg)
         | GITOD       Reg Reg -- src(intreg), dst(fpreg)
 	
+        | GDTOF       Reg Reg -- src(fpreg), dst(fpreg)
+
 	| GADD	      Size Reg Reg Reg -- src1, src2, dst
 	| GDIV	      Size Reg Reg Reg -- src1, src2, dst
 	| GSUB	      Size Reg Reg Reg -- src1, src2, dst
@@ -366,6 +369,8 @@ x86_regUsageOfInstr instr
     GITOF  src dst	-> mkRU [src] [dst]
     GITOD  src dst	-> mkRU [src] [dst]
 
+    GDTOF  src dst	-> mkRU [src] [dst]
+
     GADD   _ s1 s2 dst	-> mkRU [s1,s2] [dst]
     GSUB   _ s1 s2 dst	-> mkRU [s1,s2] [dst]
     GMUL   _ s1 s2 dst	-> mkRU [s1,s2] [dst]
@@ -492,6 +497,8 @@ x86_patchRegsOfInstr instr env
     GITOF src dst	-> GITOF (env src) (env dst)
     GITOD src dst	-> GITOD (env src) (env dst)
 
+    GDTOF src dst	-> GDTOF (env src) (env dst)
+
     GADD sz s1 s2 dst	-> GADD sz (env s1) (env s2) (env dst)
     GSUB sz s1 s2 dst	-> GSUB sz (env s1) (env s2) (env dst)
     GMUL sz s1 s2 dst	-> GMUL sz (env s1) (env s2) (env dst)
@@ -530,7 +537,9 @@ x86_patchRegsOfInstr instr env
     _other		-> panic "patchRegs: unrecognised instr"
 
   where
+    patch1 :: (Operand -> a) -> Operand -> a
     patch1 insn op      = insn $! patchOp op
+    patch2 :: (Operand -> Operand -> a) -> Operand -> Operand -> a
     patch2 insn src dst = (insn $! patchOp src) $! patchOp dst
 
     patchOp (OpReg  reg) = OpReg $! env reg
@@ -732,6 +741,7 @@ i386_insert_ffrees blocks
      where p insn r = case insn of
                         CALL _ _ -> GFREE : insn : r
                         JMP _    -> GFREE : insn : r
+                        JXX_GBL _ _ -> GFREE : insn : r
                         _        -> insn : r
 
 -- if you ever add a new FP insn to the fake x86 FP insn set,
@@ -746,8 +756,9 @@ is_G_instr instr
 	GLD1{}		-> True
         GFTOI{}		-> True
 	GDTOI{}	 	-> True
-        GITOF{} 	-> True
-	GITOD{} 	-> True
+        GITOF{}		-> True
+	GITOD{}	 	-> True
+        GDTOF{}		-> True
 	GADD{}	 	-> True
 	GDIV{}	 	-> True
 	GSUB{} 		-> True
@@ -775,24 +786,24 @@ canShortcut _                  = Nothing
 -- This helper shortcuts a sequence of branches.
 -- The blockset helps avoid following cycles.
 shortcutJump :: (BlockId -> Maybe JumpDest) -> Instr -> Instr
-shortcutJump fn insn = shortcutJump' fn emptyBlockSet insn
+shortcutJump fn insn = shortcutJump' fn (setEmpty :: BlockSet) insn
   where shortcutJump' fn seen insn@(JXX cc id) =
-          if elemBlockSet id seen then insn
+          if setMember id seen then insn
           else case fn id of
                  Nothing                -> insn
                  Just (DestBlockId id') -> shortcutJump' fn seen' (JXX cc id')
                  Just (DestImm imm)     -> shortcutJump' fn seen' (JXX_GBL cc imm)
-               where seen' = extendBlockSet seen id
+               where seen' = setInsert id seen
         shortcutJump' _ _ other = other
 
 -- Here because it knows about JumpDest
 shortcutStatic :: (BlockId -> Maybe JumpDest) -> CmmStatic -> CmmStatic
 shortcutStatic fn (CmmStaticLit (CmmLabel lab))
   | Just uq <- maybeAsmTemp lab 
-  = CmmStaticLit (CmmLabel (shortBlockId fn emptyUniqSet (BlockId uq)))
+  = CmmStaticLit (CmmLabel (shortBlockId fn emptyUniqSet (mkBlockId uq)))
 shortcutStatic fn (CmmStaticLit (CmmLabelDiffOff lbl1 lbl2 off))
   | Just uq <- maybeAsmTemp lbl1
-  = CmmStaticLit (CmmLabelDiffOff (shortBlockId fn emptyUniqSet (BlockId uq)) lbl2 off)
+  = CmmStaticLit (CmmLabelDiffOff (shortBlockId fn emptyUniqSet (mkBlockId uq)) lbl2 off)
         -- slightly dodgy, we're ignoring the second label, but this
         -- works with the way we use CmmLabelDiffOff for jump tables now.
 
@@ -805,10 +816,11 @@ shortBlockId
 	-> BlockId
 	-> CLabel
 
-shortBlockId fn seen blockid@(BlockId uq) =
+shortBlockId fn seen blockid =
   case (elementOfUniqSet uq seen, fn blockid) of
     (True, _)    -> mkAsmTempLabel uq
     (_, Nothing) -> mkAsmTempLabel uq
     (_, Just (DestBlockId blockid'))  -> shortBlockId fn (addOneToUniqSet seen uq) blockid'
     (_, Just (DestImm (ImmCLbl lbl))) -> lbl
     (_, _other) -> panic "shortBlockId"
+  where uq = getUnique blockid