X-Git-Url: http://git.megacz.com/?a=blobdiff_plain;f=compiler%2FnativeGen%2FX86%2FInstr.hs;h=a96452b9f16f152fc39b2eb5cd4924a23351e026;hb=6cec61d14a324285dbb8ce73d4c7215f1f8d6766;hp=7e11d233639d4398ce00a2fd49779661964b0ffc;hpb=9d9eef1f78e25c716e2c0c7559005b730f425231;p=ghc-hetmet.git

diff --git a/compiler/nativeGen/X86/Instr.hs b/compiler/nativeGen/X86/Instr.hs
index 7e11d23..a96452b 100644
--- a/compiler/nativeGen/X86/Instr.hs
+++ b/compiler/nativeGen/X86/Instr.hs
@@ -21,14 +21,15 @@ import Reg
 import TargetReg
 
 import BlockId
-import Cmm
+import OldCmm
 import FastString
 import FastBool
 import Outputable
 import Constants (rESERVED_C_STACK_BYTES)
 
 import CLabel
-import Panic
+import UniqSet
+import Unique
 
 -- Size of a PPC memory address, in bytes.
 --
@@ -101,7 +102,7 @@ Hence GLDZ and GLD1.  Bwahahahahahahaha!
 -}
 
 {-
-MORE FLOATING POINT MUSINGS...
+Note [x86 Floating point precision]
 
 Intel's internal floating point registers are by default 80 bit
 extended precision.  This means that all operations done on values in
@@ -140,11 +141,12 @@ This is what gcc does.  Spilling at 80 bits requires taking up a full
 128 bit slot (so we get alignment).  We spill at 80-bits and ignore
 the alignment problems.
 
-In the future, we'll use the SSE registers for floating point.  This
-requires a CPU that supports SSE2 (ordinary SSE only supports 32 bit
-precision float ops), which means P4 or Xeon and above.  Using SSE
-will solve all these problems, because the SSE registers use fixed 32
-bit or 64 bit precision.
+In the future [edit: now available in GHC 7.0.1, with the -msse2
+flag], we'll use the SSE registers for floating point.  This requires
+a CPU that supports SSE2 (ordinary SSE only supports 32 bit precision
+float ops), which means P4 or Xeon and above.  Using SSE will solve
+all these problems, because the SSE registers use fixed 32 bit or 64
+bit precision.
 
 --SDM 1/2003
 -}
@@ -226,6 +228,8 @@ data Instr
 	| GITOF      Reg Reg -- src(intreg), dst(fpreg)
 	| GITOD      Reg Reg -- src(intreg), dst(fpreg)
 
+	| GDTOF      Reg Reg -- src(fpreg), dst(fpreg)
+
 	| GADD       Size Reg Reg Reg -- src1, src2, dst
 	| GDIV       Size Reg Reg Reg -- src1, src2, dst
 	| GSUB       Size Reg Reg Reg -- src1, src2, dst
@@ -252,10 +256,10 @@ data Instr
 	-- use MOV for moving (either movss or movsd (movlpd better?))
 	| CVTSS2SD   Reg Reg            -- F32 to F64
 	| CVTSD2SS   Reg Reg            -- F64 to F32
-	| CVTTSS2SIQ Operand Reg        -- F32 to I32/I64 (with truncation)
-	| CVTTSD2SIQ Operand Reg        -- F64 to I32/I64 (with truncation)
-	| CVTSI2SS   Operand Reg        -- I32/I64 to F32
-	| CVTSI2SD   Operand Reg        -- I32/I64 to F64
+	| CVTTSS2SIQ Size Operand Reg   -- F32 to I32/I64 (with truncation)
+	| CVTTSD2SIQ Size Operand Reg   -- F64 to I32/I64 (with truncation)
+	| CVTSI2SS   Size Operand Reg   -- I32/I64 to F32
+	| CVTSI2SD   Size Operand Reg   -- I32/I64 to F64
 
 	-- use ADD & SUB for arithmetic.  In both cases, operands
 	-- are Operand Reg.
@@ -352,7 +356,6 @@ x86_regUsageOfInstr instr
     CLTD   _             -> mkRU [eax] [edx]
     NOP                  -> mkRU [] []
 
-#if i386_TARGET_ARCH
    GMOV   src dst       -> mkRU [src] [dst]
     GLD    _ src dst     -> mkRU (use_EA src) [dst]
     GST    _ src dst     -> mkRUR (src : use_EA dst)
@@ -366,6 +369,8 @@ x86_regUsageOfInstr instr
     GITOF  src dst       -> mkRU [src] [dst]
     GITOD  src dst       -> mkRU [src] [dst]
 
+    GDTOF  src dst       -> mkRU [src] [dst]
+
     GADD   _ s1 s2 dst   -> mkRU [s1,s2] [dst]
     GSUB   _ s1 s2 dst   -> mkRU [s1,s2] [dst]
     GMUL   _ s1 s2 dst   -> mkRU [s1,s2] [dst]
@@ -378,17 +383,14 @@ x86_regUsageOfInstr instr
     GSIN   _ _ _ src dst -> mkRU [src] [dst]
     GCOS   _ _ _ src dst -> mkRU [src] [dst]
     GTAN   _ _ _ src dst -> mkRU [src] [dst]
-#endif
 
-#if x86_64_TARGET_ARCH
     CVTSS2SD     src dst -> mkRU [src] [dst]
     CVTSD2SS     src dst -> mkRU [src] [dst]
-    CVTTSS2SIQ   src dst -> mkRU (use_R src) [dst]
-    CVTTSD2SIQ   src dst -> mkRU (use_R src) [dst]
-    CVTSI2SS     src dst -> mkRU (use_R src) [dst]
-    CVTSI2SD     src dst -> mkRU (use_R src) [dst]
+    CVTTSS2SIQ _ src dst -> mkRU (use_R src) [dst]
+    CVTTSD2SIQ _ src dst -> mkRU (use_R src) [dst]
+    CVTSI2SS   _ src dst -> mkRU (use_R src) [dst]
+    CVTSI2SD   _ src dst -> mkRU (use_R src) [dst]
     FDIV       _ src dst -> usageRM src dst
-#endif
 
     FETCHGOT reg         -> mkRU [] [reg]
     FETCHPC  reg         -> mkRU [] [reg]
@@ -482,7 +484,6 @@ x86_patchRegsOfInstr instr env
     JMP op               -> patch1 JMP op
     JMP_TBL op ids       -> patch1 JMP_TBL op $ ids
 
-#if i386_TARGET_ARCH
     GMOV src dst         -> GMOV (env src) (env dst)
     GLD  sz src dst      -> GLD sz (lookupAddr src) (env dst)
     GST  sz src dst      -> GST sz (env src) (lookupAddr dst)
@@ -496,6 +497,8 @@ x86_patchRegsOfInstr instr env
     GITOF src dst        -> GITOF (env src) (env dst)
     GITOD src dst        -> GITOD (env src) (env dst)
 
+    GDTOF src dst        -> GDTOF (env src) (env dst)
+
     GADD sz s1 s2 dst    -> GADD sz (env s1) (env s2) (env dst)
     GSUB sz s1 s2 dst    -> GSUB sz (env s1) (env s2) (env dst)
     GMUL sz s1 s2 dst    -> GMUL sz (env s1) (env s2) (env dst)
@@ -508,17 +511,14 @@ x86_patchRegsOfInstr instr env
     GSIN sz l1 l2 src dst -> GSIN sz l1 l2 (env src) (env dst)
     GCOS sz l1 l2 src dst -> GCOS sz l1 l2 (env src) (env dst)
     GTAN sz l1 l2 src dst -> GTAN sz l1 l2 (env src) (env dst)
-#endif
 
-#if x86_64_TARGET_ARCH
     CVTSS2SD src dst      -> CVTSS2SD (env src) (env dst)
     CVTSD2SS src dst      -> CVTSD2SS (env src) (env dst)
-    CVTTSS2SIQ src dst    -> CVTTSS2SIQ (patchOp src) (env dst)
-    CVTTSD2SIQ src dst    -> CVTTSD2SIQ (patchOp src) (env dst)
-    CVTSI2SS src dst      -> CVTSI2SS (patchOp src) (env dst)
-    CVTSI2SD src dst      -> CVTSI2SD (patchOp src) (env dst)
+    CVTTSS2SIQ sz src dst -> CVTTSS2SIQ sz (patchOp src) (env dst)
+    CVTTSD2SIQ sz src dst -> CVTTSD2SIQ sz (patchOp src) (env dst)
+    CVTSI2SS sz src dst   -> CVTSI2SS sz (patchOp src) (env dst)
+    CVTSI2SD sz src dst   -> CVTSI2SD sz (patchOp src) (env dst)
     FDIV sz src dst       -> FDIV sz (patchOp src) (patchOp dst)
-#endif
 
     CALL (Left _)  _      -> instr
     CALL (Right reg) p    -> CALL (Right (env reg)) p
@@ -537,7 +537,9 @@ x86_patchRegsOfInstr instr env
     _other               -> panic "patchRegs: unrecognised instr"
 
   where
+    patch1 :: (Operand -> a) -> Operand -> a
     patch1 insn op      = insn $! patchOp op
+    patch2 :: (Operand -> Operand -> a) -> Operand -> Operand -> a
     patch2 insn src dst = (insn $! patchOp src) $! patchOp dst
 
     patchOp (OpReg  reg) = OpReg $! env reg
@@ -601,30 +603,16 @@ x86_mkSpillInstr
 	-> Int    -- spill slot to use
 	-> Instr
 
-#if i386_TARGET_ARCH
-x86_mkSpillInstr reg delta slot
-  = let off     = spillSlotToOffset slot
-    in
-    let off_w   = (off-delta) `div` 4
-    in case targetClassOfReg reg of
-           RcInteger -> MOV II32 (OpReg reg) (OpAddr (spRel off_w))
-           _         -> GST FF80 reg (spRel off_w) {- RcFloat/RcDouble -}
-
-#elif x86_64_TARGET_ARCH
 x86_mkSpillInstr reg delta slot
   = let off     = spillSlotToOffset slot
     in
-    let off_w   = (off-delta) `div` 8
+    let off_w   = (off-delta) `div` IF_ARCH_i386(4,8)
     in case targetClassOfReg reg of
-           RcInteger -> MOV II64 (OpReg reg) (OpAddr (spRel off_w))
-           RcDouble  -> MOV FF64 (OpReg reg) (OpAddr (spRel off_w))
+           RcInteger   -> MOV IF_ARCH_i386(II32,II64)
+                              (OpReg reg) (OpAddr (spRel off_w))
+           RcDouble    -> GST FF80 reg (spRel off_w) {- RcFloat/RcDouble -}
+           RcDoubleSSE -> MOV FF64 (OpReg reg) (OpAddr (spRel off_w))
           _           -> panic "X86.mkSpillInstr: no match"
-        -- ToDo: will it work to always spill as a double?
-        --       does that cause a stall if the data was a float?
-#else
-x86_mkSpillInstr _ _ _
-        = panic "X86.RegInfo.mkSpillInstr: not defined for this architecture."
-#endif
 
 
 -- | Make a spill reload instruction.
@@ -634,26 +622,16 @@ x86_mkLoadInstr
 	-> Int    -- spill slot to use
 	-> Instr
 
-#if i386_TARGET_ARCH
-x86_mkLoadInstr reg delta slot
-  = let off     = spillSlotToOffset slot
-    in
-        let off_w = (off-delta) `div` 4
-        in case targetClassOfReg reg of {
-              RcInteger -> MOV II32 (OpAddr (spRel off_w)) (OpReg reg);
-              _         -> GLD FF80 (spRel off_w) reg} {- RcFloat/RcDouble -}
-#elif x86_64_TARGET_ARCH
 x86_mkLoadInstr reg delta slot
  = let off     = spillSlotToOffset slot
    in
-        let off_w = (off-delta) `div` 8
+        let off_w = (off-delta) `div` IF_ARCH_i386(4,8)
        in case targetClassOfReg reg of
-              RcInteger -> MOV II64 (OpAddr (spRel off_w)) (OpReg reg)
-              _         -> MOV FF64 (OpAddr (spRel off_w)) (OpReg reg)
-#else
-x86_mkLoadInstr _ _ _
-        = panic "X86.RegInfo.mkLoadInstr: not defined for this architecture."
-#endif
+              RcInteger   -> MOV IF_ARCH_i386(II32,II64)
+                                 (OpAddr (spRel off_w)) (OpReg reg)
+              RcDouble    -> GLD FF80 (spRel off_w) reg {- RcFloat/RcDouble -}
+              RcDoubleSSE -> MOV FF64 (OpAddr (spRel off_w)) (OpReg reg)
+              _           -> panic "X86.x86_mkLoadInstr"
 
 spillSlotSize :: Int
 spillSlotSize = IF_ARCH_i386(12, 8)
@@ -714,14 +692,12 @@ x86_mkRegRegMoveInstr src dst
  = case targetClassOfReg src of
 #if i386_TARGET_ARCH
 	RcInteger -> MOV II32 (OpReg src) (OpReg dst)
-	RcDouble  -> GMOV src dst
-	RcFloat   -> panic "X86.RegInfo.mkRegRegMoveInstr: no match"
 #else
 	RcInteger -> MOV II64 (OpReg src) (OpReg dst)
-	RcDouble  -> MOV FF64 (OpReg src) (OpReg dst)
-	RcFloat   -> panic "X86.RegInfo.mkRegRegMoveInstr: no match"
 #endif
-
+	RcDouble    -> GMOV src dst
+	RcDoubleSSE -> MOV FF64 (OpReg src) (OpReg dst)
+	_           -> panic "X86.RegInfo.mkRegRegMoveInstr: no match"
 
 -- | Check whether an instruction represents a reg-reg move.
 --	The register allocator attempts to eliminate reg->reg moves whenever it can,
@@ -765,6 +741,7 @@ i386_insert_ffrees blocks
  where p insn r  = case insn of
                      CALL _ _    -> GFREE : insn : r
                      JMP _       -> GFREE : insn : r
+                     JXX_GBL _ _ -> GFREE : insn : r
                      _           -> insn : r
 
 -- if you ever add a new FP insn to the fake x86 FP insn set,
@@ -779,8 +756,9 @@ is_G_instr instr
 	GLD1{}      -> True
 	GFTOI{}     -> True
 	GDTOI{}     -> True
-	GITOF{} -> True
-	GITOD{} -> True
+	GITOF{}     -> True
+	GITOD{}     -> True
+	GDTOF{}     -> True
 	GADD{}      -> True
 	GDIV{}      -> True
 	GSUB{}      -> True
@@ -800,31 +778,32 @@ data JumpDest = DestBlockId BlockId | DestImm Imm
 
 
 canShortcut :: Instr -> Maybe JumpDest
-canShortcut (JXX ALWAYS id) = Just (DestBlockId id)
-canShortcut (JMP (OpImm imm)) = Just (DestImm imm)
-canShortcut _ = Nothing
+canShortcut (JXX ALWAYS id)   = Just (DestBlockId id)
+canShortcut (JMP (OpImm imm)) = Just (DestImm imm)
+canShortcut _                 = Nothing
 
--- The helper ensures that we don't follow cycles.
+-- This helper shortcuts a sequence of branches.
+-- The blockset helps avoid following cycles.
 shortcutJump :: (BlockId -> Maybe JumpDest) -> Instr -> Instr
-shortcutJump fn insn = shortcutJump' fn emptyBlockSet insn
+shortcutJump fn insn = shortcutJump' fn (setEmpty :: BlockSet) insn
   where shortcutJump' fn seen insn@(JXX cc id) =
-          if elemBlockSet id seen then insn
+          if setMember id seen then insn
           else case fn id of
                  Nothing                -> insn
                  Just (DestBlockId id') -> shortcutJump' fn seen' (JXX cc id')
                  Just (DestImm imm)     -> shortcutJump' fn seen' (JXX_GBL cc imm)
-               where seen' = extendBlockSet seen id
+               where seen' = setInsert id seen
         shortcutJump' _ _ other = other
 
 
 -- Here because it knows about JumpDest
 shortcutStatic :: (BlockId -> Maybe JumpDest) -> CmmStatic -> CmmStatic
 shortcutStatic fn (CmmStaticLit (CmmLabel lab))
   | Just uq <- maybeAsmTemp lab
-  = CmmStaticLit (CmmLabel (shortBlockId fn (BlockId uq)))
+  = CmmStaticLit (CmmLabel (shortBlockId fn emptyUniqSet (mkBlockId uq)))
 
 shortcutStatic fn (CmmStaticLit (CmmLabelDiffOff lbl1 lbl2 off))
   | Just uq <- maybeAsmTemp lbl1
-  = CmmStaticLit (CmmLabelDiffOff (shortBlockId fn (BlockId uq)) lbl2 off)
+  = CmmStaticLit (CmmLabelDiffOff (shortBlockId fn emptyUniqSet (mkBlockId uq)) lbl2 off)
         -- slightly dodgy, we're ignoring the second label, but this
         -- works with the way we use CmmLabelDiffOff for jump tables now.
@@ -833,12 +812,15 @@ shortcutStatic _ other_static
 	= other_static
 
 shortBlockId :: (BlockId -> Maybe JumpDest)
+	-> UniqSet Unique
 	-> BlockId
 	-> CLabel
 
-shortBlockId fn blockid@(BlockId uq) =
-   case fn blockid of
-      Nothing                      -> mkAsmTempLabel uq
-      Just (DestBlockId blockid')  -> shortBlockId fn blockid'
-      Just (DestImm (ImmCLbl lbl)) -> lbl
-      _other                       -> panic "shortBlockId"
+shortBlockId fn seen blockid =
+   case (elementOfUniqSet uq seen, fn blockid) of
+     (True, _)    -> mkAsmTempLabel uq
+     (_, Nothing) -> mkAsmTempLabel uq
+     (_, Just (DestBlockId blockid')) -> shortBlockId fn (addOneToUniqSet seen uq) blockid'
+     (_, Just (DestImm (ImmCLbl lbl))) -> lbl
+     (_, _other) -> panic "shortBlockId"
+   where uq = getUnique blockid
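A note on the behaviour discussed in Note [x86 Floating point precision] above: the effect of carrying intermediates at 80 bits is easy to observe from ordinary Haskell. The standalone sketch below is not part of the patch and the function name is made up; what it actually prints depends on which code generator and flags are in use (x87 vs. -msse2, -fexcess-precision, optimisation level), which is exactly the sensitivity the note describes.

    module Main where

    -- Illustrative only.  With genuine 64-bit double arithmetic (SSE2),
    -- the intermediate product 1.0e308 * 10 overflows to Infinity, so the
    -- whole expression evaluates to Infinity.  If the intermediate stays
    -- in an 80-bit x87 register (larger exponent range), it does not
    -- overflow and the result comes back as roughly 1.0e308.
    roundTrip :: Double -> Double
    roundTrip x = (x * 10) / 10
    {-# NOINLINE roundTrip #-}   -- discourage constant folding at compile time

    main :: IO ()
    main = print (roundTrip 1.0e308)

Spilling the intermediate to a 64-bit slot forces the overflow, which is why the note insists on spilling x87 values at the full 80 bits.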
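The cycle handling added to shortcutJump and shortBlockId can also be modelled in isolation. The sketch below is not the NCG code: it stands in Int and Data.Set for BlockId and BlockSet purely for illustration, but it follows the same idea of threading a 'seen' set through the chase so that a loop of branch-only blocks terminates.

    module Main where

    import qualified Data.Set as Set

    -- Follow a chain of unconditional branches through branch-only blocks,
    -- carrying the set of blocks already visited so a cycle cannot send us
    -- round forever.
    shortcut :: (Int -> Maybe Int) -> Int -> Int
    shortcut next = go Set.empty
      where
        go seen b
          | b `Set.member` seen = b           -- already chased through here: stop
          | otherwise           = case next b of
              Nothing -> b                    -- a real block: this is the target
              Just b' -> go (Set.insert b seen) b'

    main :: IO ()
    main = do
      print (shortcut (`lookup` [(1, 2), (2, 3)]) 1)  -- chain 1 -> 2 -> 3: prints 3
      print (shortcut (`lookup` [(4, 5), (5, 4)]) 4)  -- cycle 4 <-> 5: prints 4, terminates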