merge up to ghc HEAD 16-Apr-2011

[ghc-hetmet.git] / compiler / nativeGen / X86 / Instr.hs
diff --git a/compiler/nativeGen/X86/Instr.hs b/compiler/nativeGen/X86/Instr.hs

index 5d731bd..a96452b 100644 (file)
--- a/compiler/nativeGen/X86/Instr.hs
+++ b/compiler/nativeGen/X86/Instr.hs
@@ -21,13 +21,15 @@ import Reg
  import TargetReg
  
  import BlockId
  import TargetReg
  
  import BlockId
-import Cmm
+import OldCmm
  import FastString
  import FastBool
  import Outputable
  import Constants       (rESERVED_C_STACK_BYTES)
  
  import CLabel
  import FastString
  import FastBool
  import Outputable
  import Constants       (rESERVED_C_STACK_BYTES)
  
  import CLabel
+import UniqSet
+import Unique
  
  -- Size of a PPC memory address, in bytes.
  --
  
  -- Size of a PPC memory address, in bytes.
  --
@@ -100,7 +102,7 @@ Hence GLDZ and GLD1.  Bwahahahahahahaha!
  -}
  
  {-
  -}
  
  {-
-MORE FLOATING POINT MUSINGS...
+Note [x86 Floating point precision]
  
  Intel's internal floating point registers are by default 80 bit
  extended precision.  This means that all operations done on values in
  
  Intel's internal floating point registers are by default 80 bit
  extended precision.  This means that all operations done on values in
@@ -139,11 +141,12 @@ This is what gcc does.  Spilling at 80 bits requires taking up a full
  128 bit slot (so we get alignment).  We spill at 80-bits and ignore
  the alignment problems.
  
  128 bit slot (so we get alignment).  We spill at 80-bits and ignore
  the alignment problems.
  
-In the future, we'll use the SSE registers for floating point.  This
-requires a CPU that supports SSE2 (ordinary SSE only supports 32 bit
-precision float ops), which means P4 or Xeon and above.  Using SSE
-will solve all these problems, because the SSE registers use fixed 32
-bit or 64 bit precision.
+In the future [edit: now available in GHC 7.0.1, with the -msse2
+flag], we'll use the SSE registers for floating point.  This requires
+a CPU that supports SSE2 (ordinary SSE only supports 32 bit precision
+float ops), which means P4 or Xeon and above.  Using SSE will solve
+all these problems, because the SSE registers use fixed 32 bit or 64
+bit precision.
  
  --SDM 1/2003
  -}
  
  --SDM 1/2003
  -}
@@ -225,6 +228,8 @@ data Instr
          | GITOF       Reg Reg -- src(intreg), dst(fpreg)
          | GITOD       Reg Reg -- src(intreg), dst(fpreg)
         
          | GITOF       Reg Reg -- src(intreg), dst(fpreg)
          | GITOD       Reg Reg -- src(intreg), dst(fpreg)
         
+        | GDTOF       Reg Reg -- src(fpreg), dst(fpreg)
+
         | GADD        Size Reg Reg Reg -- src1, src2, dst
         | GDIV        Size Reg Reg Reg -- src1, src2, dst
         | GSUB        Size Reg Reg Reg -- src1, src2, dst
         | GADD        Size Reg Reg Reg -- src1, src2, dst
         | GDIV        Size Reg Reg Reg -- src1, src2, dst
         | GSUB        Size Reg Reg Reg -- src1, src2, dst
@@ -251,10 +256,10 @@ data Instr
         -- use MOV for moving (either movss or movsd (movlpd better?))
         | CVTSS2SD      Reg Reg         -- F32 to F64
         | CVTSD2SS      Reg Reg         -- F64 to F32
         -- use MOV for moving (either movss or movsd (movlpd better?))
         | CVTSS2SD      Reg Reg         -- F32 to F64
         | CVTSD2SS      Reg Reg         -- F64 to F32
-       | CVTTSS2SIQ    Operand Reg     -- F32 to I32/I64 (with truncation)
-       | CVTTSD2SIQ    Operand Reg     -- F64 to I32/I64 (with truncation)
-       | CVTSI2SS      Operand Reg     -- I32/I64 to F32
-       | CVTSI2SD      Operand Reg     -- I32/I64 to F64
+       | CVTTSS2SIQ    Size Operand Reg -- F32 to I32/I64 (with truncation)
+       | CVTTSD2SIQ    Size Operand Reg -- F64 to I32/I64 (with truncation)
+       | CVTSI2SS      Size Operand Reg -- I32/I64 to F32
+       | CVTSI2SD      Size Operand Reg -- I32/I64 to F64
  
         -- use ADD & SUB for arithmetic.  In both cases, operands
         -- are  Operand Reg.
  
         -- use ADD & SUB for arithmetic.  In both cases, operands
         -- are  Operand Reg.
@@ -351,7 +356,6 @@ x86_regUsageOfInstr instr
      CLTD   _           -> mkRU [eax] [edx]
      NOP                        -> mkRU [] []
  
      CLTD   _           -> mkRU [eax] [edx]
      NOP                        -> mkRU [] []
  
-#if i386_TARGET_ARCH
      GMOV   src dst     -> mkRU [src] [dst]
      GLD    _ src dst   -> mkRU (use_EA src) [dst]
      GST    _ src dst   -> mkRUR (src : use_EA dst)
      GMOV   src dst     -> mkRU [src] [dst]
      GLD    _ src dst   -> mkRU (use_EA src) [dst]
      GST    _ src dst   -> mkRUR (src : use_EA dst)
@@ -365,6 +369,8 @@ x86_regUsageOfInstr instr
      GITOF  src dst     -> mkRU [src] [dst]
      GITOD  src dst     -> mkRU [src] [dst]
  
      GITOF  src dst     -> mkRU [src] [dst]
      GITOD  src dst     -> mkRU [src] [dst]
  
+    GDTOF  src dst     -> mkRU [src] [dst]
+
      GADD   _ s1 s2 dst -> mkRU [s1,s2] [dst]
      GSUB   _ s1 s2 dst -> mkRU [s1,s2] [dst]
      GMUL   _ s1 s2 dst -> mkRU [s1,s2] [dst]
      GADD   _ s1 s2 dst -> mkRU [s1,s2] [dst]
      GSUB   _ s1 s2 dst -> mkRU [s1,s2] [dst]
      GMUL   _ s1 s2 dst -> mkRU [s1,s2] [dst]
@@ -377,17 +383,14 @@ x86_regUsageOfInstr instr
      GSIN   _ _ _ src dst -> mkRU [src] [dst]
      GCOS   _ _ _ src dst -> mkRU [src] [dst]
      GTAN   _ _ _ src dst -> mkRU [src] [dst]
      GSIN   _ _ _ src dst -> mkRU [src] [dst]
      GCOS   _ _ _ src dst -> mkRU [src] [dst]
      GTAN   _ _ _ src dst -> mkRU [src] [dst]
-#endif
  
  
-#if x86_64_TARGET_ARCH
      CVTSS2SD   src dst -> mkRU [src] [dst]
      CVTSD2SS   src dst -> mkRU [src] [dst]
      CVTSS2SD   src dst -> mkRU [src] [dst]
      CVTSD2SS   src dst -> mkRU [src] [dst]
-    CVTTSS2SIQ src dst -> mkRU (use_R src) [dst]
-    CVTTSD2SIQ src dst -> mkRU (use_R src) [dst]
-    CVTSI2SS   src dst -> mkRU (use_R src) [dst]
-    CVTSI2SD   src dst -> mkRU (use_R src) [dst]
+    CVTTSS2SIQ _ src dst -> mkRU (use_R src) [dst]
+    CVTTSD2SIQ _ src dst -> mkRU (use_R src) [dst]
+    CVTSI2SS   _ src dst -> mkRU (use_R src) [dst]
+    CVTSI2SD   _ src dst -> mkRU (use_R src) [dst]
      FDIV _     src dst -> usageRM src dst
      FDIV _     src dst -> usageRM src dst
-#endif    
  
      FETCHGOT reg        -> mkRU [] [reg]
      FETCHPC  reg        -> mkRU [] [reg]
  
      FETCHGOT reg        -> mkRU [] [reg]
      FETCHPC  reg        -> mkRU [] [reg]
@@ -481,7 +484,6 @@ x86_patchRegsOfInstr instr env
      JMP op             -> patch1 JMP op
      JMP_TBL op ids      -> patch1 JMP_TBL op $ ids
  
      JMP op             -> patch1 JMP op
      JMP_TBL op ids      -> patch1 JMP_TBL op $ ids
  
-#if i386_TARGET_ARCH
      GMOV src dst       -> GMOV (env src) (env dst)
      GLD  sz src dst    -> GLD sz (lookupAddr src) (env dst)
      GST  sz src dst    -> GST sz (env src) (lookupAddr dst)
      GMOV src dst       -> GMOV (env src) (env dst)
      GLD  sz src dst    -> GLD sz (lookupAddr src) (env dst)
      GST  sz src dst    -> GST sz (env src) (lookupAddr dst)
@@ -495,6 +497,8 @@ x86_patchRegsOfInstr instr env
      GITOF src dst      -> GITOF (env src) (env dst)
      GITOD src dst      -> GITOD (env src) (env dst)
  
      GITOF src dst      -> GITOF (env src) (env dst)
      GITOD src dst      -> GITOD (env src) (env dst)
  
+    GDTOF src dst      -> GDTOF (env src) (env dst)
+
      GADD sz s1 s2 dst  -> GADD sz (env s1) (env s2) (env dst)
      GSUB sz s1 s2 dst  -> GSUB sz (env s1) (env s2) (env dst)
      GMUL sz s1 s2 dst  -> GMUL sz (env s1) (env s2) (env dst)
      GADD sz s1 s2 dst  -> GADD sz (env s1) (env s2) (env dst)
      GSUB sz s1 s2 dst  -> GSUB sz (env s1) (env s2) (env dst)
      GMUL sz s1 s2 dst  -> GMUL sz (env s1) (env s2) (env dst)
@@ -507,17 +511,14 @@ x86_patchRegsOfInstr instr env
      GSIN sz l1 l2 src dst      -> GSIN sz l1 l2 (env src) (env dst)
      GCOS sz l1 l2 src dst      -> GCOS sz l1 l2 (env src) (env dst)
      GTAN sz l1 l2 src dst      -> GTAN sz l1 l2 (env src) (env dst)
      GSIN sz l1 l2 src dst      -> GSIN sz l1 l2 (env src) (env dst)
      GCOS sz l1 l2 src dst      -> GCOS sz l1 l2 (env src) (env dst)
      GTAN sz l1 l2 src dst      -> GTAN sz l1 l2 (env src) (env dst)
-#endif
  
  
-#if x86_64_TARGET_ARCH
      CVTSS2SD src dst   -> CVTSS2SD (env src) (env dst)
      CVTSD2SS src dst   -> CVTSD2SS (env src) (env dst)
      CVTSS2SD src dst   -> CVTSS2SD (env src) (env dst)
      CVTSD2SS src dst   -> CVTSD2SS (env src) (env dst)
-    CVTTSS2SIQ src dst -> CVTTSS2SIQ (patchOp src) (env dst)
-    CVTTSD2SIQ src dst -> CVTTSD2SIQ (patchOp src) (env dst)
-    CVTSI2SS src dst   -> CVTSI2SS (patchOp src) (env dst)
-    CVTSI2SD src dst   -> CVTSI2SD (patchOp src) (env dst)
+    CVTTSS2SIQ sz src dst -> CVTTSS2SIQ sz (patchOp src) (env dst)
+    CVTTSD2SIQ sz src dst -> CVTTSD2SIQ sz (patchOp src) (env dst)
+    CVTSI2SS sz src dst        -> CVTSI2SS sz (patchOp src) (env dst)
+    CVTSI2SD sz src dst        -> CVTSI2SD sz (patchOp src) (env dst)
      FDIV sz src dst    -> FDIV sz (patchOp src) (patchOp dst)
      FDIV sz src dst    -> FDIV sz (patchOp src) (patchOp dst)
-#endif    
  
      CALL (Left _)  _   -> instr
      CALL (Right reg) p -> CALL (Right (env reg)) p
  
      CALL (Left _)  _   -> instr
      CALL (Right reg) p -> CALL (Right (env reg)) p
@@ -536,7 +537,9 @@ x86_patchRegsOfInstr instr env
      _other             -> panic "patchRegs: unrecognised instr"
  
    where
      _other             -> panic "patchRegs: unrecognised instr"
  
    where
+    patch1 :: (Operand -> a) -> Operand -> a
      patch1 insn op      = insn $! patchOp op
      patch1 insn op      = insn $! patchOp op
+    patch2 :: (Operand -> Operand -> a) -> Operand -> Operand -> a
      patch2 insn src dst = (insn $! patchOp src) $! patchOp dst
  
      patchOp (OpReg  reg) = OpReg $! env reg
      patch2 insn src dst = (insn $! patchOp src) $! patchOp dst
  
      patchOp (OpReg  reg) = OpReg $! env reg
@@ -600,30 +603,16 @@ x86_mkSpillInstr
         -> Int          -- spill slot to use
         -> Instr
  
         -> Int          -- spill slot to use
         -> Instr
  
-#if   i386_TARGET_ARCH
-x86_mkSpillInstr reg delta slot
-  = let        off     = spillSlotToOffset slot
-    in
-    let off_w = (off-delta) `div` 4
-    in case targetClassOfReg reg of
-          RcInteger -> MOV II32 (OpReg reg) (OpAddr (spRel off_w))
-          _         -> GST FF80 reg (spRel off_w) {- RcFloat/RcDouble -}
-
-#elif x86_64_TARGET_ARCH
  x86_mkSpillInstr reg delta slot
    = let        off     = spillSlotToOffset slot
      in
  x86_mkSpillInstr reg delta slot
    = let        off     = spillSlotToOffset slot
      in
-    let off_w = (off-delta) `div` 8
+    let off_w = (off-delta) `div` IF_ARCH_i386(4,8)
      in case targetClassOfReg reg of
      in case targetClassOfReg reg of
-          RcInteger -> MOV II64 (OpReg reg) (OpAddr (spRel off_w))
-          RcDouble  -> MOV FF64 (OpReg reg) (OpAddr (spRel off_w))
+          RcInteger   -> MOV IF_ARCH_i386(II32,II64)
+                              (OpReg reg) (OpAddr (spRel off_w))
+          RcDouble    -> GST FF80 reg (spRel off_w) {- RcFloat/RcDouble -}
+          RcDoubleSSE -> MOV FF64 (OpReg reg) (OpAddr (spRel off_w))
             _         -> panic "X86.mkSpillInstr: no match"
             _         -> panic "X86.mkSpillInstr: no match"
-               -- ToDo: will it work to always spill as a double?
-               -- does that cause a stall if the data was a float?
-#else
-x86_mkSpillInstr _ _ _
-    =   panic "X86.RegInfo.mkSpillInstr: not defined for this architecture."
-#endif
  
  
  -- | Make a spill reload instruction.
  
  
  -- | Make a spill reload instruction.
@@ -633,26 +622,16 @@ x86_mkLoadInstr
         -> Int          -- spill slot to use
         -> Instr
  
         -> Int          -- spill slot to use
         -> Instr
  
-#if   i386_TARGET_ARCH
-x86_mkLoadInstr reg delta slot
-  = let off     = spillSlotToOffset slot
-    in
-       let off_w = (off-delta) `div` 4
-        in case targetClassOfReg reg of {
-              RcInteger -> MOV II32 (OpAddr (spRel off_w)) (OpReg reg);
-              _         -> GLD FF80 (spRel off_w) reg} {- RcFloat/RcDouble -}
-#elif x86_64_TARGET_ARCH
  x86_mkLoadInstr reg delta slot
    = let off     = spillSlotToOffset slot
      in
  x86_mkLoadInstr reg delta slot
    = let off     = spillSlotToOffset slot
      in
-       let off_w = (off-delta) `div` 8
+       let off_w = (off-delta) `div` IF_ARCH_i386(4,8)
          in case targetClassOfReg reg of
          in case targetClassOfReg reg of
-              RcInteger -> MOV II64 (OpAddr (spRel off_w)) (OpReg reg)
-              _         -> MOV FF64 (OpAddr (spRel off_w)) (OpReg reg)
-#else
-x86_mkLoadInstr _ _ _
-       = panic "X86.RegInfo.mkLoadInstr: not defined for this architecture."
-#endif
+              RcInteger -> MOV IF_ARCH_i386(II32,II64) 
+                               (OpAddr (spRel off_w)) (OpReg reg)
+              RcDouble  -> GLD FF80 (spRel off_w) reg {- RcFloat/RcDouble -}
+              RcDoubleSSE -> MOV FF64 (OpAddr (spRel off_w)) (OpReg reg)
+              _           -> panic "X86.x86_mkLoadInstr"
  
  spillSlotSize :: Int
  spillSlotSize = IF_ARCH_i386(12, 8)
  
  spillSlotSize :: Int
  spillSlotSize = IF_ARCH_i386(12, 8)
@@ -713,14 +692,12 @@ x86_mkRegRegMoveInstr src dst
   = case targetClassOfReg src of
  #if   i386_TARGET_ARCH
          RcInteger -> MOV II32 (OpReg src) (OpReg dst)
   = case targetClassOfReg src of
  #if   i386_TARGET_ARCH
          RcInteger -> MOV II32 (OpReg src) (OpReg dst)
-        RcDouble  -> GMOV src dst
-       RcFloat   -> panic "X86.RegInfo.mkRegRegMoveInstr: no match"
  #else
          RcInteger -> MOV II64 (OpReg src) (OpReg dst)
  #else
          RcInteger -> MOV II64 (OpReg src) (OpReg dst)
-        RcDouble  -> MOV FF64 (OpReg src) (OpReg dst)
-       RcFloat   -> panic "X86.RegInfo.mkRegRegMoveInstr: no match"
  #endif
  #endif
-
+        RcDouble    -> GMOV src dst
+        RcDoubleSSE -> MOV FF64 (OpReg src) (OpReg dst)
+       _     -> panic "X86.RegInfo.mkRegRegMoveInstr: no match"
  
  -- | Check whether an instruction represents a reg-reg move.
  --     The register allocator attempts to eliminate reg->reg moves whenever it can,
  
  -- | Check whether an instruction represents a reg-reg move.
  --     The register allocator attempts to eliminate reg->reg moves whenever it can,
@@ -764,6 +741,7 @@ i386_insert_ffrees blocks
       where p insn r = case insn of
                          CALL _ _ -> GFREE : insn : r
                          JMP _    -> GFREE : insn : r
       where p insn r = case insn of
                          CALL _ _ -> GFREE : insn : r
                          JMP _    -> GFREE : insn : r
+                        JXX_GBL _ _ -> GFREE : insn : r
                          _        -> insn : r
  
  -- if you ever add a new FP insn to the fake x86 FP insn set,
                          _        -> insn : r
  
  -- if you ever add a new FP insn to the fake x86 FP insn set,
@@ -778,8 +756,9 @@ is_G_instr instr
         GLD1{}          -> True
          GFTOI{}                -> True
         GDTOI{}         -> True
         GLD1{}          -> True
          GFTOI{}                -> True
         GDTOI{}         -> True
-        GITOF{}        -> True
-       GITOD{}         -> True
+        GITOF{}                -> True
+       GITOD{}         -> True
+        GDTOF{}                -> True
         GADD{}          -> True
         GDIV{}          -> True
         GSUB{}          -> True
         GADD{}          -> True
         GDIV{}          -> True
         GSUB{}          -> True
@@ -799,31 +778,32 @@ data JumpDest = DestBlockId BlockId | DestImm Imm
  
  
  canShortcut :: Instr -> Maybe JumpDest
  
  
  canShortcut :: Instr -> Maybe JumpDest
-canShortcut (JXX ALWAYS id)    = Just (DestBlockId id)
-canShortcut (JMP (OpImm imm))  = Just (DestImm imm)
-canShortcut _                  = Nothing
+canShortcut (JXX ALWAYS id)    = Just (DestBlockId id)
+canShortcut (JMP (OpImm imm))  = Just (DestImm imm)
+canShortcut _                  = Nothing
  
  
  
  
--- The helper ensures that we don't follow cycles.
+-- This helper shortcuts a sequence of branches.
+-- The blockset helps avoid following cycles.
  shortcutJump :: (BlockId -> Maybe JumpDest) -> Instr -> Instr
  shortcutJump :: (BlockId -> Maybe JumpDest) -> Instr -> Instr
-shortcutJump fn insn = shortcutJump' fn emptyBlockSet insn
+shortcutJump fn insn = shortcutJump' fn (setEmpty :: BlockSet) insn
    where shortcutJump' fn seen insn@(JXX cc id) =
    where shortcutJump' fn seen insn@(JXX cc id) =
-          if elemBlockSet id seen then insn
+          if setMember id seen then insn
            else case fn id of
                   Nothing                -> insn
                   Just (DestBlockId id') -> shortcutJump' fn seen' (JXX cc id')
                   Just (DestImm imm)     -> shortcutJump' fn seen' (JXX_GBL cc imm)
            else case fn id of
                   Nothing                -> insn
                   Just (DestBlockId id') -> shortcutJump' fn seen' (JXX cc id')
                   Just (DestImm imm)     -> shortcutJump' fn seen' (JXX_GBL cc imm)
-               where seen' = extendBlockSet seen id
+               where seen' = setInsert id seen
          shortcutJump' _ _ other = other
  
  -- Here because it knows about JumpDest
  shortcutStatic :: (BlockId -> Maybe JumpDest) -> CmmStatic -> CmmStatic
  shortcutStatic fn (CmmStaticLit (CmmLabel lab))
    | Just uq <- maybeAsmTemp lab 
          shortcutJump' _ _ other = other
  
  -- Here because it knows about JumpDest
  shortcutStatic :: (BlockId -> Maybe JumpDest) -> CmmStatic -> CmmStatic
  shortcutStatic fn (CmmStaticLit (CmmLabel lab))
    | Just uq <- maybeAsmTemp lab 
-  = CmmStaticLit (CmmLabel (shortBlockId fn (BlockId uq)))
+  = CmmStaticLit (CmmLabel (shortBlockId fn emptyUniqSet (mkBlockId uq)))
  shortcutStatic fn (CmmStaticLit (CmmLabelDiffOff lbl1 lbl2 off))
    | Just uq <- maybeAsmTemp lbl1
  shortcutStatic fn (CmmStaticLit (CmmLabelDiffOff lbl1 lbl2 off))
    | Just uq <- maybeAsmTemp lbl1
-  = CmmStaticLit (CmmLabelDiffOff (shortBlockId fn (BlockId uq)) lbl2 off)
+  = CmmStaticLit (CmmLabelDiffOff (shortBlockId fn emptyUniqSet (mkBlockId uq)) lbl2 off)
          -- slightly dodgy, we're ignoring the second label, but this
          -- works with the way we use CmmLabelDiffOff for jump tables now.
  
          -- slightly dodgy, we're ignoring the second label, but this
          -- works with the way we use CmmLabelDiffOff for jump tables now.
  
@@ -832,12 +812,15 @@ shortcutStatic _ other_static
  
  shortBlockId 
         :: (BlockId -> Maybe JumpDest)
  
  shortBlockId 
         :: (BlockId -> Maybe JumpDest)
+       -> UniqSet Unique
         -> BlockId
         -> CLabel
  
         -> BlockId
         -> CLabel
  
-shortBlockId fn blockid@(BlockId uq) =
-   case fn blockid of
-      Nothing -> mkAsmTempLabel uq
-      Just (DestBlockId blockid')  -> shortBlockId fn blockid'
-      Just (DestImm (ImmCLbl lbl)) -> lbl
-      _other -> panic "shortBlockId"
+shortBlockId fn seen blockid =
+  case (elementOfUniqSet uq seen, fn blockid) of
+    (True, _)    -> mkAsmTempLabel uq
+    (_, Nothing) -> mkAsmTempLabel uq
+    (_, Just (DestBlockId blockid'))  -> shortBlockId fn (addOneToUniqSet seen uq) blockid'
+    (_, Just (DestImm (ImmCLbl lbl))) -> lbl
+    (_, _other) -> panic "shortBlockId"
+  where uq = getUnique blockid