Make -ddump-to-file truncate existing files.

[ghc-hetmet.git] / compiler / nativeGen / X86 / Instr.hs
diff --git a/compiler/nativeGen/X86/Instr.hs b/compiler/nativeGen/X86/Instr.hs

index cbf5179..92655d1 100644 (file)
--- a/compiler/nativeGen/X86/Instr.hs
+++ b/compiler/nativeGen/X86/Instr.hs
@@ -12,35 +12,50 @@
  module X86.Instr
  where
  
+import X86.Cond
+import X86.Regs
+import Instruction
+import Size
+import RegClass
+import Reg
+import TargetReg
+
  import BlockId
-import MachRegs
-import Cmm
+import OldCmm
  import FastString
+import FastBool
+import Outputable
+import Constants       (rESERVED_C_STACK_BYTES)
  
-#if i386_TARGET_ARCH
  import CLabel
-import Panic
-#endif
+import UniqSet
+import Unique
  
-data Cond
-       = ALWAYS        -- What's really used? ToDo
-       | EQQ
-       | GE
-       | GEU
-       | GTT
-       | GU
-       | LE
-       | LEU
-       | LTT
-       | LU
-       | NE
-       | NEG
-       | POS
-       | CARRY
-       | OFLO
-       | PARITY
-       | NOTPARITY
+-- Size of a PPC memory address, in bytes.
+--
+archWordSize :: Size
+#if i386_TARGET_ARCH
+archWordSize   = II32
+#elif x86_64_TARGET_ARCH
+archWordSize   = II64
+#else
+archWordSize   = panic "X86.Instr.archWordSize: not defined"
+#endif
  
+-- | Instruction instance for x86 instruction set.
+instance Instruction Instr where
+       regUsageOfInstr         = x86_regUsageOfInstr
+       patchRegsOfInstr        = x86_patchRegsOfInstr
+       isJumpishInstr          = x86_isJumpishInstr
+       jumpDestsOfInstr        = x86_jumpDestsOfInstr
+       patchJumpInstr          = x86_patchJumpInstr
+       mkSpillInstr            = x86_mkSpillInstr
+       mkLoadInstr             = x86_mkLoadInstr
+       takeDeltaInstr          = x86_takeDeltaInstr
+       isMetaInstr             = x86_isMetaInstr
+       mkRegRegMoveInstr       = x86_mkRegRegMoveInstr
+       takeRegRegMoveInstr     = x86_takeRegRegMoveInstr
+       mkJumpInstr             = x86_mkJumpInstr
  
  
  -- -----------------------------------------------------------------------------
@@ -87,7 +102,7 @@ Hence GLDZ and GLD1.  Bwahahahahahahaha!
  -}
  
  {-
-MORE FLOATING POINT MUSINGS...
+Note [x86 Floating point precision]
  
  Intel's internal floating point registers are by default 80 bit
  extended precision.  This means that all operations done on values in
@@ -126,11 +141,12 @@ This is what gcc does.  Spilling at 80 bits requires taking up a full
  128 bit slot (so we get alignment).  We spill at 80-bits and ignore
  the alignment problems.
  
-In the future, we'll use the SSE registers for floating point.  This
-requires a CPU that supports SSE2 (ordinary SSE only supports 32 bit
-precision float ops), which means P4 or Xeon and above.  Using SSE
-will solve all these problems, because the SSE registers use fixed 32
-bit or 64 bit precision.
+In the future [edit: now available in GHC 7.0.1, with the -msse2
+flag], we'll use the SSE registers for floating point.  This requires
+a CPU that supports SSE2 (ordinary SSE only supports 32 bit precision
+float ops), which means P4 or Xeon and above.  Using SSE will solve
+all these problems, because the SSE registers use fixed 32 bit or 64
+bit precision.
  
  --SDM 1/2003
  -}
@@ -155,13 +171,6 @@ data Instr
          -- benefit of subsequent passes
         | DELTA   Int
  
-       -- | spill this reg to a stack slot
-       | SPILL   Reg Int
-
-       -- | reload this reg from a stack slot
-       | RELOAD  Int Reg
-
-
         -- Moves.
         | MOV         Size Operand Operand
         | MOVZxL      Size Operand Operand -- size is the size of operand 1
@@ -200,9 +209,7 @@ data Instr
          | BT          Size Imm Operand
         | NOP
  
-#if i386_TARGET_ARCH
-       -- Float Arithmetic.
-
+       -- x86 Float Arithmetic.
         -- Note that we cheat by treating G{ABS,MOV,NEG} of doubles 
         -- as single instructions right up until we spit them out.
          -- all the 3-operand fake fp insns are src1 src2 dst
@@ -221,6 +228,8 @@ data Instr
          | GITOF       Reg Reg -- src(intreg), dst(fpreg)
          | GITOD       Reg Reg -- src(intreg), dst(fpreg)
         
+        | GDTOF       Reg Reg -- src(fpreg), dst(fpreg)
+
         | GADD        Size Reg Reg Reg -- src1, src2, dst
         | GDIV        Size Reg Reg Reg -- src1, src2, dst
         | GSUB        Size Reg Reg Reg -- src1, src2, dst
@@ -240,20 +249,17 @@ data Instr
         | GTAN        Size CLabel CLabel Reg Reg -- src, dst
         
          | GFREE         -- do ffree on all x86 regs; an ugly hack
-#endif
  
-#if x86_64_TARGET_ARCH
--- SSE2 floating point: we use a restricted set of the available SSE2
--- instructions for floating-point.
  
+       -- SSE2 floating point: we use a restricted set of the available SSE2
+       -- instructions for floating-point.
         -- use MOV for moving (either movss or movsd (movlpd better?))
-
         | CVTSS2SD      Reg Reg         -- F32 to F64
         | CVTSD2SS      Reg Reg         -- F64 to F32
-       | CVTTSS2SIQ    Operand Reg     -- F32 to I32/I64 (with truncation)
-       | CVTTSD2SIQ    Operand Reg     -- F64 to I32/I64 (with truncation)
-       | CVTSI2SS      Operand Reg     -- I32/I64 to F32
-       | CVTSI2SD      Operand Reg     -- I32/I64 to F64
+       | CVTTSS2SIQ    Size Operand Reg -- F32 to I32/I64 (with truncation)
+       | CVTTSD2SIQ    Size Operand Reg -- F64 to I32/I64 (with truncation)
+       | CVTSI2SS      Size Operand Reg -- I32/I64 to F32
+       | CVTSI2SD      Size Operand Reg -- I32/I64 to F64
  
         -- use ADD & SUB for arithmetic.  In both cases, operands
         -- are  Operand Reg.
@@ -265,7 +271,7 @@ data Instr
         -- compare single/double prec floating point respectively.
  
         | SQRT          Size Operand Reg        -- src, dst
-#endif
+
  
         -- Comparison
         | TEST          Size Operand Operand
@@ -283,7 +289,11 @@ data Instr
         | JMP         Operand
         | JXX         Cond BlockId  -- includes unconditional branches
         | JXX_GBL     Cond Imm      -- non-local version of JXX
-       | JMP_TBL     Operand [BlockId]  -- table jump
+       -- Table jump
+       | JMP_TBL     Operand   -- Address to jump to
+                     [Maybe BlockId] -- Blocks in the jump table
+                     Section   -- Data section jump table should be put in
+                     CLabel    -- Label of jump table
         | CALL        (Either Imm Reg) [Reg]
  
         -- Other things.
@@ -306,8 +316,424 @@ data Operand
         | OpAddr AddrMode       -- memory reference
  
  
-#if i386_TARGET_ARCH
-i386_insert_ffrees :: [GenBasicBlock Instr] -> [GenBasicBlock Instr]
+
+x86_regUsageOfInstr :: Instr -> RegUsage
+x86_regUsageOfInstr instr 
+ = case instr of
+    MOV    _ src dst   -> usageRW src dst
+    MOVZxL _ src dst   -> usageRW src dst
+    MOVSxL _ src dst   -> usageRW src dst
+    LEA    _ src dst   -> usageRW src dst
+    ADD    _ src dst   -> usageRM src dst
+    ADC    _ src dst   -> usageRM src dst
+    SUB    _ src dst   -> usageRM src dst
+    IMUL   _ src dst   -> usageRM src dst
+    IMUL2  _ src       -> mkRU (eax:use_R src) [eax,edx]
+    MUL    _ src dst   -> usageRM src dst
+    DIV    _ op        -> mkRU (eax:edx:use_R op) [eax,edx]
+    IDIV   _ op        -> mkRU (eax:edx:use_R op) [eax,edx]
+    AND    _ src dst   -> usageRM src dst
+    OR     _ src dst   -> usageRM src dst
+
+    XOR    _ (OpReg src) (OpReg dst)
+        | src == dst    -> mkRU [] [dst]
+
+    XOR    _ src dst   -> usageRM src dst
+    NOT    _ op                -> usageM op
+    NEGI   _ op                -> usageM op
+    SHL    _ imm dst   -> usageRM imm dst
+    SAR    _ imm dst   -> usageRM imm dst
+    SHR    _ imm dst   -> usageRM imm dst
+    BT     _ _   src   -> mkRUR (use_R src)
+
+    PUSH   _ op                -> mkRUR (use_R op)
+    POP    _ op                -> mkRU [] (def_W op)
+    TEST   _ src dst   -> mkRUR (use_R src ++ use_R dst)
+    CMP    _ src dst   -> mkRUR (use_R src ++ use_R dst)
+    SETCC  _ op                -> mkRU [] (def_W op)
+    JXX    _ _         -> mkRU [] []
+    JXX_GBL _ _                -> mkRU [] []
+    JMP     op         -> mkRUR (use_R op)
+    JMP_TBL op _ _ _    -> mkRUR (use_R op)
+    CALL (Left _)  params   -> mkRU params callClobberedRegs
+    CALL (Right reg) params -> mkRU (reg:params) callClobberedRegs
+    CLTD   _           -> mkRU [eax] [edx]
+    NOP                        -> mkRU [] []
+
+    GMOV   src dst     -> mkRU [src] [dst]
+    GLD    _ src dst   -> mkRU (use_EA src) [dst]
+    GST    _ src dst   -> mkRUR (src : use_EA dst)
+
+    GLDZ   dst         -> mkRU [] [dst]
+    GLD1   dst         -> mkRU [] [dst]
+
+    GFTOI  src dst     -> mkRU [src] [dst]
+    GDTOI  src dst     -> mkRU [src] [dst]
+
+    GITOF  src dst     -> mkRU [src] [dst]
+    GITOD  src dst     -> mkRU [src] [dst]
+
+    GDTOF  src dst     -> mkRU [src] [dst]
+
+    GADD   _ s1 s2 dst -> mkRU [s1,s2] [dst]
+    GSUB   _ s1 s2 dst -> mkRU [s1,s2] [dst]
+    GMUL   _ s1 s2 dst -> mkRU [s1,s2] [dst]
+    GDIV   _ s1 s2 dst -> mkRU [s1,s2] [dst]
+
+    GCMP   _ src1 src2   -> mkRUR [src1,src2]
+    GABS   _ src dst     -> mkRU [src] [dst]
+    GNEG   _ src dst     -> mkRU [src] [dst]
+    GSQRT  _ src dst     -> mkRU [src] [dst]
+    GSIN   _ _ _ src dst -> mkRU [src] [dst]
+    GCOS   _ _ _ src dst -> mkRU [src] [dst]
+    GTAN   _ _ _ src dst -> mkRU [src] [dst]
+
+    CVTSS2SD   src dst -> mkRU [src] [dst]
+    CVTSD2SS   src dst -> mkRU [src] [dst]
+    CVTTSS2SIQ _ src dst -> mkRU (use_R src) [dst]
+    CVTTSD2SIQ _ src dst -> mkRU (use_R src) [dst]
+    CVTSI2SS   _ src dst -> mkRU (use_R src) [dst]
+    CVTSI2SD   _ src dst -> mkRU (use_R src) [dst]
+    FDIV _     src dst -> usageRM src dst
+
+    FETCHGOT reg        -> mkRU [] [reg]
+    FETCHPC  reg        -> mkRU [] [reg]
+
+    COMMENT _          -> noUsage
+    DELTA   _           -> noUsage
+
+    _other             -> panic "regUsage: unrecognised instr"
+
+ where
+    -- 2 operand form; first operand Read; second Written
+    usageRW :: Operand -> Operand -> RegUsage
+    usageRW op (OpReg reg)     = mkRU (use_R op) [reg]
+    usageRW op (OpAddr ea)     = mkRUR (use_R op ++ use_EA ea)
+    usageRW _ _                        = panic "X86.RegInfo.usageRW: no match"
+
+    -- 2 operand form; first operand Read; second Modified
+    usageRM :: Operand -> Operand -> RegUsage
+    usageRM op (OpReg reg)     = mkRU (use_R op ++ [reg]) [reg]
+    usageRM op (OpAddr ea)     = mkRUR (use_R op ++ use_EA ea)
+    usageRM _ _                        = panic "X86.RegInfo.usageRM: no match"
+
+    -- 1 operand form; operand Modified
+    usageM :: Operand -> RegUsage
+    usageM (OpReg reg)         = mkRU [reg] [reg]
+    usageM (OpAddr ea)         = mkRUR (use_EA ea)
+    usageM _                   = panic "X86.RegInfo.usageM: no match"
+
+    -- Registers defd when an operand is written.
+    def_W (OpReg reg)          = [reg]
+    def_W (OpAddr _ )          = []
+    def_W _                    = panic "X86.RegInfo.def_W: no match"
+
+    -- Registers used when an operand is read.
+    use_R (OpReg reg)  = [reg]
+    use_R (OpImm _)    = []
+    use_R (OpAddr ea)  = use_EA ea
+
+    -- Registers used to compute an effective address.
+    use_EA (ImmAddr _ _) = []
+    use_EA (AddrBaseIndex base index _) = 
+       use_base base $! use_index index
+       where use_base (EABaseReg r) x = r : x
+             use_base _ x             = x
+             use_index EAIndexNone   = []
+             use_index (EAIndex i _) = [i]
+
+    mkRUR src = src' `seq` RU src' []
+       where src' = filter interesting src
+
+    mkRU src dst = src' `seq` dst' `seq` RU src' dst'
+       where src' = filter interesting src
+             dst' = filter interesting dst
+
+interesting :: Reg -> Bool
+interesting (RegVirtual _)             = True
+interesting (RegReal (RealRegSingle i))        = isFastTrue (freeReg i)
+interesting (RegReal (RealRegPair{}))  = panic "X86.interesting: no reg pairs on this arch"
+
+
+
+x86_patchRegsOfInstr :: Instr -> (Reg -> Reg) -> Instr
+x86_patchRegsOfInstr instr env 
+ = case instr of
+    MOV  sz src dst    -> patch2 (MOV  sz) src dst
+    MOVZxL sz src dst  -> patch2 (MOVZxL sz) src dst
+    MOVSxL sz src dst  -> patch2 (MOVSxL sz) src dst
+    LEA  sz src dst    -> patch2 (LEA  sz) src dst
+    ADD  sz src dst    -> patch2 (ADD  sz) src dst
+    ADC  sz src dst    -> patch2 (ADC  sz) src dst
+    SUB  sz src dst    -> patch2 (SUB  sz) src dst
+    IMUL sz src dst    -> patch2 (IMUL sz) src dst
+    IMUL2 sz src        -> patch1 (IMUL2 sz) src
+    MUL sz src dst     -> patch2 (MUL sz) src dst
+    IDIV sz op         -> patch1 (IDIV sz) op
+    DIV sz op          -> patch1 (DIV sz) op
+    AND  sz src dst    -> patch2 (AND  sz) src dst
+    OR   sz src dst    -> patch2 (OR   sz) src dst
+    XOR  sz src dst    -> patch2 (XOR  sz) src dst
+    NOT  sz op                 -> patch1 (NOT  sz) op
+    NEGI sz op         -> patch1 (NEGI sz) op
+    SHL  sz imm dst    -> patch1 (SHL sz imm) dst
+    SAR  sz imm dst    -> patch1 (SAR sz imm) dst
+    SHR  sz imm dst    -> patch1 (SHR sz imm) dst
+    BT   sz imm src     -> patch1 (BT  sz imm) src
+    TEST sz src dst    -> patch2 (TEST sz) src dst
+    CMP  sz src dst    -> patch2 (CMP  sz) src dst
+    PUSH sz op         -> patch1 (PUSH sz) op
+    POP  sz op         -> patch1 (POP  sz) op
+    SETCC cond op      -> patch1 (SETCC cond) op
+    JMP op             -> patch1 JMP op
+    JMP_TBL op ids s lbl-> JMP_TBL (patchOp op) ids s lbl
+
+    GMOV src dst       -> GMOV (env src) (env dst)
+    GLD  sz src dst    -> GLD sz (lookupAddr src) (env dst)
+    GST  sz src dst    -> GST sz (env src) (lookupAddr dst)
+
+    GLDZ dst           -> GLDZ (env dst)
+    GLD1 dst           -> GLD1 (env dst)
+
+    GFTOI src dst      -> GFTOI (env src) (env dst)
+    GDTOI src dst      -> GDTOI (env src) (env dst)
+
+    GITOF src dst      -> GITOF (env src) (env dst)
+    GITOD src dst      -> GITOD (env src) (env dst)
+
+    GDTOF src dst      -> GDTOF (env src) (env dst)
+
+    GADD sz s1 s2 dst  -> GADD sz (env s1) (env s2) (env dst)
+    GSUB sz s1 s2 dst  -> GSUB sz (env s1) (env s2) (env dst)
+    GMUL sz s1 s2 dst  -> GMUL sz (env s1) (env s2) (env dst)
+    GDIV sz s1 s2 dst  -> GDIV sz (env s1) (env s2) (env dst)
+
+    GCMP sz src1 src2  -> GCMP sz (env src1) (env src2)
+    GABS sz src dst    -> GABS sz (env src) (env dst)
+    GNEG sz src dst    -> GNEG sz (env src) (env dst)
+    GSQRT sz src dst   -> GSQRT sz (env src) (env dst)
+    GSIN sz l1 l2 src dst      -> GSIN sz l1 l2 (env src) (env dst)
+    GCOS sz l1 l2 src dst      -> GCOS sz l1 l2 (env src) (env dst)
+    GTAN sz l1 l2 src dst      -> GTAN sz l1 l2 (env src) (env dst)
+
+    CVTSS2SD src dst   -> CVTSS2SD (env src) (env dst)
+    CVTSD2SS src dst   -> CVTSD2SS (env src) (env dst)
+    CVTTSS2SIQ sz src dst -> CVTTSS2SIQ sz (patchOp src) (env dst)
+    CVTTSD2SIQ sz src dst -> CVTTSD2SIQ sz (patchOp src) (env dst)
+    CVTSI2SS sz src dst        -> CVTSI2SS sz (patchOp src) (env dst)
+    CVTSI2SD sz src dst        -> CVTSI2SD sz (patchOp src) (env dst)
+    FDIV sz src dst    -> FDIV sz (patchOp src) (patchOp dst)
+
+    CALL (Left _)  _   -> instr
+    CALL (Right reg) p -> CALL (Right (env reg)) p
+    
+    FETCHGOT reg        -> FETCHGOT (env reg)
+    FETCHPC  reg        -> FETCHPC  (env reg)
+   
+    NOP                        -> instr
+    COMMENT _          -> instr
+    DELTA _            -> instr
+
+    JXX _ _            -> instr
+    JXX_GBL _ _                -> instr
+    CLTD _             -> instr
+
+    _other             -> panic "patchRegs: unrecognised instr"
+
+  where
+    patch1 :: (Operand -> a) -> Operand -> a
+    patch1 insn op      = insn $! patchOp op
+    patch2 :: (Operand -> Operand -> a) -> Operand -> Operand -> a
+    patch2 insn src dst = (insn $! patchOp src) $! patchOp dst
+
+    patchOp (OpReg  reg) = OpReg $! env reg
+    patchOp (OpImm  imm) = OpImm imm
+    patchOp (OpAddr ea)  = OpAddr $! lookupAddr ea
+
+    lookupAddr (ImmAddr imm off) = ImmAddr imm off
+    lookupAddr (AddrBaseIndex base index disp)
+      = ((AddrBaseIndex $! lookupBase base) $! lookupIndex index) disp
+      where
+       lookupBase EABaseNone       = EABaseNone
+       lookupBase EABaseRip        = EABaseRip
+       lookupBase (EABaseReg r)    = EABaseReg (env r)
+                                
+       lookupIndex EAIndexNone     = EAIndexNone
+       lookupIndex (EAIndex r i)   = EAIndex (env r) i
+
+
+--------------------------------------------------------------------------------
+x86_isJumpishInstr 
+       :: Instr -> Bool
+
+x86_isJumpishInstr instr
+ = case instr of
+       JMP{}           -> True
+       JXX{}           -> True
+       JXX_GBL{}       -> True
+       JMP_TBL{}       -> True
+       CALL{}          -> True
+       _               -> False
+
+
+x86_jumpDestsOfInstr
+       :: Instr 
+       -> [BlockId]
+
+x86_jumpDestsOfInstr insn 
+  = case insn of
+       JXX _ id        -> [id]
+       JMP_TBL _ ids _ _ -> [id | Just id <- ids]
+       _               -> []
+
+
+x86_patchJumpInstr 
+       :: Instr -> (BlockId -> BlockId) -> Instr
+
+x86_patchJumpInstr insn patchF
+  = case insn of
+       JXX cc id       -> JXX cc (patchF id)
+       JMP_TBL op ids section lbl
+         -> JMP_TBL op (map (fmap patchF) ids) section lbl
+       _               -> insn
+
+
+
+
+-- -----------------------------------------------------------------------------
+-- | Make a spill instruction.
+x86_mkSpillInstr
+       :: Reg          -- register to spill
+       -> Int          -- current stack delta
+       -> Int          -- spill slot to use
+       -> Instr
+
+x86_mkSpillInstr reg delta slot
+  = let        off     = spillSlotToOffset slot
+    in
+    let off_w = (off-delta) `div` IF_ARCH_i386(4,8)
+    in case targetClassOfReg reg of
+          RcInteger   -> MOV IF_ARCH_i386(II32,II64)
+                              (OpReg reg) (OpAddr (spRel off_w))
+          RcDouble    -> GST FF80 reg (spRel off_w) {- RcFloat/RcDouble -}
+          RcDoubleSSE -> MOV FF64 (OpReg reg) (OpAddr (spRel off_w))
+           _         -> panic "X86.mkSpillInstr: no match"
+
+
+-- | Make a spill reload instruction.
+x86_mkLoadInstr
+       :: Reg          -- register to load
+       -> Int          -- current stack delta
+       -> Int          -- spill slot to use
+       -> Instr
+
+x86_mkLoadInstr reg delta slot
+  = let off     = spillSlotToOffset slot
+    in
+       let off_w = (off-delta) `div` IF_ARCH_i386(4,8)
+        in case targetClassOfReg reg of
+              RcInteger -> MOV IF_ARCH_i386(II32,II64) 
+                               (OpAddr (spRel off_w)) (OpReg reg)
+              RcDouble  -> GLD FF80 (spRel off_w) reg {- RcFloat/RcDouble -}
+              RcDoubleSSE -> MOV FF64 (OpAddr (spRel off_w)) (OpReg reg)
+              _           -> panic "X86.x86_mkLoadInstr"
+
+spillSlotSize :: Int
+spillSlotSize = IF_ARCH_i386(12, 8)
+
+maxSpillSlots :: Int
+maxSpillSlots = ((rESERVED_C_STACK_BYTES - 64) `div` spillSlotSize) - 1
+
+-- convert a spill slot number to a *byte* offset, with no sign:
+-- decide on a per arch basis whether you are spilling above or below
+-- the C stack pointer.
+spillSlotToOffset :: Int -> Int
+spillSlotToOffset slot
+   | slot >= 0 && slot < maxSpillSlots
+   = 64 + spillSlotSize * slot
+   | otherwise
+   = pprPanic "spillSlotToOffset:" 
+              (   text "invalid spill location: " <> int slot
+             $$  text "maxSpillSlots:          " <> int maxSpillSlots)
+
+--------------------------------------------------------------------------------
+
+-- | See if this instruction is telling us the current C stack delta
+x86_takeDeltaInstr
+       :: Instr
+       -> Maybe Int
+       
+x86_takeDeltaInstr instr
+ = case instr of
+       DELTA i         -> Just i
+       _               -> Nothing
+
+
+x86_isMetaInstr
+       :: Instr
+       -> Bool
+       
+x86_isMetaInstr instr
+ = case instr of
+       COMMENT{}       -> True
+       LDATA{}         -> True
+       NEWBLOCK{}      -> True
+       DELTA{}         -> True
+       _               -> False
+
+
+
+-- | Make a reg-reg move instruction.
+--     On SPARC v8 there are no instructions to move directly between
+--     floating point and integer regs. If we need to do that then we
+--     have to go via memory.
+--
+x86_mkRegRegMoveInstr
+       :: Reg
+       -> Reg
+       -> Instr
+
+x86_mkRegRegMoveInstr src dst
+ = case targetClassOfReg src of
+#if   i386_TARGET_ARCH
+        RcInteger -> MOV II32 (OpReg src) (OpReg dst)
+#else
+        RcInteger -> MOV II64 (OpReg src) (OpReg dst)
+#endif
+        RcDouble    -> GMOV src dst
+        RcDoubleSSE -> MOV FF64 (OpReg src) (OpReg dst)
+       _     -> panic "X86.RegInfo.mkRegRegMoveInstr: no match"
+
+-- | Check whether an instruction represents a reg-reg move.
+--     The register allocator attempts to eliminate reg->reg moves whenever it can,
+--     by assigning the src and dest temporaries to the same real register.
+--
+x86_takeRegRegMoveInstr
+       :: Instr 
+       -> Maybe (Reg,Reg)
+
+x86_takeRegRegMoveInstr (MOV _ (OpReg r1) (OpReg r2)) 
+       = Just (r1,r2)
+
+x86_takeRegRegMoveInstr _  = Nothing
+
+
+-- | Make an unconditional branch instruction.
+x86_mkJumpInstr
+       :: BlockId
+       -> [Instr]
+
+x86_mkJumpInstr id 
+       = [JXX ALWAYS id]
+
+
+
+
+
+i386_insert_ffrees 
+       :: [GenBasicBlock Instr] 
+       -> [GenBasicBlock Instr]
+
  i386_insert_ffrees blocks
     | or (map (any is_G_instr) [ instrs | BasicBlock _ instrs <- blocks ])
     = map ffree_before_nonlocal_transfers blocks
@@ -320,6 +746,7 @@ i386_insert_ffrees blocks
       where p insn r = case insn of
                          CALL _ _ -> GFREE : insn : r
                          JMP _    -> GFREE : insn : r
+                        JXX_GBL _ _ -> panic "i386_insert_ffrees: cannot handle JXX_GBL"
                          _        -> insn : r
  
  -- if you ever add a new FP insn to the fake x86 FP insn set,
@@ -334,8 +761,9 @@ is_G_instr instr
         GLD1{}          -> True
          GFTOI{}                -> True
         GDTOI{}         -> True
-        GITOF{}        -> True
-       GITOD{}         -> True
+        GITOF{}                -> True
+       GITOD{}         -> True
+        GDTOF{}                -> True
         GADD{}          -> True
         GDIV{}          -> True
         GSUB{}          -> True
@@ -349,4 +777,55 @@ is_G_instr instr
         GTAN{}          -> True
          GFREE          -> panic "is_G_instr: GFREE (!)"
          _              -> False
-#endif /* i386_TARGET_ARCH */
+
+
+data JumpDest = DestBlockId BlockId | DestImm Imm
+
+
+canShortcut :: Instr -> Maybe JumpDest
+canShortcut (JXX ALWAYS id)    = Just (DestBlockId id)
+canShortcut (JMP (OpImm imm))  = Just (DestImm imm)
+canShortcut _                  = Nothing
+
+
+-- This helper shortcuts a sequence of branches.
+-- The blockset helps avoid following cycles.
+shortcutJump :: (BlockId -> Maybe JumpDest) -> Instr -> Instr
+shortcutJump fn insn = shortcutJump' fn (setEmpty :: BlockSet) insn
+  where shortcutJump' fn seen insn@(JXX cc id) =
+          if setMember id seen then insn
+          else case fn id of
+                 Nothing                -> insn
+                 Just (DestBlockId id') -> shortcutJump' fn seen' (JXX cc id')
+                 Just (DestImm imm)     -> shortcutJump' fn seen' (JXX_GBL cc imm)
+               where seen' = setInsert id seen
+        shortcutJump' _ _ other = other
+
+-- Here because it knows about JumpDest
+shortcutStatic :: (BlockId -> Maybe JumpDest) -> CmmStatic -> CmmStatic
+shortcutStatic fn (CmmStaticLit (CmmLabel lab))
+  | Just uq <- maybeAsmTemp lab 
+  = CmmStaticLit (CmmLabel (shortBlockId fn emptyUniqSet (mkBlockId uq)))
+shortcutStatic fn (CmmStaticLit (CmmLabelDiffOff lbl1 lbl2 off))
+  | Just uq <- maybeAsmTemp lbl1
+  = CmmStaticLit (CmmLabelDiffOff (shortBlockId fn emptyUniqSet (mkBlockId uq)) lbl2 off)
+        -- slightly dodgy, we're ignoring the second label, but this
+        -- works with the way we use CmmLabelDiffOff for jump tables now.
+
+shortcutStatic _ other_static
+        = other_static
+
+shortBlockId 
+       :: (BlockId -> Maybe JumpDest)
+       -> UniqSet Unique
+       -> BlockId
+       -> CLabel
+
+shortBlockId fn seen blockid =
+  case (elementOfUniqSet uq seen, fn blockid) of
+    (True, _)    -> mkAsmTempLabel uq
+    (_, Nothing) -> mkAsmTempLabel uq
+    (_, Just (DestBlockId blockid'))  -> shortBlockId fn (addOneToUniqSet seen uq) blockid'
+    (_, Just (DestImm (ImmCLbl lbl))) -> lbl
+    (_, _other) -> panic "shortBlockId"
+  where uq = getUnique blockid