Merging in the new codegen branch

[ghc-hetmet.git] / compiler / nativeGen / MachRegs.lhs
diff --git a/compiler/nativeGen/MachRegs.lhs b/compiler/nativeGen/MachRegs.lhs

index bc96e9d..2e578c0 100644 (file)
--- a/compiler/nativeGen/MachRegs.lhs
+++ b/compiler/nativeGen/MachRegs.lhs
@@ -13,10 +13,21 @@
  -- -----------------------------------------------------------------------------
  
  \begin{code}
+{-# OPTIONS -w #-}
+-- The above warning supression flag is a temporary kludge.
+-- While working on this module you are encouraged to remove it and fix
+-- any warnings in the module. See
+--     http://hackage.haskell.org/trac/ghc/wiki/Commentary/CodingStyle#Warnings
+-- for details
+
  #include "nativeGen/NCG.h"
  
  module MachRegs (
  
+       -- * Sizes
+       Size(..), intSize, floatSize, isFloatSize, 
+                 wordSize, cmmTypeSize, sizeToWidth,
+
         -- * Immediate values
         Imm(..), strImmLit, litToImm,
  
@@ -26,8 +37,9 @@ module MachRegs (
  
         -- * The 'Reg' type
         RegNo,
-       Reg(..), isRealReg, isVirtualReg,
+       Reg(..), isRealReg, isVirtualReg, renameVirtualReg,
          RegClass(..), regClass,
+       trivColorable,
         getHiVRegFromLo, 
         mkVReg,
  
@@ -36,6 +48,7 @@ module MachRegs (
  
         -- * Machine-dependent register-related stuff
          allocatableRegs, argRegs, allArgRegs, callClobberedRegs,
+       allocatableRegsInClass,
         freeReg,
         spRel,
  
@@ -81,19 +94,20 @@ module MachRegs (
  -- HACK: go for the max
  #endif
  
-#include "MachRegs.h"
+#include "../includes/MachRegs.h"
  
  import Cmm
-import MachOp          ( MachRep(..) )
  import CgUtils          ( get_GlobalReg_addr )
-
  import CLabel           ( CLabel, mkMainCapabilityLabel )
  import Pretty
  import Outputable      ( Outputable(..), pprPanic, panic )
  import qualified Outputable
  import Unique
+import UniqSet
  import Constants
  import FastTypes
+import FastBool
+import UniqFM
  
  #if powerpc_TARGET_ARCH
  import Data.Word       ( Word8, Word16, Word32 )
@@ -101,6 +115,95 @@ import Data.Int    ( Int8, Int16, Int32 )
  #endif
  
  -- -----------------------------------------------------------------------------
+-- Sizes on this architecture
+-- 
+-- A Size is usually a combination of width and class
+
+-- It looks very like the old MachRep, but it's now of purely local
+-- significance, here in the native code generator.  You can change it
+-- without global consequences.
+--
+-- A major use is as an opcode qualifier; thus the opcode 
+--     mov.l a b
+-- might be encoded 
+--     MOV II32 a b
+-- where the Size field encodes the ".l" part.
+
+-- ToDo: it's not clear to me that we need separate signed-vs-unsigned sizes
+-- here.  I've removed them from the x86 version, we'll see what happens --SDM
+
+-- ToDo: quite a few occurrences of Size could usefully be replaced by Width
+
+#if powerpc_TARGET_ARCH || i386_TARGET_ARCH || x86_64_TARGET_ARCH
+data Size      -- For these three, the "size" also gives the int/float
+               -- distinction, because the instructions for int/float
+               -- differ only in their suffices
+  = II8 | II16 | II32 | II64 | FF32 | FF64 | FF80
+  deriving Eq
+
+intSize, floatSize :: Width -> Size
+intSize W8 = II8
+intSize W16 = II16
+intSize W32 = II32
+intSize W64 = II64
+intSize other = pprPanic "MachInstrs.intSize" (ppr other)
+
+floatSize W32 = FF32
+floatSize W64 = FF64
+floatSize other = pprPanic "MachInstrs.intSize" (ppr other)
+
+wordSize :: Size
+wordSize = intSize wordWidth
+
+sizeToWidth :: Size -> Width
+sizeToWidth II8  = W8
+sizeToWidth II16 = W16
+sizeToWidth II32 = W32
+sizeToWidth II64 = W64
+sizeToWidth FF32 = W32
+sizeToWidth FF64 = W64
+sizeToWidth _ = panic "MachInstrs.sizeToWidth"
+
+cmmTypeSize :: CmmType -> Size
+cmmTypeSize ty | isFloatType ty = floatSize (typeWidth ty)
+              | otherwise      = intSize (typeWidth ty)
+
+isFloatSize :: Size -> Bool
+isFloatSize FF32 = True
+isFloatSize FF64 = True
+isFloatSize FF80 = True
+isFloatSize other = False
+#endif
+
+#if alpha_TARGET_ARCH
+data Size
+    = B            -- byte
+    | Bu
+--  | W            -- word (2 bytes): UNUSED
+--  | Wu    -- : UNUSED
+    | L            -- longword (4 bytes)
+    | Q            -- quadword (8 bytes)
+--  | FF    -- VAX F-style floating pt: UNUSED
+--  | GF    -- VAX G-style floating pt: UNUSED
+--  | DF    -- VAX D-style floating pt: UNUSED
+--  | SF    -- IEEE single-precision floating pt: UNUSED
+    | TF    -- IEEE double-precision floating pt
+  deriving Eq
+#endif
+
+#if sparc_TARGET_ARCH /* || powerpc_TARGET_ARCH */
+data Size
+    = B     -- byte (signed)
+    | Bu    -- byte (unsigned)
+    | H     -- halfword (signed, 2 bytes)
+    | Hu    -- halfword (unsigned, 2 bytes)
+    | W            -- word (4 bytes)
+    | F            -- IEEE single-precision floating pt
+    | DF    -- IEEE single-precision floating pt
+  deriving Eq
+#endif
+
+-- -----------------------------------------------------------------------------
  -- Immediates
  
  data Imm
@@ -126,8 +229,8 @@ strImmLit s = ImmLit (text s)
  
  litToImm :: CmmLit -> Imm
  litToImm (CmmInt i _)        = ImmInteger i
-litToImm (CmmFloat f F32)    = ImmFloat f
-litToImm (CmmFloat f F64)    = ImmDouble f
+litToImm (CmmFloat f W32)    = ImmFloat f
+litToImm (CmmFloat f W64)    = ImmDouble f
  litToImm (CmmLabel l)        = ImmCLbl l
  litToImm (CmmLabelOff l off) = ImmIndex l off
  litToImm (CmmLabelDiffOff l1 l2 off)
@@ -253,23 +356,22 @@ largeOffsetError i
  fits16Bits :: Integral a => a -> Bool
  fits16Bits x = x >= -32768 && x < 32768
  
-makeImmediate :: Integral a => MachRep -> Bool -> a -> Maybe Imm
-
+makeImmediate :: Integral a => Width -> Bool -> a -> Maybe Imm
  makeImmediate rep signed x = fmap ImmInt (toI16 rep signed)
      where
-        narrow I32 False = fromIntegral (fromIntegral x :: Word32)
-        narrow I16 False = fromIntegral (fromIntegral x :: Word16)
-        narrow I8  False = fromIntegral (fromIntegral x :: Word8)
-        narrow I32 True  = fromIntegral (fromIntegral x :: Int32)
-        narrow I16 True  = fromIntegral (fromIntegral x :: Int16)
-        narrow I8  True  = fromIntegral (fromIntegral x :: Int8)
+        narrow W32 False = fromIntegral (fromIntegral x :: Word32)
+        narrow W16 False = fromIntegral (fromIntegral x :: Word16)
+        narrow W8  False = fromIntegral (fromIntegral x :: Word8)
+        narrow W32 True  = fromIntegral (fromIntegral x :: Int32)
+        narrow W16 True  = fromIntegral (fromIntegral x :: Int16)
+        narrow W8  True  = fromIntegral (fromIntegral x :: Int8)
          
          narrowed = narrow rep signed
          
-        toI16 I32 True
+        toI16 W32 True
              | narrowed >= -32768 && narrowed < 32768 = Just narrowed
              | otherwise = Nothing
-        toI16 I32 False
+        toI16 W32 False
              | narrowed >= 0 && narrowed < 65536 = Just narrowed
              | otherwise = Nothing
          toI16 _ _  = Just narrowed
@@ -353,6 +455,11 @@ data RegClass
     | RcDouble
       deriving Eq
  
+instance Uniquable RegClass where
+    getUnique RcInteger        = mkUnique 'L' 0
+    getUnique RcFloat  = mkUnique 'L' 1
+    getUnique RcDouble = mkUnique 'L' 2
+
  type RegNo = Int
  
  data Reg
@@ -375,16 +482,18 @@ instance Uniquable Reg where
  unRealReg (RealReg i) = i
  unRealReg vreg        = pprPanic "unRealReg on VirtualReg" (ppr vreg)
  
-mkVReg :: Unique -> MachRep -> Reg
-mkVReg u rep
-   = case rep of
+mkVReg :: Unique -> Size -> Reg
+mkVReg u size
+   | not (isFloatSize size) = VirtualRegI u
+   | otherwise
+   = case size of
  #if sparc_TARGET_ARCH
-        F32   -> VirtualRegF u
+        FF32    -> VirtualRegF u
  #else
-        F32   -> VirtualRegD u
+        FF32    -> VirtualRegD u
  #endif
-        F64   -> VirtualRegD u
-        other -> VirtualRegI u
+        FF64    -> VirtualRegD u
+       _other -> panic "mkVReg"
  
  isVirtualReg :: Reg -> Bool
  isVirtualReg (RealReg _)      = False
@@ -396,6 +505,15 @@ isVirtualReg (VirtualRegD _)  = True
  isRealReg :: Reg -> Bool
  isRealReg = not . isVirtualReg
  
+renameVirtualReg :: Unique -> Reg -> Reg
+renameVirtualReg u r
+ = case r of
+       RealReg _       -> error "renameVirtualReg: can't change unique on a real reg"
+       VirtualRegI _   -> VirtualRegI  u
+       VirtualRegHi _  -> VirtualRegHi u
+       VirtualRegF _   -> VirtualRegF  u
+       VirtualRegD _   -> VirtualRegD  u
+
  instance Show Reg where
      show (RealReg i)      = showReg i
      show (VirtualRegI u)  = "%vI_" ++ show u
@@ -403,10 +521,132 @@ instance Show Reg where
      show (VirtualRegF u)  = "%vF_" ++ show u
      show (VirtualRegD u)  = "%vD_" ++ show u
  
+instance Outputable RegClass where
+    ppr RcInteger      = Outputable.text "I"
+    ppr RcFloat                = Outputable.text "F"
+    ppr RcDouble       = Outputable.text "D"
+
  instance Outputable Reg where
      ppr r = Outputable.text (show r)
  
  
+
+
+-- trivColorable function for the graph coloring allocator
+--     This gets hammered by scanGraph during register allocation,
+--     so needs to be fairly efficient.
+--
+--     NOTE:   This only works for arcitectures with just RcInteger and RcDouble
+--             (which are disjoint) ie. x86, x86_64 and ppc
+--
+
+--     BL 2007/09
+--     Doing a nice fold over the UniqSet makes trivColorable use
+--     32% of total compile time and 42% of total alloc when compiling SHA1.lhs from darcs.
+{-
+trivColorable :: RegClass -> UniqSet Reg -> UniqSet Reg -> Bool
+trivColorable classN conflicts exclusions
+ = let
+
+       acc :: Reg -> (Int, Int) -> (Int, Int)
+       acc r (cd, cf)  
+        = case regClass r of
+               RcInteger       -> (cd+1, cf)
+               RcDouble        -> (cd,   cf+1)
+               _               -> panic "MachRegs.trivColorable: reg class not handled"
+
+       tmp                     = foldUniqSet acc (0, 0) conflicts
+       (countInt,  countFloat) = foldUniqSet acc tmp    exclusions
+
+       squeese         = worst countInt   classN RcInteger
+                       + worst countFloat classN RcDouble
+
+   in  squeese < allocatableRegsInClass classN
+
+-- | Worst case displacement
+--     node N of classN has n neighbors of class C.
+--
+--     We currently only have RcInteger and RcDouble, which don't conflict at all.
+--     This is a bit boring compared to what's in RegArchX86.
+--
+worst :: Int -> RegClass -> RegClass -> Int
+worst n classN classC
+ = case classN of
+       RcInteger
+        -> case classC of
+               RcInteger       -> min n (allocatableRegsInClass RcInteger)
+               RcDouble        -> 0
+               
+       RcDouble
+        -> case classC of
+               RcDouble        -> min n (allocatableRegsInClass RcDouble)
+               RcInteger       -> 0
+-}
+
+
+-- The number of allocatable regs is hard coded here so we can do a fast comparision
+-- in trivColorable. It's ok if these numbers are _less_ than the actual number of
+-- free regs, but they can't be more or the register conflict graph won't color.
+--
+-- There is an allocatableRegsInClass :: RegClass -> Int, but doing the unboxing
+-- is too slow for us here.
+--
+-- Compare MachRegs.freeRegs  and MachRegs.h to get these numbers.
+--
+#if i386_TARGET_ARCH
+#define ALLOCATABLE_REGS_INTEGER (_ILIT(3))
+#define ALLOCATABLE_REGS_DOUBLE  (_ILIT(6))
+
+#elif x86_64_TARGET_ARCH
+#define ALLOCATABLE_REGS_INTEGER (_ILIT(5))
+#define ALLOCATABLE_REGS_DOUBLE  (_ILIT(2))
+
+#elif powerpc_TARGET_ARCH
+#define ALLOCATABLE_REGS_INTEGER (_ILIT(16))
+#define ALLOCATABLE_REGS_DOUBLE  (_ILIT(26))
+
+#else
+#error ToDo: define ALLOCATABLE_REGS_INTEGER and ALLOCATABLE_REGS_DOUBLE
+#endif
+
+{-# INLINE regClass      #-}
+trivColorable :: RegClass -> UniqSet Reg -> UniqSet Reg -> Bool
+trivColorable classN conflicts exclusions
+ = {-# SCC "trivColorable" #-}
+   let
+       {-# INLINE   isSqueesed    #-}
+       isSqueesed cI cF ufm
+         = case ufm of
+               NodeUFM _ _ left right
+                -> case isSqueesed cI cF right of
+                       (# s, cI', cF' #)
+                        -> case s of
+                               False   -> isSqueesed cI' cF' left
+                               True    -> (# True, cI', cF' #)
+
+               LeafUFM _ reg
+                -> case regClass reg of
+                       RcInteger
+                        -> case cI +# _ILIT(1) of
+                               cI' -> (# cI' >=# ALLOCATABLE_REGS_INTEGER, cI', cF #)
+
+                       RcDouble
+                        -> case cF +# _ILIT(1) of
+                               cF' -> (# cF' >=# ALLOCATABLE_REGS_DOUBLE,  cI, cF' #)
+
+               EmptyUFM
+                ->     (# False, cI, cF #)
+
+   in case isSqueesed (_ILIT(0)) (_ILIT(0)) conflicts of
+       (# False, cI', cF' #)
+        -> case isSqueesed cI' cF' exclusions of
+               (# s, _, _ #)   -> not s
+
+       (# True, _, _ #)
+        -> False
+
+
+
  -- -----------------------------------------------------------------------------
  -- Machine-specific register stuff
  
@@ -468,6 +708,7 @@ fake3 = RealReg 11
  fake4 = RealReg 12
  fake5 = RealReg 13
  
+
  -- On x86, we might want to have an 8-bit RegClass, which would
  -- contain just regs 1-4 (the others don't have 8-bit versions).
  -- However, we can get away without this at the moment because the
@@ -489,6 +730,7 @@ showReg n
       then regNames !! n
       else "%unknown_x86_real_reg_" ++ show n
  
+
  #endif
  
  {-
@@ -952,6 +1194,25 @@ allocatableRegs
     = let isFree i = isFastTrue (freeReg i)
       in  filter isFree allMachRegNos
  
+
+-- | The number of regs in each class.
+--     We go via top level CAFs to ensure that we're not recomputing
+--     the length of these lists each time the fn is called.
+allocatableRegsInClass :: RegClass -> Int
+allocatableRegsInClass cls
+ = case cls of
+       RcInteger       -> allocatableRegsInteger
+       RcDouble        -> allocatableRegsDouble
+
+allocatableRegsInteger 
+       = length $ filter (\r -> regClass r == RcInteger) 
+                $ map RealReg allocatableRegs
+
+allocatableRegsDouble
+       = length $ filter (\r -> regClass r == RcDouble) 
+                $ map RealReg allocatableRegs
+
+
  -- these are the regs which we cannot assume stay alive over a
  -- C call.  
  callClobberedRegs :: [Reg]
@@ -1189,34 +1450,34 @@ globalRegMaybe :: GlobalReg -> Maybe Reg
  globalRegMaybe BaseReg                 = Just (RealReg REG_Base)
  #endif
  #ifdef REG_R1
-globalRegMaybe (VanillaReg 1)          = Just (RealReg REG_R1)
+globalRegMaybe (VanillaReg 1 _)                = Just (RealReg REG_R1)
  #endif 
  #ifdef REG_R2 
-globalRegMaybe (VanillaReg 2)          = Just (RealReg REG_R2)
+globalRegMaybe (VanillaReg 2 _)                = Just (RealReg REG_R2)
  #endif 
  #ifdef REG_R3 
-globalRegMaybe (VanillaReg 3)          = Just (RealReg REG_R3)
+globalRegMaybe (VanillaReg 3 _)        = Just (RealReg REG_R3)
  #endif 
  #ifdef REG_R4 
-globalRegMaybe (VanillaReg 4)          = Just (RealReg REG_R4)
+globalRegMaybe (VanillaReg 4 _)                = Just (RealReg REG_R4)
  #endif 
  #ifdef REG_R5 
-globalRegMaybe (VanillaReg 5)          = Just (RealReg REG_R5)
+globalRegMaybe (VanillaReg 5 _)                = Just (RealReg REG_R5)
  #endif 
  #ifdef REG_R6 
-globalRegMaybe (VanillaReg 6)          = Just (RealReg REG_R6)
+globalRegMaybe (VanillaReg 6 _)                = Just (RealReg REG_R6)
  #endif 
  #ifdef REG_R7 
-globalRegMaybe (VanillaReg 7)          = Just (RealReg REG_R7)
+globalRegMaybe (VanillaReg 7 _)                = Just (RealReg REG_R7)
  #endif 
  #ifdef REG_R8 
-globalRegMaybe (VanillaReg 8)          = Just (RealReg REG_R8)
+globalRegMaybe (VanillaReg 8 _)                = Just (RealReg REG_R8)
  #endif
  #ifdef REG_R9 
-globalRegMaybe (VanillaReg 9)          = Just (RealReg REG_R9)
+globalRegMaybe (VanillaReg 9 _)                = Just (RealReg REG_R9)
  #endif
  #ifdef REG_R10 
-globalRegMaybe (VanillaReg 10)         = Just (RealReg REG_R10)
+globalRegMaybe (VanillaReg 10 _)       = Just (RealReg REG_R10)
  #endif
  #ifdef REG_F1
  globalRegMaybe (FloatReg 1)            = Just (RealReg REG_F1)