X-Git-Url: http://git.megacz.com/?p=ghc-hetmet.git;a=blobdiff_plain;f=compiler%2FnativeGen%2FRegAlloc%2FGraph%2FTrivColorable.hs;fp=compiler%2FnativeGen%2FRegAlloc%2FGraph%2FTrivColorable.hs;h=5f3f0ac495cb470d6fea032525fb33ca1645c86b;hp=df0460631396c3cf3366c19fb38b1c7c82ea9f42;hb=f9288086f935c97812b2d80defcff38baf7b6a6c;hpb=de29a9f02449359b70402f763ac7590673774124 diff --git a/compiler/nativeGen/RegAlloc/Graph/TrivColorable.hs b/compiler/nativeGen/RegAlloc/Graph/TrivColorable.hs index df04606..5f3f0ac 100644 --- a/compiler/nativeGen/RegAlloc/Graph/TrivColorable.hs +++ b/compiler/nativeGen/RegAlloc/Graph/TrivColorable.hs @@ -1,3 +1,4 @@ +{-# OPTIONS -fno-warn-unused-binds #-} module RegAlloc.Graph.TrivColorable ( trivColorable, @@ -15,51 +16,136 @@ import GraphBase import UniqFM import FastTypes -{- --- allocatableRegs is allMachRegNos with the fixed-use regs removed. --- i.e., these are the regs for which we are prepared to allow the --- register allocator to attempt to map VRegs to. -allocatableRegs :: [RegNo] -allocatableRegs - = let isFree i = isFastTrue (freeReg i) - in filter isFree allMachRegNos - - --- | The number of regs in each class. --- We go via top level CAFs to ensure that we're not recomputing --- the length of these lists each time the fn is called. -allocatableRegsInClass :: RegClass -> Int -allocatableRegsInClass cls - = case cls of - RcInteger -> allocatableRegsInteger - RcDouble -> allocatableRegsDouble - RcFloat -> panic "Regs.allocatableRegsInClass: no match\n" - -allocatableRegsInteger :: Int -allocatableRegsInteger - = length $ filter (\r -> regClass r == RcInteger) - $ map RealReg allocatableRegs - -allocatableRegsDouble :: Int -allocatableRegsDouble - = length $ filter (\r -> regClass r == RcDouble) - $ map RealReg allocatableRegs --} - -- trivColorable --------------------------------------------------------------- -- trivColorable function for the graph coloring allocator +-- -- This gets hammered by scanGraph during register allocation, -- so needs to be fairly efficient. -- -- NOTE: This only works for arcitectures with just RcInteger and RcDouble -- (which are disjoint) ie. x86, x86_64 and ppc -- - -- BL 2007/09 -- Doing a nice fold over the UniqSet makes trivColorable use -- 32% of total compile time and 42% of total alloc when compiling SHA1.lhs from darcs. +-- +-- The number of allocatable regs is hard coded here so we can do a fast +-- comparision in trivColorable. +-- +-- It's ok if these numbers are _less_ than the actual number of free regs, +-- but they can't be more or the register conflict graph won't color. +-- +-- If the graph doesn't color then the allocator will panic, but it won't +-- generate bad object code or anything nasty like that. +-- +-- There is an allocatableRegsInClass :: RegClass -> Int, but doing the unboxing +-- is too slow for us here. +-- +-- Look at includes/MachRegs.h to get these numbers. +-- + +#if i386_TARGET_ARCH +#define ALLOCATABLE_REGS_INTEGER (_ILIT(3)) +#define ALLOCATABLE_REGS_DOUBLE (_ILIT(6)) +#define ALLOCATABLE_REGS_FLOAT (_ILIT(0)) + + +#elif x86_64_TARGET_ARCH +#define ALLOCATABLE_REGS_INTEGER (_ILIT(5)) +#define ALLOCATABLE_REGS_DOUBLE (_ILIT(2)) +#define ALLOCATABLE_REGS_FLOAT (_ILIT(0)) + + +#elif powerpc_TARGET_ARCH +#define ALLOCATABLE_REGS_INTEGER (_ILIT(16)) +#define ALLOCATABLE_REGS_DOUBLE (_ILIT(26)) +#define ALLOCATABLE_REGS_FLOAT (_ILIT(0)) + + +#elif sparc_TARGET_ARCH +#define ALLOCATABLE_REGS_INTEGER (_ILIT(14)) +#define ALLOCATABLE_REGS_DOUBLE (_ILIT(11)) +#define ALLOCATABLE_REGS_FLOAT (_ILIT(22)) + + +#else +#error ToDo: choose which trivColorable function to use for this architecture. +#endif + + + +-- Disjoint registers ---------------------------------------------------------- +-- +-- The definition has been unfolded into individual cases for speed. +-- Each architecture has a different register setup, so we use a +-- different regSqueeze function for each. +-- +accSqueeze + :: FastInt + -> FastInt + -> (reg -> FastInt) + -> UniqFM reg + -> FastInt + +accSqueeze count maxCount squeeze ufm + = case ufm of + NodeUFM _ _ left right + -> case accSqueeze count maxCount squeeze right of + count' -> case count' >=# maxCount of + False -> accSqueeze count' maxCount squeeze left + True -> count' + + LeafUFM _ reg -> count +# squeeze reg + EmptyUFM -> count + + +trivColorable + :: (RegClass -> VirtualReg -> FastInt) + -> (RegClass -> RealReg -> FastInt) + -> Triv VirtualReg RegClass RealReg + +trivColorable virtualRegSqueeze realRegSqueeze RcInteger conflicts exclusions + | count2 <- accSqueeze (_ILIT(0)) ALLOCATABLE_REGS_INTEGER + (virtualRegSqueeze RcInteger) + conflicts + + , count3 <- accSqueeze count2 ALLOCATABLE_REGS_INTEGER + (realRegSqueeze RcInteger) + exclusions + + = count3 <# ALLOCATABLE_REGS_INTEGER + +trivColorable virtualRegSqueeze realRegSqueeze RcFloat conflicts exclusions + | count2 <- accSqueeze (_ILIT(0)) ALLOCATABLE_REGS_FLOAT + (virtualRegSqueeze RcFloat) + conflicts + + , count3 <- accSqueeze count2 ALLOCATABLE_REGS_FLOAT + (realRegSqueeze RcFloat) + exclusions + + = count3 <# ALLOCATABLE_REGS_FLOAT + +trivColorable virtualRegSqueeze realRegSqueeze RcDouble conflicts exclusions + | count2 <- accSqueeze (_ILIT(0)) ALLOCATABLE_REGS_DOUBLE + (virtualRegSqueeze RcDouble) + conflicts + + , count3 <- accSqueeze count2 ALLOCATABLE_REGS_DOUBLE + (realRegSqueeze RcDouble) + exclusions + + = count3 <# ALLOCATABLE_REGS_DOUBLE + + +-- Specification Code ---------------------------------------------------------- +-- +-- The trivColorable function for each particular architecture should +-- implement the following function, but faster. +-- + {- trivColorable :: RegClass -> UniqSet Reg -> UniqSet Reg -> Bool trivColorable classN conflicts exclusions @@ -69,14 +155,14 @@ trivColorable classN conflicts exclusions acc r (cd, cf) = case regClass r of RcInteger -> (cd+1, cf) - RcDouble -> (cd, cf+1) + RcFloat -> (cd, cf+1) _ -> panic "Regs.trivColorable: reg class not handled" tmp = foldUniqSet acc (0, 0) conflicts (countInt, countFloat) = foldUniqSet acc tmp exclusions squeese = worst countInt classN RcInteger - + worst countFloat classN RcDouble + + worst countFloat classN RcFloat in squeese < allocatableRegsInClass classN @@ -92,85 +178,38 @@ worst n classN classC RcInteger -> case classC of RcInteger -> min n (allocatableRegsInClass RcInteger) - RcDouble -> 0 + RcFloat -> 0 RcDouble -> case classC of - RcDouble -> min n (allocatableRegsInClass RcDouble) + RcFloat -> min n (allocatableRegsInClass RcFloat) RcInteger -> 0 --} - --- The number of allocatable regs is hard coded here so we can do a fast comparision --- in trivColorable. It's ok if these numbers are _less_ than the actual number of --- free regs, but they can't be more or the register conflict graph won't color. --- --- There is an allocatableRegsInClass :: RegClass -> Int, but doing the unboxing --- is too slow for us here. --- --- Compare Regs.freeRegs and MachRegs.h to get these numbers. --- -#if i386_TARGET_ARCH -#define ALLOCATABLE_REGS_INTEGER (_ILIT(3)) -#define ALLOCATABLE_REGS_DOUBLE (_ILIT(6)) -#define ALLOCATABLE_REGS_FLOAT (_ILIT(0)) - -#elif x86_64_TARGET_ARCH -#define ALLOCATABLE_REGS_INTEGER (_ILIT(5)) -#define ALLOCATABLE_REGS_DOUBLE (_ILIT(2)) -#define ALLOCATABLE_REGS_FLOAT (_ILIT(0)) +-- allocatableRegs is allMachRegNos with the fixed-use regs removed. +-- i.e., these are the regs for which we are prepared to allow the +-- register allocator to attempt to map VRegs to. +allocatableRegs :: [RegNo] +allocatableRegs + = let isFree i = isFastTrue (freeReg i) + in filter isFree allMachRegNos -#elif powerpc_TARGET_ARCH -#define ALLOCATABLE_REGS_INTEGER (_ILIT(16)) -#define ALLOCATABLE_REGS_DOUBLE (_ILIT(26)) -#define ALLOCATABLE_REGS_FLOAT (_ILIT(0)) -#elif sparc_TARGET_ARCH -#define ALLOCATABLE_REGS_INTEGER (_ILIT(14)) -#define ALLOCATABLE_REGS_DOUBLE (_ILIT(8)) -#define ALLOCATABLE_REGS_FLOAT (_ILIT(6)) +-- | The number of regs in each class. +-- We go via top level CAFs to ensure that we're not recomputing +-- the length of these lists each time the fn is called. +allocatableRegsInClass :: RegClass -> Int +allocatableRegsInClass cls + = case cls of + RcInteger -> allocatableRegsInteger + RcFloat -> allocatableRegsDouble -#else -#error ToDo: define ALLOCATABLE_REGS_INTEGER and ALLOCATABLE_REGS_DOUBLE -#endif +allocatableRegsInteger :: Int +allocatableRegsInteger + = length $ filter (\r -> regClass r == RcInteger) + $ map RealReg allocatableRegs -trivColorable - :: (Reg -> RegClass) - -> Triv Reg RegClass Reg - -trivColorable regClass _ conflicts exclusions - = {-# SCC "trivColorable" #-} - let - isSqueesed cI cF ufm - = case ufm of - NodeUFM _ _ left right - -> case isSqueesed cI cF right of - (# s, cI', cF' #) - -> case s of - False -> isSqueesed cI' cF' left - True -> (# True, cI', cF' #) - - LeafUFM _ reg - -> case regClass reg of - RcInteger - -> case cI +# _ILIT(1) of - cI' -> (# cI' >=# ALLOCATABLE_REGS_INTEGER, cI', cF #) - - RcDouble - -> case cF +# _ILIT(1) of - cF' -> (# cF' >=# ALLOCATABLE_REGS_DOUBLE, cI, cF' #) - - RcFloat - -> case cF +# _ILIT(1) of - cF' -> (# cF' >=# ALLOCATABLE_REGS_FLOAT, cI, cF' #) - - EmptyUFM - -> (# False, cI, cF #) - - in case isSqueesed (_ILIT(0)) (_ILIT(0)) conflicts of - (# False, cI', cF' #) - -> case isSqueesed cI' cF' exclusions of - (# s, _, _ #) -> not s - - (# True, _, _ #) - -> False +allocatableRegsFloat :: Int +allocatableRegsFloat + = length $ filter (\r -> regClass r == RcFloat + $ map RealReg allocatableRegs +-}