1 {-# OPTIONS -fno-warn-unused-binds #-}
3 module RegAlloc.Graph.TrivColorable (
9 #include "HsVersions.h"
20 -- trivColorable ---------------------------------------------------------------
22 -- trivColorable function for the graph coloring allocator
24 -- This gets hammered by scanGraph during register allocation,
25 -- so needs to be fairly efficient.
27 -- NOTE: This only works for arcitectures with just RcInteger and RcDouble
28 -- (which are disjoint) ie. x86, x86_64 and ppc
31 -- Doing a nice fold over the UniqSet makes trivColorable use
32 -- 32% of total compile time and 42% of total alloc when compiling SHA1.lhs from darcs.
34 -- The number of allocatable regs is hard coded here so we can do a fast
35 -- comparision in trivColorable.
37 -- It's ok if these numbers are _less_ than the actual number of free regs,
38 -- but they can't be more or the register conflict graph won't color.
40 -- If the graph doesn't color then the allocator will panic, but it won't
41 -- generate bad object code or anything nasty like that.
43 -- There is an allocatableRegsInClass :: RegClass -> Int, but doing the unboxing
44 -- is too slow for us here.
46 -- Look at includes/MachRegs.h to get these numbers.
50 #define ALLOCATABLE_REGS_INTEGER (_ILIT(3))
51 #define ALLOCATABLE_REGS_DOUBLE (_ILIT(6))
52 #define ALLOCATABLE_REGS_FLOAT (_ILIT(0))
55 #elif x86_64_TARGET_ARCH
56 #define ALLOCATABLE_REGS_INTEGER (_ILIT(5))
57 #define ALLOCATABLE_REGS_DOUBLE (_ILIT(2))
58 #define ALLOCATABLE_REGS_FLOAT (_ILIT(0))
61 #elif powerpc_TARGET_ARCH
62 #define ALLOCATABLE_REGS_INTEGER (_ILIT(16))
63 #define ALLOCATABLE_REGS_DOUBLE (_ILIT(26))
64 #define ALLOCATABLE_REGS_FLOAT (_ILIT(0))
67 #elif sparc_TARGET_ARCH
68 #define ALLOCATABLE_REGS_INTEGER (_ILIT(14))
69 #define ALLOCATABLE_REGS_DOUBLE (_ILIT(11))
70 #define ALLOCATABLE_REGS_FLOAT (_ILIT(22))
74 #error ToDo: choose which trivColorable function to use for this architecture.
79 -- Disjoint registers ----------------------------------------------------------
81 -- The definition has been unfolded into individual cases for speed.
82 -- Each architecture has a different register setup, so we use a
83 -- different regSqueeze function for each.
92 accSqueeze count maxCount squeeze ufm
94 NodeUFM _ _ left right
95 -> case accSqueeze count maxCount squeeze right of
96 count' -> case count' >=# maxCount of
97 False -> accSqueeze count' maxCount squeeze left
100 LeafUFM _ reg -> count +# squeeze reg
105 :: (RegClass -> VirtualReg -> FastInt)
106 -> (RegClass -> RealReg -> FastInt)
107 -> Triv VirtualReg RegClass RealReg
109 trivColorable virtualRegSqueeze realRegSqueeze RcInteger conflicts exclusions
110 | count2 <- accSqueeze (_ILIT(0)) ALLOCATABLE_REGS_INTEGER
111 (virtualRegSqueeze RcInteger)
114 , count3 <- accSqueeze count2 ALLOCATABLE_REGS_INTEGER
115 (realRegSqueeze RcInteger)
118 = count3 <# ALLOCATABLE_REGS_INTEGER
120 trivColorable virtualRegSqueeze realRegSqueeze RcFloat conflicts exclusions
121 | count2 <- accSqueeze (_ILIT(0)) ALLOCATABLE_REGS_FLOAT
122 (virtualRegSqueeze RcFloat)
125 , count3 <- accSqueeze count2 ALLOCATABLE_REGS_FLOAT
126 (realRegSqueeze RcFloat)
129 = count3 <# ALLOCATABLE_REGS_FLOAT
131 trivColorable virtualRegSqueeze realRegSqueeze RcDouble conflicts exclusions
132 | count2 <- accSqueeze (_ILIT(0)) ALLOCATABLE_REGS_DOUBLE
133 (virtualRegSqueeze RcDouble)
136 , count3 <- accSqueeze count2 ALLOCATABLE_REGS_DOUBLE
137 (realRegSqueeze RcDouble)
140 = count3 <# ALLOCATABLE_REGS_DOUBLE
143 -- Specification Code ----------------------------------------------------------
145 -- The trivColorable function for each particular architecture should
146 -- implement the following function, but faster.
150 trivColorable :: RegClass -> UniqSet Reg -> UniqSet Reg -> Bool
151 trivColorable classN conflicts exclusions
154 acc :: Reg -> (Int, Int) -> (Int, Int)
157 RcInteger -> (cd+1, cf)
158 RcFloat -> (cd, cf+1)
159 _ -> panic "Regs.trivColorable: reg class not handled"
161 tmp = foldUniqSet acc (0, 0) conflicts
162 (countInt, countFloat) = foldUniqSet acc tmp exclusions
164 squeese = worst countInt classN RcInteger
165 + worst countFloat classN RcFloat
167 in squeese < allocatableRegsInClass classN
169 -- | Worst case displacement
170 -- node N of classN has n neighbors of class C.
172 -- We currently only have RcInteger and RcDouble, which don't conflict at all.
173 -- This is a bit boring compared to what's in RegArchX86.
175 worst :: Int -> RegClass -> RegClass -> Int
176 worst n classN classC
180 RcInteger -> min n (allocatableRegsInClass RcInteger)
185 RcFloat -> min n (allocatableRegsInClass RcFloat)
188 -- allocatableRegs is allMachRegNos with the fixed-use regs removed.
189 -- i.e., these are the regs for which we are prepared to allow the
190 -- register allocator to attempt to map VRegs to.
191 allocatableRegs :: [RegNo]
193 = let isFree i = isFastTrue (freeReg i)
194 in filter isFree allMachRegNos
197 -- | The number of regs in each class.
198 -- We go via top level CAFs to ensure that we're not recomputing
199 -- the length of these lists each time the fn is called.
200 allocatableRegsInClass :: RegClass -> Int
201 allocatableRegsInClass cls
203 RcInteger -> allocatableRegsInteger
204 RcFloat -> allocatableRegsDouble
206 allocatableRegsInteger :: Int
207 allocatableRegsInteger
208 = length $ filter (\r -> regClass r == RcInteger)
209 $ map RealReg allocatableRegs
211 allocatableRegsFloat :: Int
213 = length $ filter (\r -> regClass r == RcFloat
214 $ map RealReg allocatableRegs