1 {-# OPTIONS -fno-warn-unused-binds #-}
3 module RegAlloc.Graph.TrivColorable (
9 #include "HsVersions.h"
20 -- trivColorable ---------------------------------------------------------------
22 -- trivColorable function for the graph coloring allocator
24 -- This gets hammered by scanGraph during register allocation,
25 -- so needs to be fairly efficient.
27 -- NOTE: This only works for arcitectures with just RcInteger and RcDouble
28 -- (which are disjoint) ie. x86, x86_64 and ppc
31 -- Doing a nice fold over the UniqSet makes trivColorable use
32 -- 32% of total compile time and 42% of total alloc when compiling SHA1.lhs from darcs.
34 -- The number of allocatable regs is hard coded here so we can do a fast
35 -- comparision in trivColorable.
37 -- It's ok if these numbers are _less_ than the actual number of free regs,
38 -- but they can't be more or the register conflict graph won't color.
40 -- If the graph doesn't color then the allocator will panic, but it won't
41 -- generate bad object code or anything nasty like that.
43 -- There is an allocatableRegsInClass :: RegClass -> Int, but doing the unboxing
44 -- is too slow for us here.
46 -- Look at includes/stg/MachRegs.h to get these numbers.
50 #define ALLOCATABLE_REGS_INTEGER (_ILIT(3))
51 #define ALLOCATABLE_REGS_DOUBLE (_ILIT(6))
52 #define ALLOCATABLE_REGS_FLOAT (_ILIT(0))
53 #define ALLOCATABLE_REGS_SSE (_ILIT(16))
56 #elif x86_64_TARGET_ARCH
57 #define ALLOCATABLE_REGS_INTEGER (_ILIT(5))
58 #define ALLOCATABLE_REGS_DOUBLE (_ILIT(0))
59 #define ALLOCATABLE_REGS_FLOAT (_ILIT(0))
60 #define ALLOCATABLE_REGS_SSE (_ILIT(10))
62 #elif powerpc_TARGET_ARCH
63 #define ALLOCATABLE_REGS_INTEGER (_ILIT(16))
64 #define ALLOCATABLE_REGS_DOUBLE (_ILIT(26))
65 #define ALLOCATABLE_REGS_FLOAT (_ILIT(0))
66 #define ALLOCATABLE_REGS_SSE (_ILIT(0))
69 #elif sparc_TARGET_ARCH
70 #define ALLOCATABLE_REGS_INTEGER (_ILIT(14))
71 #define ALLOCATABLE_REGS_DOUBLE (_ILIT(11))
72 #define ALLOCATABLE_REGS_FLOAT (_ILIT(22))
73 #define ALLOCATABLE_REGS_SSE (_ILIT(0))
77 #error ToDo: choose which trivColorable function to use for this architecture.
82 -- Disjoint registers ----------------------------------------------------------
84 -- The definition has been unfolded into individual cases for speed.
85 -- Each architecture has a different register setup, so we use a
86 -- different regSqueeze function for each.
95 accSqueeze count maxCount squeeze ufm
97 NodeUFM _ _ left right
98 -> case accSqueeze count maxCount squeeze right of
99 count' -> case count' >=# maxCount of
100 False -> accSqueeze count' maxCount squeeze left
103 LeafUFM _ reg -> count +# squeeze reg
108 :: (RegClass -> VirtualReg -> FastInt)
109 -> (RegClass -> RealReg -> FastInt)
110 -> Triv VirtualReg RegClass RealReg
112 trivColorable virtualRegSqueeze realRegSqueeze RcInteger conflicts exclusions
113 | count2 <- accSqueeze (_ILIT(0)) ALLOCATABLE_REGS_INTEGER
114 (virtualRegSqueeze RcInteger)
117 , count3 <- accSqueeze count2 ALLOCATABLE_REGS_INTEGER
118 (realRegSqueeze RcInteger)
121 = count3 <# ALLOCATABLE_REGS_INTEGER
123 trivColorable virtualRegSqueeze realRegSqueeze RcFloat conflicts exclusions
124 | count2 <- accSqueeze (_ILIT(0)) ALLOCATABLE_REGS_FLOAT
125 (virtualRegSqueeze RcFloat)
128 , count3 <- accSqueeze count2 ALLOCATABLE_REGS_FLOAT
129 (realRegSqueeze RcFloat)
132 = count3 <# ALLOCATABLE_REGS_FLOAT
134 trivColorable virtualRegSqueeze realRegSqueeze RcDouble conflicts exclusions
135 | count2 <- accSqueeze (_ILIT(0)) ALLOCATABLE_REGS_DOUBLE
136 (virtualRegSqueeze RcDouble)
139 , count3 <- accSqueeze count2 ALLOCATABLE_REGS_DOUBLE
140 (realRegSqueeze RcDouble)
143 = count3 <# ALLOCATABLE_REGS_DOUBLE
145 trivColorable virtualRegSqueeze realRegSqueeze RcDoubleSSE conflicts exclusions
146 | count2 <- accSqueeze (_ILIT(0)) ALLOCATABLE_REGS_SSE
147 (virtualRegSqueeze RcDoubleSSE)
150 , count3 <- accSqueeze count2 ALLOCATABLE_REGS_SSE
151 (realRegSqueeze RcDoubleSSE)
154 = count3 <# ALLOCATABLE_REGS_SSE
157 -- Specification Code ----------------------------------------------------------
159 -- The trivColorable function for each particular architecture should
160 -- implement the following function, but faster.
164 trivColorable :: RegClass -> UniqSet Reg -> UniqSet Reg -> Bool
165 trivColorable classN conflicts exclusions
168 acc :: Reg -> (Int, Int) -> (Int, Int)
171 RcInteger -> (cd+1, cf)
172 RcFloat -> (cd, cf+1)
173 _ -> panic "Regs.trivColorable: reg class not handled"
175 tmp = foldUniqSet acc (0, 0) conflicts
176 (countInt, countFloat) = foldUniqSet acc tmp exclusions
178 squeese = worst countInt classN RcInteger
179 + worst countFloat classN RcFloat
181 in squeese < allocatableRegsInClass classN
183 -- | Worst case displacement
184 -- node N of classN has n neighbors of class C.
186 -- We currently only have RcInteger and RcDouble, which don't conflict at all.
187 -- This is a bit boring compared to what's in RegArchX86.
189 worst :: Int -> RegClass -> RegClass -> Int
190 worst n classN classC
194 RcInteger -> min n (allocatableRegsInClass RcInteger)
199 RcFloat -> min n (allocatableRegsInClass RcFloat)
202 -- allocatableRegs is allMachRegNos with the fixed-use regs removed.
203 -- i.e., these are the regs for which we are prepared to allow the
204 -- register allocator to attempt to map VRegs to.
205 allocatableRegs :: [RegNo]
207 = let isFree i = isFastTrue (freeReg i)
208 in filter isFree allMachRegNos
211 -- | The number of regs in each class.
212 -- We go via top level CAFs to ensure that we're not recomputing
213 -- the length of these lists each time the fn is called.
214 allocatableRegsInClass :: RegClass -> Int
215 allocatableRegsInClass cls
217 RcInteger -> allocatableRegsInteger
218 RcFloat -> allocatableRegsDouble
220 allocatableRegsInteger :: Int
221 allocatableRegsInteger
222 = length $ filter (\r -> regClass r == RcInteger)
223 $ map RealReg allocatableRegs
225 allocatableRegsFloat :: Int
227 = length $ filter (\r -> regClass r == RcFloat
228 $ map RealReg allocatableRegs