X-Git-Url: http://git.megacz.com/?a=blobdiff_plain;f=compiler%2FnativeGen%2FRegAlloc%2FGraph%2FTrivColorable.hs;fp=compiler%2FnativeGen%2FRegAlloc%2FGraph%2FTrivColorable.hs;h=7c3fbd5a650ab41ee232c475a813c8d5af48ed56;hb=5ff8ce7ff0d45ce13937ad2c24a2e37ffdd2337f;hp=2f10178a182b39086bbae03724e70dda9a6e8482;hpb=71c7067b7cc2b06265c97190e6a09c272ad7a175;p=ghc-hetmet.git diff --git a/compiler/nativeGen/RegAlloc/Graph/TrivColorable.hs b/compiler/nativeGen/RegAlloc/Graph/TrivColorable.hs index 2f10178..7c3fbd5 100644 --- a/compiler/nativeGen/RegAlloc/Graph/TrivColorable.hs +++ b/compiler/nativeGen/RegAlloc/Graph/TrivColorable.hs @@ -27,10 +27,6 @@ import FastTypes -- NOTE: This only works for arcitectures with just RcInteger and RcDouble -- (which are disjoint) ie. x86, x86_64 and ppc -- --- BL 2007/09 --- Doing a nice fold over the UniqSet makes trivColorable use --- 32% of total compile time and 42% of total alloc when compiling SHA1.lhs from darcs. --- -- The number of allocatable regs is hard coded here so we can do a fast -- comparision in trivColorable. -- @@ -92,17 +88,42 @@ accSqueeze -> UniqFM reg -> FastInt -accSqueeze count maxCount squeeze ufm - = case ufm of - NodeUFM _ _ left right - -> case accSqueeze count maxCount squeeze right of - count' -> case count' >=# maxCount of - False -> accSqueeze count' maxCount squeeze left - True -> count' - - LeafUFM _ reg -> count +# squeeze reg - EmptyUFM -> count - +accSqueeze count maxCount squeeze ufm = acc count (eltsUFM ufm) + where acc count [] = count + acc count _ | count >=# maxCount = count + acc count (r:rs) = acc (count +# squeeze r) rs + +{- Note [accSqueeze] +~~~~~~~~~~~~~~~~~~~~ +BL 2007/09 +Doing a nice fold over the UniqSet makes trivColorable use +32% of total compile time and 42% of total alloc when compiling SHA1.lhs from darcs. +Therefore the UniqFM is made non-abstract and we use custom fold. + +MS 2010/04 +When converting UniqFM to use Data.IntMap, the fold cannot use UniqFM internal +representation any more. But it is imperative that the assSqueeze stops +the folding if the count gets greater or equal to maxCount. We thus convert +UniqFM to a (lazy) list, do the fold and stops if necessary, which was +the most efficient variant tried. Benchmark compiling 10-times SHA1.lhs follows. +(original = previous implementation, folding = fold of the whole UFM, + lazyFold = the current implementation, + hackFold = using internal representation of Data.IntMap) + + original folding hackFold lazyFold + -O -fasm (used everywhere) 31.509s 30.387s 30.791s 30.603s + 100.00% 96.44% 97.72% 97.12% + -fregs-graph 67.938s 74.875s 62.673s 64.679s + 100.00% 110.21% 92.25% 95.20% + -fregs-iterative 89.761s 143.913s 81.075s 86.912s + 100.00% 160.33% 90.32% 96.83% + -fnew-codegen 38.225s 37.142s 37.551s 37.119s + 100.00% 97.17% 98.24% 97.11% + -fnew-codegen -fregs-graph 91.786s 91.51s 87.368s 86.88s + 100.00% 99.70% 95.19% 94.65% + -fnew-codegen -fregs-iterative 206.72s 343.632s 194.694s 208.677s + 100.00% 166.23% 94.18% 100.95% +-} trivColorable :: (RegClass -> VirtualReg -> FastInt)