X-Git-Url: http://git.megacz.com/?a=blobdiff_plain;f=compiler%2FnativeGen%2FMachRegs.lhs;h=5832abe7861184d803be56e54c57b73835b65240;hb=4c6a3f787abcaed009a574196d82237d9ae64fc8;hp=dd4962ca4d306f4dc0672bd8fcb26646960674c8;hpb=7fc749a43b4b6b85d234fa95d4928648259584f4;p=ghc-hetmet.git diff --git a/compiler/nativeGen/MachRegs.lhs b/compiler/nativeGen/MachRegs.lhs index dd4962c..5832abe 100644 --- a/compiler/nativeGen/MachRegs.lhs +++ b/compiler/nativeGen/MachRegs.lhs @@ -44,6 +44,7 @@ module MachRegs ( -- * Machine-dependent register-related stuff allocatableRegs, argRegs, allArgRegs, callClobberedRegs, + allocatableRegsInClass, freeReg, spRel, @@ -89,7 +90,7 @@ module MachRegs ( -- HACK: go for the max #endif -#include "MachRegs.h" +#include "../includes/MachRegs.h" import Cmm import MachOp ( MachRep(..) ) @@ -103,6 +104,8 @@ import Unique import UniqSet import Constants import FastTypes +import FastBool +import UniqFM #if powerpc_TARGET_ARCH import Data.Word ( Word8, Word16, Word32 ) @@ -444,24 +447,30 @@ instance Outputable Reg where -- NOTE: This only works for arcitectures with just RcInteger and RcDouble -- (which are disjoint) ie. x86, x86_64 and ppc -- + +-- BL 2007/09 +-- Doing a nice fold over the UniqSet makes trivColorable use +-- 32% of total compile time and 42% of total alloc when compiling SHA1.lhs from darcs. +{- trivColorable :: RegClass -> UniqSet Reg -> UniqSet Reg -> Bool trivColorable classN conflicts exclusions - = let + = let + + acc :: Reg -> (Int, Int) -> (Int, Int) acc r (cd, cf) = case regClass r of RcInteger -> (cd+1, cf) RcDouble -> (cd, cf+1) _ -> panic "MachRegs.trivColorable: reg class not handled" - tmp = foldUniqSet acc (0, 0) conflicts - (rsD, rsFP) = foldUniqSet acc tmp exclusions + tmp = foldUniqSet acc (0, 0) conflicts + (countInt, countFloat) = foldUniqSet acc tmp exclusions - squeese = worst rsD classN RcInteger - + worst rsFP classN RcDouble + squeese = worst countInt classN RcInteger + + worst countFloat classN RcDouble in squeese < allocatableRegsInClass classN - -- | Worst case displacement -- node N of classN has n neighbors of class C. -- @@ -480,6 +489,70 @@ worst n classN classC -> case classC of RcDouble -> min n (allocatableRegsInClass RcDouble) RcInteger -> 0 +-} + + +-- The number of allocatable regs is hard coded here so we can do a fast comparision +-- in trivColorable. It's ok if these numbers are _less_ than the actual number of +-- free regs, but they can't be more or the register conflict graph won't color. +-- +-- There is an allocatableRegsInClass :: RegClass -> Int, but doing the unboxing +-- is too slow for us here. +-- +-- Compare MachRegs.freeRegs and MachRegs.h to get these numbers. +-- +#if i386_TARGET_ARCH +#define ALLOCATABLE_REGS_INTEGER (_ILIT(3)) +#define ALLOCATABLE_REGS_DOUBLE (_ILIT(6)) + +#elif x86_64_TARGET_ARCH +#define ALLOCATABLE_REGS_INTEGER (_ILIT(5)) +#define ALLOCATABLE_REGS_DOUBLE (_ILIT(2)) + +#elif powerpc_TARGET_ARCH +#define ALLOCATABLE_REGS_INTEGER (_ILIT(16)) +#define ALLOCATABLE_REGS_DOUBLE (_ILIT(26)) + +#else +#error ToDo: define ALLOCATABLE_REGS_INTEGER and ALLOCATABLE_REGS_DOUBLE +#endif + +{-# INLINE regClass #-} +trivColorable :: RegClass -> UniqSet Reg -> UniqSet Reg -> Bool +trivColorable classN conflicts exclusions + = {-# SCC "trivColorable" #-} + let + {-# INLINE isSqueesed #-} + isSqueesed cI cF ufm + = case ufm of + NodeUFM _ _ left right + -> case isSqueesed cI cF right of + (# s, cI', cF' #) + -> case s of + False -> isSqueesed cI' cF' left + True -> (# True, cI', cF' #) + + LeafUFM _ reg + -> case regClass reg of + RcInteger + -> case cI +# _ILIT(1) of + cI' -> (# cI' >=# ALLOCATABLE_REGS_INTEGER, cI', cF #) + + RcDouble + -> case cF +# _ILIT(1) of + cF' -> (# cF' >=# ALLOCATABLE_REGS_DOUBLE, cI, cF' #) + + EmptyUFM + -> (# False, cI, cF #) + + in case isSqueesed (_ILIT(0)) (_ILIT(0)) conflicts of + (# False, cI', cF' #) + -> case isSqueesed cI' cF' exclusions of + (# s, _, _ #) -> not s + + (# True, _, _ #) + -> False + -- -----------------------------------------------------------------------------