X-Git-Url: http://git.megacz.com/?p=ghc-hetmet.git;a=blobdiff_plain;f=compiler%2FnativeGen%2FRegAlloc%2FGraph%2FTrivColorable.hs;h=6cc67adad1c5d95cf1e194a64419d6ca2cab257c;hp=e286707741dc72566d276a57e0d69f9aa1145dc9;hb=f8f0e76ad302fda30196ebc9230e5fcbc97be537;hpb=f537dd87c4a07526e2b1fc1bd1c125d652833641 diff --git a/compiler/nativeGen/RegAlloc/Graph/TrivColorable.hs b/compiler/nativeGen/RegAlloc/Graph/TrivColorable.hs index e286707..6cc67ad 100644 --- a/compiler/nativeGen/RegAlloc/Graph/TrivColorable.hs +++ b/compiler/nativeGen/RegAlloc/Graph/TrivColorable.hs @@ -1,4 +1,4 @@ -{-# OPTIONS -fno-warn-unused-binds #-} +{-# LANGUAGE BangPatterns #-} module RegAlloc.Graph.TrivColorable ( trivColorable, @@ -15,6 +15,8 @@ import GraphBase import UniqFM import FastTypes +import Platform +import Panic -- trivColorable --------------------------------------------------------------- @@ -27,53 +29,24 @@ import FastTypes -- NOTE: This only works for arcitectures with just RcInteger and RcDouble -- (which are disjoint) ie. x86, x86_64 and ppc -- --- The number of allocatable regs is hard coded here so we can do a fast --- comparision in trivColorable. +-- The number of allocatable regs is hard coded in here so we can do +-- a fast comparision in trivColorable. -- --- It's ok if these numbers are _less_ than the actual number of free regs, --- but they can't be more or the register conflict graph won't color. +-- It's ok if these numbers are _less_ than the actual number of free +-- regs, but they can't be more or the register conflict +-- graph won't color. -- -- If the graph doesn't color then the allocator will panic, but it won't -- generate bad object code or anything nasty like that. -- --- There is an allocatableRegsInClass :: RegClass -> Int, but doing the unboxing --- is too slow for us here. +-- There is an allocatableRegsInClass :: RegClass -> Int, but doing +-- the unboxing is too slow for us here. +-- TODO: Is that still true? Could we use allocatableRegsInClass +-- without losing performance now? -- --- Look at includes/stg/MachRegs.h to get these numbers. +-- Look at includes/stg/MachRegs.h to get the numbers. -- -#if i386_TARGET_ARCH -#define ALLOCATABLE_REGS_INTEGER (_ILIT(3)) -#define ALLOCATABLE_REGS_DOUBLE (_ILIT(6)) -#define ALLOCATABLE_REGS_FLOAT (_ILIT(0)) -#define ALLOCATABLE_REGS_SSE (_ILIT(8)) - - -#elif x86_64_TARGET_ARCH -#define ALLOCATABLE_REGS_INTEGER (_ILIT(5)) -#define ALLOCATABLE_REGS_DOUBLE (_ILIT(0)) -#define ALLOCATABLE_REGS_FLOAT (_ILIT(0)) -#define ALLOCATABLE_REGS_SSE (_ILIT(10)) - -#elif powerpc_TARGET_ARCH -#define ALLOCATABLE_REGS_INTEGER (_ILIT(16)) -#define ALLOCATABLE_REGS_DOUBLE (_ILIT(26)) -#define ALLOCATABLE_REGS_FLOAT (_ILIT(0)) -#define ALLOCATABLE_REGS_SSE (_ILIT(0)) - - -#elif sparc_TARGET_ARCH -#define ALLOCATABLE_REGS_INTEGER (_ILIT(14)) -#define ALLOCATABLE_REGS_DOUBLE (_ILIT(11)) -#define ALLOCATABLE_REGS_FLOAT (_ILIT(22)) -#define ALLOCATABLE_REGS_SSE (_ILIT(0)) - - -#else -#error ToDo: choose which trivColorable function to use for this architecture. -#endif - - -- Disjoint registers ---------------------------------------------------------- -- @@ -125,54 +98,90 @@ the most efficient variant tried. Benchmark compiling 10-times SHA1.lhs follows. 100.00% 166.23% 94.18% 100.95% -} +-- TODO: We shouldn't be using defaultTargetPlatform here. +-- We should be passing DynFlags in instead, and looking at +-- its targetPlatform. + trivColorable :: (RegClass -> VirtualReg -> FastInt) -> (RegClass -> RealReg -> FastInt) -> Triv VirtualReg RegClass RealReg trivColorable virtualRegSqueeze realRegSqueeze RcInteger conflicts exclusions - | count2 <- accSqueeze (_ILIT(0)) ALLOCATABLE_REGS_INTEGER + | let !cALLOCATABLE_REGS_INTEGER + = iUnbox $ case platformArch defaultTargetPlatform of + ArchX86 -> 3 + ArchX86_64 -> 5 + ArchPPC -> 16 + ArchSPARC -> 14 + ArchPPC_64 -> panic "trivColorable ArchPPC_64" + ArchUnknown -> panic "trivColorable ArchUnknown" + , count2 <- accSqueeze (_ILIT(0)) cALLOCATABLE_REGS_INTEGER (virtualRegSqueeze RcInteger) conflicts - , count3 <- accSqueeze count2 ALLOCATABLE_REGS_INTEGER + , count3 <- accSqueeze count2 cALLOCATABLE_REGS_INTEGER (realRegSqueeze RcInteger) exclusions - = count3 <# ALLOCATABLE_REGS_INTEGER + = count3 <# cALLOCATABLE_REGS_INTEGER trivColorable virtualRegSqueeze realRegSqueeze RcFloat conflicts exclusions - | count2 <- accSqueeze (_ILIT(0)) ALLOCATABLE_REGS_FLOAT + | let !cALLOCATABLE_REGS_FLOAT + = iUnbox $ case platformArch defaultTargetPlatform of + ArchX86 -> 0 + ArchX86_64 -> 0 + ArchPPC -> 0 + ArchSPARC -> 22 + ArchPPC_64 -> panic "trivColorable ArchPPC_64" + ArchUnknown -> panic "trivColorable ArchUnknown" + , count2 <- accSqueeze (_ILIT(0)) cALLOCATABLE_REGS_FLOAT (virtualRegSqueeze RcFloat) conflicts - , count3 <- accSqueeze count2 ALLOCATABLE_REGS_FLOAT + , count3 <- accSqueeze count2 cALLOCATABLE_REGS_FLOAT (realRegSqueeze RcFloat) exclusions - = count3 <# ALLOCATABLE_REGS_FLOAT + = count3 <# cALLOCATABLE_REGS_FLOAT trivColorable virtualRegSqueeze realRegSqueeze RcDouble conflicts exclusions - | count2 <- accSqueeze (_ILIT(0)) ALLOCATABLE_REGS_DOUBLE + | let !cALLOCATABLE_REGS_DOUBLE + = iUnbox $ case platformArch defaultTargetPlatform of + ArchX86 -> 6 + ArchX86_64 -> 0 + ArchPPC -> 26 + ArchSPARC -> 11 + ArchPPC_64 -> panic "trivColorable ArchPPC_64" + ArchUnknown -> panic "trivColorable ArchUnknown" + , count2 <- accSqueeze (_ILIT(0)) cALLOCATABLE_REGS_DOUBLE (virtualRegSqueeze RcDouble) conflicts - , count3 <- accSqueeze count2 ALLOCATABLE_REGS_DOUBLE + , count3 <- accSqueeze count2 cALLOCATABLE_REGS_DOUBLE (realRegSqueeze RcDouble) exclusions - = count3 <# ALLOCATABLE_REGS_DOUBLE + = count3 <# cALLOCATABLE_REGS_DOUBLE trivColorable virtualRegSqueeze realRegSqueeze RcDoubleSSE conflicts exclusions - | count2 <- accSqueeze (_ILIT(0)) ALLOCATABLE_REGS_SSE + | let !cALLOCATABLE_REGS_SSE + = iUnbox $ case platformArch defaultTargetPlatform of + ArchX86 -> 8 + ArchX86_64 -> 10 + ArchPPC -> 0 + ArchSPARC -> 0 + ArchPPC_64 -> panic "trivColorable ArchPPC_64" + ArchUnknown -> panic "trivColorable ArchUnknown" + , count2 <- accSqueeze (_ILIT(0)) cALLOCATABLE_REGS_SSE (virtualRegSqueeze RcDoubleSSE) conflicts - , count3 <- accSqueeze count2 ALLOCATABLE_REGS_SSE + , count3 <- accSqueeze count2 cALLOCATABLE_REGS_SSE (realRegSqueeze RcDoubleSSE) exclusions - = count3 <# ALLOCATABLE_REGS_SSE + = count3 <# cALLOCATABLE_REGS_SSE -- Specification Code ----------------------------------------------------------