From 0f7d268d00795a58a06ae3c92ebbd14571295b84 Mon Sep 17 00:00:00 2001 From: "Ben.Lippmeier@anu.edu.au" Date: Tue, 14 Aug 2007 10:36:23 +0000 Subject: [PATCH] Add graph coloring register allocator. Refactored linear allocator into separate liveness annotation and allocation stages. Added graph coloring allocator, use -fregs-graph to enable. New dump flags are -ddump-asm-native -- output of cmm -> native transform. -ddump-asm-liveness -- code annotated with register liveness info -ddump-asm-coalesce -- output of register move coalescing (this is a separate pass when using the coloring allocator) (this could change in the future) -ddump-asm-regalloc -- code after register allocation -ddump-asm-regalloc-stages -- blocks after each build/spill stage of coloring allocator -ddump-asm-conflicts -- a global register liveness graph in graphviz format The new register allocator will allocate some registers, but it's not quite ready for prime-time yet. The spill code generator needs some work... --- compiler/cmm/PprCmm.hs | 20 +- compiler/main/DynFlags.hs | 15 + compiler/nativeGen/AsmCodeGen.lhs | 249 +++++++-- compiler/nativeGen/GraphBase.hs | 106 ++++ compiler/nativeGen/GraphColor.hs | 187 +++++++ compiler/nativeGen/GraphOps.hs | 313 +++++++++++ compiler/nativeGen/GraphPpr.hs | 149 +++++ compiler/nativeGen/MachRegs.lhs | 91 +++- compiler/nativeGen/PprMach.hs | 12 +- compiler/nativeGen/RegAllocColor.hs | 332 +++++++++++ compiler/nativeGen/RegAllocInfo.hs | 11 +- .../{RegisterAlloc.hs => RegAllocLinear.hs} | 222 ++------ compiler/nativeGen/RegArchBase.hs | 153 ++++++ compiler/nativeGen/RegArchX86.hs | 147 +++++ compiler/nativeGen/RegCoalesce.hs | 84 +++ compiler/nativeGen/RegLiveness.hs | 575 ++++++++++++++++++++ compiler/nativeGen/RegSpill.hs | 233 ++++++++ compiler/utils/State.hs | 31 ++ 18 files changed, 2701 insertions(+), 229 deletions(-) create mode 100644 compiler/nativeGen/GraphBase.hs create mode 100644 compiler/nativeGen/GraphColor.hs create mode 100644 compiler/nativeGen/GraphOps.hs create mode 100644 compiler/nativeGen/GraphPpr.hs create mode 100644 compiler/nativeGen/RegAllocColor.hs rename compiler/nativeGen/{RegisterAlloc.hs => RegAllocLinear.hs} (81%) create mode 100644 compiler/nativeGen/RegArchBase.hs create mode 100644 compiler/nativeGen/RegArchX86.hs create mode 100644 compiler/nativeGen/RegCoalesce.hs create mode 100644 compiler/nativeGen/RegLiveness.hs create mode 100644 compiler/nativeGen/RegSpill.hs create mode 100644 compiler/utils/State.hs diff --git a/compiler/cmm/PprCmm.hs b/compiler/cmm/PprCmm.hs index 72fde55..1f5be5c 100644 --- a/compiler/cmm/PprCmm.hs +++ b/compiler/cmm/PprCmm.hs @@ -33,7 +33,7 @@ -- module PprCmm ( - writeCmms, pprCmms, pprCmm, pprStmt, pprExpr + writeCmms, pprCmms, pprCmm, pprStmt, pprExpr, pprSection, pprStatic ) where #include "HsVersions.h" @@ -65,12 +65,16 @@ writeCmms handle cmms = printForC handle (pprCmms cmms) instance (Outputable info) => Outputable (GenCmm CmmStatic info CmmStmt) where ppr c = pprCmm c -instance (Outputable info) => Outputable (GenCmmTop CmmStatic info CmmStmt) where +instance (Outputable d, Outputable info, Outputable i) + => Outputable (GenCmmTop d info i) where ppr t = pprTop t -instance Outputable CmmBasicBlock where +instance (Outputable instr) => Outputable (GenBasicBlock instr) where ppr b = pprBBlock b +instance Outputable BlockId where + ppr id = pprBlockId id + instance Outputable CmmStmt where ppr s = pprStmt s @@ -92,6 +96,8 @@ instance Outputable CmmStatic where instance Outputable CmmInfo where ppr e = pprInfo e + + ----------------------------------------------------------------------------- pprCmm :: (Outputable info) => GenCmm CmmStatic info CmmStmt -> SDoc @@ -100,7 +106,9 @@ pprCmm (Cmm tops) = vcat $ intersperse (text "") $ map pprTop tops -- -------------------------------------------------------------------------- -- Top level `procedure' blocks. -- -pprTop :: (Outputable info) => GenCmmTop CmmStatic info CmmStmt -> SDoc +pprTop :: (Outputable d, Outputable info, Outputable i) + => GenCmmTop d info i -> SDoc + pprTop (CmmProc info lbl params blocks ) = vcat [ pprCLabel lbl <> parens (commafy $ map ppr params) <+> lbrace @@ -114,7 +122,7 @@ pprTop (CmmProc info lbl params blocks ) -- section "data" { ... } -- pprTop (CmmData section ds) = - (hang (pprSection section <+> lbrace) 4 (vcat (map pprStatic ds))) + (hang (pprSection section <+> lbrace) 4 (vcat (map ppr ds))) $$ rbrace -- -------------------------------------------------------------------------- @@ -186,7 +194,7 @@ pprUpdateFrame (UpdateFrame expr args) = -- -------------------------------------------------------------------------- -- Basic blocks look like assembly blocks. -- lbl: stmt ; stmt ; .. -pprBBlock :: CmmBasicBlock -> SDoc +pprBBlock :: Outputable stmt => GenBasicBlock stmt -> SDoc pprBBlock (BasicBlock ident stmts) = hang (pprBlockId ident <> colon) 4 (vcat (map ppr stmts)) diff --git a/compiler/main/DynFlags.hs b/compiler/main/DynFlags.hs index 3ceface..be14a5b 100644 --- a/compiler/main/DynFlags.hs +++ b/compiler/main/DynFlags.hs @@ -96,6 +96,12 @@ data DynFlag = Opt_D_dump_cmm | Opt_D_dump_cps_cmm | Opt_D_dump_asm + | Opt_D_dump_asm_native + | Opt_D_dump_asm_liveness + | Opt_D_dump_asm_coalesce + | Opt_D_dump_asm_regalloc + | Opt_D_dump_asm_regalloc_stages + | Opt_D_dump_asm_conflicts | Opt_D_dump_cpranal | Opt_D_dump_deriv | Opt_D_dump_ds @@ -229,6 +235,7 @@ data DynFlag | Opt_DictsCheap | Opt_RewriteRules | Opt_Vectorise + | Opt_RegsGraph -- misc opts | Opt_Cpp @@ -990,6 +997,13 @@ dynamic_flags = [ , ( "ddump-cmm", setDumpFlag Opt_D_dump_cmm) , ( "ddump-cps-cmm", setDumpFlag Opt_D_dump_cps_cmm) , ( "ddump-asm", setDumpFlag Opt_D_dump_asm) + , ( "ddump-asm-native", setDumpFlag Opt_D_dump_asm_native) + , ( "ddump-asm-liveness", setDumpFlag Opt_D_dump_asm_liveness) + , ( "ddump-asm-coalesce", setDumpFlag Opt_D_dump_asm_coalesce) + , ( "ddump-asm-regalloc", setDumpFlag Opt_D_dump_asm_regalloc) + , ( "ddump-asm-conflicts", setDumpFlag Opt_D_dump_asm_conflicts) + , ( "ddump-asm-regalloc-stages", + setDumpFlag Opt_D_dump_asm_regalloc_stages) , ( "ddump-cpranal", setDumpFlag Opt_D_dump_cpranal) , ( "ddump-deriv", setDumpFlag Opt_D_dump_deriv) , ( "ddump-ds", setDumpFlag Opt_D_dump_ds) @@ -1137,6 +1151,7 @@ fFlags = [ ( "rewrite-rules", Opt_RewriteRules ), ( "break-on-exception", Opt_BreakOnException ), ( "vectorise", Opt_Vectorise ), + ( "regs-graph", Opt_RegsGraph), -- Deprecated in favour of -XTemplateHaskell: ( "th", Opt_TemplateHaskell ), -- Deprecated in favour of -XForeignFunctionInterface: diff --git a/compiler/nativeGen/AsmCodeGen.lhs b/compiler/nativeGen/AsmCodeGen.lhs index 3485d61..ec02204 100644 --- a/compiler/nativeGen/AsmCodeGen.lhs +++ b/compiler/nativeGen/AsmCodeGen.lhs @@ -16,14 +16,18 @@ import MachInstrs import MachRegs import MachCodeGen import PprMach -import RegisterAlloc import RegAllocInfo import NCGMonad import PositionIndependentCode +import RegAllocLinear +import RegLiveness +import RegCoalesce +import qualified RegAllocColor as Color +import qualified GraphColor as Color import Cmm import CmmOpt ( cmmMiniInline, cmmMachOpFold ) -import PprCmm ( pprStmt, pprCmms ) +import PprCmm ( pprStmt, pprCmms, pprCmm ) import MachOp import CLabel @@ -42,6 +46,7 @@ import Digraph import qualified Pretty import Outputable import FastString +import UniqSet -- DEBUGGING ONLY --import OrdList @@ -53,6 +58,7 @@ import List ( intersperse ) import Data.Int import Data.Word import Data.Bits +import Data.Maybe import GHC.Exts {- @@ -108,21 +114,68 @@ The machine-dependent bits break down as follows: -- NB. We *lazilly* compile each block of code for space reasons. +-------------------- nativeCodeGen :: DynFlags -> [RawCmm] -> UniqSupply -> IO Pretty.Doc nativeCodeGen dflags cmms us = let (res, _) = initUs us $ cgCmm (concat (map add_split cmms)) - cgCmm :: [RawCmmTop] -> UniqSM (RawCmm, Pretty.Doc, [CLabel]) + cgCmm :: [RawCmmTop] -> UniqSM ( [CmmNativeGenDump], Pretty.Doc, [CLabel]) cgCmm tops = lazyMapUs (cmmNativeGen dflags) tops `thenUs` \ results -> - case unzip3 results of { (cmms,docs,imps) -> - returnUs (Cmm cmms, my_vcat docs, concat imps) + case unzip3 results of { (dump,docs,imps) -> + returnUs (dump, my_vcat docs, concat imps) } in - case res of { (ppr_cmms, insn_sdoc, imports) -> do - dumpIfSet_dyn dflags Opt_D_dump_opt_cmm "Optimised Cmm" (pprCmms [ppr_cmms]) + case res of { (dump, insn_sdoc, imports) -> do + + -- stripe across the outputs for each block so all the information for a + -- certain stage is concurrent in the dumps. + + dumpIfSet_dyn dflags + Opt_D_dump_opt_cmm "Optimised Cmm" + (pprCmm $ Cmm $ map cdCmmOpt dump) + + dumpIfSet_dyn dflags + Opt_D_dump_asm_native "(asm-native) Native code" + (vcat $ map (docToSDoc . pprNatCmmTop) $ concatMap cdNative dump) + + dumpIfSet_dyn dflags + Opt_D_dump_asm_liveness "(asm-liveness) Liveness info added" + (vcat $ map (ppr . cdLiveness) dump) + + dumpIfSet_dyn dflags + Opt_D_dump_asm_coalesce "(asm-coalesce) Register moves coalesced." + (vcat $ map (ppr . cdCoalesce) dump) + + dumpIfSet_dyn dflags + Opt_D_dump_asm_regalloc "(asm-regalloc) Registers allocated" + (vcat $ map (docToSDoc . pprNatCmmTop) $ concatMap cdAlloced dump) + + -- with the graph coloring allocator, show the result of each build/spill stage + -- for each block in turn. + mapM_ (\codeGraphs + -> dumpIfSet_dyn dflags + Opt_D_dump_asm_regalloc_stages + "(asm-regalloc-stages)" + (vcat $ map (\(stage, (code, graph)) -> + ( text "-- Stage " <> int stage + $$ ppr code + $$ Color.dotGraph Color.regDotColor trivColorable graph)) + (zip [0..] codeGraphs))) + $ map cdCodeGraphs dump + + -- Build a global register conflict graph. + -- If you want to see the graph for just one basic block then use asm-regalloc-stages instead. + dumpIfSet_dyn dflags + Opt_D_dump_asm_conflicts "(asm-conflicts) Register conflict graph" + $ Color.dotGraph Color.regDotColor trivColorable + $ foldl Color.union Color.initGraph + $ catMaybes $ map cdColoredGraph dump + + return (insn_sdoc Pretty.$$ dyld_stubs imports + #if HAVE_SUBSECTIONS_VIA_SYMBOLS -- On recent versions of Darwin, the linker supports -- dead-stripping of code and data on a per-symbol basis. @@ -193,45 +246,161 @@ nativeCodeGen dflags cmms us #endif --- Complete native code generation phase for a single top-level chunk --- of Cmm. +-- Carries output of the code generator passes, for dumping. +-- Make sure to only fill the one's we're interested in to avoid +-- creating space leaks. + +data CmmNativeGenDump + = CmmNativeGenDump + { cdCmmOpt :: RawCmmTop + , cdNative :: [NatCmmTop] + , cdLiveness :: [LiveCmmTop] + , cdCoalesce :: [LiveCmmTop] + , cdCodeGraphs :: [([LiveCmmTop], Color.Graph Reg RegClass Reg)] + , cdColoredGraph :: Maybe (Color.Graph Reg RegClass Reg) + , cdAlloced :: [NatCmmTop] } -cmmNativeGen :: DynFlags -> RawCmmTop -> UniqSM (RawCmmTop, Pretty.Doc, [CLabel]) +dchoose dflags opt a b + | dopt opt dflags = a + | otherwise = b + + +-- | Complete native code generation phase for a single top-level chunk of Cmm. +-- Unless they're being dumped, intermediate data structures are squashed after +-- every stage to avoid creating space leaks. +-- +cmmNativeGen :: DynFlags -> RawCmmTop -> UniqSM (CmmNativeGenDump, Pretty.Doc, [CLabel]) cmmNativeGen dflags cmm - = {-# SCC "fixAssigns" #-} - fixAssignsTop cmm `thenUs` \ fixed_cmm -> - {-# SCC "genericOpt" #-} - cmmToCmm dflags fixed_cmm `bind` \ (cmm, imports) -> - (if dopt Opt_D_dump_opt_cmm dflags -- space leak avoidance - then cmm - else CmmData Text []) `bind` \ ppr_cmm -> - {-# SCC "genMachCode" #-} - genMachCode dflags cmm `thenUs` \ (pre_regalloc, lastMinuteImports) -> - {-# SCC "regAlloc" #-} - mapUs regAlloc pre_regalloc `thenUs` \ with_regs -> - {-# SCC "shortcutBranches" #-} - shortcutBranches dflags with_regs `bind` \ shorted -> - {-# SCC "sequenceBlocks" #-} - map sequenceTop shorted `bind` \ sequenced -> - {-# SCC "x86fp_kludge" #-} - map x86fp_kludge sequenced `bind` \ final_mach_code -> - {-# SCC "vcat" #-} - Pretty.vcat (map pprNatCmmTop final_mach_code) `bind` \ final_sdoc -> - - returnUs (ppr_cmm, final_sdoc Pretty.$$ Pretty.text "", lastMinuteImports ++ imports) - where - x86fp_kludge :: NatCmmTop -> NatCmmTop - x86fp_kludge top@(CmmData _ _) = top + = do + -- + fixed_cmm + <- {-# SCC "fixAssigns" #-} + fixAssignsTop cmm + + ---- cmm to cmm optimisations + (cmm, imports, ppr_cmm) + <- (\fixed_cmm + -> {-# SCC "genericOpt" #-} + do let (cmm, imports) = cmmToCmm dflags fixed_cmm + + return ( cmm + , imports + , dchoose dflags Opt_D_dump_cmm cmm (CmmData Text [])) + ) fixed_cmm + + + ---- generate native code from cmm + (native, lastMinuteImports, ppr_native) + <- (\cmm + -> {-# SCC "genMachCode" #-} + do (machCode, lastMinuteImports) + <- genMachCode dflags cmm + + return ( machCode + , lastMinuteImports + , dchoose dflags Opt_D_dump_asm_native machCode []) + ) cmm + + + ---- tag instructions with register liveness information + (withLiveness, ppr_withLiveness) + <- (\native + -> {-# SCC "regLiveness" #-} + do + withLiveness <- mapUs regLiveness native + + return ( withLiveness + , dchoose dflags Opt_D_dump_asm_liveness withLiveness [])) + native + + ---- allocate registers + (alloced, ppr_alloced, ppr_coalesce, ppr_codeGraphs, ppr_coloredGraph) + <- (\withLiveness + -> {-# SCC "regAlloc" #-} + do + if dopt Opt_RegsGraph dflags + then do + -- the regs usable for allocation + let alloc_regs + = foldr (\r -> plusUFM_C unionUniqSets + $ unitUFM (regClass r) (unitUniqSet r)) + emptyUFM + $ map RealReg allocatableRegs + + -- aggressively coalesce moves between virtual regs + coalesced <- regCoalesce withLiveness + + -- graph coloring register allocation + (alloced, codeGraphs) + <- Color.regAlloc + alloc_regs + (mkUniqSet [0..maxSpillSlots]) + coalesced + + return ( alloced + , dchoose dflags Opt_D_dump_asm_regalloc alloced [] + , dchoose dflags Opt_D_dump_asm_coalesce coalesced [] + , dchoose dflags Opt_D_dump_asm_regalloc_stages codeGraphs [] + , dchoose dflags Opt_D_dump_asm_conflicts Nothing Nothing) + + else do + -- do linear register allocation + alloced <- mapUs regAlloc withLiveness + return ( alloced + , dchoose dflags Opt_D_dump_asm_regalloc alloced [] + , [] + , [] + , Nothing )) + withLiveness + + + ---- shortcut branches + let shorted = + {-# SCC "shortcutBranches" #-} + shortcutBranches dflags alloced + + ---- sequence blocks + let sequenced = + {-# SCC "sequenceBlocks" #-} + map sequenceTop shorted + + ---- x86fp_kludge + let final_mach_code = #if i386_TARGET_ARCH - x86fp_kludge top@(CmmProc info lbl params code) = - CmmProc info lbl params (map bb_i386_insert_ffrees code) - where - bb_i386_insert_ffrees (BasicBlock id instrs) = - BasicBlock id (i386_insert_ffrees instrs) + {-# SCC "x86fp_kludge" #-} + map x86fp_kludge sequenced #else - x86fp_kludge top = top + sequenced +#endif + + ---- vcat + let final_sdoc = + {-# SCC "vcat" #-} + Pretty.vcat (map pprNatCmmTop final_mach_code) + + let dump = + CmmNativeGenDump + { cdCmmOpt = ppr_cmm + , cdNative = ppr_native + , cdLiveness = ppr_withLiveness + , cdCoalesce = ppr_coalesce + , cdCodeGraphs = ppr_codeGraphs + , cdColoredGraph = ppr_coloredGraph + , cdAlloced = ppr_alloced } + + returnUs (dump, final_sdoc Pretty.$$ Pretty.text "", lastMinuteImports ++ imports) + +#if i386_TARGET_ARCH +x86fp_kludge :: NatCmmTop -> NatCmmTop +x86fp_kludge top@(CmmData _ _) = top +x86fp_kludge top@(CmmProc info lbl params code) = + CmmProc info lbl params (map bb_i386_insert_ffrees code) + where + bb_i386_insert_ffrees (BasicBlock id instrs) = + BasicBlock id (i386_insert_ffrees instrs) #endif + -- ----------------------------------------------------------------------------- -- Sequencing the basic blocks diff --git a/compiler/nativeGen/GraphBase.hs b/compiler/nativeGen/GraphBase.hs new file mode 100644 index 0000000..efc59b9 --- /dev/null +++ b/compiler/nativeGen/GraphBase.hs @@ -0,0 +1,106 @@ + +-- | Types for the general graph colorer. +module GraphBase ( + Triv, + Graph (..), + initGraph, + graphMapModify, + + Node (..), newNode, +) + + +where + +import UniqSet +import UniqFM + +-- | A fn to check if a node is trivially colorable +-- For graphs who's color classes are disjoint then a node is 'trivially colorable' +-- when it has less neighbors and exclusions than available colors for that node. +-- +-- For graph's who's color classes overlap, ie some colors alias other colors, then +-- this can be a bit more tricky. There is a general way to calculate this, but +-- it's likely be too slow for use in the code. The coloring algorithm takes +-- a canned function which can be optimised by the user to be specific to the +-- specific graph being colored. +-- +-- for details, see "A Generalised Algorithm for Graph-Coloring Register Allocation" +-- Smith, Ramsey, Holloway - PLDI 2004. +-- +type Triv k cls color + = cls -- ^ the class of the node we're trying to color. + -> UniqSet k -- ^ the node's neighbors. + -> UniqSet color -- ^ the node's exclusions. + -> Bool + + +-- | The Interference graph. +-- There used to be more fields, but they were turfed out in a previous revision. +-- maybe we'll want more later.. +-- +data Graph k cls color + = Graph { + -- | All active nodes in the graph. + graphMap :: UniqFM (Node k cls color) } + +-- | An empty graph. +initGraph + = Graph + { graphMap = emptyUFM } + + +-- | Modify the finite map holding the nodes in the graph. +graphMapModify + :: (UniqFM (Node k cls color) -> UniqFM (Node k cls color)) + -> Graph k cls color -> Graph k cls color + +graphMapModify f graph + = graph { graphMap = f (graphMap graph) } + + + +-- | Graph nodes. +-- Represents a thing that can conflict with another thing. +-- For the register allocater the nodes represent registers. +-- +data Node k cls color + = Node { + -- | A unique identifier for this node. + nodeId :: k + + -- | The class of this node, + -- determines the set of colors that can be used. + , nodeClass :: cls + + -- | The color of this node, if any. + , nodeColor :: Maybe color + + -- | Neighbors which must be colored differently to this node. + , nodeConflicts :: UniqSet k + + -- | Colors that cannot be used by this node. + , nodeExclusions :: UniqSet color + + -- | Colors that this node would prefer to be, in decending order. + , nodePreference :: [color] + + -- | Neighbors that this node would like to be colored the same as. + , nodeCoalesce :: UniqSet k } + + +-- | An empty node. +newNode :: k -> cls -> Node k cls color +newNode k cls + = Node + { nodeId = k + , nodeClass = cls + , nodeColor = Nothing + , nodeConflicts = emptyUniqSet + , nodeExclusions = emptyUniqSet + , nodePreference = [] + , nodeCoalesce = emptyUniqSet } + + + + diff --git a/compiler/nativeGen/GraphColor.hs b/compiler/nativeGen/GraphColor.hs new file mode 100644 index 0000000..934f2a7 --- /dev/null +++ b/compiler/nativeGen/GraphColor.hs @@ -0,0 +1,187 @@ + +-- | Graph Coloring. +-- This is a generic graph coloring library, abstracted over the type of +-- the node keys, nodes and colors. +-- +module GraphColor ( + module GraphBase, + module GraphOps, + module GraphPpr, + colorGraph +) + +where + +import GraphBase +import GraphOps +import GraphPpr + +import Unique +import UniqFM +import UniqSet +import Outputable + +import Data.Maybe +import Data.List + + +-- | Try to color a graph with this set of colors. +-- Uses Chaitin's algorithm to color the graph. +-- The graph is scanned for nodes which are deamed 'trivially colorable'. These nodes +-- are pushed onto a stack and removed from the graph. +-- Once this process is complete the graph can be colored by removing nodes from +-- the stack (ie in reverse order) and assigning them colors different to their neighbors. +-- +colorGraph + :: ( Uniquable k, Uniquable cls, Uniquable color, Eq color + , Outputable k, Outputable cls, Outputable color) + => UniqFM (UniqSet color) -- ^ map of (node class -> set of colors available for this class). + -> Triv k cls color -- ^ fn to decide whether a node is trivially colorable. + -> (Graph k cls color -> k) -- ^ fn to choose a node to potentially leave uncolored if nothing is trivially colorable. + -> Graph k cls color -- ^ the graph to color. + -> ( Graph k cls color -- ^ the colored graph. + , UniqSet k ) -- ^ the set of nodes that we couldn't find a color for. + +colorGraph colors triv spill graph0 + = let -- run the scanner to slurp out all the trivially colorable nodes + (ksTriv, ksProblems) = colorScan colors triv spill [] emptyUniqSet graph0 + + -- color the trivially colorable nodes + (graph1, ksNoTriv) = assignColors colors graph0 ksTriv + + -- try and color the problem nodes + (graph2, ksNoColor) = assignColors colors graph1 (uniqSetToList ksProblems) + + -- if the trivially colorable nodes didn't color then something is wrong + -- with the provided triv function. + in if not $ null ksNoTriv + then pprPanic "colorGraph: trivially colorable nodes didn't color!" empty +{- ( empty + $$ text "ksTriv = " <> ppr ksTriv + $$ text "ksNoTriv = " <> ppr ksNoTriv + $$ empty + $$ dotGraph (\x -> text "white") triv graph1) -} + else (graph2, mkUniqSet ksNoColor) + + +colorScan colors triv spill safe prob graph + + -- empty graphs are easy to color. + | isNullUFM $ graphMap graph + = (safe, prob) + + -- Try and find a trivially colorable node. + | Just node <- find (\node -> triv (nodeClass node) + (nodeConflicts node) + (nodeExclusions node)) + $ eltsUFM $ graphMap graph + , k <- nodeId node + = colorScan colors triv spill + (k : safe) prob (delNode k graph) + + -- There was no trivially colorable node, + -- Choose one to potentially leave uncolored. We /might/ be able to find + -- a color for this later on, but no guarantees. + | k <- spill graph + = colorScan colors triv spill + safe (addOneToUniqSet prob k) (delNode k graph) + + + +-- | Try to assign a color to all these nodes. + +assignColors + :: ( Uniquable k, Uniquable cls, Uniquable color, Eq color ) + => UniqFM (UniqSet color) -- ^ map of (node class -> set of colors available for this class). + -> Graph k cls color -- ^ the graph + -> [k] -- ^ nodes to assign a color to. + -> ( Graph k cls color -- the colored graph + , [k]) -- the nodes that didn't color. + +assignColors colors graph ks + = assignColors' colors graph [] ks + + where assignColors' colors graph prob [] + = (graph, prob) + + assignColors' colors graph prob (k:ks) + = case assignColor colors k graph of + + -- couldn't color this node + Nothing -> assignColors' colors graph (k : prob) ks + + -- this node colored ok, so do the rest + Just graph' -> assignColors' colors graph' prob ks + + + assignColor colors u graph + | Just c <- selectColor colors graph u + = Just (setColor u c graph) + + | otherwise + = Nothing + + + +-- | Select a color for a certain node +-- taking into account preferences, neighbors and exclusions. +-- returns Nothing if no color can be assigned to this node. +-- +-- TODO: avoid using the prefs of the neighbors, if at all possible. +-- +selectColor + :: ( Uniquable k, Uniquable cls, Uniquable color, Eq color) + => UniqFM (UniqSet color) -- ^ map of (node class -> set of colors available for this class). + -> Graph k cls color -- ^ the graph + -> k -- ^ key of the node to select a color for. + -> Maybe color + +selectColor colors graph u + = let -- lookup the node + Just node = lookupNode graph u + + -- lookup the available colors for the class of this node. + Just colors_avail + = lookupUFM colors (nodeClass node) + + -- colors we can't use because they're already being used + -- by a node that conflicts with this one. + Just nsConflicts + = sequence + $ map (lookupNode graph) + $ uniqSetToList + $ nodeConflicts node + + colors_conflict = mkUniqSet + $ catMaybes + $ map nodeColor nsConflicts + + -- colors that are still ok + colors_ok_ex = minusUniqSet colors_avail (nodeExclusions node) + colors_ok = minusUniqSet colors_ok_ex colors_conflict + + -- the colors that we prefer, and are still ok + colors_ok_pref = intersectUniqSets + (mkUniqSet $ nodePreference node) colors_ok + + -- make the decision + chooseColor + + -- we got one of our preferences, score! + | not $ isEmptyUniqSet colors_ok_pref + , c : rest <- uniqSetToList colors_ok_pref + = Just c + + -- it wasn't a preference, but it was still ok + | not $ isEmptyUniqSet colors_ok + , c : rest <- uniqSetToList colors_ok + = Just c + + -- leave this node uncolored + | otherwise + = Nothing + + in chooseColor + + + diff --git a/compiler/nativeGen/GraphOps.hs b/compiler/nativeGen/GraphOps.hs new file mode 100644 index 0000000..86bf6bd --- /dev/null +++ b/compiler/nativeGen/GraphOps.hs @@ -0,0 +1,313 @@ + +-- | Basic operations on graphs. +-- +module GraphOps ( + addNode, delNode, getNode, lookupNode, modNode, + size, + union, + addConflict, delConflict, addConflicts, + addCoalesce, delCoalesce, + addExclusion, + addPreference, + setColor, + verify +) +where + +import GraphBase + +import Outputable +import Unique +import UniqSet +import UniqFM + +import Data.List hiding (union) +import Data.Maybe + + +-- | Lookup a node from the graph. +lookupNode + :: Uniquable k + => Graph k cls color + -> k -> Maybe (Node k cls color) + +lookupNode graph k + = lookupUFM (graphMap graph) k + + +-- | Get a node from the graph, throwing an error if it's not there +getNode + :: Uniquable k + => Graph k cls color + -> k -> Node k cls color + +getNode graph k + = case lookupUFM (graphMap graph) k of + Just node -> node + Nothing -> panic "ColorOps.getNode: not found" + + +-- | Add a node to the graph, linking up its edges +addNode :: Uniquable k + => k -> Node k cls color + -> Graph k cls color -> Graph k cls color + +addNode k node graph + = let + -- add back conflict edges from other nodes to this one + map_conflict + = foldUniqSet + (adjustUFM (\n -> n { nodeConflicts = addOneToUniqSet (nodeConflicts n) k})) + (graphMap graph) + (nodeConflicts node) + + -- add back coalesce edges from other nodes to this one + map_coalesce + = foldUniqSet + (adjustUFM (\n -> n { nodeCoalesce = addOneToUniqSet (nodeCoalesce n) k})) + map_conflict + (nodeCoalesce node) + + in graph + { graphMap = addToUFM map_coalesce k node} + + + +-- | Delete a node and all its edges from the graph. +-- Throws an error if it's not there. +delNode :: Uniquable k + => k -> Graph k cls color -> Graph k cls color + +delNode k graph + = let Just node = lookupNode graph k + + -- delete conflict edges from other nodes to this one. + graph1 = foldl' (\g k1 -> delConflict k1 k g) graph + $ uniqSetToList (nodeConflicts node) + + -- delete coalesce edge from other nodes to this one. + graph2 = foldl' (\g k1 -> delCoalesce k1 k g) graph1 + $ uniqSetToList (nodeCoalesce node) + + -- delete the node + graph3 = graphMapModify (\fm -> delFromUFM fm k) graph2 + + in graph3 + + +-- | Modify a node in the graph +modNode :: Uniquable k + => (Node k cls color -> Node k cls color) + -> k -> Graph k cls color -> Graph k cls color + +modNode f k graph + = case getNode graph k of + Node{} -> graphMapModify + (\fm -> let Just node = lookupUFM fm k + node' = f node + in addToUFM fm k node') + graph + + +-- | Get the size of the graph, O(n) +size :: Uniquable k + => Graph k cls color -> Int + +size graph + = sizeUFM $ graphMap graph + + +-- | Union two graphs together. +union :: Uniquable k + => Graph k cls color -> Graph k cls color -> Graph k cls color + +union graph1 graph2 + = Graph + { graphMap = plusUFM (graphMap graph1) (graphMap graph2) } + + + + +-- | Add a conflict between nodes to the graph, creating the nodes required. +-- Conflicts are virtual regs which need to be colored differently. +addConflict + :: Uniquable k + => (k, cls) -> (k, cls) + -> Graph k cls color -> Graph k cls color + +addConflict (u1, c1) (u2, c2) + = let addNeighbor u c u' + = adjustWithDefaultUFM + (\node -> node { nodeConflicts = addOneToUniqSet (nodeConflicts node) u' }) + (newNode u c) { nodeConflicts = unitUniqSet u' } + u + + in graphMapModify + ( addNeighbor u1 c1 u2 + . addNeighbor u2 c2 u1) + + +-- | Delete a conflict edge. k1 -> k2 +delConflict + :: Uniquable k + => k -> k + -> Graph k cls color -> Graph k cls color + +delConflict k1 k2 + = modNode + (\node -> node { nodeConflicts = delOneFromUniqSet (nodeConflicts node) k2 }) + k1 + + +-- | Add some conflicts to the graph, creating nodes if required. +-- All the nodes in the set are taken to conflict with each other. +addConflicts + :: Uniquable k + => UniqSet k -> (k -> cls) + -> Graph k cls color -> Graph k cls color + +addConflicts conflicts getClass + + -- just a single node, but no conflicts, create the node anyway. + | (u : []) <- uniqSetToList conflicts + = graphMapModify + $ adjustWithDefaultUFM + id + (newNode u (getClass u)) + u + + | otherwise + = graphMapModify + $ (\fm -> foldr (\u -> addConflictSet1 u getClass conflicts) fm + $ uniqSetToList conflicts) + + +addConflictSet1 u getClass set + = let set' = delOneFromUniqSet set u + in adjustWithDefaultUFM + (\node -> node { nodeConflicts = unionUniqSets set' (nodeConflicts node) } ) + (newNode u (getClass u)) { nodeConflicts = set' } + u + + +-- | Add an exclusion to the graph, creating nodes if required. +-- These are extra colors that the node cannot use. +addExclusion + :: (Uniquable k, Uniquable color) + => k -> (k -> cls) -> color + -> Graph k cls color -> Graph k cls color + +addExclusion u getClass color + = graphMapModify + $ adjustWithDefaultUFM + (\node -> node { nodeExclusions = addOneToUniqSet (nodeExclusions node) color }) + (newNode u (getClass u)) { nodeExclusions = unitUniqSet color } + u + + +-- | Add a coalescence edge to the graph, creating nodes if requried. +-- It is considered adventageous to assign the same color to nodes in a coalesence. +addCoalesce + :: Uniquable k + => (k, cls) -> (k, cls) + -> Graph k cls color -> Graph k cls color + +addCoalesce (u1, c1) (u2, c2) + = let addCoalesce u c u' + = adjustWithDefaultUFM + (\node -> node { nodeCoalesce = addOneToUniqSet (nodeCoalesce node) u' }) + (newNode u c) { nodeCoalesce = unitUniqSet u' } + u + + in graphMapModify + ( addCoalesce u1 c1 u2 + . addCoalesce u2 c2 u1) + + +-- | Delete a coalescence edge (k1 -> k2) from the graph. +delCoalesce + :: Uniquable k + => k -> k + -> Graph k cls color -> Graph k cls color + +delCoalesce k1 k2 + = modNode (\node -> node { nodeCoalesce = delOneFromUniqSet (nodeCoalesce node) k2 }) + k1 + + +-- | Add a color preference to the graph, creating nodes if required. +-- The most recently added preference is the most prefered. +-- The algorithm tries to assign a node it's prefered color if possible. +-- +addPreference + :: Uniquable k + => (k, cls) -> color + -> Graph k cls color -> Graph k cls color + +addPreference (u, c) color + = graphMapModify + $ adjustWithDefaultUFM + (\node -> node { nodePreference = color : (nodePreference node) }) + (newNode u c) { nodePreference = [color] } + u + + +-- | Verify the internal structure of a graph +-- all its edges should point to valid nodes +-- +verify :: Uniquable k + => Graph k cls color + -> Bool + +verify graph + = let edges = unionUniqSets + (unionManyUniqSets + (map nodeConflicts $ eltsUFM $ graphMap graph)) + (unionManyUniqSets + (map nodeCoalesce $ eltsUFM $ graphMap graph)) + + nodes = mkUniqSet $ map nodeId $ eltsUFM $ graphMap graph + + badEdges = minusUniqSet edges nodes + + in if isEmptyUniqSet badEdges + then True + else False + + +-- | Set the color of a certain node +setColor + :: Uniquable k + => k -> color + -> Graph k cls color -> Graph k cls color + +setColor u color + = graphMapModify + $ adjustUFM + (\n -> n { nodeColor = Just color }) + u + + +adjustWithDefaultUFM + :: Uniquable k + => (a -> a) -> a -> k + -> UniqFM a -> UniqFM a + +adjustWithDefaultUFM f def k map + = addToUFM_C + (\old new -> f old) + map + k def + + +adjustUFM + :: Uniquable k + => (a -> a) + -> k -> UniqFM a -> UniqFM a + +adjustUFM f k map + = case lookupUFM map k of + Nothing -> map + Just a -> addToUFM map k (f a) + + diff --git a/compiler/nativeGen/GraphPpr.hs b/compiler/nativeGen/GraphPpr.hs new file mode 100644 index 0000000..a2a64bc --- /dev/null +++ b/compiler/nativeGen/GraphPpr.hs @@ -0,0 +1,149 @@ + +-- | Pretty printing of graphs. + +module GraphPpr ( + dumpGraph, + dotGraph +) +where + +import GraphBase + +import Outputable +import Unique +import UniqSet +import UniqFM + +import Data.List +import Data.Maybe + + +-- | Pretty print a graph in a somewhat human readable format. +dumpGraph + :: (Outputable k, Outputable cls, Outputable color) + => Graph k cls color -> SDoc + +dumpGraph graph + = text "Graph" + $$ (vcat $ map dumpNode $ eltsUFM $ graphMap graph) + +dumpNode node + = text "Node " <> ppr (nodeId node) + $$ text "conflicts " + <> parens (int (sizeUniqSet $ nodeConflicts node)) + <> text " = " + <> ppr (nodeConflicts node) + + $$ text "exclusions " + <> parens (int (sizeUniqSet $ nodeExclusions node)) + <> text " = " + <> ppr (nodeExclusions node) + + $$ text "coalesce " + <> parens (int (sizeUniqSet $ nodeCoalesce node)) + <> text " = " + <> ppr (nodeCoalesce node) + + $$ space + + + +-- | Pretty print a graph in graphviz .dot format. +-- Conflicts get solid edges. +-- Coalescences get dashed edges. +dotGraph + :: ( Uniquable k + , Outputable k, Outputable cls, Outputable color) + => (color -> SDoc) -- | What graphviz color to use for each node color + -- It's usually safe to return X11 style colors here, + -- ie "red", "green" etc or a hex triplet #aaff55 etc + -> Triv k cls color + -> Graph k cls color -> SDoc + +dotGraph colorMap triv graph + = let nodes = eltsUFM $ graphMap graph + in vcat + ( [ text "graph G {" ] + ++ map (dotNode colorMap triv) nodes + ++ (catMaybes $ snd $ mapAccumL dotNodeEdges emptyUniqSet nodes) + ++ [ text "}" + , space ]) + +dotNode colorMap triv node + = let name = ppr $ nodeId node + cls = ppr $ nodeClass node + + excludes + = hcat $ punctuate space + $ map (\n -> text "-" <> ppr n) + $ uniqSetToList $ nodeExclusions node + + preferences + = hcat $ punctuate space + $ map (\n -> text "+" <> ppr n) + $ nodePreference node + + expref = if and [isEmptyUniqSet (nodeExclusions node), null (nodePreference node)] + then empty + else text "\\n" <> (excludes <+> preferences) + + -- if the node has been colored then show that, + -- otherwise indicate whether it looks trivially colorable. + color + | Just c <- nodeColor node + = text "\\n(" <> ppr c <> text ")" + + | triv (nodeClass node) (nodeConflicts node) (nodeExclusions node) + = text "\\n(" <> text "triv" <> text ")" + + | otherwise + = text "\\n(" <> text "spill?" <> text ")" + + label = name <> text " :: " <> cls + <> expref + <> color + + pcolorC = case nodeColor node of + Nothing -> text "style=filled fillcolor=white" + Just c -> text "style=filled fillcolor=" <> doubleQuotes (colorMap c) + + + pout = text "node [label=" <> doubleQuotes label <> space <> pcolorC <> text "]" + <> space <> doubleQuotes name + <> text ";" + + in pout + + +-- | Nodes in the graph are doubly linked, but we only want one edge for each +-- conflict if the graphviz graph. Traverse over the graph, but make sure +-- to only print the edges for each node once. + +dotNodeEdges visited node + | elementOfUniqSet (nodeId node) visited + = ( visited + , Nothing) + + | otherwise + = let dconflicts + = map (dotEdgeConflict (nodeId node)) + $ uniqSetToList + $ minusUniqSet (nodeConflicts node) visited + + dcoalesces + = map (dotEdgeCoalesce (nodeId node)) + $ uniqSetToList + $ minusUniqSet (nodeCoalesce node) visited + + out = vcat dconflicts + $$ vcat dcoalesces + + in ( addOneToUniqSet visited (nodeId node) + , Just out) + +dotEdgeConflict u1 u2 + = doubleQuotes (ppr u1) <> text " -- " <> doubleQuotes (ppr u2) <> text ";" + +dotEdgeCoalesce u1 u2 + = doubleQuotes (ppr u1) <> text " -- " <> doubleQuotes (ppr u2) <> space <> text "[ style = dashed ];" + diff --git a/compiler/nativeGen/MachRegs.lhs b/compiler/nativeGen/MachRegs.lhs index bc96e9d..bbb1fd7 100644 --- a/compiler/nativeGen/MachRegs.lhs +++ b/compiler/nativeGen/MachRegs.lhs @@ -26,8 +26,9 @@ module MachRegs ( -- * The 'Reg' type RegNo, - Reg(..), isRealReg, isVirtualReg, + Reg(..), isRealReg, isVirtualReg, renameVirtualReg, RegClass(..), regClass, + trivColorable, getHiVRegFromLo, mkVReg, @@ -92,6 +93,7 @@ import Pretty import Outputable ( Outputable(..), pprPanic, panic ) import qualified Outputable import Unique +import UniqSet import Constants import FastTypes @@ -353,6 +355,11 @@ data RegClass | RcDouble deriving Eq +instance Uniquable RegClass where + getUnique RcInteger = mkUnique 'L' 0 + getUnique RcFloat = mkUnique 'L' 1 + getUnique RcDouble = mkUnique 'L' 2 + type RegNo = Int data Reg @@ -396,6 +403,15 @@ isVirtualReg (VirtualRegD _) = True isRealReg :: Reg -> Bool isRealReg = not . isVirtualReg +renameVirtualReg :: Unique -> Reg -> Reg +renameVirtualReg u r + = case r of + RealReg _ -> error "renameVirtualReg: can't change unique on a real reg" + VirtualRegI _ -> VirtualRegI u + VirtualRegHi _ -> VirtualRegHi u + VirtualRegF _ -> VirtualRegF u + VirtualRegD _ -> VirtualRegD u + instance Show Reg where show (RealReg i) = showReg i show (VirtualRegI u) = "%vI_" ++ show u @@ -403,10 +419,62 @@ instance Show Reg where show (VirtualRegF u) = "%vF_" ++ show u show (VirtualRegD u) = "%vD_" ++ show u +instance Outputable RegClass where + ppr RcInteger = Outputable.text "I" + ppr RcFloat = Outputable.text "F" + ppr RcDouble = Outputable.text "D" + instance Outputable Reg where ppr r = Outputable.text (show r) + + +-- trivColorable function for the graph coloring allocator +-- This gets hammered by scanGraph during register allocation, +-- so needs to be fairly efficient. +-- +-- NOTE: This only works for arcitectures with just RcInteger and RcDouble +-- (which are disjoint) ie. x86, x86_64 and ppc +-- +trivColorable :: RegClass -> UniqSet Reg -> UniqSet Reg -> Bool +trivColorable classN conflicts exclusions + = let + acc r (cd, cf) + = case regClass r of + RcInteger -> (cd+1, cf) + RcDouble -> (cd, cf+1) + _ -> panic "MachRegs.trivColorable: reg class not handled" + + tmp = foldUniqSet acc (0, 0) conflicts + (rsD, rsFP) = foldUniqSet acc tmp exclusions + + squeese = worst rsD classN RcInteger + + worst rsFP classN RcDouble + + in squeese < allocatableRegsInClass classN + + +-- | Worst case displacement +-- node N of classN has n neighbors of class C. +-- +-- We currently only have RcInteger and RcDouble, which don't conflict at all. +-- This is a bit boring compared to what's in RegArchX86. +-- +worst :: Int -> RegClass -> RegClass -> Int +worst n classN classC + = case classN of + RcInteger + -> case classC of + RcInteger -> min n (allocatableRegsInClass RcInteger) + RcDouble -> 0 + + RcDouble + -> case classC of + RcDouble -> min n (allocatableRegsInClass RcDouble) + RcInteger -> 0 + + -- ----------------------------------------------------------------------------- -- Machine-specific register stuff @@ -468,6 +536,7 @@ fake3 = RealReg 11 fake4 = RealReg 12 fake5 = RealReg 13 + -- On x86, we might want to have an 8-bit RegClass, which would -- contain just regs 1-4 (the others don't have 8-bit versions). -- However, we can get away without this at the moment because the @@ -489,6 +558,7 @@ showReg n then regNames !! n else "%unknown_x86_real_reg_" ++ show n + #endif {- @@ -952,6 +1022,25 @@ allocatableRegs = let isFree i = isFastTrue (freeReg i) in filter isFree allMachRegNos + +-- | The number of regs in each class. +-- We go via top level CAFs to ensure that we're not recomputing +-- the length of these lists each time the fn is called. +allocatableRegsInClass :: RegClass -> Int +allocatableRegsInClass cls + = case cls of + RcInteger -> allocatableRegsInteger + RcDouble -> allocatableRegsDouble + +allocatableRegsInteger + = length $ filter (\r -> regClass r == RcInteger) + $ map RealReg allocatableRegs + +allocatableRegsDouble + = length $ filter (\r -> regClass r == RcDouble) + $ map RealReg allocatableRegs + + -- these are the regs which we cannot assume stay alive over a -- C call. callClobberedRegs :: [Reg] diff --git a/compiler/nativeGen/PprMach.hs b/compiler/nativeGen/PprMach.hs index 02ab265..6a72265 100644 --- a/compiler/nativeGen/PprMach.hs +++ b/compiler/nativeGen/PprMach.hs @@ -13,8 +13,8 @@ #include "nativeGen/NCG.h" module PprMach ( - pprNatCmmTop, pprBasicBlock, - pprInstr, pprSize, pprUserReg, + pprNatCmmTop, pprBasicBlock, pprSectionHeader, pprData, + pprInstr, pprSize, pprUserReg ) where @@ -36,6 +36,7 @@ import Unique ( pprUnique ) import Pretty import FastString import qualified Outputable +import Outputable ( Outputable ) import Data.Array.ST import Data.Word ( Word8 ) @@ -798,6 +799,9 @@ pprDataItem lit -- ----------------------------------------------------------------------------- -- pprInstr: print an 'Instr' +instance Outputable Instr where + ppr instr = Outputable.docToSDoc $ pprInstr instr + pprInstr :: Instr -> Doc --pprInstr (COMMENT s) = empty -- nuke 'em @@ -1207,7 +1211,8 @@ pprSizeRegRegReg name size reg1 reg2 reg3 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH -pprInstr v@(MOV size s@(OpReg src) d@(OpReg dst)) -- hack +{- -- BUGS: changed for coloring allocator +pprInstr v@(MOV size s@(OpReg src) d@(OpReg dst)) -- hack -- write a pass for this and patch linear allocator with it | src == dst = #if 0 /* #ifdef DEBUG */ @@ -1215,6 +1220,7 @@ pprInstr v@(MOV size s@(OpReg src) d@(OpReg dst)) -- hack #else empty #endif +-} pprInstr (MOV size src dst) = pprSizeOpOp SLIT("mov") size src dst diff --git a/compiler/nativeGen/RegAllocColor.hs b/compiler/nativeGen/RegAllocColor.hs new file mode 100644 index 0000000..6a71412 --- /dev/null +++ b/compiler/nativeGen/RegAllocColor.hs @@ -0,0 +1,332 @@ +-- | Graph coloring register allocator. +-- +-- TODO: +-- Live range splitting: +-- At the moment regs that are spilled are spilled for all time, even though +-- we might be able to allocate them a hardreg in different parts of the code. +-- +-- As we're aggressively coalescing before register allocation proper we're not currently +-- using the coalescence information present in the graph. +-- +-- The function that choosing the potential spills could be a bit cleverer. +-- +-- Colors in graphviz graphs could be nicer. +-- + +module RegAllocColor ( + regAlloc, + regDotColor +) + +where + +#include "nativeGen/NCG.h" + +import qualified GraphColor as Color +import RegLiveness +import RegSpill +import MachRegs +import MachInstrs +import RegCoalesce +import PprMach + +import UniqSupply +import UniqSet +import UniqFM +import Bag +import Outputable + +import Data.List +import Data.Maybe +import Control.Monad + +-- | The maximum number of build/spill cycles we'll allow. +-- We should only need 3 or 4 cycles tops. +-- If we run for any longer than this we're probably in an infinite loop, +-- It's probably better just to bail out and report a bug at this stage. +maxSpinCount :: Int +maxSpinCount = 10 + + +-- | The top level of the graph coloring register allocator. +-- +regAlloc + :: UniqFM (UniqSet Reg) -- ^ the registers we can use for allocation + -> UniqSet Int -- ^ the set of available spill slots. + -> [LiveCmmTop] -- ^ code annotated with liveness information. + -> UniqSM + ( [NatCmmTop] -- ^ code with registers allocated. + , [ ( [LiveCmmTop] + , Color.Graph Reg RegClass Reg) ]) -- ^ code and graph for each pass + +regAlloc regsFree slotsFree code + = do + (code_final, debug_codeGraphs, graph_final) + <- regAlloc_spin 0 trivColorable regsFree slotsFree [] code + + return ( code_final + , debug_codeGraphs ) + +regAlloc_spin (spinCount :: Int) triv regsFree slotsFree debug_codeGraphs code + = do + -- check that we're not running off down the garden path. + when (spinCount > maxSpinCount) + $ pprPanic "regAlloc_spin: max build/spill cycle count exceeded." + ( text "It looks like the register allocator is stuck in an infinite loop." + $$ text "max cycles = " <> int maxSpinCount + $$ text "regsFree = " <> (hcat $ punctuate space $ map (docToSDoc . pprUserReg) + $ uniqSetToList $ unionManyUniqSets $ eltsUFM regsFree) + $$ text "slotsFree = " <> ppr (sizeUniqSet slotsFree)) + + -- build a conflict graph from the code. + graph <- buildGraph code + + -- build a map of how many instructions each reg lives for + -- this lazy, it won't be computed unless we need to spill + let fmLife = plusUFMs_C (\(r1, l1) (r2, l2) -> (r1, l1 + l2)) + $ map lifetimeCount code + + -- the function to choose regs to leave uncolored + let spill = chooseSpill_maxLife fmLife + + -- try and color the graph + let (graph_colored, rsSpill) + = Color.colorGraph regsFree triv spill graph + + -- see if we've found a coloring + if isEmptyUniqSet rsSpill + then do + -- patch the registers using the info in the graph + let code_patched = map (patchRegsFromGraph graph_colored) code + let code_nat = map stripLive code_patched + + return ( code_nat + , debug_codeGraphs ++ [(code, graph_colored), (code_patched, graph_colored)] + , graph_colored) + + else do + -- spill the uncolored regs + (code_spilled, slotsFree') + <- regSpill code slotsFree rsSpill + + -- recalculate liveness + let code_nat = map stripLive code_spilled + code_relive <- mapM regLiveness code_nat + + -- try again + regAlloc_spin (spinCount + 1) triv regsFree slotsFree' + (debug_codeGraphs ++ [(code, graph_colored)]) + code_relive + + +----- +-- Simple maxconflicts isn't always good, because we +-- can naievely end up spilling vregs that only live for one or two instrs. +-- +{- +chooseSpill_maxConflicts + :: Color.Graph Reg RegClass Reg + -> Reg + +chooseSpill_maxConflicts graph + = let node = maximumBy + (\n1 n2 -> compare + (sizeUniqSet $ Color.nodeConflicts n1) + (sizeUniqSet $ Color.nodeConflicts n2)) + $ eltsUFM $ Color.graphMap graph + + in Color.nodeId node +-} + +----- +chooseSpill_maxLife + :: UniqFM (Reg, Int) + -> Color.Graph Reg RegClass Reg + -> Reg + +chooseSpill_maxLife life graph + = let node = maximumBy (\n1 n2 -> compare (getLife n1) (getLife n2)) + $ eltsUFM $ Color.graphMap graph + + -- Orphan vregs die in the same instruction they are born in. + -- They will be in the graph, but not in the liveness map. + -- Their liveness is 0. + getLife n + = case lookupUFM life (Color.nodeId n) of + Just (_, l) -> l + Nothing -> 0 + + in Color.nodeId node + + +-- | Build a graph from the liveness and coalesce information in this code. + +buildGraph + :: [LiveCmmTop] + -> UniqSM (Color.Graph Reg RegClass Reg) + +buildGraph code + = do + -- Add the reg-reg conflicts to the graph + let conflictSets = unionManyBags (map slurpConflicts code) + let graph_conflict = foldrBag graphAddConflictSet Color.initGraph conflictSets + + + -- Add the coalescences edges to the graph. + let coalesce = unionManyBags (map slurpJoinMovs code) + let graph_coalesce = foldrBag graphAddCoalesce graph_conflict coalesce + + return $ graph_coalesce + + +-- | Add some conflict edges to the graph. +-- Conflicts between virtual and real regs are recorded as exlusions. +-- +graphAddConflictSet + :: UniqSet Reg + -> Color.Graph Reg RegClass Reg + -> Color.Graph Reg RegClass Reg + +graphAddConflictSet set graph + = let reals = filterUFM isRealReg set + virtuals = filterUFM (not . isRealReg) set + + graph1 = Color.addConflicts virtuals regClass graph + graph2 = foldr (\(r1, r2) -> Color.addExclusion r1 regClass r2) + graph1 + [ (a, b) + | a <- uniqSetToList virtuals + , b <- uniqSetToList reals] + + in graph2 + + +-- | Add some coalesences edges to the graph +-- Coalesences between virtual and real regs are recorded as preferences. +-- +graphAddCoalesce + :: (Reg, Reg) + -> Color.Graph Reg RegClass Reg + -> Color.Graph Reg RegClass Reg + +graphAddCoalesce (r1, r2) graph + | RealReg regno <- r1 + = Color.addPreference (regWithClass r2) r1 graph + + | RealReg regno <- r2 + = Color.addPreference (regWithClass r1) r2 graph + + | otherwise + = Color.addCoalesce (regWithClass r1) (regWithClass r2) graph + + where regWithClass r = (r, regClass r) + + +-- | Patch registers in code using the reg -> reg mapping in this graph. +patchRegsFromGraph + :: Color.Graph Reg RegClass Reg + -> LiveCmmTop -> LiveCmmTop + +patchRegsFromGraph graph code + = let + -- a function to lookup the hardreg for a virtual reg from the graph. + patchF reg + -- leave real regs alone. + | isRealReg reg + = reg + + -- this virtual has a regular node in the graph. + | Just node <- Color.lookupNode graph reg + = case Color.nodeColor node of + Just color -> color + Nothing -> reg + + -- no node in the graph for this virtual, bad news. + | otherwise + = pprPanic "patchRegsFromGraph: register mapping failed." + ( text "There is no node in the graph for register " <> ppr reg + $$ ppr code + $$ Color.dotGraph (\x -> text "white") trivColorable graph) + + in patchEraseLive patchF code + + +----- +-- Register colors for drawing conflict graphs +-- Keep this out of MachRegs.hs because it's specific to the graph coloring allocator. + + +-- reg colors for x86 +#if i386_TARGET_ARCH +regDotColor :: Reg -> SDoc +regDotColor reg + = let Just str = lookupUFM regColors reg + in text str + +regColors + = listToUFM + $ [ (eax, "#00ff00") + , (ebx, "#0000ff") + , (ecx, "#00ffff") + , (edx, "#0080ff") + + , (fake0, "#ff00ff") + , (fake1, "#ff00aa") + , (fake2, "#aa00ff") + , (fake3, "#aa00aa") + , (fake4, "#ff0055") + , (fake5, "#5500ff") ] +#endif + + +-- reg colors for x86_64 +#if x86_64_TARGET_ARCH +regDotColor :: Reg -> SDoc +regDotColor reg + = let Just str = lookupUFM regColors reg + in text str + +regColors + = listToUFM + $ [ (rax, "#00ff00"), (eax, "#00ff00") + , (rbx, "#0000ff"), (ebx, "#0000ff") + , (rcx, "#00ffff"), (ecx, "#00ffff") + , (rdx, "#0080ff"), (edx, "#00ffff") + , (r8, "#00ff80") + , (r9, "#008080") + , (r10, "#0040ff") + , (r11, "#00ff40") + , (r12, "#008040") + , (r13, "#004080") + , (r14, "#004040") + , (r15, "#002080") ] + + ++ zip (map RealReg [16..31]) (repeat "red") +#endif + + +-- reg colors for ppc +#if powerpc_TARGET_ARCH +regDotColor :: Reg -> SDoc +regDotColor reg + = case regClass reg of + RcInteger -> text "blue" + RcFloat -> text "red" +#endif + + +{- +toX11Color (r, g, b) + = let rs = padL 2 '0' (showHex r "") + gs = padL 2 '0' (showHex r "") + bs = padL 2 '0' (showHex r "") + + padL n c s + = replicate (n - length s) c ++ s + in "#" ++ rs ++ gs ++ bs +-} + +plusUFMs_C :: (elt -> elt -> elt) -> [UniqFM elt] -> UniqFM elt +plusUFMs_C f maps + = foldl (plusUFM_C f) emptyUFM maps + diff --git a/compiler/nativeGen/RegAllocInfo.hs b/compiler/nativeGen/RegAllocInfo.hs index 024774e..4cb688a 100644 --- a/compiler/nativeGen/RegAllocInfo.hs +++ b/compiler/nativeGen/RegAllocInfo.hs @@ -760,8 +760,8 @@ mkSpillInstr -> Int -- spill slot to use -> Instr mkSpillInstr reg delta slot - = ASSERT(isRealReg reg) - let + -- = ASSERT(isRealReg reg) -- BUGS: used for graph coloring: is this ok? + = let off = spillSlotToOffset slot in #ifdef alpha_TARGET_ARCH @@ -805,8 +805,8 @@ mkLoadInstr -> Int -- spill slot to use -> Instr mkLoadInstr reg delta slot - = ASSERT(isRealReg reg) - let + -- = ASSERT(isRealReg reg) -- BUGS: used for graph coloring: is this ok? + = let off = spillSlotToOffset slot in #if alpha_TARGET_ARCH @@ -891,4 +891,5 @@ spillSlotToOffset slot = 64 + spillSlotSize * slot | otherwise = pprPanic "spillSlotToOffset:" - (text "invalid spill location: " <> int slot) + ( text "invalid spill location: " <> int slot + $$ text "maxSpillSlots: " <> int maxSpillSlots) diff --git a/compiler/nativeGen/RegisterAlloc.hs b/compiler/nativeGen/RegAllocLinear.hs similarity index 81% rename from compiler/nativeGen/RegisterAlloc.hs rename to compiler/nativeGen/RegAllocLinear.hs index 8f7a656..d86e460 100644 --- a/compiler/nativeGen/RegisterAlloc.hs +++ b/compiler/nativeGen/RegAllocLinear.hs @@ -80,16 +80,16 @@ The algorithm is roughly: -} -module RegisterAlloc ( - regAlloc +module RegAllocLinear ( + regAlloc, ) where #include "HsVersions.h" -import PprMach import MachRegs import MachInstrs import RegAllocInfo +import RegLiveness import Cmm import Digraph @@ -102,31 +102,11 @@ import Outputable #ifndef DEBUG import Data.Maybe ( fromJust ) #endif -import Data.List ( nub, partition, mapAccumL, groupBy ) +import Data.List ( nub, partition, mapAccumL) import Control.Monad ( when ) import Data.Word import Data.Bits --- ----------------------------------------------------------------------------- --- Some useful types - -type RegSet = UniqSet Reg - -type RegMap a = UniqFM a -emptyRegMap = emptyUFM - -type BlockMap a = UniqFM a -emptyBlockMap = emptyUFM - --- A basic block where the isntructions are annotated with the registers --- which are no longer live in the *next* instruction in this sequence. --- (NB. if the instruction is a jump, these registers might still be live --- at the jump target(s) - you have to check the liveness at the destination --- block to find out). -type AnnBasicBlock - = GenBasicBlock (Instr, - [Reg], -- registers read (only) which die - [Reg]) -- registers written which die -- ----------------------------------------------------------------------------- -- The free register set @@ -239,6 +219,8 @@ emptyStackMap :: StackMap emptyStackMap = StackMap [0..maxSpillSlots] emptyUFM getStackSlotFor :: StackMap -> Unique -> (StackMap,Int) +getStackSlotFor fs@(StackMap [] reserved) reg + = panic "RegAllocLinear.getStackSlotFor: out of stack slots" getStackSlotFor fs@(StackMap (freeSlot:stack') reserved) reg = case lookupUFM reserved reg of Just slot -> (fs,slot) @@ -247,149 +229,29 @@ getStackSlotFor fs@(StackMap (freeSlot:stack') reserved) reg = -- ----------------------------------------------------------------------------- -- Top level of the register allocator -regAlloc :: NatCmmTop -> UniqSM NatCmmTop -regAlloc (CmmData sec d) = returnUs $ CmmData sec d -regAlloc (CmmProc info lbl params []) - = returnUs $ CmmProc info lbl params [] -- no blocks to run the regalloc on -regAlloc (CmmProc info lbl params blocks@(first:rest)) - = let - first_id = blockId first - sccs = sccBlocks blocks - (ann_sccs, block_live) = computeLiveness sccs - in linearRegAlloc block_live ann_sccs `thenUs` \final_blocks -> - let ((first':_),rest') = partition ((== first_id) . blockId) final_blocks - in returnUs $ -- pprTrace "Liveness" (ppr block_live) $ - CmmProc info lbl params (first':rest') - -sccBlocks :: [NatBasicBlock] -> [SCC NatBasicBlock] -sccBlocks blocks = stronglyConnComp graph - where - getOutEdges :: [Instr] -> [BlockId] - getOutEdges instrs = foldr jumpDests [] instrs +-- Allocate registers +regAlloc + :: LiveCmmTop + -> UniqSM NatCmmTop - graph = [ (block, getUnique id, map getUnique (getOutEdges instrs)) - | block@(BasicBlock id instrs) <- blocks ] +regAlloc cmm@(CmmData sec d) + = returnUs $ CmmData sec d + +regAlloc cmm@(CmmProc (LiveInfo info _ _) lbl params []) + = returnUs $ CmmProc info lbl params [] + +regAlloc cmm@(CmmProc (LiveInfo info (Just first_id) block_live) lbl params comps) + = let ann_sccs = map (\b -> case b of + BasicBlock i [b] -> AcyclicSCC b + BasicBlock i bs -> CyclicSCC bs) + $ comps + in linearRegAlloc block_live ann_sccs `thenUs` \final_blocks -> --- ----------------------------------------------------------------------------- --- Computing liveness - -computeLiveness - :: [SCC NatBasicBlock] - -> ([SCC AnnBasicBlock], -- instructions annotated with list of registers - -- which are "dead after this instruction". - BlockMap RegSet) -- blocks annontated with set of live registers - -- on entry to the block. - - -- NOTE: on entry, the SCCs are in "reverse" order: later blocks may transfer - -- control to earlier ones only. The SCCs returned are in the *opposite* - -- order, which is exactly what we want for the next pass. + let ((first':_), rest') = partition ((== first_id) . blockId) final_blocks + in returnUs $ CmmProc info lbl params (first' : rest') -computeLiveness sccs - = livenessSCCs emptyBlockMap [] sccs - where - livenessSCCs - :: BlockMap RegSet - -> [SCC AnnBasicBlock] -- accum - -> [SCC NatBasicBlock] - -> ([SCC AnnBasicBlock], BlockMap RegSet) - - livenessSCCs blockmap done [] = (done, blockmap) - livenessSCCs blockmap done - (AcyclicSCC (BasicBlock block_id instrs) : sccs) = - {- pprTrace "live instrs" (ppr (getUnique block_id) $$ - vcat (map (\(instr,regs) -> docToSDoc (pprInstr instr) $$ ppr regs) instrs')) $ - -} - livenessSCCs blockmap' - (AcyclicSCC (BasicBlock block_id instrs'):done) sccs - where (live,instrs') = liveness emptyUniqSet blockmap [] - (reverse instrs) - blockmap' = addToUFM blockmap block_id live - - livenessSCCs blockmap done - (CyclicSCC blocks : sccs) = - livenessSCCs blockmap' (CyclicSCC blocks':done) sccs - where (blockmap', blocks') - = iterateUntilUnchanged linearLiveness equalBlockMaps - blockmap blocks - - iterateUntilUnchanged - :: (a -> b -> (a,c)) -> (a -> a -> Bool) - -> a -> b - -> (a,c) - - iterateUntilUnchanged f eq a b - = head $ - concatMap tail $ - groupBy (\(a1, _) (a2, _) -> eq a1 a2) $ - iterate (\(a, _) -> f a b) $ - (a, error "RegisterAlloc.livenessSCCs") - - - linearLiveness :: BlockMap RegSet -> [NatBasicBlock] - -> (BlockMap RegSet, [AnnBasicBlock]) - linearLiveness = mapAccumL processBlock - - processBlock blockmap input@(BasicBlock block_id instrs) - = (blockmap', BasicBlock block_id instrs') - where (live,instrs') = liveness emptyUniqSet blockmap [] - (reverse instrs) - blockmap' = addToUFM blockmap block_id live - - -- probably the least efficient way to compare two - -- BlockMaps for equality. - equalBlockMaps a b - = a' == b' - where a' = map f $ ufmToList a - b' = map f $ ufmToList b - f (key,elt) = (key, uniqSetToList elt) - - liveness :: RegSet -- live regs - -> BlockMap RegSet -- live regs on entry to other BBs - -> [(Instr,[Reg],[Reg])] -- instructions (accum) - -> [Instr] -- instructions - -> (RegSet, [(Instr,[Reg],[Reg])]) - - liveness liveregs blockmap done [] = (liveregs, done) - liveness liveregs blockmap done (instr:instrs) - | not_a_branch = liveness liveregs1 blockmap - ((instr,r_dying,w_dying):done) instrs - | otherwise = liveness liveregs_br blockmap - ((instr,r_dying_br,w_dying):done) instrs - where - RU read written = regUsage instr - - -- registers that were written here are dead going backwards. - -- registers that were read here are live going backwards. - liveregs1 = (liveregs `delListFromUniqSet` written) - `addListToUniqSet` read - - -- registers that are not live beyond this point, are recorded - -- as dying here. - r_dying = [ reg | reg <- read, reg `notElem` written, - not (elementOfUniqSet reg liveregs) ] - - w_dying = [ reg | reg <- written, - not (elementOfUniqSet reg liveregs) ] - - -- union in the live regs from all the jump destinations of this - -- instruction. - targets = jumpDests instr [] -- where we go from here - not_a_branch = null targets - - targetLiveRegs target = case lookupUFM blockmap target of - Just ra -> ra - Nothing -> emptyBlockMap - - live_from_branch = unionManyUniqSets (map targetLiveRegs targets) - - liveregs_br = liveregs1 `unionUniqSets` live_from_branch - - -- registers that are live only in the branch targets should - -- be listed as dying here. - live_branch_only = live_from_branch `minusUniqSet` liveregs - r_dying_br = uniqSetToList (mkUniqSet r_dying `unionUniqSets` - live_branch_only) + -- ----------------------------------------------------------------------------- -- Linear sweep to allocate registers @@ -419,14 +281,14 @@ instance Outputable Loc where linearRegAlloc :: BlockMap RegSet -- live regs on entry to each basic block - -> [SCC AnnBasicBlock] -- instructions annotated with "deaths" + -> [SCC LiveBasicBlock] -- instructions annotated with "deaths" -> UniqSM [NatBasicBlock] linearRegAlloc block_live sccs = linearRA_SCCs emptyBlockMap emptyStackMap sccs where linearRA_SCCs :: BlockAssignment -> StackMap - -> [SCC AnnBasicBlock] + -> [SCC LiveBasicBlock] -> UniqSM [NatBasicBlock] linearRA_SCCs block_assig stack [] = returnUs [] linearRA_SCCs block_assig stack @@ -475,7 +337,7 @@ linearRegAlloc block_live sccs = linearRA_SCCs emptyBlockMap emptyStackMap sccs runR block_assig freeregs assig stack us $ linearRA [] [] instrs - linearRA :: [Instr] -> [NatBasicBlock] -> [(Instr,[Reg],[Reg])] + linearRA :: [Instr] -> [NatBasicBlock] -> [LiveInstr] -> RegM ([Instr], [NatBasicBlock]) linearRA instr_acc fixups [] = return (reverse instr_acc, fixups) @@ -490,17 +352,22 @@ type BlockAssignment = BlockMap (FreeRegs, RegMap Loc) raInsn :: BlockMap RegSet -- Live temporaries at each basic block -> [Instr] -- new instructions (accum.) - -> (Instr,[Reg],[Reg]) -- the instruction (with "deaths") + -> LiveInstr -- the instruction (with "deaths") -> RegM ( [Instr], -- new instructions [NatBasicBlock] -- extra fixup blocks ) -raInsn block_live new_instrs (instr@(DELTA n), _, _) = do +raInsn block_live new_instrs (Instr instr@(COMMENT _) Nothing) + = return (new_instrs, []) + +raInsn block_live new_instrs (Instr instr@(DELTA n) Nothing) + = do setDeltaR n return (new_instrs, []) -raInsn block_live new_instrs (instr, r_dying, w_dying) = do +raInsn block_live new_instrs (Instr instr (Just live)) + = do assig <- getAssigR -- If we have a reg->reg move between virtual registers, where the @@ -511,7 +378,7 @@ raInsn block_live new_instrs (instr, r_dying, w_dying) = do -- (we can't eliminitate it if the source register is on the stack, because -- we do not want to use one spill slot for different virtual registers) case isRegRegMove instr of - Just (src,dst) | src `elem` r_dying, + Just (src,dst) | src `elementOfUniqSet` (liveDieRead live), isVirtualReg dst, not (dst `elemUFM` assig), Just (InReg _) <- (lookupUFM assig src) -> do @@ -533,7 +400,13 @@ raInsn block_live new_instrs (instr, r_dying, w_dying) = do -} return (new_instrs, []) - other -> genRaInsn block_live new_instrs instr r_dying w_dying + other -> genRaInsn block_live new_instrs instr + (uniqSetToList $ liveDieRead live) + (uniqSetToList $ liveDieWrite live) + + +raInsn block_live new_instrs li + = pprPanic "raInsn" (text "no match for:" <> ppr li) genRaInsn block_live new_instrs instr r_dying w_dying = @@ -662,7 +535,7 @@ saveClobberedTemps clobbered dying = do = do --ToDo: copy it to another register if possible (spill,slot) <- spillR (RealReg reg) temp - clobber (addToUFM assig temp (InBoth reg slot)) (spill:instrs) rest + clobber (addToUFM assig temp (InBoth reg slot)) (spill: COMMENT FSLIT("spill clobber") : instrs) rest clobberRegs :: [RegNo] -> RegM () clobberRegs [] = return () -- common case @@ -781,13 +654,14 @@ allocateRegsAndSpill reading keep spills alloc (r:rs) = do -- in setAssigR assig2 spills' <- do_load reading loc my_reg spills - allocateRegsAndSpill reading keep (spill_insn:spills') + allocateRegsAndSpill reading keep + (spill_insn : COMMENT FSLIT("spill alloc") : spills') (my_reg:alloc) rs where -- load up a spilled temporary if we need to do_load True (Just (InMem slot)) reg spills = do insn <- loadR (RealReg reg) slot - return (insn : spills) + return (insn : COMMENT FSLIT("spill load") : spills) do_load _ _ _ spills = return spills diff --git a/compiler/nativeGen/RegArchBase.hs b/compiler/nativeGen/RegArchBase.hs new file mode 100644 index 0000000..5cf5403 --- /dev/null +++ b/compiler/nativeGen/RegArchBase.hs @@ -0,0 +1,153 @@ + +-- | Utils for calculating general worst, bound, squeese and free, functions. +-- +-- as per: "A Generalized Algorithm for Graph-Coloring Register Allocation" +-- Michael Smith, Normal Ramsey, Glenn Holloway. +-- PLDI 2004 +-- +-- These general versions are not used in GHC proper because they are too slow. +-- Instead, hand written optimised versions are provided for each architecture +-- in MachRegs*.hs +-- +-- This code is here because we can test the architecture specific code against it. +-- +-- +module RegArchBase ( + RegClass(..), + Reg(..), + RegSub(..), + + worst, + bound, + squeese +) + +where + + +----- +import qualified Data.Set as Set +import Data.Set (Set) + +-- import qualified Data.Map as Map +-- import Data.Map (Map) + + +-- Some basic register classes. +-- These aren't nessesarally in 1-to-1 correspondance with the allocatable +-- RegClasses in MachRegs.hs +-- +data RegClass + -- general purpose regs + = ClassG32 -- 32 bit GPRs + | ClassG16 -- 16 bit GPRs + | ClassG8 -- 8 bit GPRs + + -- floating point regs + | ClassF64 -- 64 bit FPRs + deriving (Show, Ord, Eq) + + +-- | A register of some class +data Reg + -- a register of some class + = Reg RegClass Int + + -- a sub-component of one of the other regs + | RegSub RegSub Reg + deriving (Show, Ord, Eq) + + +-- | A subcomponent of another register +data RegSub + = SubL16 -- lowest 16 bits + | SubL8 -- lowest 8 bits + | SubL8H -- second lowest 8 bits + deriving (Show, Enum, Ord, Eq) + + + +-- | Worst case displacement +-- +-- a node N of classN has some number of neighbors, +-- all of which are from classC. +-- +-- (worst neighbors classN classC) is the maximum number of potential +-- colors for N that can be lost by coloring its neighbors. + +-- This should be hand coded/cached for each particular architecture, +-- because the compute time is very long.. + +worst + :: (RegClass -> Set Reg) + -> (Reg -> Set Reg) + -> Int -> RegClass -> RegClass -> Int + +worst regsOfClass regAlias neighbors classN classC + = let regAliasS regs = unionsS $ Set.map regAlias regs + + -- all the regs in classes N, C + regsN = regsOfClass classN + regsC = regsOfClass classC + + -- all the possible subsets of c which have size < m + regsS = Set.filter (\s -> Set.size s >= 1 && Set.size s <= neighbors) + $ powerset regsC + + -- for each of the subsets of C, the regs which conflict with posiblities for N + regsS_conflict + = Set.map (\s -> Set.intersection regsN (regAliasS s)) regsS + + in Set.findMax $ Set.map Set.size $ regsS_conflict + + +-- | For a node N of classN and neighbors of classesC +-- (bound classN classesC) is the maximum number of potential +-- colors for N that can be lost by coloring its neighbors. +-- + +bound + :: (RegClass -> Set Reg) + -> (Reg -> Set Reg) + -> RegClass -> [RegClass] -> Int + +bound regsOfClass regAlias classN classesC + = let regAliasS regs = unionsS $ Set.map regAlias regs + + regsC_aliases + = Set.unions + $ map (regAliasS . regsOfClass) classesC + + overlap = Set.intersection (regsOfClass classN) regsC_aliases + + in Set.size overlap + + +-- | The total squeese on a particular node with a list of neighbors. +-- +-- A version of this should be constructed for each particular architecture, +-- possibly including uses of bound, so that alised registers don't get counted +-- twice, as per the paper. +-- +squeese + :: (RegClass -> Set Reg) + -> (Reg -> Set Reg) + -> RegClass -> [(Int, RegClass)] -> Int + +squeese regsOfClass regAlias classN countCs + = sum (map (\(i, classC) -> worst regsOfClass regAlias i classN classC) countCs) + + +-- | powerset (for lists) +powersetL :: Ord a => [a] -> [[a]] +powersetL = map concat . mapM (\x -> [[],[x]]) + +-- | powerset (for sets) +powerset :: Ord a => Set a -> Set (Set a) +powerset s = Set.fromList $ map Set.fromList $ powersetL $ Set.toList s + +-- | unions (for sets) +unionsS :: Ord a => Set (Set a) -> Set a +unionsS ss = Set.unions $ Set.toList ss + + diff --git a/compiler/nativeGen/RegArchX86.hs b/compiler/nativeGen/RegArchX86.hs new file mode 100644 index 0000000..53f9929 --- /dev/null +++ b/compiler/nativeGen/RegArchX86.hs @@ -0,0 +1,147 @@ + +-- | A description of the register set of the X86. +-- This isn't used directly in GHC proper. +-- +-- See RegArchBase.hs for the reference. +-- See MachRegs.hs for the actual trivColorable function used in GHC. +-- +module RegArchX86 ( + classOfReg, + regsOfClass, + regName, + regAlias, + worst, + squeese, +) where + +import RegArchBase (Reg(..), RegSub(..), RegClass(..)) + +import qualified Data.Set as Set +import Data.Set (Set) + +-- | Determine the class of a register +classOfReg :: Reg -> RegClass +classOfReg reg + = case reg of + Reg c i -> c + + RegSub SubL16 r -> ClassG16 + RegSub SubL8 r -> ClassG8 + RegSub SubL8H r -> ClassG8 + + +-- | Determine all the regs that make up a certain class. +-- +regsOfClass :: RegClass -> Set Reg +regsOfClass c + = case c of + ClassG32 + -> Set.fromList [ Reg ClassG32 i | i <- [0..7] ] + + ClassG16 + -> Set.fromList [ RegSub SubL16 (Reg ClassG32 i) | i <- [0..7] ] + + ClassG8 + -> Set.union + (Set.fromList [ RegSub SubL8 (Reg ClassG32 i) | i <- [0..3] ]) + (Set.fromList [ RegSub SubL8H (Reg ClassG32 i) | i <- [0..3] ]) + + ClassF64 + -> Set.fromList [ Reg ClassF64 i | i <- [0..5] ] + + +-- | Determine the common name of a reg +-- returns Nothing if this reg is not part of the machine. + +regName :: Reg -> Maybe String +regName reg + = case reg of + Reg ClassG32 i + | i <= 7 -> Just ([ "eax", "ebx", "ecx", "edx", "ebp", "esi", "edi", "esp" ] !! i) + + RegSub SubL16 (Reg ClassG32 i) + | i <= 7 -> Just ([ "ax", "bx", "cx", "dx", "bp", "si", "di", "sp"] !! i) + + RegSub SubL8 (Reg ClassG32 i) + | i <= 3 -> Just ([ "al", "bl", "cl", "dl"] !! i) + + RegSub SubL8H (Reg ClassG32 i) + | i <= 3 -> Just ([ "ah", "bh", "ch", "dh"] !! i) + + _ -> Nothing + + +-- | Which regs alias what other regs +regAlias :: Reg -> Set Reg +regAlias reg + = case reg of + + -- 32 bit regs alias all of the subregs + Reg ClassG32 i + + -- for eax, ebx, ecx, eds + | i <= 3 + -> Set.fromList $ [ Reg ClassG32 i, RegSub SubL16 reg, RegSub SubL8 reg, RegSub SubL8H reg ] + + -- for esi, edi, esp, ebp + | 4 <= i && i <= 7 + -> Set.fromList $ [ Reg ClassG32 i, RegSub SubL16 reg ] + + + -- 16 bit subregs alias the whole reg + RegSub SubL16 r@(Reg ClassG32 i) + -> regAlias r + + -- 8 bit subregs alias the 32 and 16, but not the other 8 bit subreg + RegSub SubL8 r@(Reg ClassG32 i) + -> Set.fromList $ [ r, RegSub SubL16 r, RegSub SubL8 r ] + + RegSub SubL8H r@(Reg ClassG32 i) + -> Set.fromList $ [ r, RegSub SubL16 r, RegSub SubL8H r ] + + -- fp + Reg ClassF64 i + -> Set.singleton reg + + _ -> error "regAlias: invalid register" + + +-- | Optimised versions of RegColorBase.{worst, squeese} specific to x86 + +worst :: Int -> RegClass -> RegClass -> Int +worst n classN classC + = case classN of + ClassG32 + -> case classC of + ClassG32 -> min n 8 + ClassG16 -> min n 8 + ClassG8 -> min n 4 + ClassF64 -> 0 + + ClassG16 + -> case classC of + ClassG32 -> min n 8 + ClassG16 -> min n 8 + ClassG8 -> min n 4 + ClassF64 -> 0 + + ClassG8 + -> case classC of + ClassG32 -> min (n*2) 8 + ClassG16 -> min (n*2) 8 + ClassG8 -> min n 8 + ClassF64 -> 0 + + ClassF64 + -> case classC of + ClassF64 -> min n 6 + _ -> 0 + +squeese :: RegClass -> [(Int, RegClass)] -> Int +squeese classN countCs + = sum (map (\(i, classC) -> worst i classN classC) countCs) + + + + + diff --git a/compiler/nativeGen/RegCoalesce.hs b/compiler/nativeGen/RegCoalesce.hs new file mode 100644 index 0000000..f3b19ad --- /dev/null +++ b/compiler/nativeGen/RegCoalesce.hs @@ -0,0 +1,84 @@ + +-- | Register coalescing. +-- + +module RegCoalesce ( + regCoalesce, + slurpJoinMovs +) + +where + +import Cmm +import MachRegs +import RegLiveness +import RegAllocInfo + +import Bag +import UniqFM +import UniqSet +import UniqSupply + +import Control.Monad +import Data.List + +-- | Do register coalescing on this top level thing +-- For Reg -> Reg moves, if the first reg dies at the same time the second reg is born +-- then the mov only serves to join live ranges. The two regs can be renamed to be +-- the same and the move instruction safely erased. + +regCoalesce :: [LiveCmmTop] -> UniqSM [LiveCmmTop] +regCoalesce code + = do + let joins = foldl' unionBags emptyBag + $ map slurpJoinMovs code + + let alloc = foldl' buildAlloc emptyUFM + $ bagToList joins + + let patched = map (patchEraseLive (sinkReg alloc)) code + + return patched + + +buildAlloc :: UniqFM Reg -> (Reg, Reg) -> UniqFM Reg +buildAlloc fm (r1, r2) + = let rmin = min r1 r2 + rmax = max r1 r2 + in addToUFM fm rmax rmin + +sinkReg :: UniqFM Reg -> Reg -> Reg +sinkReg fm r + = case lookupUFM fm r of + Nothing -> r + Just r' -> sinkReg fm r' + + +-- | Slurp out mov instructions that only serve to join live ranges. +-- During a mov, if the source reg dies and the destiation reg is born +-- then we can rename the two regs to the same thing and eliminate the move. +-- +slurpJoinMovs :: LiveCmmTop -> Bag (Reg, Reg) +slurpJoinMovs live + = slurpCmm emptyBag live + where + slurpCmm rs CmmData{} = rs + slurpCmm rs (CmmProc _ _ _ blocks) = foldl' slurpComp rs blocks + slurpComp rs (BasicBlock i blocks) = foldl' slurpBlock rs blocks + slurpBlock rs (BasicBlock i instrs) = foldl' slurpLI rs instrs + + slurpLI rs (Instr _ Nothing) = rs + slurpLI rs (Instr instr (Just live)) + | Just (r1, r2) <- isRegRegMove instr + , elementOfUniqSet r1 $ liveDieRead live + , elementOfUniqSet r2 $ liveBorn live + + -- only coalesce movs between two virtuals for now, else we end up with + -- allocatable regs in the live regs list.. + , isVirtualReg r1 && isVirtualReg r2 + = consBag (r1, r2) rs + + | otherwise + = rs + + diff --git a/compiler/nativeGen/RegLiveness.hs b/compiler/nativeGen/RegLiveness.hs new file mode 100644 index 0000000..4acb3be --- /dev/null +++ b/compiler/nativeGen/RegLiveness.hs @@ -0,0 +1,575 @@ +----------------------------------------------------------------------------- +-- +-- The register liveness determinator +-- +-- (c) The University of Glasgow 2004 +-- +----------------------------------------------------------------------------- + + +module RegLiveness ( + RegSet, + RegMap, emptyRegMap, + BlockMap, emptyBlockMap, + LiveCmmTop, + LiveInstr (..), + Liveness (..), + LiveInfo (..), + LiveBasicBlock, + + mapBlockTop, + mapBlockTopM, + stripLive, + slurpConflicts, + lifetimeCount, + eraseDeltasLive, + patchEraseLive, + patchRegsLiveInstr, + regLiveness + ) where + +#include "HsVersions.h" + +import MachRegs +import MachInstrs +import PprMach +import RegAllocInfo +import Cmm + +import Digraph +import Outputable +import Unique +import UniqSet +import UniqFM +import UniqSupply +import Bag +import State + +import Data.List +import Data.Maybe + +----------------------------------------------------------------------------- +type RegSet = UniqSet Reg + +type RegMap a = UniqFM a +emptyRegMap = emptyUFM + +type BlockMap a = UniqFM a +emptyBlockMap = emptyUFM + + +-- | A top level thing which carries liveness information. +type LiveCmmTop + = GenCmmTop + CmmStatic + LiveInfo + (GenBasicBlock LiveInstr) + -- the "instructions" here are actually more blocks, + -- single blocks are acyclic + -- multiple blocks are taken to be cyclic. + +-- | An instruction with liveness information. +data LiveInstr + = Instr Instr (Maybe Liveness) + +-- | Liveness information. +-- The regs which die are ones which are no longer live in the *next* instruction +-- in this sequence. +-- (NB. if the instruction is a jump, these registers might still be live +-- at the jump target(s) - you have to check the liveness at the destination +-- block to find out). + +data Liveness + = Liveness + { liveBorn :: RegSet -- ^ registers born in this instruction (written to for first time). + , liveDieRead :: RegSet -- ^ registers that died because they were read for the last time. + , liveDieWrite :: RegSet } -- ^ registers that died because they were clobbered by something. + + +-- | Stash regs live on entry to each basic block in the info part of the cmm code. +data LiveInfo + = LiveInfo + [CmmStatic] -- cmm static stuff + (Maybe BlockId) -- id of the first block + (BlockMap RegSet) -- argument locals live on entry to this block + +-- | A basic block with liveness information. +type LiveBasicBlock + = GenBasicBlock LiveInstr + + +instance Outputable LiveInstr where + ppr (Instr instr Nothing) + = ppr instr + + ppr (Instr instr (Just live)) + = ppr instr + $$ (nest 8 + $ vcat + [ pprRegs (ptext SLIT("# born: ")) (liveBorn live) + , pprRegs (ptext SLIT("# r_dying: ")) (liveDieRead live) + , pprRegs (ptext SLIT("# w_dying: ")) (liveDieWrite live) ] + $+$ space) + + where pprRegs :: SDoc -> RegSet -> SDoc + pprRegs name regs + | isEmptyUniqSet regs = empty + | otherwise = name <> (hcat $ punctuate space $ map (docToSDoc . pprUserReg) $ uniqSetToList regs) + + +instance Outputable LiveInfo where + ppr (LiveInfo static firstId liveOnEntry) + = (vcat $ map ppr static) + $$ text "# firstId = " <> ppr firstId + $$ text "# liveOnEntry = " <> ppr liveOnEntry + + +-- | map a function across all the basic blocks in this code +-- +mapBlockTop + :: (LiveBasicBlock -> LiveBasicBlock) + -> LiveCmmTop -> LiveCmmTop + +mapBlockTop f cmm + = evalState (mapBlockTopM (\x -> return $ f x) cmm) () + + +-- | map a function across all the basic blocks in this code (monadic version) +-- +mapBlockTopM + :: Monad m + => (LiveBasicBlock -> m LiveBasicBlock) + -> LiveCmmTop -> m LiveCmmTop + +mapBlockTopM f cmm@(CmmData{}) + = return cmm + +mapBlockTopM f (CmmProc header label params comps) + = do comps' <- mapM (mapBlockCompM f) comps + return $ CmmProc header label params comps' + +mapBlockCompM f (BasicBlock i blocks) + = do blocks' <- mapM f blocks + return $ BasicBlock i blocks' + + +-- | Slurp out the list of register conflicts from this top level thing. + +slurpConflicts :: LiveCmmTop -> Bag (UniqSet Reg) +slurpConflicts live + = slurpCmm emptyBag live + + where slurpCmm rs CmmData{} = rs + slurpCmm rs (CmmProc info _ _ blocks) + = foldl' (slurpComp info) rs blocks + + slurpComp info rs (BasicBlock i blocks) + = foldl' (slurpBlock info) rs blocks + + slurpBlock info rs (BasicBlock blockId instrs) + | LiveInfo _ _ blockLive <- info + , Just rsLiveEntry <- lookupUFM blockLive blockId + = consBag rsLiveEntry $ slurpLIs rsLiveEntry rs instrs + + slurpLIs rsLive rs [] = consBag rsLive rs + slurpLIs rsLive rs (Instr _ Nothing : lis) = slurpLIs rsLive rs lis + + slurpLIs rsLiveEntry rs (li@(Instr _ (Just live)) : lis) + = let + -- regs that die because they are read for the last time at the start of an instruction + -- are not live across it. + rsLiveAcross = rsLiveEntry `minusUniqSet` (liveDieRead live) + + -- regs live on entry to the next instruction. + -- be careful of orphans, make sure to delete dying regs _after_ unioning + -- in the ones that are born here. + rsLiveNext = (rsLiveAcross `unionUniqSets` (liveBorn live)) + `minusUniqSet` (liveDieWrite live) + + -- orphan vregs are the ones that die in the same instruction they are born in. + -- these are likely to be results that are never used, but we still + -- need to assign a hreg to them.. + rsOrphans = intersectUniqSets + (liveBorn live) + (unionUniqSets (liveDieWrite live) (liveDieRead live)) + + -- + rsConflicts = unionUniqSets rsLiveNext rsOrphans + + in slurpLIs rsLiveNext (consBag rsConflicts rs) lis + + +-- | Strip away liveness information, yielding NatCmmTop + +stripLive :: LiveCmmTop -> NatCmmTop +stripLive live + = stripCmm live + + where stripCmm (CmmData sec ds) = CmmData sec ds + stripCmm (CmmProc (LiveInfo info _ _) label params comps) + = CmmProc info label params (concatMap stripComp comps) + + stripComp (BasicBlock i blocks) = map stripBlock blocks + stripBlock (BasicBlock i instrs) = BasicBlock i (map stripLI instrs) + stripLI (Instr instr _) = instr + + +-- | Slurp out a map of how many times each register was live upon entry to an instruction. + +lifetimeCount + :: LiveCmmTop + -> UniqFM (Reg, Int) -- ^ reg -> (reg, count) + +lifetimeCount cmm + = countCmm emptyUFM cmm + where + countCmm fm CmmData{} = fm + countCmm fm (CmmProc info _ _ blocks) + = foldl' (countComp info) fm blocks + + countComp info fm (BasicBlock i blocks) + = foldl' (countBlock info) fm blocks + + countBlock info fm (BasicBlock blockId instrs) + | LiveInfo _ _ blockLive <- info + , Just rsLiveEntry <- lookupUFM blockLive blockId + = countLIs rsLiveEntry fm instrs + + countLIs rsLive fm [] = fm + countLIs rsLive fm (Instr _ Nothing : lis) = countLIs rsLive fm lis + + countLIs rsLiveEntry fm (Instr _ (Just live) : lis) + = let + rsLiveAcross = rsLiveEntry `minusUniqSet` (liveDieRead live) + + rsLiveNext = (rsLiveAcross `unionUniqSets` (liveBorn live)) + `minusUniqSet` (liveDieWrite live) + + add r fm = addToUFM_C + (\(r1, l1) (_, l2) -> (r1, l1 + l2)) + fm r (r, 1) + + fm' = foldUniqSet add fm rsLiveEntry + in countLIs rsLiveNext fm' lis + + +-- | Erase Delta instructions. + +eraseDeltasLive :: LiveCmmTop -> LiveCmmTop +eraseDeltasLive cmm + = mapBlockTop eraseBlock cmm + where + isDelta (DELTA _) = True + isDelta _ = False + + eraseBlock (BasicBlock id lis) + = BasicBlock id + $ filter (\(Instr i _) -> not $ isDelta i) + $ lis + + +-- | Patch the registers in this code according to this register mapping. +-- also erase reg -> reg moves when the reg is the same. +-- also erase reg -> reg moves when the destination dies in this instr. + +patchEraseLive + :: (Reg -> Reg) + -> LiveCmmTop -> LiveCmmTop + +patchEraseLive patchF cmm + = patchCmm cmm + where + patchCmm cmm@CmmData{} = cmm + + patchCmm cmm@(CmmProc info label params comps) + | LiveInfo static id blockMap <- info + = let patchRegSet set = mkUniqSet $ map patchF $ uniqSetToList set + blockMap' = mapUFM patchRegSet blockMap + + info' = LiveInfo static id blockMap' + in CmmProc info' label params $ map patchComp comps + + patchComp (BasicBlock id blocks) + = BasicBlock id $ map patchBlock blocks + + patchBlock (BasicBlock id lis) + = BasicBlock id $ patchInstrs lis + + patchInstrs [] = [] + patchInstrs (li : lis) + + | Instr i (Just live) <- li' + , Just (r1, r2) <- isRegRegMove i + , eatMe r1 r2 live + = patchInstrs lis + + | otherwise + = li' : patchInstrs lis + + where li' = patchRegsLiveInstr patchF li + + eatMe r1 r2 live + -- source and destination regs are the same + | r1 == r2 = True + + -- desination reg is never used + | elementOfUniqSet r2 (liveBorn live) + , elementOfUniqSet r2 (liveDieRead live) || elementOfUniqSet r2 (liveDieWrite live) + = True + + | otherwise = False + + +-- | Patch registers in this LiveInstr, including the liveness information. +-- +patchRegsLiveInstr + :: (Reg -> Reg) + -> LiveInstr -> LiveInstr + +patchRegsLiveInstr patchF li + = case li of + Instr instr Nothing + -> Instr (patchRegs instr patchF) Nothing + + Instr instr (Just live) + -> Instr + (patchRegs instr patchF) + (Just live + { -- WARNING: have to go via lists here because patchF changes the uniq in the Reg + liveBorn = mkUniqSet $ map patchF $ uniqSetToList $ liveBorn live + , liveDieRead = mkUniqSet $ map patchF $ uniqSetToList $ liveDieRead live + , liveDieWrite = mkUniqSet $ map patchF $ uniqSetToList $ liveDieWrite live }) + + +--------------------------------------------------------------------------------- +-- Annotate code with register liveness information +-- +regLiveness + :: NatCmmTop + -> UniqSM LiveCmmTop + +regLiveness cmm@(CmmData sec d) + = returnUs $ CmmData sec d + +regLiveness cmm@(CmmProc info lbl params []) + = returnUs $ CmmProc + (LiveInfo info Nothing emptyUFM) + lbl params [] + +regLiveness cmm@(CmmProc info lbl params blocks@(first:rest)) + = let first_id = blockId first + sccs = sccBlocks blocks + (ann_sccs, block_live) = computeLiveness sccs + + liveBlocks + = map (\scc -> case scc of + AcyclicSCC b@(BasicBlock l _) -> BasicBlock l [b] + CyclicSCC bs@(BasicBlock l _ : _) -> BasicBlock l bs + CyclicSCC [] + -> panic "RegLiveness.regLiveness: no blocks in scc list") + $ ann_sccs + + in returnUs $ CmmProc + (LiveInfo info (Just first_id) block_live) + lbl params liveBlocks + + +sccBlocks :: [NatBasicBlock] -> [SCC NatBasicBlock] +sccBlocks blocks = stronglyConnComp graph + where + getOutEdges :: [Instr] -> [BlockId] + getOutEdges instrs = foldl' (\a x -> jumpDests x a) [] instrs + + graph = [ (block, getUnique id, map getUnique (getOutEdges instrs)) + | block@(BasicBlock id instrs) <- blocks ] + + +-- ----------------------------------------------------------------------------- +-- Computing liveness + +computeLiveness + :: [SCC NatBasicBlock] + -> ([SCC LiveBasicBlock], -- instructions annotated with list of registers + -- which are "dead after this instruction". + BlockMap RegSet) -- blocks annontated with set of live registers + -- on entry to the block. + + -- NOTE: on entry, the SCCs are in "reverse" order: later blocks may transfer + -- control to earlier ones only. The SCCs returned are in the *opposite* + -- order, which is exactly what we want for the next pass. + +computeLiveness sccs + = livenessSCCs emptyBlockMap [] sccs + + +livenessSCCs + :: BlockMap RegSet + -> [SCC LiveBasicBlock] -- accum + -> [SCC NatBasicBlock] + -> ([SCC LiveBasicBlock], BlockMap RegSet) + +livenessSCCs blockmap done [] = (done, blockmap) + +livenessSCCs blockmap done (AcyclicSCC block : sccs) + = let (blockmap', block') = livenessBlock blockmap block + in livenessSCCs blockmap' (AcyclicSCC block' : done) sccs + +livenessSCCs blockmap done + (CyclicSCC blocks : sccs) = + livenessSCCs blockmap' (CyclicSCC blocks':done) sccs + where (blockmap', blocks') + = iterateUntilUnchanged linearLiveness equalBlockMaps + blockmap blocks + + iterateUntilUnchanged + :: (a -> b -> (a,c)) -> (a -> a -> Bool) + -> a -> b + -> (a,c) + + iterateUntilUnchanged f eq a b + = head $ + concatMap tail $ + groupBy (\(a1, _) (a2, _) -> eq a1 a2) $ + iterate (\(a, _) -> f a b) $ + (a, error "RegisterAlloc.livenessSCCs") + + + linearLiveness :: BlockMap RegSet -> [NatBasicBlock] + -> (BlockMap RegSet, [LiveBasicBlock]) + linearLiveness = mapAccumL livenessBlock + + -- probably the least efficient way to compare two + -- BlockMaps for equality. + equalBlockMaps a b + = a' == b' + where a' = map f $ ufmToList a + b' = map f $ ufmToList b + f (key,elt) = (key, uniqSetToList elt) + + + +-- | Annotate a basic block with register liveness information. +-- +livenessBlock + :: BlockMap RegSet + -> NatBasicBlock + -> (BlockMap RegSet, LiveBasicBlock) + +livenessBlock blockmap block@(BasicBlock block_id instrs) + = let + (regsLiveOnEntry, instrs1) + = livenessBack emptyUniqSet blockmap [] (reverse instrs) + blockmap' = addToUFM blockmap block_id regsLiveOnEntry + + instrs2 = livenessForward regsLiveOnEntry instrs1 + + output = BasicBlock block_id instrs2 + + in ( blockmap', output) + +-- | Calculate liveness going forwards, +-- filling in when regs are born + +livenessForward + :: RegSet -- regs live on this instr + -> [LiveInstr] -> [LiveInstr] + +livenessForward rsLiveEntry [] = [] +livenessForward rsLiveEntry (li@(Instr instr mLive) : lis) + | Nothing <- mLive + = li : livenessForward rsLiveEntry lis + + | Just live <- mLive + , RU read written <- regUsage instr + = let + -- Regs that are written to but weren't live on entry to this instruction + -- are recorded as being born here. + rsBorn = mkUniqSet + $ filter (\r -> not $ elementOfUniqSet r rsLiveEntry) written + + rsLiveNext = (rsLiveEntry `unionUniqSets` rsBorn) + `minusUniqSet` (liveDieRead live) + `minusUniqSet` (liveDieWrite live) + + in Instr instr (Just live { liveBorn = rsBorn }) + : livenessForward rsLiveNext lis + + +-- | Calculate liveness going backwards, +-- filling in when regs die, and what regs are live across each instruction + +livenessBack + :: RegSet -- regs live on this instr + -> BlockMap RegSet -- regs live on entry to other BBs + -> [LiveInstr] -- instructions (accum) + -> [Instr] -- instructions + -> (RegSet, [LiveInstr]) + +livenessBack liveregs blockmap done [] = (liveregs, done) + +livenessBack liveregs blockmap acc (instr : instrs) + = let (liveregs', instr') = liveness1 liveregs blockmap instr + in livenessBack liveregs' blockmap (instr' : acc) instrs + +-- don't bother tagging comments or deltas with liveness +liveness1 liveregs blockmap (instr@COMMENT{}) + = (liveregs, Instr instr Nothing) + +liveness1 liveregs blockmap (instr@DELTA{}) + = (liveregs, Instr instr Nothing) + +liveness1 liveregs blockmap instr + + | not_a_branch + = (liveregs1, Instr instr + (Just $ Liveness + { liveBorn = emptyUniqSet + , liveDieRead = mkUniqSet r_dying + , liveDieWrite = mkUniqSet w_dying })) + + | otherwise + = (liveregs_br, Instr instr + (Just $ Liveness + { liveBorn = emptyUniqSet + , liveDieRead = mkUniqSet r_dying_br + , liveDieWrite = mkUniqSet w_dying })) + + where + RU read written = regUsage instr + + -- registers that were written here are dead going backwards. + -- registers that were read here are live going backwards. + liveregs1 = (liveregs `delListFromUniqSet` written) + `addListToUniqSet` read + + -- registers that are not live beyond this point, are recorded + -- as dying here. + r_dying = [ reg | reg <- read, reg `notElem` written, + not (elementOfUniqSet reg liveregs) ] + + w_dying = [ reg | reg <- written, + not (elementOfUniqSet reg liveregs) ] + + -- union in the live regs from all the jump destinations of this + -- instruction. + targets = jumpDests instr [] -- where we go from here + not_a_branch = null targets + + targetLiveRegs target + = case lookupUFM blockmap target of + Just ra -> ra + Nothing -> emptyBlockMap + + live_from_branch = unionManyUniqSets (map targetLiveRegs targets) + + liveregs_br = liveregs1 `unionUniqSets` live_from_branch + + -- registers that are live only in the branch targets should + -- be listed as dying here. + live_branch_only = live_from_branch `minusUniqSet` liveregs + r_dying_br = uniqSetToList (mkUniqSet r_dying `unionUniqSets` + live_branch_only) + + + + diff --git a/compiler/nativeGen/RegSpill.hs b/compiler/nativeGen/RegSpill.hs new file mode 100644 index 0000000..4921cf1 --- /dev/null +++ b/compiler/nativeGen/RegSpill.hs @@ -0,0 +1,233 @@ + +module RegSpill ( + regSpill +) + +where + +#include "HsVersions.h" + +import RegLiveness +import RegAllocInfo +import MachRegs +import MachInstrs +import Cmm + +import Unique +import UniqFM +import UniqSet +import UniqSupply +import Outputable + +import Data.List +import Data.Maybe + + +-- | Spill all these virtual regs to memory +-- TODO: see if we can split some of the live ranges instead of just globally +-- spilling the virtual reg. +-- +-- TODO: On ciscy x86 and x86_64 we don't nessesarally have to add a mov instruction +-- when making spills. If an instr is using a spilled virtual we may be able to +-- address the spill slot directly. +-- +regSpill + :: [LiveCmmTop] -- ^ the code + -> UniqSet Int -- ^ available stack slots + -> UniqSet Reg -- ^ the regs to spill + -> UniqSM + ([LiveCmmTop] -- ^ code will spill instructions + , UniqSet Int) -- ^ left over slots + +regSpill code slotsFree regs + + -- not enough slots to spill these regs + | sizeUniqSet slotsFree < sizeUniqSet regs + = pprPanic "regSpill: out of spill slots!" + ( text " regs to spill = " <> ppr (sizeUniqSet regs) + $$ text " slots left = " <> ppr (sizeUniqSet slotsFree)) + + | otherwise + = do + -- allocate a slot for each of the spilled regs + let slots = take (sizeUniqSet regs) $ uniqSetToList slotsFree + let regSlotMap = listToUFM + $ zip (uniqSetToList regs) slots + + -- grab the unique supply from the monad + us <- getUs + + -- run the spiller on all the blocks + let (# code', _ #) = + runSpill (mapM (mapBlockTopM (regSpill_block regSlotMap)) code) + (initSpillS us) + + return ( code' + , minusUniqSet slotsFree (mkUniqSet slots) ) + + +regSpill_block regSlotMap (BasicBlock i instrs) + = do instrss' <- mapM (regSpill_instr regSlotMap) instrs + return $ BasicBlock i (concat instrss') + + +regSpill_instr _ li@(Instr (DELTA delta) _) + = do + setDelta delta + return [li] + +regSpill_instr _ li@(Instr _ Nothing) + = do return [li] + + +regSpill_instr regSlotMap + (Instr instr (Just live)) + = do + -- work out which regs are read and written in this instr + let RU rlRead rlWritten = regUsage instr + + -- sometimes a register is listed as being read more than once, + -- nub this so we don't end up inserting two lots of spill code. + let rsRead_ = nub rlRead + let rsWritten_ = nub rlWritten + + -- if a reg is modified, it appears in both lists, want to undo this.. + let rsRead = rsRead_ \\ rsWritten_ + let rsWritten = rsWritten_ \\ rsRead_ + let rsModify = intersect rsRead_ rsWritten_ + + -- work out if any of the regs being used are currently being spilled. + let rsSpillRead = filter (\r -> elemUFM r regSlotMap) rsRead + let rsSpillWritten = filter (\r -> elemUFM r regSlotMap) rsWritten + let rsSpillModify = filter (\r -> elemUFM r regSlotMap) rsModify + + -- rewrite the instr and work out spill code. + (instr1, prepost1) <- mapAccumLM (spillRead regSlotMap) instr rsSpillRead + (instr2, prepost2) <- mapAccumLM (spillWrite regSlotMap) instr1 rsSpillWritten + (instr3, prepost3) <- mapAccumLM (spillModify regSlotMap) instr2 rsSpillModify + + let (mPrefixes, mPostfixes) = unzip (prepost1 ++ prepost2 ++ prepost3) + let prefixes = concat mPrefixes + let postfixes = concat mPostfixes + + -- final code + let instrs' = map (\i -> Instr i Nothing) prefixes + ++ [ Instr instr3 Nothing ] + ++ map (\i -> Instr i Nothing) postfixes + + return +{- $ pprTrace "* regSpill_instr spill" + ( text "instr = " <> ppr instr + $$ text "read = " <> ppr rsSpillRead + $$ text "write = " <> ppr rsSpillWritten + $$ text "mod = " <> ppr rsSpillModify + $$ text "-- out" + $$ (vcat $ map ppr instrs') + $$ text " ") +-} + $ instrs' + + +spillRead regSlotMap instr reg + | Just slot <- lookupUFM regSlotMap reg + = do delta <- getDelta + (instr', nReg) <- patchInstr reg instr + + let pre = [ COMMENT FSLIT("spill read") + , mkLoadInstr nReg delta slot ] + + return ( instr', (pre, [])) + + | otherwise = panic "RegSpill.spillRead: no slot defined for spilled reg" + +spillWrite regSlotMap instr reg + | Just slot <- lookupUFM regSlotMap reg + = do delta <- getDelta + (instr', nReg) <- patchInstr reg instr + + let post = [ COMMENT FSLIT("spill write") + , mkSpillInstr nReg delta slot ] + + return ( instr', ([], post)) + + | otherwise = panic "RegSpill.spillWrite: no slot defined for spilled reg" + +spillModify regSlotMap instr reg + | Just slot <- lookupUFM regSlotMap reg + = do delta <- getDelta + (instr', nReg) <- patchInstr reg instr + + let pre = [ COMMENT FSLIT("spill mod load") + , mkLoadInstr nReg delta slot ] + + let post = [ COMMENT FSLIT("spill mod write") + , mkSpillInstr nReg delta slot ] + + return ( instr', (pre, post)) + + | otherwise = panic "RegSpill.spillModify: no slot defined for spilled reg" + + +-- | rewrite uses of this virtual reg in an instr to use a different virtual reg +patchInstr :: Reg -> Instr -> SpillM (Instr, Reg) +patchInstr reg instr + = do nUnique <- newUnique + let nReg = renameVirtualReg nUnique reg + let instr' = patchReg1 reg nReg instr + return (instr', nReg) + +patchReg1 :: Reg -> Reg -> Instr -> Instr +patchReg1 old new instr + = let patchF r + | r == old = new + | otherwise = r + in patchRegs instr patchF + + +------------------------------------------------------------------------------------------- +-- Spiller monad + +data SpillS + = SpillS + { stateDelta :: Int + , stateUS :: UniqSupply } + +initSpillS uniqueSupply + = SpillS + { stateDelta = 0 + , stateUS = uniqueSupply } + +newtype SpillM a + = SpillM + { runSpill :: SpillS -> (# a, SpillS #) } + +instance Monad SpillM where + return x = SpillM $ \s -> (# x, s #) + + m >>= n = SpillM $ \s -> + case runSpill m s of + (# r, s' #) -> runSpill (n r) s' + +setDelta :: Int -> SpillM () +setDelta delta + = SpillM $ \s -> (# (), s { stateDelta = delta } #) + +getDelta :: SpillM Int +getDelta = SpillM $ \s -> (# stateDelta s, s #) + +newUnique :: SpillM Unique +newUnique + = SpillM $ \s + -> case splitUniqSupply (stateUS s) of + (us1, us2) + -> (# uniqFromSupply us1 + , s { stateUS = us2 } #) + +mapAccumLM _ s [] = return (s, []) +mapAccumLM f s (x:xs) + = do + (s1, x') <- f s x + (s2, xs') <- mapAccumLM f s1 xs + return (s2, x' : xs') + + diff --git a/compiler/utils/State.hs b/compiler/utils/State.hs new file mode 100644 index 0000000..faed566 --- /dev/null +++ b/compiler/utils/State.hs @@ -0,0 +1,31 @@ + +module State where + +newtype State s a + = State + { runState :: s -> (# a, s #) } + +instance Monad (State s) where + return x = State $ \s -> (# x, s #) + m >>= n = State $ \s -> + case runState m s of + (# r, s' #) -> runState (n r) s' + +get :: State s s +get = State $ \s -> (# s, s #) + +put :: s -> State s () +put s' = State $ \s -> (# (), s' #) + +modify :: (s -> s) -> State s () +modify f = State $ \s -> (# (), f s #) + +evalState :: State s a -> s -> a +evalState s i + = case runState s i of + (# a, s' #) -> a + +execState :: State s a -> s -> s +execState s i + = case runState s i of + (# a, s' #) -> s' -- 1.7.10.4