From: Ben.Lippmeier@anu.edu.au Date: Wed, 22 Aug 2007 17:04:12 +0000 (+0000) Subject: Refactor cmmNativeGen so dumps can be emitted inline with NCG stages X-Git-Url: http://git.megacz.com/?p=ghc-hetmet.git;a=commitdiff_plain;h=f2cd56cf9fc310c9b587ecb5dfaee4ad6b580355 Refactor cmmNativeGen so dumps can be emitted inline with NCG stages --- diff --git a/compiler/main/CodeOutput.lhs b/compiler/main/CodeOutput.lhs index 8b5138a..b155a35 100644 --- a/compiler/main/CodeOutput.lhs +++ b/compiler/main/CodeOutput.lhs @@ -81,7 +81,7 @@ codeOutput dflags this_mod location foreign_stubs pkg_deps flat_abstractC ; stubs_exist <- outputForeignStubs dflags this_mod location foreign_stubs ; case hscTarget dflags of { HscInterpreted -> return (); - HscAsm -> outputAsm dflags filenm this_mod location flat_abstractC; + HscAsm -> outputAsm dflags filenm flat_abstractC; HscC -> outputC dflags filenm this_mod location flat_abstractC stubs_exist pkg_deps foreign_stubs; @@ -158,13 +158,13 @@ outputC dflags filenm mod location flat_absC %************************************************************************ \begin{code} -outputAsm dflags filenm this_mod location flat_absC +outputAsm dflags filenm flat_absC #ifndef OMIT_NATIVE_CODEGEN = do ncg_uniqs <- mkSplitUniqSupply 'n' ncg_output_d <- {-# SCC "NativeCodeGen" #-} - nativeCodeGen dflags this_mod location flat_absC ncg_uniqs + nativeCodeGen dflags flat_absC ncg_uniqs dumpIfSet_dyn dflags Opt_D_dump_asm "Asm code" (docToSDoc ncg_output_d) {-# SCC "OutputAsm" #-} doOutput filenm $ \f -> printDoc LeftMode f ncg_output_d diff --git a/compiler/main/DynFlags.hs b/compiler/main/DynFlags.hs index f2906e7..c3b1d61 100644 --- a/compiler/main/DynFlags.hs +++ b/compiler/main/DynFlags.hs @@ -102,7 +102,7 @@ data DynFlag | Opt_D_dump_asm_regalloc | Opt_D_dump_asm_regalloc_stages | Opt_D_dump_asm_conflicts - | Opt_D_drop_asm_stats + | Opt_D_dump_asm_stats | Opt_D_dump_cpranal | Opt_D_dump_deriv | Opt_D_dump_ds @@ -142,7 +142,7 @@ data DynFlag | Opt_D_dump_minimal_imports | Opt_D_dump_mod_cycles | Opt_D_faststring_stats - | Opt_DumpToFile -- Redirect dump output to files instead of stdout. + | Opt_DumpToFile -- ^ Append dump output to files instead of stdout. | Opt_DoCoreLinting | Opt_DoStgLinting | Opt_DoCmmLinting @@ -1028,7 +1028,7 @@ dynamic_flags = [ , ( "ddump-asm-conflicts", setDumpFlag Opt_D_dump_asm_conflicts) , ( "ddump-asm-regalloc-stages", setDumpFlag Opt_D_dump_asm_regalloc_stages) - , ( "ddrop-asm-stats", setDumpFlag Opt_D_drop_asm_stats) + , ( "ddump-asm-stats", setDumpFlag Opt_D_dump_asm_stats) , ( "ddump-cpranal", setDumpFlag Opt_D_dump_cpranal) , ( "ddump-deriv", setDumpFlag Opt_D_dump_deriv) , ( "ddump-ds", setDumpFlag Opt_D_dump_ds) diff --git a/compiler/main/ErrUtils.lhs b/compiler/main/ErrUtils.lhs index d93fb1b..8b3af12 100644 --- a/compiler/main/ErrUtils.lhs +++ b/compiler/main/ErrUtils.lhs @@ -16,7 +16,7 @@ module ErrUtils ( ghcExit, doIfSet, doIfSet_dyn, - dumpIfSet, dumpIfSet_core, dumpIfSet_dyn, dumpIfSet_dyn_or, mkDumpDoc, + dumpIfSet, dumpIfSet_core, dumpIfSet_dyn, dumpIfSet_dyn_or, mkDumpDoc, dumpSDoc, -- * Messages during compilation putMsg, @@ -199,13 +199,13 @@ dumpIfSet_core dflags flag hdr doc | dopt flag dflags || verbosity dflags >= 4 || dopt Opt_D_verbose_core2core dflags - = writeDump dflags flag (mkDumpDoc hdr doc) + = dumpSDoc dflags flag hdr doc | otherwise = return () dumpIfSet_dyn :: DynFlags -> DynFlag -> String -> SDoc -> IO () dumpIfSet_dyn dflags flag hdr doc | dopt flag dflags || verbosity dflags >= 4 - = writeDump dflags flag (mkDumpDoc hdr doc) + = dumpSDoc dflags flag hdr doc | otherwise = return () @@ -228,11 +228,13 @@ mkDumpDoc hdr doc -- | Write out a dump. -- If --dump-to-file is set then this goes to a file. -- otherwise emit to stdout. -writeDump :: DynFlags -> DynFlag -> SDoc -> IO () -writeDump dflags dflag doc +dumpSDoc :: DynFlags -> DynFlag -> String -> SDoc -> IO () +dumpSDoc dflags dflag hdr doc = do let mFile = chooseDumpFile dflags dflag case mFile of -- write the dump to a file + -- don't add the header in this case, we can see what kind + -- of dump it is from the filename. Just fileName -> do handle <- openFile fileName AppendMode hPrintDump handle doc @@ -240,7 +242,7 @@ writeDump dflags dflag doc -- write the dump to stdout Nothing - -> do printDump doc + -> do printDump (mkDumpDoc hdr doc) -- | Choose where to put a dump file based on DynFlags diff --git a/compiler/nativeGen/AsmCodeGen.lhs b/compiler/nativeGen/AsmCodeGen.lhs index bc63e81..c606918 100644 --- a/compiler/nativeGen/AsmCodeGen.lhs +++ b/compiler/nativeGen/AsmCodeGen.lhs @@ -31,6 +31,7 @@ import CmmOpt ( cmmMiniInline, cmmMachOpFold ) import PprCmm ( pprStmt, pprCmms, pprCmm ) import MachOp import CLabel +import State import UniqFM import Unique ( Unique, getUnique ) @@ -49,6 +50,7 @@ import qualified Pretty import Outputable import FastString import UniqSet +import ErrUtils -- DEBUGGING ONLY --import OrdList @@ -115,224 +117,155 @@ The machine-dependent bits break down as follows: -- NB. We *lazilly* compile each block of code for space reasons. -------------------- -nativeCodeGen :: DynFlags -> Module -> ModLocation -> [RawCmm] -> UniqSupply -> IO Pretty.Doc -nativeCodeGen dflags mod modLocation cmms us - = let (res, _) = initUs us $ - cgCmm (concat (map add_split cmms)) +nativeCodeGen :: DynFlags -> [RawCmm] -> UniqSupply -> IO Pretty.Doc +nativeCodeGen dflags cmms us + = do + -- do native code generation on all these cmm things + (us', result) + <- mapAccumLM (cmmNativeGen dflags) us + $ concat $ map add_split cmms - cgCmm :: [RawCmmTop] -> UniqSM ( [CmmNativeGenDump], Pretty.Doc, [CLabel]) - cgCmm tops = - lazyMapUs (cmmNativeGen dflags) tops `thenUs` \ results -> - case unzip3 results of { (dump,docs,imps) -> - returnUs (dump, my_vcat docs, concat imps) - } - in - case res of { (dump, insn_sdoc, imports) -> do + let (native, imports, mColorStats, mLinearStats) + = unzip4 result - cmmNativeGenDump dflags mod modLocation dump + -- dump global NCG stats for graph coloring allocator + (case concat $ catMaybes mColorStats of + [] -> return () + stats -> do + -- build the global register conflict graph + let graphGlobal + = foldl Color.union Color.initGraph + $ [ Color.raGraph stat + | stat@Color.RegAllocStatsStart{} <- stats] + + dumpSDoc dflags Opt_D_dump_asm_stats "NCG stats" + $ Color.pprStats stats graphGlobal - return (insn_sdoc Pretty.$$ dyld_stubs imports + dumpIfSet_dyn dflags + Opt_D_dump_asm_conflicts "Register conflict graph" + $ Color.dotGraph Color.regDotColor trivColorable + $ graphGlobal) -#if HAVE_SUBSECTIONS_VIA_SYMBOLS - -- On recent versions of Darwin, the linker supports - -- dead-stripping of code and data on a per-symbol basis. - -- There's a hack to make this work in PprMach.pprNatCmmTop. - Pretty.$$ Pretty.text ".subsections_via_symbols" -#endif -#if HAVE_GNU_NONEXEC_STACK - -- On recent GNU ELF systems one can mark an object file - -- as not requiring an executable stack. If all objects - -- linked into a program have this note then the program - -- will not use an executable stack, which is good for - -- security. GHC generated code does not need an executable - -- stack so add the note in: - Pretty.$$ Pretty.text ".section .note.GNU-stack,\"\",@progbits" -#endif -#if !defined(darwin_TARGET_OS) - -- And just because every other compiler does, lets stick in - -- an identifier directive: .ident "GHC x.y.z" - Pretty.$$ let compilerIdent = Pretty.text "GHC" Pretty.<+> - Pretty.text cProjectVersion - in Pretty.text ".ident" Pretty.<+> - Pretty.doubleQuotes compilerIdent -#endif - ) - } - where + -- dump global NCG stats for linear allocator + (case catMaybes mLinearStats of + [] -> return () + stats -> dumpSDoc dflags Opt_D_dump_asm_stats "NCG stats" + $ Linear.pprStats (concat stats)) - add_split (Cmm tops) - | dopt Opt_SplitObjs dflags = split_marker : tops - | otherwise = tops + return $ makeAsmDoc (concat native) (concat imports) - split_marker = CmmProc [] mkSplitMarkerLabel [] [] + where add_split (Cmm tops) + | dopt Opt_SplitObjs dflags = split_marker : tops + | otherwise = tops - -- Generate "symbol stubs" for all external symbols that might - -- come from a dynamic library. -{- dyld_stubs imps = Pretty.vcat $ map pprDyldSymbolStub $ - map head $ group $ sort imps-} - - -- (Hack) sometimes two Labels pretty-print the same, but have - -- different uniques; so we compare their text versions... - dyld_stubs imps - | needImportedSymbols - = Pretty.vcat $ - (pprGotDeclaration :) $ - map (pprImportedSymbol . fst . head) $ - groupBy (\(_,a) (_,b) -> a == b) $ - sortBy (\(_,a) (_,b) -> compare a b) $ - map doPpr $ - imps - | otherwise - = Pretty.empty - - where doPpr lbl = (lbl, Pretty.render $ pprCLabel lbl astyle) - astyle = mkCodeStyle AsmStyle + split_marker = CmmProc [] mkSplitMarkerLabel [] [] -#ifndef NCG_DEBUG - my_vcat sds = Pretty.vcat sds -#else - my_vcat sds = Pretty.vcat ( - intersperse ( - Pretty.char ' ' - Pretty.$$ Pretty.ptext SLIT("# ___ncg_debug_marker") - Pretty.$$ Pretty.char ' ' - ) - sds - ) -#endif +-- | Complete native code generation phase for a single top-level chunk of Cmm. +-- Dumping the output of each stage along the way. +-- Global conflict graph and NGC stats +cmmNativeGen + :: DynFlags + -> UniqSupply + -> RawCmmTop + -> IO ( UniqSupply + , ( [NatCmmTop] + , [CLabel] + , Maybe [Color.RegAllocStats] + , Maybe [Linear.RegAllocStats])) + +cmmNativeGen dflags us cmm + = do + -- rewrite assignments to global regs + let (fixed_cmm, usFix) = + initUs us $ fixAssignsTop cmm --- Carries output of the code generator passes, for dumping. --- Make sure to only fill the one's we're interested in to avoid --- creating space leaks. + -- cmm to cmm optimisations + let (opt_cmm, imports) = + cmmToCmm dflags fixed_cmm -data CmmNativeGenDump - = CmmNativeGenDump - { cdCmmOpt :: RawCmmTop - , cdNative :: [NatCmmTop] - , cdLiveness :: [LiveCmmTop] - , cdCoalesce :: Maybe [LiveCmmTop] - , cdRegAllocStats :: Maybe [Color.RegAllocStats] - , cdRegAllocStatsLinear :: [Linear.RegAllocStats] - , cdColoredGraph :: Maybe (Color.Graph Reg RegClass Reg) - , cdAlloced :: [NatCmmTop] } + dumpIfSet_dyn dflags + Opt_D_dump_opt_cmm "Optimised Cmm" + (pprCmm $ Cmm [opt_cmm]) -dchoose dflags opt a b - | dopt opt dflags = a - | otherwise = b -dchooses dflags opts a b - | or $ map ( (flip dopt) dflags) opts = a - | otherwise = b + -- generate native code from cmm + let ((native, lastMinuteImports), usGen) = + initUs usFix $ genMachCode dflags opt_cmm --- | Complete native code generation phase for a single top-level chunk of Cmm. --- Unless they're being dumped, intermediate data structures are squashed after --- every stage to avoid creating space leaks. --- --- TODO: passing data via CmmNativeDump/squashing structs has become a horrible mess. --- it might be better to forgo trying to keep all the outputs for each --- stage together and just thread IO() through cmmNativeGen so we can dump --- what we want to after each stage. --- -cmmNativeGen :: DynFlags -> RawCmmTop -> UniqSM (CmmNativeGenDump, Pretty.Doc, [CLabel]) -cmmNativeGen dflags cmm - = do - -- - fixed_cmm - <- {-# SCC "fixAssigns" #-} - fixAssignsTop cmm - - ---- cmm to cmm optimisations - (cmm, imports, ppr_cmm) - <- (\fixed_cmm - -> {-# SCC "genericOpt" #-} - do let (cmm, imports) = cmmToCmm dflags fixed_cmm - - return ( cmm - , imports - , dchoose dflags Opt_D_dump_cmm cmm (CmmData Text [])) - ) fixed_cmm - - - ---- generate native code from cmm - (native, lastMinuteImports, ppr_native) - <- (\cmm - -> {-# SCC "genMachCode" #-} - do (machCode, lastMinuteImports) - <- genMachCode dflags cmm - - return ( machCode - , lastMinuteImports - , dchoose dflags Opt_D_dump_asm_native machCode []) - ) cmm - - - ---- tag instructions with register liveness information - (withLiveness, ppr_withLiveness) - <- (\native - -> {-# SCC "regLiveness" #-} - do - withLiveness <- mapUs regLiveness native - - return ( withLiveness - , dchoose dflags Opt_D_dump_asm_liveness withLiveness [])) - native - - ---- allocate registers - ( alloced, ppr_alloced, ppr_coalesce - , ppr_regAllocStats, ppr_regAllocStatsLinear, ppr_coloredGraph) - <- (\withLiveness - -> {-# SCC "regAlloc" #-} - do - if dopt Opt_RegsGraph dflags - then do - -- the regs usable for allocation - let alloc_regs - = foldr (\r -> plusUFM_C unionUniqSets - $ unitUFM (regClass r) (unitUniqSet r)) - emptyUFM - $ map RealReg allocatableRegs - - -- aggressively coalesce moves between virtual regs - coalesced <- regCoalesce withLiveness - - -- graph coloring register allocation - (alloced, regAllocStats) - <- Color.regAlloc - alloc_regs - (mkUniqSet [0..maxSpillSlots]) - coalesced - - return ( alloced - , dchoose dflags Opt_D_dump_asm_regalloc - alloced [] - , dchoose dflags Opt_D_dump_asm_coalesce - (Just coalesced) Nothing - , dchooses dflags - [ Opt_D_dump_asm_regalloc_stages - , Opt_D_drop_asm_stats] - (Just regAllocStats) Nothing - , [] - , dchoose dflags Opt_D_dump_asm_conflicts - Nothing Nothing) - - else do - -- do linear register allocation - (alloced, stats) - <- liftM unzip - $ mapUs Linear.regAlloc withLiveness - - return ( alloced - , dchoose dflags Opt_D_dump_asm_regalloc - alloced [] - , Nothing - , Nothing - , dchoose dflags Opt_D_drop_asm_stats - (catMaybes stats) [] - , Nothing )) - withLiveness - + dumpIfSet_dyn dflags + Opt_D_dump_asm_native "Native code" + (vcat $ map (docToSDoc . pprNatCmmTop) native) + + + -- tag instructions with register liveness information + let (withLiveness, usLive) = + initUs usGen $ mapUs regLiveness native + + dumpIfSet_dyn dflags + Opt_D_dump_asm_liveness "Liveness annotations added" + (vcat $ map ppr withLiveness) + + + -- allocate registers + (alloced, usAlloc, ppr_raStatsColor, ppr_raStatsLinear) <- + if dopt Opt_RegsGraph dflags + then do + -- the regs usable for allocation + let alloc_regs + = foldr (\r -> plusUFM_C unionUniqSets + $ unitUFM (regClass r) (unitUniqSet r)) + emptyUFM + $ map RealReg allocatableRegs + + -- aggressively coalesce moves between virtual regs + let (coalesced, usCoalesce) + = initUs usLive $ regCoalesce withLiveness + + dumpIfSet_dyn dflags + Opt_D_dump_asm_coalesce "Reg-Reg moves coalesced" + (vcat $ map ppr coalesced) + + -- graph coloring register allocation + let ((alloced, regAllocStats), usAlloc) + = initUs usCoalesce + $ Color.regAlloc + alloc_regs + (mkUniqSet [0..maxSpillSlots]) + coalesced + + dumpIfSet_dyn dflags + Opt_D_dump_asm_regalloc "Registers allocated" + (vcat $ map (docToSDoc . pprNatCmmTop) alloced) + + dumpIfSet_dyn dflags + Opt_D_dump_asm_regalloc_stages "Build/spill stages" + (vcat $ map (\(stage, stats) + -> text "-- Stage " <> int stage + $$ ppr stats) + $ zip [0..] regAllocStats) + + return ( alloced, usAlloc + , if dopt Opt_D_dump_asm_stats dflags + then Just regAllocStats else Nothing + , Nothing) + + else do + -- do linear register allocation + let ((alloced, regAllocStats), usAlloc) + = initUs usLive + $ liftM unzip + $ mapUs Linear.regAlloc withLiveness + + dumpIfSet_dyn dflags + Opt_D_dump_asm_regalloc "Registers allocated" + (vcat $ map (docToSDoc . pprNatCmmTop) alloced) + + return ( alloced, usAlloc + , Nothing + , if dopt Opt_D_dump_asm_stats dflags + then Just (catMaybes regAllocStats) else Nothing) ---- shortcut branches let shorted = @@ -352,24 +285,13 @@ cmmNativeGen dflags cmm #else sequenced #endif - - ---- vcat - let final_sdoc = - {-# SCC "vcat" #-} - Pretty.vcat (map pprNatCmmTop final_mach_code) - - let dump = - CmmNativeGenDump - { cdCmmOpt = ppr_cmm - , cdNative = ppr_native - , cdLiveness = ppr_withLiveness - , cdCoalesce = ppr_coalesce - , cdRegAllocStats = ppr_regAllocStats - , cdRegAllocStatsLinear = ppr_regAllocStatsLinear - , cdColoredGraph = ppr_coloredGraph - , cdAlloced = ppr_alloced } - - returnUs (dump, final_sdoc Pretty.$$ Pretty.text "", lastMinuteImports ++ imports) + + return ( usAlloc + , ( final_mach_code + , lastMinuteImports ++ imports + , ppr_raStatsColor + , ppr_raStatsLinear) ) + #if i386_TARGET_ARCH x86fp_kludge :: NatCmmTop -> NatCmmTop @@ -382,77 +304,62 @@ x86fp_kludge top@(CmmProc info lbl params code) = #endif --- Dump output of native code generator passes --- stripe across the outputs for each block so all the information for a --- certain stage is concurrent in the dumps. +-- | Build assembler source file from native code and its imports. -- -cmmNativeGenDump :: DynFlags -> Module -> ModLocation -> [CmmNativeGenDump] -> IO () -cmmNativeGenDump dflags mod modLocation dump - = do - dumpIfSet_dyn dflags - Opt_D_dump_opt_cmm "Optimised Cmm" - (pprCmm $ Cmm $ map cdCmmOpt dump) +makeAsmDoc :: [NatCmmTop] -> [CLabel] -> Pretty.Doc +makeAsmDoc native imports + = Pretty.vcat (map pprNatCmmTop native) + Pretty.$$ (Pretty.text "") + Pretty.$$ dyld_stubs imports - dumpIfSet_dyn dflags - Opt_D_dump_asm_native "Native code" - (vcat $ map (docToSDoc . pprNatCmmTop) $ concatMap cdNative dump) +#if HAVE_SUBSECTIONS_VIA_SYMBOLS + -- On recent versions of Darwin, the linker supports + -- dead-stripping of code and data on a per-symbol basis. + -- There's a hack to make this work in PprMach.pprNatCmmTop. + Pretty.$$ Pretty.text ".subsections_via_symbols" +#endif +#if HAVE_GNU_NONEXEC_STACK + -- On recent GNU ELF systems one can mark an object file + -- as not requiring an executable stack. If all objects + -- linked into a program have this note then the program + -- will not use an executable stack, which is good for + -- security. GHC generated code does not need an executable + -- stack so add the note in: + Pretty.$$ Pretty.text ".section .note.GNU-stack,\"\",@progbits" +#endif +#if !defined(darwin_TARGET_OS) + -- And just because every other compiler does, lets stick in + -- an identifier directive: .ident "GHC x.y.z" + Pretty.$$ let compilerIdent = Pretty.text "GHC" Pretty.<+> + Pretty.text cProjectVersion + in Pretty.text ".ident" Pretty.<+> + Pretty.doubleQuotes compilerIdent +#endif - dumpIfSet_dyn dflags - Opt_D_dump_asm_liveness "Liveness annotations added" - (vcat $ map (ppr . cdLiveness) dump) + where + -- Generate "symbol stubs" for all external symbols that might + -- come from a dynamic library. + dyld_stubs :: [CLabel] -> Pretty.Doc +{- dyld_stubs imps = Pretty.vcat $ map pprDyldSymbolStub $ + map head $ group $ sort imps-} - dumpIfSet_dyn dflags - Opt_D_dump_asm_coalesce "Reg-Reg moves coalesced" - (vcat $ map (fromMaybe empty . liftM ppr . cdCoalesce) dump) + -- (Hack) sometimes two Labels pretty-print the same, but have + -- different uniques; so we compare their text versions... + dyld_stubs imps + | needImportedSymbols + = Pretty.vcat $ + (pprGotDeclaration :) $ + map (pprImportedSymbol . fst . head) $ + groupBy (\(_,a) (_,b) -> a == b) $ + sortBy (\(_,a) (_,b) -> compare a b) $ + map doPpr $ + imps + | otherwise + = Pretty.empty + + doPpr lbl = (lbl, Pretty.render $ pprCLabel lbl astyle) + astyle = mkCodeStyle AsmStyle - dumpIfSet_dyn dflags - Opt_D_dump_asm_regalloc "Registers allocated" - (vcat $ map (docToSDoc . pprNatCmmTop) $ concatMap cdAlloced dump) - - -- with the graph coloring allocator, show the result of each build/spill stage - -- for each block in turn. - when (dopt Opt_D_dump_asm_regalloc_stages dflags) - $ do mapM_ (\stats - -> printDump - $ vcat $ map (\(stage, stats) -> - text "-- Stage " <> int stage - $$ ppr stats) - (zip [0..] stats)) - $ map (fromMaybe [] . cdRegAllocStats) dump - - -- Build a global register conflict graph. - -- If you want to see the graph for just one basic block then use asm-regalloc-stages instead. - dumpIfSet_dyn dflags - Opt_D_dump_asm_conflicts "Register conflict graph" - $ Color.dotGraph Color.regDotColor trivColorable - $ foldl Color.union Color.initGraph - $ catMaybes $ map cdColoredGraph dump - - -- Drop native code generator statistics. - -- This is potentially a large amount of information, and we want to be able - -- to collect it while running nofib. Drop a new file instead of emitting - -- it to stdout/stderr. - -- - when (dopt Opt_D_drop_asm_stats dflags) - $ do -- make the drop file name based on the object file name - let dropFile = (init $ ml_obj_file modLocation) ++ "drop-asm-stats" - - -- slurp out all the regalloc stats - let stats = concat $ catMaybes $ map cdRegAllocStats dump - - -- build a global conflict graph - let graph = foldl Color.union Color.initGraph $ map Color.raGraph stats - - -- pretty print the various sections and write out the file. - let outSpills = Color.pprStatsSpills stats - let outLife = Color.pprStatsLifetimes stats - let outConflict = Color.pprStatsConflict stats - let outScatter = Color.pprStatsLifeConflict stats graph - - writeFile dropFile - (showSDoc $ vcat [outSpills, outLife, outConflict, outScatter]) - - return () -- ----------------------------------------------------------------------------- -- Sequencing the basic blocks diff --git a/compiler/nativeGen/RegAllocColor.hs b/compiler/nativeGen/RegAllocColor.hs index 40e3bc3..5ce2a6c 100644 --- a/compiler/nativeGen/RegAllocColor.hs +++ b/compiler/nativeGen/RegAllocColor.hs @@ -63,7 +63,7 @@ regAlloc regsFree slotsFree code <- regAlloc_spin 0 trivColorable regsFree slotsFree [] code return ( code_final - , debug_codeGraphs ) + , reverse debug_codeGraphs ) regAlloc_spin (spinCount :: Int) triv regsFree slotsFree debug_codeGraphs code = do @@ -84,6 +84,16 @@ regAlloc_spin (spinCount :: Int) triv regsFree slotsFree debug_codeGraphs code let fmLife = plusUFMs_C (\(r1, l1) (r2, l2) -> (r1, l1 + l2)) $ map lifetimeCount code + -- record startup state + let stat1 = + if spinCount == 0 + then Just $ RegAllocStatsStart + { raLiveCmm = code + , raGraph = graph + , raLifetimes = fmLife } + else Nothing + + -- the function to choose regs to leave uncolored let spill = chooseSpill_maxLife fmLife @@ -101,13 +111,11 @@ regAlloc_spin (spinCount :: Int) triv regsFree slotsFree debug_codeGraphs code -- record what happened in this stage for debugging let stat = RegAllocStatsColored - { raLiveCmm = code - , raGraph = graph_colored - , raPatchedCmm = code_patched - , raLifetimes = fmLife } + { raGraph = graph_colored + , raPatchedCmm = code_patched } return ( code_nat - , debug_codeGraphs ++ [stat] + , maybeToList stat1 ++ [stat] ++ debug_codeGraphs , graph_colored) else do @@ -122,14 +130,14 @@ regAlloc_spin (spinCount :: Int) triv regsFree slotsFree debug_codeGraphs code -- record what happened in this stage for debugging let stat = RegAllocStatsSpill - { raLiveCmm = code_spilled - , raGraph = graph_colored + { raGraph = graph_colored , raSpillStats = spillStats - , raLifetimes = fmLife } + , raLifetimes = fmLife + , raSpilled = code_spilled } -- try again regAlloc_spin (spinCount + 1) triv regsFree slotsFree' - (debug_codeGraphs ++ [stat]) + (maybeToList stat1 ++ [stat] ++ debug_codeGraphs) code_relive diff --git a/compiler/nativeGen/RegAllocLinear.hs b/compiler/nativeGen/RegAllocLinear.hs index 18e8ba0..d9ff121 100644 --- a/compiler/nativeGen/RegAllocLinear.hs +++ b/compiler/nativeGen/RegAllocLinear.hs @@ -82,7 +82,7 @@ The algorithm is roughly: module RegAllocLinear ( regAlloc, - RegAllocStats + RegAllocStats, pprStats ) where #include "HsVersions.h" @@ -103,7 +103,7 @@ import Outputable #ifndef DEBUG import Data.Maybe ( fromJust ) #endif -import Data.List ( nub, partition, mapAccumL) +import Data.List ( nub, partition, mapAccumL, foldl') import Control.Monad ( when ) import Data.Word import Data.Bits @@ -1000,7 +1000,7 @@ getUniqueR = RegM $ \s -> -- | Record that a spill instruction was inserted, for profiling. recordSpill :: SpillReason -> RegM () recordSpill spill - = RegM $ \s -> (# s { ra_spills = spill : ra_spills s}, () #) + = RegM $ \s -> (# s { ra_spills = spill : ra_spills s}, () #) -- ----------------------------------------------------------------------------- @@ -1046,6 +1046,31 @@ binSpillReasons reasons SpillJoinRM r -> (r, [0, 0, 0, 0, 1])) reasons) +-- | Pretty print some RegAllocStats +pprStats :: [RegAllocStats] -> SDoc +pprStats statss + = let spills = foldl' (plusUFM_C (zipWith (+))) + emptyUFM + $ map ra_spillInstrs statss + + spillTotals = foldl' (zipWith (+)) + [0, 0, 0, 0, 0] + $ eltsUFM spills + + pprSpill (reg, spills) + = parens $ (hcat $ punctuate (text ", ") (doubleQuotes (ppr reg) : map ppr spills)) + + in ( text "-- spills-added-total" + $$ text "-- (allocs, clobbers, loads, joinRR, joinRM)" + $$ (parens $ (hcat $ punctuate (text ", ") (map ppr spillTotals))) + $$ text "" + $$ text "-- spills-added" + $$ text "-- (reg_name, allocs, clobbers, loads, joinRR, joinRM)" + $$ (vcat $ map pprSpill + $ ufmToList spills) + $$ text "") + + -- ----------------------------------------------------------------------------- -- Utils diff --git a/compiler/nativeGen/RegAllocStats.hs b/compiler/nativeGen/RegAllocStats.hs index ae5f106..015453e 100644 --- a/compiler/nativeGen/RegAllocStats.hs +++ b/compiler/nativeGen/RegAllocStats.hs @@ -6,6 +6,7 @@ module RegAllocStats ( RegAllocStats (..), regDotColor, + pprStats, pprStatsSpills, pprStatsLifetimes, pprStatsConflict, @@ -29,49 +30,64 @@ import Data.List data RegAllocStats + -- initial graph + = RegAllocStatsStart + { raLiveCmm :: [LiveCmmTop] -- ^ initial code, with liveness + , raGraph :: Color.Graph Reg RegClass Reg -- ^ the initial, uncolored graph + , raLifetimes :: UniqFM (Reg, Int) } -- ^ number of instrs each reg lives for + -- a spill stage - = RegAllocStatsSpill - { raLiveCmm :: [LiveCmmTop] -- ^ code we tried to allocate regs for - , raGraph :: Color.Graph Reg RegClass Reg -- ^ the partially colored graph + | RegAllocStatsSpill + { raGraph :: Color.Graph Reg RegClass Reg -- ^ the partially colored graph , raSpillStats :: SpillStats -- ^ spiller stats - , raLifetimes :: UniqFM (Reg, Int) } -- ^ number of instrs each reg lives for + , raLifetimes :: UniqFM (Reg, Int) -- ^ number of instrs each reg lives for + , raSpilled :: [LiveCmmTop] } -- ^ code with spill instructions added -- a successful coloring | RegAllocStatsColored - { raLiveCmm :: [LiveCmmTop] -- ^ the code we allocated regs for - , raGraph :: Color.Graph Reg RegClass Reg -- ^ the colored graph - , raPatchedCmm :: [LiveCmmTop] -- ^ code with register allocation - , raLifetimes :: UniqFM (Reg, Int) } -- ^ number of instrs each reg lives for + { raGraph :: Color.Graph Reg RegClass Reg -- ^ the colored graph + , raPatchedCmm :: [LiveCmmTop] } -- ^ code after register allocation instance Outputable RegAllocStats where - ppr (s@RegAllocStatsSpill{}) - = text "-- Spill" - - $$ text "-- Native code with liveness information." + ppr (s@RegAllocStatsStart{}) + = text "# Start" + $$ text "# Native code with liveness information." $$ ppr (raLiveCmm s) - $$ text " " - - $$ text "-- Register conflict graph." + $$ text "" + $$ text "# Initial register conflict graph." $$ Color.dotGraph regDotColor trivColorable (raGraph s) - $$ text "-- Spill statistics." + ppr (s@RegAllocStatsSpill{}) + = text "# Spill" + $$ text "# Register conflict graph." + $$ Color.dotGraph regDotColor trivColorable (raGraph s) + $$ text "" + $$ text "# Spills inserted." $$ ppr (raSpillStats s) - + $$ text "" + $$ text "# Code with spills inserted." + $$ (ppr (raSpilled s)) ppr (s@RegAllocStatsColored{}) - = text "-- Colored" + = text "# Colored" + $$ text "# Register conflict graph." + $$ Color.dotGraph regDotColor trivColorable (raGraph s) + $$ text "" + $$ text "# Native code after register allocation." + $$ ppr (raPatchedCmm s) - $$ text "-- Native code with liveness information." - $$ ppr (raLiveCmm s) - $$ text " " - $$ text "-- Register conflict graph." - $$ Color.dotGraph regDotColor trivColorable (raGraph s) +-- | Do all the different analysis on this list of RegAllocStats +pprStats :: [RegAllocStats] -> Color.Graph Reg RegClass Reg -> SDoc +pprStats stats graph + = let outSpills = pprStatsSpills stats + outLife = pprStatsLifetimes stats + outConflict = pprStatsConflict stats + outScatter = pprStatsLifeConflict stats graph - $$ text "-- Native code after register allocation." - $$ ppr (raPatchedCmm s) + in vcat [outSpills, outLife, outConflict, outScatter] -- | Dump a table of how many spill loads / stores were inserted for each vreg. @@ -83,27 +99,37 @@ pprStatsSpills stats spillStats = [ s | s@RegAllocStatsSpill{} <- stats] -- build a map of how many spill load/stores were inserted for each vreg - spillLS = foldl' (plusUFM_C accSpillLS) emptyUFM - $ map (spillLoadStore . raSpillStats) spillStats + spillSL = foldl' (plusUFM_C accSpillSL) emptyUFM + $ map (spillStoreLoad . raSpillStats) spillStats -- print the count of load/spills as a tuple so we can read back from the file easilly - pprSpillLS (r, loads, stores) + pprSpillSL (r, loads, stores) = (parens $ (hcat $ punctuate (text ", ") [doubleQuotes (ppr r), int loads, int stores])) + -- sum up the total number of spill instructions inserted + spillList = eltsUFM spillSL + spillTotal = foldl' (\(s1, l1) (s2, l2) -> (s1 + s2, l1 + l2)) + (0, 0) + $ map (\(n, s, l) -> (s, l)) + $ spillList - in ( text "-- spills-added" - $$ text "-- (reg_name, spill_loads_added, spill_stores_added)." - $$ (vcat $ map pprSpillLS $ eltsUFM spillLS) - $$ text "\n") - + in ( text "-- spills-added-total" + $$ text "-- (stores, loads)" + $$ (ppr spillTotal) + $$ text "" + $$ text "-- spills-added" + $$ text "-- (reg_name, stores, loads)" + $$ (vcat $ map pprSpillSL $ spillList) + $$ text "") --- | Dump a table of how long vregs tend to live for. +-- | Dump a table of how long vregs tend to live for in the initial code. pprStatsLifetimes :: [RegAllocStats] -> SDoc pprStatsLifetimes stats - = let lifeMap = foldl' plusUFM emptyUFM $ map raLifetimes stats + = let lifeMap = foldl' plusUFM emptyUFM + [ raLifetimes s | s@RegAllocStatsStart{} <- stats ] lifeBins = binLifetimeCount lifeMap in ( text "-- vreg-population-lifetimes" @@ -123,7 +149,7 @@ binLifetimeCount fm lifes --- | Dump a table of how many conflicts vregs tend to have. +-- | Dump a table of how many conflicts vregs tend to have in the initial code. pprStatsConflict :: [RegAllocStats] -> SDoc @@ -131,7 +157,7 @@ pprStatsConflict stats = let confMap = foldl' (plusUFM_C (\(c1, n1) (c2, n2) -> (c1, n1 + n2))) emptyUFM $ map Color.slurpNodeConflictCount - $ map raGraph stats + [ raGraph s | s@RegAllocStatsStart{} <- stats ] in ( text "-- vreg-conflicts" $$ text "-- (conflict_count, number_of_vregs_that_had_that_many_conflicts)" @@ -142,10 +168,14 @@ pprStatsConflict stats -- | For every vreg, dump it's how many conflicts it has and its lifetime -- good for making a scatter plot. pprStatsLifeConflict - :: [RegAllocStats] -> Color.Graph Reg RegClass Reg -> SDoc + :: [RegAllocStats] + -> Color.Graph Reg RegClass Reg -- ^ global register conflict graph + -> SDoc pprStatsLifeConflict stats graph - = let lifeMap = foldl' plusUFM emptyUFM $ map raLifetimes stats + = let lifeMap = foldl' plusUFM emptyUFM + [ raLifetimes s | s@RegAllocStatsStart{} <- stats ] + scatter = map (\r -> let Just (_, lifetime) = lookupUFM lifeMap r Just node = Color.lookupNode graph r in parens $ hcat $ punctuate (text ", ") diff --git a/compiler/nativeGen/RegSpill.hs b/compiler/nativeGen/RegSpill.hs index 5d0396b..a349a56 100644 --- a/compiler/nativeGen/RegSpill.hs +++ b/compiler/nativeGen/RegSpill.hs @@ -2,7 +2,7 @@ module RegSpill ( regSpill, SpillStats(..), - accSpillLS + accSpillSL ) where @@ -142,7 +142,7 @@ spillRead regSlotMap instr reg , mkLoadInstr nReg delta slot ] modify $ \s -> s - { stateSpillLS = addToUFM_C accSpillLS (stateSpillLS s) reg (reg, 1, 0) } + { stateSpillSL = addToUFM_C accSpillSL (stateSpillSL s) reg (reg, 0, 1) } return ( instr', (pre, [])) @@ -157,7 +157,7 @@ spillWrite regSlotMap instr reg , mkSpillInstr nReg delta slot ] modify $ \s -> s - { stateSpillLS = addToUFM_C accSpillLS (stateSpillLS s) reg (reg, 0, 1) } + { stateSpillSL = addToUFM_C accSpillSL (stateSpillSL s) reg (reg, 1, 0) } return ( instr', ([], post)) @@ -175,7 +175,7 @@ spillModify regSlotMap instr reg , mkSpillInstr nReg delta slot ] modify $ \s -> s - { stateSpillLS = addToUFM_C accSpillLS (stateSpillLS s) reg (reg, 1, 1) } + { stateSpillSL = addToUFM_C accSpillSL (stateSpillSL s) reg (reg, 1, 1) } return ( instr', (pre, post)) @@ -206,13 +206,13 @@ data SpillS = SpillS { stateDelta :: Int , stateUS :: UniqSupply - , stateSpillLS :: UniqFM (Reg, Int, Int) } -- ^ spilled reg vs number of times vreg was loaded, stored + , stateSpillSL :: UniqFM (Reg, Int, Int) } -- ^ spilled reg vs number of times vreg was loaded, stored initSpillS uniqueSupply = SpillS { stateDelta = 0 , stateUS = uniqueSupply - , stateSpillLS = emptyUFM } + , stateSpillSL = emptyUFM } type SpillM a = State SpillS a @@ -232,8 +232,8 @@ newUnique modify $ \s -> s { stateUS = us2 } return uniq -accSpillLS (r1, l1, s1) (r2, l2, s2) - = (r1, l1 + l2, s1 + s2) +accSpillSL (r1, s1, l1) (r2, s2, l2) + = (r1, s1 + s2, l1 + l2) @@ -242,15 +242,15 @@ accSpillLS (r1, l1, s1) (r2, l2, s2) data SpillStats = SpillStats - { spillLoadStore :: UniqFM (Reg, Int, Int) } + { spillStoreLoad :: UniqFM (Reg, Int, Int) } makeSpillStats :: SpillS -> SpillStats makeSpillStats s = SpillStats - { spillLoadStore = stateSpillLS s } + { spillStoreLoad = stateSpillSL s } instance Outputable SpillStats where - ppr s - = (vcat $ map (\(r, l, s) -> ppr r <+> int l <+> int s) - $ eltsUFM (spillLoadStore s)) + ppr stats + = (vcat $ map (\(r, s, l) -> ppr r <+> int s <+> int l) + $ eltsUFM (spillStoreLoad stats))