diff --git a/compiler/nativeGen/AsmCodeGen.lhs b/compiler/nativeGen/AsmCodeGen.lhs
index 109054f..a5988fc 100644
--- a/compiler/nativeGen/AsmCodeGen.lhs
+++ b/compiler/nativeGen/AsmCodeGen.lhs
@@ -12,51 +12,73 @@ module AsmCodeGen ( nativeCodeGen ) where
 #include "HsVersions.h"
 #include "nativeGen/NCG.h"
 
-import MachInstrs
-import MachRegs
-import MachCodeGen
-import PprMach
-import RegisterAlloc
-import RegAllocInfo ( jumpDests )
+
+import qualified X86.CodeGen
+import qualified X86.Regs
+import qualified X86.Instr
+import qualified X86.Ppr
+
+import qualified SPARC.CodeGen
+import qualified SPARC.Regs
+import qualified SPARC.Instr
+import qualified SPARC.Ppr
+import qualified SPARC.ShortcutJump
+import qualified SPARC.CodeGen.Expand
+
+import qualified PPC.CodeGen
+import qualified PPC.Cond
+import qualified PPC.Regs
+import qualified PPC.RegInfo
+import qualified PPC.Instr
+import qualified PPC.Ppr
+
+import RegAlloc.Liveness
+import qualified RegAlloc.Linear.Main as Linear
+
+import qualified GraphColor as Color
+import qualified RegAlloc.Graph.Main as Color
+import qualified RegAlloc.Graph.Stats as Color
+import qualified RegAlloc.Graph.TrivColorable as Color
+
+import TargetReg
+import Platform
+import Config
+import Instruction
+import PIC
+import Reg
 import NCGMonad
-import PositionIndependentCode
-
-import Cmm
-import CmmOpt ( cmmMiniInline, cmmMachOpFold )
-import PprCmm ( pprStmt, pprCmms )
-import MachOp
-import CLabel ( CLabel, mkSplitMarkerLabel, mkAsmTempLabel )
-#if powerpc_TARGET_ARCH
-import CLabel ( mkRtsCodeLabel )
-#endif
+
+import BlockId
+import CgUtils ( fixStgRegisters )
+import OldCmm
+import CmmOpt ( cmmEliminateDeadBlocks, cmmMiniInline, cmmMachOpFold )
+import OldPprCmm
+import CLabel
 
 import UniqFM
 import Unique ( Unique, getUnique )
 import UniqSupply
-import FastTypes
-import List ( groupBy, sortBy )
-import CLabel ( pprCLabel )
-import ErrUtils ( dumpIfSet_dyn )
-import DynFlags ( DynFlags, DynFlag(..), dopt )
-import StaticFlags ( opt_Static, opt_PIC )
-import Config ( cProjectVersion )
+import DynFlags
+import StaticFlags
+import Util
 
 import Digraph
+import Pretty (Doc)
 import qualified Pretty
+import BufWrite
 import Outputable
 import FastString
+import UniqSet
+import ErrUtils
+import Module
 
 -- DEBUGGING ONLY
 --import OrdList
-#ifdef NCG_DEBUG
-import List ( intersperse )
-#endif
-
-import Data.Int
-import Data.Word
-import Data.Bits
-import GHC.Exts
+import Data.List
+import Data.Maybe
+import Control.Monad
+import System.IO
 
 {-
 The native-code generator has machine-independent and
@@ -109,23 +131,368 @@ The machine-dependent bits break down as follows:
 
 -- -----------------------------------------------------------------------------
 -- Top-level of the native codegen
 
--- NB. We *lazilly* compile each block of code for space reasons.
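What follows first is the deletion of the old, single-target nativeCodeGen; its replacement further below is a driver parameterised over an NcgImpl record, one value per architecture, whose fields supply the target-specific passes and queries. A minimal sketch of that record-of-functions pattern, with a toy Target type rather than the real NcgImpl fields:

-- Hypothetical stand-in for NcgImpl: each architecture provides a value
-- of this record, and one generic driver consumes it.
data Target instr = Target
  { selectInstrs :: String -> [instr]    -- toy instruction selection
  , printInstr   :: instr -> String      -- toy assembly printing
  }

compileWith :: Target instr -> [String] -> String
compileWith t = unlines . map (printInstr t) . concatMap (selectInstrs t)

This is the move the patch makes: the per-architecture CPP blocks disappear, and nativeCodeGen instead picks a record in a case over platformArch (targetPlatform dflags).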
- -nativeCodeGen :: DynFlags -> [Cmm] -> UniqSupply -> IO Pretty.Doc -nativeCodeGen dflags cmms us - = let (res, _) = initUs us $ - cgCmm (concat (map add_split cmms)) - - cgCmm :: [CmmTop] -> UniqSM (Cmm, Pretty.Doc, [CLabel]) - cgCmm tops = - lazyMapUs (cmmNativeGen dflags) tops `thenUs` \ results -> - case unzip3 results of { (cmms,docs,imps) -> - returnUs (Cmm cmms, my_vcat docs, concat imps) - } - in - case res of { (ppr_cmms, insn_sdoc, imports) -> do - dumpIfSet_dyn dflags Opt_D_dump_opt_cmm "Optimised Cmm" (pprCmms [ppr_cmms]) - return (insn_sdoc Pretty.$$ dyld_stubs imports +data NcgImpl instr jumpDest = NcgImpl { + cmmTopCodeGen :: RawCmmTop -> NatM [NatCmmTop instr], + generateJumpTableForInstr :: instr -> Maybe (NatCmmTop instr), + getJumpDestBlockId :: jumpDest -> Maybe BlockId, + canShortcut :: instr -> Maybe jumpDest, + shortcutStatic :: (BlockId -> Maybe jumpDest) -> CmmStatic -> CmmStatic, + shortcutJump :: (BlockId -> Maybe jumpDest) -> instr -> instr, + pprNatCmmTop :: NatCmmTop instr -> Doc, + maxSpillSlots :: Int, + allocatableRegs :: [RealReg], + ncg_x86fp_kludge :: [NatCmmTop instr] -> [NatCmmTop instr], + ncgExpandTop :: [NatCmmTop instr] -> [NatCmmTop instr], + ncgMakeFarBranches :: [NatBasicBlock instr] -> [NatBasicBlock instr] + } + +-------------------- +nativeCodeGen :: DynFlags -> Handle -> UniqSupply -> [RawCmm] -> IO () +nativeCodeGen dflags h us cmms + = let nCG' ncgImpl = nativeCodeGen' dflags ncgImpl h us cmms + x86NcgImpl = NcgImpl { + cmmTopCodeGen = X86.CodeGen.cmmTopCodeGen + ,generateJumpTableForInstr = X86.CodeGen.generateJumpTableForInstr + ,getJumpDestBlockId = X86.Instr.getJumpDestBlockId + ,canShortcut = X86.Instr.canShortcut + ,shortcutStatic = X86.Instr.shortcutStatic + ,shortcutJump = X86.Instr.shortcutJump + ,pprNatCmmTop = X86.Ppr.pprNatCmmTop + ,maxSpillSlots = X86.Instr.maxSpillSlots + ,allocatableRegs = X86.Regs.allocatableRegs + ,ncg_x86fp_kludge = id + ,ncgExpandTop = id + ,ncgMakeFarBranches = id + } + in case platformArch $ targetPlatform dflags of + ArchX86 -> nCG' (x86NcgImpl { ncg_x86fp_kludge = map x86fp_kludge }) + ArchX86_64 -> nCG' x86NcgImpl + ArchPPC -> + nCG' $ NcgImpl { + cmmTopCodeGen = PPC.CodeGen.cmmTopCodeGen + ,generateJumpTableForInstr = PPC.CodeGen.generateJumpTableForInstr + ,getJumpDestBlockId = PPC.RegInfo.getJumpDestBlockId + ,canShortcut = PPC.RegInfo.canShortcut + ,shortcutStatic = PPC.RegInfo.shortcutStatic + ,shortcutJump = PPC.RegInfo.shortcutJump + ,pprNatCmmTop = PPC.Ppr.pprNatCmmTop + ,maxSpillSlots = PPC.Instr.maxSpillSlots + ,allocatableRegs = PPC.Regs.allocatableRegs + ,ncg_x86fp_kludge = id + ,ncgExpandTop = id + ,ncgMakeFarBranches = makeFarBranches + } + ArchSPARC -> + nCG' $ NcgImpl { + cmmTopCodeGen = SPARC.CodeGen.cmmTopCodeGen + ,generateJumpTableForInstr = SPARC.CodeGen.generateJumpTableForInstr + ,getJumpDestBlockId = SPARC.ShortcutJump.getJumpDestBlockId + ,canShortcut = SPARC.ShortcutJump.canShortcut + ,shortcutStatic = SPARC.ShortcutJump.shortcutStatic + ,shortcutJump = SPARC.ShortcutJump.shortcutJump + ,pprNatCmmTop = SPARC.Ppr.pprNatCmmTop + ,maxSpillSlots = SPARC.Instr.maxSpillSlots + ,allocatableRegs = SPARC.Regs.allocatableRegs + ,ncg_x86fp_kludge = id + ,ncgExpandTop = map SPARC.CodeGen.Expand.expandTop + ,ncgMakeFarBranches = id + } + ArchPPC_64 -> + panic "nativeCodeGen: No NCG for PPC 64" + ArchUnknown -> + panic "nativeCodeGen: No NCG for unknown arch" + +nativeCodeGen' :: (Instruction instr, Outputable instr) + => DynFlags + -> NcgImpl instr jumpDest + -> Handle -> UniqSupply -> 
[RawCmm] -> IO () +nativeCodeGen' dflags ncgImpl h us cmms + = do + let split_cmms = concat $ map add_split cmms + -- BufHandle is a performance hack. We could hide it inside + -- Pretty if it weren't for the fact that we do lots of little + -- printDocs here (in order to do codegen in constant space). + bufh <- newBufHandle h + (imports, prof) <- cmmNativeGens dflags ncgImpl bufh us split_cmms [] [] 0 + bFlush bufh + + let (native, colorStats, linearStats) + = unzip3 prof + + -- dump native code + dumpIfSet_dyn dflags + Opt_D_dump_asm "Asm code" + (vcat $ map (docToSDoc . pprNatCmmTop ncgImpl) $ concat native) + + -- dump global NCG stats for graph coloring allocator + (case concat $ catMaybes colorStats of + [] -> return () + stats -> do + -- build the global register conflict graph + let graphGlobal + = foldl Color.union Color.initGraph + $ [ Color.raGraph stat + | stat@Color.RegAllocStatsStart{} <- stats] + + dumpSDoc dflags Opt_D_dump_asm_stats "NCG stats" + $ Color.pprStats stats graphGlobal + + dumpIfSet_dyn dflags + Opt_D_dump_asm_conflicts "Register conflict graph" + $ Color.dotGraph + targetRegDotColor + (Color.trivColorable + targetVirtualRegSqueeze + targetRealRegSqueeze) + $ graphGlobal) + + + -- dump global NCG stats for linear allocator + (case concat $ catMaybes linearStats of + [] -> return () + stats -> dumpSDoc dflags Opt_D_dump_asm_stats "NCG stats" + $ Linear.pprStats (concat native) stats) + + -- write out the imports + Pretty.printDoc Pretty.LeftMode h + $ makeImportsDoc dflags (concat imports) + + return () + + where add_split (Cmm tops) + | dopt Opt_SplitObjs dflags = split_marker : tops + | otherwise = tops + + split_marker = CmmProc [] mkSplitMarkerLabel (ListGraph []) + + +-- | Do native code generation on all these cmms. +-- +cmmNativeGens :: (Instruction instr, Outputable instr) + => DynFlags + -> NcgImpl instr jumpDest + -> BufHandle + -> UniqSupply + -> [RawCmmTop] + -> [[CLabel]] + -> [ ([NatCmmTop instr], + Maybe [Color.RegAllocStats instr], + Maybe [Linear.RegAllocStats]) ] + -> Int + -> IO ( [[CLabel]], + [([NatCmmTop instr], + Maybe [Color.RegAllocStats instr], + Maybe [Linear.RegAllocStats])] ) + +cmmNativeGens _ _ _ _ [] impAcc profAcc _ + = return (reverse impAcc, reverse profAcc) + +cmmNativeGens dflags ncgImpl h us (cmm : cmms) impAcc profAcc count + = do + (us', native, imports, colorStats, linearStats) + <- cmmNativeGen dflags ncgImpl us cmm count + + Pretty.bufLeftRender h + $ {-# SCC "pprNativeCode" #-} Pretty.vcat $ map (pprNatCmmTop ncgImpl) native + + -- carefully evaluate this strictly. Binding it with 'let' + -- and then using 'seq' doesn't work, because the let + -- apparently gets inlined first. + lsPprNative <- return $! + if dopt Opt_D_dump_asm dflags + || dopt Opt_D_dump_asm_stats dflags + then native + else [] + + count' <- return $! count + 1; + + -- force evaulation all this stuff to avoid space leaks + seqString (showSDoc $ vcat $ map ppr imports) `seq` return () + + cmmNativeGens dflags ncgImpl + h us' cmms + (imports : impAcc) + ((lsPprNative, colorStats, linearStats) : profAcc) + count' + + where seqString [] = () + seqString (x:xs) = x `seq` seqString xs `seq` () + + +-- | Complete native code generation phase for a single top-level chunk of Cmm. +-- Dumping the output of each stage along the way. 
+-- Global conflict graph and NGC stats +cmmNativeGen + :: (Instruction instr, Outputable instr) + => DynFlags + -> NcgImpl instr jumpDest + -> UniqSupply + -> RawCmmTop -- ^ the cmm to generate code for + -> Int -- ^ sequence number of this top thing + -> IO ( UniqSupply + , [NatCmmTop instr] -- native code + , [CLabel] -- things imported by this cmm + , Maybe [Color.RegAllocStats instr] -- stats for the coloring register allocator + , Maybe [Linear.RegAllocStats]) -- stats for the linear register allocators + +cmmNativeGen dflags ncgImpl us cmm count + = do + + -- rewrite assignments to global regs + let fixed_cmm = + {-# SCC "fixStgRegisters" #-} + fixStgRegisters cmm + + -- cmm to cmm optimisations + let (opt_cmm, imports) = + {-# SCC "cmmToCmm" #-} + cmmToCmm dflags fixed_cmm + + dumpIfSet_dyn dflags + Opt_D_dump_opt_cmm "Optimised Cmm" + (pprCmm $ Cmm [opt_cmm]) + + -- generate native code from cmm + let ((native, lastMinuteImports), usGen) = + {-# SCC "genMachCode" #-} + initUs us $ genMachCode dflags (cmmTopCodeGen ncgImpl) opt_cmm + + dumpIfSet_dyn dflags + Opt_D_dump_asm_native "Native code" + (vcat $ map (docToSDoc . pprNatCmmTop ncgImpl) native) + + -- tag instructions with register liveness information + let (withLiveness, usLive) = + {-# SCC "regLiveness" #-} + initUs usGen + $ mapUs regLiveness + $ map natCmmTopToLive native + + dumpIfSet_dyn dflags + Opt_D_dump_asm_liveness "Liveness annotations added" + (vcat $ map ppr withLiveness) + + -- allocate registers + (alloced, usAlloc, ppr_raStatsColor, ppr_raStatsLinear) <- + if ( dopt Opt_RegsGraph dflags + || dopt Opt_RegsIterative dflags) + then do + -- the regs usable for allocation + let (alloc_regs :: UniqFM (UniqSet RealReg)) + = foldr (\r -> plusUFM_C unionUniqSets + $ unitUFM (targetClassOfRealReg r) (unitUniqSet r)) + emptyUFM + $ allocatableRegs ncgImpl + + -- do the graph coloring register allocation + let ((alloced, regAllocStats), usAlloc) + = {-# SCC "RegAlloc" #-} + initUs usLive + $ Color.regAlloc + dflags + alloc_regs + (mkUniqSet [0 .. maxSpillSlots ncgImpl]) + withLiveness + + -- dump out what happened during register allocation + dumpIfSet_dyn dflags + Opt_D_dump_asm_regalloc "Registers allocated" + (vcat $ map (docToSDoc . pprNatCmmTop ncgImpl) alloced) + + dumpIfSet_dyn dflags + Opt_D_dump_asm_regalloc_stages "Build/spill stages" + (vcat $ map (\(stage, stats) + -> text "# --------------------------" + $$ text "# cmm " <> int count <> text " Stage " <> int stage + $$ ppr stats) + $ zip [0..] regAllocStats) + + let mPprStats = + if dopt Opt_D_dump_asm_stats dflags + then Just regAllocStats else Nothing + + -- force evaluation of the Maybe to avoid space leak + mPprStats `seq` return () + + return ( alloced, usAlloc + , mPprStats + , Nothing) + + else do + -- do linear register allocation + let ((alloced, regAllocStats), usAlloc) + = {-# SCC "RegAlloc" #-} + initUs usLive + $ liftM unzip + $ mapUs (Linear.regAlloc dflags) withLiveness + + dumpIfSet_dyn dflags + Opt_D_dump_asm_regalloc "Registers allocated" + (vcat $ map (docToSDoc . pprNatCmmTop ncgImpl) alloced) + + let mPprStats = + if dopt Opt_D_dump_asm_stats dflags + then Just (catMaybes regAllocStats) else Nothing + + -- force evaluation of the Maybe to avoid space leak + mPprStats `seq` return () + + return ( alloced, usAlloc + , Nothing + , mPprStats) + + ---- x86fp_kludge. This pass inserts ffree instructions to clear + ---- the FPU stack on x86. 
The x86 ABI requires that the FPU stack + ---- is clear, and library functions can return odd results if it + ---- isn't. + ---- + ---- NB. must happen before shortcutBranches, because that + ---- generates JXX_GBLs which we can't fix up in x86fp_kludge. + let kludged = {-# SCC "x86fp_kludge" #-} ncg_x86fp_kludge ncgImpl alloced + + ---- generate jump tables + let tabled = + {-# SCC "generateJumpTables" #-} + generateJumpTables ncgImpl kludged + + ---- shortcut branches + let shorted = + {-# SCC "shortcutBranches" #-} + shortcutBranches dflags ncgImpl tabled + + ---- sequence blocks + let sequenced = + {-# SCC "sequenceBlocks" #-} + map (sequenceTop ncgImpl) shorted + + ---- expansion of SPARC synthetic instrs + let expanded = + {-# SCC "sparc_expand" #-} + ncgExpandTop ncgImpl sequenced + + dumpIfSet_dyn dflags + Opt_D_dump_asm_expanded "Synthetic instructions expanded" + (vcat $ map (docToSDoc . pprNatCmmTop ncgImpl) expanded) + + return ( usAlloc + , expanded + , lastMinuteImports ++ imports + , ppr_raStatsColor + , ppr_raStatsLinear) + + +x86fp_kludge :: NatCmmTop X86.Instr.Instr -> NatCmmTop X86.Instr.Instr +x86fp_kludge top@(CmmData _ _) = top +x86fp_kludge (CmmProc info lbl (ListGraph code)) = + CmmProc info lbl (ListGraph $ X86.Instr.i386_insert_ffrees code) + + +-- | Build a doc for all the imports. +-- +makeImportsDoc :: DynFlags -> [CLabel] -> Pretty.Doc +makeImportsDoc dflags imports + = dyld_stubs imports + #if HAVE_SUBSECTIONS_VIA_SYMBOLS -- On recent versions of Darwin, the linker supports -- dead-stripping of code and data on a per-symbol basis. @@ -141,98 +508,41 @@ nativeCodeGen dflags cmms us -- stack so add the note in: Pretty.$$ Pretty.text ".section .note.GNU-stack,\"\",@progbits" #endif -#if !defined(darwin_TARGET_OS) -- And just because every other compiler does, lets stick in -- an identifier directive: .ident "GHC x.y.z" - Pretty.$$ let compilerIdent = Pretty.text "GHC" Pretty.<+> + Pretty.$$ let compilerIdent = Pretty.text "GHC" Pretty.<+> Pretty.text cProjectVersion in Pretty.text ".ident" Pretty.<+> Pretty.doubleQuotes compilerIdent -#endif - ) - } - - where - - add_split (Cmm tops) - | dopt Opt_SplitObjs dflags = split_marker : tops - | otherwise = tops - - split_marker = CmmProc [] mkSplitMarkerLabel [] [] - -- Generate "symbol stubs" for all external symbols that might - -- come from a dynamic library. -{- dyld_stubs imps = Pretty.vcat $ map pprDyldSymbolStub $ + where + -- Generate "symbol stubs" for all external symbols that might + -- come from a dynamic library. + dyld_stubs :: [CLabel] -> Pretty.Doc +{- dyld_stubs imps = Pretty.vcat $ map pprDyldSymbolStub $ map head $ group $ sort imps-} - + + arch = platformArch $ targetPlatform dflags + os = platformOS $ targetPlatform dflags + -- (Hack) sometimes two Labels pretty-print the same, but have -- different uniques; so we compare their text versions... - dyld_stubs imps - | needImportedSymbols - = Pretty.vcat $ - (pprGotDeclaration :) $ - map (pprImportedSymbol . fst . 
head) $ - groupBy (\(_,a) (_,b) -> a == b) $ - sortBy (\(_,a) (_,b) -> compare a b) $ - map doPpr $ - imps - | otherwise - = Pretty.empty - - where doPpr lbl = (lbl, Pretty.render $ pprCLabel lbl astyle) - astyle = mkCodeStyle AsmStyle - -#ifndef NCG_DEBUG - my_vcat sds = Pretty.vcat sds -#else - my_vcat sds = Pretty.vcat ( - intersperse ( - Pretty.char ' ' - Pretty.$$ Pretty.ptext SLIT("# ___ncg_debug_marker") - Pretty.$$ Pretty.char ' ' - ) - sds - ) -#endif + dyld_stubs imps + | needImportedSymbols arch os + = Pretty.vcat $ + (pprGotDeclaration arch os :) $ + map ( pprImportedSymbol arch os . fst . head) $ + groupBy (\(_,a) (_,b) -> a == b) $ + sortBy (\(_,a) (_,b) -> compare a b) $ + map doPpr $ + imps + | otherwise + = Pretty.empty + + doPpr lbl = (lbl, renderWithStyle (pprCLabel lbl) astyle) + astyle = mkCodeStyle AsmStyle --- Complete native code generation phase for a single top-level chunk --- of Cmm. - -cmmNativeGen :: DynFlags -> CmmTop -> UniqSM (CmmTop, Pretty.Doc, [CLabel]) -cmmNativeGen dflags cmm - = {-# SCC "fixAssigns" #-} - fixAssignsTop cmm `thenUs` \ fixed_cmm -> - {-# SCC "genericOpt" #-} - cmmToCmm fixed_cmm `bind` \ (cmm, imports) -> - (if dopt Opt_D_dump_opt_cmm dflags -- space leak avoidance - then cmm - else CmmData Text []) `bind` \ ppr_cmm -> - {-# SCC "genMachCode" #-} - genMachCode cmm `thenUs` \ (pre_regalloc, lastMinuteImports) -> - {-# SCC "regAlloc" #-} - mapUs regAlloc pre_regalloc `thenUs` \ with_regs -> - {-# SCC "sequenceBlocks" #-} - map sequenceTop with_regs `bind` \ sequenced -> - {-# SCC "x86fp_kludge" #-} - map x86fp_kludge sequenced `bind` \ final_mach_code -> - {-# SCC "vcat" #-} - Pretty.vcat (map pprNatCmmTop final_mach_code) `bind` \ final_sdoc -> - - returnUs (ppr_cmm, final_sdoc Pretty.$$ Pretty.text "", lastMinuteImports ++ imports) - where - x86fp_kludge :: NatCmmTop -> NatCmmTop - x86fp_kludge top@(CmmData _ _) = top -#if i386_TARGET_ARCH - x86fp_kludge top@(CmmProc info lbl params code) = - CmmProc info lbl params (map bb_i386_insert_ffrees code) - where - bb_i386_insert_ffrees (BasicBlock id instrs) = - BasicBlock id (i386_insert_ffrees instrs) -#else - x86fp_kludge top = top -#endif - -- ----------------------------------------------------------------------------- -- Sequencing the basic blocks @@ -242,10 +552,13 @@ cmmNativeGen dflags cmm -- such that as many of the local jumps as possible turn into -- fallthroughs. -sequenceTop :: NatCmmTop -> NatCmmTop -sequenceTop top@(CmmData _ _) = top -sequenceTop (CmmProc info lbl params blocks) = - CmmProc info lbl params (makeFarBranches $ sequenceBlocks blocks) +sequenceTop + :: Instruction instr + => NcgImpl instr jumpDest -> NatCmmTop instr -> NatCmmTop instr + +sequenceTop _ top@(CmmData _ _) = top +sequenceTop ncgImpl (CmmProc info lbl (ListGraph blocks)) = + CmmProc info lbl (ListGraph $ ncgMakeFarBranches ncgImpl $ sequenceBlocks blocks) -- The algorithm is very simple (and stupid): we make a graph out of -- the blocks where there is an edge from one block to another iff the @@ -254,24 +567,46 @@ sequenceTop (CmmProc info lbl params blocks) = -- output the block, then if it has an out edge, we move the -- destination of the out edge to the front of the list, and continue. -sequenceBlocks :: [NatBasicBlock] -> [NatBasicBlock] +-- FYI, the classic layout for basic blocks uses postorder DFS; this +-- algorithm is implemented in Hoopl. 
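To see the greedy strategy in isolation from the NCG types, here is a self-contained sketch; Block and layout are hypothetical stand-ins for NatBasicBlock and seqBlocks, and the SCC grouping and keep-the-entry-block-first rule of the real code below are elided:

-- A block is its id plus the single jump target its last instruction
-- may have (cf. getOutEdges below).
type Block = (Int, Maybe Int)

layout :: [Block] -> [Int]
layout [] = []
layout ((bid, target) : rest) =
  bid : case target of
    Just t | Just b <- lookup t [ (i, blk) | blk@(i, _) <- rest ]
           -- move the jump destination up front: the jump becomes a fallthrough
           -> layout (b : filter ((/= t) . fst) rest)
    _      -> layout rest

For instance, layout [(1, Just 3), (2, Nothing), (3, Just 2)] yields [1,3,2], turning both jumps into fallthroughs.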
+ +sequenceBlocks + :: Instruction instr + => [NatBasicBlock instr] + -> [NatBasicBlock instr] + sequenceBlocks [] = [] sequenceBlocks (entry:blocks) = seqBlocks (mkNode entry : reverse (flattenSCCs (sccBlocks blocks))) -- the first block is the entry point ==> it must remain at the start. -sccBlocks :: [NatBasicBlock] -> [SCC (NatBasicBlock,Unique,[Unique])] -sccBlocks blocks = stronglyConnCompR (map mkNode blocks) -getOutEdges :: [Instr] -> [Unique] -getOutEdges instrs = case jumpDests (last instrs) [] of - [one] -> [getUnique one] - _many -> [] - -- we're only interested in the last instruction of - -- the block, and only if it has a single destination. +sccBlocks + :: Instruction instr + => [NatBasicBlock instr] + -> [SCC ( NatBasicBlock instr + , Unique + , [Unique])] + +sccBlocks blocks = stronglyConnCompFromEdgedVerticesR (map mkNode blocks) +-- we're only interested in the last instruction of +-- the block, and only if it has a single destination. +getOutEdges + :: Instruction instr + => [instr] -> [Unique] + +getOutEdges instrs + = case jumpDestsOfInstr (last instrs) of + [one] -> [getUnique one] + _many -> [] + +mkNode :: (Instruction t) + => GenBasicBlock t + -> (GenBasicBlock t, Unique, [Unique]) mkNode block@(BasicBlock id instrs) = (block, getUnique id, getOutEdges instrs) +seqBlocks :: (Eq t) => [(GenBasicBlock t1, t, [t])] -> [GenBasicBlock t1] seqBlocks [] = [] seqBlocks ((block,_,[]) : rest) = block : seqBlocks rest @@ -284,7 +619,8 @@ seqBlocks ((block@(BasicBlock id instrs),_,[next]) : rest) -- fallthroughs within a loop. seqBlocks _ = panic "AsmCodegen:seqBlocks" -reorder id accum [] = (False, reverse accum) +reorder :: (Eq a) => a -> [(t, a, t1)] -> [(t, a, t1)] -> (Bool, [(t, a, t1)]) +reorder _ accum [] = (False, reverse accum) reorder id accum (b@(block,id',out) : rest) | id == id' = (True, (block,id,out) : reverse accum ++ rest) | otherwise = reorder id (b:accum) rest @@ -296,9 +632,9 @@ reorder id accum (b@(block,id',out) : rest) -- Conditional branches on PowerPC are limited to +-32KB; if our Procs get too -- big, we have to work around this limitation. -makeFarBranches :: [NatBasicBlock] -> [NatBasicBlock] - -#if powerpc_TARGET_ARCH +makeFarBranches + :: [NatBasicBlock PPC.Instr.Instr] + -> [NatBasicBlock PPC.Instr.Instr] makeFarBranches blocks | last blockAddresses < nearLimit = blocks | otherwise = zipWith handleBlock blockAddresses blocks @@ -309,14 +645,14 @@ makeFarBranches blocks handleBlock addr (BasicBlock id instrs) = BasicBlock id (zipWith makeFar [addr..] instrs) - makeFar addr (BCC ALWAYS tgt) = BCC ALWAYS tgt - makeFar addr (BCC cond tgt) + makeFar _ (PPC.Instr.BCC PPC.Cond.ALWAYS tgt) = PPC.Instr.BCC PPC.Cond.ALWAYS tgt + makeFar addr (PPC.Instr.BCC cond tgt) | abs (addr - targetAddr) >= nearLimit - = BCCFAR cond tgt + = PPC.Instr.BCCFAR cond tgt | otherwise - = BCC cond tgt + = PPC.Instr.BCC cond tgt where Just targetAddr = lookupUFM blockAddressMap tgt - makeFar addr other = other + makeFar _ other = other nearLimit = 7000 -- 8192 instructions are allowed; let's keep some -- distance, as we have a few pseudo-insns that are @@ -325,9 +661,81 @@ makeFarBranches blocks -- things exactly blockAddressMap = listToUFM $ zip (map blockId blocks) blockAddresses -#else -makeFarBranches = id -#endif + +-- ----------------------------------------------------------------------------- +-- Generate jump tables + +-- Analyzes all native code and generates data sections for all jump +-- table instructions. 
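The pass defined next appends, after each proc, one data section per table-jump instruction found in its blocks. A toy version of that shape, with hypothetical Insn and Section types in place of the real NatCmmTop:

data Insn    = Plain String | TableJump [String]   -- a jump through a table of labels
data Section = Code [Insn] | JumpTable [String]

addJumpTables :: [Section] -> [Section]
addJumpTables = concatMap expand
  where
    -- keep the code section, then emit one table per table-jump inside it
    expand s@(Code insns) = s : [ JumpTable lbls | TableJump lbls <- insns ]
    expand s              = [s]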
+generateJumpTables + :: NcgImpl instr jumpDest + -> [NatCmmTop instr] -> [NatCmmTop instr] +generateJumpTables ncgImpl xs = concatMap f xs + where f p@(CmmProc _ _ (ListGraph xs)) = p : concatMap g xs + f p = [p] + g (BasicBlock _ xs) = catMaybes (map (generateJumpTableForInstr ncgImpl) xs) + +-- ----------------------------------------------------------------------------- +-- Shortcut branches + +shortcutBranches + :: DynFlags + -> NcgImpl instr jumpDest + -> [NatCmmTop instr] + -> [NatCmmTop instr] + +shortcutBranches dflags ncgImpl tops + | optLevel dflags < 1 = tops -- only with -O or higher + | otherwise = map (apply_mapping ncgImpl mapping) tops' + where + (tops', mappings) = mapAndUnzip (build_mapping ncgImpl) tops + mapping = foldr plusUFM emptyUFM mappings + +build_mapping :: NcgImpl instr jumpDest + -> GenCmmTop d t (ListGraph instr) + -> (GenCmmTop d t (ListGraph instr), UniqFM jumpDest) +build_mapping _ top@(CmmData _ _) = (top, emptyUFM) +build_mapping _ (CmmProc info lbl (ListGraph [])) + = (CmmProc info lbl (ListGraph []), emptyUFM) +build_mapping ncgImpl (CmmProc info lbl (ListGraph (head:blocks))) + = (CmmProc info lbl (ListGraph (head:others)), mapping) + -- drop the shorted blocks, but don't ever drop the first one, + -- because it is pointed to by a global label. + where + -- find all the blocks that just consist of a jump that can be + -- shorted. + -- Don't completely eliminate loops here -- that can leave a dangling jump! + (_, shortcut_blocks, others) = foldl split (emptyBlockSet, [], []) blocks + split (s, shortcut_blocks, others) b@(BasicBlock id [insn]) + | Just jd <- canShortcut ncgImpl insn, + Just dest <- getJumpDestBlockId ncgImpl jd, + (setMember dest s) || dest == id -- loop checks + = (s, shortcut_blocks, b : others) + split (s, shortcut_blocks, others) (BasicBlock id [insn]) + | Just dest <- canShortcut ncgImpl insn + = (setInsert id s, (id,dest) : shortcut_blocks, others) + split (s, shortcut_blocks, others) other = (s, shortcut_blocks, other : others) + + + -- build a mapping from BlockId to JumpDest for shorting branches + mapping = foldl add emptyUFM shortcut_blocks + add ufm (id,dest) = addToUFM ufm id dest + +apply_mapping :: NcgImpl instr jumpDest + -> UniqFM jumpDest + -> GenCmmTop CmmStatic h (ListGraph instr) + -> GenCmmTop CmmStatic h (ListGraph instr) +apply_mapping ncgImpl ufm (CmmData sec statics) + = CmmData sec (map (shortcutStatic ncgImpl (lookupUFM ufm)) statics) + -- we need to get the jump tables, so apply the mapping to the entries + -- of a CmmData too. +apply_mapping ncgImpl ufm (CmmProc info lbl (ListGraph blocks)) + = CmmProc info lbl (ListGraph $ map short_bb blocks) + where + short_bb (BasicBlock id insns) = BasicBlock id $! map short_insn insns + short_insn i = shortcutJump ncgImpl (lookupUFM ufm) i + -- shortcutJump should apply the mapping repeatedly, + -- just in case we can short multiple branches. -- ----------------------------------------------------------------------------- -- Instruction selection @@ -349,13 +757,18 @@ makeFarBranches = id -- Switching between the two monads whilst carrying along the same -- Unique supply breaks abstraction. Is that bad? 
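In miniature, the pattern that comment worries about looks like this, with plain State monads standing in for UniqSM and NatM (toy names, assuming mtl's Control.Monad.State; not the real NCGMonad API):

import Control.Monad.State

type Supply  = [Int]                  -- stand-in for UniqSupply
type OuterM  = State Supply           -- stand-in for UniqSM
data InnerSt = InnerSt { innerSupply :: Supply, innerDelta :: Int }
type InnerM  = State InnerSt          -- stand-in for NatM

-- Seed the inner monad's state from the outer supply, run it, and hand
-- the advanced supply back so later uniques stay fresh.
runInner :: InnerM a -> OuterM a
runInner m = do
  us <- get
  let (x, st') = runState m (InnerSt us 0)
  put (innerSupply st')
  return x

The real genMachCode below does the same dance with getUs, mkNatM_State and initNat, then inspects the leftover natm_delta to check the stack-delta discipline held.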
-genMachCode :: CmmTop -> UniqSM ([NatCmmTop], [CLabel]) +genMachCode + :: DynFlags + -> (RawCmmTop -> NatM [NatCmmTop instr]) + -> RawCmmTop + -> UniqSM + ( [NatCmmTop instr] + , [CLabel]) -genMachCode cmm_top +genMachCode dflags cmmTopCodeGen cmm_top = do { initial_us <- getUs - ; let initial_st = mkNatM_State initial_us 0 + ; let initial_st = mkNatM_State initial_us 0 dflags (new_tops, final_st) = initNat initial_st (cmmTopCodeGen cmm_top) - final_us = natm_us final_st final_delta = natm_delta final_st final_imports = natm_imports final_st ; if final_delta == 0 @@ -364,69 +777,6 @@ genMachCode cmm_top } -- ----------------------------------------------------------------------------- --- Fixup assignments to global registers so that they assign to --- locations within the RegTable, if appropriate. - --- Note that we currently don't fixup reads here: they're done by --- the generic optimiser below, to avoid having two separate passes --- over the Cmm. - -fixAssignsTop :: CmmTop -> UniqSM CmmTop -fixAssignsTop top@(CmmData _ _) = returnUs top -fixAssignsTop (CmmProc info lbl params blocks) = - mapUs fixAssignsBlock blocks `thenUs` \ blocks' -> - returnUs (CmmProc info lbl params blocks') - -fixAssignsBlock :: CmmBasicBlock -> UniqSM CmmBasicBlock -fixAssignsBlock (BasicBlock id stmts) = - fixAssigns stmts `thenUs` \ stmts' -> - returnUs (BasicBlock id stmts') - -fixAssigns :: [CmmStmt] -> UniqSM [CmmStmt] -fixAssigns stmts = - mapUs fixAssign stmts `thenUs` \ stmtss -> - returnUs (concat stmtss) - -fixAssign :: CmmStmt -> UniqSM [CmmStmt] -fixAssign (CmmAssign (CmmGlobal BaseReg) src) - = panic "cmmStmtConFold: assignment to BaseReg"; - -fixAssign (CmmAssign (CmmGlobal reg) src) - | Left realreg <- reg_or_addr - = returnUs [CmmAssign (CmmGlobal reg) src] - | Right baseRegAddr <- reg_or_addr - = returnUs [CmmStore baseRegAddr src] - -- Replace register leaves with appropriate StixTrees for - -- the given target. GlobalRegs which map to a reg on this - -- arch are left unchanged. Assigning to BaseReg is always - -- illegal, so we check for that. - where - reg_or_addr = get_GlobalReg_reg_or_addr reg - -fixAssign (CmmCall target results args vols) - = mapAndUnzipUs fixResult results `thenUs` \ (results',stores) -> - returnUs (caller_save ++ - CmmCall target results' args vols : - caller_restore ++ - concat stores) - where - -- we also save/restore any caller-saves STG registers here - (caller_save, caller_restore) = callerSaveVolatileRegs vols - - fixResult g@(CmmGlobal reg,hint) = - case get_GlobalReg_reg_or_addr reg of - Left realreg -> returnUs (g, []) - Right baseRegAddr -> - getUniqueUs `thenUs` \ uq -> - let local = CmmLocal (LocalReg uq (globalRegRep reg)) in - returnUs ((local,hint), - [CmmStore baseRegAddr (CmmReg local)]) - fixResult other = - returnUs (other,[]) - -fixAssign other_stmt = returnUs [other_stmt] - --- ----------------------------------------------------------------------------- -- Generic Cmm optimiser {- @@ -435,45 +785,44 @@ Here we do: (a) Constant folding (b) Simple inlining: a temporary which is assigned to and then used, once, can be shorted. - (c) Replacement of references to GlobalRegs which do not have - machine registers by the appropriate memory load (eg. - Hp ==> *(BaseReg + 34) ). 
- (d) Position independent code and dynamic linking + (c) Position independent code and dynamic linking (i) introduce the appropriate indirections and position independent refs (ii) compile a list of imported symbols -Ideas for other things we could do (ToDo): +Ideas for other things we could do: - shortcut jumps-to-jumps - - eliminate dead code blocks - simple CSE: if an expr is assigned to a temp, then replace later occs of that expr with the temp, until the expr is no longer valid (can push through temp assignments, and certain assigns to mem...) -} -cmmToCmm :: CmmTop -> (CmmTop, [CLabel]) -cmmToCmm top@(CmmData _ _) = (top, []) -cmmToCmm (CmmProc info lbl params blocks) = runCmmOpt $ do - blocks' <- mapM cmmBlockConFold (cmmMiniInline blocks) - return $ CmmProc info lbl params blocks' +cmmToCmm :: DynFlags -> RawCmmTop -> (RawCmmTop, [CLabel]) +cmmToCmm _ top@(CmmData _ _) = (top, []) +cmmToCmm dflags (CmmProc info lbl (ListGraph blocks)) = runCmmOpt dflags $ do + blocks' <- mapM cmmBlockConFold (cmmMiniInline (cmmEliminateDeadBlocks blocks)) + return $ CmmProc info lbl (ListGraph blocks') -newtype CmmOptM a = CmmOptM ([CLabel] -> (# a, [CLabel] #)) +newtype CmmOptM a = CmmOptM (([CLabel], DynFlags) -> (# a, [CLabel] #)) instance Monad CmmOptM where - return x = CmmOptM $ \imports -> (# x,imports #) + return x = CmmOptM $ \(imports, _) -> (# x,imports #) (CmmOptM f) >>= g = - CmmOptM $ \imports -> - case f imports of + CmmOptM $ \(imports, dflags) -> + case f (imports, dflags) of (# x, imports' #) -> case g x of - CmmOptM g' -> g' imports' + CmmOptM g' -> g' (imports', dflags) addImportCmmOpt :: CLabel -> CmmOptM () -addImportCmmOpt lbl = CmmOptM $ \imports -> (# (), lbl:imports #) +addImportCmmOpt lbl = CmmOptM $ \(imports, _dflags) -> (# (), lbl:imports #) + +getDynFlagsCmmOpt :: CmmOptM DynFlags +getDynFlagsCmmOpt = CmmOptM $ \(imports, dflags) -> (# dflags, imports #) -runCmmOpt :: CmmOptM a -> (a, [CLabel]) -runCmmOpt (CmmOptM f) = case f [] of +runCmmOpt :: DynFlags -> CmmOptM a -> (a, [CLabel]) +runCmmOpt dflags (CmmOptM f) = case f ([], dflags) of (# result, imports #) -> (result, imports) cmmBlockConFold :: CmmBasicBlock -> CmmOptM CmmBasicBlock @@ -481,6 +830,7 @@ cmmBlockConFold (BasicBlock id stmts) = do stmts' <- mapM cmmStmtConFold stmts return $ BasicBlock id stmts' +cmmStmtConFold :: CmmStmt -> CmmOptM CmmStmt cmmStmtConFold stmt = case stmt of CmmAssign reg src @@ -498,16 +848,16 @@ cmmStmtConFold stmt -> do addr' <- cmmExprConFold JumpReference addr return $ CmmJump addr' regs - CmmCall target regs args vols + CmmCall target regs args srt returns -> do target' <- case target of - CmmForeignCall e conv -> do + CmmCallee e conv -> do e' <- cmmExprConFold CallReference e - return $ CmmForeignCall e' conv + return $ CmmCallee e' conv other -> return other - args' <- mapM (\(arg, hint) -> do + args' <- mapM (\(CmmHinted arg hint) -> do arg' <- cmmExprConFold DataReference arg - return (arg', hint)) args - return $ CmmCall target' regs args' vols + return (CmmHinted arg' hint)) args + return $ CmmCall target' regs args' srt returns CmmCondBranch test dest -> do test' <- cmmExprConFold DataReference test @@ -516,8 +866,8 @@ cmmStmtConFold stmt CmmComment (mkFastString ("deleted: " ++ showSDoc (pprStmt stmt))) - CmmLit (CmmInt n _) -> CmmBranch dest - other -> CmmCondBranch test' dest + CmmLit (CmmInt _ _) -> CmmBranch dest + _other -> CmmCondBranch test' dest CmmSwitch expr ids -> do expr' <- cmmExprConFold DataReference expr @@ -527,8 +877,11 @@ cmmStmtConFold stmt -> 
return other -cmmExprConFold referenceKind expr - = case expr of +cmmExprConFold :: ReferenceKind -> CmmExpr -> CmmOptM CmmExpr +cmmExprConFold referenceKind expr = do + dflags <- getDynFlagsCmmOpt + let arch = platformArch (targetPlatform dflags) + case expr of CmmLoad addr rep -> do addr' <- cmmExprConFold DataReference addr return $ CmmLoad addr' rep @@ -540,64 +893,34 @@ cmmExprConFold referenceKind expr return $ cmmMachOpFold mop args' CmmLit (CmmLabel lbl) - -> cmmMakeDynamicReference addImportCmmOpt referenceKind lbl + -> do + cmmMakeDynamicReference dflags addImportCmmOpt referenceKind lbl CmmLit (CmmLabelOff lbl off) - -> do dynRef <- cmmMakeDynamicReference addImportCmmOpt referenceKind lbl - return $ cmmMachOpFold (MO_Add wordRep) [ + -> do + dynRef <- cmmMakeDynamicReference dflags addImportCmmOpt referenceKind lbl + return $ cmmMachOpFold (MO_Add wordWidth) [ dynRef, - (CmmLit $ CmmInt (fromIntegral off) wordRep) + (CmmLit $ CmmInt (fromIntegral off) wordWidth) ] -#if powerpc_TARGET_ARCH - -- On powerpc (non-PIC), it's easier to jump directly to a label than - -- to use the register table, so we replace these registers - -- with the corresponding labels: + -- On powerpc (non-PIC), it's easier to jump directly to a label than + -- to use the register table, so we replace these registers + -- with the corresponding labels: + CmmReg (CmmGlobal EagerBlackholeInfo) + | arch == ArchPPC && not opt_PIC + -> cmmExprConFold referenceKind $ + CmmLit (CmmLabel (mkCmmCodeLabel rtsPackageId (fsLit "__stg_EAGER_BLACKHOLE_info"))) CmmReg (CmmGlobal GCEnter1) - | not opt_PIC + | arch == ArchPPC && not opt_PIC -> cmmExprConFold referenceKind $ - CmmLit (CmmLabel (mkRtsCodeLabel SLIT( "__stg_gc_enter_1"))) + CmmLit (CmmLabel (mkCmmCodeLabel rtsPackageId (fsLit "__stg_gc_enter_1"))) CmmReg (CmmGlobal GCFun) - | not opt_PIC + | arch == ArchPPC && not opt_PIC -> cmmExprConFold referenceKind $ - CmmLit (CmmLabel (mkRtsCodeLabel SLIT( "__stg_gc_fun"))) -#endif + CmmLit (CmmLabel (mkCmmCodeLabel rtsPackageId (fsLit "__stg_gc_fun"))) - CmmReg (CmmGlobal mid) - -- Replace register leaves with appropriate StixTrees for - -- the given target. MagicIds which map to a reg on this - -- arch are left unchanged. For the rest, BaseReg is taken - -- to mean the address of the reg table in MainCapability, - -- and for all others we generate an indirection to its - -- location in the register table. - -> case get_GlobalReg_reg_or_addr mid of - Left realreg -> return expr - Right baseRegAddr - -> case mid of - BaseReg -> cmmExprConFold DataReference baseRegAddr - other -> cmmExprConFold DataReference - (CmmLoad baseRegAddr (globalRegRep mid)) - -- eliminate zero offsets - CmmRegOff reg 0 - -> cmmExprConFold referenceKind (CmmReg reg) - - CmmRegOff (CmmGlobal mid) offset - -- RegOf leaves are just a shorthand form. If the reg maps - -- to a real reg, we keep the shorthand, otherwise, we just - -- expand it and defer to the above code. - -> case get_GlobalReg_reg_or_addr mid of - Left realreg -> return expr - Right baseRegAddr - -> cmmExprConFold DataReference (CmmMachOp (MO_Add wordRep) [ - CmmReg (CmmGlobal mid), - CmmLit (CmmInt (fromIntegral offset) - wordRep)]) other -> return other --- ----------------------------------------------------------------------------- --- Utils - -bind f x = x $! f - \end{code}
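A final note on the cmm-to-cmm pass above: "constant folding" here means cmmMachOpFold rewrites operator applications whose operands have become literal, in the spirit of this toy folder (stand-in Expr type, not the real Cmm AST):

data Expr = Lit Integer | Reg String | Add Expr Expr
  deriving (Eq, Show)

constFold :: Expr -> Expr
constFold (Add a b) =
  case (constFold a, constFold b) of
    (Lit x, Lit y) -> Lit (x + y)   -- fold two literals
    (Lit 0, e)     -> e             -- drop additive identities, akin to the
    (e, Lit 0)     -> e             -- old zero-offset CmmRegOff case above
    (a', b')       -> Add a' b'
constFold e = e

So constFold (Add (Lit 1) (Lit 2)) gives Lit 3, while constFold (Add (Reg "x") (Lit 0)) gives Reg "x"; anything still symbolic is left for later passes.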