-- -----------------------------------------------------------------------------
\begin{code}
+{-# OPTIONS -w #-}
+-- The above warning supression flag is a temporary kludge.
+-- While working on this module you are encouraged to remove it and fix
+-- any warnings in the module. See
+-- http://hackage.haskell.org/trac/ghc/wiki/Commentary/CodingStyle#Warnings
+-- for details
+
module AsmCodeGen ( nativeCodeGen ) where
#include "HsVersions.h"
#include "nativeGen/NCG.h"
-import MachInstrs
-import MachRegs
-import MachCodeGen
-import PprMach
-import RegAllocInfo
+
+#if alpha_TARGET_ARCH
+import Alpha.CodeGen
+import Alpha.Regs
+import Alpha.RegInfo
+import Alpha.Instr
+
+#elif i386_TARGET_ARCH || x86_64_TARGET_ARCH
+import X86.CodeGen
+import X86.Regs
+import X86.RegInfo
+import X86.Instr
+import X86.Ppr
+
+#elif sparc_TARGET_ARCH
+import SPARC.CodeGen
+import SPARC.Regs
+import SPARC.Instr
+import SPARC.Ppr
+import SPARC.ShortcutJump
+
+#elif powerpc_TARGET_ARCH
+import PPC.CodeGen
+import PPC.Cond
+import PPC.Regs
+import PPC.RegInfo
+import PPC.Instr
+import PPC.Ppr
+
+#else
+#error "AsmCodeGen: unknown architecture"
+
+#endif
+
+import RegAlloc.Liveness
+import qualified RegAlloc.Linear.Main as Linear
+
+import qualified GraphColor as Color
+import qualified RegAlloc.Graph.Main as Color
+import qualified RegAlloc.Graph.Stats as Color
+import qualified RegAlloc.Graph.Coalesce as Color
+import qualified RegAlloc.Graph.TrivColorable as Color
+
+import qualified SPARC.CodeGen.Expand as SPARC
+
+import TargetReg
+import Platform
+import Instruction
+import PIC
+import Reg
+import RegClass
import NCGMonad
-import PositionIndependentCode
-import RegAllocLinear
-import RegAllocStats
-import RegLiveness
-import RegCoalesce
-import qualified RegAllocColor as Color
-import qualified GraphColor as Color
+import BlockId
import Cmm
import CmmOpt ( cmmMiniInline, cmmMachOpFold )
-import PprCmm ( pprStmt, pprCmms, pprCmm )
-import MachOp
+import PprCmm
import CLabel
+import State
import UniqFM
import Unique ( Unique, getUnique )
import UniqSupply
-import FastTypes
-import List ( groupBy, sortBy )
-import ErrUtils ( dumpIfSet_dyn )
import DynFlags
+#if powerpc_TARGET_ARCH
import StaticFlags ( opt_Static, opt_PIC )
+#endif
import Util
import Config ( cProjectVersion )
import Module
import Digraph
import qualified Pretty
+import BufWrite
import Outputable
import FastString
import UniqSet
+import ErrUtils
-- DEBUGGING ONLY
--import OrdList
import Data.Maybe
import GHC.Exts
import Control.Monad
+import System.IO
{-
The native-code generator has machine-independent and
-- -----------------------------------------------------------------------------
-- Top-level of the native codegen
--- NB. We *lazilly* compile each block of code for space reasons.
-
--------------------
-nativeCodeGen :: DynFlags -> Module -> ModLocation -> [RawCmm] -> UniqSupply -> IO Pretty.Doc
-nativeCodeGen dflags mod modLocation cmms us
- = let (res, _) = initUs us $
- cgCmm (concat (map add_split cmms))
+nativeCodeGen :: DynFlags -> Handle -> UniqSupply -> [RawCmm] -> IO ()
+nativeCodeGen dflags h us cmms
+ = do
+ let split_cmms = concat $ map add_split cmms
- cgCmm :: [RawCmmTop] -> UniqSM ( [CmmNativeGenDump], Pretty.Doc, [CLabel])
- cgCmm tops =
- lazyMapUs (cmmNativeGen dflags) tops `thenUs` \ results ->
- case unzip3 results of { (dump,docs,imps) ->
- returnUs (dump, my_vcat docs, concat imps)
- }
- in
- case res of { (dump, insn_sdoc, imports) -> do
+ -- BufHandle is a performance hack. We could hide it inside
+ -- Pretty if it weren't for the fact that we do lots of little
+ -- printDocs here (in order to do codegen in constant space).
+ bufh <- newBufHandle h
+ (imports, prof) <- cmmNativeGens dflags bufh us split_cmms [] [] 0
+ bFlush bufh
- cmmNativeGenDump dflags mod modLocation dump
+ let (native, colorStats, linearStats)
+ = unzip3 prof
- return (insn_sdoc Pretty.$$ dyld_stubs imports
+ -- dump native code
+ dumpIfSet_dyn dflags
+ Opt_D_dump_asm "Asm code"
+ (vcat $ map (docToSDoc . pprNatCmmTop) $ concat native)
+
+ -- dump global NCG stats for graph coloring allocator
+ (case concat $ catMaybes colorStats of
+ [] -> return ()
+ stats -> do
+ -- build the global register conflict graph
+ let graphGlobal
+ = foldl Color.union Color.initGraph
+ $ [ Color.raGraph stat
+ | stat@Color.RegAllocStatsStart{} <- stats]
+
+ dumpSDoc dflags Opt_D_dump_asm_stats "NCG stats"
+ $ Color.pprStats stats graphGlobal
+
+ dumpIfSet_dyn dflags
+ Opt_D_dump_asm_conflicts "Register conflict graph"
+ $ Color.dotGraph
+ targetRegDotColor
+ (Color.trivColorable
+ targetVirtualRegSqueeze
+ targetRealRegSqueeze)
+ $ graphGlobal)
+
+
+ -- dump global NCG stats for linear allocator
+ (case concat $ catMaybes linearStats of
+ [] -> return ()
+ stats -> dumpSDoc dflags Opt_D_dump_asm_stats "NCG stats"
+ $ Linear.pprStats (concat native) stats)
+
+ -- write out the imports
+ Pretty.printDoc Pretty.LeftMode h
+ $ makeImportsDoc dflags (concat imports)
+
+ return ()
+
+ where add_split (Cmm tops)
+ | dopt Opt_SplitObjs dflags = split_marker : tops
+ | otherwise = tops
+
+ split_marker = CmmProc [] mkSplitMarkerLabel [] (ListGraph [])
+
+
+-- | Do native code generation on all these cmms.
+--
+cmmNativeGens dflags h us [] impAcc profAcc count
+ = return (reverse impAcc, reverse profAcc)
-#if HAVE_SUBSECTIONS_VIA_SYMBOLS
- -- On recent versions of Darwin, the linker supports
- -- dead-stripping of code and data on a per-symbol basis.
- -- There's a hack to make this work in PprMach.pprNatCmmTop.
- Pretty.$$ Pretty.text ".subsections_via_symbols"
-#endif
-#if HAVE_GNU_NONEXEC_STACK
- -- On recent GNU ELF systems one can mark an object file
- -- as not requiring an executable stack. If all objects
- -- linked into a program have this note then the program
- -- will not use an executable stack, which is good for
- -- security. GHC generated code does not need an executable
- -- stack so add the note in:
- Pretty.$$ Pretty.text ".section .note.GNU-stack,\"\",@progbits"
-#endif
-#if !defined(darwin_TARGET_OS)
- -- And just because every other compiler does, lets stick in
- -- an identifier directive: .ident "GHC x.y.z"
- Pretty.$$ let compilerIdent = Pretty.text "GHC" Pretty.<+>
- Pretty.text cProjectVersion
- in Pretty.text ".ident" Pretty.<+>
- Pretty.doubleQuotes compilerIdent
-#endif
- )
- }
+cmmNativeGens dflags h us (cmm : cmms) impAcc profAcc count
+ = do
+ (us', native, imports, colorStats, linearStats)
+ <- cmmNativeGen dflags us cmm count
- where
+ Pretty.bufLeftRender h
+ $ {-# SCC "pprNativeCode" #-} Pretty.vcat $ map pprNatCmmTop native
- add_split (Cmm tops)
- | dopt Opt_SplitObjs dflags = split_marker : tops
- | otherwise = tops
+ -- carefully evaluate this strictly. Binding it with 'let'
+ -- and then using 'seq' doesn't work, because the let
+ -- apparently gets inlined first.
+ lsPprNative <- return $!
+ if dopt Opt_D_dump_asm dflags
+ || dopt Opt_D_dump_asm_stats dflags
+ then native
+ else []
- split_marker = CmmProc [] mkSplitMarkerLabel [] []
+ count' <- return $! count + 1;
- -- Generate "symbol stubs" for all external symbols that might
- -- come from a dynamic library.
-{- dyld_stubs imps = Pretty.vcat $ map pprDyldSymbolStub $
- map head $ group $ sort imps-}
-
- -- (Hack) sometimes two Labels pretty-print the same, but have
- -- different uniques; so we compare their text versions...
- dyld_stubs imps
- | needImportedSymbols
- = Pretty.vcat $
- (pprGotDeclaration :) $
- map (pprImportedSymbol . fst . head) $
- groupBy (\(_,a) (_,b) -> a == b) $
- sortBy (\(_,a) (_,b) -> compare a b) $
- map doPpr $
- imps
- | otherwise
- = Pretty.empty
-
- where doPpr lbl = (lbl, Pretty.render $ pprCLabel lbl astyle)
- astyle = mkCodeStyle AsmStyle
+ -- force evaulation all this stuff to avoid space leaks
+ seqString (showSDoc $ vcat $ map ppr imports) `seq` return ()
-#ifndef NCG_DEBUG
- my_vcat sds = Pretty.vcat sds
-#else
- my_vcat sds = Pretty.vcat (
- intersperse (
- Pretty.char ' '
- Pretty.$$ Pretty.ptext SLIT("# ___ncg_debug_marker")
- Pretty.$$ Pretty.char ' '
- )
- sds
- )
-#endif
+ cmmNativeGens dflags h us' cmms
+ (imports : impAcc)
+ ((lsPprNative, colorStats, linearStats) : profAcc)
+ count'
+ where seqString [] = ()
+ seqString (x:xs) = x `seq` seqString xs `seq` ()
--- Carries output of the code generator passes, for dumping.
--- Make sure to only fill the one's we're interested in to avoid
--- creating space leaks.
-data CmmNativeGenDump
- = CmmNativeGenDump
- { cdCmmOpt :: RawCmmTop
- , cdNative :: [NatCmmTop]
- , cdLiveness :: [LiveCmmTop]
- , cdCoalesce :: Maybe [LiveCmmTop]
- , cdRegAllocStats :: Maybe [RegAllocStats]
- , cdColoredGraph :: Maybe (Color.Graph Reg RegClass Reg)
- , cdAlloced :: [NatCmmTop] }
+-- | Complete native code generation phase for a single top-level chunk of Cmm.
+-- Dumping the output of each stage along the way.
+-- Global conflict graph and NGC stats
+cmmNativeGen
+ :: DynFlags
+ -> UniqSupply
+ -> RawCmmTop -- ^ the cmm to generate code for
+ -> Int -- ^ sequence number of this top thing
+ -> IO ( UniqSupply
+ , [NatCmmTop Instr] -- native code
+ , [CLabel] -- things imported by this cmm
+ , Maybe [Color.RegAllocStats Instr] -- stats for the coloring register allocator
+ , Maybe [Linear.RegAllocStats]) -- stats for the linear register allocators
+
+cmmNativeGen dflags us cmm count
+ = do
-dchoose dflags opt a b
- | dopt opt dflags = a
- | otherwise = b
+ -- rewrite assignments to global regs
+ let (fixed_cmm, usFix) =
+ {-# SCC "fixAssignsTop" #-}
+ initUs us $ fixAssignsTop cmm
-dchooses dflags opts a b
- | or $ map ( (flip dopt) dflags) opts = a
- | otherwise = b
+ -- cmm to cmm optimisations
+ let (opt_cmm, imports) =
+ {-# SCC "cmmToCmm" #-}
+ cmmToCmm dflags fixed_cmm
--- | Complete native code generation phase for a single top-level chunk of Cmm.
--- Unless they're being dumped, intermediate data structures are squashed after
--- every stage to avoid creating space leaks.
---
-cmmNativeGen :: DynFlags -> RawCmmTop -> UniqSM (CmmNativeGenDump, Pretty.Doc, [CLabel])
-cmmNativeGen dflags cmm
- = do
- --
- fixed_cmm
- <- {-# SCC "fixAssigns" #-}
- fixAssignsTop cmm
-
- ---- cmm to cmm optimisations
- (cmm, imports, ppr_cmm)
- <- (\fixed_cmm
- -> {-# SCC "genericOpt" #-}
- do let (cmm, imports) = cmmToCmm dflags fixed_cmm
-
- return ( cmm
- , imports
- , dchoose dflags Opt_D_dump_cmm cmm (CmmData Text []))
- ) fixed_cmm
-
-
- ---- generate native code from cmm
- (native, lastMinuteImports, ppr_native)
- <- (\cmm
- -> {-# SCC "genMachCode" #-}
- do (machCode, lastMinuteImports)
- <- genMachCode dflags cmm
-
- return ( machCode
- , lastMinuteImports
- , dchoose dflags Opt_D_dump_asm_native machCode [])
- ) cmm
-
-
- ---- tag instructions with register liveness information
- (withLiveness, ppr_withLiveness)
- <- (\native
- -> {-# SCC "regLiveness" #-}
- do
- withLiveness <- mapUs regLiveness native
-
- return ( withLiveness
- , dchoose dflags Opt_D_dump_asm_liveness withLiveness []))
- native
-
- ---- allocate registers
- (alloced, ppr_alloced, ppr_coalesce, ppr_regAllocStats, ppr_coloredGraph)
- <- (\withLiveness
- -> {-# SCC "regAlloc" #-}
- do
- if dopt Opt_RegsGraph dflags
- then do
- -- the regs usable for allocation
- let alloc_regs
- = foldr (\r -> plusUFM_C unionUniqSets
- $ unitUFM (regClass r) (unitUniqSet r))
- emptyUFM
- $ map RealReg allocatableRegs
-
- -- aggressively coalesce moves between virtual regs
- coalesced <- regCoalesce withLiveness
-
- -- graph coloring register allocation
- (alloced, regAllocStats)
- <- Color.regAlloc
- alloc_regs
- (mkUniqSet [0..maxSpillSlots])
- coalesced
-
- return ( alloced
- , dchoose dflags Opt_D_dump_asm_regalloc alloced []
- , dchoose dflags Opt_D_dump_asm_coalesce (Just coalesced) Nothing
- , dchooses dflags
- [ Opt_D_dump_asm_regalloc_stages
- , Opt_D_drop_asm_stats]
- (Just regAllocStats) Nothing
- , dchoose dflags Opt_D_dump_asm_conflicts Nothing Nothing)
-
- else do
- -- do linear register allocation
- alloced <- mapUs regAlloc withLiveness
- return ( alloced
- , dchoose dflags Opt_D_dump_asm_regalloc alloced []
- , Nothing
- , Nothing
- , Nothing ))
- withLiveness
-
+ dumpIfSet_dyn dflags
+ Opt_D_dump_opt_cmm "Optimised Cmm"
+ (pprCmm $ Cmm [opt_cmm])
+
+ -- generate native code from cmm
+ let ((native, lastMinuteImports), usGen) =
+ {-# SCC "genMachCode" #-}
+ initUs usFix $ genMachCode dflags opt_cmm
+
+ dumpIfSet_dyn dflags
+ Opt_D_dump_asm_native "Native code"
+ (vcat $ map (docToSDoc . pprNatCmmTop) native)
+
+
+ -- tag instructions with register liveness information
+ let (withLiveness, usLive) =
+ {-# SCC "regLiveness" #-}
+ initUs usGen $ mapUs regLiveness native
+
+ dumpIfSet_dyn dflags
+ Opt_D_dump_asm_liveness "Liveness annotations added"
+ (vcat $ map ppr withLiveness)
+
+
+ -- allocate registers
+ (alloced, usAlloc, ppr_raStatsColor, ppr_raStatsLinear) <-
+ if ( dopt Opt_RegsGraph dflags
+ || dopt Opt_RegsIterative dflags)
+ then do
+ -- the regs usable for allocation
+ let (alloc_regs :: UniqFM (UniqSet RealReg))
+ = foldr (\r -> plusUFM_C unionUniqSets
+ $ unitUFM (targetClassOfRealReg r) (unitUniqSet r))
+ emptyUFM
+ $ allocatableRegs
+
+
+ -- do the graph coloring register allocation
+ let ((alloced, regAllocStats), usAlloc)
+ = {-# SCC "RegAlloc" #-}
+ initUs usLive
+ $ Color.regAlloc
+ dflags
+ alloc_regs
+ (mkUniqSet [0..maxSpillSlots])
+ withLiveness
+
+ -- dump out what happened during register allocation
+ dumpIfSet_dyn dflags
+ Opt_D_dump_asm_regalloc "Registers allocated"
+ (vcat $ map (docToSDoc . pprNatCmmTop) alloced)
+
+ dumpIfSet_dyn dflags
+ Opt_D_dump_asm_regalloc_stages "Build/spill stages"
+ (vcat $ map (\(stage, stats)
+ -> text "# --------------------------"
+ $$ text "# cmm " <> int count <> text " Stage " <> int stage
+ $$ ppr stats)
+ $ zip [0..] regAllocStats)
+
+ let mPprStats =
+ if dopt Opt_D_dump_asm_stats dflags
+ then Just regAllocStats else Nothing
+
+ -- force evaluation of the Maybe to avoid space leak
+ mPprStats `seq` return ()
+
+ return ( alloced, usAlloc
+ , mPprStats
+ , Nothing)
+
+ else do
+ -- do linear register allocation
+ let ((alloced, regAllocStats), usAlloc)
+ = {-# SCC "RegAlloc" #-}
+ initUs usLive
+ $ liftM unzip
+ $ mapUs Linear.regAlloc withLiveness
+
+ dumpIfSet_dyn dflags
+ Opt_D_dump_asm_regalloc "Registers allocated"
+ (vcat $ map (docToSDoc . pprNatCmmTop) alloced)
+
+ let mPprStats =
+ if dopt Opt_D_dump_asm_stats dflags
+ then Just (catMaybes regAllocStats) else Nothing
+
+ -- force evaluation of the Maybe to avoid space leak
+ mPprStats `seq` return ()
+
+ return ( alloced, usAlloc
+ , Nothing
+ , mPprStats)
---- shortcut branches
let shorted =
map sequenceTop shorted
---- x86fp_kludge
- let final_mach_code =
+ let kludged =
#if i386_TARGET_ARCH
{-# SCC "x86fp_kludge" #-}
map x86fp_kludge sequenced
#else
sequenced
#endif
-
- ---- vcat
- let final_sdoc =
- {-# SCC "vcat" #-}
- Pretty.vcat (map pprNatCmmTop final_mach_code)
-
- let dump =
- CmmNativeGenDump
- { cdCmmOpt = ppr_cmm
- , cdNative = ppr_native
- , cdLiveness = ppr_withLiveness
- , cdCoalesce = ppr_coalesce
- , cdRegAllocStats = ppr_regAllocStats
- , cdColoredGraph = ppr_coloredGraph
- , cdAlloced = ppr_alloced }
-
- returnUs (dump, final_sdoc Pretty.$$ Pretty.text "", lastMinuteImports ++ imports)
-
-#if i386_TARGET_ARCH
-x86fp_kludge :: NatCmmTop -> NatCmmTop
-x86fp_kludge top@(CmmData _ _) = top
-x86fp_kludge top@(CmmProc info lbl params code) =
- CmmProc info lbl params (map bb_i386_insert_ffrees code)
- where
- bb_i386_insert_ffrees (BasicBlock id instrs) =
- BasicBlock id (i386_insert_ffrees instrs)
-#endif
+ ---- expansion of SPARC synthetic instrs
+#if sparc_TARGET_ARCH
+ let expanded =
+ {-# SCC "sparc_expand" #-}
+ map SPARC.expandTop kludged
--- Dump output of native code generator passes
--- stripe across the outputs for each block so all the information for a
--- certain stage is concurrent in the dumps.
---
-cmmNativeGenDump :: DynFlags -> Module -> ModLocation -> [CmmNativeGenDump] -> IO ()
-cmmNativeGenDump dflags mod modLocation dump
- = do
dumpIfSet_dyn dflags
- Opt_D_dump_opt_cmm "Optimised Cmm"
- (pprCmm $ Cmm $ map cdCmmOpt dump)
-
- dumpIfSet_dyn dflags
- Opt_D_dump_asm_native "Native code"
- (vcat $ map (docToSDoc . pprNatCmmTop) $ concatMap cdNative dump)
+ Opt_D_dump_asm_expanded "Synthetic instructions expanded"
+ (vcat $ map (docToSDoc . pprNatCmmTop) expanded)
+#else
+ let expanded =
+ kludged
+#endif
- dumpIfSet_dyn dflags
- Opt_D_dump_asm_liveness "Liveness annotations added"
- (vcat $ map (ppr . cdLiveness) dump)
+ return ( usAlloc
+ , expanded
+ , lastMinuteImports ++ imports
+ , ppr_raStatsColor
+ , ppr_raStatsLinear)
- dumpIfSet_dyn dflags
- Opt_D_dump_asm_coalesce "Reg-Reg moves coalesced"
- (vcat $ map (fromMaybe empty . liftM ppr . cdCoalesce) dump)
- dumpIfSet_dyn dflags
- Opt_D_dump_asm_regalloc "Registers allocated"
- (vcat $ map (docToSDoc . pprNatCmmTop) $ concatMap cdAlloced dump)
-
- -- with the graph coloring allocator, show the result of each build/spill stage
- -- for each block in turn.
- when (dopt Opt_D_dump_asm_regalloc_stages dflags)
- $ do mapM_ (\stats
- -> printDump
- $ vcat $ map (\(stage, stats) ->
- text "-- Stage " <> int stage
- $$ ppr stats)
- (zip [0..] stats))
- $ map (fromMaybe [] . cdRegAllocStats) dump
-
- -- Build a global register conflict graph.
- -- If you want to see the graph for just one basic block then use asm-regalloc-stages instead.
- dumpIfSet_dyn dflags
- Opt_D_dump_asm_conflicts "Register conflict graph"
- $ Color.dotGraph Color.regDotColor trivColorable
- $ foldl Color.union Color.initGraph
- $ catMaybes $ map cdColoredGraph dump
+#if i386_TARGET_ARCH
+x86fp_kludge :: NatCmmTop Instr -> NatCmmTop Instr
+x86fp_kludge top@(CmmData _ _) = top
+x86fp_kludge top@(CmmProc info lbl params (ListGraph code)) =
+ CmmProc info lbl params (ListGraph $ i386_insert_ffrees code)
+#endif
- -- Drop native code generator statistics.
- -- This is potentially a large amount of information, and we want to be able
- -- to collect it while running nofib. Drop a new file instead of emitting
- -- it to stdout/stderr.
- --
- when (dopt Opt_D_drop_asm_stats dflags)
- $ do -- make the drop file name based on the object file name
- let dropFile = (init $ ml_obj_file modLocation) ++ "drop-asm-stats"
- -- slurp out all the regalloc stats
- let stats = concat $ catMaybes $ map cdRegAllocStats dump
+-- | Build a doc for all the imports.
+--
+makeImportsDoc :: DynFlags -> [CLabel] -> Pretty.Doc
+makeImportsDoc dflags imports
+ = dyld_stubs imports
- -- build a global conflict graph
- let graph = foldl Color.union Color.initGraph $ map raGraph stats
+#if HAVE_SUBSECTIONS_VIA_SYMBOLS
+ -- On recent versions of Darwin, the linker supports
+ -- dead-stripping of code and data on a per-symbol basis.
+ -- There's a hack to make this work in PprMach.pprNatCmmTop.
+ Pretty.$$ Pretty.text ".subsections_via_symbols"
+#endif
+#if HAVE_GNU_NONEXEC_STACK
+ -- On recent GNU ELF systems one can mark an object file
+ -- as not requiring an executable stack. If all objects
+ -- linked into a program have this note then the program
+ -- will not use an executable stack, which is good for
+ -- security. GHC generated code does not need an executable
+ -- stack so add the note in:
+ Pretty.$$ Pretty.text ".section .note.GNU-stack,\"\",@progbits"
+#endif
+#if !defined(darwin_TARGET_OS)
+ -- And just because every other compiler does, lets stick in
+ -- an identifier directive: .ident "GHC x.y.z"
+ Pretty.$$ let compilerIdent = Pretty.text "GHC" Pretty.<+>
+ Pretty.text cProjectVersion
+ in Pretty.text ".ident" Pretty.<+>
+ Pretty.doubleQuotes compilerIdent
+#endif
- -- pretty print the various sections and write out the file.
- let outSpills = pprStatsSpills stats
- let outLife = pprStatsLifetimes stats
- let outConflict = pprStatsConflict stats
- let outScatter = pprStatsLifeConflict stats graph
+ where
+ -- Generate "symbol stubs" for all external symbols that might
+ -- come from a dynamic library.
+ dyld_stubs :: [CLabel] -> Pretty.Doc
+{- dyld_stubs imps = Pretty.vcat $ map pprDyldSymbolStub $
+ map head $ group $ sort imps-}
- writeFile dropFile
- (showSDoc $ vcat [outSpills, outLife, outConflict, outScatter])
+ arch = platformArch $ targetPlatform dflags
+ os = platformOS $ targetPlatform dflags
+
+ -- (Hack) sometimes two Labels pretty-print the same, but have
+ -- different uniques; so we compare their text versions...
+ dyld_stubs imps
+ | needImportedSymbols arch os
+ = Pretty.vcat $
+ (pprGotDeclaration arch os :) $
+ map ( pprImportedSymbol arch os . fst . head) $
+ groupBy (\(_,a) (_,b) -> a == b) $
+ sortBy (\(_,a) (_,b) -> compare a b) $
+ map doPpr $
+ imps
+ | otherwise
+ = Pretty.empty
+
+ doPpr lbl = (lbl, Pretty.render $ pprCLabel lbl astyle)
+ astyle = mkCodeStyle AsmStyle
- return ()
-- -----------------------------------------------------------------------------
-- Sequencing the basic blocks
-- such that as many of the local jumps as possible turn into
-- fallthroughs.
-sequenceTop :: NatCmmTop -> NatCmmTop
+sequenceTop
+ :: NatCmmTop Instr
+ -> NatCmmTop Instr
+
sequenceTop top@(CmmData _ _) = top
-sequenceTop (CmmProc info lbl params blocks) =
- CmmProc info lbl params (makeFarBranches $ sequenceBlocks blocks)
+sequenceTop (CmmProc info lbl params (ListGraph blocks)) =
+ CmmProc info lbl params (ListGraph $ makeFarBranches $ sequenceBlocks blocks)
-- The algorithm is very simple (and stupid): we make a graph out of
-- the blocks where there is an edge from one block to another iff the
-- output the block, then if it has an out edge, we move the
-- destination of the out edge to the front of the list, and continue.
-sequenceBlocks :: [NatBasicBlock] -> [NatBasicBlock]
+-- FYI, the classic layout for basic blocks uses postorder DFS; this
+-- algorithm is implemented in cmm/ZipCfg.hs (NR 6 Sep 2007).
+
+sequenceBlocks
+ :: Instruction instr
+ => [NatBasicBlock instr]
+ -> [NatBasicBlock instr]
+
sequenceBlocks [] = []
sequenceBlocks (entry:blocks) =
seqBlocks (mkNode entry : reverse (flattenSCCs (sccBlocks blocks)))
-- the first block is the entry point ==> it must remain at the start.
-sccBlocks :: [NatBasicBlock] -> [SCC (NatBasicBlock,Unique,[Unique])]
-sccBlocks blocks = stronglyConnCompR (map mkNode blocks)
-getOutEdges :: [Instr] -> [Unique]
-getOutEdges instrs = case jumpDests (last instrs) [] of
- [one] -> [getUnique one]
- _many -> []
- -- we're only interested in the last instruction of
- -- the block, and only if it has a single destination.
+sccBlocks
+ :: Instruction instr
+ => [NatBasicBlock instr]
+ -> [SCC ( NatBasicBlock instr
+ , Unique
+ , [Unique])]
+
+sccBlocks blocks = stronglyConnCompFromEdgedVerticesR (map mkNode blocks)
+
+-- we're only interested in the last instruction of
+-- the block, and only if it has a single destination.
+getOutEdges
+ :: Instruction instr
+ => [instr] -> [Unique]
+
+getOutEdges instrs
+ = case jumpDestsOfInstr (last instrs) of
+ [one] -> [getUnique one]
+ _many -> []
mkNode block@(BasicBlock id instrs) = (block, getUnique id, getOutEdges instrs)
-- Conditional branches on PowerPC are limited to +-32KB; if our Procs get too
-- big, we have to work around this limitation.
-makeFarBranches :: [NatBasicBlock] -> [NatBasicBlock]
+makeFarBranches
+ :: [NatBasicBlock Instr]
+ -> [NatBasicBlock Instr]
#if powerpc_TARGET_ARCH
makeFarBranches blocks
-- -----------------------------------------------------------------------------
-- Shortcut branches
-shortcutBranches :: DynFlags -> [NatCmmTop] -> [NatCmmTop]
+shortcutBranches
+ :: DynFlags
+ -> [NatCmmTop Instr]
+ -> [NatCmmTop Instr]
+
shortcutBranches dflags tops
| optLevel dflags < 1 = tops -- only with -O or higher
| otherwise = map (apply_mapping mapping) tops'
mapping = foldr plusUFM emptyUFM mappings
build_mapping top@(CmmData _ _) = (top, emptyUFM)
-build_mapping (CmmProc info lbl params [])
- = (CmmProc info lbl params [], emptyUFM)
-build_mapping (CmmProc info lbl params (head:blocks))
- = (CmmProc info lbl params (head:others), mapping)
+build_mapping (CmmProc info lbl params (ListGraph []))
+ = (CmmProc info lbl params (ListGraph []), emptyUFM)
+build_mapping (CmmProc info lbl params (ListGraph (head:blocks)))
+ = (CmmProc info lbl params (ListGraph (head:others)), mapping)
-- drop the shorted blocks, but don't ever drop the first one,
-- because it is pointed to by a global label.
where
-- find all the blocks that just consist of a jump that can be
-- shorted.
- (shortcut_blocks, others) = partitionWith split blocks
- split (BasicBlock id [insn]) | Just dest <- canShortcut insn
- = Left (id,dest)
- split other = Right other
+ -- Don't completely eliminate loops here -- that can leave a dangling jump!
+ (_, shortcut_blocks, others) = foldl split (emptyBlockSet, [], []) blocks
+ split (s, shortcut_blocks, others) b@(BasicBlock id [insn])
+ | Just (DestBlockId dest) <- canShortcut insn,
+ (elemBlockSet dest s) || dest == id -- loop checks
+ = (s, shortcut_blocks, b : others)
+ split (s, shortcut_blocks, others) (BasicBlock id [insn])
+ | Just dest <- canShortcut insn
+ = (extendBlockSet s id, (id,dest) : shortcut_blocks, others)
+ split (s, shortcut_blocks, others) other = (s, shortcut_blocks, other : others)
+
-- build a mapping from BlockId to JumpDest for shorting branches
mapping = foldl add emptyUFM shortcut_blocks
= CmmData sec (map (shortcutStatic (lookupUFM ufm)) statics)
-- we need to get the jump tables, so apply the mapping to the entries
-- of a CmmData too.
-apply_mapping ufm (CmmProc info lbl params blocks)
- = CmmProc info lbl params (map short_bb blocks)
+apply_mapping ufm (CmmProc info lbl params (ListGraph blocks))
+ = CmmProc info lbl params (ListGraph $ map short_bb blocks)
where
short_bb (BasicBlock id insns) = BasicBlock id $! map short_insn insns
short_insn i = shortcutJump (lookupUFM ufm) i
-- Switching between the two monads whilst carrying along the same
-- Unique supply breaks abstraction. Is that bad?
-genMachCode :: DynFlags -> RawCmmTop -> UniqSM ([NatCmmTop], [CLabel])
+genMachCode
+ :: DynFlags
+ -> RawCmmTop
+ -> UniqSM
+ ( [NatCmmTop Instr]
+ , [CLabel])
genMachCode dflags cmm_top
= do { initial_us <- getUs
; let initial_st = mkNatM_State initial_us 0 dflags
- (new_tops, final_st) = initNat initial_st (cmmTopCodeGen cmm_top)
+ (new_tops, final_st) = initNat initial_st (cmmTopCodeGen dflags cmm_top)
final_delta = natm_delta final_st
final_imports = natm_imports final_st
; if final_delta == 0
fixAssignsTop :: RawCmmTop -> UniqSM RawCmmTop
fixAssignsTop top@(CmmData _ _) = returnUs top
-fixAssignsTop (CmmProc info lbl params blocks) =
+fixAssignsTop (CmmProc info lbl params (ListGraph blocks)) =
mapUs fixAssignsBlock blocks `thenUs` \ blocks' ->
- returnUs (CmmProc info lbl params blocks')
+ returnUs (CmmProc info lbl params (ListGraph blocks'))
fixAssignsBlock :: CmmBasicBlock -> UniqSM CmmBasicBlock
fixAssignsBlock (BasicBlock id stmts) =
cmmToCmm :: DynFlags -> RawCmmTop -> (RawCmmTop, [CLabel])
cmmToCmm _ top@(CmmData _ _) = (top, [])
-cmmToCmm dflags (CmmProc info lbl params blocks) = runCmmOpt dflags $ do
+cmmToCmm dflags (CmmProc info lbl params (ListGraph blocks)) = runCmmOpt dflags $ do
blocks' <- mapM cmmBlockConFold (cmmMiniInline blocks)
- return $ CmmProc info lbl params blocks'
+ return $ CmmProc info lbl params (ListGraph blocks')
newtype CmmOptM a = CmmOptM (([CLabel], DynFlags) -> (# a, [CLabel] #))
e' <- cmmExprConFold CallReference e
return $ CmmCallee e' conv
other -> return other
- args' <- mapM (\(arg, hint) -> do
+ args' <- mapM (\(CmmHinted arg hint) -> do
arg' <- cmmExprConFold DataReference arg
- return (arg', hint)) args
+ return (CmmHinted arg' hint)) args
return $ CmmCall target' regs args' srt returns
CmmCondBranch test dest
-> do
dflags <- getDynFlagsCmmOpt
dynRef <- cmmMakeDynamicReference dflags addImportCmmOpt referenceKind lbl
- return $ cmmMachOpFold (MO_Add wordRep) [
+ return $ cmmMachOpFold (MO_Add wordWidth) [
dynRef,
- (CmmLit $ CmmInt (fromIntegral off) wordRep)
+ (CmmLit $ CmmInt (fromIntegral off) wordWidth)
]
#if powerpc_TARGET_ARCH
-- On powerpc (non-PIC), it's easier to jump directly to a label than
-- to use the register table, so we replace these registers
-- with the corresponding labels:
+ CmmReg (CmmGlobal EagerBlackholeInfo)
+ | not opt_PIC
+ -> cmmExprConFold referenceKind $
+ CmmLit (CmmLabel (mkRtsCodeLabel (sLit "__stg_EAGER_BLACKHOLE_INFO")))
CmmReg (CmmGlobal GCEnter1)
| not opt_PIC
-> cmmExprConFold referenceKind $
- CmmLit (CmmLabel (mkRtsCodeLabel SLIT( "__stg_gc_enter_1")))
+ CmmLit (CmmLabel (mkRtsCodeLabel (sLit "__stg_gc_enter_1")))
CmmReg (CmmGlobal GCFun)
| not opt_PIC
-> cmmExprConFold referenceKind $
- CmmLit (CmmLabel (mkRtsCodeLabel SLIT( "__stg_gc_fun")))
+ CmmLit (CmmLabel (mkRtsCodeLabel (sLit "__stg_gc_fun")))
#endif
CmmReg (CmmGlobal mid)
-> case mid of
BaseReg -> cmmExprConFold DataReference baseRegAddr
other -> cmmExprConFold DataReference
- (CmmLoad baseRegAddr (globalRegRep mid))
+ (CmmLoad baseRegAddr (globalRegType mid))
-- eliminate zero offsets
CmmRegOff reg 0
-> cmmExprConFold referenceKind (CmmReg reg)
-> case get_GlobalReg_reg_or_addr mid of
Left realreg -> return expr
Right baseRegAddr
- -> cmmExprConFold DataReference (CmmMachOp (MO_Add wordRep) [
+ -> cmmExprConFold DataReference (CmmMachOp (MO_Add wordWidth) [
CmmReg (CmmGlobal mid),
CmmLit (CmmInt (fromIntegral offset)
- wordRep)])
+ wordWidth)])
other
-> return other