-- -----------------------------------------------------------------------------
\begin{code}
-{-# OPTIONS -w #-}
--- The above warning supression flag is a temporary kludge.
--- While working on this module you are encouraged to remove it and fix
--- any warnings in the module. See
--- http://hackage.haskell.org/trac/ghc/wiki/Commentary/CodingStyle#Warnings
--- for details
-
module AsmCodeGen ( nativeCodeGen ) where
#include "HsVersions.h"
#include "nativeGen/NCG.h"
-import MachInstrs
-import MachRegs
-import MachCodeGen
-import PprMach
-import RegAllocInfo
-import NCGMonad
-import PositionIndependentCode
-import RegLiveness
-import RegCoalesce
-
-import qualified RegAlloc.Linear.Main as Linear
-import qualified RegAllocColor as Color
-import qualified RegAllocStats as Color
-import qualified GraphColor as Color
+import qualified X86.CodeGen
+import qualified X86.Regs
+import qualified X86.Instr
+import qualified X86.Ppr
+
+import qualified SPARC.CodeGen
+import qualified SPARC.Regs
+import qualified SPARC.Instr
+import qualified SPARC.Ppr
+import qualified SPARC.ShortcutJump
+import qualified SPARC.CodeGen.Expand
+
+import qualified PPC.CodeGen
+import qualified PPC.Cond
+import qualified PPC.Regs
+import qualified PPC.RegInfo
+import qualified PPC.Instr
+import qualified PPC.Ppr
+
+import RegAlloc.Liveness
+import qualified RegAlloc.Linear.Main as Linear
+
+import qualified GraphColor as Color
+import qualified RegAlloc.Graph.Main as Color
+import qualified RegAlloc.Graph.Stats as Color
+import qualified RegAlloc.Graph.TrivColorable as Color
+
+import TargetReg
+import Platform
+import Config
+import Instruction
+import PIC
+import Reg
+import NCGMonad
-import Cmm
-import CmmOpt ( cmmMiniInline, cmmMachOpFold )
-import PprCmm
+import BlockId
+import CgUtils ( fixStgRegisters )
+import OldCmm
+import CmmOpt ( cmmEliminateDeadBlocks, cmmMiniInline, cmmMachOpFold )
+import OldPprCmm
import CLabel
-import State
import UniqFM
import Unique ( Unique, getUnique )
import UniqSupply
-import List ( groupBy, sortBy )
import DynFlags
-#if powerpc_TARGET_ARCH
-import StaticFlags ( opt_Static, opt_PIC )
-#endif
+import StaticFlags
import Util
-import Config ( cProjectVersion )
-import Module
import Digraph
+import Pretty (Doc)
import qualified Pretty
import BufWrite
import Outputable
import FastString
import UniqSet
import ErrUtils
+import Module
-- DEBUGGING ONLY
--import OrdList
import Data.List
-import Data.Int
-import Data.Word
-import Data.Bits
import Data.Maybe
-import GHC.Exts
import Control.Monad
import System.IO
-- -----------------------------------------------------------------------------
-- Top-level of the native codegen
+data NcgImpl instr jumpDest = NcgImpl {
+ cmmTopCodeGen :: DynFlags -> RawCmmTop -> NatM [NatCmmTop instr],
+ generateJumpTableForInstr :: instr -> Maybe (NatCmmTop instr),
+ getJumpDestBlockId :: jumpDest -> Maybe BlockId,
+ canShortcut :: instr -> Maybe jumpDest,
+ shortcutStatic :: (BlockId -> Maybe jumpDest) -> CmmStatic -> CmmStatic,
+ shortcutJump :: (BlockId -> Maybe jumpDest) -> instr -> instr,
+ pprNatCmmTop :: NatCmmTop instr -> Doc,
+ maxSpillSlots :: Int,
+ allocatableRegs :: [RealReg],
+ ncg_x86fp_kludge :: [NatCmmTop instr] -> [NatCmmTop instr],
+ ncgExpandTop :: [NatCmmTop instr] -> [NatCmmTop instr],
+ ncgMakeFarBranches :: [NatBasicBlock instr] -> [NatBasicBlock instr]
+ }
+
--------------------
nativeCodeGen :: DynFlags -> Handle -> UniqSupply -> [RawCmm] -> IO ()
nativeCodeGen dflags h us cmms
+ = let nCG' ncgImpl = nativeCodeGen' dflags ncgImpl h us cmms
+ x86NcgImpl = NcgImpl {
+ cmmTopCodeGen = X86.CodeGen.cmmTopCodeGen
+ ,generateJumpTableForInstr = X86.CodeGen.generateJumpTableForInstr
+ ,getJumpDestBlockId = X86.Instr.getJumpDestBlockId
+ ,canShortcut = X86.Instr.canShortcut
+ ,shortcutStatic = X86.Instr.shortcutStatic
+ ,shortcutJump = X86.Instr.shortcutJump
+ ,pprNatCmmTop = X86.Ppr.pprNatCmmTop
+ ,maxSpillSlots = X86.Instr.maxSpillSlots
+ ,allocatableRegs = X86.Regs.allocatableRegs
+ ,ncg_x86fp_kludge = id
+ ,ncgExpandTop = id
+ ,ncgMakeFarBranches = id
+ }
+ in case platformArch $ targetPlatform dflags of
+ ArchX86 -> nCG' (x86NcgImpl { ncg_x86fp_kludge = map x86fp_kludge })
+ ArchX86_64 -> nCG' x86NcgImpl
+ ArchPPC ->
+ nCG' $ NcgImpl {
+ cmmTopCodeGen = PPC.CodeGen.cmmTopCodeGen
+ ,generateJumpTableForInstr = PPC.CodeGen.generateJumpTableForInstr
+ ,getJumpDestBlockId = PPC.RegInfo.getJumpDestBlockId
+ ,canShortcut = PPC.RegInfo.canShortcut
+ ,shortcutStatic = PPC.RegInfo.shortcutStatic
+ ,shortcutJump = PPC.RegInfo.shortcutJump
+ ,pprNatCmmTop = PPC.Ppr.pprNatCmmTop
+ ,maxSpillSlots = PPC.Instr.maxSpillSlots
+ ,allocatableRegs = PPC.Regs.allocatableRegs
+ ,ncg_x86fp_kludge = id
+ ,ncgExpandTop = id
+ ,ncgMakeFarBranches = makeFarBranches
+ }
+ ArchSPARC ->
+ nCG' $ NcgImpl {
+ cmmTopCodeGen = SPARC.CodeGen.cmmTopCodeGen
+ ,generateJumpTableForInstr = SPARC.CodeGen.generateJumpTableForInstr
+ ,getJumpDestBlockId = SPARC.ShortcutJump.getJumpDestBlockId
+ ,canShortcut = SPARC.ShortcutJump.canShortcut
+ ,shortcutStatic = SPARC.ShortcutJump.shortcutStatic
+ ,shortcutJump = SPARC.ShortcutJump.shortcutJump
+ ,pprNatCmmTop = SPARC.Ppr.pprNatCmmTop
+ ,maxSpillSlots = SPARC.Instr.maxSpillSlots
+ ,allocatableRegs = SPARC.Regs.allocatableRegs
+ ,ncg_x86fp_kludge = id
+ ,ncgExpandTop = map SPARC.CodeGen.Expand.expandTop
+ ,ncgMakeFarBranches = id
+ }
+ ArchPPC_64 ->
+ panic "nativeCodeGen: No NCG for PPC 64"
+
+nativeCodeGen' :: (Instruction instr, Outputable instr)
+ => DynFlags
+ -> NcgImpl instr jumpDest
+ -> Handle -> UniqSupply -> [RawCmm] -> IO ()
+nativeCodeGen' dflags ncgImpl h us cmms
= do
let split_cmms = concat $ map add_split cmms
-
-- BufHandle is a performance hack. We could hide it inside
-- Pretty if it weren't for the fact that we do lots of little
-- printDocs here (in order to do codegen in constant space).
bufh <- newBufHandle h
- (imports, prof) <- cmmNativeGens dflags bufh us split_cmms [] [] 0
+ (imports, prof) <- cmmNativeGens dflags ncgImpl bufh us split_cmms [] [] 0
bFlush bufh
let (native, colorStats, linearStats)
-- dump native code
dumpIfSet_dyn dflags
Opt_D_dump_asm "Asm code"
- (vcat $ map (docToSDoc . pprNatCmmTop) $ concat native)
+ (vcat $ map (docToSDoc . pprNatCmmTop ncgImpl) $ concat native)
-- dump global NCG stats for graph coloring allocator
(case concat $ catMaybes colorStats of
dumpIfSet_dyn dflags
Opt_D_dump_asm_conflicts "Register conflict graph"
- $ Color.dotGraph Color.regDotColor trivColorable
+ $ Color.dotGraph
+ targetRegDotColor
+ (Color.trivColorable
+ targetVirtualRegSqueeze
+ targetRealRegSqueeze)
$ graphGlobal)
-- write out the imports
Pretty.printDoc Pretty.LeftMode h
- $ makeImportsDoc (concat imports)
+ $ makeImportsDoc dflags (concat imports)
return ()
| dopt Opt_SplitObjs dflags = split_marker : tops
| otherwise = tops
- split_marker = CmmProc [] mkSplitMarkerLabel [] (ListGraph [])
+ split_marker = CmmProc [] mkSplitMarkerLabel (ListGraph [])
-- | Do native code generation on all these cmms.
--
-cmmNativeGens dflags h us [] impAcc profAcc count
+cmmNativeGens :: (Instruction instr, Outputable instr)
+ => DynFlags
+ -> NcgImpl instr jumpDest
+ -> BufHandle
+ -> UniqSupply
+ -> [RawCmmTop]
+ -> [[CLabel]]
+ -> [ ([NatCmmTop instr],
+ Maybe [Color.RegAllocStats instr],
+ Maybe [Linear.RegAllocStats]) ]
+ -> Int
+ -> IO ( [[CLabel]],
+ [([NatCmmTop instr],
+ Maybe [Color.RegAllocStats instr],
+ Maybe [Linear.RegAllocStats])] )
+
+cmmNativeGens _ _ _ _ [] impAcc profAcc _
= return (reverse impAcc, reverse profAcc)
-cmmNativeGens dflags h us (cmm : cmms) impAcc profAcc count
+cmmNativeGens dflags ncgImpl h us (cmm : cmms) impAcc profAcc count
= do
(us', native, imports, colorStats, linearStats)
- <- cmmNativeGen dflags us cmm count
+ <- cmmNativeGen dflags ncgImpl us cmm count
Pretty.bufLeftRender h
- $ {-# SCC "pprNativeCode" #-} Pretty.vcat $ map pprNatCmmTop native
+ $ {-# SCC "pprNativeCode" #-} Pretty.vcat $ map (pprNatCmmTop ncgImpl) native
- let lsPprNative =
+ -- carefully evaluate this strictly. Binding it with 'let'
+ -- and then using 'seq' doesn't work, because the let
+ -- apparently gets inlined first.
+ lsPprNative <- return $!
if dopt Opt_D_dump_asm dflags
|| dopt Opt_D_dump_asm_stats dflags
then native
else []
- let count' = count + 1;
-
+ count' <- return $! count + 1;
-- force evaulation all this stuff to avoid space leaks
seqString (showSDoc $ vcat $ map ppr imports) `seq` return ()
- lsPprNative `seq` return ()
- count' `seq` return ()
- cmmNativeGens dflags h us' cmms
+ cmmNativeGens dflags ncgImpl
+ h us' cmms
(imports : impAcc)
((lsPprNative, colorStats, linearStats) : profAcc)
count'
-- | Complete native code generation phase for a single top-level chunk of Cmm.
-- Dumping the output of each stage along the way.
-- Global conflict graph and NGC stats
-cmmNativeGen
- :: DynFlags
+cmmNativeGen
+ :: (Instruction instr, Outputable instr)
+ => DynFlags
+ -> NcgImpl instr jumpDest
-> UniqSupply
- -> RawCmmTop -- ^ the cmm to generate code for
- -> Int -- ^ sequence number of this top thing
+ -> RawCmmTop -- ^ the cmm to generate code for
+ -> Int -- ^ sequence number of this top thing
-> IO ( UniqSupply
- , [NatCmmTop] -- native code
- , [CLabel] -- things imported by this cmm
- , Maybe [Color.RegAllocStats] -- stats for the coloring register allocator
- , Maybe [Linear.RegAllocStats]) -- stats for the linear register allocators
+ , [NatCmmTop instr] -- native code
+ , [CLabel] -- things imported by this cmm
+ , Maybe [Color.RegAllocStats instr] -- stats for the coloring register allocator
+ , Maybe [Linear.RegAllocStats]) -- stats for the linear register allocators
-cmmNativeGen dflags us cmm count
+cmmNativeGen dflags ncgImpl us cmm count
= do
-- rewrite assignments to global regs
- let (fixed_cmm, usFix) =
- {-# SCC "fixAssignsTop" #-}
- initUs us $ fixAssignsTop cmm
+ let fixed_cmm =
+ {-# SCC "fixStgRegisters" #-}
+ fixStgRegisters cmm
-- cmm to cmm optimisations
let (opt_cmm, imports) =
-- generate native code from cmm
let ((native, lastMinuteImports), usGen) =
{-# SCC "genMachCode" #-}
- initUs usFix $ genMachCode dflags opt_cmm
+ initUs us $ genMachCode dflags (cmmTopCodeGen ncgImpl) opt_cmm
dumpIfSet_dyn dflags
Opt_D_dump_asm_native "Native code"
- (vcat $ map (docToSDoc . pprNatCmmTop) native)
-
+ (vcat $ map (docToSDoc . pprNatCmmTop ncgImpl) native)
-- tag instructions with register liveness information
let (withLiveness, usLive) =
{-# SCC "regLiveness" #-}
- initUs usGen $ mapUs regLiveness native
+ initUs usGen
+ $ mapUs regLiveness
+ $ map natCmmTopToLive native
dumpIfSet_dyn dflags
Opt_D_dump_asm_liveness "Liveness annotations added"
(vcat $ map ppr withLiveness)
-
-- allocate registers
(alloced, usAlloc, ppr_raStatsColor, ppr_raStatsLinear) <-
|| dopt Opt_RegsIterative dflags)
then do
-- the regs usable for allocation
- let alloc_regs
+ let (alloc_regs :: UniqFM (UniqSet RealReg))
= foldr (\r -> plusUFM_C unionUniqSets
- $ unitUFM (regClass r) (unitUniqSet r))
+ $ unitUFM (targetClassOfRealReg r) (unitUniqSet r))
emptyUFM
- $ map RealReg allocatableRegs
+ $ allocatableRegs ncgImpl
- -- graph coloring register allocation
+ -- do the graph coloring register allocation
let ((alloced, regAllocStats), usAlloc)
= {-# SCC "RegAlloc" #-}
initUs usLive
$ Color.regAlloc
dflags
alloc_regs
- (mkUniqSet [0..maxSpillSlots])
+ (mkUniqSet [0 .. maxSpillSlots ncgImpl])
withLiveness
-- dump out what happened during register allocation
dumpIfSet_dyn dflags
Opt_D_dump_asm_regalloc "Registers allocated"
- (vcat $ map (docToSDoc . pprNatCmmTop) alloced)
+ (vcat $ map (docToSDoc . pprNatCmmTop ncgImpl) alloced)
dumpIfSet_dyn dflags
Opt_D_dump_asm_regalloc_stages "Build/spill stages"
dumpIfSet_dyn dflags
Opt_D_dump_asm_regalloc "Registers allocated"
- (vcat $ map (docToSDoc . pprNatCmmTop) alloced)
+ (vcat $ map (docToSDoc . pprNatCmmTop ncgImpl) alloced)
let mPprStats =
if dopt Opt_D_dump_asm_stats dflags
, Nothing
, mPprStats)
+ ---- x86fp_kludge. This pass inserts ffree instructions to clear
+ ---- the FPU stack on x86. The x86 ABI requires that the FPU stack
+ ---- is clear, and library functions can return odd results if it
+ ---- isn't.
+ ----
+ ---- NB. must happen before shortcutBranches, because that
+ ---- generates JXX_GBLs which we can't fix up in x86fp_kludge.
+ let kludged = {-# SCC "x86fp_kludge" #-} ncg_x86fp_kludge ncgImpl alloced
+
+ ---- generate jump tables
+ let tabled =
+ {-# SCC "generateJumpTables" #-}
+ generateJumpTables ncgImpl kludged
+
---- shortcut branches
let shorted =
{-# SCC "shortcutBranches" #-}
- shortcutBranches dflags alloced
+ shortcutBranches dflags ncgImpl tabled
---- sequence blocks
let sequenced =
{-# SCC "sequenceBlocks" #-}
- map sequenceTop shorted
-
- ---- x86fp_kludge
- let final_mach_code =
-#if i386_TARGET_ARCH
- {-# SCC "x86fp_kludge" #-}
- map x86fp_kludge sequenced
-#else
- sequenced
-#endif
+ map (sequenceTop ncgImpl) shorted
+
+ ---- expansion of SPARC synthetic instrs
+ let expanded =
+ {-# SCC "sparc_expand" #-}
+ ncgExpandTop ncgImpl sequenced
+
+ dumpIfSet_dyn dflags
+ Opt_D_dump_asm_expanded "Synthetic instructions expanded"
+ (vcat $ map (docToSDoc . pprNatCmmTop ncgImpl) expanded)
return ( usAlloc
- , final_mach_code
+ , expanded
, lastMinuteImports ++ imports
, ppr_raStatsColor
, ppr_raStatsLinear)
-#if i386_TARGET_ARCH
-x86fp_kludge :: NatCmmTop -> NatCmmTop
+x86fp_kludge :: NatCmmTop X86.Instr.Instr -> NatCmmTop X86.Instr.Instr
x86fp_kludge top@(CmmData _ _) = top
-x86fp_kludge top@(CmmProc info lbl params (ListGraph code)) =
- CmmProc info lbl params (ListGraph $ i386_insert_ffrees code)
-#endif
+x86fp_kludge (CmmProc info lbl (ListGraph code)) =
+ CmmProc info lbl (ListGraph $ X86.Instr.i386_insert_ffrees code)
-- | Build a doc for all the imports.
--
-makeImportsDoc :: [CLabel] -> Pretty.Doc
-makeImportsDoc imports
+makeImportsDoc :: DynFlags -> [CLabel] -> Pretty.Doc
+makeImportsDoc dflags imports
= dyld_stubs imports
#if HAVE_SUBSECTIONS_VIA_SYMBOLS
-- stack so add the note in:
Pretty.$$ Pretty.text ".section .note.GNU-stack,\"\",@progbits"
#endif
-#if !defined(darwin_TARGET_OS)
-- And just because every other compiler does, lets stick in
-- an identifier directive: .ident "GHC x.y.z"
- Pretty.$$ let compilerIdent = Pretty.text "GHC" Pretty.<+>
+ Pretty.$$ let compilerIdent = Pretty.text "GHC" Pretty.<+>
Pretty.text cProjectVersion
in Pretty.text ".ident" Pretty.<+>
Pretty.doubleQuotes compilerIdent
-#endif
where
-- Generate "symbol stubs" for all external symbols that might
{- dyld_stubs imps = Pretty.vcat $ map pprDyldSymbolStub $
map head $ group $ sort imps-}
+ arch = platformArch $ targetPlatform dflags
+ os = platformOS $ targetPlatform dflags
+
-- (Hack) sometimes two Labels pretty-print the same, but have
-- different uniques; so we compare their text versions...
dyld_stubs imps
- | needImportedSymbols
+ | needImportedSymbols arch os
= Pretty.vcat $
- (pprGotDeclaration :) $
- map (pprImportedSymbol . fst . head) $
+ (pprGotDeclaration arch os :) $
+ map ( pprImportedSymbol arch os . fst . head) $
groupBy (\(_,a) (_,b) -> a == b) $
sortBy (\(_,a) (_,b) -> compare a b) $
map doPpr $
| otherwise
= Pretty.empty
- doPpr lbl = (lbl, Pretty.render $ pprCLabel lbl astyle)
+ doPpr lbl = (lbl, renderWithStyle (pprCLabel lbl) astyle)
astyle = mkCodeStyle AsmStyle
-- such that as many of the local jumps as possible turn into
-- fallthroughs.
-sequenceTop :: NatCmmTop -> NatCmmTop
-sequenceTop top@(CmmData _ _) = top
-sequenceTop (CmmProc info lbl params (ListGraph blocks)) =
- CmmProc info lbl params (ListGraph $ makeFarBranches $ sequenceBlocks blocks)
+sequenceTop
+ :: Instruction instr
+ => NcgImpl instr jumpDest -> NatCmmTop instr -> NatCmmTop instr
+
+sequenceTop _ top@(CmmData _ _) = top
+sequenceTop ncgImpl (CmmProc info lbl (ListGraph blocks)) =
+ CmmProc info lbl (ListGraph $ ncgMakeFarBranches ncgImpl $ sequenceBlocks blocks)
-- The algorithm is very simple (and stupid): we make a graph out of
-- the blocks where there is an edge from one block to another iff the
-- destination of the out edge to the front of the list, and continue.
-- FYI, the classic layout for basic blocks uses postorder DFS; this
--- algorithm is implemented in cmm/ZipCfg.hs (NR 6 Sep 2007).
+-- algorithm is implemented in Hoopl.
+
+sequenceBlocks
+ :: Instruction instr
+ => [NatBasicBlock instr]
+ -> [NatBasicBlock instr]
-sequenceBlocks :: [NatBasicBlock] -> [NatBasicBlock]
sequenceBlocks [] = []
sequenceBlocks (entry:blocks) =
seqBlocks (mkNode entry : reverse (flattenSCCs (sccBlocks blocks)))
-- the first block is the entry point ==> it must remain at the start.
-sccBlocks :: [NatBasicBlock] -> [SCC (NatBasicBlock,Unique,[Unique])]
+
+sccBlocks
+ :: Instruction instr
+ => [NatBasicBlock instr]
+ -> [SCC ( NatBasicBlock instr
+ , Unique
+ , [Unique])]
+
sccBlocks blocks = stronglyConnCompFromEdgedVerticesR (map mkNode blocks)
-getOutEdges :: [Instr] -> [Unique]
-getOutEdges instrs = case jumpDests (last instrs) [] of
- [one] -> [getUnique one]
- _many -> []
- -- we're only interested in the last instruction of
- -- the block, and only if it has a single destination.
+-- we're only interested in the last instruction of
+-- the block, and only if it has a single destination.
+getOutEdges
+ :: Instruction instr
+ => [instr] -> [Unique]
+
+getOutEdges instrs
+ = case jumpDestsOfInstr (last instrs) of
+ [one] -> [getUnique one]
+ _many -> []
+mkNode :: (Instruction t)
+ => GenBasicBlock t
+ -> (GenBasicBlock t, Unique, [Unique])
mkNode block@(BasicBlock id instrs) = (block, getUnique id, getOutEdges instrs)
+seqBlocks :: (Eq t) => [(GenBasicBlock t1, t, [t])] -> [GenBasicBlock t1]
seqBlocks [] = []
seqBlocks ((block,_,[]) : rest)
= block : seqBlocks rest
-- fallthroughs within a loop.
seqBlocks _ = panic "AsmCodegen:seqBlocks"
-reorder id accum [] = (False, reverse accum)
+reorder :: (Eq a) => a -> [(t, a, t1)] -> [(t, a, t1)] -> (Bool, [(t, a, t1)])
+reorder _ accum [] = (False, reverse accum)
reorder id accum (b@(block,id',out) : rest)
| id == id' = (True, (block,id,out) : reverse accum ++ rest)
| otherwise = reorder id (b:accum) rest
-- Conditional branches on PowerPC are limited to +-32KB; if our Procs get too
-- big, we have to work around this limitation.
-makeFarBranches :: [NatBasicBlock] -> [NatBasicBlock]
-
-#if powerpc_TARGET_ARCH
+makeFarBranches
+ :: [NatBasicBlock PPC.Instr.Instr]
+ -> [NatBasicBlock PPC.Instr.Instr]
makeFarBranches blocks
| last blockAddresses < nearLimit = blocks
| otherwise = zipWith handleBlock blockAddresses blocks
handleBlock addr (BasicBlock id instrs)
= BasicBlock id (zipWith makeFar [addr..] instrs)
- makeFar addr (BCC ALWAYS tgt) = BCC ALWAYS tgt
- makeFar addr (BCC cond tgt)
+ makeFar _ (PPC.Instr.BCC PPC.Cond.ALWAYS tgt) = PPC.Instr.BCC PPC.Cond.ALWAYS tgt
+ makeFar addr (PPC.Instr.BCC cond tgt)
| abs (addr - targetAddr) >= nearLimit
- = BCCFAR cond tgt
+ = PPC.Instr.BCCFAR cond tgt
| otherwise
- = BCC cond tgt
+ = PPC.Instr.BCC cond tgt
where Just targetAddr = lookupUFM blockAddressMap tgt
- makeFar addr other = other
+ makeFar _ other = other
nearLimit = 7000 -- 8192 instructions are allowed; let's keep some
-- distance, as we have a few pseudo-insns that are
-- things exactly
blockAddressMap = listToUFM $ zip (map blockId blocks) blockAddresses
-#else
-makeFarBranches = id
-#endif
+
+-- -----------------------------------------------------------------------------
+-- Generate jump tables
+
+-- Analyzes all native code and generates data sections for all jump
+-- table instructions.
+generateJumpTables
+ :: NcgImpl instr jumpDest
+ -> [NatCmmTop instr] -> [NatCmmTop instr]
+generateJumpTables ncgImpl xs = concatMap f xs
+ where f p@(CmmProc _ _ (ListGraph xs)) = p : concatMap g xs
+ f p = [p]
+ g (BasicBlock _ xs) = catMaybes (map (generateJumpTableForInstr ncgImpl) xs)
-- -----------------------------------------------------------------------------
-- Shortcut branches
-shortcutBranches :: DynFlags -> [NatCmmTop] -> [NatCmmTop]
-shortcutBranches dflags tops
+shortcutBranches
+ :: DynFlags
+ -> NcgImpl instr jumpDest
+ -> [NatCmmTop instr]
+ -> [NatCmmTop instr]
+
+shortcutBranches dflags ncgImpl tops
| optLevel dflags < 1 = tops -- only with -O or higher
- | otherwise = map (apply_mapping mapping) tops'
+ | otherwise = map (apply_mapping ncgImpl mapping) tops'
where
- (tops', mappings) = mapAndUnzip build_mapping tops
+ (tops', mappings) = mapAndUnzip (build_mapping ncgImpl) tops
mapping = foldr plusUFM emptyUFM mappings
-build_mapping top@(CmmData _ _) = (top, emptyUFM)
-build_mapping (CmmProc info lbl params (ListGraph []))
- = (CmmProc info lbl params (ListGraph []), emptyUFM)
-build_mapping (CmmProc info lbl params (ListGraph (head:blocks)))
- = (CmmProc info lbl params (ListGraph (head:others)), mapping)
+build_mapping :: NcgImpl instr jumpDest
+ -> GenCmmTop d t (ListGraph instr)
+ -> (GenCmmTop d t (ListGraph instr), UniqFM jumpDest)
+build_mapping _ top@(CmmData _ _) = (top, emptyUFM)
+build_mapping _ (CmmProc info lbl (ListGraph []))
+ = (CmmProc info lbl (ListGraph []), emptyUFM)
+build_mapping ncgImpl (CmmProc info lbl (ListGraph (head:blocks)))
+ = (CmmProc info lbl (ListGraph (head:others)), mapping)
-- drop the shorted blocks, but don't ever drop the first one,
-- because it is pointed to by a global label.
where
-- find all the blocks that just consist of a jump that can be
-- shorted.
- (shortcut_blocks, others) = partitionWith split blocks
- split (BasicBlock id [insn]) | Just dest <- canShortcut insn
- = Left (id,dest)
- split other = Right other
+ -- Don't completely eliminate loops here -- that can leave a dangling jump!
+ (_, shortcut_blocks, others) = foldl split (emptyBlockSet, [], []) blocks
+ split (s, shortcut_blocks, others) b@(BasicBlock id [insn])
+ | Just jd <- canShortcut ncgImpl insn,
+ Just dest <- getJumpDestBlockId ncgImpl jd,
+ (setMember dest s) || dest == id -- loop checks
+ = (s, shortcut_blocks, b : others)
+ split (s, shortcut_blocks, others) (BasicBlock id [insn])
+ | Just dest <- canShortcut ncgImpl insn
+ = (setInsert id s, (id,dest) : shortcut_blocks, others)
+ split (s, shortcut_blocks, others) other = (s, shortcut_blocks, other : others)
+
-- build a mapping from BlockId to JumpDest for shorting branches
mapping = foldl add emptyUFM shortcut_blocks
add ufm (id,dest) = addToUFM ufm id dest
-apply_mapping ufm (CmmData sec statics)
- = CmmData sec (map (shortcutStatic (lookupUFM ufm)) statics)
+apply_mapping :: NcgImpl instr jumpDest
+ -> UniqFM jumpDest
+ -> GenCmmTop CmmStatic h (ListGraph instr)
+ -> GenCmmTop CmmStatic h (ListGraph instr)
+apply_mapping ncgImpl ufm (CmmData sec statics)
+ = CmmData sec (map (shortcutStatic ncgImpl (lookupUFM ufm)) statics)
-- we need to get the jump tables, so apply the mapping to the entries
-- of a CmmData too.
-apply_mapping ufm (CmmProc info lbl params (ListGraph blocks))
- = CmmProc info lbl params (ListGraph $ map short_bb blocks)
+apply_mapping ncgImpl ufm (CmmProc info lbl (ListGraph blocks))
+ = CmmProc info lbl (ListGraph $ map short_bb blocks)
where
short_bb (BasicBlock id insns) = BasicBlock id $! map short_insn insns
- short_insn i = shortcutJump (lookupUFM ufm) i
+ short_insn i = shortcutJump ncgImpl (lookupUFM ufm) i
-- shortcutJump should apply the mapping repeatedly,
-- just in case we can short multiple branches.
-- Switching between the two monads whilst carrying along the same
-- Unique supply breaks abstraction. Is that bad?
-genMachCode :: DynFlags -> RawCmmTop -> UniqSM ([NatCmmTop], [CLabel])
+genMachCode
+ :: DynFlags
+ -> (DynFlags -> RawCmmTop -> NatM [NatCmmTop instr])
+ -> RawCmmTop
+ -> UniqSM
+ ( [NatCmmTop instr]
+ , [CLabel])
-genMachCode dflags cmm_top
+genMachCode dflags cmmTopCodeGen cmm_top
= do { initial_us <- getUs
; let initial_st = mkNatM_State initial_us 0 dflags
- (new_tops, final_st) = initNat initial_st (cmmTopCodeGen cmm_top)
+ (new_tops, final_st) = initNat initial_st (cmmTopCodeGen dflags cmm_top)
final_delta = natm_delta final_st
final_imports = natm_imports final_st
; if final_delta == 0
}
-- -----------------------------------------------------------------------------
--- Fixup assignments to global registers so that they assign to
--- locations within the RegTable, if appropriate.
-
--- Note that we currently don't fixup reads here: they're done by
--- the generic optimiser below, to avoid having two separate passes
--- over the Cmm.
-
-fixAssignsTop :: RawCmmTop -> UniqSM RawCmmTop
-fixAssignsTop top@(CmmData _ _) = returnUs top
-fixAssignsTop (CmmProc info lbl params (ListGraph blocks)) =
- mapUs fixAssignsBlock blocks `thenUs` \ blocks' ->
- returnUs (CmmProc info lbl params (ListGraph blocks'))
-
-fixAssignsBlock :: CmmBasicBlock -> UniqSM CmmBasicBlock
-fixAssignsBlock (BasicBlock id stmts) =
- fixAssigns stmts `thenUs` \ stmts' ->
- returnUs (BasicBlock id stmts')
-
-fixAssigns :: [CmmStmt] -> UniqSM [CmmStmt]
-fixAssigns stmts =
- mapUs fixAssign stmts `thenUs` \ stmtss ->
- returnUs (concat stmtss)
-
-fixAssign :: CmmStmt -> UniqSM [CmmStmt]
-fixAssign (CmmAssign (CmmGlobal reg) src)
- | Left realreg <- reg_or_addr
- = returnUs [CmmAssign (CmmGlobal reg) src]
- | Right baseRegAddr <- reg_or_addr
- = returnUs [CmmStore baseRegAddr src]
- -- Replace register leaves with appropriate StixTrees for
- -- the given target. GlobalRegs which map to a reg on this
- -- arch are left unchanged. Assigning to BaseReg is always
- -- illegal, so we check for that.
- where
- reg_or_addr = get_GlobalReg_reg_or_addr reg
-
-fixAssign other_stmt = returnUs [other_stmt]
-
--- -----------------------------------------------------------------------------
-- Generic Cmm optimiser
{-
(a) Constant folding
(b) Simple inlining: a temporary which is assigned to and then
used, once, can be shorted.
- (c) Replacement of references to GlobalRegs which do not have
- machine registers by the appropriate memory load (eg.
- Hp ==> *(BaseReg + 34) ).
- (d) Position independent code and dynamic linking
+ (c) Position independent code and dynamic linking
(i) introduce the appropriate indirections
and position independent refs
(ii) compile a list of imported symbols
-Ideas for other things we could do (ToDo):
+Ideas for other things we could do:
- shortcut jumps-to-jumps
- - eliminate dead code blocks
- simple CSE: if an expr is assigned to a temp, then replace later occs of
that expr with the temp, until the expr is no longer valid (can push through
temp assignments, and certain assigns to mem...)
cmmToCmm :: DynFlags -> RawCmmTop -> (RawCmmTop, [CLabel])
cmmToCmm _ top@(CmmData _ _) = (top, [])
-cmmToCmm dflags (CmmProc info lbl params (ListGraph blocks)) = runCmmOpt dflags $ do
- blocks' <- mapM cmmBlockConFold (cmmMiniInline blocks)
- return $ CmmProc info lbl params (ListGraph blocks')
+cmmToCmm dflags (CmmProc info lbl (ListGraph blocks)) = runCmmOpt dflags $ do
+ blocks' <- mapM cmmBlockConFold (cmmMiniInline (cmmEliminateDeadBlocks blocks))
+ return $ CmmProc info lbl (ListGraph blocks')
newtype CmmOptM a = CmmOptM (([CLabel], DynFlags) -> (# a, [CLabel] #))
CmmOptM g' -> g' (imports', dflags)
addImportCmmOpt :: CLabel -> CmmOptM ()
-addImportCmmOpt lbl = CmmOptM $ \(imports, dflags) -> (# (), lbl:imports #)
+addImportCmmOpt lbl = CmmOptM $ \(imports, _dflags) -> (# (), lbl:imports #)
getDynFlagsCmmOpt :: CmmOptM DynFlags
getDynFlagsCmmOpt = CmmOptM $ \(imports, dflags) -> (# dflags, imports #)
stmts' <- mapM cmmStmtConFold stmts
return $ BasicBlock id stmts'
+cmmStmtConFold :: CmmStmt -> CmmOptM CmmStmt
cmmStmtConFold stmt
= case stmt of
CmmAssign reg src
CmmComment (mkFastString ("deleted: " ++
showSDoc (pprStmt stmt)))
- CmmLit (CmmInt n _) -> CmmBranch dest
- other -> CmmCondBranch test' dest
+ CmmLit (CmmInt _ _) -> CmmBranch dest
+ _other -> CmmCondBranch test' dest
CmmSwitch expr ids
-> do expr' <- cmmExprConFold DataReference expr
-> return other
-cmmExprConFold referenceKind expr
- = case expr of
+cmmExprConFold :: ReferenceKind -> CmmExpr -> CmmOptM CmmExpr
+cmmExprConFold referenceKind expr = do
+ dflags <- getDynFlagsCmmOpt
+ let arch = platformArch (targetPlatform dflags)
+ case expr of
CmmLoad addr rep
-> do addr' <- cmmExprConFold DataReference addr
return $ CmmLoad addr' rep
CmmLit (CmmLabel lbl)
-> do
- dflags <- getDynFlagsCmmOpt
cmmMakeDynamicReference dflags addImportCmmOpt referenceKind lbl
CmmLit (CmmLabelOff lbl off)
-> do
- dflags <- getDynFlagsCmmOpt
dynRef <- cmmMakeDynamicReference dflags addImportCmmOpt referenceKind lbl
return $ cmmMachOpFold (MO_Add wordWidth) [
dynRef,
(CmmLit $ CmmInt (fromIntegral off) wordWidth)
]
-#if powerpc_TARGET_ARCH
- -- On powerpc (non-PIC), it's easier to jump directly to a label than
- -- to use the register table, so we replace these registers
- -- with the corresponding labels:
+ -- On powerpc (non-PIC), it's easier to jump directly to a label than
+ -- to use the register table, so we replace these registers
+ -- with the corresponding labels:
CmmReg (CmmGlobal EagerBlackholeInfo)
- | not opt_PIC
+ | arch == ArchPPC && not opt_PIC
-> cmmExprConFold referenceKind $
- CmmLit (CmmLabel (mkRtsCodeLabel (sLit "__stg_EAGER_BLACKHOLE_INFO")))
+ CmmLit (CmmLabel (mkCmmCodeLabel rtsPackageId (fsLit "__stg_EAGER_BLACKHOLE_info")))
CmmReg (CmmGlobal GCEnter1)
- | not opt_PIC
+ | arch == ArchPPC && not opt_PIC
-> cmmExprConFold referenceKind $
- CmmLit (CmmLabel (mkRtsCodeLabel (sLit "__stg_gc_enter_1")))
+ CmmLit (CmmLabel (mkCmmCodeLabel rtsPackageId (fsLit "__stg_gc_enter_1")))
CmmReg (CmmGlobal GCFun)
- | not opt_PIC
+ | arch == ArchPPC && not opt_PIC
-> cmmExprConFold referenceKind $
- CmmLit (CmmLabel (mkRtsCodeLabel (sLit "__stg_gc_fun")))
-#endif
+ CmmLit (CmmLabel (mkCmmCodeLabel rtsPackageId (fsLit "__stg_gc_fun")))
- CmmReg (CmmGlobal mid)
- -- Replace register leaves with appropriate StixTrees for
- -- the given target. MagicIds which map to a reg on this
- -- arch are left unchanged. For the rest, BaseReg is taken
- -- to mean the address of the reg table in MainCapability,
- -- and for all others we generate an indirection to its
- -- location in the register table.
- -> case get_GlobalReg_reg_or_addr mid of
- Left realreg -> return expr
- Right baseRegAddr
- -> case mid of
- BaseReg -> cmmExprConFold DataReference baseRegAddr
- other -> cmmExprConFold DataReference
- (CmmLoad baseRegAddr (globalRegType mid))
- -- eliminate zero offsets
- CmmRegOff reg 0
- -> cmmExprConFold referenceKind (CmmReg reg)
-
- CmmRegOff (CmmGlobal mid) offset
- -- RegOf leaves are just a shorthand form. If the reg maps
- -- to a real reg, we keep the shorthand, otherwise, we just
- -- expand it and defer to the above code.
- -> case get_GlobalReg_reg_or_addr mid of
- Left realreg -> return expr
- Right baseRegAddr
- -> cmmExprConFold DataReference (CmmMachOp (MO_Add wordWidth) [
- CmmReg (CmmGlobal mid),
- CmmLit (CmmInt (fromIntegral offset)
- wordWidth)])
other
-> return other
--- -----------------------------------------------------------------------------
--- Utils
-
-bind f x = x $! f
-
\end{code}