#include "HsVersions.h"
#include "nativeGen/NCG.h"
-import MachInstrs
-import MachRegs
-import MachCodeGen
-import PprMach
-import RegAllocInfo
+
+#if alpha_TARGET_ARCH
+import Alpha.CodeGen
+import Alpha.Regs
+import Alpha.RegInfo
+import Alpha.Instr
+
+#elif i386_TARGET_ARCH || x86_64_TARGET_ARCH
+import X86.CodeGen
+import X86.Regs
+import X86.RegInfo
+import X86.Instr
+import X86.Ppr
+
+#elif sparc_TARGET_ARCH
+import SPARC.CodeGen
+import SPARC.Regs
+import SPARC.Instr
+import SPARC.Ppr
+import SPARC.ShortcutJump
+
+#elif powerpc_TARGET_ARCH
+import PPC.CodeGen
+import PPC.Cond
+import PPC.Regs
+import PPC.RegInfo
+import PPC.Instr
+import PPC.Ppr
+
+#else
+#error "AsmCodeGen: unknown architecture"
+
+#endif
+
+import RegAlloc.Liveness
+import qualified RegAlloc.Linear.Main as Linear
+
+import qualified GraphColor as Color
+import qualified RegAlloc.Graph.Main as Color
+import qualified RegAlloc.Graph.Stats as Color
+import qualified RegAlloc.Graph.Coalesce as Color
+import qualified RegAlloc.Graph.TrivColorable as Color
+
+import qualified SPARC.CodeGen.Expand as SPARC
+
+import TargetReg
+import Platform
+import Instruction
+import PIC
+import Reg
+import RegClass
import NCGMonad
-import PositionIndependentCode
-import RegLiveness
-import RegCoalesce
-import qualified RegAllocLinear as Linear
-import qualified RegAllocColor as Color
-import qualified RegAllocStats as Color
-import qualified GraphColor as Color
+import BlockId
+import CgUtils ( fixStgRegisters )
import Cmm
import CmmOpt ( cmmMiniInline, cmmMachOpFold )
-import PprCmm ( pprStmt, pprCmms, pprCmm )
-import MachOp
+import PprCmm
import CLabel
import State
import UniqFM
import Unique ( Unique, getUnique )
import UniqSupply
-import FastTypes
-import List ( groupBy, sortBy )
-import ErrUtils ( dumpIfSet_dyn )
import DynFlags
+#if powerpc_TARGET_ARCH
import StaticFlags ( opt_Static, opt_PIC )
+#endif
import Util
import Config ( cProjectVersion )
import Module
import Digraph
import qualified Pretty
+import BufWrite
import Outputable
import FastString
import UniqSet
= do
let split_cmms = concat $ map add_split cmms
- (imports, prof)
- <- cmmNativeGens dflags h us split_cmms [] []
+ -- BufHandle is a performance hack. We could hide it inside
+ -- Pretty if it weren't for the fact that we do lots of little
+ -- printDocs here (in order to do codegen in constant space).
+ bufh <- newBufHandle h
+ (imports, prof) <- cmmNativeGens dflags bufh us split_cmms [] [] 0
+ bFlush bufh
let (native, colorStats, linearStats)
= unzip3 prof
dumpIfSet_dyn dflags
Opt_D_dump_asm_conflicts "Register conflict graph"
- $ Color.dotGraph Color.regDotColor trivColorable
+ $ Color.dotGraph
+ targetRegDotColor
+ (Color.trivColorable
+ targetVirtualRegSqueeze
+ targetRealRegSqueeze)
$ graphGlobal)
-- write out the imports
Pretty.printDoc Pretty.LeftMode h
- $ makeImportsDoc (concat imports)
+ $ makeImportsDoc dflags (concat imports)
return ()
-- | Do native code generation on all these cmms.
--
-cmmNativeGens dflags h us [] impAcc profAcc
+cmmNativeGens dflags h us [] impAcc profAcc count
= return (reverse impAcc, reverse profAcc)
-cmmNativeGens dflags h us (cmm : cmms) impAcc profAcc
+cmmNativeGens dflags h us (cmm : cmms) impAcc profAcc count
= do
(us', native, imports, colorStats, linearStats)
- <- cmmNativeGen dflags us cmm
+ <- cmmNativeGen dflags us cmm count
- Pretty.printDoc Pretty.LeftMode h
+ Pretty.bufLeftRender h
$ {-# SCC "pprNativeCode" #-} Pretty.vcat $ map pprNatCmmTop native
- let lsPprNative =
+ -- carefully evaluate this strictly. Binding it with 'let'
+ -- and then using 'seq' doesn't work, because the let
+ -- apparently gets inlined first.
+ lsPprNative <- return $!
if dopt Opt_D_dump_asm dflags
|| dopt Opt_D_dump_asm_stats dflags
then native
else []
- -- force evaulation of imports and lsPprNative to avoid space leak
+ count' <- return $! count + 1;
+
+ -- force evaulation all this stuff to avoid space leaks
seqString (showSDoc $ vcat $ map ppr imports) `seq` return ()
- lsPprNative `seq` return ()
cmmNativeGens dflags h us' cmms
(imports : impAcc)
((lsPprNative, colorStats, linearStats) : profAcc)
+ count'
where seqString [] = ()
seqString (x:xs) = x `seq` seqString xs `seq` ()
cmmNativeGen
:: DynFlags
-> UniqSupply
- -> RawCmmTop
+ -> RawCmmTop -- ^ the cmm to generate code for
+ -> Int -- ^ sequence number of this top thing
-> IO ( UniqSupply
- , [NatCmmTop]
- , [CLabel]
- , Maybe [Color.RegAllocStats]
- , Maybe [Linear.RegAllocStats])
+ , [NatCmmTop Instr] -- native code
+ , [CLabel] -- things imported by this cmm
+ , Maybe [Color.RegAllocStats Instr] -- stats for the coloring register allocator
+ , Maybe [Linear.RegAllocStats]) -- stats for the linear register allocators
-cmmNativeGen dflags us cmm
+cmmNativeGen dflags us cmm count
= do
+
-- rewrite assignments to global regs
- let (fixed_cmm, usFix) =
- {-# SCC "fixAssignsTop" #-}
- initUs us $ fixAssignsTop cmm
+ let fixed_cmm =
+ {-# SCC "fixStgRegisters" #-}
+ fixStgRegisters cmm
-- cmm to cmm optimisations
let (opt_cmm, imports) =
-- generate native code from cmm
let ((native, lastMinuteImports), usGen) =
{-# SCC "genMachCode" #-}
- initUs usFix $ genMachCode dflags opt_cmm
+ initUs us $ genMachCode dflags opt_cmm
dumpIfSet_dyn dflags
Opt_D_dump_asm_native "Native code"
(vcat $ map (docToSDoc . pprNatCmmTop) native)
-
-- tag instructions with register liveness information
let (withLiveness, usLive) =
{-# SCC "regLiveness" #-}
- initUs usGen $ mapUs regLiveness native
+ initUs usGen
+ $ mapUs regLiveness
+ $ map natCmmTopToLive native
dumpIfSet_dyn dflags
Opt_D_dump_asm_liveness "Liveness annotations added"
(vcat $ map ppr withLiveness)
-
-- allocate registers
(alloced, usAlloc, ppr_raStatsColor, ppr_raStatsLinear) <-
- if dopt Opt_RegsGraph dflags
+ if ( dopt Opt_RegsGraph dflags
+ || dopt Opt_RegsIterative dflags)
then do
-- the regs usable for allocation
- let alloc_regs
+ let (alloc_regs :: UniqFM (UniqSet RealReg))
= foldr (\r -> plusUFM_C unionUniqSets
- $ unitUFM (regClass r) (unitUniqSet r))
+ $ unitUFM (targetClassOfRealReg r) (unitUniqSet r))
emptyUFM
- $ map RealReg allocatableRegs
-
- -- if any of these dump flags are turned on we want to hang on to
- -- intermediate structures in the allocator - otherwise tell the
- -- allocator to ditch them early so we don't end up creating space leaks.
- let generateRegAllocStats = or
- [ dopt Opt_D_dump_asm_regalloc_stages dflags
- , dopt Opt_D_dump_asm_stats dflags
- , dopt Opt_D_dump_asm_conflicts dflags ]
+ $ allocatableRegs
- -- graph coloring register allocation
+ -- do the graph coloring register allocation
let ((alloced, regAllocStats), usAlloc)
- = {-# SCC "RegAlloc(color)" #-}
+ = {-# SCC "RegAlloc" #-}
initUs usLive
$ Color.regAlloc
- generateRegAllocStats
+ dflags
alloc_regs
(mkUniqSet [0..maxSpillSlots])
withLiveness
dumpIfSet_dyn dflags
Opt_D_dump_asm_regalloc_stages "Build/spill stages"
(vcat $ map (\(stage, stats)
- -> text "-- Stage " <> int stage
+ -> text "# --------------------------"
+ $$ text "# cmm " <> int count <> text " Stage " <> int stage
$$ ppr stats)
$ zip [0..] regAllocStats)
else do
-- do linear register allocation
let ((alloced, regAllocStats), usAlloc)
- = {-# SCC "RegAlloc(linear)" #-}
+ = {-# SCC "RegAlloc" #-}
initUs usLive
$ liftM unzip
$ mapUs Linear.regAlloc withLiveness
map sequenceTop shorted
---- x86fp_kludge
- let final_mach_code =
+ let kludged =
#if i386_TARGET_ARCH
{-# SCC "x86fp_kludge" #-}
map x86fp_kludge sequenced
sequenced
#endif
+ ---- expansion of SPARC synthetic instrs
+#if sparc_TARGET_ARCH
+ let expanded =
+ {-# SCC "sparc_expand" #-}
+ map SPARC.expandTop kludged
+
+ dumpIfSet_dyn dflags
+ Opt_D_dump_asm_expanded "Synthetic instructions expanded"
+ (vcat $ map (docToSDoc . pprNatCmmTop) expanded)
+#else
+ let expanded =
+ kludged
+#endif
+
return ( usAlloc
- , final_mach_code
+ , expanded
, lastMinuteImports ++ imports
, ppr_raStatsColor
, ppr_raStatsLinear)
#if i386_TARGET_ARCH
-x86fp_kludge :: NatCmmTop -> NatCmmTop
+x86fp_kludge :: NatCmmTop Instr -> NatCmmTop Instr
x86fp_kludge top@(CmmData _ _) = top
x86fp_kludge top@(CmmProc info lbl params (ListGraph code)) =
- CmmProc info lbl params (ListGraph $ map bb_i386_insert_ffrees code)
- where
- bb_i386_insert_ffrees (BasicBlock id instrs) =
- BasicBlock id (i386_insert_ffrees instrs)
+ CmmProc info lbl params (ListGraph $ i386_insert_ffrees code)
#endif
-- | Build a doc for all the imports.
--
-makeImportsDoc :: [CLabel] -> Pretty.Doc
-makeImportsDoc imports
+makeImportsDoc :: DynFlags -> [CLabel] -> Pretty.Doc
+makeImportsDoc dflags imports
= dyld_stubs imports
#if HAVE_SUBSECTIONS_VIA_SYMBOLS
{- dyld_stubs imps = Pretty.vcat $ map pprDyldSymbolStub $
map head $ group $ sort imps-}
+ arch = platformArch $ targetPlatform dflags
+ os = platformOS $ targetPlatform dflags
+
-- (Hack) sometimes two Labels pretty-print the same, but have
-- different uniques; so we compare their text versions...
dyld_stubs imps
- | needImportedSymbols
+ | needImportedSymbols arch os
= Pretty.vcat $
- (pprGotDeclaration :) $
- map (pprImportedSymbol . fst . head) $
+ (pprGotDeclaration arch os :) $
+ map ( pprImportedSymbol arch os . fst . head) $
groupBy (\(_,a) (_,b) -> a == b) $
sortBy (\(_,a) (_,b) -> compare a b) $
map doPpr $
-- such that as many of the local jumps as possible turn into
-- fallthroughs.
-sequenceTop :: NatCmmTop -> NatCmmTop
+sequenceTop
+ :: NatCmmTop Instr
+ -> NatCmmTop Instr
+
sequenceTop top@(CmmData _ _) = top
sequenceTop (CmmProc info lbl params (ListGraph blocks)) =
CmmProc info lbl params (ListGraph $ makeFarBranches $ sequenceBlocks blocks)
-- output the block, then if it has an out edge, we move the
-- destination of the out edge to the front of the list, and continue.
-sequenceBlocks :: [NatBasicBlock] -> [NatBasicBlock]
+-- FYI, the classic layout for basic blocks uses postorder DFS; this
+-- algorithm is implemented in cmm/ZipCfg.hs (NR 6 Sep 2007).
+
+sequenceBlocks
+ :: Instruction instr
+ => [NatBasicBlock instr]
+ -> [NatBasicBlock instr]
+
sequenceBlocks [] = []
sequenceBlocks (entry:blocks) =
seqBlocks (mkNode entry : reverse (flattenSCCs (sccBlocks blocks)))
-- the first block is the entry point ==> it must remain at the start.
-sccBlocks :: [NatBasicBlock] -> [SCC (NatBasicBlock,Unique,[Unique])]
-sccBlocks blocks = stronglyConnCompR (map mkNode blocks)
-getOutEdges :: [Instr] -> [Unique]
-getOutEdges instrs = case jumpDests (last instrs) [] of
- [one] -> [getUnique one]
- _many -> []
- -- we're only interested in the last instruction of
- -- the block, and only if it has a single destination.
+sccBlocks
+ :: Instruction instr
+ => [NatBasicBlock instr]
+ -> [SCC ( NatBasicBlock instr
+ , Unique
+ , [Unique])]
+
+sccBlocks blocks = stronglyConnCompFromEdgedVerticesR (map mkNode blocks)
+
+-- we're only interested in the last instruction of
+-- the block, and only if it has a single destination.
+getOutEdges
+ :: Instruction instr
+ => [instr] -> [Unique]
+
+getOutEdges instrs
+ = case jumpDestsOfInstr (last instrs) of
+ [one] -> [getUnique one]
+ _many -> []
mkNode block@(BasicBlock id instrs) = (block, getUnique id, getOutEdges instrs)
-- Conditional branches on PowerPC are limited to +-32KB; if our Procs get too
-- big, we have to work around this limitation.
-makeFarBranches :: [NatBasicBlock] -> [NatBasicBlock]
+makeFarBranches
+ :: [NatBasicBlock Instr]
+ -> [NatBasicBlock Instr]
#if powerpc_TARGET_ARCH
makeFarBranches blocks
-- -----------------------------------------------------------------------------
-- Shortcut branches
-shortcutBranches :: DynFlags -> [NatCmmTop] -> [NatCmmTop]
+shortcutBranches
+ :: DynFlags
+ -> [NatCmmTop Instr]
+ -> [NatCmmTop Instr]
+
shortcutBranches dflags tops
| optLevel dflags < 1 = tops -- only with -O or higher
| otherwise = map (apply_mapping mapping) tops'
where
-- find all the blocks that just consist of a jump that can be
-- shorted.
- (shortcut_blocks, others) = partitionWith split blocks
- split (BasicBlock id [insn]) | Just dest <- canShortcut insn
- = Left (id,dest)
- split other = Right other
+ -- Don't completely eliminate loops here -- that can leave a dangling jump!
+ (_, shortcut_blocks, others) = foldl split (emptyBlockSet, [], []) blocks
+ split (s, shortcut_blocks, others) b@(BasicBlock id [insn])
+ | Just (DestBlockId dest) <- canShortcut insn,
+ (elemBlockSet dest s) || dest == id -- loop checks
+ = (s, shortcut_blocks, b : others)
+ split (s, shortcut_blocks, others) (BasicBlock id [insn])
+ | Just dest <- canShortcut insn
+ = (extendBlockSet s id, (id,dest) : shortcut_blocks, others)
+ split (s, shortcut_blocks, others) other = (s, shortcut_blocks, other : others)
+
-- build a mapping from BlockId to JumpDest for shorting branches
mapping = foldl add emptyUFM shortcut_blocks
-- Switching between the two monads whilst carrying along the same
-- Unique supply breaks abstraction. Is that bad?
-genMachCode :: DynFlags -> RawCmmTop -> UniqSM ([NatCmmTop], [CLabel])
+genMachCode
+ :: DynFlags
+ -> RawCmmTop
+ -> UniqSM
+ ( [NatCmmTop Instr]
+ , [CLabel])
genMachCode dflags cmm_top
= do { initial_us <- getUs
; let initial_st = mkNatM_State initial_us 0 dflags
- (new_tops, final_st) = initNat initial_st (cmmTopCodeGen cmm_top)
+ (new_tops, final_st) = initNat initial_st (cmmTopCodeGen dflags cmm_top)
final_delta = natm_delta final_st
final_imports = natm_imports final_st
; if final_delta == 0
else pprPanic "genMachCode: nonzero final delta" (int final_delta)
}
--- -----------------------------------------------------------------------------
--- Fixup assignments to global registers so that they assign to
--- locations within the RegTable, if appropriate.
-
--- Note that we currently don't fixup reads here: they're done by
--- the generic optimiser below, to avoid having two separate passes
--- over the Cmm.
-
-fixAssignsTop :: RawCmmTop -> UniqSM RawCmmTop
-fixAssignsTop top@(CmmData _ _) = returnUs top
-fixAssignsTop (CmmProc info lbl params (ListGraph blocks)) =
- mapUs fixAssignsBlock blocks `thenUs` \ blocks' ->
- returnUs (CmmProc info lbl params (ListGraph blocks'))
-
-fixAssignsBlock :: CmmBasicBlock -> UniqSM CmmBasicBlock
-fixAssignsBlock (BasicBlock id stmts) =
- fixAssigns stmts `thenUs` \ stmts' ->
- returnUs (BasicBlock id stmts')
-
-fixAssigns :: [CmmStmt] -> UniqSM [CmmStmt]
-fixAssigns stmts =
- mapUs fixAssign stmts `thenUs` \ stmtss ->
- returnUs (concat stmtss)
-
-fixAssign :: CmmStmt -> UniqSM [CmmStmt]
-fixAssign (CmmAssign (CmmGlobal reg) src)
- | Left realreg <- reg_or_addr
- = returnUs [CmmAssign (CmmGlobal reg) src]
- | Right baseRegAddr <- reg_or_addr
- = returnUs [CmmStore baseRegAddr src]
- -- Replace register leaves with appropriate StixTrees for
- -- the given target. GlobalRegs which map to a reg on this
- -- arch are left unchanged. Assigning to BaseReg is always
- -- illegal, so we check for that.
- where
- reg_or_addr = get_GlobalReg_reg_or_addr reg
-
-fixAssign other_stmt = returnUs [other_stmt]
-- -----------------------------------------------------------------------------
-- Generic Cmm optimiser
(a) Constant folding
(b) Simple inlining: a temporary which is assigned to and then
used, once, can be shorted.
- (c) Replacement of references to GlobalRegs which do not have
- machine registers by the appropriate memory load (eg.
- Hp ==> *(BaseReg + 34) ).
- (d) Position independent code and dynamic linking
+ (c) Position independent code and dynamic linking
(i) introduce the appropriate indirections
and position independent refs
(ii) compile a list of imported symbols
e' <- cmmExprConFold CallReference e
return $ CmmCallee e' conv
other -> return other
- args' <- mapM (\(arg, hint) -> do
+ args' <- mapM (\(CmmHinted arg hint) -> do
arg' <- cmmExprConFold DataReference arg
- return (arg', hint)) args
+ return (CmmHinted arg' hint)) args
return $ CmmCall target' regs args' srt returns
CmmCondBranch test dest
-> do
dflags <- getDynFlagsCmmOpt
dynRef <- cmmMakeDynamicReference dflags addImportCmmOpt referenceKind lbl
- return $ cmmMachOpFold (MO_Add wordRep) [
+ return $ cmmMachOpFold (MO_Add wordWidth) [
dynRef,
- (CmmLit $ CmmInt (fromIntegral off) wordRep)
+ (CmmLit $ CmmInt (fromIntegral off) wordWidth)
]
#if powerpc_TARGET_ARCH
-- On powerpc (non-PIC), it's easier to jump directly to a label than
-- to use the register table, so we replace these registers
-- with the corresponding labels:
+ CmmReg (CmmGlobal EagerBlackholeInfo)
+ | not opt_PIC
+ -> cmmExprConFold referenceKind $
+ CmmLit (CmmLabel (mkCmmCodeLabel rtsPackageId (fsLit "__stg_EAGER_BLACKHOLE_info")))
CmmReg (CmmGlobal GCEnter1)
| not opt_PIC
-> cmmExprConFold referenceKind $
- CmmLit (CmmLabel (mkRtsCodeLabel SLIT( "__stg_gc_enter_1")))
+ CmmLit (CmmLabel (mkCmmCodeLabel rtsPackageId (fsLit "__stg_gc_enter_1")))
CmmReg (CmmGlobal GCFun)
| not opt_PIC
-> cmmExprConFold referenceKind $
- CmmLit (CmmLabel (mkRtsCodeLabel SLIT( "__stg_gc_fun")))
+ CmmLit (CmmLabel (mkCmmCodeLabel rtsPackageId (fsLit "__stg_gc_fun")))
#endif
- CmmReg (CmmGlobal mid)
- -- Replace register leaves with appropriate StixTrees for
- -- the given target. MagicIds which map to a reg on this
- -- arch are left unchanged. For the rest, BaseReg is taken
- -- to mean the address of the reg table in MainCapability,
- -- and for all others we generate an indirection to its
- -- location in the register table.
- -> case get_GlobalReg_reg_or_addr mid of
- Left realreg -> return expr
- Right baseRegAddr
- -> case mid of
- BaseReg -> cmmExprConFold DataReference baseRegAddr
- other -> cmmExprConFold DataReference
- (CmmLoad baseRegAddr (globalRegRep mid))
- -- eliminate zero offsets
- CmmRegOff reg 0
- -> cmmExprConFold referenceKind (CmmReg reg)
-
- CmmRegOff (CmmGlobal mid) offset
- -- RegOf leaves are just a shorthand form. If the reg maps
- -- to a real reg, we keep the shorthand, otherwise, we just
- -- expand it and defer to the above code.
- -> case get_GlobalReg_reg_or_addr mid of
- Left realreg -> return expr
- Right baseRegAddr
- -> cmmExprConFold DataReference (CmmMachOp (MO_Add wordRep) [
- CmmReg (CmmGlobal mid),
- CmmLit (CmmInt (fromIntegral offset)
- wordRep)])
other
-> return other
--- -----------------------------------------------------------------------------
--- Utils
-
-bind f x = x $! f
-
\end{code}