X-Git-Url: http://git.megacz.com/?p=ghc-hetmet.git;a=blobdiff_plain;f=compiler%2FnativeGen%2FAsmCodeGen.lhs;h=4152c45220811c5d377d237845d932ed81b66fc8;hp=c6069183b009007f9a46708d0a79651edba1f946;hb=3d52165d6ff86168fd68addd56be0eb1893aaa1f;hpb=f2cd56cf9fc310c9b587ecb5dfaee4ad6b580355 diff --git a/compiler/nativeGen/AsmCodeGen.lhs b/compiler/nativeGen/AsmCodeGen.lhs index c606918..4152c45 100644 --- a/compiler/nativeGen/AsmCodeGen.lhs +++ b/compiler/nativeGen/AsmCodeGen.lhs @@ -7,46 +7,91 @@ -- ----------------------------------------------------------------------------- \begin{code} +{-# OPTIONS -w #-} +-- The above warning supression flag is a temporary kludge. +-- While working on this module you are encouraged to remove it and fix +-- any warnings in the module. See +-- http://hackage.haskell.org/trac/ghc/wiki/Commentary/CodingStyle#Warnings +-- for details + module AsmCodeGen ( nativeCodeGen ) where #include "HsVersions.h" #include "nativeGen/NCG.h" -import MachInstrs -import MachRegs -import MachCodeGen -import PprMach -import RegAllocInfo + +#if alpha_TARGET_ARCH +import Alpha.CodeGen +import Alpha.Regs +import Alpha.RegInfo +import Alpha.Instr +import Alpha.Ppr + +#elif i386_TARGET_ARCH || x86_64_TARGET_ARCH +import X86.CodeGen +import X86.Regs +import X86.RegInfo +import X86.Instr +import X86.Ppr + +#elif sparc_TARGET_ARCH +import SPARC.CodeGen +import SPARC.Regs +import SPARC.RegInfo +import SPARC.Instr +import SPARC.Ppr + +#elif powerpc_TARGET_ARCH +import PPC.CodeGen +import PPC.Cond +import PPC.Regs +import PPC.RegInfo +import PPC.Instr +import PPC.Ppr + +#else +#error "AsmCodeGen: unknown architecture" + +#endif + +import RegAlloc.Liveness +import qualified RegAlloc.Linear.Main as Linear + +import qualified GraphColor as Color +import qualified RegAlloc.Graph.Main as Color +import qualified RegAlloc.Graph.Stats as Color +import qualified RegAlloc.Graph.Coalesce as Color +import qualified RegAlloc.Graph.TrivColorable as Color + +import qualified TargetReg as Target + +import Platform +import Instruction +import PIC +import Reg import NCGMonad -import PositionIndependentCode -import RegLiveness -import RegCoalesce -import qualified RegAllocLinear as Linear -import qualified RegAllocColor as Color -import qualified RegAllocStats as Color -import qualified GraphColor as Color import Cmm import CmmOpt ( cmmMiniInline, cmmMachOpFold ) -import PprCmm ( pprStmt, pprCmms, pprCmm ) -import MachOp +import PprCmm import CLabel import State import UniqFM import Unique ( Unique, getUnique ) import UniqSupply -import FastTypes import List ( groupBy, sortBy ) -import ErrUtils ( dumpIfSet_dyn ) import DynFlags +#if powerpc_TARGET_ARCH import StaticFlags ( opt_Static, opt_PIC ) +#endif import Util import Config ( cProjectVersion ) import Module import Digraph import qualified Pretty +import BufWrite import Outputable import FastString import UniqSet @@ -62,6 +107,7 @@ import Data.Bits import Data.Maybe import GHC.Exts import Control.Monad +import System.IO {- The native-code generator has machine-independent and @@ -114,22 +160,29 @@ The machine-dependent bits break down as follows: -- ----------------------------------------------------------------------------- -- Top-level of the native codegen --- NB. We *lazilly* compile each block of code for space reasons. - -------------------- -nativeCodeGen :: DynFlags -> [RawCmm] -> UniqSupply -> IO Pretty.Doc -nativeCodeGen dflags cmms us +nativeCodeGen :: DynFlags -> Handle -> UniqSupply -> [RawCmm] -> IO () +nativeCodeGen dflags h us cmms = do - -- do native code generation on all these cmm things - (us', result) - <- mapAccumLM (cmmNativeGen dflags) us - $ concat $ map add_split cmms + let split_cmms = concat $ map add_split cmms + + -- BufHandle is a performance hack. We could hide it inside + -- Pretty if it weren't for the fact that we do lots of little + -- printDocs here (in order to do codegen in constant space). + bufh <- newBufHandle h + (imports, prof) <- cmmNativeGens dflags bufh us split_cmms [] [] 0 + bFlush bufh + + let (native, colorStats, linearStats) + = unzip3 prof - let (native, imports, mColorStats, mLinearStats) - = unzip4 result + -- dump native code + dumpIfSet_dyn dflags + Opt_D_dump_asm "Asm code" + (vcat $ map (docToSDoc . pprNatCmmTop) $ concat native) -- dump global NCG stats for graph coloring allocator - (case concat $ catMaybes mColorStats of + (case concat $ catMaybes colorStats of [] -> return () stats -> do -- build the global register conflict graph @@ -143,23 +196,63 @@ nativeCodeGen dflags cmms us dumpIfSet_dyn dflags Opt_D_dump_asm_conflicts "Register conflict graph" - $ Color.dotGraph Color.regDotColor trivColorable + $ Color.dotGraph Target.targetRegDotColor (Color.trivColorable Target.targetRegClass) $ graphGlobal) -- dump global NCG stats for linear allocator - (case catMaybes mLinearStats of + (case concat $ catMaybes linearStats of [] -> return () stats -> dumpSDoc dflags Opt_D_dump_asm_stats "NCG stats" - $ Linear.pprStats (concat stats)) + $ Linear.pprStats (concat native) stats) + + -- write out the imports + Pretty.printDoc Pretty.LeftMode h + $ makeImportsDoc dflags (concat imports) + + return () - return $ makeAsmDoc (concat native) (concat imports) + where add_split (Cmm tops) + | dopt Opt_SplitObjs dflags = split_marker : tops + | otherwise = tops + + split_marker = CmmProc [] mkSplitMarkerLabel [] (ListGraph []) + + +-- | Do native code generation on all these cmms. +-- +cmmNativeGens dflags h us [] impAcc profAcc count + = return (reverse impAcc, reverse profAcc) + +cmmNativeGens dflags h us (cmm : cmms) impAcc profAcc count + = do + (us', native, imports, colorStats, linearStats) + <- cmmNativeGen dflags us cmm count - where add_split (Cmm tops) - | dopt Opt_SplitObjs dflags = split_marker : tops - | otherwise = tops + Pretty.bufLeftRender h + $ {-# SCC "pprNativeCode" #-} Pretty.vcat $ map pprNatCmmTop native - split_marker = CmmProc [] mkSplitMarkerLabel [] [] + let lsPprNative = + if dopt Opt_D_dump_asm dflags + || dopt Opt_D_dump_asm_stats dflags + then native + else [] + + let count' = count + 1; + + + -- force evaulation all this stuff to avoid space leaks + seqString (showSDoc $ vcat $ map ppr imports) `seq` return () + lsPprNative `seq` return () + count' `seq` return () + + cmmNativeGens dflags h us' cmms + (imports : impAcc) + ((lsPprNative, colorStats, linearStats) : profAcc) + count' + + where seqString [] = () + seqString (x:xs) = x `seq` seqString xs `seq` () -- | Complete native code generation phase for a single top-level chunk of Cmm. @@ -168,30 +261,34 @@ nativeCodeGen dflags cmms us cmmNativeGen :: DynFlags -> UniqSupply - -> RawCmmTop - -> IO ( UniqSupply - , ( [NatCmmTop] - , [CLabel] - , Maybe [Color.RegAllocStats] - , Maybe [Linear.RegAllocStats])) - -cmmNativeGen dflags us cmm + -> RawCmmTop -- ^ the cmm to generate code for + -> Int -- ^ sequence number of this top thing + -> IO ( UniqSupply + , [NatCmmTop Instr] -- native code + , [CLabel] -- things imported by this cmm + , Maybe [Color.RegAllocStats Instr] -- stats for the coloring register allocator + , Maybe [Linear.RegAllocStats]) -- stats for the linear register allocators + +cmmNativeGen dflags us cmm count = do + -- rewrite assignments to global regs let (fixed_cmm, usFix) = + {-# SCC "fixAssignsTop" #-} initUs us $ fixAssignsTop cmm -- cmm to cmm optimisations let (opt_cmm, imports) = + {-# SCC "cmmToCmm" #-} cmmToCmm dflags fixed_cmm dumpIfSet_dyn dflags Opt_D_dump_opt_cmm "Optimised Cmm" (pprCmm $ Cmm [opt_cmm]) - -- generate native code from cmm let ((native, lastMinuteImports), usGen) = + {-# SCC "genMachCode" #-} initUs usFix $ genMachCode dflags opt_cmm dumpIfSet_dyn dflags @@ -201,6 +298,7 @@ cmmNativeGen dflags us cmm -- tag instructions with register liveness information let (withLiveness, usLive) = + {-# SCC "regLiveness" #-} initUs usGen $ mapUs regLiveness native dumpIfSet_dyn dflags @@ -210,7 +308,8 @@ cmmNativeGen dflags us cmm -- allocate registers (alloced, usAlloc, ppr_raStatsColor, ppr_raStatsLinear) <- - if dopt Opt_RegsGraph dflags + if ( dopt Opt_RegsGraph dflags + || dopt Opt_RegsIterative dflags) then do -- the regs usable for allocation let alloc_regs @@ -219,22 +318,17 @@ cmmNativeGen dflags us cmm emptyUFM $ map RealReg allocatableRegs - -- aggressively coalesce moves between virtual regs - let (coalesced, usCoalesce) - = initUs usLive $ regCoalesce withLiveness - - dumpIfSet_dyn dflags - Opt_D_dump_asm_coalesce "Reg-Reg moves coalesced" - (vcat $ map ppr coalesced) - -- graph coloring register allocation let ((alloced, regAllocStats), usAlloc) - = initUs usCoalesce - $ Color.regAlloc + = {-# SCC "RegAlloc" #-} + initUs usLive + $ Color.regAlloc + dflags alloc_regs (mkUniqSet [0..maxSpillSlots]) - coalesced + withLiveness + -- dump out what happened during register allocation dumpIfSet_dyn dflags Opt_D_dump_asm_regalloc "Registers allocated" (vcat $ map (docToSDoc . pprNatCmmTop) alloced) @@ -242,30 +336,44 @@ cmmNativeGen dflags us cmm dumpIfSet_dyn dflags Opt_D_dump_asm_regalloc_stages "Build/spill stages" (vcat $ map (\(stage, stats) - -> text "-- Stage " <> int stage + -> text "# --------------------------" + $$ text "# cmm " <> int count <> text " Stage " <> int stage $$ ppr stats) $ zip [0..] regAllocStats) + let mPprStats = + if dopt Opt_D_dump_asm_stats dflags + then Just regAllocStats else Nothing + + -- force evaluation of the Maybe to avoid space leak + mPprStats `seq` return () + return ( alloced, usAlloc - , if dopt Opt_D_dump_asm_stats dflags - then Just regAllocStats else Nothing + , mPprStats , Nothing) else do -- do linear register allocation let ((alloced, regAllocStats), usAlloc) - = initUs usLive - $ liftM unzip - $ mapUs Linear.regAlloc withLiveness + = {-# SCC "RegAlloc" #-} + initUs usLive + $ liftM unzip + $ mapUs Linear.regAlloc withLiveness dumpIfSet_dyn dflags Opt_D_dump_asm_regalloc "Registers allocated" (vcat $ map (docToSDoc . pprNatCmmTop) alloced) + let mPprStats = + if dopt Opt_D_dump_asm_stats dflags + then Just (catMaybes regAllocStats) else Nothing + + -- force evaluation of the Maybe to avoid space leak + mPprStats `seq` return () + return ( alloced, usAlloc , Nothing - , if dopt Opt_D_dump_asm_stats dflags - then Just (catMaybes regAllocStats) else Nothing) + , mPprStats) ---- shortcut branches let shorted = @@ -287,30 +395,25 @@ cmmNativeGen dflags us cmm #endif return ( usAlloc - , ( final_mach_code - , lastMinuteImports ++ imports - , ppr_raStatsColor - , ppr_raStatsLinear) ) + , final_mach_code + , lastMinuteImports ++ imports + , ppr_raStatsColor + , ppr_raStatsLinear) #if i386_TARGET_ARCH -x86fp_kludge :: NatCmmTop -> NatCmmTop +x86fp_kludge :: NatCmmTop Instr -> NatCmmTop Instr x86fp_kludge top@(CmmData _ _) = top -x86fp_kludge top@(CmmProc info lbl params code) = - CmmProc info lbl params (map bb_i386_insert_ffrees code) - where - bb_i386_insert_ffrees (BasicBlock id instrs) = - BasicBlock id (i386_insert_ffrees instrs) +x86fp_kludge top@(CmmProc info lbl params (ListGraph code)) = + CmmProc info lbl params (ListGraph $ i386_insert_ffrees code) #endif --- | Build assembler source file from native code and its imports. +-- | Build a doc for all the imports. -- -makeAsmDoc :: [NatCmmTop] -> [CLabel] -> Pretty.Doc -makeAsmDoc native imports - = Pretty.vcat (map pprNatCmmTop native) - Pretty.$$ (Pretty.text "") - Pretty.$$ dyld_stubs imports +makeImportsDoc :: DynFlags -> [CLabel] -> Pretty.Doc +makeImportsDoc dflags imports + = dyld_stubs imports #if HAVE_SUBSECTIONS_VIA_SYMBOLS -- On recent versions of Darwin, the linker supports @@ -343,13 +446,16 @@ makeAsmDoc native imports {- dyld_stubs imps = Pretty.vcat $ map pprDyldSymbolStub $ map head $ group $ sort imps-} + arch = platformArch $ targetPlatform dflags + os = platformOS $ targetPlatform dflags + -- (Hack) sometimes two Labels pretty-print the same, but have -- different uniques; so we compare their text versions... dyld_stubs imps - | needImportedSymbols + | needImportedSymbols arch os = Pretty.vcat $ - (pprGotDeclaration :) $ - map (pprImportedSymbol . fst . head) $ + (pprGotDeclaration arch os :) $ + map ( pprImportedSymbol arch os . fst . head) $ groupBy (\(_,a) (_,b) -> a == b) $ sortBy (\(_,a) (_,b) -> compare a b) $ map doPpr $ @@ -370,10 +476,13 @@ makeAsmDoc native imports -- such that as many of the local jumps as possible turn into -- fallthroughs. -sequenceTop :: NatCmmTop -> NatCmmTop +sequenceTop + :: NatCmmTop Instr + -> NatCmmTop Instr + sequenceTop top@(CmmData _ _) = top -sequenceTop (CmmProc info lbl params blocks) = - CmmProc info lbl params (makeFarBranches $ sequenceBlocks blocks) +sequenceTop (CmmProc info lbl params (ListGraph blocks)) = + CmmProc info lbl params (ListGraph $ makeFarBranches $ sequenceBlocks blocks) -- The algorithm is very simple (and stupid): we make a graph out of -- the blocks where there is an edge from one block to another iff the @@ -382,21 +491,39 @@ sequenceTop (CmmProc info lbl params blocks) = -- output the block, then if it has an out edge, we move the -- destination of the out edge to the front of the list, and continue. -sequenceBlocks :: [NatBasicBlock] -> [NatBasicBlock] +-- FYI, the classic layout for basic blocks uses postorder DFS; this +-- algorithm is implemented in cmm/ZipCfg.hs (NR 6 Sep 2007). + +sequenceBlocks + :: Instruction instr + => [NatBasicBlock instr] + -> [NatBasicBlock instr] + sequenceBlocks [] = [] sequenceBlocks (entry:blocks) = seqBlocks (mkNode entry : reverse (flattenSCCs (sccBlocks blocks))) -- the first block is the entry point ==> it must remain at the start. -sccBlocks :: [NatBasicBlock] -> [SCC (NatBasicBlock,Unique,[Unique])] -sccBlocks blocks = stronglyConnCompR (map mkNode blocks) -getOutEdges :: [Instr] -> [Unique] -getOutEdges instrs = case jumpDests (last instrs) [] of - [one] -> [getUnique one] - _many -> [] - -- we're only interested in the last instruction of - -- the block, and only if it has a single destination. +sccBlocks + :: Instruction instr + => [NatBasicBlock instr] + -> [SCC ( NatBasicBlock instr + , Unique + , [Unique])] + +sccBlocks blocks = stronglyConnCompFromEdgedVerticesR (map mkNode blocks) + +-- we're only interested in the last instruction of +-- the block, and only if it has a single destination. +getOutEdges + :: Instruction instr + => [instr] -> [Unique] + +getOutEdges instrs + = case jumpDestsOfInstr (last instrs) of + [one] -> [getUnique one] + _many -> [] mkNode block@(BasicBlock id instrs) = (block, getUnique id, getOutEdges instrs) @@ -424,7 +551,9 @@ reorder id accum (b@(block,id',out) : rest) -- Conditional branches on PowerPC are limited to +-32KB; if our Procs get too -- big, we have to work around this limitation. -makeFarBranches :: [NatBasicBlock] -> [NatBasicBlock] +makeFarBranches + :: [NatBasicBlock Instr] + -> [NatBasicBlock Instr] #if powerpc_TARGET_ARCH makeFarBranches blocks @@ -460,7 +589,11 @@ makeFarBranches = id -- ----------------------------------------------------------------------------- -- Shortcut branches -shortcutBranches :: DynFlags -> [NatCmmTop] -> [NatCmmTop] +shortcutBranches + :: DynFlags + -> [NatCmmTop Instr] + -> [NatCmmTop Instr] + shortcutBranches dflags tops | optLevel dflags < 1 = tops -- only with -O or higher | otherwise = map (apply_mapping mapping) tops' @@ -469,10 +602,10 @@ shortcutBranches dflags tops mapping = foldr plusUFM emptyUFM mappings build_mapping top@(CmmData _ _) = (top, emptyUFM) -build_mapping (CmmProc info lbl params []) - = (CmmProc info lbl params [], emptyUFM) -build_mapping (CmmProc info lbl params (head:blocks)) - = (CmmProc info lbl params (head:others), mapping) +build_mapping (CmmProc info lbl params (ListGraph [])) + = (CmmProc info lbl params (ListGraph []), emptyUFM) +build_mapping (CmmProc info lbl params (ListGraph (head:blocks))) + = (CmmProc info lbl params (ListGraph (head:others)), mapping) -- drop the shorted blocks, but don't ever drop the first one, -- because it is pointed to by a global label. where @@ -491,8 +624,8 @@ apply_mapping ufm (CmmData sec statics) = CmmData sec (map (shortcutStatic (lookupUFM ufm)) statics) -- we need to get the jump tables, so apply the mapping to the entries -- of a CmmData too. -apply_mapping ufm (CmmProc info lbl params blocks) - = CmmProc info lbl params (map short_bb blocks) +apply_mapping ufm (CmmProc info lbl params (ListGraph blocks)) + = CmmProc info lbl params (ListGraph $ map short_bb blocks) where short_bb (BasicBlock id insns) = BasicBlock id $! map short_insn insns short_insn i = shortcutJump (lookupUFM ufm) i @@ -519,12 +652,17 @@ apply_mapping ufm (CmmProc info lbl params blocks) -- Switching between the two monads whilst carrying along the same -- Unique supply breaks abstraction. Is that bad? -genMachCode :: DynFlags -> RawCmmTop -> UniqSM ([NatCmmTop], [CLabel]) +genMachCode + :: DynFlags + -> RawCmmTop + -> UniqSM + ( [NatCmmTop Instr] + , [CLabel]) genMachCode dflags cmm_top = do { initial_us <- getUs ; let initial_st = mkNatM_State initial_us 0 dflags - (new_tops, final_st) = initNat initial_st (cmmTopCodeGen cmm_top) + (new_tops, final_st) = initNat initial_st (cmmTopCodeGen dflags cmm_top) final_delta = natm_delta final_st final_imports = natm_imports final_st ; if final_delta == 0 @@ -542,9 +680,9 @@ genMachCode dflags cmm_top fixAssignsTop :: RawCmmTop -> UniqSM RawCmmTop fixAssignsTop top@(CmmData _ _) = returnUs top -fixAssignsTop (CmmProc info lbl params blocks) = +fixAssignsTop (CmmProc info lbl params (ListGraph blocks)) = mapUs fixAssignsBlock blocks `thenUs` \ blocks' -> - returnUs (CmmProc info lbl params blocks') + returnUs (CmmProc info lbl params (ListGraph blocks')) fixAssignsBlock :: CmmBasicBlock -> UniqSM CmmBasicBlock fixAssignsBlock (BasicBlock id stmts) = @@ -599,9 +737,9 @@ Ideas for other things we could do (ToDo): cmmToCmm :: DynFlags -> RawCmmTop -> (RawCmmTop, [CLabel]) cmmToCmm _ top@(CmmData _ _) = (top, []) -cmmToCmm dflags (CmmProc info lbl params blocks) = runCmmOpt dflags $ do +cmmToCmm dflags (CmmProc info lbl params (ListGraph blocks)) = runCmmOpt dflags $ do blocks' <- mapM cmmBlockConFold (cmmMiniInline blocks) - return $ CmmProc info lbl params blocks' + return $ CmmProc info lbl params (ListGraph blocks') newtype CmmOptM a = CmmOptM (([CLabel], DynFlags) -> (# a, [CLabel] #)) @@ -652,9 +790,9 @@ cmmStmtConFold stmt e' <- cmmExprConFold CallReference e return $ CmmCallee e' conv other -> return other - args' <- mapM (\(arg, hint) -> do + args' <- mapM (\(CmmHinted arg hint) -> do arg' <- cmmExprConFold DataReference arg - return (arg', hint)) args + return (CmmHinted arg' hint)) args return $ CmmCall target' regs args' srt returns CmmCondBranch test dest @@ -695,23 +833,27 @@ cmmExprConFold referenceKind expr -> do dflags <- getDynFlagsCmmOpt dynRef <- cmmMakeDynamicReference dflags addImportCmmOpt referenceKind lbl - return $ cmmMachOpFold (MO_Add wordRep) [ + return $ cmmMachOpFold (MO_Add wordWidth) [ dynRef, - (CmmLit $ CmmInt (fromIntegral off) wordRep) + (CmmLit $ CmmInt (fromIntegral off) wordWidth) ] #if powerpc_TARGET_ARCH -- On powerpc (non-PIC), it's easier to jump directly to a label than -- to use the register table, so we replace these registers -- with the corresponding labels: + CmmReg (CmmGlobal EagerBlackholeInfo) + | not opt_PIC + -> cmmExprConFold referenceKind $ + CmmLit (CmmLabel (mkRtsCodeLabel (sLit "__stg_EAGER_BLACKHOLE_INFO"))) CmmReg (CmmGlobal GCEnter1) | not opt_PIC -> cmmExprConFold referenceKind $ - CmmLit (CmmLabel (mkRtsCodeLabel SLIT( "__stg_gc_enter_1"))) + CmmLit (CmmLabel (mkRtsCodeLabel (sLit "__stg_gc_enter_1"))) CmmReg (CmmGlobal GCFun) | not opt_PIC -> cmmExprConFold referenceKind $ - CmmLit (CmmLabel (mkRtsCodeLabel SLIT( "__stg_gc_fun"))) + CmmLit (CmmLabel (mkRtsCodeLabel (sLit "__stg_gc_fun"))) #endif CmmReg (CmmGlobal mid) @@ -727,7 +869,7 @@ cmmExprConFold referenceKind expr -> case mid of BaseReg -> cmmExprConFold DataReference baseRegAddr other -> cmmExprConFold DataReference - (CmmLoad baseRegAddr (globalRegRep mid)) + (CmmLoad baseRegAddr (globalRegType mid)) -- eliminate zero offsets CmmRegOff reg 0 -> cmmExprConFold referenceKind (CmmReg reg) @@ -739,10 +881,10 @@ cmmExprConFold referenceKind expr -> case get_GlobalReg_reg_or_addr mid of Left realreg -> return expr Right baseRegAddr - -> cmmExprConFold DataReference (CmmMachOp (MO_Add wordRep) [ + -> cmmExprConFold DataReference (CmmMachOp (MO_Add wordWidth) [ CmmReg (CmmGlobal mid), CmmLit (CmmInt (fromIntegral offset) - wordRep)]) + wordWidth)]) other -> return other