X-Git-Url: http://git.megacz.com/?a=blobdiff_plain;f=compiler%2FnativeGen%2FAsmCodeGen.lhs;h=47c875c41df490e844892c68b42730f5bed3d6d0;hb=001d6ffda6a509c6349f6644df10e8fcf270d218;hp=0e05fb8f51dd6635992150e64fb0b729ee5c962a;hpb=451d907d9db34b9f7c787af4196e0bec05916508;p=ghc-hetmet.git diff --git a/compiler/nativeGen/AsmCodeGen.lhs b/compiler/nativeGen/AsmCodeGen.lhs index 0e05fb8..47c875c 100644 --- a/compiler/nativeGen/AsmCodeGen.lhs +++ b/compiler/nativeGen/AsmCodeGen.lhs @@ -17,7 +17,7 @@ import MachRegs import MachCodeGen import PprMach import RegisterAlloc -import RegAllocInfo ( jumpDests ) +import RegAllocInfo import NCGMonad import PositionIndependentCode @@ -25,7 +25,7 @@ import Cmm import CmmOpt ( cmmMiniInline, cmmMachOpFold ) import PprCmm ( pprStmt, pprCmms ) import MachOp -import CLabel ( CLabel, mkSplitMarkerLabel, mkAsmTempLabel ) +import CLabel #if powerpc_TARGET_ARCH import CLabel ( mkRtsCodeLabel ) #endif @@ -37,8 +37,9 @@ import FastTypes import List ( groupBy, sortBy ) import CLabel ( pprCLabel ) import ErrUtils ( dumpIfSet_dyn ) -import DynFlags ( DynFlags, DynFlag(..), dopt ) +import DynFlags import StaticFlags ( opt_Static, opt_PIC ) +import Util import Config ( cProjectVersion ) import Digraph @@ -53,10 +54,10 @@ import FastString import List ( intersperse ) #endif -import DATA_INT -import DATA_WORD -import DATA_BITS -import GLAEXTS +import Data.Int +import Data.Word +import Data.Bits +import GHC.Exts {- The native-code generator has machine-independent and @@ -212,8 +213,10 @@ cmmNativeGen dflags cmm genMachCode cmm `thenUs` \ (pre_regalloc, lastMinuteImports) -> {-# SCC "regAlloc" #-} mapUs regAlloc pre_regalloc `thenUs` \ with_regs -> + {-# SCC "shortcutBranches" #-} + shortcutBranches dflags with_regs `bind` \ shorted -> {-# SCC "sequenceBlocks" #-} - map sequenceTop with_regs `bind` \ sequenced -> + map sequenceTop shorted `bind` \ sequenced -> {-# SCC "x86fp_kludge" #-} map x86fp_kludge sequenced `bind` \ final_mach_code -> {-# SCC "vcat" #-} @@ -245,7 +248,7 @@ cmmNativeGen dflags cmm sequenceTop :: NatCmmTop -> NatCmmTop sequenceTop top@(CmmData _ _) = top sequenceTop (CmmProc info lbl params blocks) = - CmmProc info lbl params (sequenceBlocks blocks) + CmmProc info lbl params (makeFarBranches $ sequenceBlocks blocks) -- The algorithm is very simple (and stupid): we make a graph out of -- the blocks where there is an edge from one block to another iff the @@ -289,6 +292,88 @@ reorder id accum (b@(block,id',out) : rest) | id == id' = (True, (block,id,out) : reverse accum ++ rest) | otherwise = reorder id (b:accum) rest + +-- ----------------------------------------------------------------------------- +-- Making far branches + +-- Conditional branches on PowerPC are limited to +-32KB; if our Procs get too +-- big, we have to work around this limitation. + +makeFarBranches :: [NatBasicBlock] -> [NatBasicBlock] + +#if powerpc_TARGET_ARCH +makeFarBranches blocks + | last blockAddresses < nearLimit = blocks + | otherwise = zipWith handleBlock blockAddresses blocks + where + blockAddresses = scanl (+) 0 $ map blockLen blocks + blockLen (BasicBlock _ instrs) = length instrs + + handleBlock addr (BasicBlock id instrs) + = BasicBlock id (zipWith makeFar [addr..] instrs) + + makeFar addr (BCC ALWAYS tgt) = BCC ALWAYS tgt + makeFar addr (BCC cond tgt) + | abs (addr - targetAddr) >= nearLimit + = BCCFAR cond tgt + | otherwise + = BCC cond tgt + where Just targetAddr = lookupUFM blockAddressMap tgt + makeFar addr other = other + + nearLimit = 7000 -- 8192 instructions are allowed; let's keep some + -- distance, as we have a few pseudo-insns that are + -- pretty-printed as multiple instructions, + -- and it's just not worth the effort to calculate + -- things exactly + + blockAddressMap = listToUFM $ zip (map blockId blocks) blockAddresses +#else +makeFarBranches = id +#endif + +-- ----------------------------------------------------------------------------- +-- Shortcut branches + +shortcutBranches :: DynFlags -> [NatCmmTop] -> [NatCmmTop] +shortcutBranches dflags tops + | optLevel dflags < 1 = tops -- only with -O or higher + | otherwise = map (apply_mapping mapping) tops' + where + (tops', mappings) = mapAndUnzip build_mapping tops + mapping = foldr plusUFM emptyUFM mappings + +build_mapping top@(CmmData _ _) = (top, emptyUFM) +build_mapping (CmmProc info lbl params []) + = (CmmProc info lbl params [], emptyUFM) +build_mapping (CmmProc info lbl params (head:blocks)) + = (CmmProc info lbl params (head:others), mapping) + -- drop the shorted blocks, but don't ever drop the first one, + -- because it is pointed to by a global label. + where + -- find all the blocks that just consist of a jump that can be + -- shorted. + (shortcut_blocks, others) = partitionWith split blocks + split (BasicBlock id [insn]) | Just dest <- canShortcut insn + = Left (id,dest) + split other = Right other + + -- build a mapping from BlockId to JumpDest for shorting branches + mapping = foldl add emptyUFM shortcut_blocks + add ufm (id,dest) = addToUFM ufm id dest + +apply_mapping ufm (CmmData sec statics) + = CmmData sec (map (shortcutStatic (lookupUFM ufm)) statics) + -- we need to get the jump tables, so apply the mapping to the entries + -- of a CmmData too. +apply_mapping ufm (CmmProc info lbl params blocks) + = CmmProc info lbl params (map short_bb blocks) + where + short_bb (BasicBlock id insns) = BasicBlock id $! map short_insn insns + short_insn i = shortcutJump (lookupUFM ufm) i + -- shortcutJump should apply the mapping repeatedly, + -- just in case we can short multiple branches. + -- ----------------------------------------------------------------------------- -- Instruction selection @@ -444,33 +529,33 @@ cmmBlockConFold (BasicBlock id stmts) = do cmmStmtConFold stmt = case stmt of CmmAssign reg src - -> do src' <- cmmExprConFold False src + -> do src' <- cmmExprConFold DataReference src return $ case src' of CmmReg reg' | reg == reg' -> CmmNop new_src -> CmmAssign reg new_src CmmStore addr src - -> do addr' <- cmmExprConFold False addr - src' <- cmmExprConFold False src + -> do addr' <- cmmExprConFold DataReference addr + src' <- cmmExprConFold DataReference src return $ CmmStore addr' src' CmmJump addr regs - -> do addr' <- cmmExprConFold True addr + -> do addr' <- cmmExprConFold JumpReference addr return $ CmmJump addr' regs CmmCall target regs args vols -> do target' <- case target of CmmForeignCall e conv -> do - e' <- cmmExprConFold True e + e' <- cmmExprConFold CallReference e return $ CmmForeignCall e' conv other -> return other args' <- mapM (\(arg, hint) -> do - arg' <- cmmExprConFold False arg + arg' <- cmmExprConFold DataReference arg return (arg', hint)) args return $ CmmCall target' regs args' vols CmmCondBranch test dest - -> do test' <- cmmExprConFold False test + -> do test' <- cmmExprConFold DataReference test return $ case test' of CmmLit (CmmInt 0 _) -> CmmComment (mkFastString ("deleted: " ++ @@ -480,29 +565,29 @@ cmmStmtConFold stmt other -> CmmCondBranch test' dest CmmSwitch expr ids - -> do expr' <- cmmExprConFold False expr + -> do expr' <- cmmExprConFold DataReference expr return $ CmmSwitch expr' ids other -> return other -cmmExprConFold isJumpTarget expr +cmmExprConFold referenceKind expr = case expr of CmmLoad addr rep - -> do addr' <- cmmExprConFold False addr + -> do addr' <- cmmExprConFold DataReference addr return $ CmmLoad addr' rep CmmMachOp mop args -- For MachOps, we first optimize the children, and then we try -- our hand at some constant-folding. - -> do args' <- mapM (cmmExprConFold False) args + -> do args' <- mapM (cmmExprConFold DataReference) args return $ cmmMachOpFold mop args' CmmLit (CmmLabel lbl) - -> cmmMakeDynamicReference addImportCmmOpt isJumpTarget lbl + -> cmmMakeDynamicReference addImportCmmOpt referenceKind lbl CmmLit (CmmLabelOff lbl off) - -> do dynRef <- cmmMakeDynamicReference addImportCmmOpt isJumpTarget lbl + -> do dynRef <- cmmMakeDynamicReference addImportCmmOpt referenceKind lbl return $ cmmMachOpFold (MO_Add wordRep) [ dynRef, (CmmLit $ CmmInt (fromIntegral off) wordRep) @@ -514,11 +599,11 @@ cmmExprConFold isJumpTarget expr -- with the corresponding labels: CmmReg (CmmGlobal GCEnter1) | not opt_PIC - -> cmmExprConFold isJumpTarget $ + -> cmmExprConFold referenceKind $ CmmLit (CmmLabel (mkRtsCodeLabel SLIT( "__stg_gc_enter_1"))) CmmReg (CmmGlobal GCFun) | not opt_PIC - -> cmmExprConFold isJumpTarget $ + -> cmmExprConFold referenceKind $ CmmLit (CmmLabel (mkRtsCodeLabel SLIT( "__stg_gc_fun"))) #endif @@ -533,12 +618,12 @@ cmmExprConFold isJumpTarget expr Left realreg -> return expr Right baseRegAddr -> case mid of - BaseReg -> cmmExprConFold False baseRegAddr - other -> cmmExprConFold False (CmmLoad baseRegAddr - (globalRegRep mid)) + BaseReg -> cmmExprConFold DataReference baseRegAddr + other -> cmmExprConFold DataReference + (CmmLoad baseRegAddr (globalRegRep mid)) -- eliminate zero offsets CmmRegOff reg 0 - -> cmmExprConFold False (CmmReg reg) + -> cmmExprConFold referenceKind (CmmReg reg) CmmRegOff (CmmGlobal mid) offset -- RegOf leaves are just a shorthand form. If the reg maps @@ -547,7 +632,7 @@ cmmExprConFold isJumpTarget expr -> case get_GlobalReg_reg_or_addr mid of Left realreg -> return expr Right baseRegAddr - -> cmmExprConFold False (CmmMachOp (MO_Add wordRep) [ + -> cmmExprConFold DataReference (CmmMachOp (MO_Add wordRep) [ CmmReg (CmmGlobal mid), CmmLit (CmmInt (fromIntegral offset) wordRep)])