From: mrchebas@gmail.com Date: Mon, 22 Jan 2007 11:42:01 +0000 (+0000) Subject: optimisation: shortcut branches when possible (x86/x86_64 only for now) X-Git-Url: http://git.megacz.com/?p=ghc-hetmet.git;a=commitdiff_plain;h=22bbcd1dcd94851b8f9409310cf95f3b9332850c optimisation: shortcut branches when possible (x86/x86_64 only for now) This is only turned on with -O, and probably won't make much difference at the moment, but it will be important for semi-tagging. --- diff --git a/compiler/cmm/CLabel.hs b/compiler/cmm/CLabel.hs index d96d416..397a074 100644 --- a/compiler/cmm/CLabel.hs +++ b/compiler/cmm/CLabel.hs @@ -98,7 +98,7 @@ module CLabel ( mkHpcModuleOffsetLabel, infoLblToEntryLbl, entryLblToInfoLbl, - needsCDecl, isAsmTemp, externallyVisibleCLabel, + needsCDecl, isAsmTemp, maybeAsmTemp, externallyVisibleCLabel, CLabelType(..), labelType, labelDynamic, pprCLabel @@ -497,6 +497,10 @@ isAsmTemp :: CLabel -> Bool -- is a local temporary for native code generati isAsmTemp (AsmTempLabel _) = True isAsmTemp _ = False +maybeAsmTemp :: CLabel -> Maybe Unique +maybeAsmTemp (AsmTempLabel uq) = Just uq +maybeAsmTemp _ = Nothing + -- ----------------------------------------------------------------------------- -- Is a CLabel visible outside this object file or not? diff --git a/compiler/cmm/Cmm.hs b/compiler/cmm/Cmm.hs index d30f963..06e3d16 100644 --- a/compiler/cmm/Cmm.hs +++ b/compiler/cmm/Cmm.hs @@ -133,6 +133,42 @@ data CmmStmt | CmmJump CmmExpr [LocalReg] -- Jump to another function, with these -- parameters. +{- +Discussion +~~~~~~~~~~ + +One possible problem with the above type is that the only way to do a +non-local conditional jump is to encode it as a branch to a block that +contains a single jump. This leads to inefficient code in the back end. + +One possible way to fix this would be: + +data CmmStat = + ... + | CmmJump CmmBranchDest + | CmmCondJump CmmExpr CmmBranchDest + ... + +data CmmBranchDest + = Local BlockId + | NonLocal CmmExpr [LocalReg] + +In favour: + ++ one fewer constructors in CmmStmt ++ allows both cond branch and switch to jump to non-local destinations + +Against: + +- not strictly necessary: can already encode as branch+jump +- not always possible to implement any better in the back end +- could do the optimisation in the back end (but then plat-specific?) +- C-- doesn't have it +- back-end optimisation might be more general (jump shortcutting) + +So we'll stick with the way it is, and add the optimisation to the NCG. +-} + ----------------------------------------------------------------------------- -- CmmCallTarget -- diff --git a/compiler/nativeGen/AsmCodeGen.lhs b/compiler/nativeGen/AsmCodeGen.lhs index 109054f..f7b7570 100644 --- a/compiler/nativeGen/AsmCodeGen.lhs +++ b/compiler/nativeGen/AsmCodeGen.lhs @@ -17,7 +17,7 @@ import MachRegs import MachCodeGen import PprMach import RegisterAlloc -import RegAllocInfo ( jumpDests ) +import RegAllocInfo import NCGMonad import PositionIndependentCode @@ -25,7 +25,7 @@ import Cmm import CmmOpt ( cmmMiniInline, cmmMachOpFold ) import PprCmm ( pprStmt, pprCmms ) import MachOp -import CLabel ( CLabel, mkSplitMarkerLabel, mkAsmTempLabel ) +import CLabel #if powerpc_TARGET_ARCH import CLabel ( mkRtsCodeLabel ) #endif @@ -37,8 +37,9 @@ import FastTypes import List ( groupBy, sortBy ) import CLabel ( pprCLabel ) import ErrUtils ( dumpIfSet_dyn ) -import DynFlags ( DynFlags, DynFlag(..), dopt ) +import DynFlags import StaticFlags ( opt_Static, opt_PIC ) +import Util import Config ( cProjectVersion ) import Digraph @@ -212,8 +213,10 @@ cmmNativeGen dflags cmm genMachCode cmm `thenUs` \ (pre_regalloc, lastMinuteImports) -> {-# SCC "regAlloc" #-} mapUs regAlloc pre_regalloc `thenUs` \ with_regs -> + {-# SCC "shortcutBranches" #-} + shortcutBranches dflags with_regs `bind` \ shorted -> {-# SCC "sequenceBlocks" #-} - map sequenceTop with_regs `bind` \ sequenced -> + map sequenceTop shorted `bind` \ sequenced -> {-# SCC "x86fp_kludge" #-} map x86fp_kludge sequenced `bind` \ final_mach_code -> {-# SCC "vcat" #-} @@ -330,6 +333,48 @@ makeFarBranches = id #endif -- ----------------------------------------------------------------------------- +-- Shortcut branches + +shortcutBranches :: DynFlags -> [NatCmmTop] -> [NatCmmTop] +shortcutBranches dflags tops + | optLevel dflags < 1 = tops -- only with -O or higher + | otherwise = map (apply_mapping mapping) tops' + where + (tops', mappings) = mapAndUnzip build_mapping tops + mapping = foldr plusUFM emptyUFM mappings + +build_mapping top@(CmmData _ _) = (top, emptyUFM) +build_mapping (CmmProc info lbl params []) $ + = (CmmProc info lbl params [], emptyUFM) +build_mapping (CmmProc info lbl params (head:blocks)) + = (CmmProc info lbl params (head:others), mapping) + -- drop the shorted blocks, but don't ever drop the first one, + -- because it is pointed to by a global label. + where + -- find all the blocks that just consist of a jump that can be + -- shorted. + (shortcut_blocks, others) = partitionWith split blocks + split (BasicBlock id [insn]) | Just dest <- canShortcut insn + = Left (id,dest) + split other = Right other + + -- build a mapping from BlockId to JumpDest for shorting branches + mapping = foldl add emptyUFM shortcut_blocks + add ufm (id,dest) = addToUFM ufm id dest + +apply_mapping ufm (CmmData sec statics) + = CmmData sec (map (shortcutStatic (lookupUFM ufm)) statics) + -- we need to get the jump tables, so apply the mapping to the entries + -- of a CmmData too. +apply_mapping ufm (CmmProc info lbl params blocks) + = CmmProc info lbl params (map short_bb blocks) + where + short_bb (BasicBlock id insns) = BasicBlock id $! map short_insn insns + short_insn i = shortcutJump (lookupUFM ufm) i + -- shortcutJump should apply the mapping repeatedly, + -- just in case we can short multiple branches. + +-- ----------------------------------------------------------------------------- -- Instruction selection -- Native code instruction selection for a chunk of stix code. For diff --git a/compiler/nativeGen/MachInstrs.hs b/compiler/nativeGen/MachInstrs.hs index a9b04db..96db72d 100644 --- a/compiler/nativeGen/MachInstrs.hs +++ b/compiler/nativeGen/MachInstrs.hs @@ -520,6 +520,7 @@ bit or 64 bit precision. -- Jumping around. | JMP Operand | JXX Cond BlockId -- includes unconditional branches + | JXX_GBL Cond Imm -- non-local version of JXX | JMP_TBL Operand [BlockId] -- table jump | CALL (Either Imm Reg) [Reg] diff --git a/compiler/nativeGen/PprMach.hs b/compiler/nativeGen/PprMach.hs index 5016726..5c731f1 100644 --- a/compiler/nativeGen/PprMach.hs +++ b/compiler/nativeGen/PprMach.hs @@ -1291,6 +1291,8 @@ pprInstr (JXX cond (BlockId id)) = pprCondInstr SLIT("j") cond (pprCLabel_asm lab) where lab = mkAsmTempLabel id +pprInstr (JXX_GBL cond imm) = pprCondInstr SLIT("j") cond (pprImm imm) + pprInstr (JMP (OpImm imm)) = (<>) (ptext SLIT("\tjmp ")) (pprImm imm) pprInstr (JMP op) = (<>) (ptext SLIT("\tjmp *")) (pprOperand wordRep op) pprInstr (JMP_TBL op ids) = pprInstr (JMP op) diff --git a/compiler/nativeGen/RegAllocInfo.hs b/compiler/nativeGen/RegAllocInfo.hs index 2c3ab6b..fefa314 100644 --- a/compiler/nativeGen/RegAllocInfo.hs +++ b/compiler/nativeGen/RegAllocInfo.hs @@ -17,6 +17,8 @@ module RegAllocInfo ( patchJump, isRegRegMove, + JumpDest, canShortcut, shortcutJump, shortcutStatic, + maxSpillSlots, mkSpillInstr, mkLoadInstr, @@ -26,7 +28,8 @@ module RegAllocInfo ( #include "HsVersions.h" -import Cmm ( BlockId ) +import Cmm +import CLabel import MachOp ( MachRep(..), wordRep ) import MachInstrs import MachRegs @@ -172,6 +175,7 @@ regUsage instr = case instr of CMP sz src dst -> mkRUR (use_R src ++ use_R dst) SETCC cond op -> mkRU [] (def_W op) JXX cond lbl -> mkRU [] [] + JXX_GBL cond lbl -> mkRU [] [] JMP op -> mkRUR (use_R op) JMP_TBL op ids -> mkRUR (use_R op) CALL (Left imm) params -> mkRU params callClobberedRegs @@ -417,6 +421,45 @@ patchJump insn old new #endif _other -> insn +data JumpDest = DestBlockId BlockId | DestImm Imm + +canShortcut :: Instr -> Maybe JumpDest +#if i386_TARGET_ARCH || x86_64_TARGET_ARCH +canShortcut (JXX ALWAYS id) = Just (DestBlockId id) +canShortcut (JMP (OpImm imm)) = Just (DestImm imm) +#endif +canShortcut _ = Nothing + +shortcutJump :: (BlockId -> Maybe JumpDest) -> Instr -> Instr +#if i386_TARGET_ARCH || x86_64_TARGET_ARCH +shortcutJump fn insn@(JXX cc id) = + case fn id of + Nothing -> insn + Just (DestBlockId id') -> shortcutJump fn (JXX cc id') + Just (DestImm imm) -> shortcutJump fn (JXX_GBL cc imm) +#endif +shortcutJump fn other = other + +-- Here because it knows about JumpDest +shortcutStatic :: (BlockId -> Maybe JumpDest) -> CmmStatic -> CmmStatic +shortcutStatic fn (CmmStaticLit (CmmLabel lab)) + | Just uq <- maybeAsmTemp lab + = CmmStaticLit (CmmLabel (shortBlockId fn (BlockId uq))) +shortcutStatic fn (CmmStaticLit (CmmLabelDiffOff lbl1 lbl2 off)) + | Just uq <- maybeAsmTemp lbl1 + = CmmStaticLit (CmmLabelDiffOff (shortBlockId fn (BlockId uq)) lbl2 off) + -- slightly dodgy, we're ignoring the second label, but this + -- works with the way we use CmmLabelDiffOff for jump tables now. +shortcutStatic fn other_static + = other_static + +shortBlockId fn blockid@(BlockId uq) = + case fn blockid of + Nothing -> mkAsmTempLabel uq + Just (DestBlockId blockid') -> shortBlockId fn blockid' + Just (DestImm (ImmCLbl lbl)) -> lbl + _other -> panic "shortBlockId" + -- ----------------------------------------------------------------------------- -- 'patchRegs' function @@ -562,6 +605,7 @@ patchRegs instr env = case instr of COMMENT _ -> instr DELTA _ -> instr JXX _ _ -> instr + JXX_GBL _ _ -> instr CLTD _ -> instr _other -> panic "patchRegs: unrecognised instr"