X-Git-Url: http://git.megacz.com/?a=blobdiff_plain;f=compiler%2Fcmm%2FCmmOpt.hs;h=710301437f3187cb7f062403dfe81a4c669ba9b3;hb=d5934bbb856aa0aa620c9b2e0fa51c90a1a5a048;hp=c454ff4c6a6c980fbf3f7db050e09b5011ef42f7;hpb=0065d5ab628975892cea1ec7303f968c3338cbe1;p=ghc-hetmet.git diff --git a/compiler/cmm/CmmOpt.hs b/compiler/cmm/CmmOpt.hs index c454ff4..71030143 100644 --- a/compiler/cmm/CmmOpt.hs +++ b/compiler/cmm/CmmOpt.hs @@ -15,22 +15,21 @@ module CmmOpt ( #include "HsVersions.h" import Cmm -import CmmUtils ( hasNoGlobalRegs ) -import CLabel ( entryLblToInfoLbl ) +import CmmUtils +import CLabel import MachOp -import SMRep ( tablesNextToCode ) +import SMRep +import StaticFlags import UniqFM -import Unique ( Unique ) -import Panic ( panic ) +import Unique import Outputable -import Bits -import Word -import Int -import GLAEXTS - +import Data.Bits +import Data.Word +import Data.Int +import GHC.Exts -- ----------------------------------------------------------------------------- -- The mini-inliner @@ -385,14 +384,37 @@ cmmMachOpFold mop args@[x, y@(CmmLit (CmmInt 1 rep))] cmmMachOpFold mop args@[x, y@(CmmLit (CmmInt n _))] = case mop of MO_Mul rep - -> case exactLog2 n of - Nothing -> unchanged - Just p -> CmmMachOp (MO_Shl rep) [x, CmmLit (CmmInt p rep)] + | Just p <- exactLog2 n -> + CmmMachOp (MO_Shl rep) [x, CmmLit (CmmInt p rep)] MO_S_Quot rep - -> case exactLog2 n of - Nothing -> unchanged - Just p -> CmmMachOp (MO_S_Shr rep) [x, CmmLit (CmmInt p rep)] - other + | Just p <- exactLog2 n, + CmmReg _ <- x -> -- We duplicate x below, hence require + -- it is a reg. FIXME: remove this restriction. + -- shift right is not the same as quot, because it rounds + -- to minus infinity, whereas quot rounds toward zero. + -- To fix this up, we add one less than the divisor to the + -- dividend if it is a negative number. 
+ -- + -- to avoid a test/jump, we use the following sequence: + -- x1 = x >> word_size-1 (all 1s if -ve, all 0s if +ve) + -- x2 = x1 & (divisor-1) + -- result = (x+x2) >>= log2(divisor) + -- this could be done a bit more simply using conditional moves, + -- but we're processor independent here. + -- + -- we optimise the divide by 2 case slightly, generating + -- x1 = x >> word_size-1 (unsigned) + -- return = (x + x1) >>= log2(divisor) + let + bits = fromIntegral (machRepBitWidth rep) - 1 + shr = if p == 1 then MO_U_Shr rep else MO_S_Shr rep + x1 = CmmMachOp shr [x, CmmLit (CmmInt bits rep)] + x2 = if p == 1 then x1 else + CmmMachOp (MO_And rep) [x1, CmmLit (CmmInt (n-1) rep)] + x3 = CmmMachOp (MO_Add rep) [x, x2] + in + CmmMachOp (MO_S_Shr rep) [x3, CmmLit (CmmInt p rep)] + other -> unchanged where unchanged = CmmMachOp mop args