X-Git-Url: http://git.megacz.com/?p=ghc-hetmet.git;a=blobdiff_plain;f=compiler%2Fcmm%2FCmmOpt.hs;h=69df4fbff14bab176a615c6c9e219de64ab1a570;hp=710301437f3187cb7f062403dfe81a4c669ba9b3;hb=e2e0785eb7f4efd9f7791d913cdfdfd03148cd86;hpb=80564ddc183ea98856994112858f0b9f3e178f94 diff --git a/compiler/cmm/CmmOpt.hs b/compiler/cmm/CmmOpt.hs index 71030143..69df4fb 100644 --- a/compiler/cmm/CmmOpt.hs +++ b/compiler/cmm/CmmOpt.hs @@ -1,3 +1,10 @@ +{-# OPTIONS -w #-} +-- The above warning supression flag is a temporary kludge. +-- While working on this module you are encouraged to remove it and fix +-- any warnings in the module. See +-- http://hackage.haskell.org/trac/ghc/wiki/Commentary/CodingStyle#Warnings +-- for details + ----------------------------------------------------------------------------- -- -- Cmm optimisation @@ -7,6 +14,7 @@ ----------------------------------------------------------------------------- module CmmOpt ( + cmmEliminateDeadBlocks, cmmMiniInline, cmmMachOpFold, cmmLoopifyForC, @@ -14,38 +22,100 @@ module CmmOpt ( #include "HsVersions.h" -import Cmm +import OldCmm import CmmUtils import CLabel -import MachOp -import SMRep import StaticFlags import UniqFM import Unique - +import FastTypes import Outputable +import BlockId import Data.Bits import Data.Word import Data.Int -import GHC.Exts +import Data.Maybe +import Data.List + +import Compiler.Hoopl hiding (Unique) + +-- ----------------------------------------------------------------------------- +-- Eliminates dead blocks + +{- +We repeatedly expand the set of reachable blocks until we hit a +fixpoint, and then prune any blocks that were not in this set. This is +actually a required optimization, as dead blocks can cause problems +for invariants in the linear register allocator (and possibly other +places.) +-} + +-- Deep fold over statements could probably be abstracted out, but it +-- might not be worth the effort since OldCmm is moribund +cmmEliminateDeadBlocks :: [CmmBasicBlock] -> [CmmBasicBlock] +cmmEliminateDeadBlocks [] = [] +cmmEliminateDeadBlocks blocks@(BasicBlock base_id _:_) = + let -- Calculate what's reachable from what block + reachableMap = foldl' f emptyUFM blocks -- lazy in values + where f m (BasicBlock block_id stmts) = addToUFM m block_id (reachableFrom stmts) + reachableFrom stmts = foldl stmt [] stmts + where + stmt m CmmNop = m + stmt m (CmmComment _) = m + stmt m (CmmAssign _ e) = expr m e + stmt m (CmmStore e1 e2) = expr (expr m e1) e2 + stmt m (CmmCall c _ as _ _) = f (actuals m as) c + where f m (CmmCallee e _) = expr m e + f m (CmmPrim _) = m + stmt m (CmmBranch b) = b:m + stmt m (CmmCondBranch e b) = b:(expr m e) + stmt m (CmmSwitch e bs) = catMaybes bs ++ expr m e + stmt m (CmmJump e as) = expr (actuals m as) e + stmt m (CmmReturn as) = actuals m as + actuals m as = foldl' (\m h -> expr m (hintlessCmm h)) m as + -- We have to do a deep fold into CmmExpr because + -- there may be a BlockId in the CmmBlock literal. + expr m (CmmLit l) = lit m l + expr m (CmmLoad e _) = expr m e + expr m (CmmReg _) = m + expr m (CmmMachOp _ es) = foldl' expr m es + expr m (CmmStackSlot _ _) = m + expr m (CmmRegOff _ _) = m + lit m (CmmBlock b) = b:m + lit m _ = m + -- go todo done + reachable = go [base_id] (setEmpty :: BlockSet) + where go [] m = m + go (x:xs) m + | setMember x m = go xs m + | otherwise = go (add ++ xs) (setInsert x m) + where add = fromMaybe (panic "cmmEliminateDeadBlocks: unknown block") + (lookupUFM reachableMap x) + in filter (\(BasicBlock block_id _) -> setMember block_id reachable) blocks -- ----------------------------------------------------------------------------- -- The mini-inliner {- -This pass inlines assignments to temporaries that are used just -once. It works as follows: +This pass inlines assignments to temporaries. Temporaries that are +only used once are unconditionally inlined. Temporaries that are used +two or more times are only inlined if they are assigned a literal. It +works as follows: - count uses of each temporary - - for each temporary that occurs just once: + - for each temporary: - attempt to push it forward to the statement that uses it - only push forward past assignments to other temporaries (assumes that temporaries are single-assignment) - if we reach the statement that uses it, inline the rhs and delete the original assignment. +[N.B. In the Quick C-- compiler, this optimization is achieved by a + combination of two dataflow passes: forward substitution (peephole + optimization) and dead-assignment elimination. ---NR] + Possible generalisations: here is an example from factorial Fac_zdwfac_entry: @@ -79,21 +149,49 @@ To inline _smi: its occurrences. -} +countUses :: UserOfLocalRegs a => a -> UniqFM Int +countUses a = foldRegsUsed (\m r -> addToUFM m r (count m r + 1)) emptyUFM a + where count m r = lookupWithDefaultUFM m (0::Int) r + cmmMiniInline :: [CmmBasicBlock] -> [CmmBasicBlock] cmmMiniInline blocks = map do_inline blocks - where - blockUses (BasicBlock _ stmts) - = foldr (plusUFM_C (+)) emptyUFM (map getStmtUses stmts) - - uses = foldr (plusUFM_C (+)) emptyUFM (map blockUses blocks) - - do_inline (BasicBlock id stmts) - = BasicBlock id (cmmMiniInlineStmts uses stmts) - + where do_inline (BasicBlock id stmts) + = BasicBlock id (cmmMiniInlineStmts (countUses blocks) stmts) cmmMiniInlineStmts :: UniqFM Int -> [CmmStmt] -> [CmmStmt] cmmMiniInlineStmts uses [] = [] cmmMiniInlineStmts uses (stmt@(CmmAssign (CmmLocal (LocalReg u _)) expr) : stmts) + -- not used: just discard this assignment + | Nothing <- lookupUFM uses u + = cmmMiniInlineStmts uses stmts + + -- used (literal): try to inline at all the use sites + | Just n <- lookupUFM uses u, isLit expr + = +#ifdef NCG_DEBUG + trace ("nativeGen: inlining " ++ showSDoc (pprStmt stmt)) $ +#endif + case lookForInlineLit u expr stmts of + (m, stmts') + | n == m -> cmmMiniInlineStmts (delFromUFM uses u) stmts' + | otherwise -> + stmt : cmmMiniInlineStmts (adjustUFM (\x -> x - m) uses u) stmts' + + -- used (foldable to literal): try to inline at all the use sites + | Just n <- lookupUFM uses u, + CmmMachOp op es <- expr, + e@(CmmLit _) <- cmmMachOpFold op es + = +#ifdef NCG_DEBUG + trace ("nativeGen: inlining " ++ showSDoc (pprStmt stmt)) $ +#endif + case lookForInlineLit u e stmts of + (m, stmts') + | n == m -> cmmMiniInlineStmts (delFromUFM uses u) stmts' + | otherwise -> + stmt : cmmMiniInlineStmts (adjustUFM (\x -> x - m) uses u) stmts' + + -- used once (non-literal): try to inline at the use site | Just 1 <- lookupUFM uses u, Just stmts' <- lookForInline u expr stmts = @@ -105,25 +203,46 @@ cmmMiniInlineStmts uses (stmt@(CmmAssign (CmmLocal (LocalReg u _)) expr) : stmts cmmMiniInlineStmts uses (stmt:stmts) = stmt : cmmMiniInlineStmts uses stmts +-- | Takes a register, a 'CmmLit' expression assigned to that +-- register, and a list of statements. Inlines the expression at all +-- use sites of the register. Returns the number of substituations +-- made and the, possibly modified, list of statements. +lookForInlineLit :: Unique -> CmmExpr -> [CmmStmt] -> (Int, [CmmStmt]) +lookForInlineLit _ _ [] = (0, []) +lookForInlineLit u expr stmts@(stmt : rest) + | Just n <- lookupUFM (countUses stmt) u + = case lookForInlineLit u expr rest of + (m, stmts) -> let z = n + m + in z `seq` (z, inlineStmt u expr stmt : stmts) + + | ok_to_skip + = case lookForInlineLit u expr rest of + (n, stmts) -> (n, stmt : stmts) + + | otherwise + = (0, stmts) + where + -- We skip over assignments to registers, unless the register + -- being assigned to is the one we're inlining. + ok_to_skip = case stmt of + CmmAssign (CmmLocal r@(LocalReg u' _)) _ | u' == u -> False + _other -> True + +lookForInline u expr stmts = lookForInline' u expr regset stmts + where regset = foldRegsUsed extendRegSet emptyRegSet expr + +lookForInline' u expr regset (stmt : rest) + | Just 1 <- lookupUFM (countUses stmt) u, ok_to_inline + = Just (inlineStmt u expr stmt : rest) + + | ok_to_skip + = case lookForInline' u expr regset rest of + Nothing -> Nothing + Just stmts -> Just (stmt:stmts) + + | otherwise + = Nothing --- Try to inline a temporary assignment. We can skip over assignments to --- other tempoararies, because we know that expressions aren't side-effecting --- and temporaries are single-assignment. -lookForInline u expr (stmt@(CmmAssign (CmmLocal (LocalReg u' _)) rhs) : rest) - | u /= u' - = case lookupUFM (getExprUses rhs) u of - Just 1 -> Just (inlineStmt u expr stmt : rest) - _other -> case lookForInline u expr rest of - Nothing -> Nothing - Just stmts -> Just (stmt:stmts) - -lookForInline u expr (CmmNop : rest) - = lookForInline u expr rest - -lookForInline u expr (stmt:stmts) - = case lookupUFM (getStmtUses stmt) u of - Just 1 | ok_to_inline -> Just (inlineStmt u expr stmt : stmts) - _other -> Nothing where -- we don't inline into CmmCall if the expression refers to global -- registers. This is a HACK to avoid global registers clashing with @@ -134,38 +253,30 @@ lookForInline u expr (stmt:stmts) CmmCall{} -> hasNoGlobalRegs expr _ -> True --- ----------------------------------------------------------------------------- --- Boring Cmm traversals for collecting usage info and substitutions. - -getStmtUses :: CmmStmt -> UniqFM Int -getStmtUses (CmmAssign _ e) = getExprUses e -getStmtUses (CmmStore e1 e2) = plusUFM_C (+) (getExprUses e1) (getExprUses e2) -getStmtUses (CmmCall target _ es _) - = plusUFM_C (+) (uses target) (getExprsUses (map fst es)) - where uses (CmmForeignCall e _) = getExprUses e - uses _ = emptyUFM -getStmtUses (CmmCondBranch e _) = getExprUses e -getStmtUses (CmmSwitch e _) = getExprUses e -getStmtUses (CmmJump e _) = getExprUses e -getStmtUses _ = emptyUFM - -getExprUses :: CmmExpr -> UniqFM Int -getExprUses (CmmReg (CmmLocal (LocalReg u _))) = unitUFM u 1 -getExprUses (CmmRegOff (CmmLocal (LocalReg u _)) _) = unitUFM u 1 -getExprUses (CmmLoad e _) = getExprUses e -getExprUses (CmmMachOp _ es) = getExprsUses es -getExprUses _other = emptyUFM - -getExprsUses es = foldr (plusUFM_C (+)) emptyUFM (map getExprUses es) + -- Expressions aren't side-effecting. Temporaries may or may not + -- be single-assignment depending on the source (the old code + -- generator creates single-assignment code, but hand-written Cmm + -- and Cmm from the new code generator is not single-assignment.) + -- So we do an extra check to make sure that the register being + -- changed is not one we were relying on. I don't know how much of a + -- performance hit this is (we have to create a regset for every + -- instruction.) -- EZY + ok_to_skip = case stmt of + CmmNop -> True + CmmComment{} -> True + CmmAssign (CmmLocal r@(LocalReg u' _)) rhs | u' /= u && not (r `elemRegSet` regset) -> True + CmmAssign g@(CmmGlobal _) rhs -> not (g `regUsedIn` expr) + _other -> False + inlineStmt :: Unique -> CmmExpr -> CmmStmt -> CmmStmt inlineStmt u a (CmmAssign r e) = CmmAssign r (inlineExpr u a e) inlineStmt u a (CmmStore e1 e2) = CmmStore (inlineExpr u a e1) (inlineExpr u a e2) -inlineStmt u a (CmmCall target regs es vols) - = CmmCall (infn target) regs es' vols - where infn (CmmForeignCall fn cconv) = CmmForeignCall fn cconv +inlineStmt u a (CmmCall target regs es srt ret) + = CmmCall (infn target) regs es' srt ret + where infn (CmmCallee fn cconv) = CmmCallee (inlineExpr u a fn) cconv infn (CmmPrim p) = CmmPrim p - es' = [ (inlineExpr u a e, hint) | (e,hint) <- es ] + es' = [ (CmmHinted (inlineExpr u a e) hint) | (CmmHinted e hint) <- es ] inlineStmt u a (CmmCondBranch e d) = CmmCondBranch (inlineExpr u a e) d inlineStmt u a (CmmSwitch e d) = CmmSwitch (inlineExpr u a e) d inlineStmt u a (CmmJump e d) = CmmJump (inlineExpr u a e) d @@ -176,8 +287,10 @@ inlineExpr u a e@(CmmReg (CmmLocal (LocalReg u' _))) | u == u' = a | otherwise = e inlineExpr u a e@(CmmRegOff (CmmLocal (LocalReg u' rep)) off) - | u == u' = CmmMachOp (MO_Add rep) [a, CmmLit (CmmInt (fromIntegral off) rep)] + | u == u' = CmmMachOp (MO_Add width) [a, CmmLit (CmmInt (fromIntegral off) width)] | otherwise = e + where + width = typeWidth rep inlineExpr u a (CmmLoad e rep) = CmmLoad (inlineExpr u a e) rep inlineExpr u a (CmmMachOp op es) = CmmMachOp op (map (inlineExpr u a) es) inlineExpr u a other_expr = other_expr @@ -202,17 +315,16 @@ cmmMachOpFold op arg@[CmmLit (CmmInt x rep)] -- "from" type, in order to truncate to the correct size. -- The final narrow/widen to the destination type -- is implicit in the CmmLit. - MO_S_Conv from to - | isFloatingRep to -> CmmLit (CmmFloat (fromInteger x) to) - | otherwise -> CmmLit (CmmInt (narrowS from x) to) - MO_U_Conv from to -> CmmLit (CmmInt (narrowU from x) to) + MO_SF_Conv from to -> CmmLit (CmmFloat (fromInteger x) to) + MO_SS_Conv from to -> CmmLit (CmmInt (narrowS from x) to) + MO_UU_Conv from to -> CmmLit (CmmInt (narrowU from x) to) _ -> panic "cmmMachOpFold: unknown unary op" -- Eliminate conversion NOPs -cmmMachOpFold (MO_S_Conv rep1 rep2) [x] | rep1 == rep2 = x -cmmMachOpFold (MO_U_Conv rep1 rep2) [x] | rep1 == rep2 = x +cmmMachOpFold (MO_SS_Conv rep1 rep2) [x] | rep1 == rep2 = x +cmmMachOpFold (MO_UU_Conv rep1 rep2) [x] | rep1 == rep2 = x -- Eliminate nested conversions where possible cmmMachOpFold conv_outer args@[CmmMachOp conv_inner [x]] @@ -231,20 +343,18 @@ cmmMachOpFold conv_outer args@[CmmMachOp conv_inner [x]] cmmMachOpFold (intconv signed1 rep1 rep3) [x] -- Nested narrowings: collapse | rep1 > rep2 && rep2 > rep3 -> - cmmMachOpFold (MO_U_Conv rep1 rep3) [x] + cmmMachOpFold (MO_UU_Conv rep1 rep3) [x] | otherwise -> CmmMachOp conv_outer args where - isIntConversion (MO_U_Conv rep1 rep2) - | not (isFloatingRep rep1) && not (isFloatingRep rep2) + isIntConversion (MO_UU_Conv rep1 rep2) = Just (rep1,rep2,False) - isIntConversion (MO_S_Conv rep1 rep2) - | not (isFloatingRep rep1) && not (isFloatingRep rep2) + isIntConversion (MO_SS_Conv rep1 rep2) = Just (rep1,rep2,True) isIntConversion _ = Nothing - intconv True = MO_S_Conv - intconv False = MO_U_Conv + intconv True = MO_SS_Conv + intconv False = MO_UU_Conv -- ToDo: a narrow of a load can be collapsed into a narrow load, right? -- but what if the architecture only supports word-sized loads, should @@ -254,22 +364,24 @@ cmmMachOpFold mop args@[CmmLit (CmmInt x xrep), CmmLit (CmmInt y _)] = case mop of -- for comparisons: don't forget to narrow the arguments before -- comparing, since they might be out of range. - MO_Eq r -> CmmLit (CmmInt (if x_u == y_u then 1 else 0) wordRep) - MO_Ne r -> CmmLit (CmmInt (if x_u /= y_u then 1 else 0) wordRep) + MO_Eq r -> CmmLit (CmmInt (if x_u == y_u then 1 else 0) wordWidth) + MO_Ne r -> CmmLit (CmmInt (if x_u /= y_u then 1 else 0) wordWidth) - MO_U_Gt r -> CmmLit (CmmInt (if x_u > y_u then 1 else 0) wordRep) - MO_U_Ge r -> CmmLit (CmmInt (if x_u >= y_u then 1 else 0) wordRep) - MO_U_Lt r -> CmmLit (CmmInt (if x_u < y_u then 1 else 0) wordRep) - MO_U_Le r -> CmmLit (CmmInt (if x_u <= y_u then 1 else 0) wordRep) + MO_U_Gt r -> CmmLit (CmmInt (if x_u > y_u then 1 else 0) wordWidth) + MO_U_Ge r -> CmmLit (CmmInt (if x_u >= y_u then 1 else 0) wordWidth) + MO_U_Lt r -> CmmLit (CmmInt (if x_u < y_u then 1 else 0) wordWidth) + MO_U_Le r -> CmmLit (CmmInt (if x_u <= y_u then 1 else 0) wordWidth) - MO_S_Gt r -> CmmLit (CmmInt (if x_s > y_s then 1 else 0) wordRep) - MO_S_Ge r -> CmmLit (CmmInt (if x_s >= y_s then 1 else 0) wordRep) - MO_S_Lt r -> CmmLit (CmmInt (if x_s < y_s then 1 else 0) wordRep) - MO_S_Le r -> CmmLit (CmmInt (if x_s <= y_s then 1 else 0) wordRep) + MO_S_Gt r -> CmmLit (CmmInt (if x_s > y_s then 1 else 0) wordWidth) + MO_S_Ge r -> CmmLit (CmmInt (if x_s >= y_s then 1 else 0) wordWidth) + MO_S_Lt r -> CmmLit (CmmInt (if x_s < y_s then 1 else 0) wordWidth) + MO_S_Le r -> CmmLit (CmmInt (if x_s <= y_s then 1 else 0) wordWidth) MO_Add r -> CmmLit (CmmInt (x + y) r) MO_Sub r -> CmmLit (CmmInt (x - y) r) MO_Mul r -> CmmLit (CmmInt (x * y) r) + MO_U_Quot r | y /= 0 -> CmmLit (CmmInt (x_u `quot` y_u) r) + MO_U_Rem r | y /= 0 -> CmmLit (CmmInt (x_u `rem` y_u) r) MO_S_Quot r | y /= 0 -> CmmLit (CmmInt (x `quot` y) r) MO_S_Rem r | y /= 0 -> CmmLit (CmmInt (x `rem` y) r) @@ -312,9 +424,22 @@ cmmMachOpFold op [x@(CmmLit _), y] -- put arg1 on the left of the rearranged expression, we'll get into a -- loop: (x1+x2)+x3 => x1+(x2+x3) => (x2+x3)+x1 => x2+(x3+x1) ... -- +-- Also don't do it if arg1 is PicBaseReg, so that we don't separate the +-- PicBaseReg from the corresponding label (or label difference). +-- cmmMachOpFold mop1 [CmmMachOp mop2 [arg1,arg2], arg3] - | mop1 == mop2 && isAssociativeMachOp mop1 && not (isLit arg1) - = cmmMachOpFold mop1 [arg1, cmmMachOpFold mop2 [arg2,arg3]] + | mop2 `associates_with` mop1 + && not (isLit arg1) && not (isPicReg arg1) + = cmmMachOpFold mop2 [arg1, cmmMachOpFold mop1 [arg2,arg3]] + where + MO_Add{} `associates_with` MO_Sub{} = True + mop1 `associates_with` mop2 = + mop1 == mop2 && isAssociativeMachOp mop1 + +-- special case: (a - b) + c ==> a + (c - b) +cmmMachOpFold mop1@(MO_Add{}) [CmmMachOp mop2@(MO_Sub{}) [arg1,arg2], arg3] + | not (isLit arg1) && not (isPicReg arg1) + = cmmMachOpFold mop1 [arg1, cmmMachOpFold mop2 [arg3,arg2]] -- Make a RegOff if we can cmmMachOpFold (MO_Add _) [CmmReg reg, CmmLit (CmmInt n rep)] @@ -335,6 +460,58 @@ cmmMachOpFold (MO_Add _) [CmmLit (CmmInt i rep), CmmLit (CmmLabel lbl)] cmmMachOpFold (MO_Sub _) [CmmLit (CmmLabel lbl), CmmLit (CmmInt i rep)] = CmmLit (CmmLabelOff lbl (fromIntegral (negate (narrowU rep i)))) + +-- Comparison of literal with widened operand: perform the comparison +-- at the smaller width, as long as the literal is within range. + +-- We can't do the reverse trick, when the operand is narrowed: +-- narrowing throws away bits from the operand, there's no way to do +-- the same comparison at the larger size. + +#if i386_TARGET_ARCH || x86_64_TARGET_ARCH +-- powerPC NCG has a TODO for I8/I16 comparisons, so don't try + +cmmMachOpFold cmp [CmmMachOp conv [x], CmmLit (CmmInt i _)] + | -- if the operand is widened: + Just (rep, signed, narrow_fn) <- maybe_conversion conv, + -- and this is a comparison operation: + Just narrow_cmp <- maybe_comparison cmp rep signed, + -- and the literal fits in the smaller size: + i == narrow_fn rep i + -- then we can do the comparison at the smaller size + = cmmMachOpFold narrow_cmp [x, CmmLit (CmmInt i rep)] + where + maybe_conversion (MO_UU_Conv from to) + | to > from + = Just (from, False, narrowU) + maybe_conversion (MO_SS_Conv from to) + | to > from + = Just (from, True, narrowS) + + -- don't attempt to apply this optimisation when the source + -- is a float; see #1916 + maybe_conversion _ = Nothing + + -- careful (#2080): if the original comparison was signed, but + -- we were doing an unsigned widen, then we must do an + -- unsigned comparison at the smaller size. + maybe_comparison (MO_U_Gt _) rep _ = Just (MO_U_Gt rep) + maybe_comparison (MO_U_Ge _) rep _ = Just (MO_U_Ge rep) + maybe_comparison (MO_U_Lt _) rep _ = Just (MO_U_Lt rep) + maybe_comparison (MO_U_Le _) rep _ = Just (MO_U_Le rep) + maybe_comparison (MO_Eq _) rep _ = Just (MO_Eq rep) + maybe_comparison (MO_S_Gt _) rep True = Just (MO_S_Gt rep) + maybe_comparison (MO_S_Ge _) rep True = Just (MO_S_Ge rep) + maybe_comparison (MO_S_Lt _) rep True = Just (MO_S_Lt rep) + maybe_comparison (MO_S_Le _) rep True = Just (MO_S_Le rep) + maybe_comparison (MO_S_Gt _) rep False = Just (MO_U_Gt rep) + maybe_comparison (MO_S_Ge _) rep False = Just (MO_U_Ge rep) + maybe_comparison (MO_S_Lt _) rep False = Just (MO_U_Lt rep) + maybe_comparison (MO_S_Le _) rep False = Just (MO_U_Le rep) + maybe_comparison _ _ _ = Nothing + +#endif + -- We can often do something with constants of 0 and 1 ... cmmMachOpFold mop args@[x, y@(CmmLit (CmmInt 0 _))] @@ -349,15 +526,15 @@ cmmMachOpFold mop args@[x, y@(CmmLit (CmmInt 0 _))] MO_S_Shr r -> x MO_U_Shr r -> x MO_Ne r | isComparisonExpr x -> x - MO_Eq r | Just x' <- maybeInvertConditionalExpr x -> x' + MO_Eq r | Just x' <- maybeInvertCmmExpr x -> x' MO_U_Gt r | isComparisonExpr x -> x MO_S_Gt r | isComparisonExpr x -> x - MO_U_Lt r | isComparisonExpr x -> CmmLit (CmmInt 0 wordRep) - MO_S_Lt r | isComparisonExpr x -> CmmLit (CmmInt 0 wordRep) - MO_U_Ge r | isComparisonExpr x -> CmmLit (CmmInt 1 wordRep) - MO_S_Ge r | isComparisonExpr x -> CmmLit (CmmInt 1 wordRep) - MO_U_Le r | Just x' <- maybeInvertConditionalExpr x -> x' - MO_S_Le r | Just x' <- maybeInvertConditionalExpr x -> x' + MO_U_Lt r | isComparisonExpr x -> CmmLit (CmmInt 0 wordWidth) + MO_S_Lt r | isComparisonExpr x -> CmmLit (CmmInt 0 wordWidth) + MO_U_Ge r | isComparisonExpr x -> CmmLit (CmmInt 1 wordWidth) + MO_S_Ge r | isComparisonExpr x -> CmmLit (CmmInt 1 wordWidth) + MO_U_Le r | Just x' <- maybeInvertCmmExpr x -> x' + MO_S_Le r | Just x' <- maybeInvertCmmExpr x -> x' other -> CmmMachOp mop args cmmMachOpFold mop args@[x, y@(CmmLit (CmmInt 1 rep))] @@ -367,14 +544,14 @@ cmmMachOpFold mop args@[x, y@(CmmLit (CmmInt 1 rep))] MO_U_Quot r -> x MO_S_Rem r -> CmmLit (CmmInt 0 rep) MO_U_Rem r -> CmmLit (CmmInt 0 rep) - MO_Ne r | Just x' <- maybeInvertConditionalExpr x -> x' + MO_Ne r | Just x' <- maybeInvertCmmExpr x -> x' MO_Eq r | isComparisonExpr x -> x - MO_U_Lt r | Just x' <- maybeInvertConditionalExpr x -> x' - MO_S_Lt r | Just x' <- maybeInvertConditionalExpr x -> x' - MO_U_Gt r | isComparisonExpr x -> CmmLit (CmmInt 0 wordRep) - MO_S_Gt r | isComparisonExpr x -> CmmLit (CmmInt 0 wordRep) - MO_U_Le r | isComparisonExpr x -> CmmLit (CmmInt 1 wordRep) - MO_S_Le r | isComparisonExpr x -> CmmLit (CmmInt 1 wordRep) + MO_U_Lt r | Just x' <- maybeInvertCmmExpr x -> x' + MO_S_Lt r | Just x' <- maybeInvertCmmExpr x -> x' + MO_U_Gt r | isComparisonExpr x -> CmmLit (CmmInt 0 wordWidth) + MO_S_Gt r | isComparisonExpr x -> CmmLit (CmmInt 0 wordWidth) + MO_U_Le r | isComparisonExpr x -> CmmLit (CmmInt 1 wordWidth) + MO_S_Le r | isComparisonExpr x -> CmmLit (CmmInt 1 wordWidth) MO_U_Ge r | isComparisonExpr x -> x MO_S_Ge r | isComparisonExpr x -> x other -> CmmMachOp mop args @@ -385,13 +562,16 @@ cmmMachOpFold mop args@[x, y@(CmmLit (CmmInt n _))] = case mop of MO_Mul rep | Just p <- exactLog2 n -> - CmmMachOp (MO_Shl rep) [x, CmmLit (CmmInt p rep)] + cmmMachOpFold (MO_Shl rep) [x, CmmLit (CmmInt p rep)] + MO_U_Quot rep + | Just p <- exactLog2 n -> + cmmMachOpFold (MO_U_Shr rep) [x, CmmLit (CmmInt p rep)] MO_S_Quot rep | Just p <- exactLog2 n, CmmReg _ <- x -> -- We duplicate x below, hence require -- it is a reg. FIXME: remove this restriction. -- shift right is not the same as quot, because it rounds - -- to minus infinity, whereasq uot rounds toward zero. + -- to minus infinity, whereasq quot rounds toward zero. -- To fix this up, we add one less than the divisor to the -- dividend if it is a negative number. -- @@ -406,14 +586,14 @@ cmmMachOpFold mop args@[x, y@(CmmLit (CmmInt n _))] -- x1 = x >> word_size-1 (unsigned) -- return = (x + x1) >>= log2(divisor) let - bits = fromIntegral (machRepBitWidth rep) - 1 + bits = fromIntegral (widthInBits rep) - 1 shr = if p == 1 then MO_U_Shr rep else MO_S_Shr rep x1 = CmmMachOp shr [x, CmmLit (CmmInt bits rep)] x2 = if p == 1 then x1 else CmmMachOp (MO_And rep) [x1, CmmLit (CmmInt (n-1) rep)] x3 = CmmMachOp (MO_Add rep) [x, x2] in - CmmMachOp (MO_S_Shr rep) [x3, CmmLit (CmmInt p rep)] + cmmMachOpFold (MO_S_Shr rep) [x3, CmmLit (CmmInt p rep)] other -> unchanged where @@ -433,43 +613,29 @@ cmmMachOpFold mop args = CmmMachOp mop args -- Used to be in MachInstrs --SDM. -- ToDo: remove use of unboxery --SDM. -w2i x = word2Int# x -i2w x = int2Word# x +-- Unboxery removed in favor of FastInt; but is the function supposed to fail +-- on inputs >= 2147483648, or was that just an implementation artifact? +-- And is this speed-critical, or can we just use Integer operations +-- (including Data.Bits)? +-- --Isaac Dupree exactLog2 :: Integer -> Maybe Integer -exactLog2 x - = if (x <= 0 || x >= 2147483648) then +exactLog2 x_ + = if (x_ <= 0 || x_ >= 2147483648) then Nothing else - case fromInteger x of { I# x# -> - if (w2i ((i2w x#) `and#` (i2w (0# -# x#))) /=# x#) then + case iUnbox (fromInteger x_) of { x -> + if (x `bitAndFastInt` negateFastInt x) /=# x then Nothing else - Just (toInteger (I# (pow2 x#))) + Just (toInteger (iBox (pow2 x))) } where - pow2 x# | x# ==# 1# = 0# - | otherwise = 1# +# pow2 (w2i (i2w x# `shiftRL#` 1#)) + pow2 x | x ==# _ILIT(1) = _ILIT(0) + | otherwise = _ILIT(1) +# pow2 (x `shiftR_FastInt` _ILIT(1)) -- ----------------------------------------------------------------------------- --- widening / narrowing - -narrowU :: MachRep -> Integer -> Integer -narrowU I8 x = fromIntegral (fromIntegral x :: Word8) -narrowU I16 x = fromIntegral (fromIntegral x :: Word16) -narrowU I32 x = fromIntegral (fromIntegral x :: Word32) -narrowU I64 x = fromIntegral (fromIntegral x :: Word64) -narrowU _ _ = panic "narrowTo" - -narrowS :: MachRep -> Integer -> Integer -narrowS I8 x = fromIntegral (fromIntegral x :: Int8) -narrowS I16 x = fromIntegral (fromIntegral x :: Int16) -narrowS I32 x = fromIntegral (fromIntegral x :: Int32) -narrowS I64 x = fromIntegral (fromIntegral x :: Int64) -narrowS _ _ = panic "narrowTo" - --- ----------------------------------------------------------------------------- -- Loopify for C {- @@ -495,12 +661,13 @@ narrowS _ _ = panic "narrowTo" except factorial, but what the hell. -} -cmmLoopifyForC :: CmmTop -> CmmTop -cmmLoopifyForC p@(CmmProc info entry_lbl [] blocks@(BasicBlock top_id _ : _)) +cmmLoopifyForC :: RawCmmTop -> RawCmmTop +cmmLoopifyForC p@(CmmProc info entry_lbl + (ListGraph blocks@(BasicBlock top_id _ : _))) | null info = p -- only if there's an info table, ignore case alts | otherwise = -- pprTrace "jump_lbl" (ppr jump_lbl <+> ppr entry_lbl) $ - CmmProc info entry_lbl [] blocks' + CmmProc info entry_lbl (ListGraph blocks') where blocks' = [ BasicBlock id (map do_stmt stmts) | BasicBlock id stmts <- blocks ] @@ -523,7 +690,6 @@ isComparisonExpr :: CmmExpr -> Bool isComparisonExpr (CmmMachOp op _) = isComparisonMachOp op isComparisonExpr _other = False -maybeInvertConditionalExpr :: CmmExpr -> Maybe CmmExpr -maybeInvertConditionalExpr (CmmMachOp op args) - | Just op' <- maybeInvertComparison op = Just (CmmMachOp op' args) -maybeInvertConditionalExpr _ = Nothing +isPicReg (CmmReg (CmmGlobal PicBaseReg)) = True +isPicReg _ = False +