compiler/cmm/CmmOpt.hs

   1 -----------------------------------------------------------------------------
   2 --
   3 -- Cmm optimisation
   4 --
   5 -- (c) The University of Glasgow 2006
   6 --
   7 -----------------------------------------------------------------------------
   8
   9 module CmmOpt (
  10         cmmMiniInline,
  11         cmmMachOpFold,
  12         cmmLoopifyForC,
  13  ) where
  14
  15 #include "HsVersions.h"
  16
  17 import Cmm
  18 import CmmUtils
  19 import CLabel
  20 import MachOp
  21 import StaticFlags
  22
  23 import UniqFM
  24 import Unique
  25
  26 import Outputable
  27
  28 import Data.Bits
  29 import Data.Word
  30 import Data.Int
  31 import GHC.Exts
  32
  33 -- -----------------------------------------------------------------------------
  34 -- The mini-inliner
  35
  36 {-
  37 This pass inlines assignments to temporaries that are used just
  38 once.  It works as follows:
  39
  40   - count uses of each temporary
  41   - for each temporary that occurs just once:
  42         - attempt to push it forward to the statement that uses it
  43         - only push forward past assignments to other temporaries
  44           (assumes that temporaries are single-assignment)
  45         - if we reach the statement that uses it, inline the rhs
  46           and delete the original assignment.
  47
  48 Possible generalisations: here is an example from factorial
  49
  50 Fac_zdwfac_entry:
  51     cmG:
  52         _smi = R2;
  53         if (_smi != 0) goto cmK;
  54         R1 = R3;
  55         jump I64[Sp];
  56     cmK:
  57         _smn = _smi * R3;
  58         R2 = _smi + (-1);
  59         R3 = _smn;
  60         jump Fac_zdwfac_info;
  61
  62 We want to inline _smi and _smn.  To inline _smn:
  63
  64    - we must be able to push forward past assignments to global regs.
  65      We can do this if the rhs of the assignment we are pushing
  66      forward doesn't refer to the global reg being assigned to; easy
  67      to test.
  68
  69 To inline _smi:
  70
  71    - It is a trivial replacement, reg for reg, but it occurs more than
  72      once.
  73    - We can inline trivial assignments even if the temporary occurs
  74      more than once, as long as we don't eliminate the original assignment
  75      (this doesn't help much on its own).
  76    - We need to be able to propagate the assignment forward through jumps;
  77      if we did this, we would find that it can be inlined safely in all
  78      its occurrences.
  79 -}
  80
  81 cmmMiniInline :: [CmmBasicBlock] -> [CmmBasicBlock]
  82 cmmMiniInline blocks = map do_inline blocks
  83   where
  84         blockUses (BasicBlock _ stmts)
  85          = foldr (plusUFM_C (+)) emptyUFM (map getStmtUses stmts)
  86
  87         uses = foldr (plusUFM_C (+)) emptyUFM (map blockUses blocks)
  88
  89         do_inline (BasicBlock id stmts)
  90          = BasicBlock id (cmmMiniInlineStmts uses stmts)
  91
  92
  93 cmmMiniInlineStmts :: UniqFM Int -> [CmmStmt] -> [CmmStmt]
  94 cmmMiniInlineStmts uses [] = []
  95 cmmMiniInlineStmts uses (stmt@(CmmAssign (CmmLocal (LocalReg u _ _)) expr) : stmts)
  96   | Just 1 <- lookupUFM uses u,
  97     Just stmts' <- lookForInline u expr stmts
  98   =
  99 #ifdef NCG_DEBUG
 100      trace ("nativeGen: inlining " ++ showSDoc (pprStmt stmt)) $
 101 #endif
 102      cmmMiniInlineStmts uses stmts'
 103
 104 cmmMiniInlineStmts uses (stmt:stmts)
 105   = stmt : cmmMiniInlineStmts uses stmts
 106
 107
 108 -- Try to inline a temporary assignment.  We can skip over assignments to
 109 -- other tempoararies, because we know that expressions aren't side-effecting
 110 -- and temporaries are single-assignment.
 111 lookForInline u expr (stmt@(CmmAssign (CmmLocal (LocalReg u' _ _)) rhs) : rest)
 112   | u /= u'
 113   = case lookupUFM (getExprUses rhs) u of
 114         Just 1 -> Just (inlineStmt u expr stmt : rest)
 115         _other -> case lookForInline u expr rest of
 116                      Nothing    -> Nothing
 117                      Just stmts -> Just (stmt:stmts)
 118
 119 lookForInline u expr (CmmNop : rest)
 120   = lookForInline u expr rest
 121
 122 lookForInline u expr (stmt:stmts)
 123   = case lookupUFM (getStmtUses stmt) u of
 124         Just 1 | ok_to_inline -> Just (inlineStmt u expr stmt : stmts)
 125         _other -> Nothing
 126   where
 127         -- we don't inline into CmmCall if the expression refers to global
 128         -- registers.  This is a HACK to avoid global registers clashing with
 129         -- C argument-passing registers, really the back-end ought to be able
 130         -- to handle it properly, but currently neither PprC nor the NCG can
 131         -- do it.  See also CgForeignCall:load_args_into_temps.
 132     ok_to_inline = case stmt of
 133                      CmmCall{} -> hasNoGlobalRegs expr
 134                      _ -> True
 135
 136 -- -----------------------------------------------------------------------------
 137 -- Boring Cmm traversals for collecting usage info and substitutions.
 138
 139 getStmtUses :: CmmStmt -> UniqFM Int
 140 getStmtUses (CmmAssign _ e) = getExprUses e
 141 getStmtUses (CmmStore e1 e2) = plusUFM_C (+) (getExprUses e1) (getExprUses e2)
 142 getStmtUses (CmmCall target _ es _ _)
 143    = plusUFM_C (+) (uses target) (getExprsUses (map fst es))
 144    where uses (CmmCallee e _) = getExprUses e
 145          uses _ = emptyUFM
 146 getStmtUses (CmmCondBranch e _) = getExprUses e
 147 getStmtUses (CmmSwitch e _) = getExprUses e
 148 getStmtUses (CmmJump e _) = getExprUses e
 149 getStmtUses _ = emptyUFM
 150
 151 getExprUses :: CmmExpr -> UniqFM Int
 152 getExprUses (CmmReg (CmmLocal (LocalReg u _ _))) = unitUFM u 1
 153 getExprUses (CmmRegOff (CmmLocal (LocalReg u _ _)) _) = unitUFM u 1
 154 getExprUses (CmmLoad e _) = getExprUses e
 155 getExprUses (CmmMachOp _ es) = getExprsUses es
 156 getExprUses _other = emptyUFM
 157
 158 getExprsUses es = foldr (plusUFM_C (+)) emptyUFM (map getExprUses es)
 159
 160 inlineStmt :: Unique -> CmmExpr -> CmmStmt -> CmmStmt
 161 inlineStmt u a (CmmAssign r e) = CmmAssign r (inlineExpr u a e)
 162 inlineStmt u a (CmmStore e1 e2) = CmmStore (inlineExpr u a e1) (inlineExpr u a e2)
 163 inlineStmt u a (CmmCall target regs es srt ret)
 164    = CmmCall (infn target) regs es' srt ret
 165    where infn (CmmCallee fn cconv) = CmmCallee fn cconv
 166          infn (CmmPrim p) = CmmPrim p
 167          es' = [ (inlineExpr u a e, hint) | (e,hint) <- es ]
 168 inlineStmt u a (CmmCondBranch e d) = CmmCondBranch (inlineExpr u a e) d
 169 inlineStmt u a (CmmSwitch e d) = CmmSwitch (inlineExpr u a e) d
 170 inlineStmt u a (CmmJump e d) = CmmJump (inlineExpr u a e) d
 171 inlineStmt u a other_stmt = other_stmt
 172
 173 inlineExpr :: Unique -> CmmExpr -> CmmExpr -> CmmExpr
 174 inlineExpr u a e@(CmmReg (CmmLocal (LocalReg u' _ _)))
 175   | u == u' = a
 176   | otherwise = e
 177 inlineExpr u a e@(CmmRegOff (CmmLocal (LocalReg u' rep _)) off)
 178   | u == u' = CmmMachOp (MO_Add rep) [a, CmmLit (CmmInt (fromIntegral off) rep)]
 179   | otherwise = e
 180 inlineExpr u a (CmmLoad e rep) = CmmLoad (inlineExpr u a e) rep
 181 inlineExpr u a (CmmMachOp op es) = CmmMachOp op (map (inlineExpr u a) es)
 182 inlineExpr u a other_expr = other_expr
 183
 184 -- -----------------------------------------------------------------------------
 185 -- MachOp constant folder
 186
 187 -- Now, try to constant-fold the MachOps.  The arguments have already
 188 -- been optimized and folded.
 189
 190 cmmMachOpFold
 191     :: MachOp           -- The operation from an CmmMachOp
 192     -> [CmmExpr]        -- The optimized arguments
 193     -> CmmExpr
 194
 195 cmmMachOpFold op arg@[CmmLit (CmmInt x rep)]
 196   = case op of
 197       MO_S_Neg r -> CmmLit (CmmInt (-x) rep)
 198       MO_Not r   -> CmmLit (CmmInt (complement x) rep)
 199
 200         -- these are interesting: we must first narrow to the
 201         -- "from" type, in order to truncate to the correct size.
 202         -- The final narrow/widen to the destination type
 203         -- is implicit in the CmmLit.
 204       MO_S_Conv from to
 205            | isFloatingRep to -> CmmLit (CmmFloat (fromInteger x) to)
 206            | otherwise        -> CmmLit (CmmInt (narrowS from x) to)
 207       MO_U_Conv from to -> CmmLit (CmmInt (narrowU from x) to)
 208
 209       _ -> panic "cmmMachOpFold: unknown unary op"
 210
 211
 212 -- Eliminate conversion NOPs
 213 cmmMachOpFold (MO_S_Conv rep1 rep2) [x] | rep1 == rep2 = x
 214 cmmMachOpFold (MO_U_Conv rep1 rep2) [x] | rep1 == rep2 = x
 215
 216 -- Eliminate nested conversions where possible
 217 cmmMachOpFold conv_outer args@[CmmMachOp conv_inner [x]]
 218   | Just (rep1,rep2,signed1) <- isIntConversion conv_inner,
 219     Just (_,   rep3,signed2) <- isIntConversion conv_outer
 220   = case () of
 221         -- widen then narrow to the same size is a nop
 222       _ | rep1 < rep2 && rep1 == rep3 -> x
 223         -- Widen then narrow to different size: collapse to single conversion
 224         -- but remember to use the signedness from the widening, just in case
 225         -- the final conversion is a widen.
 226         | rep1 < rep2 && rep2 > rep3 ->
 227             cmmMachOpFold (intconv signed1 rep1 rep3) [x]
 228         -- Nested widenings: collapse if the signedness is the same
 229         | rep1 < rep2 && rep2 < rep3 && signed1 == signed2 ->
 230             cmmMachOpFold (intconv signed1 rep1 rep3) [x]
 231         -- Nested narrowings: collapse
 232         | rep1 > rep2 && rep2 > rep3 ->
 233             cmmMachOpFold (MO_U_Conv rep1 rep3) [x]
 234         | otherwise ->
 235             CmmMachOp conv_outer args
 236   where
 237         isIntConversion (MO_U_Conv rep1 rep2)
 238           | not (isFloatingRep rep1) && not (isFloatingRep rep2)
 239           = Just (rep1,rep2,False)
 240         isIntConversion (MO_S_Conv rep1 rep2)
 241           | not (isFloatingRep rep1) && not (isFloatingRep rep2)
 242           = Just (rep1,rep2,True)
 243         isIntConversion _ = Nothing
 244
 245         intconv True  = MO_S_Conv
 246         intconv False = MO_U_Conv
 247
 248 -- ToDo: a narrow of a load can be collapsed into a narrow load, right?
 249 -- but what if the architecture only supports word-sized loads, should
 250 -- we do the transformation anyway?
 251
 252 cmmMachOpFold mop args@[CmmLit (CmmInt x xrep), CmmLit (CmmInt y _)]
 253   = case mop of
 254         -- for comparisons: don't forget to narrow the arguments before
 255         -- comparing, since they might be out of range.
 256         MO_Eq r   -> CmmLit (CmmInt (if x_u == y_u then 1 else 0) wordRep)
 257         MO_Ne r   -> CmmLit (CmmInt (if x_u /= y_u then 1 else 0) wordRep)
 258
 259         MO_U_Gt r -> CmmLit (CmmInt (if x_u >  y_u then 1 else 0) wordRep)
 260         MO_U_Ge r -> CmmLit (CmmInt (if x_u >= y_u then 1 else 0) wordRep)
 261         MO_U_Lt r -> CmmLit (CmmInt (if x_u <  y_u then 1 else 0) wordRep)
 262         MO_U_Le r -> CmmLit (CmmInt (if x_u <= y_u then 1 else 0) wordRep)
 263
 264         MO_S_Gt r -> CmmLit (CmmInt (if x_s >  y_s then 1 else 0) wordRep)
 265         MO_S_Ge r -> CmmLit (CmmInt (if x_s >= y_s then 1 else 0) wordRep)
 266         MO_S_Lt r -> CmmLit (CmmInt (if x_s <  y_s then 1 else 0) wordRep)
 267         MO_S_Le r -> CmmLit (CmmInt (if x_s <= y_s then 1 else 0) wordRep)
 268
 269         MO_Add r -> CmmLit (CmmInt (x + y) r)
 270         MO_Sub r -> CmmLit (CmmInt (x - y) r)
 271         MO_Mul r -> CmmLit (CmmInt (x * y) r)
 272         MO_S_Quot r | y /= 0 -> CmmLit (CmmInt (x `quot` y) r)
 273         MO_S_Rem  r | y /= 0 -> CmmLit (CmmInt (x `rem` y) r)
 274
 275         MO_And   r -> CmmLit (CmmInt (x .&. y) r)
 276         MO_Or    r -> CmmLit (CmmInt (x .|. y) r)
 277         MO_Xor   r -> CmmLit (CmmInt (x `xor` y) r)
 278
 279         MO_Shl   r -> CmmLit (CmmInt (x `shiftL` fromIntegral y) r)
 280         MO_U_Shr r -> CmmLit (CmmInt (x_u `shiftR` fromIntegral y) r)
 281         MO_S_Shr r -> CmmLit (CmmInt (x `shiftR` fromIntegral y) r)
 282
 283         other      -> CmmMachOp mop args
 284
 285    where
 286         x_u = narrowU xrep x
 287         y_u = narrowU xrep y
 288         x_s = narrowS xrep x
 289         y_s = narrowS xrep y
 290
 291
 292 -- When possible, shift the constants to the right-hand side, so that we
 293 -- can match for strength reductions.  Note that the code generator will
 294 -- also assume that constants have been shifted to the right when
 295 -- possible.
 296
 297 cmmMachOpFold op [x@(CmmLit _), y]
 298    | not (isLit y) && isCommutableMachOp op
 299    = cmmMachOpFold op [y, x]
 300
 301 -- Turn (a+b)+c into a+(b+c) where possible.  Because literals are
 302 -- moved to the right, it is more likely that we will find
 303 -- opportunities for constant folding when the expression is
 304 -- right-associated.
 305 --
 306 -- ToDo: this appears to introduce a quadratic behaviour due to the
 307 -- nested cmmMachOpFold.  Can we fix this?
 308 --
 309 -- Why do we check isLit arg1?  If arg1 is a lit, it means that arg2
 310 -- is also a lit (otherwise arg1 would be on the right).  If we
 311 -- put arg1 on the left of the rearranged expression, we'll get into a
 312 -- loop:  (x1+x2)+x3 => x1+(x2+x3)  => (x2+x3)+x1 => x2+(x3+x1) ...
 313 --
 314 -- Also don't do it if arg1 is PicBaseReg, so that we don't separate the
 315 -- PicBaseReg from the corresponding label (or label difference).
 316 --
 317 cmmMachOpFold mop1 [CmmMachOp mop2 [arg1,arg2], arg3]
 318    | mop1 == mop2 && isAssociativeMachOp mop1
 319      && not (isLit arg1) && not (isPicReg arg1)
 320    = cmmMachOpFold mop1 [arg1, cmmMachOpFold mop2 [arg2,arg3]]
 321
 322 -- Make a RegOff if we can
 323 cmmMachOpFold (MO_Add _) [CmmReg reg, CmmLit (CmmInt n rep)]
 324   = CmmRegOff reg (fromIntegral (narrowS rep n))
 325 cmmMachOpFold (MO_Add _) [CmmRegOff reg off, CmmLit (CmmInt n rep)]
 326   = CmmRegOff reg (off + fromIntegral (narrowS rep n))
 327 cmmMachOpFold (MO_Sub _) [CmmReg reg, CmmLit (CmmInt n rep)]
 328   = CmmRegOff reg (- fromIntegral (narrowS rep n))
 329 cmmMachOpFold (MO_Sub _) [CmmRegOff reg off, CmmLit (CmmInt n rep)]
 330   = CmmRegOff reg (off - fromIntegral (narrowS rep n))
 331
 332 -- Fold label(+/-)offset into a CmmLit where possible
 333
 334 cmmMachOpFold (MO_Add _) [CmmLit (CmmLabel lbl), CmmLit (CmmInt i rep)]
 335   = CmmLit (CmmLabelOff lbl (fromIntegral (narrowU rep i)))
 336 cmmMachOpFold (MO_Add _) [CmmLit (CmmInt i rep), CmmLit (CmmLabel lbl)]
 337   = CmmLit (CmmLabelOff lbl (fromIntegral (narrowU rep i)))
 338 cmmMachOpFold (MO_Sub _) [CmmLit (CmmLabel lbl), CmmLit (CmmInt i rep)]
 339   = CmmLit (CmmLabelOff lbl (fromIntegral (negate (narrowU rep i))))
 340
 341
 342 -- Comparison of literal with narrowed/widened operand: perform
 343 -- the comparison at a different width, as long as the literal is
 344 -- within range.
 345
 346 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
 347 -- powerPC NCG has a TODO for I8/I16 comparisons, so don't try
 348
 349 cmmMachOpFold cmp [CmmMachOp conv [x], CmmLit (CmmInt i _)]
 350   | Just (rep, narrow) <- maybe_conversion conv,
 351     Just narrow_cmp <- maybe_comparison cmp rep,
 352     let narrow_i = narrow rep i,
 353     narrow_i == i
 354   = cmmMachOpFold narrow_cmp [x, CmmLit (CmmInt narrow_i rep)]
 355  where
 356     maybe_conversion (MO_U_Conv from _) = Just (from, narrowU)
 357     maybe_conversion (MO_S_Conv from _) = Just (from, narrowS)
 358     maybe_conversion _ = Nothing
 359
 360     maybe_comparison (MO_U_Gt _) rep = Just (MO_U_Gt rep)
 361     maybe_comparison (MO_U_Ge _) rep = Just (MO_U_Ge rep)
 362     maybe_comparison (MO_U_Lt _) rep = Just (MO_U_Lt rep)
 363     maybe_comparison (MO_U_Le _) rep = Just (MO_U_Le rep)
 364     maybe_comparison (MO_S_Gt _) rep = Just (MO_S_Gt rep)
 365     maybe_comparison (MO_S_Ge _) rep = Just (MO_S_Ge rep)
 366     maybe_comparison (MO_S_Lt _) rep = Just (MO_S_Lt rep)
 367     maybe_comparison (MO_S_Le _) rep = Just (MO_S_Le rep)
 368     maybe_comparison (MO_Eq   _) rep = Just (MO_Eq   rep)
 369     maybe_comparison _ _ = Nothing
 370
 371 #endif
 372
 373 -- We can often do something with constants of 0 and 1 ...
 374
 375 cmmMachOpFold mop args@[x, y@(CmmLit (CmmInt 0 _))]
 376   = case mop of
 377         MO_Add   r -> x
 378         MO_Sub   r -> x
 379         MO_Mul   r -> y
 380         MO_And   r -> y
 381         MO_Or    r -> x
 382         MO_Xor   r -> x
 383         MO_Shl   r -> x
 384         MO_S_Shr r -> x
 385         MO_U_Shr r -> x
 386         MO_Ne    r | isComparisonExpr x -> x
 387         MO_Eq    r | Just x' <- maybeInvertConditionalExpr x -> x'
 388         MO_U_Gt  r | isComparisonExpr x -> x
 389         MO_S_Gt  r | isComparisonExpr x -> x
 390         MO_U_Lt  r | isComparisonExpr x -> CmmLit (CmmInt 0 wordRep)
 391         MO_S_Lt  r | isComparisonExpr x -> CmmLit (CmmInt 0 wordRep)
 392         MO_U_Ge  r | isComparisonExpr x -> CmmLit (CmmInt 1 wordRep)
 393         MO_S_Ge  r | isComparisonExpr x -> CmmLit (CmmInt 1 wordRep)
 394         MO_U_Le  r | Just x' <- maybeInvertConditionalExpr x -> x'
 395         MO_S_Le  r | Just x' <- maybeInvertConditionalExpr x -> x'
 396         other    -> CmmMachOp mop args
 397
 398 cmmMachOpFold mop args@[x, y@(CmmLit (CmmInt 1 rep))]
 399   = case mop of
 400         MO_Mul    r -> x
 401         MO_S_Quot r -> x
 402         MO_U_Quot r -> x
 403         MO_S_Rem  r -> CmmLit (CmmInt 0 rep)
 404         MO_U_Rem  r -> CmmLit (CmmInt 0 rep)
 405         MO_Ne    r | Just x' <- maybeInvertConditionalExpr x -> x'
 406         MO_Eq    r | isComparisonExpr x -> x
 407         MO_U_Lt  r | Just x' <- maybeInvertConditionalExpr x -> x'
 408         MO_S_Lt  r | Just x' <- maybeInvertConditionalExpr x -> x'
 409         MO_U_Gt  r | isComparisonExpr x -> CmmLit (CmmInt 0 wordRep)
 410         MO_S_Gt  r | isComparisonExpr x -> CmmLit (CmmInt 0 wordRep)
 411         MO_U_Le  r | isComparisonExpr x -> CmmLit (CmmInt 1 wordRep)
 412         MO_S_Le  r | isComparisonExpr x -> CmmLit (CmmInt 1 wordRep)
 413         MO_U_Ge  r | isComparisonExpr x -> x
 414         MO_S_Ge  r | isComparisonExpr x -> x
 415         other       -> CmmMachOp mop args
 416
 417 -- Now look for multiplication/division by powers of 2 (integers).
 418
 419 cmmMachOpFold mop args@[x, y@(CmmLit (CmmInt n _))]
 420   = case mop of
 421         MO_Mul rep
 422            | Just p <- exactLog2 n ->
 423                  CmmMachOp (MO_Shl rep) [x, CmmLit (CmmInt p rep)]
 424         MO_S_Quot rep
 425            | Just p <- exactLog2 n,
 426              CmmReg _ <- x ->   -- We duplicate x below, hence require
 427                                 -- it is a reg.  FIXME: remove this restriction.
 428                 -- shift right is not the same as quot, because it rounds
 429                 -- to minus infinity, whereasq uot rounds toward zero.
 430                 -- To fix this up, we add one less than the divisor to the
 431                 -- dividend if it is a negative number.
 432                 --
 433                 -- to avoid a test/jump, we use the following sequence:
 434                 --      x1 = x >> word_size-1  (all 1s if -ve, all 0s if +ve)
 435                 --      x2 = y & (divisor-1)
 436                 --      result = (x+x2) >>= log2(divisor)
 437                 -- this could be done a bit more simply using conditional moves,
 438                 -- but we're processor independent here.
 439                 --
 440                 -- we optimise the divide by 2 case slightly, generating
 441                 --      x1 = x >> word_size-1  (unsigned)
 442                 --      return = (x + x1) >>= log2(divisor)
 443                 let
 444                     bits = fromIntegral (machRepBitWidth rep) - 1
 445                     shr = if p == 1 then MO_U_Shr rep else MO_S_Shr rep
 446                     x1 = CmmMachOp shr [x, CmmLit (CmmInt bits rep)]
 447                     x2 = if p == 1 then x1 else
 448                          CmmMachOp (MO_And rep) [x1, CmmLit (CmmInt (n-1) rep)]
 449                     x3 = CmmMachOp (MO_Add rep) [x, x2]
 450                 in
 451                 CmmMachOp (MO_S_Shr rep) [x3, CmmLit (CmmInt p rep)]
 452         other
 453            -> unchanged
 454     where
 455        unchanged = CmmMachOp mop args
 456
 457 -- Anything else is just too hard.
 458
 459 cmmMachOpFold mop args = CmmMachOp mop args
 460
 461 -- -----------------------------------------------------------------------------
 462 -- exactLog2
 463
 464 -- This algorithm for determining the $\log_2$ of exact powers of 2 comes
 465 -- from GCC.  It requires bit manipulation primitives, and we use GHC
 466 -- extensions.  Tough.
 467 --
 468 -- Used to be in MachInstrs --SDM.
 469 -- ToDo: remove use of unboxery --SDM.
 470
 471 w2i x = word2Int# x
 472 i2w x = int2Word# x
 473
 474 exactLog2 :: Integer -> Maybe Integer
 475 exactLog2 x
 476   = if (x <= 0 || x >= 2147483648) then
 477        Nothing
 478     else
 479        case fromInteger x of { I# x# ->
 480        if (w2i ((i2w x#) `and#` (i2w (0# -# x#))) /=# x#) then
 481           Nothing
 482        else
 483           Just (toInteger (I# (pow2 x#)))
 484        }
 485   where
 486     pow2 x# | x# ==# 1# = 0#
 487             | otherwise = 1# +# pow2 (w2i (i2w x# `shiftRL#` 1#))
 488
 489
 490 -- -----------------------------------------------------------------------------
 491 -- widening / narrowing
 492
 493 narrowU :: MachRep -> Integer -> Integer
 494 narrowU I8  x = fromIntegral (fromIntegral x :: Word8)
 495 narrowU I16 x = fromIntegral (fromIntegral x :: Word16)
 496 narrowU I32 x = fromIntegral (fromIntegral x :: Word32)
 497 narrowU I64 x = fromIntegral (fromIntegral x :: Word64)
 498 narrowU _ _ = panic "narrowTo"
 499
 500 narrowS :: MachRep -> Integer -> Integer
 501 narrowS I8  x = fromIntegral (fromIntegral x :: Int8)
 502 narrowS I16 x = fromIntegral (fromIntegral x :: Int16)
 503 narrowS I32 x = fromIntegral (fromIntegral x :: Int32)
 504 narrowS I64 x = fromIntegral (fromIntegral x :: Int64)
 505 narrowS _ _ = panic "narrowTo"
 506
 507 -- -----------------------------------------------------------------------------
 508 -- Loopify for C
 509
 510 {-
 511  This is a simple pass that replaces tail-recursive functions like this:
 512
 513    fac() {
 514      ...
 515      jump fac();
 516    }
 517
 518  with this:
 519
 520   fac() {
 521    L:
 522      ...
 523      goto L;
 524   }
 525
 526   the latter generates better C code, because the C compiler treats it
 527   like a loop, and brings full loop optimisation to bear.
 528
 529   In my measurements this makes little or no difference to anything
 530   except factorial, but what the hell.
 531 -}
 532
 533 cmmLoopifyForC :: RawCmmTop -> RawCmmTop
 534 cmmLoopifyForC p@(CmmProc info entry_lbl [] blocks@(BasicBlock top_id _ : _))
 535   | null info = p  -- only if there's an info table, ignore case alts
 536   | otherwise =
 537 --  pprTrace "jump_lbl" (ppr jump_lbl <+> ppr entry_lbl) $
 538   CmmProc info entry_lbl [] blocks'
 539   where blocks' = [ BasicBlock id (map do_stmt stmts)
 540                   | BasicBlock id stmts <- blocks ]
 541
 542         do_stmt (CmmJump (CmmLit (CmmLabel lbl)) _) | lbl == jump_lbl
 543                 = CmmBranch top_id
 544         do_stmt stmt = stmt
 545
 546         jump_lbl | tablesNextToCode = entryLblToInfoLbl entry_lbl
 547                  | otherwise        = entry_lbl
 548
 549 cmmLoopifyForC top = top
 550
 551 -- -----------------------------------------------------------------------------
 552 -- Utils
 553
 554 isLit (CmmLit _) = True
 555 isLit _          = False
 556
 557 isComparisonExpr :: CmmExpr -> Bool
 558 isComparisonExpr (CmmMachOp op _) = isComparisonMachOp op
 559 isComparisonExpr _other             = False
 560
 561 maybeInvertConditionalExpr :: CmmExpr -> Maybe CmmExpr
 562 maybeInvertConditionalExpr (CmmMachOp op args)
 563   | Just op' <- maybeInvertComparison op = Just (CmmMachOp op' args)
 564 maybeInvertConditionalExpr _ = Nothing
 565
 566 isPicReg (CmmReg (CmmGlobal PicBaseReg)) = True
 567 isPicReg _ = False