compiler/cmm/CmmRewriteAssignments.hs

   1 {-# LANGUAGE ViewPatterns #-}
   2 {-# LANGUAGE GADTs #-}
   3 {-# LANGUAGE FlexibleContexts #-}
   4
   5 {-# OPTIONS_GHC -fno-warn-warnings-deprecations #-}
   6
   7 -- This module implements generalized code motion for assignments to
   8 -- local registers, inlining and sinking when possible.  It also does
   9 -- some amount of rewriting for stores to register slots, which are
  10 -- effectively equivalent to local registers.
  11 module CmmRewriteAssignments
  12   ( rewriteAssignments
  13   ) where
  14
  15 import Cmm
  16 import CmmExpr
  17 import OptimizationFuel
  18 import StgCmmUtils
  19
  20 import Control.Monad
  21 import UniqFM
  22 import Unique
  23
  24 import Compiler.Hoopl hiding (Unique)
  25 import Data.Maybe
  26 import Prelude hiding (succ, zip)
  27
  28 ----------------------------------------------------------------
  29 --- Main function
  30
  31 rewriteAssignments :: CmmGraph -> FuelUniqSM CmmGraph
  32 rewriteAssignments g = do
  33   -- Because we need to act on forwards and backwards information, we
  34   -- first perform usage analysis and bake this information into the
  35   -- graph (backwards transform), and then do a forwards transform
  36   -- to actually perform inlining and sinking.
  37   g'  <- annotateUsage g
  38   g'' <- liftM fst $ dataflowPassFwd g' [(g_entry g, fact_bot assignmentLattice)] $
  39                                      analRewFwd assignmentLattice assignmentTransfer assignmentRewrite
  40   return (modifyGraph eraseRegUsage g'')
  41
  42 ----------------------------------------------------------------
  43 --- Usage information
  44
  45 -- We decorate all register assignments with approximate usage
  46 -- information, that is, the maximum number of times the register is
  47 -- referenced while it is live along all outgoing control paths.
  48 -- This analysis provides a precise upper bound for usage, so if a
  49 -- register is never referenced, we can remove it, as that assignment is
  50 -- dead.
  51 --
  52 -- This analysis is very similar to liveness analysis; we just keep a
  53 -- little extra info. (Maybe we should move it to CmmLive, and subsume
  54 -- the old liveness analysis.)
  55 --
  56 -- There are a few subtleties here:
  57 --
  58 --  - If a register goes dead, and then becomes live again, the usages
  59 --    of the disjoint live range don't count towards the original range.
  60 --
  61 --          a = 1; // used once
  62 --          b = a;
  63 --          a = 2; // used once
  64 --          c = a;
  65 --
  66 --  - A register may be used multiple times, but these all reside in
  67 --    different control paths, such that any given execution only uses
  68 --    it once. In that case, the usage count may still be 1.
  69 --
  70 --          a = 1; // used once
  71 --          if (b) {
  72 --              c = a + 3;
  73 --          } else {
  74 --              c = a + 1;
  75 --          }
  76 --
  77 --    This policy corresponds to an inlining strategy that does not
  78 --    duplicate computation but may increase binary size.
  79 --
  80 --  - If we naively implement a usage count, we have a counting to
  81 --    infinity problem across joins.  Furthermore, knowing that
  82 --    something is used 2 or more times in one runtime execution isn't
  83 --    particularly useful for optimizations (inlining may be beneficial,
  84 --    but there's no way of knowing that without register pressure
  85 --    information.)
  86 --
  87 --          while (...) {
  88 --              // first iteration, b used once
  89 --              // second iteration, b used twice
  90 --              // third iteration ...
  91 --              a = b;
  92 --          }
  93 --          // b used zero times
  94 --
  95 --    There is an orthogonal question, which is that for every runtime
  96 --    execution, the register may be used only once, but if we inline it
  97 --    in every conditional path, the binary size might increase a lot.
  98 --    But tracking this information would be tricky, because it violates
  99 --    the finite lattice restriction Hoopl requires for termination;
 100 --    we'd thus need to supply an alternate proof, which is probably
 101 --    something we should defer until we actually have an optimization
 102 --    that would take advantage of this.  (This might also interact
 103 --    strangely with liveness information.)
 104 --
 105 --          a = ...;
 106 --          // a is used one time, but in X different paths
 107 --          case (b) of
 108 --              1 -> ... a ...
 109 --              2 -> ... a ...
 110 --              3 -> ... a ...
 111 --              ...
 112 --
 113 --  - Memory stores to local register slots (CmmStore (CmmStackSlot
 114 --    (LocalReg _) 0) _) have similar behavior to local registers,
 115 --    in that these locations are all disjoint from each other.  Thus,
 116 --    we attempt to inline them too. Note that because these are only
 117 --    generated as part of the spilling process, most of the time this
 118 --    will refer to a local register and the assignment will immediately
 119 --    die on the subsequent call.  However, if we manage to replace that
 120 --    local register with a memory location, it means that we've managed
 121 --    to preserve a value on the stack without having to move it to
 122 --    another memory location again!  We collect usage information just
 123 --    to be safe in case extra computation is involved.
 124
 125 data RegUsage = SingleUse | ManyUse
 126     deriving (Ord, Eq, Show)
 127 -- Absence in map = ZeroUse
 128
 129 {-
 130 -- minBound is bottom, maxBound is top, least-upper-bound is max
 131 -- ToDo: Put this in Hoopl.  Note that this isn't as useful as I
 132 -- originally hoped, because you usually want to leave out the bottom
 133 -- element when you have things like this put in maps.  Maybe f is
 134 -- useful on its own as a combining function.
 135 boundedOrdLattice :: (Bounded a, Ord a) => String -> DataflowLattice a
 136 boundedOrdLattice n = DataflowLattice n minBound f
 137     where f _ (OldFact x) (NewFact y)
 138             | x >= y    = (NoChange,   x)
 139             | otherwise = (SomeChange, y)
 140 -}
 141
 142 -- Custom node type we'll rewrite to.  CmmAssign nodes to local
 143 -- registers are replaced with AssignLocal nodes.
 144 data WithRegUsage n e x where
 145     -- Plain will not contain CmmAssign nodes immediately after
 146     -- transformation, but as we rewrite assignments, we may have
 147     -- assignments here: these are assignments that should not be
 148     -- rewritten!
 149     Plain       :: n e x -> WithRegUsage n e x
 150     AssignLocal :: LocalReg -> CmmExpr -> RegUsage -> WithRegUsage n O O
 151
 152 instance UserOfLocalRegs (n e x) => UserOfLocalRegs (WithRegUsage n e x) where
 153     foldRegsUsed f z (Plain n) = foldRegsUsed f z n
 154     foldRegsUsed f z (AssignLocal _ e _) = foldRegsUsed f z e
 155
 156 instance DefinerOfLocalRegs (n e x) => DefinerOfLocalRegs (WithRegUsage n e x) where
 157     foldRegsDefd f z (Plain n) = foldRegsDefd f z n
 158     foldRegsDefd f z (AssignLocal r _ _) = foldRegsDefd f z r
 159
 160 instance NonLocal n => NonLocal (WithRegUsage n) where
 161     entryLabel (Plain n) = entryLabel n
 162     successors (Plain n) = successors n
 163
 164 liftRegUsage :: Graph n e x -> Graph (WithRegUsage n) e x
 165 liftRegUsage = mapGraph Plain
 166
 167 eraseRegUsage :: Graph (WithRegUsage CmmNode) e x -> Graph CmmNode e x
 168 eraseRegUsage = mapGraph f
 169     where f :: WithRegUsage CmmNode e x -> CmmNode e x
 170           f (AssignLocal l e _) = CmmAssign (CmmLocal l) e
 171           f (Plain n) = n
 172
 173 type UsageMap = UniqFM RegUsage
 174
 175 usageLattice :: DataflowLattice UsageMap
 176 usageLattice = DataflowLattice "usage counts for registers" emptyUFM (joinUFM f)
 177     where f _ (OldFact x) (NewFact y)
 178             | x >= y    = (NoChange,   x)
 179             | otherwise = (SomeChange, y)
 180
 181 -- We reuse the names 'gen' and 'kill', although we're doing something
 182 -- slightly different from the Dragon Book
 183 usageTransfer :: BwdTransfer (WithRegUsage CmmNode) UsageMap
 184 usageTransfer = mkBTransfer3 first middle last
 185     where first _ f = f
 186           middle :: WithRegUsage CmmNode O O -> UsageMap -> UsageMap
 187           middle n f = gen_kill n f
 188           last :: WithRegUsage CmmNode O C -> FactBase UsageMap -> UsageMap
 189           -- Checking for CmmCall/CmmForeignCall is unnecessary, because
 190           -- spills/reloads have already occurred by the time we do this
 191           -- analysis.
 192           -- XXX Deprecated warning is puzzling: what label are we
 193           -- supposed to use?
 194           -- ToDo: With a bit more cleverness here, we can avoid
 195           -- disappointment and heartbreak associated with the inability
 196           -- to inline into CmmCall and CmmForeignCall by
 197           -- over-estimating the usage to be ManyUse.
 198           last n f = gen_kill n (joinOutFacts usageLattice n f)
 199           gen_kill :: WithRegUsage CmmNode e x -> UsageMap -> UsageMap
 200           gen_kill a = gen a . kill a
 201           gen :: WithRegUsage CmmNode e x -> UsageMap -> UsageMap
 202           gen  a f = foldRegsUsed increaseUsage f a
 203           kill :: WithRegUsage CmmNode e x -> UsageMap -> UsageMap
 204           kill a f = foldRegsDefd delFromUFM f a
 205           increaseUsage f r = addToUFM_C combine f r SingleUse
 206             where combine _ _ = ManyUse
 207
 208 usageRewrite :: BwdRewrite FuelUniqSM (WithRegUsage CmmNode) UsageMap
 209 usageRewrite = mkBRewrite3 first middle last
 210     where first  _ _ = return Nothing
 211           middle :: Monad m => WithRegUsage CmmNode O O -> UsageMap -> m (Maybe (Graph (WithRegUsage CmmNode) O O))
 212           middle (Plain (CmmAssign (CmmLocal l) e)) f
 213                      = return . Just
 214                      $ case lookupUFM f l of
 215                             Nothing    -> emptyGraph
 216                             Just usage -> mkMiddle (AssignLocal l e usage)
 217           middle _ _ = return Nothing
 218           last   _ _ = return Nothing
 219
 220 type CmmGraphWithRegUsage = GenCmmGraph (WithRegUsage CmmNode)
 221 annotateUsage :: CmmGraph -> FuelUniqSM (CmmGraphWithRegUsage)
 222 annotateUsage vanilla_g =
 223     let g = modifyGraph liftRegUsage vanilla_g
 224     in liftM fst $ dataflowPassBwd g [(g_entry g, fact_bot usageLattice)] $
 225                                    analRewBwd usageLattice usageTransfer usageRewrite
 226
 227 ----------------------------------------------------------------
 228 --- Assignment tracking
 229
 230 -- The idea is to maintain a map of local registers do expressions,
 231 -- such that the value of that register is the same as the value of that
 232 -- expression at any given time.  We can then do several things,
 233 -- as described by Assignment.
 234
 235 -- Assignment describes the various optimizations that are valid
 236 -- at a given point in the program.
 237 data Assignment =
 238 -- This assignment can always be inlined.  It is cheap or single-use.
 239                   AlwaysInline CmmExpr
 240 -- This assignment should be sunk down to its first use.  (This will
 241 -- increase code size if the register is used in multiple control flow
 242 -- paths, but won't increase execution time, and the reduction of
 243 -- register pressure is worth it, I think.)
 244                 | AlwaysSink CmmExpr
 245 -- We cannot safely optimize occurrences of this local register. (This
 246 -- corresponds to top in the lattice structure.)
 247                 | NeverOptimize
 248
 249 -- Extract the expression that is being assigned to
 250 xassign :: Assignment -> Maybe CmmExpr
 251 xassign (AlwaysInline e) = Just e
 252 xassign (AlwaysSink e)   = Just e
 253 xassign NeverOptimize    = Nothing
 254
 255 -- Extracts the expression, but only if they're the same constructor
 256 xassign2 :: (Assignment, Assignment) -> Maybe (CmmExpr, CmmExpr)
 257 xassign2 (AlwaysInline e, AlwaysInline e') = Just (e, e')
 258 xassign2 (AlwaysSink e, AlwaysSink e')     = Just (e, e')
 259 xassign2 _ = Nothing
 260
 261 -- Note: We'd like to make decisions about "not optimizing" as soon as
 262 -- possible, because this will make running the transfer function more
 263 -- efficient.
 264 type AssignmentMap = UniqFM Assignment
 265
 266 assignmentLattice :: DataflowLattice AssignmentMap
 267 assignmentLattice = DataflowLattice "assignments for registers" emptyUFM (joinUFM add)
 268     where add _ (OldFact old) (NewFact new)
 269             = case (old, new) of
 270                 (NeverOptimize, _) -> (NoChange,   NeverOptimize)
 271                 (_, NeverOptimize) -> (SomeChange, NeverOptimize)
 272                 (xassign2 -> Just (e, e'))
 273                     | e == e'   -> (NoChange, old)
 274                     | otherwise -> (SomeChange, NeverOptimize)
 275                 _ -> (SomeChange, NeverOptimize)
 276
 277 -- Deletes sinks from assignment map, because /this/ is the place
 278 -- where it will be sunk to.
 279 deleteSinks :: UserOfLocalRegs n => n -> AssignmentMap -> AssignmentMap
 280 deleteSinks n m = foldRegsUsed (adjustUFM f) m n
 281   where f (AlwaysSink _) = NeverOptimize
 282         f old = old
 283
 284 -- Invalidates any expressions that use a register.
 285 invalidateUsersOf :: CmmReg -> AssignmentMap -> AssignmentMap
 286 -- foldUFM_Directly :: (Unique -> elt -> a -> a) -> a -> UniqFM elt -> a
 287 invalidateUsersOf reg m = foldUFM_Directly f m m -- [foldUFM performance]
 288     where f u (xassign -> Just e) m | reg `regUsedIn` e = addToUFM_Directly m u NeverOptimize
 289           f _ _ m = m
 290 {- This requires the entire spine of the map to be continually rebuilt,
 291  - which causes crazy memory usage!
 292 invalidateUsersOf reg = mapUFM (invalidateUsers' reg)
 293   where invalidateUsers' reg (xassign -> Just e) | reg `regUsedIn` e = NeverOptimize
 294         invalidateUsers' _ old = old
 295 -}
 296
 297 -- Note [foldUFM performance]
 298 -- These calls to fold UFM no longer leak memory, but they do cause
 299 -- pretty killer amounts of allocation.  So they'll be something to
 300 -- optimize; we need an algorithmic change to prevent us from having to
 301 -- traverse the /entire/ map continually.
 302
 303 middleAssignment :: WithRegUsage CmmNode O O -> AssignmentMap -> AssignmentMap
 304
 305 -- Algorithm for annotated assignments:
 306 --  1. Delete any sinking assignments that were used by this instruction
 307 --  2. Add the assignment to our list of valid local assignments with
 308 --     the correct optimization policy.
 309 --  3. Look for all assignments that reference that register and
 310 --     invalidate them.
 311 middleAssignment n@(AssignLocal r e usage) assign
 312     = invalidateUsersOf (CmmLocal r) . add . deleteSinks n $ assign
 313       where add m = addToUFM m r
 314                   $ case usage of
 315                         SingleUse -> AlwaysInline e
 316                         ManyUse   -> decide e
 317             decide CmmLit{}       = AlwaysInline e
 318             decide CmmReg{}       = AlwaysInline e
 319             decide CmmLoad{}      = AlwaysSink e
 320             decide CmmStackSlot{} = AlwaysSink e
 321             decide CmmMachOp{}    = AlwaysSink e
 322             -- We'll always inline simple operations on the global
 323             -- registers, to reduce register pressure: Sp - 4 or Hp - 8
 324             -- EZY: Justify this optimization more carefully.
 325             decide CmmRegOff{}    = AlwaysInline e
 326
 327 -- Algorithm for unannotated assignments of global registers:
 328 -- 1. Delete any sinking assignments that were used by this instruction
 329 -- 2. Look for all assignments that reference this register and
 330 --    invalidate them.
 331 middleAssignment (Plain n@(CmmAssign reg@(CmmGlobal _) _)) assign
 332     = invalidateUsersOf reg . deleteSinks n $ assign
 333
 334 -- Algorithm for unannotated assignments of *local* registers: do
 335 -- nothing (it's a reload, so no state should have changed)
 336 middleAssignment (Plain (CmmAssign (CmmLocal _) _)) assign = assign
 337
 338 -- Algorithm for stores:
 339 --  1. Delete any sinking assignments that were used by this instruction
 340 --  2. Look for all assignments that load from memory locations that
 341 --     were clobbered by this store and invalidate them.
 342 middleAssignment (Plain n@(CmmStore lhs rhs)) assign
 343     = let m = deleteSinks n assign
 344       in foldUFM_Directly f m m -- [foldUFM performance]
 345       where f u (xassign -> Just x) m | (lhs, rhs) `clobbers` (u, x) = addToUFM_Directly m u NeverOptimize
 346             f _ _ m = m
 347 {- Also leaky
 348     = mapUFM_Directly p . deleteSinks n $ assign
 349       -- ToDo: There's a missed opportunity here: even if a memory
 350       -- access we're attempting to sink gets clobbered at some
 351       -- location, it's still /better/ to sink it to right before the
 352       -- point where it gets clobbered.  How might we do this?
 353       -- Unfortunately, it's too late to change the assignment...
 354       where p r (xassign -> Just x) | (lhs, rhs) `clobbers` (r, x) = NeverOptimize
 355             p _ old = old
 356 -}
 357
 358 -- Assumption: Unsafe foreign calls don't clobber memory
 359 -- Since foreign calls clobber caller saved registers, we need
 360 -- invalidate any assignments that reference those global registers.
 361 -- This is kind of expensive. (One way to optimize this might be to
 362 -- store extra information about expressions that allow this and other
 363 -- checks to be done cheaply.)
 364 middleAssignment (Plain n@(CmmUnsafeForeignCall{})) assign
 365     = deleteCallerSaves (foldRegsDefd (\m r -> addToUFM m r NeverOptimize) (deleteSinks n assign) n)
 366     where deleteCallerSaves m = foldUFM_Directly f m m
 367           f u (xassign -> Just x) m | wrapRecExpf g x False = addToUFM_Directly m u NeverOptimize
 368           f _ _ m = m
 369           g (CmmReg (CmmGlobal r)) _      | callerSaves r = True
 370           g (CmmRegOff (CmmGlobal r) _) _ | callerSaves r = True
 371           g _ b = b
 372
 373 middleAssignment (Plain (CmmComment {})) assign
 374     = assign
 375
 376 -- Assumptions:
 377 --  * Writes using Hp do not overlap with any other memory locations
 378 --    (An important invariant being relied on here is that we only ever
 379 --    use Hp to allocate values on the heap, which appears to be the
 380 --    case given hpReg usage, and that our heap writing code doesn't
 381 --    do anything stupid like overlapping writes.)
 382 --  * Stack slots do not overlap with any other memory locations
 383 --  * Stack slots for different areas do not overlap
 384 --  * Stack slots within the same area and different offsets may
 385 --    overlap; we need to do a size check (see 'overlaps').
 386 --  * Register slots only overlap with themselves.  (But this shouldn't
 387 --    happen in practice, because we'll fail to inline a reload across
 388 --    the next spill.)
 389 --  * Non stack-slot stores always conflict with each other.  (This is
 390 --    not always the case; we could probably do something special for Hp)
 391 clobbers :: (CmmExpr, CmmExpr) -- (lhs, rhs) of clobbering CmmStore
 392          -> (Unique,  CmmExpr) -- (register, expression) that may be clobbered
 393          -> Bool
 394 clobbers (CmmRegOff (CmmGlobal Hp) _, _) (_, _) = False
 395 clobbers (CmmReg (CmmGlobal Hp), _) (_, _) = False
 396 -- ToDo: Also catch MachOp case
 397 clobbers (ss@CmmStackSlot{}, CmmReg (CmmLocal r)) (u, CmmLoad (ss'@CmmStackSlot{}) _)
 398     | getUnique r == u, ss == ss' = False -- No-op on the stack slot (XXX: Do we need this special case?)
 399 clobbers (CmmStackSlot (CallArea a) o, rhs) (_, expr) = f expr
 400     where f (CmmLoad (CmmStackSlot (CallArea a') o') t)
 401             = (a, o, widthInBytes (cmmExprWidth rhs)) `overlaps` (a', o', widthInBytes (typeWidth t))
 402           f (CmmLoad e _)    = containsStackSlot e
 403           f (CmmMachOp _ es) = or (map f es)
 404           f _                = False
 405           -- Maybe there's an invariant broken if this actually ever
 406           -- returns True
 407           containsStackSlot (CmmLoad{})      = True -- load of a load, all bets off
 408           containsStackSlot (CmmMachOp _ es) = or (map containsStackSlot es)
 409           containsStackSlot (CmmStackSlot{}) = True
 410           containsStackSlot _ = False
 411 clobbers (CmmStackSlot (RegSlot l) _, _) (_, expr) = f expr
 412     where f (CmmLoad (CmmStackSlot (RegSlot l') _) _) = l == l'
 413           f _ = False
 414 clobbers _ (_, e) = f e
 415     where f (CmmLoad (CmmStackSlot _ _) _) = False
 416           f (CmmLoad{}) = True -- conservative
 417           f (CmmMachOp _ es) = or (map f es)
 418           f _ = False
 419
 420 -- Check for memory overlapping.
 421 -- Diagram:
 422 --      4      8     12
 423 --      s -w-  o
 424 --      [ I32  ]
 425 --      [    F64     ]
 426 --      s'   -w'-    o'
 427 type CallSubArea = (AreaId, Int, Int) -- area, offset, width
 428 overlaps :: CallSubArea -> CallSubArea -> Bool
 429 overlaps (a, _, _) (a', _, _) | a /= a' = False
 430 overlaps (_, o, w) (_, o', w') =
 431     let s  = o  - w
 432         s' = o' - w'
 433     in (s' < o) && (s < o) -- Not LTE, because [ I32  ][ I32  ] is OK
 434
 435 lastAssignment :: WithRegUsage CmmNode O C -> AssignmentMap -> [(Label, AssignmentMap)]
 436 lastAssignment (Plain (CmmCall _ (Just k) _ _ _)) assign = [(k, invalidateVolatile k assign)]
 437 lastAssignment (Plain (CmmForeignCall {succ=k}))  assign = [(k, invalidateVolatile k assign)]
 438 lastAssignment l assign = map (\id -> (id, deleteSinks l assign)) $ successors l
 439
 440 -- Invalidates any expressions that have volatile contents: essentially,
 441 -- all terminals volatile except for literals and loads of stack slots
 442 -- that do not correspond to the call area for 'k' (the current call
 443 -- area is volatile because overflow return parameters may be written
 444 -- there.)
 445 -- Note: mapUFM could be expensive, but hopefully block boundaries
 446 -- aren't too common.  If it is a problem, replace with something more
 447 -- clever.
 448 invalidateVolatile k m = mapUFM p m
 449   where p (AlwaysInline e) = if exp e then AlwaysInline e else NeverOptimize
 450             where exp CmmLit{} = True
 451                   exp (CmmLoad (CmmStackSlot (CallArea (Young k')) _) _)
 452                     | k' == k = False
 453                   exp (CmmLoad (CmmStackSlot _ _) _) = True
 454                   exp (CmmMachOp _ es) = and (map exp es)
 455                   exp _ = False
 456         p _ = NeverOptimize -- probably shouldn't happen with AlwaysSink
 457
 458 assignmentTransfer :: FwdTransfer (WithRegUsage CmmNode) AssignmentMap
 459 assignmentTransfer = mkFTransfer3 (flip const) middleAssignment ((mkFactBase assignmentLattice .) . lastAssignment)
 460
 461 -- Note [Soundness of inlining]
 462 -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 463 -- In the Hoopl paper, the soundness condition on rewrite functions is
 464 -- described as follows:
 465 --
 466 --      "If it replaces a node n by a replacement graph g, then g must
 467 --      be observationally equivalent to n under the assumptions
 468 --      expressed by the incoming dataflow fact f.  Moreover, analysis of
 469 --      g must produce output fact(s) that are at least as informative
 470 --      as the fact(s) produced by applying the transfer function to n."
 471 --
 472 -- We consider the second condition in more detail here.  It says given
 473 -- the rewrite R(n, f) = g, then for any incoming fact f' consistent
 474 -- with f (f' >= f), then running the transfer function T(f', n) <= T(f', g).
 475 -- For inlining this is not necessarily the case:
 476 --
 477 --  n = "x = a + 2"
 478 --  f = f' = {a = y}
 479 --  g = "x = y + 2"
 480 --  T(f', n) = {x = a + 2, a = y}
 481 --  T(f', g) = {x = y + 2, a = y}
 482 --
 483 -- y + 2 and a + 2 are not obviously comparable, and a naive
 484 -- implementation of the lattice would say they are incomparable.
 485 -- At best, this means we may be over-conservative, at worst, it means
 486 -- we may not terminate.
 487 --
 488 -- However, in the original Lerner-Grove-Chambers paper, soundness and
 489 -- termination are separated, and only equivalence of facts is required
 490 -- for soundness.  Monotonicity of the transfer function is not required
 491 -- for termination (as the calculation of least-upper-bound prevents
 492 -- this from being a problem), but it means we won't necessarily find
 493 -- the least-fixed point.
 494
 495 -- Note [Coherency of annotations]
 496 -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 497 -- Is it possible for our usage annotations to become invalid after we
 498 -- start performing transformations?  As the usage info only provides
 499 -- an upper bound, we only need to consider cases where the usages of
 500 -- a register may increase due to transformations--e.g. any reference
 501 -- to a local register in an AlwaysInline or AlwaysSink instruction, whose
 502 -- originating assignment was single use (we don't care about the
 503 -- many use case, because it is the top of the lattice).  But such a
 504 -- case is not possible, because we always inline any single use
 505 -- register.  QED.
 506 --
 507 -- TODO: A useful lint option would be to check this invariant that
 508 -- there is never a local register in the assignment map that is
 509 -- single-use.
 510
 511 -- Note [Soundness of store rewriting]
 512 -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 513 -- Its soundness depends on the invariant that no assignment is made to
 514 -- the local register before its store is accessed.  This is clearly
 515 -- true with unoptimized spill-reload code, and as the store will always
 516 -- be rewritten first (if possible), there is no chance of it being
 517 -- propagated down before getting written (possibly with incorrect
 518 -- values from the assignment map, due to reassignment of the local
 519 -- register.)  This is probably not locally sound.
 520
 521 assignmentRewrite :: FwdRewrite FuelUniqSM (WithRegUsage CmmNode) AssignmentMap
 522 assignmentRewrite = mkFRewrite3 first middle last
 523     where
 524         first _ _ = return Nothing
 525         middle :: WithRegUsage CmmNode O O -> AssignmentMap -> GenCmmReplGraph (WithRegUsage CmmNode) O O
 526         middle (Plain m) assign = return $ rewrite assign (precompute assign m) mkMiddle m
 527         middle (AssignLocal l e u) assign = return $ rewriteLocal assign (precompute assign (CmmAssign (CmmLocal l) e)) l e u
 528         last (Plain l) assign = return $ rewrite assign (precompute assign l) mkLast l
 529         -- Tuple is (inline?, reloads for sinks)
 530         precompute :: AssignmentMap -> CmmNode O x -> (Bool, [WithRegUsage CmmNode O O])
 531         precompute assign n = foldRegsUsed f (False, []) n -- duplicates are harmless
 532             where f (i, l) r = case lookupUFM assign r of
 533                                 Just (AlwaysSink e)   -> (i, (Plain (CmmAssign (CmmLocal r) e)):l)
 534                                 Just (AlwaysInline _) -> (True, l)
 535                                 Just NeverOptimize    -> (i, l)
 536                                 -- This case can show up when we have
 537                                 -- limited optimization fuel.
 538                                 Nothing -> (i, l)
 539         rewrite :: AssignmentMap
 540                 -> (Bool, [WithRegUsage CmmNode O O])
 541                 -> (WithRegUsage CmmNode O x -> Graph (WithRegUsage CmmNode) O x)
 542                 -> CmmNode O x
 543                 -> Maybe (Graph (WithRegUsage CmmNode) O x)
 544         rewrite _ (False, []) _ _ = Nothing
 545         -- Note [CmmCall Inline Hack]
 546         -- Conservative hack: don't do any inlining on what will
 547         -- be translated into an OldCmm CmmCalls, since the code
 548         -- produced here tends to be unproblematic and I need to write
 549         -- lint passes to ensure that we don't put anything in the
 550         -- arguments that could be construed as a global register by
 551         -- some later translation pass.  (For example, slots will turn
 552         -- into dereferences of Sp).  See [Register parameter passing].
 553         -- ToDo: Fix this up to only bug out if all inlines were for
 554         -- CmmExprs with global registers (we can't use the
 555         -- straightforward mapExpDeep call, in this case.) ToDo: We miss
 556         -- an opportunity here, where all possible inlinings should
 557         -- instead be sunk.
 558         rewrite _ (True, []) _ n | not (inlinable n) = Nothing -- see [CmmCall Inline Hack]
 559         rewrite assign (i, xs) mk n = Just $ mkMiddles xs <*> mk (Plain (inline i assign n))
 560
 561         rewriteLocal :: AssignmentMap
 562                      -> (Bool, [WithRegUsage CmmNode O O])
 563                      -> LocalReg -> CmmExpr -> RegUsage
 564                      -> Maybe (Graph (WithRegUsage CmmNode) O O)
 565         rewriteLocal _ (False, []) _ _ _ = Nothing
 566         rewriteLocal assign (i, xs) l e u = Just $ mkMiddles xs <*> mkMiddle n'
 567             where n' = AssignLocal l e' u
 568                   e' = if i then wrapRecExp (inlineExp assign) e else e
 569             -- inlinable check omitted, since we can always inline into
 570             -- assignments.
 571
 572         inline :: Bool -> AssignmentMap -> CmmNode e x -> CmmNode e x
 573         inline False _ n = n
 574         inline True  _ n | not (inlinable n) = n -- see [CmmCall Inline Hack]
 575         inline True assign n = mapExpDeep (inlineExp assign) n
 576
 577         inlineExp assign old@(CmmReg (CmmLocal r))
 578           = case lookupUFM assign r of
 579               Just (AlwaysInline x) -> x
 580               _ -> old
 581         inlineExp assign old@(CmmRegOff (CmmLocal r) i)
 582           = case lookupUFM assign r of
 583               Just (AlwaysInline x) ->
 584                 case x of
 585                     (CmmRegOff r' i') -> CmmRegOff r' (i + i')
 586                     _ -> CmmMachOp (MO_Add rep) [x, CmmLit (CmmInt (fromIntegral i) rep)]
 587                           where rep = typeWidth (localRegType r)
 588               _ -> old
 589         -- See Note [Soundness of store rewriting]
 590         inlineExp assign old@(CmmLoad (CmmStackSlot (RegSlot r) _) _)
 591           = case lookupUFM assign r of
 592               Just (AlwaysInline x) -> x
 593               _ -> old
 594         inlineExp _ old = old
 595
 596         inlinable :: CmmNode e x -> Bool
 597         inlinable (CmmCall{}) = False
 598         inlinable (CmmForeignCall{}) = False
 599         inlinable (CmmUnsafeForeignCall{}) = False
 600         inlinable _ = True
 601
 602 -- ToDo: Outputable instance for UsageMap and AssignmentMap