compiler/cmm/CmmProcPointZ.hs

   1
   2 module CmmProcPointZ
   3     ( callProcPoints, minimalProcPointSet
   4     , addProcPointProtocols, splitAtProcPoints, procPointAnalysis
   5     , liveSlotAnal, cafAnal, layout, manifestSP, igraph, areaBuilder
   6     )
   7 where
   8
   9 import Constants
  10 import qualified Prelude as P
  11 import Prelude hiding (zip, unzip, last)
  12 import Util (sortLe)
  13
  14 import BlockId
  15 import Bitmap
  16 import CLabel
  17 import Cmm hiding (blockId)
  18 import CmmExpr
  19 import CmmContFlowOpt
  20 import CmmLiveZ
  21 import CmmTx
  22 import DFMonad
  23 import FiniteMap
  24 import IdInfo
  25 import List (sortBy)
  26 import Maybes
  27 import MkZipCfgCmm hiding (CmmBlock, CmmGraph, CmmTopZ)
  28 import Monad
  29 import Name
  30 import Outputable
  31 import Panic
  32 import SMRep (rET_SMALL)
  33 import StgCmmClosure
  34 import StgCmmUtils
  35 import UniqFM
  36 import UniqSet
  37 import UniqSupply
  38 import ZipCfg
  39 import ZipCfgCmmRep
  40 import ZipDataflow
  41
  42 -- Compute a minimal set of proc points for a control-flow graph.
  43
  44 -- Determine a protocol for each proc point (which live variables will
  45 -- be passed as arguments and which will be on the stack).
  46
  47 {-
  48 A proc point is a basic block that, after CPS transformation, will
  49 start a new function.  The entry block of the original function is a
  50 proc point, as is the continuation of each function call.
  51 A third kind of proc point arises if we want to avoid copying code.
  52 Suppose we have code like the following:
  53
  54   f() {
  55     if (...) { ..1..; call foo(); ..2..}
  56     else     { ..3..; call bar(); ..4..}
  57     x = y + z;
  58     return x;
  59   }
  60
  61 The statement 'x = y + z' can be reached from two different proc
  62 points: the continuations of foo() and bar().  We would prefer not to
  63 put a copy in each continuation; instead we would like 'x = y + z' to
  64 be the start of a new procedure to which the continuations can jump:
  65
  66   f_cps () {
  67     if (...) { ..1..; push k_foo; jump foo_cps(); }
  68     else     { ..3..; push k_bar; jump bar_cps(); }
  69   }
  70   k_foo() { ..2..; jump k_join(y, z); }
  71   k_bar() { ..4..; jump k_join(y, z); }
  72   k_join(y, z) { x = y + z; return x; }
  73
  74 You might think then that a criterion to make a node a proc point is
  75 that it is directly reached by two distinct proc points.  (Note
  76 [Direct reachability].)  But this criterion is a bit too simple; for
  77 example, 'return x' is also reached by two proc points, yet there is
  78 no point in pulling it out of k_join.  A good criterion would be to
  79 say that a node should be made a proc point if it is reached by a set
  80 of proc points that is different than its immediate dominator.  NR
  81 believes this criterion can be shown to produce a minimum set of proc
  82 points, and given a dominator tree, the proc points can be chosen in
  83 time linear in the number of blocks.  Lacking a dominator analysis,
  84 however, we turn instead to an iterative solution, starting with no
  85 proc points and adding them according to these rules:
  86
  87   1. The entry block is a proc point.
  88   2. The continuation of a call is a proc point.
  89   3. A node is a proc point if it is directly reached by more proc
  90      points than one of its predecessors.
  91
  92 Because we don't understand the problem very well, we apply rule 3 at
  93 most once per iteration, then recompute the reachability information.
  94 (See Note [No simple dataflow].)  The choice of the new proc point is
  95 arbitrary, and I don't know if the choice affects the final solution,
  96 so I don't know if the number of proc points chosen is the
  97 minimum---but the set will be minimal.
  98 -}
  99
 100 type ProcPointSet = BlockSet
 101
 102 data Status
 103   = ReachedBy ProcPointSet  -- set of proc points that directly reach the block
 104   | ProcPoint               -- this block is itself a proc point
 105
 106 instance Outputable Status where
 107   ppr (ReachedBy ps)
 108       | isEmptyUniqSet ps = text "<not-reached>"
 109       | otherwise = text "reached by" <+>
 110                     (hsep $ punctuate comma $ map ppr $ uniqSetToList ps)
 111   ppr ProcPoint = text "<procpt>"
 112
 113
 114 lattice :: DataflowLattice Status
 115 lattice = DataflowLattice "direct proc-point reachability" unreached add_to False
 116     where unreached = ReachedBy emptyBlockSet
 117           add_to _ ProcPoint = noTx ProcPoint
 118           add_to ProcPoint _ = aTx ProcPoint -- aTx because of previous case again
 119           add_to (ReachedBy p) (ReachedBy p') =
 120               let union = unionUniqSets p p'
 121               in  if sizeUniqSet union > sizeUniqSet p' then
 122                       aTx (ReachedBy union)
 123                   else
 124                       noTx (ReachedBy p')
 125 --------------------------------------------------
 126 -- transfer equations
 127
 128 forward :: ForwardTransfers Middle Last Status
 129 forward = ForwardTransfers first middle last exit
 130     where first ProcPoint id = ReachedBy $ unitUniqSet id
 131           first  x _ = x
 132           middle x _ = x
 133           last _ (LastCall _ (Just id) _) = LastOutFacts [(id, ProcPoint)]
 134           last x l = LastOutFacts $ map (\id -> (id, x)) (succs l)
 135           exit x   = x
 136
 137 -- It is worth distinguishing two sets of proc points:
 138 -- those that are induced by calls in the original graph
 139 -- and those that are introduced because they're reachable from multiple proc points.
 140 callProcPoints      :: CmmGraph -> ProcPointSet
 141 minimalProcPointSet :: ProcPointSet -> CmmGraph -> FuelMonad ProcPointSet
 142
 143 callProcPoints g = fold_blocks add entryPoint g
 144   where entryPoint = unitUniqSet (lg_entry g)
 145         add b set = case last $ unzip b of
 146                       LastOther (LastCall _ (Just k) _) -> extendBlockSet set k
 147                       _ -> set
 148
 149 minimalProcPointSet callProcPoints g = extendPPSet g (postorder_dfs g) callProcPoints
 150
 151 type PPFix = FuelMonad (ForwardFixedPoint Middle Last Status ())
 152
 153 procPointAnalysis :: ProcPointSet -> CmmGraph -> FuelMonad (BlockEnv Status)
 154 procPointAnalysis procPoints g =
 155   let addPP env id = extendBlockEnv env id ProcPoint
 156       initProcPoints = foldl addPP emptyBlockEnv (uniqSetToList procPoints)
 157   in liftM zdfFpFacts $
 158         (zdfSolveFrom initProcPoints "proc-point reachability" lattice
 159                               forward (fact_bot lattice) $ graphOfLGraph g :: PPFix)
 160
 161 extendPPSet :: CmmGraph -> [CmmBlock] -> ProcPointSet -> FuelMonad ProcPointSet
 162 extendPPSet g blocks procPoints =
 163     do env <- procPointAnalysis procPoints g
 164        let add block pps = let id = blockId block
 165                            in  case lookupBlockEnv env id of
 166                                  Just ProcPoint -> extendBlockSet pps id
 167                                  _ -> pps
 168            procPoints' = fold_blocks add emptyBlockSet g
 169            newPoint = listToMaybe (mapMaybe ppSuccessor blocks)
 170            ppSuccessor b@(Block id _ _) =
 171                let nreached id = case lookupBlockEnv env id `orElse` panic "no ppt" of
 172                                    ProcPoint -> 1
 173                                    ReachedBy ps -> sizeUniqSet ps
 174                    my_nreached = nreached id
 175                    -- | Looking for a successor of b that is reached by
 176                    -- more proc points than b and is not already a proc
 177                    -- point.  If found, it can become a proc point.
 178                    newId succ_id = not (elemBlockSet succ_id procPoints') &&
 179                                    nreached succ_id > my_nreached
 180                in  listToMaybe $ filter newId $ succs b
 181        case newPoint of Just id ->
 182                           if elemBlockSet id procPoints' then panic "added old proc pt"
 183                           else extendPPSet g blocks (extendBlockSet procPoints' id)
 184                         Nothing -> return procPoints'
 185
 186
 187 ------------------------------------------------------------------------
 188 --                    Computing Proc-Point Protocols                  --
 189 ------------------------------------------------------------------------
 190
 191 {-
 192
 193 There is one major trick, discovered by Michael Adams, which is that
 194 we want to choose protocols in a way that enables us to optimize away
 195 some continuations.  The optimization is very much like branch-chain
 196 elimination, except that it involves passing results as well as
 197 control.  The idea is that if a call's continuation k does nothing but
 198 CopyIn its results and then goto proc point P, the call's continuation
 199 may be changed to P, *provided* P's protocol is identical to the
 200 protocol for the CopyIn.  We choose protocols to make this so.
 201
 202 Here's an explanatory example; we begin with the source code (lines
 203 separate basic blocks):
 204
 205   ..1..;
 206   x, y = g();
 207   goto P;
 208   -------
 209   P: ..2..;
 210
 211 Zipperization converts this code as follows:
 212
 213   ..1..;
 214   call g() returns to k;
 215   -------
 216   k: CopyIn(x, y);
 217      goto P;
 218   -------
 219   P: ..2..;
 220
 221 What we'd like to do is assign P the same CopyIn protocol as k, so we
 222 can eliminate k:
 223
 224   ..1..;
 225   call g() returns to P;
 226   -------
 227   P: CopyIn(x, y); ..2..;
 228
 229 Of course, P may be the target of more than one continuation, and
 230 different continuations may have different protocols.  Michael Adams
 231 implemented a voting mechanism, but he thinks a simple greedy
 232 algorithm would be just as good, so that's what we do.
 233
 234 -}
 235
 236 data Protocol = Protocol Convention CmmFormals Area
 237   deriving Eq
 238 instance Outputable Protocol where
 239   ppr (Protocol c fs a) = text "Protocol" <+> ppr c <+> ppr fs <+> ppr a
 240
 241 -- | Function 'optimize_calls' chooses protocols only for those proc
 242 -- points that are relevant to the optimization explained above.
 243 -- The others are assigned by 'add_unassigned', which is not yet clever.
 244
 245 addProcPointProtocols :: ProcPointSet -> ProcPointSet -> CmmGraph -> FuelMonad CmmGraph
 246 addProcPointProtocols callPPs procPoints g =
 247   do liveness <- cmmLivenessZ g
 248      (protos, g') <- return $ optimize_calls liveness g
 249      blocks'' <- add_CopyOuts protos procPoints g'
 250      return $ LGraph (lg_entry g) (lg_argoffset g) blocks''
 251     where optimize_calls liveness g =  -- see Note [Separate Adams optimization]
 252               let (protos, blocks') =
 253                       fold_blocks maybe_add_call (init_protocols, emptyBlockEnv) g
 254                   protos' = add_unassigned liveness procPoints protos
 255                   g'  = LGraph (lg_entry g) (lg_argoffset g) $
 256                                add_CopyIns callPPs protos' blocks'
 257               in  (protos', runTx removeUnreachableBlocksZ g')
 258           maybe_add_call :: CmmBlock -> (BlockEnv Protocol, BlockEnv CmmBlock)
 259                          -> (BlockEnv Protocol, BlockEnv CmmBlock)
 260           -- ^ If the block is a call whose continuation goes to a proc point
 261           -- whose protocol either matches the continuation's or is not yet set,
 262           -- redirect the call (cf 'newblock') and set the protocol if necessary
 263           maybe_add_call block (protos, blocks) =
 264               case goto_end $ unzip block of
 265                 (h, LastOther (LastCall tgt (Just k) s))
 266                     | Just proto <- lookupBlockEnv protos k,
 267                       Just pee   <- branchesToProcPoint k
 268                     -> let newblock = zipht h (tailOfLast (LastCall tgt (Just pee) s))
 269                            changed_blocks   = insertBlock newblock blocks
 270                            unchanged_blocks = insertBlock block    blocks
 271                        in case lookupBlockEnv protos pee of
 272                             Nothing -> (extendBlockEnv protos pee proto,changed_blocks)
 273                             Just proto' ->
 274                               if proto == proto' then (protos, changed_blocks)
 275                               else (protos, unchanged_blocks)
 276                 _ -> (protos, insertBlock block blocks)
 277
 278           branchesToProcPoint :: BlockId -> Maybe BlockId
 279           -- ^ Tells whether the named block is just a branch to a proc point
 280           branchesToProcPoint id =
 281               let (Block _ _ t) = lookupBlockEnv (lg_blocks g) id `orElse`
 282                                   panic "branch out of graph"
 283               in case t of
 284                    ZLast (LastOther (LastBranch pee))
 285                        | elemBlockSet pee procPoints -> Just pee
 286                    _ -> Nothing
 287           init_protocols = fold_blocks maybe_add_proto emptyBlockEnv g
 288           maybe_add_proto :: CmmBlock -> BlockEnv Protocol -> BlockEnv Protocol
 289           --maybe_add_proto (Block id (ZTail (CopyIn c _ fs _srt) _)) env =
 290           --    extendBlockEnv env id (Protocol c fs $ toArea id fs)
 291           maybe_add_proto _ env = env
 292
 293 -- | For now, following a suggestion by Ben Lippmeier, we pass all
 294 -- live variables as arguments, hoping that a clever register
 295 -- allocator might help.
 296
 297 add_unassigned :: BlockEnv CmmLive -> ProcPointSet -> BlockEnv Protocol ->
 298                   BlockEnv Protocol
 299 add_unassigned = pass_live_vars_as_args
 300
 301 pass_live_vars_as_args :: BlockEnv CmmLive -> ProcPointSet ->
 302                           BlockEnv Protocol -> BlockEnv Protocol
 303 pass_live_vars_as_args _liveness procPoints protos = protos'
 304   where protos' = foldUniqSet addLiveVars protos procPoints
 305         addLiveVars :: BlockId -> BlockEnv Protocol -> BlockEnv Protocol
 306         addLiveVars id protos =
 307             case lookupBlockEnv protos id of
 308               Just _  -> protos
 309               Nothing -> let live = emptyBlockEnv
 310                                     --lookupBlockEnv _liveness id `orElse`
 311                                     --panic ("no liveness at block " ++ show id)
 312                              formals = uniqSetToList live
 313                              prot = Protocol Private formals $ CallArea $ Young id
 314                          in  extendBlockEnv protos id prot
 315
 316
 317 -- | Add copy-in instructions to each proc point that did not arise from a call
 318 -- instruction. (Proc-points that arise from calls already have their copy-in instructions.)
 319
 320 add_CopyIns :: ProcPointSet -> BlockEnv Protocol -> BlockEnv CmmBlock -> BlockEnv CmmBlock
 321 add_CopyIns callPPs protos blocks = mapUFM maybe_insert_CopyIns blocks
 322     where maybe_insert_CopyIns :: CmmBlock -> CmmBlock
 323           maybe_insert_CopyIns b@(Block id off t) | not $ elementOfUniqSet id callPPs =
 324             case (off, lookupBlockEnv protos id) of
 325               (Just _, _) -> panic "shouldn't copy arguments twice into a block"
 326               (_, Just (Protocol c fs area)) -> Block id (Just off) $ foldr ZTail t copies
 327                 where (off, copies) = copyIn c False area fs
 328               (_, Nothing) -> b
 329           maybe_insert_CopyIns b = b
 330
 331 -- | Add a CopyOut node before each procpoint.
 332 -- If the predecessor is a call, then the copy outs should already be done by the callee.
 333 -- Note: If we need to add copy-out instructions, they may require stack space,
 334 -- so we accumulate a map from the successors to the necessary stack space,
 335 -- then update the successors after we have finished inserting the copy-outs.
 336
 337 add_CopyOuts :: BlockEnv Protocol -> ProcPointSet -> CmmGraph ->
 338                 FuelMonad (BlockEnv CmmBlock)
 339 add_CopyOuts protos procPoints g = fold_blocks mb_copy_out (return emptyBlockEnv) g
 340     where mb_copy_out :: CmmBlock -> FuelMonad (BlockEnv CmmBlock) ->
 341                                      FuelMonad (BlockEnv CmmBlock)
 342           mb_copy_out b@(Block bid _ _) z | bid == lg_entry g = skip b z
 343           mb_copy_out b z =
 344             case last $ unzip b of
 345               LastOther (LastCall _ _ _) -> skip b z -- copy out done by callee
 346               _ -> mb_copy_out' b z
 347           mb_copy_out' b z = fold_succs trySucc b init >>= finish
 348             where init = z >>= (\bmap -> return (b, bmap))
 349                   trySucc succId z =
 350                     if elemBlockSet succId procPoints then
 351                       case lookupBlockEnv protos succId of
 352                         Nothing -> z
 353                         Just (Protocol c fs area) ->
 354                           let (_, copies) = copyOut c Jump area $ map (CmmReg . CmmLocal) fs
 355                           in  insert z succId copies
 356                     else z
 357                   insert z succId m =
 358                     do (b, bmap) <- z
 359                        (b, bs)   <- insertBetween b m succId
 360                        pprTrace "insert for succ" (ppr succId <> ppr m) $
 361                         return $ (b, foldl (flip insertBlock) bmap bs)
 362                   finish (b@(Block bid _ _), bmap) =
 363                     return $ (extendBlockEnv bmap bid b)
 364           skip b@(Block bid _ _) bs =
 365             bs >>= (\bmap -> return (extendBlockEnv bmap bid b))
 366
 367 -- At this point, we have found a set of procpoints, each of which should be
 368 -- the entry point of a procedure.
 369 -- Now, we create the procedure for each proc point,
 370 -- which requires that we:
 371 -- 1. build a map from proc points to the blocks reachable from the proc point
 372 -- 2. turn each branch to a proc point into a jump
 373 -- 3. turn calls and returns into jumps
 374 -- 4. build info tables for the procedures -- and update the info table for
 375 --    the SRTs in the entry procedure as well.
 376 -- Input invariant: A block should only be reachable from a single ProcPoint.
 377 splitAtProcPoints :: CLabel -> ProcPointSet-> ProcPointSet -> BlockEnv Status ->
 378                      BlockEnv SubAreaSet -> AreaMap -> CmmTopZ -> FuelMonad [CmmTopZ]
 379 splitAtProcPoints entry_label callPPs procPoints procMap slotEnv areaMap
 380                   (CmmProc top_info top_l top_args g@(LGraph entry e_off blocks)) =
 381   do -- Build a map from procpoints to the blocks they reach
 382      let addBlock b@(Block bid _ _) graphEnv =
 383            case lookupBlockEnv procMap bid of
 384              Just ProcPoint -> add graphEnv bid bid b
 385              Just (ReachedBy set) ->
 386                case uniqSetToList set of
 387                  []   -> graphEnv
 388                  [id] -> add graphEnv id bid b
 389                  _ -> panic "Each block should be reachable from only one ProcPoint"
 390              Nothing -> pprPanic "block not reached by a proc point?" (ppr bid)
 391          add graphEnv procId bid b = extendBlockEnv graphEnv procId graph'
 392                where graph  = lookupBlockEnv graphEnv procId `orElse` emptyBlockEnv
 393                      graph' = extendBlockEnv graph bid b
 394      graphEnv <- return $ fold_blocks addBlock emptyBlockEnv g
 395      -- Build a map from proc point BlockId to labels for their new procedures
 396      let add_label map pp = return $ addToFM map pp lbl
 397            where lbl = if pp == entry then entry_label else blockLbl pp
 398      procLabels <- foldM add_label emptyFM (uniqSetToList procPoints)
 399      -- Convert call and return instructions to jumps.
 400      let last (LastCall e _ n) = LastJump e n
 401          last l = l
 402      graphEnv <- return $ mapUFM (mapUFM (map_one_block id id last)) graphEnv
 403      -- In each new graph, add blocks jumping off to the new procedures,
 404      -- and replace branches to procpoints with branches to the jump-off blocks
 405      let add_jump_block (env, bs) (pp, l) =
 406            do bid <- liftM mkBlockId getUniqueM
 407               let b = Block bid Nothing (ZLast (LastOther jump))
 408                   argSpace = case lookupBlockEnv blocks pp of
 409                                Just (Block _ (Just s) _) -> s
 410                                Just (Block _ Nothing  _) -> panic "no args at procpoint"
 411                                _ -> panic "can't find procpoint block"
 412                   jump = LastJump (CmmLit (CmmLabel l)) argSpace
 413               return $ (extendBlockEnv env pp bid, b : bs)
 414          add_jumps newGraphEnv (guniq, blockEnv) =
 415            do (jumpEnv, jumpBlocks) <- foldM add_jump_block (emptyBlockEnv, [])
 416                                            $ fmToList procLabels
 417               let ppId = mkBlockId guniq
 418                   (b_off, b) =
 419                     case lookupBlockEnv blockEnv ppId of
 420                       Just (Block id (Just b_off) t) -> (b_off, Block id Nothing t)
 421                       Just b@(Block _ Nothing _)     -> (0, b)
 422                       Nothing -> panic "couldn't find entry block while splitting"
 423                   off = if ppId == entry then e_off else b_off
 424                   LGraph _ _ blockEnv' = pprTrace "jumpEnv" (ppr jumpEnv) $
 425                                          replaceLabelsZ jumpEnv $ LGraph ppId off blockEnv
 426                   blockEnv'' = foldl (flip insertBlock) (extendBlockEnv blockEnv' ppId b)
 427                                      jumpBlocks
 428               return $ extendBlockEnv newGraphEnv ppId $
 429                        runTx cmmCfgOptsZ $ LGraph ppId off blockEnv''
 430          upd_info_tbl srt' (CmmInfoTable p t typeinfo) = CmmInfoTable p t typeinfo'
 431            where typeinfo' = case typeinfo of
 432                    t@(ConstrInfo _ _ _)    -> t
 433                    (FunInfo    c _ a d e)  -> FunInfo c srt' a d e
 434                    (ThunkInfo  c _)        -> ThunkInfo c srt'
 435                    (ThunkSelectorInfo s _) -> ThunkSelectorInfo s srt'
 436                    (ContInfo vars _)       -> ContInfo vars srt'
 437          upd_info_tbl _ CmmNonInfoTable = CmmNonInfoTable
 438          to_proc cafMap (ppUniq, g) | elementOfUniqSet bid callPPs =
 439            if bid == entry then
 440              CmmProc (CmmInfo gc upd_fr (upd_info_tbl srt' info_tbl)) top_l top_args g
 441            else
 442             pprTrace "adding infotable for" (ppr bid) $
 443              CmmProc (CmmInfo Nothing Nothing $ infoTbl) lbl [] g
 444            where bid = mkBlockId ppUniq
 445                  lbl = expectJust "pp label" $ lookupFM procLabels bid
 446                  infoTbl = CmmInfoTable (ProfilingInfo zero zero) rET_SMALL
 447                                         (ContInfo stack_vars srt')
 448                  stack_vars = pprTrace "slotEnv" (ppr slotEnv) $
 449                                live_vars slotEnv areaMap bid
 450                  zero = CmmInt 0 wordWidth
 451                  srt' = expectJust "procpoint.infoTbl" $ lookupBlockEnv cafMap bid
 452                  CmmInfo gc upd_fr info_tbl = top_info
 453          to_proc _ (ppUniq, g) =
 454           pprTrace "not adding infotable for" (ppr bid) $
 455            CmmProc (CmmInfo Nothing Nothing CmmNonInfoTable) lbl [] g
 456              where bid = mkBlockId ppUniq
 457                    lbl = expectJust "pp label" $ lookupFM procLabels bid
 458      graphEnv <- foldM add_jumps emptyBlockEnv $ ufmToList graphEnv
 459      cafEnv <- cafAnal g
 460      (cafTable, blockCafs) <- buildCafs cafEnv
 461      procs <- return $ map (to_proc blockCafs) $ ufmToList graphEnv
 462      return $ pprTrace "procLabels" (ppr procLabels) $
 463               pprTrace "splitting graphs" (ppr graphEnv) $ cafTable ++ procs
 464 splitAtProcPoints _ _ _ _ _ _ t@(CmmData _ _) = return [t]
 465
 466 ------------------------------------------------------------------------
 467 --                    Stack Layout                                    --
 468 ------------------------------------------------------------------------
 469
 470 -- | Before we lay out the stack, we need to know something about the
 471 -- liveness of the stack slots. In particular, to decide whether we can
 472 -- reuse a stack location to hold multiple stack slots, we need to know
 473 -- when each of the stack slots is used.
 474 -- Although tempted to use something simpler, we really need a full interference
 475 -- graph. Consider the following case:
 476 --   case <...> of
 477 --     1 -> <spill x>; // y is dead out
 478 --     2 -> <spill y>; // x is dead out
 479 --     3 -> <spill x and y>
 480 -- If we consider the arms in order and we use just the deadness information given by a
 481 -- dataflow analysis, we might decide to allocate the stack slots for x and y
 482 -- to the same stack location, which will lead to incorrect code in the third arm.
 483 -- We won't make this mistake with an interference graph.
 484
 485 -- First, the liveness analysis.
 486 -- We represent a slot with an area, an offset into the area, and a width.
 487 -- Tracking the live slots is a bit tricky because there may be loads and stores
 488 -- into only a part of a stack slot (e.g. loading the low word of a 2-word long),
 489 -- e.g. Slot A 0 8 overlaps with Slot A 4 4.
 490 --
 491 -- The definition of a slot set is intended to reduce the number of overlap
 492 -- checks we have to make. There's no reason to check for overlap between
 493 -- slots in different areas, so we segregate the map by Area's.
 494 -- We expect few slots in each Area, so we collect them in an unordered list.
 495 -- To keep these lists short, any contiguous live slots are coalesced into
 496 -- a single slot, on insertion.
 497
 498 type SubAreaSet   = FiniteMap Area [SubArea]
 499 fold_subareas :: (SubArea -> z -> z) -> SubAreaSet -> z -> z
 500 fold_subareas f m z = foldFM (\_ s z -> foldr (\a z -> f a z) z s) z m
 501
 502 liveGen :: SubArea -> [SubArea] -> (Bool, [SubArea])
 503 liveGen s set = liveGen' s set []
 504   where liveGen' s [] z = (True, s : z)
 505         liveGen' s@(a, hi, w) (s'@(a', hi', w') : rst) z =
 506           if a /= a' || hi < lo' || lo > hi' then    -- no overlap
 507             liveGen' s rst (s' : z)
 508           else if s' `contains` s then               -- old contains new
 509             (False, set)
 510           else                                       -- overlap: coalesce the slots
 511             let new_hi = max hi hi'
 512                 new_lo = min lo lo'
 513             in liveGen' (a, new_hi, new_hi - new_lo) rst z
 514           where lo  = hi  - w  -- remember: areas grow down
 515                 lo' = hi' - w'
 516         contains (a, hi, w) (a', hi', w') =
 517           a == a' && hi >= hi' && hi - w <= hi' - w'
 518
 519 liveKill :: SubArea -> [SubArea] -> [SubArea]
 520 liveKill (a, hi, w) set = pprTrace "killing slots in area" (ppr a) $ liveKill' set []
 521   where liveKill' [] z = z
 522         liveKill' (s'@(a', hi', w') : rst) z =
 523           if a /= a' || hi < lo' || lo > hi' then    -- no overlap
 524             liveKill' rst (s' : z)
 525           else                                       -- overlap: split the old slot
 526             let z'  = if hi' > hi  then (a, hi', hi' - hi)  : z else z
 527                 z'' = if lo  > lo' then (a, lo,  lo  - lo') : z' else z'
 528             in liveKill' rst z''
 529           where lo  = hi  - w  -- remember: areas grow down
 530                 lo' = hi' - w'
 531
 532 slotLattice :: DataflowLattice SubAreaSet
 533 slotLattice = DataflowLattice "live slots" emptyFM add True
 534   where add new old = case foldFM addArea (False, old) new of
 535                         (True,  x) -> aTx  x
 536                         (False, x) -> noTx x
 537         addArea a newSlots z = foldr (addSlot a) z newSlots
 538         addSlot a slot (changed, map) =
 539           let (c, live) = liveGen slot $ lookupWithDefaultFM map [] a
 540           in (c || changed, addToFM map a live)
 541
 542 liveInSlots :: (DefinerOfSlots s, UserOfSlots s) => SubAreaSet -> s -> SubAreaSet
 543 liveInSlots live x = foldSlotsUsed add (foldSlotsDefd remove live x) x
 544   where add    live (a, i, w) = liftToArea a (snd . liveGen  (a, i, w)) live
 545         remove live (a, i, w) = liftToArea a       (liveKill (a, i, w)) live
 546         liftToArea a f map = addToFM map a $ f (lookupWithDefaultFM map [] a)
 547
 548 -- Unlike the liveness transfer functions @gen@ and @kill@, this function collects
 549 -- _any_ slot that is named.
 550 --addNamedSlots :: (DefinerOfSlots s, UserOfSlots s) => SubAreaSet -> s -> SubAreaSet
 551 --addNamedSlots live x = foldSlotsUsed add (foldSlotsDefd add live x) x
 552 --  where add    live (a, i, w) = liftToArea a (snd . liveGen  (a, i, w)) live
 553 --        liftToArea a f map = addToFM map a $ f (lookupWithDefaultFM map [] a)
 554
 555 -- Note: the stack slots that hold variables returned on the stack are not
 556 -- considered live in to the block -- we treat the first node as a definition site.
 557 -- BEWARE: I'm being a little careless here in failing to check for the
 558 -- entry Id (which would use the CallArea Old).
 559 liveTransfers :: BackwardTransfers Middle Last SubAreaSet
 560 liveTransfers = BackwardTransfers first liveInSlots liveLastIn
 561     where first live id = delFromFM live (CallArea (Young id))
 562
 563 liveLastIn :: (BlockId -> SubAreaSet) -> Last -> SubAreaSet
 564 liveLastIn env l = liveInSlots (liveLastOut env l) l
 565
 566 -- Don't forget to keep the outgoing parameters in the CallArea live.
 567 liveLastOut :: (BlockId -> SubAreaSet) -> Last -> SubAreaSet
 568 liveLastOut env l =
 569   case l of
 570     LastReturn n          -> add_area (CallArea Old)       n out
 571     LastJump _ n          -> add_area (CallArea Old)       n out
 572     LastCall _ Nothing  n -> add_area (CallArea Old)       n out
 573     LastCall _ (Just k) n -> add_area (CallArea (Young k)) n out
 574     _                     -> out
 575   where out = joinOuts slotLattice env l
 576 add_area :: Area -> Int -> SubAreaSet -> SubAreaSet
 577 add_area a n live =
 578   addToFM live a $ snd $ liveGen (a, n, n) $ lookupWithDefaultFM live [] a
 579
 580 type SlotFix a = FuelMonad (BackwardFixedPoint Middle Last SubAreaSet a)
 581 liveSlotAnal :: LGraph Middle Last -> FuelMonad (BlockEnv SubAreaSet)
 582 liveSlotAnal g = liftM zdfFpFacts (res :: SlotFix ())
 583   where res = zdfSolveFromL emptyBlockEnv "live slot analysis" slotLattice
 584                             liveTransfers (fact_bot slotLattice) g
 585
 586 -- The liveness analysis must be precise: otherwise, we won't know if a definition
 587 -- should really kill a live-out stack slot.
 588 -- But the interference graph does not have to be precise -- it might decide that
 589 -- any live areas interfere. To maintain both a precise analysis and an imprecise
 590 -- interference graph, we need to convert the live-out stack slots to graph nodes
 591 -- at each and every instruction; rather than reconstruct a new list of nodes
 592 -- every time, I provide a function to fold over the nodes, which should be a
 593 -- reasonably efficient approach for the implementations we envision.
 594 -- Of course, it will probably be much easier to program if we just return a list...
 595 type Set x = FiniteMap x ()
 596 type AreaMap = FiniteMap Area Int
 597 data IGraphBuilder n =
 598   Builder { foldNodes     :: forall z. SubArea -> (n -> z -> z) -> z -> z
 599           , _wordsOccupied :: AreaMap -> AreaMap -> n -> [Int]
 600           }
 601
 602 areaBuilder :: IGraphBuilder Area
 603 areaBuilder = Builder fold words
 604   where fold (a, _, _) f z = f a z
 605         words areaSize areaMap a =
 606           case lookupFM areaMap a of
 607             Just addr -> [addr .. addr + (lookupFM areaSize a `orElse`
 608                                           pprPanic "wordsOccupied: unknown area" (ppr a))]
 609             Nothing   -> []
 610
 611 --slotBuilder :: IGraphBuilder (Area, Int)
 612 --slotBuilder = undefined
 613
 614 -- Now, we can build the interference graph.
 615 -- The usual story: a definition interferes with all live outs and all other
 616 -- definitions.
 617 type IGraph x = FiniteMap x (Set x)
 618 type IGPair x = (IGraph x, IGraphBuilder x)
 619 igraph :: (Ord x) => IGraphBuilder x -> BlockEnv SubAreaSet -> LGraph Middle Last -> IGraph x
 620 igraph builder env g = foldr interfere emptyFM (postorder_dfs g)
 621   where foldN = foldNodes builder
 622         interfere block igraph =
 623           let (h, l) = goto_end (unzip block)
 624               --heads :: ZHead Middle -> (IGraph x, SubAreaSet) -> IGraph x
 625               heads (ZFirst _ _) (igraph, _)       = igraph
 626               heads (ZHead h m)  (igraph, liveOut) =
 627                 heads h (addEdges igraph m liveOut, liveInSlots liveOut m)
 628               -- add edges between a def and the other defs and liveouts
 629               addEdges igraph i out = fst $ foldSlotsDefd addDef (igraph, out) i
 630               addDef (igraph, out) def@(a, _, _) =
 631                 (foldN def (addDefN out) igraph,
 632                  addToFM out a (snd $ liveGen def (lookupWithDefaultFM out [] a)))
 633               addDefN out n igraph =
 634                 let addEdgeNO o igraph = foldN o addEdgeNN igraph
 635                     addEdgeNN n' igraph = addEdgeNN' n n' $ addEdgeNN' n' n igraph
 636                     addEdgeNN' n n' igraph = addToFM igraph n (addToFM set n' ())
 637                       where set = lookupWithDefaultFM igraph emptyFM n
 638                 in foldFM (\ _ os igraph -> foldr addEdgeNO igraph os) igraph out
 639               env' bid = lookupBlockEnv env bid `orElse` panic "unknown blockId in igraph"
 640           in heads h $ case l of LastExit    -> (igraph, emptyFM)
 641                                  LastOther l -> (addEdges igraph l $ liveLastOut env' l,
 642                                                  liveLastIn env' l)
 643
 644 -- Before allocating stack slots, we need to collect one more piece of information:
 645 -- what's the highest offset (in bytes) used in each Area?
 646 -- We'll need to allocate that much space for each Area.
 647 getAreaSize :: LGraph Middle Last -> AreaMap
 648 getAreaSize g@(LGraph _ off _) =
 649   fold_blocks (fold_fwd_block first add add) (unitFM (CallArea Old) off) g
 650   where first _ z = z
 651         add   x z = foldSlotsUsed addSlot (foldSlotsDefd addSlot z x) x
 652         addSlot z (a, off, _) = addToFM z a $ max off $ lookupWithDefaultFM z 0 a
 653
 654
 655 -- Find the Stack slots occupied by the subarea's conflicts
 656 conflictSlots :: Ord x => IGPair x -> AreaMap -> AreaMap -> SubArea -> Set Int
 657 conflictSlots (ig, Builder foldNodes wordsOccupied) areaSize areaMap subarea =
 658   foldNodes subarea foldNode emptyFM
 659   where foldNode n set = foldFM conflict set $ lookupWithDefaultFM ig emptyFM n
 660         conflict n' () set = liveInSlots areaMap n' set
 661         -- Add stack slots occupied by igraph node n
 662         liveInSlots areaMap n set = foldr setAdd set (wordsOccupied areaSize areaMap n)
 663         setAdd w s = addToFM s w ()
 664
 665 -- Find any open space on the stack, starting from the offset.
 666 freeSlotFrom :: Ord x => IGPair x -> AreaMap -> Int -> AreaMap -> Area -> Int
 667 freeSlotFrom ig areaSize offset areaMap area =
 668   let size = lookupFM areaSize area `orElse` 0
 669       conflicts = conflictSlots ig areaSize areaMap (area, size, size)
 670       -- Find a space big enough to hold the area
 671       findSpace curr 0 = curr
 672       findSpace curr cnt = -- target slot, considerand, # left to check
 673         if elemFM curr conflicts then
 674           findSpace (curr + size) size
 675         else findSpace (curr - 1) (cnt - 1)
 676   in findSpace (offset + size) size
 677
 678 -- Find an open space on the stack, and assign it to the area.
 679 allocSlotFrom :: Ord x => IGPair x -> AreaMap -> Int -> AreaMap -> Area -> AreaMap
 680 allocSlotFrom ig areaSize from areaMap area =
 681   if elemFM area areaMap then areaMap
 682   else addToFM areaMap area $ freeSlotFrom ig areaSize from areaMap area
 683
 684 -- | Greedy stack layout.
 685 -- Compute liveness, build the interference graph, and allocate slots for the areas.
 686 -- We visit each basic block in a (generally) forward order.
 687 -- At each instruction that names a register subarea r, we immediately allocate
 688 -- any available slot on the stack by the following procedure:
 689 --  1. Find the nodes N' that conflict with r
 690 --  2. Find the stack slots used for N'
 691 --  3. Choose a contiguous stack space s not in N' (s must be large enough to hold r)
 692 -- For a CallArea, we allocate the stack space only when we reach a function
 693 -- call that returns to the CallArea's blockId.
 694 -- We use a similar procedure, with one exception: the stack space
 695 -- must be allocated below the youngest stack slot that is live out.
 696
 697 -- Note: The stack pointer only has to be younger than the youngest live stack slot
 698 -- at proc points. Otherwise, the stack pointer can point anywhere.
 699 layout :: ProcPointSet -> BlockEnv SubAreaSet -> LGraph Middle Last -> AreaMap
 700 layout procPoints env g@(LGraph _ entrySp _) =
 701   let builder = areaBuilder
 702       ig = (igraph builder env g, builder)
 703       env' bid = lookupBlockEnv env bid `orElse` panic "unknown blockId in igraph"
 704       areaSize = getAreaSize g
 705       -- Find the slots that are live-in to the block
 706       live_in (ZTail m l) = liveInSlots (live_in l) m
 707       live_in (ZLast (LastOther l)) = liveLastIn env' l
 708       live_in (ZLast LastExit) = emptyFM
 709       -- Find the youngest live stack slot
 710       youngest_live areaMap live = fold_subareas young_slot live 0
 711         where young_slot (a, o, _) z = case lookupFM areaMap a of
 712                                          Just top -> max z $ top + o
 713                                          Nothing  -> z
 714       -- Allocate space for spill slots and call areas
 715       allocVarSlot = allocSlotFrom ig areaSize 0
 716       allocCallSlot areaMap (Block id _ t) | elemBlockSet id procPoints =
 717         allocSlotFrom ig areaSize (youngest_live areaMap $ live_in t)
 718                       areaMap (CallArea (Young id))
 719       allocCallSlot areaMap _ = areaMap
 720       alloc i areaMap = foldSlotsDefd alloc' (foldSlotsUsed alloc' areaMap i) i
 721         where alloc' areaMap (a@(RegSlot _), _, _) = allocVarSlot areaMap a
 722               alloc' areaMap _ = areaMap
 723       layoutAreas areaMap b@(Block _ _ t) = layout areaMap t
 724         where layout areaMap (ZTail m t) = layout (alloc m areaMap) t
 725               layout areaMap (ZLast _) = allocCallSlot areaMap b
 726       areaMap = foldl layoutAreas (addToFM emptyFM (CallArea Old) 0) $ postorder_dfs g
 727   in pprTrace "ProcPoints" (ppr procPoints) $
 728        pprTrace "Area SizeMap" (ppr areaSize) $
 729          pprTrace "Entry SP" (ppr entrySp) $
 730            pprTrace "Area Map" (ppr areaMap) $ areaMap
 731
 732 -- After determining the stack layout, we can:
 733 -- 1. Replace references to stack Areas with addresses relative to the stack
 734 --    pointer.
 735 -- 2. Insert adjustments to the stack pointer to ensure that it is at a
 736 --    conventional location at each proc point.
 737 --    Because we don't take interrupts on the execution stack, we only need the
 738 --    stack pointer to be younger than the live values on the stack at proc points.
 739 -- 3. At some point, we should check for stack overflow, but not just yet.
 740 manifestSP :: ProcPointSet -> BlockEnv Status -> AreaMap ->
 741                 LGraph Middle Last -> FuelMonad (LGraph Middle Last)
 742 manifestSP procPoints procMap areaMap g@(LGraph entry args blocks) =
 743   liftM (LGraph entry args) blocks'
 744   where blocks' = foldl replB (return emptyBlockEnv) (postorder_dfs g)
 745         slot a = pprTrace "slot" (ppr a) $ lookupFM areaMap a `orElse` panic "unallocated Area"
 746         slot' id = pprTrace "slot'" (ppr id)$ slot $ CallArea (Young id)
 747         sp_on_entry id | id == entry = slot (CallArea Old) + args
 748         sp_on_entry id | elemBlockSet id procPoints =
 749           case lookupBlockEnv blocks id of
 750             Just (Block _ (Just o) _) -> slot' id + o
 751             Just (Block _ Nothing  _) -> slot' id
 752             Nothing -> panic "procpoint dropped from block env"
 753         sp_on_entry id =
 754           case lookupBlockEnv procMap id of
 755             Just (ReachedBy pp) -> case uniqSetToList pp of
 756                                      [id] -> sp_on_entry id
 757                                      _    -> panic "block not reached by single proc point"
 758             Just ProcPoint -> panic "procpoint not in procpoint set"
 759             Nothing -> panic "block not found in procmap"
 760         -- On entry to procpoints, the stack pointer is conventional;
 761         -- otherwise, we check the SP set by predecessors.
 762         replB :: FuelMonad (BlockEnv CmmBlock) -> CmmBlock -> FuelMonad (BlockEnv CmmBlock)
 763         replB blocks (Block id o t) =
 764           do bs <- replTail (Block id o) spIn t
 765              pprTrace "spIn" (ppr id <+> ppr spIn)$ liftM (flip (foldr insertBlock) bs) blocks
 766           where spIn = sp_on_entry id
 767         replTail :: (ZTail Middle Last -> CmmBlock) -> Int -> (ZTail Middle Last) ->
 768                     FuelMonad ([CmmBlock])
 769         replTail h spOff (ZTail m t) = replTail (h . ZTail (middle spOff m)) spOff t
 770         replTail h spOff (ZLast (LastOther l)) = fixSp h spOff l
 771         replTail h _   l@(ZLast LastExit) = return [h l]
 772         middle spOff m = mapExpDeepMiddle (replSlot spOff) m
 773         last   spOff l = mapExpDeepLast   (replSlot spOff) l
 774         replSlot spOff (CmmStackSlot a i) = CmmRegOff (CmmGlobal Sp) (spOff - (slot a + i))
 775         replSlot _ e = e
 776         -- The block must establish the SP expected at each successsor.
 777         fixSp :: (ZTail Middle Last -> CmmBlock) -> Int -> Last -> FuelMonad ([CmmBlock])
 778         fixSp h spOff l@(LastReturn n)          = updSp h spOff (slot (CallArea Old) + n) l
 779         fixSp h spOff l@(LastJump _ n)          = updSp h spOff (slot (CallArea Old) + n) l
 780         fixSp h spOff l@(LastCall _ (Just k) n) = updSp h spOff (slot' k + n)             l
 781         fixSp h spOff l@(LastCall _ Nothing  n) = updSp h spOff (slot (CallArea Old) + n) l
 782         fixSp h spOff l@(LastBranch k) | elemBlockSet k procPoints =
 783           pprTrace "updSp" (ppr k <> ppr spOff <> ppr (sp_on_entry k)) $ updSp h spOff (sp_on_entry k) l
 784         fixSp h spOff l = liftM (uncurry (:)) $ fold_succs succ l $ return (b, [])
 785           where b = h (ZLast (LastOther (last spOff l)))
 786                 succ succId z =
 787                   let succSp = sp_on_entry succId in
 788                   if elemBlockSet succId procPoints && succSp /= spOff then
 789                     do (b,  bs)  <- z
 790                        (b', bs') <- insertBetween b [setSpMid spOff succSp] succId
 791                        return (b', bs ++ bs')
 792                   else z
 793         updSp h old new l = return [h $ setSp old new $ ZLast $ LastOther (last new l)]
 794         setSpMid sp sp' = MidAssign (CmmGlobal Sp) e
 795           where e = CmmMachOp (MO_Add wordWidth) [CmmReg (CmmGlobal Sp), off]
 796                 off = CmmLit $ CmmInt (toInteger $ sp - sp') wordWidth
 797         setSp sp sp' t = if sp == sp' then t else ZTail (setSpMid sp sp') t
 798
 799 ----------------------------------------------------------------
 800 -- Building InfoTables
 801
 802 type CAFSet = FiniteMap CLabel ()
 803
 804 -- First, an analysis to find live CAFs.
 805 cafLattice :: DataflowLattice CAFSet
 806 cafLattice = DataflowLattice "live cafs" emptyFM add True
 807   where add new old = if sizeFM new' > sizeFM old then aTx new' else noTx new'
 808           where new' = new `plusFM` old
 809
 810 cafTransfers :: BackwardTransfers Middle Last CAFSet
 811 cafTransfers = BackwardTransfers first middle last
 812     where first  live _ = live
 813           middle live m = pprTrace "cafmiddle" (ppr m) $ foldExpDeepMiddle addCaf m live
 814           last   env  l = foldExpDeepLast addCaf l (joinOuts cafLattice env l)
 815           addCaf e set = case e of
 816                  CmmLit (CmmLabel c) -> add c set
 817                  CmmLit (CmmLabelOff c _) -> add c set
 818                  CmmLit (CmmLabelDiffOff c1 c2 _) -> add c1 $ add c2 set
 819                  _ -> set
 820           add c s = pprTrace "CAF analysis saw label" (ppr c) $
 821                      if hasCAF c then (pprTrace "has caf" (ppr c) $ addToFM s c ()) else (pprTrace "no cafs" (ppr c) $ s)
 822
 823 type CafFix a = FuelMonad (BackwardFixedPoint Middle Last CAFSet a)
 824 cafAnal :: LGraph Middle Last -> FuelMonad (BlockEnv CAFSet)
 825 cafAnal g = liftM zdfFpFacts (res :: CafFix ())
 826   where res = zdfSolveFromL emptyBlockEnv "live CAF analysis" cafLattice
 827                             cafTransfers (fact_bot cafLattice) g
 828
 829 -- Once we have found the CAFs, we need to do two things:
 830 -- 1. Build a table of all the CAFs used in the procedure.
 831 -- 2. Compute the C_SRT describing the subset of CAFs live at each procpoint.
 832 buildCafs :: (BlockEnv CAFSet) -> FuelMonad ([CmmTopZ], BlockEnv C_SRT)
 833 buildCafs blockCafs =
 834   -- This is surely the wrong way to get names, as in BlockId
 835   do top_lbl <- getUniqueM >>= \ u -> return $ mkSRTLabel (mkFCallName u "srt") MayHaveCafRefs
 836      let allCafs = foldBlockEnv (\_ x y -> plusFM x y) emptyFM blockCafs
 837          caf_entry (ix, map, tbl') caf = (ix + 1, addToFM map caf ix, entry : tbl')
 838            where entry = CmmStaticLit $ CmmLabel caf
 839          (_::Int, cafMap, tbl') = foldl caf_entry (0, emptyFM, []) $ keysFM allCafs
 840          top_tbl = CmmData RelocatableReadOnlyData $ CmmDataLabel top_lbl : reverse tbl'
 841          sub_srt id cafs z =
 842            do (tbls, blocks) <- z
 843               (top, srt)     <- procpointSRT top_lbl cafMap cafs
 844               let blocks' = extendBlockEnv blocks id srt
 845               case top of Just t  -> return (t:tbls, blocks')
 846                           Nothing -> return (tbls,   blocks')
 847      (sub_tbls, blockSRTs) <- foldBlockEnv sub_srt (return ([], emptyBlockEnv)) blockCafs
 848      return (top_tbl :  sub_tbls, blockSRTs)
 849
 850 -- Construct an SRT bitmap.
 851 -- Adapted from simpleStg/SRT.lhs, which expects Id's.
 852 procpointSRT :: CLabel -> FiniteMap CLabel Int -> FiniteMap CLabel () ->
 853                 FuelMonad (Maybe CmmTopZ, C_SRT)
 854 procpointSRT top_srt top_table entries
 855  | isEmptyFM entries = pprTrace "nil SRT" (ppr top_srt) $ return (Nothing, NoC_SRT)
 856  | otherwise  = pprTrace "non-nil SRT" (ppr top_srt) $ bitmap `seq` to_SRT top_srt offset len bitmap
 857   where
 858     ints = map (expectJust "constructSRT" . lookupFM top_table) (keysFM entries)
 859     sorted_ints = sortLe (<=) ints
 860     offset = head sorted_ints
 861     bitmap_entries = map (subtract offset) sorted_ints
 862     len = P.last bitmap_entries + 1
 863     bitmap = intsToBitmap len bitmap_entries
 864
 865 -- Adapted from codeGen/StgCmmUtils, which converts from SRT to C_SRT.
 866 to_SRT :: CLabel -> Int -> Int -> Bitmap -> FuelMonad (Maybe CmmTopZ, C_SRT)
 867 to_SRT top_srt off len bmp
 868   | len > widthInBits wordWidth `div` 2 || bmp == [fromIntegral srt_escape]
 869   = do id <- getUniqueM
 870        let srt_desc_lbl = mkLargeSRTLabel id
 871            tbl = CmmData RelocatableReadOnlyData $
 872                    CmmDataLabel srt_desc_lbl : map CmmStaticLit
 873                      ( cmmLabelOffW top_srt off
 874                      : mkWordCLit (fromIntegral len)
 875                      : map mkWordCLit bmp)
 876        return (Just tbl, C_SRT srt_desc_lbl 0 srt_escape)
 877   | otherwise
 878   = return (Nothing, C_SRT top_srt off (fromIntegral (head bmp)))
 879         -- The fromIntegral converts to StgHalfWord
 880
 881 -- Given a block ID, we return a representation of the layout of the stack.
 882 -- If the element is `Nothing`, then it represents an empty or dead
 883 -- word on the stack.
 884 -- If the element is `Just` a register, then it represents a live spill slot
 885 -- for the register; note that a register may occupy multiple words.
 886 -- The head of the list represents the young end of the stack where the infotable
 887 -- pointer for the block `Bid` is stored.
 888 -- The infotable pointer itself is not included in the list.
 889 live_vars :: BlockEnv SubAreaSet -> AreaMap -> BlockId -> [Maybe LocalReg]
 890 live_vars slotEnv areaMap bid = slotsToList youngByte liveSlots
 891   where slotsToList 0 [] = []
 892         slotsToList 0 ((_, r, _) : _)  = pprPanic "slot left off live_vars" (ppr r)
 893         slotsToList n _ | n < 0 = panic "stack slots not allocated on word boundaries?"
 894         slotsToList n ((n', r, w) : rst) =
 895           if n == n' then Just r : slotsToList (n - w) rst
 896           else Nothing : slotsToList (n - wORD_SIZE) rst
 897         slotsToList n [] = Nothing : slotsToList (n - wORD_SIZE) []
 898         liveSlots = sortBy (\ (_,off,_) (_,off',_) -> compare off' off)
 899                       (foldFM (\_ -> flip $ foldr add_slot) [] slots)
 900         add_slot (a@(RegSlot r@(LocalReg _ ty)), off, w) rst =
 901           if off == w && widthInBytes (typeWidth ty) == w then
 902             (expectJust "add_slot" (lookupFM areaMap a), r, w) : rst
 903           else panic "live_vars: only part of a variable live at a proc point"
 904         add_slot (CallArea Old, off, w) rst =
 905           if off == wORD_SIZE && w == wORD_SIZE then
 906              rst -- the return infotable should be live
 907           else pprPanic "CallAreas must not be live across function calls" (ppr bid)
 908         add_slot (CallArea (Young _), _, _) _ =
 909           pprPanic "CallAreas must not be live across function calls" (ppr bid)
 910         slots = expectJust "live_vars slots" $ lookupBlockEnv slotEnv bid
 911         youngByte = expectJust "live_vars bid_pos" $ lookupFM areaMap (CallArea (Young bid))
 912
 913 ----------------------------------------------------------------
 914
 915 {-
 916 Note [Direct reachability]
 917
 918 Block B is directly reachable from proc point P iff control can flow
 919 from P to B without passing through an intervening proc point.
 920 -}
 921
 922 ----------------------------------------------------------------
 923
 924 {-
 925 Note [No simple dataflow]
 926
 927 Sadly, it seems impossible to compute the proc points using a single
 928 dataflow pass.  One might attempt to use this simple lattice:
 929
 930   data Location = Unknown
 931                 | InProc BlockId -- node is in procedure headed by the named proc point
 932                 | ProcPoint      -- node is itself a proc point
 933
 934 At a join, a node in two different blocks becomes a proc point.
 935 The difficulty is that the change of information during iterative
 936 computation may promote a node prematurely.  Here's a program that
 937 illustrates the difficulty:
 938
 939   f () {
 940   entry:
 941     ....
 942   L1:
 943     if (...) { ... }
 944     else { ... }
 945
 946   L2: if (...) { g(); goto L1; }
 947       return x + y;
 948   }
 949
 950 The only proc-point needed (besides the entry) is L1.  But in an
 951 iterative analysis, consider what happens to L2.  On the first pass
 952 through, it rises from Unknown to 'InProc entry', but when L1 is
 953 promoted to a proc point (because it's the successor of g()), L1's
 954 successors will be promoted to 'InProc L1'.  The problem hits when the
 955 new fact 'InProc L1' flows into L2 which is already bound to 'InProc entry'.
 956 The join operation makes it a proc point when in fact it needn't be,
 957 because its immediate dominator L1 is already a proc point and there
 958 are no other proc points that directly reach L2.
 959 -}
 960
 961
 962
 963 {- Note [Separate Adams optimization]
 964 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 965 It may be worthwhile to attempt the Adams optimization by rewriting
 966 the graph before the assignment of proc-point protocols.  Here are a
 967 couple of rules:
 968
 969   g() returns to k;                    g() returns to L;
 970   k: CopyIn c ress; goto L:
 971    ...                        ==>        ...
 972   L: // no CopyIn node here            L: CopyIn c ress;
 973
 974
 975 And when c == c' and ress == ress', this also:
 976
 977   g() returns to k;                    g() returns to L;
 978   k: CopyIn c ress; goto L:
 979    ...                        ==>        ...
 980   L: CopyIn c' ress'                   L: CopyIn c' ress' ;
 981
 982 In both cases the goal is to eliminate k.
 983 -}