compiler/cmm/CmmStackLayout.hs

   1 module CmmStackLayout
   2     ( SlotEnv, liveSlotAnal, liveSlotTransfers, removeLiveSlotDefs
   3     , layout, manifestSP, igraph, areaBuilder
   4     , stubSlotsOnDeath ) -- to help crash early during debugging
   5 where
   6
   7 import Constants
   8 import qualified Prelude as P
   9 import Prelude hiding (zip, unzip, last)
  10
  11 import BlockId
  12 import CmmExpr
  13 import CmmProcPointZ
  14 import CmmTx
  15 import DFMonad
  16 import FiniteMap
  17 import Maybes
  18 import MkZipCfg
  19 import MkZipCfgCmm hiding (CmmBlock, CmmGraph)
  20 import Monad
  21 import Outputable
  22 import Panic
  23 import SMRep (ByteOff)
  24 import ZipCfg
  25 import ZipCfg as Z
  26 import ZipCfgCmmRep
  27 import ZipDataflow
  28
  29 ------------------------------------------------------------------------
  30 --                    Stack Layout                                    --
  31 ------------------------------------------------------------------------
  32
  33 -- | Before we lay out the stack, we need to know something about the
  34 -- liveness of the stack slots. In particular, to decide whether we can
  35 -- reuse a stack location to hold multiple stack slots, we need to know
  36 -- when each of the stack slots is used.
  37 -- Although tempted to use something simpler, we really need a full interference
  38 -- graph. Consider the following case:
  39 --   case <...> of
  40 --     1 -> <spill x>; // y is dead out
  41 --     2 -> <spill y>; // x is dead out
  42 --     3 -> <spill x and y>
  43 -- If we consider the arms in order and we use just the deadness information given by a
  44 -- dataflow analysis, we might decide to allocate the stack slots for x and y
  45 -- to the same stack location, which will lead to incorrect code in the third arm.
  46 -- We won't make this mistake with an interference graph.
  47
  48 -- First, the liveness analysis.
  49 -- We represent a slot with an area, an offset into the area, and a width.
  50 -- Tracking the live slots is a bit tricky because there may be loads and stores
  51 -- into only a part of a stack slot (e.g. loading the low word of a 2-word long),
  52 -- e.g. Slot A 0 8 overlaps with Slot A 4 4.
  53 --
  54 -- The definition of a slot set is intended to reduce the number of overlap
  55 -- checks we have to make. There's no reason to check for overlap between
  56 -- slots in different areas, so we segregate the map by Area's.
  57 -- We expect few slots in each Area, so we collect them in an unordered list.
  58 -- To keep these lists short, any contiguous live slots are coalesced into
  59 -- a single slot, on insertion.
  60
  61 slotLattice :: DataflowLattice SubAreaSet
  62 slotLattice = DataflowLattice "live slots" emptyFM add False
  63   where add new old = case foldFM addArea (False, old) new of
  64                         (True,  x) -> aTx  x
  65                         (False, x) -> noTx x
  66         addArea a newSlots z = foldr (addSlot a) z newSlots
  67         addSlot a slot (changed, map) =
  68           let (c, live) = liveGen slot $ lookupWithDefaultFM map [] a
  69           in (c || changed, addToFM map a live)
  70
  71 type SlotEnv   = BlockEnv SubAreaSet
  72 type SlotFix a = FuelMonad (BackwardFixedPoint Middle Last SubAreaSet a)
  73
  74 liveSlotAnal :: LGraph Middle Last -> FuelMonad SlotEnv
  75 liveSlotAnal g = liftM zdfFpFacts (res :: SlotFix ())
  76   where res = zdfSolveFromL emptyBlockEnv "live slot analysis" slotLattice
  77                             liveSlotTransfers (fact_bot slotLattice) g
  78
  79 -- Add the subarea s to the subareas in the list-set (possibly coalescing it with
  80 -- adjacent subareas), and also return whether s was a new addition.
  81 liveGen :: SubArea -> [SubArea] -> (Bool, [SubArea])
  82 liveGen s set = liveGen' s set []
  83   where liveGen' s [] z = (True, s : z)
  84         liveGen' s@(a, hi, w) (s'@(a', hi', w') : rst) z =
  85           if a /= a' || hi < lo' || lo > hi' then    -- no overlap
  86             liveGen' s rst (s' : z)
  87           else if s' `contains` s then               -- old contains new
  88             (False, set)
  89           else                                       -- overlap: coalesce the slots
  90             let new_hi = max hi hi'
  91                 new_lo = min lo lo'
  92             in liveGen' (a, new_hi, new_hi - new_lo) rst z
  93           where lo  = hi  - w  -- remember: areas grow down
  94                 lo' = hi' - w'
  95         contains (a, hi, w) (a', hi', w') =
  96           a == a' && hi >= hi' && hi - w <= hi' - w'
  97
  98 liveKill :: SubArea -> [SubArea] -> [SubArea]
  99 liveKill (a, hi, w) set = -- pprTrace "killing slots in area" (ppr a) $
 100                           liveKill' set []
 101   where liveKill' [] z = z
 102         liveKill' (s'@(a', hi', w') : rst) z =
 103           if a /= a' || hi < lo' || lo > hi' then    -- no overlap
 104             liveKill' rst (s' : z)
 105           else                                       -- overlap: split the old slot
 106             let z'  = if hi' > hi  then (a, hi', hi' - hi)  : z else z
 107                 z'' = if lo  > lo' then (a, lo,  lo  - lo') : z' else z'
 108             in liveKill' rst z''
 109           where lo  = hi  - w  -- remember: areas grow down
 110                 lo' = hi' - w'
 111
 112 -- Note: the stack slots that hold variables returned on the stack are not
 113 -- considered live in to the block -- we treat the first node as a definition site.
 114 -- BEWARE?: Am I being a little careless here in failing to check for the
 115 -- entry Id (which would use the CallArea Old).
 116 liveSlotTransfers :: BackwardTransfers Middle Last SubAreaSet
 117 liveSlotTransfers =
 118   BackwardTransfers first liveInSlots liveLastIn
 119     where first id live = delFromFM live (CallArea (Young id))
 120
 121 -- Slot sets: adding slots, removing slots, and checking for membership.
 122 liftToArea :: Area -> ([SubArea] -> [SubArea]) -> SubAreaSet -> SubAreaSet
 123 addSlot, removeSlot :: SubAreaSet -> SubArea -> SubAreaSet
 124 elemSlot            :: SubAreaSet -> SubArea -> Bool
 125 liftToArea a f map = addToFM map a $ f (lookupWithDefaultFM map [] a)
 126 addSlot    live (a, i, w) = liftToArea a (snd . liveGen  (a, i, w)) live
 127 removeSlot live (a, i, w) = liftToArea a       (liveKill (a, i, w)) live
 128 elemSlot   live (a, i, w) =
 129   not $ fst $ liveGen  (a, i, w) (lookupWithDefaultFM live [] a)
 130
 131 removeLiveSlotDefs :: (DefinerOfSlots s, UserOfSlots s) => SubAreaSet -> s -> SubAreaSet
 132 removeLiveSlotDefs = foldSlotsDefd removeSlot
 133
 134 liveInSlots :: (DefinerOfSlots s, UserOfSlots s) => s -> SubAreaSet -> SubAreaSet
 135 liveInSlots x live = foldSlotsUsed addSlot (removeLiveSlotDefs live x) x
 136
 137 liveLastIn :: Last -> (BlockId -> SubAreaSet) -> SubAreaSet
 138 liveLastIn l env = liveInSlots l (liveLastOut env l)
 139
 140 -- Don't forget to keep the outgoing parameters in the CallArea live,
 141 -- as well as the update frame.
 142 -- Note: We have to keep the update frame live at a call because of the
 143 -- case where the function doesn't return -- in that case, there won't
 144 -- be a return to keep the update frame live. We'd still better keep the
 145 -- info pointer in the update frame live at any call site;
 146 -- otherwise we could screw up the garbage collector.
 147 liveLastOut :: (BlockId -> SubAreaSet) -> Last -> SubAreaSet
 148 liveLastOut env l =
 149   case l of
 150     LastCall _ Nothing n _ _ ->
 151       add_area (CallArea Old) n out -- add outgoing args (includes upd frame)
 152     LastCall _ (Just k) n _ (Just _) ->
 153       add_area (CallArea Old) n (add_area (CallArea (Young k)) n out)
 154     LastCall _ (Just k) n _ Nothing ->
 155       add_area (CallArea (Young k)) n out
 156     _ -> out
 157   where out = joinOuts slotLattice env l
 158         add_area _ n live | n == 0 = live
 159         add_area a n live =
 160           addToFM live a $ snd $ liveGen (a, n, n) $ lookupWithDefaultFM live [] a
 161
 162 -- The liveness analysis must be precise: otherwise, we won't know if a definition
 163 -- should really kill a live-out stack slot.
 164 -- But the interference graph does not have to be precise -- it might decide that
 165 -- any live areas interfere. To maintain both a precise analysis and an imprecise
 166 -- interference graph, we need to convert the live-out stack slots to graph nodes
 167 -- at each and every instruction; rather than reconstruct a new list of nodes
 168 -- every time, I provide a function to fold over the nodes, which should be a
 169 -- reasonably efficient approach for the implementations we envision.
 170 -- Of course, it will probably be much easier to program if we just return a list...
 171 type Set x = FiniteMap x ()
 172 data IGraphBuilder n =
 173   Builder { foldNodes     :: forall z. SubArea -> (n -> z -> z) -> z -> z
 174           , _wordsOccupied :: AreaMap -> AreaMap -> n -> [Int]
 175           }
 176
 177 areaBuilder :: IGraphBuilder Area
 178 areaBuilder = Builder fold words
 179   where fold (a, _, _) f z = f a z
 180         words areaSize areaMap a =
 181           case lookupFM areaMap a of
 182             Just addr -> [addr .. addr + (lookupFM areaSize a `orElse`
 183                                           pprPanic "wordsOccupied: unknown area" (ppr a))]
 184             Nothing   -> []
 185
 186 --slotBuilder :: IGraphBuilder (Area, Int)
 187 --slotBuilder = undefined
 188
 189 -- Now, we can build the interference graph.
 190 -- The usual story: a definition interferes with all live outs and all other
 191 -- definitions.
 192 type IGraph x = FiniteMap x (Set x)
 193 type IGPair x = (IGraph x, IGraphBuilder x)
 194 igraph :: (Ord x) => IGraphBuilder x -> SlotEnv -> LGraph Middle Last -> IGraph x
 195 igraph builder env g = foldr interfere emptyFM (postorder_dfs g)
 196   where foldN = foldNodes builder
 197         interfere block igraph =
 198           let (h, l) = goto_end (unzip block)
 199               --heads :: ZHead Middle -> (IGraph x, SubAreaSet) -> IGraph x
 200               heads (ZFirst _) (igraph, _)       = igraph
 201               heads (ZHead h m)    (igraph, liveOut) =
 202                 heads h (addEdges igraph m liveOut, liveInSlots m liveOut)
 203               -- add edges between a def and the other defs and liveouts
 204               addEdges igraph i out = fst $ foldSlotsDefd addDef (igraph, out) i
 205               addDef (igraph, out) def@(a, _, _) =
 206                 (foldN def (addDefN out) igraph,
 207                  addToFM out a (snd $ liveGen def (lookupWithDefaultFM out [] a)))
 208               addDefN out n igraph =
 209                 let addEdgeNO o igraph = foldN o addEdgeNN igraph
 210                     addEdgeNN n' igraph = addEdgeNN' n n' $ addEdgeNN' n' n igraph
 211                     addEdgeNN' n n' igraph = addToFM igraph n (addToFM set n' ())
 212                       where set = lookupWithDefaultFM igraph emptyFM n
 213                 in foldFM (\ _ os igraph -> foldr addEdgeNO igraph os) igraph out
 214               env' bid = lookupBlockEnv env bid `orElse` panic "unknown blockId in igraph"
 215           in heads h $ case l of LastExit    -> (igraph, emptyFM)
 216                                  LastOther l -> (addEdges igraph l $ liveLastOut env' l,
 217                                                  liveLastIn l env')
 218
 219 -- Before allocating stack slots, we need to collect one more piece of information:
 220 -- what's the highest offset (in bytes) used in each Area?
 221 -- We'll need to allocate that much space for each Area.
 222 getAreaSize :: ByteOff -> LGraph Middle Last -> AreaMap
 223 getAreaSize entry_off g@(LGraph _ _) =
 224   fold_blocks (fold_fwd_block first add_regslots last)
 225               (unitFM (CallArea Old) entry_off) g
 226   where first _  z = z
 227         last l@(LastOther (LastCall _ Nothing args res _)) z =
 228           add_regslots l (add (add z area args) area res)
 229           where area = CallArea Old
 230         last l@(LastOther (LastCall _ (Just k) args res _)) z =
 231           add_regslots l (add (add z area args) area res)
 232           where area = CallArea (Young k)
 233         last l z = add_regslots l z
 234         add_regslots i z = foldSlotsUsed addSlot (foldSlotsDefd addSlot z i) i
 235         addSlot z (a@(RegSlot (LocalReg _ ty)), _, _) =
 236           add z a $ widthInBytes $ typeWidth ty
 237         addSlot z _ = z
 238         add z a off = addToFM z a (max off (lookupWithDefaultFM z 0 a))
 239
 240
 241 -- Find the Stack slots occupied by the subarea's conflicts
 242 conflictSlots :: Ord x => IGPair x -> AreaMap -> AreaMap -> SubArea -> Set Int
 243 conflictSlots (ig, Builder foldNodes wordsOccupied) areaSize areaMap subarea =
 244   foldNodes subarea foldNode emptyFM
 245   where foldNode n set = foldFM conflict set $ lookupWithDefaultFM ig emptyFM n
 246         conflict n' () set = liveInSlots areaMap n' set
 247         -- Add stack slots occupied by igraph node n
 248         liveInSlots areaMap n set = foldr setAdd set (wordsOccupied areaSize areaMap n)
 249         setAdd w s = addToFM s w ()
 250
 251 -- Find any open space on the stack, starting from the offset.
 252 -- If the area is a CallArea or a spill slot for a pointer, then it must
 253 -- be word-aligned.
 254 freeSlotFrom :: Ord x => IGPair x -> AreaMap -> Int -> AreaMap -> Area -> Int
 255 freeSlotFrom ig areaSize offset areaMap area =
 256   let size = lookupFM areaSize area `orElse` 0
 257       conflicts = conflictSlots ig areaSize areaMap (area, size, size)
 258       -- CallAreas and Ptrs need to be word-aligned (round up!)
 259       align = case area of CallArea _                                -> align'
 260                            RegSlot  r | isGcPtrType (localRegType r) -> align'
 261                            RegSlot  _                                -> id
 262       align' n = (n + (wORD_SIZE - 1)) `div` wORD_SIZE * wORD_SIZE
 263       -- Find a space big enough to hold the area
 264       findSpace curr 0 = curr
 265       findSpace curr cnt = -- part of target slot, # of bytes left to check
 266         if elemFM curr conflicts then
 267           findSpace (align (curr + size)) size -- try the next (possibly) open space
 268         else findSpace (curr - 1) (cnt - 1)
 269   in findSpace (align (offset + size)) size
 270
 271 -- Find an open space on the stack, and assign it to the area.
 272 allocSlotFrom :: Ord x => IGPair x -> AreaMap -> Int -> AreaMap -> Area -> AreaMap
 273 allocSlotFrom ig areaSize from areaMap area =
 274   if elemFM area areaMap then areaMap
 275   else addToFM areaMap area $ freeSlotFrom ig areaSize from areaMap area
 276
 277 -- | Greedy stack layout.
 278 -- Compute liveness, build the interference graph, and allocate slots for the areas.
 279 -- We visit each basic block in a (generally) forward order.
 280 -- At each instruction that names a register subarea r, we immediately allocate
 281 -- any available slot on the stack by the following procedure:
 282 --  1. Find the nodes N' that conflict with r
 283 --  2. Find the stack slots used for N'
 284 --  3. Choose a contiguous stack space s not in N' (s must be large enough to hold r)
 285 -- For a CallArea, we allocate the stack space only when we reach a function
 286 -- call that returns to the CallArea's blockId.
 287 -- We use a similar procedure, with one exception: the stack space
 288 -- must be allocated below the youngest stack slot that is live out.
 289
 290 -- Note: The stack pointer only has to be younger than the youngest live stack slot
 291 -- at proc points. Otherwise, the stack pointer can point anywhere.
 292 layout :: ProcPointSet -> SlotEnv -> ByteOff -> LGraph Middle Last -> AreaMap
 293 layout procPoints env entry_off g =
 294   let ig = (igraph areaBuilder env g, areaBuilder)
 295       env' bid = lookupBlockEnv env bid `orElse` panic "unknown blockId in igraph"
 296       areaSize = getAreaSize entry_off g
 297       -- Find the slots that are live-in to a block tail
 298       live_in (ZTail m l) = liveInSlots m (live_in l)
 299       live_in (ZLast (LastOther l)) = liveLastIn l env'
 300       live_in (ZLast LastExit) = emptyFM
 301       -- Find the youngest live stack slot
 302       youngest_live areaMap live = fold_subareas young_slot live 0
 303         where young_slot (a, o, _) z = case lookupFM areaMap a of
 304                                          Just top -> max z $ top + o
 305                                          Nothing  -> z
 306               fold_subareas f m z = foldFM (\_ s z -> foldr f z s) z m
 307       -- Allocate space for spill slots and call areas
 308       allocVarSlot = allocSlotFrom ig areaSize 0
 309       -- Update the successor's incoming SP.
 310       setSuccSPs inSp bid areaMap =
 311         case (lookupFM areaMap area, lookupBlockEnv (lg_blocks g) bid) of
 312           (Just _, _) -> areaMap -- succ already knows incoming SP
 313           (Nothing, Just (Block _ _)) ->
 314             if elemBlockSet bid procPoints then
 315               let young = youngest_live areaMap $ env' bid
 316                   -- start = case returnOff stackInfo of Just b  -> max b young
 317                   --                                     Nothing -> young
 318                   start = young -- maybe wrong, but I don't understand
 319                                 -- why the preceding is necessary...
 320               in  allocSlotFrom ig areaSize start areaMap area
 321             else addToFM areaMap area inSp
 322           (_, Nothing) -> panic "Block not found in cfg"
 323         where area = CallArea (Young bid)
 324       allocLast (Block id _) areaMap l =
 325         fold_succs (setSuccSPs inSp) l areaMap
 326         where inSp = expectJust "sp in" $ lookupFM areaMap (CallArea (Young id))
 327       allocMidCall m@(MidForeignCall (Safe bid _) _ _ _) t areaMap =
 328         let young     = youngest_live areaMap $ removeLiveSlotDefs (live_in t) m
 329             area      = CallArea (Young bid)
 330             areaSize' = addToFM areaSize area (widthInBytes (typeWidth gcWord))
 331         in  allocSlotFrom ig areaSize' young areaMap area
 332       allocMidCall _ _ areaMap = areaMap
 333       alloc m t areaMap =
 334           foldSlotsDefd alloc' (foldSlotsUsed alloc' (allocMidCall m t areaMap) m) m
 335         where alloc' areaMap (a@(RegSlot _), _, _) = allocVarSlot areaMap a
 336               alloc' areaMap _ = areaMap
 337       layoutAreas areaMap b@(Block _ t) = layout areaMap t
 338         where layout areaMap (ZTail m t) = layout (alloc m t areaMap) t
 339               layout areaMap (ZLast l)   = allocLast b areaMap l
 340       initMap = addToFM (addToFM emptyFM (CallArea Old) 0)
 341                         (CallArea (Young (lg_entry g))) 0
 342       areaMap = foldl layoutAreas initMap (postorder_dfs g)
 343   in -- pprTrace "ProcPoints" (ppr procPoints) $
 344         -- pprTrace "Area SizeMap" (ppr areaSize) $
 345          -- pprTrace "Entry SP" (ppr entrySp) $
 346            -- pprTrace "Area Map" (ppr areaMap) $
 347      areaMap
 348
 349 -- After determining the stack layout, we can:
 350 -- 1. Replace references to stack Areas with addresses relative to the stack
 351 --    pointer.
 352 -- 2. Insert adjustments to the stack pointer to ensure that it is at a
 353 --    conventional location at each proc point.
 354 --    Because we don't take interrupts on the execution stack, we only need the
 355 --    stack pointer to be younger than the live values on the stack at proc points.
 356 -- 3. Compute the maximum stack offset used in the procedure and replace
 357 --    the stack high-water mark with that offset.
 358 manifestSP :: AreaMap -> ByteOff -> LGraph Middle Last -> FuelMonad (LGraph Middle Last)
 359 manifestSP areaMap entry_off g@(LGraph entry _blocks) =
 360   liftM (LGraph entry) $ foldl replB (return emptyBlockEnv) (postorder_dfs g)
 361   where slot a = -- pprTrace "slot" (ppr a) $
 362                    lookupFM areaMap a `orElse` panic "unallocated Area"
 363         slot' (Just id) = slot $ CallArea (Young id)
 364         slot' Nothing   = slot $ CallArea Old
 365         sp_high = maxSlot slot g
 366         proc_entry_sp = slot (CallArea Old) + entry_off
 367
 368         add_sp_off b env =
 369           case Z.last (unzip b) of
 370             LastOther (LastCall {cml_cont = Just succ, cml_ret_args = off}) ->
 371               extendBlockEnv env succ off
 372             _ -> env
 373         spEntryMap = fold_blocks add_sp_off (mkBlockEnv [(entry, entry_off)]) g
 374         spOffset id = lookupBlockEnv spEntryMap id `orElse` 0
 375
 376         sp_on_entry id | id == entry = proc_entry_sp
 377         sp_on_entry id = slot' (Just id) + spOffset id
 378
 379         -- On entry to procpoints, the stack pointer is conventional;
 380         -- otherwise, we check the SP set by predecessors.
 381         replB :: FuelMonad (BlockEnv CmmBlock) -> CmmBlock -> FuelMonad (BlockEnv CmmBlock)
 382         replB blocks (Block id t) =
 383           do bs <- replTail (Block id) spIn t
 384              -- pprTrace "spIn" (ppr id <+> ppr spIn) $ do
 385              liftM (flip (foldr insertBlock) bs) blocks
 386           where spIn = sp_on_entry id
 387         replTail :: (ZTail Middle Last -> CmmBlock) -> Int -> (ZTail Middle Last) ->
 388                     FuelMonad ([CmmBlock])
 389         replTail h spOff (ZTail m@(MidForeignCall (Safe bid _) _ _ _) t) =
 390           replTail (\t' -> h (setSp spOff spOff' (ZTail (middle spOff m) t'))) spOff' t
 391             where spOff' = slot' (Just bid) + widthInBytes (typeWidth gcWord)
 392         replTail h spOff (ZTail m t) = replTail (h . ZTail (middle spOff m)) spOff t
 393         replTail h spOff (ZLast (LastOther l)) = fixSp h spOff l
 394         replTail h _   l@(ZLast LastExit) = return [h l]
 395         middle spOff m = mapExpDeepMiddle (replSlot spOff) m
 396         last   spOff l = mapExpDeepLast   (replSlot spOff) l
 397         replSlot spOff (CmmStackSlot a i) = CmmRegOff (CmmGlobal Sp) (spOff - (slot a + i))
 398         replSlot _ (CmmLit CmmHighStackMark) = -- replacing the high water mark
 399           CmmLit (CmmInt (toInteger (max 0 (sp_high - proc_entry_sp))) (typeWidth bWord))
 400         replSlot _ e = e
 401         -- The block must establish the SP expected at each successsor.
 402         fixSp :: (ZTail Middle Last -> CmmBlock) -> Int -> Last -> FuelMonad ([CmmBlock])
 403         fixSp h spOff l@(LastCall _ k n _ _) = updSp h spOff (slot' k + n) l
 404         fixSp h spOff l@(LastBranch k) =
 405           let succSp = sp_on_entry k in
 406           if succSp /= spOff then
 407                -- pprTrace "updSp" (ppr k <> ppr spOff <> ppr (sp_on_entry k)) $
 408                updSp h spOff succSp l
 409           else return $ [h (ZLast (LastOther (last spOff l)))]
 410         fixSp h spOff l = liftM (uncurry (:)) $ fold_succs succ l $ return (b, [])
 411           where b = h (ZLast (LastOther (last spOff l)))
 412                 succ succId z =
 413                   let succSp = sp_on_entry succId in
 414                   if succSp /= spOff then
 415                     do (b,  bs)  <- z
 416                        (b', bs') <- insertBetween b [setSpMid spOff succSp] succId
 417                        return (b', bs ++ bs')
 418                   else z
 419         updSp h old new l = return [h $ setSp old new $ ZLast $ LastOther (last new l)]
 420         setSpMid sp sp' = MidAssign (CmmGlobal Sp) e
 421           where e = CmmMachOp (MO_Add wordWidth) [CmmReg (CmmGlobal Sp), off]
 422                 off = CmmLit $ CmmInt (toInteger $ sp - sp') wordWidth
 423         setSp sp sp' t = if sp == sp' then t else ZTail (setSpMid sp sp') t
 424
 425
 426 -- To compute the stack high-water mark, we fold over the graph and
 427 -- compute the highest slot offset.
 428 maxSlot :: (Area -> Int) -> CmmGraph -> Int
 429 maxSlot slotOff g = fold_blocks (fold_fwd_block (\ _ x -> x) highSlot highSlot) 0 g
 430   where highSlot i z = foldSlotsUsed add (foldSlotsDefd add z i) i
 431         add z (a, i, _) = max z (slotOff a + i)
 432
 433 -----------------------------------------------------------------------------
 434 -- | Sanity check: stub pointers immediately after they die
 435 -----------------------------------------------------------------------------
 436 -- This will miss stack slots that are last used in a Last node,
 437 -- but it should do pretty well...
 438
 439 type StubPtrFix = FuelMonad (BackwardFixedPoint Middle Last SubAreaSet CmmGraph)
 440
 441 stubSlotsOnDeath :: (LGraph Middle Last) -> FuelMonad (LGraph Middle Last)
 442 stubSlotsOnDeath g = liftM zdfFpContents $ (res :: StubPtrFix)
 443     where res = zdfBRewriteFromL RewriteShallow emptyBlockEnv "stub ptrs" slotLattice
 444                                  liveSlotTransfers rewrites (fact_bot slotLattice) g
 445           rewrites = BackwardRewrites first middle last Nothing
 446           first _ _ = Nothing
 447           last  _ _ = Nothing
 448           middle m liveSlots = foldSlotsUsed (stub liveSlots m) Nothing m
 449           stub liveSlots m rst subarea@(a, off, w) =
 450             if elemSlot liveSlots subarea then rst
 451             else let store = mkStore (CmmStackSlot a off)
 452                                      (stackStubExpr (widthFromBytes w))
 453                  in case rst of Nothing -> Just (mkMiddle m <*> store)
 454                                 Just g  -> Just (g <*> store)