compiler/cmm/CmmStackLayout.hs

   1 module CmmStackLayout
   2     ( SlotEnv, liveSlotAnal, liveSlotTransfers, removeLiveSlotDefs
   3     , layout, manifestSP, igraph, areaBuilder
   4     , stubSlotsOnDeath ) -- to help crash early during debugging
   5 where
   6
   7 import Constants
   8 import Prelude hiding (zip, unzip, last)
   9
  10 import BlockId
  11 import CmmExpr
  12 import CmmProcPointZ
  13 import CmmTx
  14 import DFMonad
  15 import FiniteMap
  16 import Maybes
  17 import MkZipCfg
  18 import MkZipCfgCmm hiding (CmmBlock, CmmGraph)
  19 import Control.Monad
  20 import Outputable
  21 import SMRep (ByteOff)
  22 import ZipCfg
  23 import ZipCfg as Z
  24 import ZipCfgCmmRep
  25 import ZipDataflow
  26
  27 ------------------------------------------------------------------------
  28 --                    Stack Layout                                    --
  29 ------------------------------------------------------------------------
  30
  31 -- | Before we lay out the stack, we need to know something about the
  32 -- liveness of the stack slots. In particular, to decide whether we can
  33 -- reuse a stack location to hold multiple stack slots, we need to know
  34 -- when each of the stack slots is used.
  35 -- Although tempted to use something simpler, we really need a full interference
  36 -- graph. Consider the following case:
  37 --   case <...> of
  38 --     1 -> <spill x>; // y is dead out
  39 --     2 -> <spill y>; // x is dead out
  40 --     3 -> <spill x and y>
  41 -- If we consider the arms in order and we use just the deadness information given by a
  42 -- dataflow analysis, we might decide to allocate the stack slots for x and y
  43 -- to the same stack location, which will lead to incorrect code in the third arm.
  44 -- We won't make this mistake with an interference graph.
  45
  46 -- First, the liveness analysis.
  47 -- We represent a slot with an area, an offset into the area, and a width.
  48 -- Tracking the live slots is a bit tricky because there may be loads and stores
  49 -- into only a part of a stack slot (e.g. loading the low word of a 2-word long),
  50 -- e.g. Slot A 0 8 overlaps with Slot A 4 4.
  51 --
  52 -- The definition of a slot set is intended to reduce the number of overlap
  53 -- checks we have to make. There's no reason to check for overlap between
  54 -- slots in different areas, so we segregate the map by Area's.
  55 -- We expect few slots in each Area, so we collect them in an unordered list.
  56 -- To keep these lists short, any contiguous live slots are coalesced into
  57 -- a single slot, on insertion.
  58
  59 slotLattice :: DataflowLattice SubAreaSet
  60 slotLattice = DataflowLattice "live slots" emptyFM add False
  61   where add new old = case foldFM addArea (False, old) new of
  62                         (True,  x) -> aTx  x
  63                         (False, x) -> noTx x
  64         addArea a newSlots z = foldr (addSlot a) z newSlots
  65         addSlot a slot (changed, map) =
  66           let (c, live) = liveGen slot $ lookupWithDefaultFM map [] a
  67           in (c || changed, addToFM map a live)
  68
  69 type SlotEnv   = BlockEnv SubAreaSet
  70   -- The sub-areas live on entry to the block
  71
  72 type SlotFix a = FuelMonad (BackwardFixedPoint Middle Last SubAreaSet a)
  73
  74 liveSlotAnal :: LGraph Middle Last -> FuelMonad SlotEnv
  75 liveSlotAnal g = liftM zdfFpFacts (res :: SlotFix ())
  76   where res = zdfSolveFromL emptyBlockEnv "live slot analysis" slotLattice
  77                             liveSlotTransfers (fact_bot slotLattice) g
  78
  79 -- Add the subarea s to the subareas in the list-set (possibly coalescing it with
  80 -- adjacent subareas), and also return whether s was a new addition.
  81 liveGen :: SubArea -> [SubArea] -> (Bool, [SubArea])
  82 liveGen s set = liveGen' s set []
  83   where liveGen' s [] z = (True, s : z)
  84         liveGen' s@(a, hi, w) (s'@(a', hi', w') : rst) z =
  85           if a /= a' || hi < lo' || lo > hi' then    -- no overlap
  86             liveGen' s rst (s' : z)
  87           else if s' `contains` s then               -- old contains new
  88             (False, set)
  89           else                                       -- overlap: coalesce the slots
  90             let new_hi = max hi hi'
  91                 new_lo = min lo lo'
  92             in liveGen' (a, new_hi, new_hi - new_lo) rst z
  93           where lo  = hi  - w  -- remember: areas grow down
  94                 lo' = hi' - w'
  95         contains (a, hi, w) (a', hi', w') =
  96           a == a' && hi >= hi' && hi - w <= hi' - w'
  97
  98 liveKill :: SubArea -> [SubArea] -> [SubArea]
  99 liveKill (a, hi, w) set = -- pprTrace "killing slots in area" (ppr a) $
 100                           liveKill' set []
 101   where liveKill' [] z = z
 102         liveKill' (s'@(a', hi', w') : rst) z =
 103           if a /= a' || hi < lo' || lo > hi' then    -- no overlap
 104             liveKill' rst (s' : z)
 105           else                                       -- overlap: split the old slot
 106             let z'  = if hi' > hi  then (a, hi', hi' - hi)  : z else z
 107                 z'' = if lo  > lo' then (a, lo,  lo  - lo') : z' else z'
 108             in liveKill' rst z''
 109           where lo  = hi  - w  -- remember: areas grow down
 110                 lo' = hi' - w'
 111
 112 -- Note: the stack slots that hold variables returned on the stack are not
 113 -- considered live in to the block -- we treat the first node as a definition site.
 114 -- BEWARE?: Am I being a little careless here in failing to check for the
 115 -- entry Id (which would use the CallArea Old).
 116 liveSlotTransfers :: BackwardTransfers Middle Last SubAreaSet
 117 liveSlotTransfers =
 118   BackwardTransfers first liveInSlots liveLastIn
 119     where first id live = delFromFM live (CallArea (Young id))
 120
 121 -- Slot sets: adding slots, removing slots, and checking for membership.
 122 liftToArea :: Area -> ([SubArea] -> [SubArea]) -> SubAreaSet -> SubAreaSet
 123 addSlot, removeSlot :: SubAreaSet -> SubArea -> SubAreaSet
 124 elemSlot            :: SubAreaSet -> SubArea -> Bool
 125 liftToArea a f map = addToFM map a $ f (lookupWithDefaultFM map [] a)
 126 addSlot    live (a, i, w) = liftToArea a (snd . liveGen  (a, i, w)) live
 127 removeSlot live (a, i, w) = liftToArea a       (liveKill (a, i, w)) live
 128 elemSlot   live (a, i, w) =
 129   not $ fst $ liveGen  (a, i, w) (lookupWithDefaultFM live [] a)
 130
 131 removeLiveSlotDefs :: (DefinerOfSlots s, UserOfSlots s) => SubAreaSet -> s -> SubAreaSet
 132 removeLiveSlotDefs = foldSlotsDefd removeSlot
 133
 134 liveInSlots :: (DefinerOfSlots s, UserOfSlots s) => s -> SubAreaSet -> SubAreaSet
 135 liveInSlots x live = foldSlotsUsed addSlot (removeLiveSlotDefs live x) x
 136
 137 liveLastIn :: Last -> (BlockId -> SubAreaSet) -> SubAreaSet
 138 liveLastIn l env = liveInSlots l (liveLastOut env l)
 139
 140 -- Don't forget to keep the outgoing parameters in the CallArea live,
 141 -- as well as the update frame.
 142 -- Note: We have to keep the update frame live at a call because of the
 143 -- case where the function doesn't return -- in that case, there won't
 144 -- be a return to keep the update frame live. We'd still better keep the
 145 -- info pointer in the update frame live at any call site;
 146 -- otherwise we could screw up the garbage collector.
 147 liveLastOut :: (BlockId -> SubAreaSet) -> Last -> SubAreaSet
 148 liveLastOut env l =
 149   case l of
 150     LastCall _ Nothing n _ _ ->
 151       add_area (CallArea Old) n out -- add outgoing args (includes upd frame)
 152     LastCall _ (Just k) n _ (Just _) ->
 153       add_area (CallArea Old) n (add_area (CallArea (Young k)) n out)
 154     LastCall _ (Just k) n _ Nothing ->
 155       add_area (CallArea (Young k)) n out
 156     _ -> out
 157   where out = joinOuts slotLattice env l
 158         add_area _ n live | n == 0 = live
 159         add_area a n live =
 160           addToFM live a $ snd $ liveGen (a, n, n) $ lookupWithDefaultFM live [] a
 161
 162 -- The liveness analysis must be precise: otherwise, we won't know if a definition
 163 -- should really kill a live-out stack slot.
 164 -- But the interference graph does not have to be precise -- it might decide that
 165 -- any live areas interfere. To maintain both a precise analysis and an imprecise
 166 -- interference graph, we need to convert the live-out stack slots to graph nodes
 167 -- at each and every instruction; rather than reconstruct a new list of nodes
 168 -- every time, I provide a function to fold over the nodes, which should be a
 169 -- reasonably efficient approach for the implementations we envision.
 170 -- Of course, it will probably be much easier to program if we just return a list...
 171 type Set x = FiniteMap x ()
 172 data IGraphBuilder n =
 173   Builder { foldNodes     :: forall z. SubArea -> (n -> z -> z) -> z -> z
 174           , _wordsOccupied :: AreaMap -> AreaMap -> n -> [Int]
 175           }
 176
 177 areaBuilder :: IGraphBuilder Area
 178 areaBuilder = Builder fold words
 179   where fold (a, _, _) f z = f a z
 180         words areaSize areaMap a =
 181           case lookupFM areaMap a of
 182             Just addr -> [addr .. addr + (lookupFM areaSize a `orElse`
 183                                           pprPanic "wordsOccupied: unknown area" (ppr a))]
 184             Nothing   -> []
 185
 186 --slotBuilder :: IGraphBuilder (Area, Int)
 187 --slotBuilder = undefined
 188
 189 -- Now, we can build the interference graph.
 190 -- The usual story: a definition interferes with all live outs and all other
 191 -- definitions.
 192 type IGraph x = FiniteMap x (Set x)
 193 type IGPair x = (IGraph x, IGraphBuilder x)
 194 igraph :: (Ord x) => IGraphBuilder x -> SlotEnv -> LGraph Middle Last -> IGraph x
 195 igraph builder env g = foldr interfere emptyFM (postorder_dfs g)
 196   where foldN = foldNodes builder
 197         interfere block igraph =
 198           let (h, l) = goto_end (unzip block)
 199               --heads :: ZHead Middle -> (IGraph x, SubAreaSet) -> IGraph x
 200               heads (ZFirst _) (igraph, _)       = igraph
 201               heads (ZHead h m)    (igraph, liveOut) =
 202                 heads h (addEdges igraph m liveOut, liveInSlots m liveOut)
 203               -- add edges between a def and the other defs and liveouts
 204               addEdges igraph i out = fst $ foldSlotsDefd addDef (igraph, out) i
 205               addDef (igraph, out) def@(a, _, _) =
 206                 (foldN def (addDefN out) igraph,
 207                  addToFM out a (snd $ liveGen def (lookupWithDefaultFM out [] a)))
 208               addDefN out n igraph =
 209                 let addEdgeNO o igraph = foldN o addEdgeNN igraph
 210                     addEdgeNN n' igraph = addEdgeNN' n n' $ addEdgeNN' n' n igraph
 211                     addEdgeNN' n n' igraph = addToFM igraph n (addToFM set n' ())
 212                       where set = lookupWithDefaultFM igraph emptyFM n
 213                 in foldFM (\ _ os igraph -> foldr addEdgeNO igraph os) igraph out
 214               env' bid = lookupBlockEnv env bid `orElse` panic "unknown blockId in igraph"
 215           in heads h $ case l of LastExit    -> (igraph, emptyFM)
 216                                  LastOther l -> (addEdges igraph l $ liveLastOut env' l,
 217                                                  liveLastIn l env')
 218
 219 -- Before allocating stack slots, we need to collect one more piece of information:
 220 -- what's the highest offset (in bytes) used in each Area?
 221 -- We'll need to allocate that much space for each Area.
 222 getAreaSize :: ByteOff -> LGraph Middle Last -> AreaMap
 223   -- The domain of the returned mapping consists only of Areas
 224   -- used for (a) variable spill slots, and (b) parameter passing ares for calls
 225 getAreaSize entry_off g@(LGraph _ _) =
 226   fold_blocks (fold_fwd_block first add_regslots last)
 227               (unitFM (CallArea Old) entry_off) g
 228   where first _  z = z
 229         last l@(LastOther (LastCall _ Nothing args res _)) z =
 230           add_regslots l (add (add z area args) area res)
 231           where area = CallArea Old
 232         last l@(LastOther (LastCall _ (Just k) args res _)) z =
 233           add_regslots l (add (add z area args) area res)
 234           where area = CallArea (Young k)
 235         last l z = add_regslots l z
 236         add_regslots i z = foldSlotsUsed addSlot (foldSlotsDefd addSlot z i) i
 237         addSlot z (a@(RegSlot (LocalReg _ ty)), _, _) =
 238           add z a $ widthInBytes $ typeWidth ty
 239         addSlot z _ = z
 240         add z a off = addToFM z a (max off (lookupWithDefaultFM z 0 a))
 241         -- The 'max' is important.  Two calls, to f and g, might share a common
 242         -- continuation (and hence a common CallArea), but their number of overflow
 243         -- parameters might differ.
 244
 245
 246 -- Find the Stack slots occupied by the subarea's conflicts
 247 conflictSlots :: Ord x => IGPair x -> AreaMap -> AreaMap -> SubArea -> Set Int
 248 conflictSlots (ig, Builder foldNodes wordsOccupied) areaSize areaMap subarea =
 249   foldNodes subarea foldNode emptyFM
 250   where foldNode n set = foldFM conflict set $ lookupWithDefaultFM ig emptyFM n
 251         conflict n' () set = liveInSlots areaMap n' set
 252         -- Add stack slots occupied by igraph node n
 253         liveInSlots areaMap n set = foldr setAdd set (wordsOccupied areaSize areaMap n)
 254         setAdd w s = addToFM s w ()
 255
 256 -- Find any open space on the stack, starting from the offset.
 257 -- If the area is a CallArea or a spill slot for a pointer, then it must
 258 -- be word-aligned.
 259 freeSlotFrom :: Ord x => IGPair x -> AreaMap -> Int -> AreaMap -> Area -> Int
 260 freeSlotFrom ig areaSize offset areaMap area =
 261   let size = lookupFM areaSize area `orElse` 0
 262       conflicts = conflictSlots ig areaSize areaMap (area, size, size)
 263       -- CallAreas and Ptrs need to be word-aligned (round up!)
 264       align = case area of CallArea _                                -> align'
 265                            RegSlot  r | isGcPtrType (localRegType r) -> align'
 266                            RegSlot  _                                -> id
 267       align' n = (n + (wORD_SIZE - 1)) `div` wORD_SIZE * wORD_SIZE
 268       -- Find a space big enough to hold the area
 269       findSpace curr 0 = curr
 270       findSpace curr cnt = -- part of target slot, # of bytes left to check
 271         if elemFM curr conflicts then
 272           findSpace (align (curr + size)) size -- try the next (possibly) open space
 273         else findSpace (curr - 1) (cnt - 1)
 274   in findSpace (align (offset + size)) size
 275
 276 -- Find an open space on the stack, and assign it to the area.
 277 allocSlotFrom :: Ord x => IGPair x -> AreaMap -> Int -> AreaMap -> Area -> AreaMap
 278 allocSlotFrom ig areaSize from areaMap area =
 279   if elemFM area areaMap then areaMap
 280   else addToFM areaMap area $ freeSlotFrom ig areaSize from areaMap area
 281
 282 -- | Greedy stack layout.
 283 -- Compute liveness, build the interference graph, and allocate slots for the areas.
 284 -- We visit each basic block in a (generally) forward order.
 285
 286 -- At each instruction that names a register subarea r, we immediately allocate
 287 -- any available slot on the stack by the following procedure:
 288 --  1. Find the sub-areas S that conflict with r
 289 --  2. Find the stack slots used for S
 290 --  3. Choose a contiguous stack space s not in S (s must be large enough to hold r)
 291
 292 -- For a CallArea, we allocate the stack space only when we reach a function
 293 -- call that returns to the CallArea's blockId.
 294 -- Then, we allocate the Area subject to the following constraints:
 295 --   a) It must be younger than all the sub-areas that are live on entry to the block
 296 --         This constraint is only necessary for the successor of a call
 297 --   b) It must not overlap with any already-allocated Area with which it conflicts
 298 --         (ie at some point, not necessarily now, is live at the same time)
 299 --   Part (b) is just the 1,2,3 part above
 300
 301 -- Note: The stack pointer only has to be younger than the youngest live stack slot
 302 -- at proc points. Otherwise, the stack pointer can point anywhere.
 303
 304 layout :: ProcPointSet -> SlotEnv -> ByteOff -> LGraph Middle Last -> AreaMap
 305 -- The domain of the returned map includes an Area for EVERY block
 306 -- including each block that is not the successor of a call (ie is not a proc-point)
 307 -- That's how we return the info of what the SP should be at the entry of every block
 308
 309 layout procPoints env entry_off g =
 310   let ig = (igraph areaBuilder env g, areaBuilder)
 311       env' bid = lookupBlockEnv env bid `orElse` panic "unknown blockId in igraph"
 312       areaSize = getAreaSize entry_off g
 313       -- Find the slots that are live-in to a block tail
 314       live_in (ZTail m l) = liveInSlots m (live_in l)
 315       live_in (ZLast (LastOther l)) = liveLastIn l env'
 316       live_in (ZLast LastExit) = emptyFM
 317
 318       -- Find the youngest live stack slot that has already been allocated
 319       youngest_live :: AreaMap     -- Already allocated
 320                     -> SubAreaSet  -- Sub-areas live here
 321                     -> ByteOff     -- Offset of the youngest byte of any
 322                                    --    already-allocated, live sub-area
 323       youngest_live areaMap live = fold_subareas young_slot live 0
 324         where young_slot (a, o, _) z = case lookupFM areaMap a of
 325                                          Just top -> max z $ top + o
 326                                          Nothing  -> z
 327               fold_subareas f m z = foldFM (\_ s z -> foldr f z s) z m
 328
 329       -- Allocate space for spill slots and call areas
 330       allocVarSlot = allocSlotFrom ig areaSize 0
 331
 332       -- Update the successor's incoming SP.
 333       setSuccSPs inSp bid areaMap =
 334         case (lookupFM areaMap area, lookupBlockEnv (lg_blocks g) bid) of
 335           (Just _, _) -> areaMap -- succ already knows incoming SP
 336           (Nothing, Just (Block _ _)) ->
 337             if elemBlockSet bid procPoints then
 338               let young = youngest_live areaMap $ env' bid
 339                   -- start = case returnOff stackInfo of Just b  -> max b young
 340                   --                                     Nothing -> young
 341                   start = young -- maybe wrong, but I don't understand
 342                                 -- why the preceding is necessary...
 343               in  allocSlotFrom ig areaSize start areaMap area
 344             else addToFM areaMap area inSp
 345           (_, Nothing) -> panic "Block not found in cfg"
 346         where area = CallArea (Young bid)
 347
 348       allocLast (Block id _) areaMap l =
 349         fold_succs (setSuccSPs inSp) l areaMap
 350         where inSp = expectJust "sp in" $ lookupFM areaMap (CallArea (Young id))
 351
 352       allocMidCall m@(MidForeignCall (Safe bid _) _ _ _) t areaMap =
 353         let young     = youngest_live areaMap $ removeLiveSlotDefs (live_in t) m
 354             area      = CallArea (Young bid)
 355             areaSize' = addToFM areaSize area (widthInBytes (typeWidth gcWord))
 356         in  allocSlotFrom ig areaSize' young areaMap area
 357       allocMidCall _ _ areaMap = areaMap
 358
 359       alloc m t areaMap =
 360           foldSlotsDefd alloc' (foldSlotsUsed alloc' (allocMidCall m t areaMap) m) m
 361         where alloc' areaMap (a@(RegSlot _), _, _) = allocVarSlot areaMap a
 362               alloc' areaMap _ = areaMap
 363
 364       layoutAreas areaMap b@(Block _ t) = layout areaMap t
 365         where layout areaMap (ZTail m t) = layout (alloc m t areaMap) t
 366               layout areaMap (ZLast l)   = allocLast b areaMap l
 367       initMap = addToFM (addToFM emptyFM (CallArea Old) 0)
 368                         (CallArea (Young (lg_entry g))) 0
 369       areaMap = foldl layoutAreas initMap (postorder_dfs g)
 370   in -- pprTrace "ProcPoints" (ppr procPoints) $
 371         -- pprTrace "Area SizeMap" (ppr areaSize) $
 372          -- pprTrace "Entry SP" (ppr entrySp) $
 373            -- pprTrace "Area Map" (ppr areaMap) $
 374      areaMap
 375
 376 -- After determining the stack layout, we can:
 377 -- 1. Replace references to stack Areas with addresses relative to the stack
 378 --    pointer.
 379 -- 2. Insert adjustments to the stack pointer to ensure that it is at a
 380 --    conventional location at each proc point.
 381 --    Because we don't take interrupts on the execution stack, we only need the
 382 --    stack pointer to be younger than the live values on the stack at proc points.
 383 -- 3. Compute the maximum stack offset used in the procedure and replace
 384 --    the stack high-water mark with that offset.
 385 manifestSP :: AreaMap -> ByteOff -> LGraph Middle Last -> FuelMonad (LGraph Middle Last)
 386 manifestSP areaMap entry_off g@(LGraph entry _blocks) =
 387   liftM (LGraph entry) $ foldl replB (return emptyBlockEnv) (postorder_dfs g)
 388   where slot a = -- pprTrace "slot" (ppr a) $
 389                    lookupFM areaMap a `orElse` panic "unallocated Area"
 390         slot' (Just id) = slot $ CallArea (Young id)
 391         slot' Nothing   = slot $ CallArea Old
 392         sp_high = maxSlot slot g
 393         proc_entry_sp = slot (CallArea Old) + entry_off
 394
 395         add_sp_off b env =
 396           case Z.last (unzip b) of
 397             LastOther (LastCall {cml_cont = Just succ, cml_ret_args = off}) ->
 398               extendBlockEnv env succ off
 399             _ -> env
 400         spEntryMap = fold_blocks add_sp_off (mkBlockEnv [(entry, entry_off)]) g
 401         spOffset id = lookupBlockEnv spEntryMap id `orElse` 0
 402
 403         sp_on_entry id | id == entry = proc_entry_sp
 404         sp_on_entry id = slot' (Just id) + spOffset id
 405
 406         -- On entry to procpoints, the stack pointer is conventional;
 407         -- otherwise, we check the SP set by predecessors.
 408         replB :: FuelMonad (BlockEnv CmmBlock) -> CmmBlock -> FuelMonad (BlockEnv CmmBlock)
 409         replB blocks (Block id t) =
 410           do bs <- replTail (Block id) spIn t
 411              -- pprTrace "spIn" (ppr id <+> ppr spIn) $ do
 412              liftM (flip (foldr insertBlock) bs) blocks
 413           where spIn = sp_on_entry id
 414         replTail :: (ZTail Middle Last -> CmmBlock) -> Int -> (ZTail Middle Last) ->
 415                     FuelMonad ([CmmBlock])
 416         replTail h spOff (ZTail m@(MidForeignCall (Safe bid _) _ _ _) t) =
 417           replTail (\t' -> h (setSp spOff spOff' (ZTail (middle spOff m) t'))) spOff' t
 418             where spOff' = slot' (Just bid) + widthInBytes (typeWidth gcWord)
 419         replTail h spOff (ZTail m t) = replTail (h . ZTail (middle spOff m)) spOff t
 420         replTail h spOff (ZLast (LastOther l)) = fixSp h spOff l
 421         replTail h _   l@(ZLast LastExit) = return [h l]
 422         middle spOff m = mapExpDeepMiddle (replSlot spOff) m
 423         last   spOff l = mapExpDeepLast   (replSlot spOff) l
 424         replSlot spOff (CmmStackSlot a i) = CmmRegOff (CmmGlobal Sp) (spOff - (slot a + i))
 425         replSlot _ (CmmLit CmmHighStackMark) = -- replacing the high water mark
 426           CmmLit (CmmInt (toInteger (max 0 (sp_high - proc_entry_sp))) (typeWidth bWord))
 427         replSlot _ e = e
 428         -- The block must establish the SP expected at each successsor.
 429         fixSp :: (ZTail Middle Last -> CmmBlock) -> Int -> Last -> FuelMonad ([CmmBlock])
 430         fixSp h spOff l@(LastCall _ k n _ _) = updSp h spOff (slot' k + n) l
 431         fixSp h spOff l@(LastBranch k) =
 432           let succSp = sp_on_entry k in
 433           if succSp /= spOff then
 434                -- pprTrace "updSp" (ppr k <> ppr spOff <> ppr (sp_on_entry k)) $
 435                updSp h spOff succSp l
 436           else return $ [h (ZLast (LastOther (last spOff l)))]
 437         fixSp h spOff l = liftM (uncurry (:)) $ fold_succs succ l $ return (b, [])
 438           where b = h (ZLast (LastOther (last spOff l)))
 439                 succ succId z =
 440                   let succSp = sp_on_entry succId in
 441                   if succSp /= spOff then
 442                     do (b,  bs)  <- z
 443                        (b', bs') <- insertBetween b [setSpMid spOff succSp] succId
 444                        return (b', bs ++ bs')
 445                   else z
 446         updSp h old new l = return [h $ setSp old new $ ZLast $ LastOther (last new l)]
 447         setSpMid sp sp' = MidAssign (CmmGlobal Sp) e
 448           where e = CmmMachOp (MO_Add wordWidth) [CmmReg (CmmGlobal Sp), off]
 449                 off = CmmLit $ CmmInt (toInteger $ sp - sp') wordWidth
 450         setSp sp sp' t = if sp == sp' then t else ZTail (setSpMid sp sp') t
 451
 452
 453 -- To compute the stack high-water mark, we fold over the graph and
 454 -- compute the highest slot offset.
 455 maxSlot :: (Area -> Int) -> CmmGraph -> Int
 456 maxSlot slotOff g = fold_blocks (fold_fwd_block (\ _ x -> x) highSlot highSlot) 0 g
 457   where highSlot i z = foldSlotsUsed add (foldSlotsDefd add z i) i
 458         add z (a, i, _) = max z (slotOff a + i)
 459
 460 -----------------------------------------------------------------------------
 461 -- | Sanity check: stub pointers immediately after they die
 462 -----------------------------------------------------------------------------
 463 -- This will miss stack slots that are last used in a Last node,
 464 -- but it should do pretty well...
 465
 466 type StubPtrFix = FuelMonad (BackwardFixedPoint Middle Last SubAreaSet CmmGraph)
 467
 468 stubSlotsOnDeath :: (LGraph Middle Last) -> FuelMonad (LGraph Middle Last)
 469 stubSlotsOnDeath g = liftM zdfFpContents $ (res :: StubPtrFix)
 470     where res = zdfBRewriteFromL RewriteShallow emptyBlockEnv "stub ptrs" slotLattice
 471                                  liveSlotTransfers rewrites (fact_bot slotLattice) g
 472           rewrites = BackwardRewrites first middle last Nothing
 473           first _ _ = Nothing
 474           last  _ _ = Nothing
 475           middle m liveSlots = foldSlotsUsed (stub liveSlots m) Nothing m
 476           stub liveSlots m rst subarea@(a, off, w) =
 477             if elemSlot liveSlots subarea then rst
 478             else let store = mkStore (CmmStackSlot a off)
 479                                      (stackStubExpr (widthFromBytes w))
 480                  in case rst of Nothing -> Just (mkMiddle m <*> store)
 481                                 Just g  -> Just (g <*> store)