compiler/cmm/CmmStackLayout.hs

   1 {-# OPTIONS_GHC -XNoMonoLocalBinds #-}
   2 -- Norman likes local bindings
   3 -- If this module lives on I'd like to get rid of this flag in due course
   4
   5 module CmmStackLayout
   6     ( SlotEnv, liveSlotAnal, liveSlotTransfers, removeLiveSlotDefs
   7     , layout, manifestSP, igraph, areaBuilder
   8     , stubSlotsOnDeath ) -- to help crash early during debugging
   9 where
  10
  11 import Constants
  12 import Prelude hiding (zip, unzip, last)
  13
  14 import BlockId
  15 import CmmExpr
  16 import CmmProcPointZ
  17 import CmmTx
  18 import DFMonad
  19 import FiniteMap
  20 import Maybes
  21 import MkZipCfg
  22 import MkZipCfgCmm hiding (CmmBlock, CmmGraph)
  23 import Control.Monad
  24 import Outputable
  25 import SMRep (ByteOff)
  26 import ZipCfg
  27 import ZipCfg as Z
  28 import ZipCfgCmmRep
  29 import ZipDataflow
  30
  31 ------------------------------------------------------------------------
  32 --                    Stack Layout                                    --
  33 ------------------------------------------------------------------------
  34
  35 -- | Before we lay out the stack, we need to know something about the
  36 -- liveness of the stack slots. In particular, to decide whether we can
  37 -- reuse a stack location to hold multiple stack slots, we need to know
  38 -- when each of the stack slots is used.
  39 -- Although tempted to use something simpler, we really need a full interference
  40 -- graph. Consider the following case:
  41 --   case <...> of
  42 --     1 -> <spill x>; // y is dead out
  43 --     2 -> <spill y>; // x is dead out
  44 --     3 -> <spill x and y>
  45 -- If we consider the arms in order and we use just the deadness information given by a
  46 -- dataflow analysis, we might decide to allocate the stack slots for x and y
  47 -- to the same stack location, which will lead to incorrect code in the third arm.
  48 -- We won't make this mistake with an interference graph.
  49
  50 -- First, the liveness analysis.
  51 -- We represent a slot with an area, an offset into the area, and a width.
  52 -- Tracking the live slots is a bit tricky because there may be loads and stores
  53 -- into only a part of a stack slot (e.g. loading the low word of a 2-word long),
  54 -- e.g. Slot A 0 8 overlaps with Slot A 4 4.
  55 --
  56 -- The definition of a slot set is intended to reduce the number of overlap
  57 -- checks we have to make. There's no reason to check for overlap between
  58 -- slots in different areas, so we segregate the map by Area's.
  59 -- We expect few slots in each Area, so we collect them in an unordered list.
  60 -- To keep these lists short, any contiguous live slots are coalesced into
  61 -- a single slot, on insertion.
  62
  63 slotLattice :: DataflowLattice SubAreaSet
  64 slotLattice = DataflowLattice "live slots" emptyFM add False
  65   where add new old = case foldFM addArea (False, old) new of
  66                         (True,  x) -> aTx  x
  67                         (False, x) -> noTx x
  68         addArea a newSlots z = foldr (addSlot a) z newSlots
  69         addSlot a slot (changed, map) =
  70           let (c, live) = liveGen slot $ lookupWithDefaultFM map [] a
  71           in (c || changed, addToFM map a live)
  72
  73 type SlotEnv   = BlockEnv SubAreaSet
  74   -- The sub-areas live on entry to the block
  75
  76 type SlotFix a = FuelMonad (BackwardFixedPoint Middle Last SubAreaSet a)
  77
  78 liveSlotAnal :: LGraph Middle Last -> FuelMonad SlotEnv
  79 liveSlotAnal g = liftM zdfFpFacts (res :: SlotFix ())
  80   where res = zdfSolveFromL emptyBlockEnv "live slot analysis" slotLattice
  81                             liveSlotTransfers (fact_bot slotLattice) g
  82
  83 -- Add the subarea s to the subareas in the list-set (possibly coalescing it with
  84 -- adjacent subareas), and also return whether s was a new addition.
  85 liveGen :: SubArea -> [SubArea] -> (Bool, [SubArea])
  86 liveGen s set = liveGen' s set []
  87   where liveGen' s [] z = (True, s : z)
  88         liveGen' s@(a, hi, w) (s'@(a', hi', w') : rst) z =
  89           if a /= a' || hi < lo' || lo > hi' then    -- no overlap
  90             liveGen' s rst (s' : z)
  91           else if s' `contains` s then               -- old contains new
  92             (False, set)
  93           else                                       -- overlap: coalesce the slots
  94             let new_hi = max hi hi'
  95                 new_lo = min lo lo'
  96             in liveGen' (a, new_hi, new_hi - new_lo) rst z
  97           where lo  = hi  - w  -- remember: areas grow down
  98                 lo' = hi' - w'
  99         contains (a, hi, w) (a', hi', w') =
 100           a == a' && hi >= hi' && hi - w <= hi' - w'
 101
 102 liveKill :: SubArea -> [SubArea] -> [SubArea]
 103 liveKill (a, hi, w) set = -- pprTrace "killing slots in area" (ppr a) $
 104                           liveKill' set []
 105   where liveKill' [] z = z
 106         liveKill' (s'@(a', hi', w') : rst) z =
 107           if a /= a' || hi < lo' || lo > hi' then    -- no overlap
 108             liveKill' rst (s' : z)
 109           else                                       -- overlap: split the old slot
 110             let z'  = if hi' > hi  then (a, hi', hi' - hi)  : z else z
 111                 z'' = if lo  > lo' then (a, lo,  lo  - lo') : z' else z'
 112             in liveKill' rst z''
 113           where lo  = hi  - w  -- remember: areas grow down
 114                 lo' = hi' - w'
 115
 116 -- Note: the stack slots that hold variables returned on the stack are not
 117 -- considered live in to the block -- we treat the first node as a definition site.
 118 -- BEWARE?: Am I being a little careless here in failing to check for the
 119 -- entry Id (which would use the CallArea Old).
 120 liveSlotTransfers :: BackwardTransfers Middle Last SubAreaSet
 121 liveSlotTransfers =
 122   BackwardTransfers first liveInSlots liveLastIn
 123     where first id live = delFromFM live (CallArea (Young id))
 124
 125 -- Slot sets: adding slots, removing slots, and checking for membership.
 126 liftToArea :: Area -> ([SubArea] -> [SubArea]) -> SubAreaSet -> SubAreaSet
 127 addSlot, removeSlot :: SubAreaSet -> SubArea -> SubAreaSet
 128 elemSlot            :: SubAreaSet -> SubArea -> Bool
 129 liftToArea a f map = addToFM map a $ f (lookupWithDefaultFM map [] a)
 130 addSlot    live (a, i, w) = liftToArea a (snd . liveGen  (a, i, w)) live
 131 removeSlot live (a, i, w) = liftToArea a       (liveKill (a, i, w)) live
 132 elemSlot   live (a, i, w) =
 133   not $ fst $ liveGen  (a, i, w) (lookupWithDefaultFM live [] a)
 134
 135 removeLiveSlotDefs :: (DefinerOfSlots s, UserOfSlots s) => SubAreaSet -> s -> SubAreaSet
 136 removeLiveSlotDefs = foldSlotsDefd removeSlot
 137
 138 liveInSlots :: (DefinerOfSlots s, UserOfSlots s) => s -> SubAreaSet -> SubAreaSet
 139 liveInSlots x live = foldSlotsUsed addSlot (removeLiveSlotDefs live x) x
 140
 141 liveLastIn :: Last -> (BlockId -> SubAreaSet) -> SubAreaSet
 142 liveLastIn l env = liveInSlots l (liveLastOut env l)
 143
 144 -- Don't forget to keep the outgoing parameters in the CallArea live,
 145 -- as well as the update frame.
 146 -- Note: We have to keep the update frame live at a call because of the
 147 -- case where the function doesn't return -- in that case, there won't
 148 -- be a return to keep the update frame live. We'd still better keep the
 149 -- info pointer in the update frame live at any call site;
 150 -- otherwise we could screw up the garbage collector.
 151 liveLastOut :: (BlockId -> SubAreaSet) -> Last -> SubAreaSet
 152 liveLastOut env l =
 153   case l of
 154     LastCall _ Nothing n _ _ ->
 155       add_area (CallArea Old) n out -- add outgoing args (includes upd frame)
 156     LastCall _ (Just k) n _ (Just _) ->
 157       add_area (CallArea Old) n (add_area (CallArea (Young k)) n out)
 158     LastCall _ (Just k) n _ Nothing ->
 159       add_area (CallArea (Young k)) n out
 160     _ -> out
 161   where out = joinOuts slotLattice env l
 162         add_area _ n live | n == 0 = live
 163         add_area a n live =
 164           addToFM live a $ snd $ liveGen (a, n, n) $ lookupWithDefaultFM live [] a
 165
 166 -- The liveness analysis must be precise: otherwise, we won't know if a definition
 167 -- should really kill a live-out stack slot.
 168 -- But the interference graph does not have to be precise -- it might decide that
 169 -- any live areas interfere. To maintain both a precise analysis and an imprecise
 170 -- interference graph, we need to convert the live-out stack slots to graph nodes
 171 -- at each and every instruction; rather than reconstruct a new list of nodes
 172 -- every time, I provide a function to fold over the nodes, which should be a
 173 -- reasonably efficient approach for the implementations we envision.
 174 -- Of course, it will probably be much easier to program if we just return a list...
 175 type Set x = FiniteMap x ()
 176 data IGraphBuilder n =
 177   Builder { foldNodes     :: forall z. SubArea -> (n -> z -> z) -> z -> z
 178           , _wordsOccupied :: AreaMap -> AreaMap -> n -> [Int]
 179           }
 180
 181 areaBuilder :: IGraphBuilder Area
 182 areaBuilder = Builder fold words
 183   where fold (a, _, _) f z = f a z
 184         words areaSize areaMap a =
 185           case lookupFM areaMap a of
 186             Just addr -> [addr .. addr + (lookupFM areaSize a `orElse`
 187                                           pprPanic "wordsOccupied: unknown area" (ppr a))]
 188             Nothing   -> []
 189
 190 --slotBuilder :: IGraphBuilder (Area, Int)
 191 --slotBuilder = undefined
 192
 193 -- Now, we can build the interference graph.
 194 -- The usual story: a definition interferes with all live outs and all other
 195 -- definitions.
 196 type IGraph x = FiniteMap x (Set x)
 197 type IGPair x = (IGraph x, IGraphBuilder x)
 198 igraph :: (Ord x) => IGraphBuilder x -> SlotEnv -> LGraph Middle Last -> IGraph x
 199 igraph builder env g = foldr interfere emptyFM (postorder_dfs g)
 200   where foldN = foldNodes builder
 201         interfere block igraph =
 202           let (h, l) = goto_end (unzip block)
 203               --heads :: ZHead Middle -> (IGraph x, SubAreaSet) -> IGraph x
 204               heads (ZFirst _) (igraph, _)       = igraph
 205               heads (ZHead h m)    (igraph, liveOut) =
 206                 heads h (addEdges igraph m liveOut, liveInSlots m liveOut)
 207               -- add edges between a def and the other defs and liveouts
 208               addEdges igraph i out = fst $ foldSlotsDefd addDef (igraph, out) i
 209               addDef (igraph, out) def@(a, _, _) =
 210                 (foldN def (addDefN out) igraph,
 211                  addToFM out a (snd $ liveGen def (lookupWithDefaultFM out [] a)))
 212               addDefN out n igraph =
 213                 let addEdgeNO o igraph = foldN o addEdgeNN igraph
 214                     addEdgeNN n' igraph = addEdgeNN' n n' $ addEdgeNN' n' n igraph
 215                     addEdgeNN' n n' igraph = addToFM igraph n (addToFM set n' ())
 216                       where set = lookupWithDefaultFM igraph emptyFM n
 217                 in foldFM (\ _ os igraph -> foldr addEdgeNO igraph os) igraph out
 218               env' bid = lookupBlockEnv env bid `orElse` panic "unknown blockId in igraph"
 219           in heads h $ case l of LastExit    -> (igraph, emptyFM)
 220                                  LastOther l -> (addEdges igraph l $ liveLastOut env' l,
 221                                                  liveLastIn l env')
 222
 223 -- Before allocating stack slots, we need to collect one more piece of information:
 224 -- what's the highest offset (in bytes) used in each Area?
 225 -- We'll need to allocate that much space for each Area.
 226 getAreaSize :: ByteOff -> LGraph Middle Last -> AreaMap
 227   -- The domain of the returned mapping consists only of Areas
 228   -- used for (a) variable spill slots, and (b) parameter passing ares for calls
 229 getAreaSize entry_off g@(LGraph _ _) =
 230   fold_blocks (fold_fwd_block first add_regslots last)
 231               (unitFM (CallArea Old) entry_off) g
 232   where first _  z = z
 233         last l@(LastOther (LastCall _ Nothing args res _)) z =
 234           add_regslots l (add (add z area args) area res)
 235           where area = CallArea Old
 236         last l@(LastOther (LastCall _ (Just k) args res _)) z =
 237           add_regslots l (add (add z area args) area res)
 238           where area = CallArea (Young k)
 239         last l z = add_regslots l z
 240         add_regslots i z = foldSlotsUsed addSlot (foldSlotsDefd addSlot z i) i
 241         addSlot z (a@(RegSlot (LocalReg _ ty)), _, _) =
 242           add z a $ widthInBytes $ typeWidth ty
 243         addSlot z _ = z
 244         add z a off = addToFM z a (max off (lookupWithDefaultFM z 0 a))
 245         -- The 'max' is important.  Two calls, to f and g, might share a common
 246         -- continuation (and hence a common CallArea), but their number of overflow
 247         -- parameters might differ.
 248
 249
 250 -- Find the Stack slots occupied by the subarea's conflicts
 251 conflictSlots :: Ord x => IGPair x -> AreaMap -> AreaMap -> SubArea -> Set Int
 252 conflictSlots (ig, Builder foldNodes wordsOccupied) areaSize areaMap subarea =
 253   foldNodes subarea foldNode emptyFM
 254   where foldNode n set = foldFM conflict set $ lookupWithDefaultFM ig emptyFM n
 255         conflict n' () set = liveInSlots areaMap n' set
 256         -- Add stack slots occupied by igraph node n
 257         liveInSlots areaMap n set = foldr setAdd set (wordsOccupied areaSize areaMap n)
 258         setAdd w s = addToFM s w ()
 259
 260 -- Find any open space on the stack, starting from the offset.
 261 -- If the area is a CallArea or a spill slot for a pointer, then it must
 262 -- be word-aligned.
 263 freeSlotFrom :: Ord x => IGPair x -> AreaMap -> Int -> AreaMap -> Area -> Int
 264 freeSlotFrom ig areaSize offset areaMap area =
 265   let size = lookupFM areaSize area `orElse` 0
 266       conflicts = conflictSlots ig areaSize areaMap (area, size, size)
 267       -- CallAreas and Ptrs need to be word-aligned (round up!)
 268       align = case area of CallArea _                                -> align'
 269                            RegSlot  r | isGcPtrType (localRegType r) -> align'
 270                            RegSlot  _                                -> id
 271       align' n = (n + (wORD_SIZE - 1)) `div` wORD_SIZE * wORD_SIZE
 272       -- Find a space big enough to hold the area
 273       findSpace curr 0 = curr
 274       findSpace curr cnt = -- part of target slot, # of bytes left to check
 275         if elemFM curr conflicts then
 276           findSpace (align (curr + size)) size -- try the next (possibly) open space
 277         else findSpace (curr - 1) (cnt - 1)
 278   in findSpace (align (offset + size)) size
 279
 280 -- Find an open space on the stack, and assign it to the area.
 281 allocSlotFrom :: Ord x => IGPair x -> AreaMap -> Int -> AreaMap -> Area -> AreaMap
 282 allocSlotFrom ig areaSize from areaMap area =
 283   if elemFM area areaMap then areaMap
 284   else addToFM areaMap area $ freeSlotFrom ig areaSize from areaMap area
 285
 286 -- | Greedy stack layout.
 287 -- Compute liveness, build the interference graph, and allocate slots for the areas.
 288 -- We visit each basic block in a (generally) forward order.
 289
 290 -- At each instruction that names a register subarea r, we immediately allocate
 291 -- any available slot on the stack by the following procedure:
 292 --  1. Find the sub-areas S that conflict with r
 293 --  2. Find the stack slots used for S
 294 --  3. Choose a contiguous stack space s not in S (s must be large enough to hold r)
 295
 296 -- For a CallArea, we allocate the stack space only when we reach a function
 297 -- call that returns to the CallArea's blockId.
 298 -- Then, we allocate the Area subject to the following constraints:
 299 --   a) It must be younger than all the sub-areas that are live on entry to the block
 300 --         This constraint is only necessary for the successor of a call
 301 --   b) It must not overlap with any already-allocated Area with which it conflicts
 302 --         (ie at some point, not necessarily now, is live at the same time)
 303 --   Part (b) is just the 1,2,3 part above
 304
 305 -- Note: The stack pointer only has to be younger than the youngest live stack slot
 306 -- at proc points. Otherwise, the stack pointer can point anywhere.
 307
 308 layout :: ProcPointSet -> SlotEnv -> ByteOff -> LGraph Middle Last -> AreaMap
 309 -- The domain of the returned map includes an Area for EVERY block
 310 -- including each block that is not the successor of a call (ie is not a proc-point)
 311 -- That's how we return the info of what the SP should be at the entry of every block
 312
 313 layout procPoints env entry_off g =
 314   let ig = (igraph areaBuilder env g, areaBuilder)
 315       env' bid = lookupBlockEnv env bid `orElse` panic "unknown blockId in igraph"
 316       areaSize = getAreaSize entry_off g
 317       -- Find the slots that are live-in to a block tail
 318       live_in (ZTail m l) = liveInSlots m (live_in l)
 319       live_in (ZLast (LastOther l)) = liveLastIn l env'
 320       live_in (ZLast LastExit) = emptyFM
 321
 322       -- Find the youngest live stack slot that has already been allocated
 323       youngest_live :: AreaMap     -- Already allocated
 324                     -> SubAreaSet  -- Sub-areas live here
 325                     -> ByteOff     -- Offset of the youngest byte of any
 326                                    --    already-allocated, live sub-area
 327       youngest_live areaMap live = fold_subareas young_slot live 0
 328         where young_slot (a, o, _) z = case lookupFM areaMap a of
 329                                          Just top -> max z $ top + o
 330                                          Nothing  -> z
 331               fold_subareas f m z = foldFM (\_ s z -> foldr f z s) z m
 332
 333       -- Allocate space for spill slots and call areas
 334       allocVarSlot = allocSlotFrom ig areaSize 0
 335
 336       -- Update the successor's incoming SP.
 337       setSuccSPs inSp bid areaMap =
 338         case (lookupFM areaMap area, lookupBlockEnv (lg_blocks g) bid) of
 339           (Just _, _) -> areaMap -- succ already knows incoming SP
 340           (Nothing, Just (Block _ _)) ->
 341             if elemBlockSet bid procPoints then
 342               let young = youngest_live areaMap $ env' bid
 343                   -- start = case returnOff stackInfo of Just b  -> max b young
 344                   --                                     Nothing -> young
 345                   start = young -- maybe wrong, but I don't understand
 346                                 -- why the preceding is necessary...
 347               in  allocSlotFrom ig areaSize start areaMap area
 348             else addToFM areaMap area inSp
 349           (_, Nothing) -> panic "Block not found in cfg"
 350         where area = CallArea (Young bid)
 351
 352       allocLast (Block id _) areaMap l =
 353         fold_succs (setSuccSPs inSp) l areaMap
 354         where inSp = expectJust "sp in" $ lookupFM areaMap (CallArea (Young id))
 355
 356       allocMidCall m@(MidForeignCall (Safe bid _) _ _ _) t areaMap =
 357         let young     = youngest_live areaMap $ removeLiveSlotDefs (live_in t) m
 358             area      = CallArea (Young bid)
 359             areaSize' = addToFM areaSize area (widthInBytes (typeWidth gcWord))
 360         in  allocSlotFrom ig areaSize' young areaMap area
 361       allocMidCall _ _ areaMap = areaMap
 362
 363       alloc m t areaMap =
 364           foldSlotsDefd alloc' (foldSlotsUsed alloc' (allocMidCall m t areaMap) m) m
 365         where alloc' areaMap (a@(RegSlot _), _, _) = allocVarSlot areaMap a
 366               alloc' areaMap _ = areaMap
 367
 368       layoutAreas areaMap b@(Block _ t) = layout areaMap t
 369         where layout areaMap (ZTail m t) = layout (alloc m t areaMap) t
 370               layout areaMap (ZLast l)   = allocLast b areaMap l
 371       initMap = addToFM (addToFM emptyFM (CallArea Old) 0)
 372                         (CallArea (Young (lg_entry g))) 0
 373       areaMap = foldl layoutAreas initMap (postorder_dfs g)
 374   in -- pprTrace "ProcPoints" (ppr procPoints) $
 375         -- pprTrace "Area SizeMap" (ppr areaSize) $
 376          -- pprTrace "Entry SP" (ppr entrySp) $
 377            -- pprTrace "Area Map" (ppr areaMap) $
 378      areaMap
 379
 380 -- After determining the stack layout, we can:
 381 -- 1. Replace references to stack Areas with addresses relative to the stack
 382 --    pointer.
 383 -- 2. Insert adjustments to the stack pointer to ensure that it is at a
 384 --    conventional location at each proc point.
 385 --    Because we don't take interrupts on the execution stack, we only need the
 386 --    stack pointer to be younger than the live values on the stack at proc points.
 387 -- 3. Compute the maximum stack offset used in the procedure and replace
 388 --    the stack high-water mark with that offset.
 389 manifestSP :: AreaMap -> ByteOff -> LGraph Middle Last -> FuelMonad (LGraph Middle Last)
 390 manifestSP areaMap entry_off g@(LGraph entry _blocks) =
 391   liftM (LGraph entry) $ foldl replB (return emptyBlockEnv) (postorder_dfs g)
 392   where slot a = -- pprTrace "slot" (ppr a) $
 393                    lookupFM areaMap a `orElse` panic "unallocated Area"
 394         slot' (Just id) = slot $ CallArea (Young id)
 395         slot' Nothing   = slot $ CallArea Old
 396         sp_high = maxSlot slot g
 397         proc_entry_sp = slot (CallArea Old) + entry_off
 398
 399         add_sp_off b env =
 400           case Z.last (unzip b) of
 401             LastOther (LastCall {cml_cont = Just succ, cml_ret_args = off}) ->
 402               extendBlockEnv env succ off
 403             _ -> env
 404         spEntryMap = fold_blocks add_sp_off (mkBlockEnv [(entry, entry_off)]) g
 405         spOffset id = lookupBlockEnv spEntryMap id `orElse` 0
 406
 407         sp_on_entry id | id == entry = proc_entry_sp
 408         sp_on_entry id = slot' (Just id) + spOffset id
 409
 410         -- On entry to procpoints, the stack pointer is conventional;
 411         -- otherwise, we check the SP set by predecessors.
 412         replB :: FuelMonad (BlockEnv CmmBlock) -> CmmBlock -> FuelMonad (BlockEnv CmmBlock)
 413         replB blocks (Block id t) =
 414           do bs <- replTail (Block id) spIn t
 415              -- pprTrace "spIn" (ppr id <+> ppr spIn) $ do
 416              liftM (flip (foldr insertBlock) bs) blocks
 417           where spIn = sp_on_entry id
 418         replTail :: (ZTail Middle Last -> CmmBlock) -> Int -> (ZTail Middle Last) ->
 419                     FuelMonad ([CmmBlock])
 420         replTail h spOff (ZTail m@(MidForeignCall (Safe bid _) _ _ _) t) =
 421           replTail (\t' -> h (setSp spOff spOff' (ZTail (middle spOff m) t'))) spOff' t
 422             where spOff' = slot' (Just bid) + widthInBytes (typeWidth gcWord)
 423         replTail h spOff (ZTail m t) = replTail (h . ZTail (middle spOff m)) spOff t
 424         replTail h spOff (ZLast (LastOther l)) = fixSp h spOff l
 425         replTail h _   l@(ZLast LastExit) = return [h l]
 426         middle spOff m = mapExpDeepMiddle (replSlot spOff) m
 427         last   spOff l = mapExpDeepLast   (replSlot spOff) l
 428         replSlot spOff (CmmStackSlot a i) = CmmRegOff (CmmGlobal Sp) (spOff - (slot a + i))
 429         replSlot _ (CmmLit CmmHighStackMark) = -- replacing the high water mark
 430           CmmLit (CmmInt (toInteger (max 0 (sp_high - proc_entry_sp))) (typeWidth bWord))
 431         replSlot _ e = e
 432         -- The block must establish the SP expected at each successsor.
 433         fixSp :: (ZTail Middle Last -> CmmBlock) -> Int -> Last -> FuelMonad ([CmmBlock])
 434         fixSp h spOff l@(LastCall _ k n _ _) = updSp h spOff (slot' k + n) l
 435         fixSp h spOff l@(LastBranch k) =
 436           let succSp = sp_on_entry k in
 437           if succSp /= spOff then
 438                -- pprTrace "updSp" (ppr k <> ppr spOff <> ppr (sp_on_entry k)) $
 439                updSp h spOff succSp l
 440           else return $ [h (ZLast (LastOther (last spOff l)))]
 441         fixSp h spOff l = liftM (uncurry (:)) $ fold_succs succ l $ return (b, [])
 442           where b = h (ZLast (LastOther (last spOff l)))
 443                 succ succId z =
 444                   let succSp = sp_on_entry succId in
 445                   if succSp /= spOff then
 446                     do (b,  bs)  <- z
 447                        (b', bs') <- insertBetween b [setSpMid spOff succSp] succId
 448                        return (b', bs ++ bs')
 449                   else z
 450         updSp h old new l = return [h $ setSp old new $ ZLast $ LastOther (last new l)]
 451         setSpMid sp sp' = MidAssign (CmmGlobal Sp) e
 452           where e = CmmMachOp (MO_Add wordWidth) [CmmReg (CmmGlobal Sp), off]
 453                 off = CmmLit $ CmmInt (toInteger $ sp - sp') wordWidth
 454         setSp sp sp' t = if sp == sp' then t else ZTail (setSpMid sp sp') t
 455
 456
 457 -- To compute the stack high-water mark, we fold over the graph and
 458 -- compute the highest slot offset.
 459 maxSlot :: (Area -> Int) -> CmmGraph -> Int
 460 maxSlot slotOff g = fold_blocks (fold_fwd_block (\ _ x -> x) highSlot highSlot) 0 g
 461   where highSlot i z = foldSlotsUsed add (foldSlotsDefd add z i) i
 462         add z (a, i, _) = max z (slotOff a + i)
 463
 464 -----------------------------------------------------------------------------
 465 -- | Sanity check: stub pointers immediately after they die
 466 -----------------------------------------------------------------------------
 467 -- This will miss stack slots that are last used in a Last node,
 468 -- but it should do pretty well...
 469
 470 type StubPtrFix = FuelMonad (BackwardFixedPoint Middle Last SubAreaSet CmmGraph)
 471
 472 stubSlotsOnDeath :: (LGraph Middle Last) -> FuelMonad (LGraph Middle Last)
 473 stubSlotsOnDeath g = liftM zdfFpContents $ (res :: StubPtrFix)
 474     where res = zdfBRewriteFromL RewriteShallow emptyBlockEnv "stub ptrs" slotLattice
 475                                  liveSlotTransfers rewrites (fact_bot slotLattice) g
 476           rewrites = BackwardRewrites first middle last Nothing
 477           first _ _ = Nothing
 478           last  _ _ = Nothing
 479           middle m liveSlots = foldSlotsUsed (stub liveSlots m) Nothing m
 480           stub liveSlots m rst subarea@(a, off, w) =
 481             if elemSlot liveSlots subarea then rst
 482             else let store = mkStore (CmmStackSlot a off)
 483                                      (stackStubExpr (widthFromBytes w))
 484                  in case rst of Nothing -> Just (mkMiddle m <*> store)
 485                                 Just g  -> Just (g <*> store)