compiler/cmm/CmmStackLayout.hs

   1 {-# OPTIONS_GHC -XGADTs -XNoMonoLocalBinds #-}
   2 -- Norman likes local bindings
   3 -- If this module lives on I'd like to get rid of this flag in due course
   4
   5 -- Todo: remove
   6 {-# OPTIONS_GHC -fno-warn-warnings-deprecations #-}
   7
   8 module CmmStackLayout
   9     ( SlotEnv, liveSlotAnal, liveSlotTransfers, removeLiveSlotDefs
  10     , layout, manifestSP, igraph, areaBuilder
  11     , stubSlotsOnDeath ) -- to help crash early during debugging
  12 where
  13
  14 import Constants
  15 import Prelude hiding (succ, zip, unzip, last)
  16
  17 import BlockId
  18 import Cmm
  19 import CmmExpr
  20 import CmmProcPoint
  21 import Maybes
  22 import MkGraph (stackStubExpr)
  23 import Control.Monad
  24 import OptimizationFuel
  25 import Outputable
  26 import SMRep (ByteOff)
  27
  28 import Compiler.Hoopl
  29
  30 import Data.Map (Map)
  31 import qualified Data.Map as Map
  32 import qualified FiniteMap as Map
  33
  34 ------------------------------------------------------------------------
  35 --                    Stack Layout                                    --
  36 ------------------------------------------------------------------------
  37
  38 -- | Before we lay out the stack, we need to know something about the
  39 -- liveness of the stack slots. In particular, to decide whether we can
  40 -- reuse a stack location to hold multiple stack slots, we need to know
  41 -- when each of the stack slots is used.
  42 -- Although tempted to use something simpler, we really need a full interference
  43 -- graph. Consider the following case:
  44 --   case <...> of
  45 --     1 -> <spill x>; // y is dead out
  46 --     2 -> <spill y>; // x is dead out
  47 --     3 -> <spill x and y>
  48 -- If we consider the arms in order and we use just the deadness information given by a
  49 -- dataflow analysis, we might decide to allocate the stack slots for x and y
  50 -- to the same stack location, which will lead to incorrect code in the third arm.
  51 -- We won't make this mistake with an interference graph.
  52
  53 -- First, the liveness analysis.
  54 -- We represent a slot with an area, an offset into the area, and a width.
  55 -- Tracking the live slots is a bit tricky because there may be loads and stores
  56 -- into only a part of a stack slot (e.g. loading the low word of a 2-word long),
  57 -- e.g. Slot A 0 8 overlaps with Slot A 4 4.
  58 --
  59 -- The definition of a slot set is intended to reduce the number of overlap
  60 -- checks we have to make. There's no reason to check for overlap between
  61 -- slots in different areas, so we segregate the map by Area's.
  62 -- We expect few slots in each Area, so we collect them in an unordered list.
  63 -- To keep these lists short, any contiguous live slots are coalesced into
  64 -- a single slot, on insertion.
  65
  66 slotLattice :: DataflowLattice SubAreaSet
  67 slotLattice = DataflowLattice "live slots" Map.empty add
  68   where add _ (OldFact old) (NewFact new) = case Map.foldRightWithKey addArea (False, old) new of
  69                                               (change, x) -> (changeIf change, x)
  70         addArea a newSlots z = foldr (addSlot a) z newSlots
  71         addSlot a slot (changed, map) =
  72           let (c, live) = liveGen slot $ Map.findWithDefault [] a map
  73           in (c || changed, Map.insert a live map)
  74
  75 slotLatticeJoin :: [SubAreaSet] -> SubAreaSet
  76 slotLatticeJoin facts = foldr extend (fact_bot slotLattice) facts
  77   where extend fact res = snd $ fact_join slotLattice undefined (OldFact fact) (NewFact res)
  78
  79 type SlotEnv   = BlockEnv SubAreaSet
  80   -- The sub-areas live on entry to the block
  81
  82 liveSlotAnal :: CmmGraph -> FuelUniqSM SlotEnv
  83 liveSlotAnal g = liftM snd $ dataflowPassBwd g [] $ analBwd slotLattice liveSlotTransfers
  84
  85 -- Add the subarea s to the subareas in the list-set (possibly coalescing it with
  86 -- adjacent subareas), and also return whether s was a new addition.
  87 liveGen :: SubArea -> [SubArea] -> (Bool, [SubArea])
  88 liveGen s set = liveGen' s set []
  89   where liveGen' s [] z = (True, s : z)
  90         liveGen' s@(a, hi, w) (s'@(a', hi', w') : rst) z =
  91           if a /= a' || hi < lo' || lo > hi' then    -- no overlap
  92             liveGen' s rst (s' : z)
  93           else if s' `contains` s then               -- old contains new
  94             (False, set)
  95           else                                       -- overlap: coalesce the slots
  96             let new_hi = max hi hi'
  97                 new_lo = min lo lo'
  98             in liveGen' (a, new_hi, new_hi - new_lo) rst z
  99           where lo  = hi  - w  -- remember: areas grow down
 100                 lo' = hi' - w'
 101         contains (a, hi, w) (a', hi', w') =
 102           a == a' && hi >= hi' && hi - w <= hi' - w'
 103
 104 liveKill :: SubArea -> [SubArea] -> [SubArea]
 105 liveKill (a, hi, w) set = -- pprTrace "killing slots in area" (ppr a) $
 106                           liveKill' set []
 107   where liveKill' [] z = z
 108         liveKill' (s'@(a', hi', w') : rst) z =
 109           if a /= a' || hi < lo' || lo > hi' then    -- no overlap
 110             liveKill' rst (s' : z)
 111           else                                       -- overlap: split the old slot
 112             let z'  = if hi' > hi  then (a, hi', hi' - hi)  : z else z
 113                 z'' = if lo  > lo' then (a, lo,  lo  - lo') : z' else z'
 114             in liveKill' rst z''
 115           where lo  = hi  - w  -- remember: areas grow down
 116                 lo' = hi' - w'
 117
 118 -- Note: the stack slots that hold variables returned on the stack are not
 119 -- considered live in to the block -- we treat the first node as a definition site.
 120 -- BEWARE?: Am I being a little careless here in failing to check for the
 121 -- entry Id (which would use the CallArea Old).
 122 liveSlotTransfers :: BwdTransfer CmmNode SubAreaSet
 123 liveSlotTransfers = mkBTransfer3 frt mid lst
 124   where frt :: CmmNode C O -> SubAreaSet -> SubAreaSet
 125         frt (CmmEntry l) f = Map.delete (CallArea (Young l)) f
 126         mid :: CmmNode O O -> SubAreaSet -> SubAreaSet
 127         mid n f = foldSlotsUsed addSlot (removeLiveSlotDefs f n) n
 128         lst :: CmmNode O C -> FactBase SubAreaSet -> SubAreaSet
 129         lst n f = liveInSlots n $ case n of
 130           CmmCall {cml_cont=Nothing, cml_args=args} -> add_area (CallArea Old) args out
 131           CmmCall {cml_cont=Just k, cml_args=args}  -> add_area (CallArea Old) args (add_area (CallArea (Young k)) args out)
 132           CmmForeignCall {succ=k, updfr=oldend}     -> add_area (CallArea Old) oldend (add_area (CallArea (Young k)) wORD_SIZE out)
 133           _                                         -> out
 134          where out = joinOutFacts slotLattice n f
 135                add_area _ n live | n == 0 = live
 136                add_area a n live = Map.insert a (snd $ liveGen (a, n, n) $ Map.findWithDefault [] a live) live
 137
 138 -- Slot sets: adding slots, removing slots, and checking for membership.
 139 liftToArea :: Area -> ([SubArea] -> [SubArea]) -> SubAreaSet -> SubAreaSet
 140 addSlot, removeSlot :: SubAreaSet -> SubArea -> SubAreaSet
 141 elemSlot            :: SubAreaSet -> SubArea -> Bool
 142 liftToArea a f map = Map.insert a (f (Map.findWithDefault [] a map)) map
 143 addSlot    live (a, i, w) = liftToArea a (snd . liveGen  (a, i, w)) live
 144 removeSlot live (a, i, w) = liftToArea a       (liveKill (a, i, w)) live
 145 elemSlot   live (a, i, w) =
 146   not $ fst $ liveGen  (a, i, w) (Map.findWithDefault [] a live)
 147
 148 removeLiveSlotDefs :: (DefinerOfSlots s, UserOfSlots s) => SubAreaSet -> s -> SubAreaSet
 149 removeLiveSlotDefs = foldSlotsDefd removeSlot
 150
 151 liveInSlots :: (DefinerOfSlots s, UserOfSlots s) => s -> SubAreaSet -> SubAreaSet
 152 liveInSlots x live = foldSlotsUsed addSlot (removeLiveSlotDefs live x) x
 153
 154 liveLastIn :: CmmNode O C -> (BlockId -> SubAreaSet) -> SubAreaSet
 155 liveLastIn l env = liveInSlots l (liveLastOut env l)
 156
 157 -- Don't forget to keep the outgoing parameters in the CallArea live,
 158 -- as well as the update frame.
 159 -- Note: We have to keep the update frame live at a call because of the
 160 -- case where the function doesn't return -- in that case, there won't
 161 -- be a return to keep the update frame live. We'd still better keep the
 162 -- info pointer in the update frame live at any call site;
 163 -- otherwise we could screw up the garbage collector.
 164 liveLastOut :: (BlockId -> SubAreaSet) -> CmmNode O C -> SubAreaSet
 165 liveLastOut env l =
 166   case l of
 167     CmmCall _ Nothing n _ _ ->
 168       add_area (CallArea Old) n out -- add outgoing args (includes upd frame)
 169     CmmCall _ (Just k) n _ _ ->
 170       add_area (CallArea Old) n (add_area (CallArea (Young k)) n out)
 171     CmmForeignCall { succ = k, updfr = oldend } ->
 172       add_area (CallArea Old) oldend (add_area (CallArea (Young k)) wORD_SIZE out)
 173     _ -> out
 174   where out = slotLatticeJoin $ map env $ successors l
 175         add_area _ n live | n == 0 = live
 176         add_area a n live =
 177           Map.insert a (snd $ liveGen (a, n, n) $ Map.findWithDefault [] a live) live
 178
 179 -- The liveness analysis must be precise: otherwise, we won't know if a definition
 180 -- should really kill a live-out stack slot.
 181 -- But the interference graph does not have to be precise -- it might decide that
 182 -- any live areas interfere. To maintain both a precise analysis and an imprecise
 183 -- interference graph, we need to convert the live-out stack slots to graph nodes
 184 -- at each and every instruction; rather than reconstruct a new list of nodes
 185 -- every time, I provide a function to fold over the nodes, which should be a
 186 -- reasonably efficient approach for the implementations we envision.
 187 -- Of course, it will probably be much easier to program if we just return a list...
 188 type Set x = Map x ()
 189 data IGraphBuilder n =
 190   Builder { foldNodes     :: forall z. SubArea -> (n -> z -> z) -> z -> z
 191           , _wordsOccupied :: AreaMap -> AreaMap -> n -> [Int]
 192           }
 193
 194 areaBuilder :: IGraphBuilder Area
 195 areaBuilder = Builder fold words
 196   where fold (a, _, _) f z = f a z
 197         words areaSize areaMap a =
 198           case Map.lookup a areaMap of
 199             Just addr -> [addr .. addr + (Map.lookup a areaSize `orElse`
 200                                           pprPanic "wordsOccupied: unknown area" (ppr areaSize <+> ppr a))]
 201             Nothing   -> []
 202
 203 --slotBuilder :: IGraphBuilder (Area, Int)
 204 --slotBuilder = undefined
 205
 206 -- Now, we can build the interference graph.
 207 -- The usual story: a definition interferes with all live outs and all other
 208 -- definitions.
 209 type IGraph x = Map x (Set x)
 210 type IGPair x = (IGraph x, IGraphBuilder x)
 211 igraph :: (Ord x) => IGraphBuilder x -> SlotEnv -> CmmGraph -> IGraph x
 212 igraph builder env g = foldr interfere Map.empty (postorderDfs g)
 213   where foldN = foldNodes builder
 214         interfere block igraph = foldBlockNodesB3 (first, middle, last) block igraph
 215           where first _ (igraph, _) = igraph
 216                 middle node (igraph, liveOut) =
 217                   (addEdges igraph node liveOut, liveInSlots node liveOut)
 218                 last node igraph =
 219                   (addEdges igraph node $ liveLastOut env' node, liveLastIn node env')
 220
 221                 -- add edges between a def and the other defs and liveouts
 222                 addEdges igraph i out = fst $ foldSlotsDefd addDef (igraph, out) i
 223                 addDef (igraph, out) def@(a, _, _) =
 224                   (foldN def (addDefN out) igraph,
 225                    Map.insert a (snd $ liveGen def (Map.findWithDefault [] a out)) out)
 226                 addDefN out n igraph =
 227                   let addEdgeNO o igraph = foldN o addEdgeNN igraph
 228                       addEdgeNN n' igraph = addEdgeNN' n n' $ addEdgeNN' n' n igraph
 229                       addEdgeNN' n n' igraph = Map.insert n (Map.insert n' () set) igraph
 230                         where set = Map.findWithDefault Map.empty n igraph
 231                   in Map.foldRightWithKey (\ _ os igraph -> foldr addEdgeNO igraph os) igraph out
 232                 env' bid = mapLookup bid env `orElse` panic "unknown blockId in igraph"
 233
 234 -- Before allocating stack slots, we need to collect one more piece of information:
 235 -- what's the highest offset (in bytes) used in each Area?
 236 -- We'll need to allocate that much space for each Area.
 237
 238 -- JD: WHY CAN'T THIS COME FROM THE slot-liveness info?
 239 getAreaSize :: ByteOff -> CmmGraph -> AreaMap
 240   -- The domain of the returned mapping consists only of Areas
 241   -- used for (a) variable spill slots, and (b) parameter passing ares for calls
 242 getAreaSize entry_off g =
 243   foldGraphBlocks (foldBlockNodesF3 (first, add_regslots, last))
 244               (Map.singleton (CallArea Old) entry_off) g
 245   where first _  z = z
 246         last :: CmmNode O C -> Map Area Int -> Map Area Int
 247         last l@(CmmCall _ Nothing args res _) z  =  add_regslots l (add (add z area args) area res)
 248           where area = CallArea Old
 249         last l@(CmmCall _ (Just k) args res _) z =  add_regslots l (add (add z area args) area res)
 250           where area = CallArea (Young k)
 251         last l@(CmmForeignCall {succ = k}) z     =  add_regslots l (add z area wORD_SIZE)
 252           where area = CallArea (Young k)
 253         last l z                                 =  add_regslots l z
 254         add_regslots i z = foldSlotsUsed addSlot (foldSlotsDefd addSlot z i) i
 255         addSlot z (a@(RegSlot (LocalReg _ ty)), _, _) =
 256           add z a $ widthInBytes $ typeWidth ty
 257         addSlot z _ = z
 258         add z a off = Map.insert a (max off (Map.findWithDefault 0 a z)) z
 259         -- The 'max' is important.  Two calls, to f and g, might share a common
 260         -- continuation (and hence a common CallArea), but their number of overflow
 261         -- parameters might differ.
 262
 263
 264 -- Find the Stack slots occupied by the subarea's conflicts
 265 conflictSlots :: Ord x => IGPair x -> AreaMap -> AreaMap -> SubArea -> Set Int
 266 conflictSlots (ig, Builder foldNodes wordsOccupied) areaSize areaMap subarea =
 267   foldNodes subarea foldNode Map.empty
 268   where foldNode n set = Map.foldRightWithKey conflict set $ Map.findWithDefault Map.empty n ig
 269         conflict n' () set = liveInSlots areaMap n' set
 270         -- Add stack slots occupied by igraph node n
 271         liveInSlots areaMap n set = foldr setAdd set (wordsOccupied areaSize areaMap n)
 272         setAdd w s = Map.insert w () s
 273
 274 -- Find any open space on the stack, starting from the offset.
 275 -- If the area is a CallArea or a spill slot for a pointer, then it must
 276 -- be word-aligned.
 277 freeSlotFrom :: Ord x => IGPair x -> AreaMap -> Int -> AreaMap -> Area -> Int
 278 freeSlotFrom ig areaSize offset areaMap area =
 279   let size = Map.lookup area areaSize `orElse` 0
 280       conflicts = conflictSlots ig areaSize areaMap (area, size, size)
 281       -- CallAreas and Ptrs need to be word-aligned (round up!)
 282       align = case area of CallArea _                                -> align'
 283                            RegSlot  r | isGcPtrType (localRegType r) -> align'
 284                            RegSlot  _                                -> id
 285       align' n = (n + (wORD_SIZE - 1)) `div` wORD_SIZE * wORD_SIZE
 286       -- Find a space big enough to hold the area
 287       findSpace curr 0 = curr
 288       findSpace curr cnt = -- part of target slot, # of bytes left to check
 289         if Map.member curr conflicts then
 290           findSpace (align (curr + size)) size -- try the next (possibly) open space
 291         else findSpace (curr - 1) (cnt - 1)
 292   in findSpace (align (offset + size)) size
 293
 294 -- Find an open space on the stack, and assign it to the area.
 295 allocSlotFrom :: Ord x => IGPair x -> AreaMap -> Int -> AreaMap -> Area -> AreaMap
 296 allocSlotFrom ig areaSize from areaMap area =
 297   if Map.member area areaMap then areaMap
 298   else Map.insert area (freeSlotFrom ig areaSize from areaMap area) areaMap
 299
 300 -- | Greedy stack layout.
 301 -- Compute liveness, build the interference graph, and allocate slots for the areas.
 302 -- We visit each basic block in a (generally) forward order.
 303
 304 -- At each instruction that names a register subarea r, we immediately allocate
 305 -- any available slot on the stack by the following procedure:
 306 --  1. Find the sub-areas S that conflict with r
 307 --  2. Find the stack slots used for S
 308 --  3. Choose a contiguous stack space s not in S (s must be large enough to hold r)
 309
 310 -- For a CallArea, we allocate the stack space only when we reach a function
 311 -- call that returns to the CallArea's blockId.
 312 -- Then, we allocate the Area subject to the following constraints:
 313 --   a) It must be younger than all the sub-areas that are live on entry to the block
 314 --         This constraint is only necessary for the successor of a call
 315 --   b) It must not overlap with any already-allocated Area with which it conflicts
 316 --         (ie at some point, not necessarily now, is live at the same time)
 317 --   Part (b) is just the 1,2,3 part above
 318
 319 -- Note: The stack pointer only has to be younger than the youngest live stack slot
 320 -- at proc points. Otherwise, the stack pointer can point anywhere.
 321
 322 layout :: ProcPointSet -> SlotEnv -> ByteOff -> CmmGraph -> AreaMap
 323 -- The domain of the returned map includes an Area for EVERY block
 324 -- including each block that is not the successor of a call (ie is not a proc-point)
 325 -- That's how we return the info of what the SP should be at the entry of every block
 326
 327 layout procPoints env entry_off g =
 328   let ig = (igraph areaBuilder env g, areaBuilder)
 329       env' bid = mapLookup bid env `orElse` panic "unknown blockId in igraph"
 330       areaSize = getAreaSize entry_off g
 331
 332       -- Find the youngest live stack slot that has already been allocated
 333       youngest_live :: AreaMap     -- Already allocated
 334                     -> SubAreaSet  -- Sub-areas live here
 335                     -> ByteOff     -- Offset of the youngest byte of any
 336                                    --    already-allocated, live sub-area
 337       youngest_live areaMap live = fold_subareas young_slot live 0
 338         where young_slot (a, o, _) z = case Map.lookup a areaMap of
 339                                          Just top -> max z $ top + o
 340                                          Nothing  -> z
 341               fold_subareas f m z = Map.foldRightWithKey (\_ s z -> foldr f z s) z m
 342
 343       -- Allocate space for spill slots and call areas
 344       allocVarSlot = allocSlotFrom ig areaSize 0
 345
 346       -- Update the successor's incoming SP.
 347       setSuccSPs inSp bid areaMap =
 348         case (Map.lookup area areaMap , mapLookup bid (toBlockMap g)) of
 349           (Just _, _) -> areaMap -- succ already knows incoming SP
 350           (Nothing, Just _) ->
 351             if setMember bid procPoints then
 352               let young = youngest_live areaMap $ env' bid
 353                   -- start = case returnOff stackInfo of Just b  -> max b young
 354                   --                                     Nothing -> young
 355                   start = young -- maybe wrong, but I don't understand
 356                                 -- why the preceding is necessary...
 357               in  allocSlotFrom ig areaSize start areaMap area
 358             else Map.insert area inSp areaMap
 359           (_, Nothing) -> panic "Block not found in cfg"
 360         where area = CallArea (Young bid)
 361
 362       layoutAreas areaMap block = foldBlockNodesF3 (flip const, allocMid, allocLast (entryLabel block)) block areaMap
 363       allocMid m areaMap = foldSlotsDefd alloc' (foldSlotsUsed alloc' areaMap m) m
 364       allocLast bid l areaMap =
 365         foldr (setSuccSPs inSp) areaMap' (successors l)
 366         where inSp = expectJust "sp in" $ Map.lookup (CallArea (Young bid)) areaMap
 367               areaMap' = foldSlotsDefd alloc' (foldSlotsUsed alloc' areaMap l) l
 368       alloc' areaMap (a@(RegSlot _), _, _) = allocVarSlot areaMap a
 369       alloc' areaMap _ = areaMap
 370
 371       initMap = Map.insert (CallArea (Young (g_entry g))) 0 $
 372                   Map.insert (CallArea Old) 0 Map.empty
 373
 374       areaMap = foldl layoutAreas initMap (postorderDfs g)
 375   in -- pprTrace "ProcPoints" (ppr procPoints) $
 376         -- pprTrace "Area SizeMap" (ppr areaSize) $
 377          -- pprTrace "Entry SP" (ppr entrySp) $
 378            -- pprTrace "Area Map" (ppr areaMap) $
 379      areaMap
 380
 381 -- After determining the stack layout, we can:
 382 -- 1. Replace references to stack Areas with addresses relative to the stack
 383 --    pointer.
 384 -- 2. Insert adjustments to the stack pointer to ensure that it is at a
 385 --    conventional location at each proc point.
 386 --    Because we don't take interrupts on the execution stack, we only need the
 387 --    stack pointer to be younger than the live values on the stack at proc points.
 388 -- 3. Compute the maximum stack offset used in the procedure and replace
 389 --    the stack high-water mark with that offset.
 390 manifestSP :: AreaMap -> ByteOff -> CmmGraph -> FuelUniqSM CmmGraph
 391 manifestSP areaMap entry_off g@(CmmGraph {g_entry=entry}) =
 392   ofBlockMap entry `liftM` foldl replB (return mapEmpty) (postorderDfs g)
 393   where slot a = -- pprTrace "slot" (ppr a) $
 394                    Map.lookup a areaMap `orElse` panic "unallocated Area"
 395         slot' (Just id) = slot $ CallArea (Young id)
 396         slot' Nothing   = slot $ CallArea Old
 397         sp_high = maxSlot slot g
 398         proc_entry_sp = slot (CallArea Old) + entry_off
 399
 400         add_sp_off :: CmmBlock -> BlockEnv Int -> BlockEnv Int
 401         add_sp_off b env =
 402           case lastNode b of
 403             CmmCall {cml_cont=Just succ, cml_ret_args=off} -> mapInsert succ off env
 404             CmmForeignCall {succ=succ}                     -> mapInsert succ wORD_SIZE env
 405             _                                              -> env
 406         spEntryMap = foldGraphBlocks add_sp_off (mapInsert entry entry_off emptyBlockMap) g
 407         spOffset id = mapLookup id spEntryMap `orElse` 0
 408
 409         sp_on_entry id | id == entry = proc_entry_sp
 410         sp_on_entry id = slot' (Just id) + spOffset id
 411
 412         -- On entry to procpoints, the stack pointer is conventional;
 413         -- otherwise, we check the SP set by predecessors.
 414         replB :: FuelUniqSM (BlockEnv CmmBlock) -> CmmBlock -> FuelUniqSM (BlockEnv CmmBlock)
 415         replB blocks block =
 416           do let (head, middles, JustC tail :: MaybeC C (CmmNode O C)) = blockToNodeList block
 417                  middles' = map (middle spIn) middles
 418              bs <- replLast head middles' tail
 419              flip (foldr insertBlock) bs `liftM` blocks
 420           where spIn = sp_on_entry (entryLabel block)
 421
 422                 middle spOff m = mapExpDeep (replSlot spOff) m
 423                 last   spOff l = mapExpDeep (replSlot spOff) l
 424                 replSlot spOff (CmmStackSlot a i) = CmmRegOff (CmmGlobal Sp) (spOff - (slot a + i))
 425                 replSlot _ (CmmLit CmmHighStackMark) = -- replacing the high water mark
 426                   CmmLit (CmmInt (toInteger (max 0 (sp_high - proc_entry_sp))) (typeWidth bWord))
 427                 replSlot _ e = e
 428
 429                 replLast :: MaybeC C (CmmNode C O) -> [CmmNode O O] -> CmmNode O C -> FuelUniqSM [CmmBlock]
 430                 replLast h m l@(CmmCall _ k n _ _)       = updSp (slot' k + n) h m l
 431                 -- JD: LastForeignCall probably ought to have an outgoing
 432                 --     arg size, just like LastCall
 433                 replLast h m l@(CmmForeignCall {succ=k}) = updSp (slot' (Just k) + wORD_SIZE) h m l
 434                 replLast h m l@(CmmBranch k)             = updSp (sp_on_entry k) h m l
 435                 replLast h m l                           = uncurry (:) `liftM` foldr succ (return (b, [])) (successors l)
 436                   where b :: CmmBlock
 437                         b = updSp' spIn h m l
 438                         succ succId z =
 439                           let succSp = sp_on_entry succId in
 440                           if succSp /= spIn then
 441                             do (b,  bs)  <- z
 442                                (b', bs') <- insertBetween b (adjustSp succSp) succId
 443                                return (b', bs' ++ bs)
 444                           else z
 445
 446                 updSp sp h m l = return [updSp' sp h m l]
 447                 updSp' sp h m l | sp == spIn = blockOfNodeList (h, m, JustC $ last sp l)
 448                                 | otherwise  = blockOfNodeList (h, m ++ adjustSp sp, JustC $ last sp l)
 449                 adjustSp sp = [CmmAssign (CmmGlobal Sp) e]
 450                   where e = CmmMachOp (MO_Add wordWidth) [CmmReg (CmmGlobal Sp), off]
 451                         off = CmmLit $ CmmInt (toInteger $ spIn - sp) wordWidth
 452
 453
 454 -- To compute the stack high-water mark, we fold over the graph and
 455 -- compute the highest slot offset.
 456 maxSlot :: (Area -> Int) -> CmmGraph -> Int
 457 maxSlot slotOff g = foldGraphBlocks (foldBlockNodesF3 (flip const, highSlot, highSlot)) 0 g
 458   where highSlot i z = foldSlotsUsed add (foldSlotsDefd add z i) i
 459         add z (a, i, _) = max z (slotOff a + i)
 460
 461 -----------------------------------------------------------------------------
 462 -- | Sanity check: stub pointers immediately after they die
 463 -----------------------------------------------------------------------------
 464 -- This will miss stack slots that are last used in a Last node,
 465 -- but it should do pretty well...
 466
 467 stubSlotsOnDeath :: CmmGraph -> FuelUniqSM CmmGraph
 468 stubSlotsOnDeath g = liftM fst $ dataflowPassBwd g [] $ analRewBwd slotLattice
 469                                                                    liveSlotTransfers
 470                                                                    rewrites
 471     where rewrites = mkBRewrite3 frt mid lst
 472           frt _ _ = return Nothing
 473           mid m liveSlots = return $ foldSlotsUsed (stub liveSlots m) Nothing m
 474           lst _ _ = return Nothing
 475           stub liveSlots m rst subarea@(a, off, w) =
 476             if elemSlot liveSlots subarea then rst
 477             else let store = mkMiddle $ CmmStore (CmmStackSlot a off)
 478                                                  (stackStubExpr (widthFromBytes w))
 479                  in case rst of Nothing -> Just (mkMiddle m <*> store)
 480                                 Just g  -> Just (g <*> store)