From dc6a72b94f1c2de24cf51a2ca8f44ada6db17ab9 Mon Sep 17 00:00:00 2001 From: "dias@eecs.harvard.edu" Date: Mon, 13 Oct 2008 13:42:51 +0000 Subject: [PATCH] forgot a few files --- compiler/cmm/CmmBuildInfoTables.hs | 556 ++++++++++++++++++++++++++++++++++++ compiler/cmm/CmmStackLayout.hs | 434 ++++++++++++++++++++++++++++ 2 files changed, 990 insertions(+) create mode 100644 compiler/cmm/CmmBuildInfoTables.hs create mode 100644 compiler/cmm/CmmStackLayout.hs diff --git a/compiler/cmm/CmmBuildInfoTables.hs b/compiler/cmm/CmmBuildInfoTables.hs new file mode 100644 index 0000000..9a72166 --- /dev/null +++ b/compiler/cmm/CmmBuildInfoTables.hs @@ -0,0 +1,556 @@ +module CmmBuildInfoTables + ( CAFSet, CAFEnv, cafAnal, localCAFInfo, mkTopCAFInfo + , setInfoTableSRT, setInfoTableStackMap + , TopSRT, emptySRT, srtToData + , finishInfoTables, lowerSafeForeignCalls, extendEnvsForSafeForeignCalls ) +where + +#include "HsVersions.h" + +import Constants +import Digraph +import qualified Prelude as P +import Prelude +import Util (sortLe) + +import BlockId +import Bitmap +import CLabel +import Cmm hiding (blockId) +import CmmExpr +import CmmInfo +import CmmProcPointZ +import CmmStackLayout +import CmmTx +import DFMonad +import FastString +import FiniteMap +import ForeignCall +import IdInfo +import List (sortBy) +import Maybes +import MkZipCfg +import MkZipCfgCmm hiding (CmmAGraph, CmmBlock, CmmTopZ, CmmZ, CmmGraph) +import Monad +import Name +import Outputable +import Panic +import SMRep +import StgCmmClosure +import StgCmmForeign +import StgCmmMonad +import StgCmmUtils +import UniqSupply +import ZipCfg hiding (zip, unzip, last) +import qualified ZipCfg as G +import ZipCfgCmmRep +import ZipDataflow + +---------------------------------------------------------------- +-- Building InfoTables + + +----------------------------------------------------------------------- +-- Stack Maps + +-- Given a block ID, we return a representation of the layout of the stack, +-- as suspended before entering that block. +-- (For a return site to a function call, the layout does not include the +-- parameter passing area (or the "return address" on the stack)). +-- If the element is `Nothing`, then it represents a word of the stack that +-- does not contain a live pointer. +-- If the element is `Just` a register, then it represents a live spill slot +-- for a pointer; we assume that a pointer is the size of a word. +-- The head of the list represents the young end of the stack where the infotable +-- pointer for the block `Bid` is stored. +-- The infotable pointer itself is not included in the list. +-- Call areas are also excluded from the list: besides the stuff in the update +-- frame (and the return infotable), call areas should never be live across +-- function calls. + +-- RTS Invariant: All pointers must be word-aligned because each bit in the bitmap +-- represents a word. Consequently, we have to be careful when we see a live slot +-- on the stack: if we have packed multiple sub-word values into a word, +-- we have to make sure that we only mark the entire word as a non-pointer. + +-- Also, don't forget to stop at the old end of the stack (oldByte), +-- which may differ depending on whether there is an update frame. +live_ptrs :: ByteOff -> BlockEnv SubAreaSet -> AreaMap -> BlockId -> [Maybe LocalReg] +live_ptrs oldByte slotEnv areaMap bid = + pprTrace "live_ptrs for" (ppr bid <+> ppr youngByte <+> ppr liveSlots) $ + reverse $ slotsToList youngByte liveSlots [] + where slotsToList n [] results | n == oldByte = results -- at old end of stack frame + slotsToList n (s : _) _ | n == oldByte = + pprPanic "slot left off live_ptrs" (ppr s <+> ppr oldByte <+> + ppr n <+> ppr liveSlots <+> ppr youngByte) + slotsToList n _ _ | n < oldByte = + panic "stack slots not allocated on word boundaries?" + slotsToList n l@((n', r, w) : rst) results = + if n == (n' + w) then -- slot's young byte is at n + ASSERT (not (isPtr r) || + (n `mod` wORD_SIZE == 0 && w == wORD_SIZE)) -- ptrs must be aligned + slotsToList next (dropWhile (non_ptr_younger_than next) rst) + (stack_rep : results) + else slotsToList next (dropWhile (non_ptr_younger_than next) l) + (Nothing : results) + where next = n - wORD_SIZE + stack_rep = if isPtr r then Just r else Nothing + slotsToList n [] results = slotsToList (n - wORD_SIZE) [] (Nothing : results) + non_ptr_younger_than next (n', r, w) = + n' + w > next && + ASSERT (not (isPtr r)) + True + isPtr = isGcPtrType . localRegType + liveSlots = sortBy (\ (off,_,_) (off',_,_) -> compare off' off) + (foldFM (\_ -> flip $ foldl add_slot) [] slots) + + add_slot rst (a@(RegSlot r@(LocalReg _ ty)), off, w) = + if off == w && widthInBytes (typeWidth ty) == w then + (expectJust "add_slot" (lookupFM areaMap a), r, w) : rst + else panic "live_ptrs: only part of a variable live at a proc point" + add_slot rst (CallArea Old, off, w) = + rst -- the update frame (or return infotable) should be live + -- would be nice to check that only that part of the callarea is live... + add_slot rst c@((CallArea _), _, _) = + rst + -- JD: THIS ISN'T CURRENTLY A CORRECTNESS PROBLEM, BUT WE SHOULD REALLY + -- MAKE LIVENESS INFO AROUND CALLS MORE PRECISE -- FOR NOW, A 32-BIT + -- FLOAT PADS OUT TO 64 BITS, BUT WE ASSUME THE WHOLE PARAMETER-PASSING + -- AREA IS LIVE (WHICH IT ISN'T...). WE SHOULD JUST PUT THE LIVE AREAS + -- IN THE CALL NODES, WHICH SHOULD EVENTUALLY HAVE LIVE REGISTER AS WELL, + -- SO IT'S ALL GOING IN THE SAME DIRECTION. + -- pprPanic "CallAreas must not be live across function calls" (ppr bid <+> ppr c) + slots = expectJust "live_ptrs slots" $ lookupBlockEnv slotEnv bid + youngByte = expectJust "live_ptrs bid_pos" $ lookupFM areaMap (CallArea (Young bid)) + +-- Construct the stack maps for the given procedure. +setInfoTableStackMap :: SlotEnv -> AreaMap -> CmmTopForInfoTables -> CmmTopForInfoTables +setInfoTableStackMap _ _ t@(NoInfoTable _) = t +setInfoTableStackMap slotEnv areaMap t@(FloatingInfoTable info bid updfr_off) = + updInfo (const (live_ptrs updfr_off slotEnv areaMap bid)) id t +setInfoTableStackMap slotEnv areaMap + t@(ProcInfoTable (CmmProc (CmmInfo _ _ infoTbl) _ _ g@(LGraph entry _ blocks)) + procpoints) = + case blockSetToList procpoints of + [bid] -> + let oldByte = case infoTbl of + CmmInfoTable _ _ _ (ContInfo _ _) -> + case lookupBlockEnv blocks bid of + Just (Block _ (StackInfo {returnOff = Just n}) _) -> n + _ -> pprPanic "misformed graph at procpoint" (ppr g) + _ -> initUpdFrameOff -- entry to top-level function + stack_vars = live_ptrs oldByte slotEnv areaMap bid + in updInfo (const stack_vars) id t + _ -> panic "setInfoTableStackMap: unexpect number of procpoints" + -- until we stop splitting the graphs at procpoints in the native path +setInfoTableStackMap _ _ _ = panic "unexpected case for setInfoTableStackMap" +{- +setInfoTableStackMap slotEnv areaMap + (Just bid, p@(CmmProc (CmmInfo _ _ infoTbl) _ _ g@(LGraph entry _ blocks))) = + let oldByte = case infoTbl of + CmmInfoTable _ _ _ (ContInfo _ _) -> + case lookupBlockEnv blocks bid of + Just (Block _ (StackInfo {returnOff = Just n}) _) -> n + _ -> pprPanic "misformed graph at procpoint" (ppr g) + _ -> initUpdFrameOff -- entry to top-level function + stack_vars = live_ptrs oldByte slotEnv areaMap bid + in (Just bid, upd_info_tbl (const stack_vars) id p) +setInfoTableStackMap _ _ t@(_, CmmData {}) = t +setInfoTableStackMap _ _ _ = panic "bad args to setInfoTableStackMap" +-} + + +----------------------------------------------------------------------- +-- SRTs + +-- WE NEED AN EXAMPLE HERE. +-- IN PARTICULAR, WE NEED TO POINT OUT THE DISTINCTION BETWEEN +-- FUNCTIONS WITH STATIC CLOSURES AND THOSE THAT MUST BE CONSTRUCTED +-- DYNAMICALLY (AND HENCE CAN'T BE REFERENCED IN AN SRT). +-- IN THE LATTER CASE, WE HAVE TO TAKE ALL THE CAFs REFERENCED BY +-- THE CLOSURE AND INLINE THEM INTO ANY SRT THAT MAY MENTION THE CLOSURE. +-- (I.E. TAKE THE TRANSITIVE CLOSURE, but only for non-static closures). + + +----------------------------------------------------------------------- +-- Finding the CAFs used by a procedure + +type CAFSet = FiniteMap CLabel () +type CAFEnv = BlockEnv CAFSet + +-- First, an analysis to find live CAFs. +cafLattice :: DataflowLattice CAFSet +cafLattice = DataflowLattice "live cafs" emptyFM add True + where add new old = if sizeFM new' > sizeFM old then aTx new' else noTx new' + where new' = new `plusFM` old + +cafTransfers :: BackwardTransfers Middle Last CAFSet +cafTransfers = BackwardTransfers first middle last + where first live _ = live + middle live m = pprTrace "cafmiddle" (ppr m) $ foldExpDeepMiddle addCaf m live + last env l = foldExpDeepLast addCaf l (joinOuts cafLattice env l) + addCaf e set = case e of + CmmLit (CmmLabel c) -> add c set + CmmLit (CmmLabelOff c _) -> add c set + CmmLit (CmmLabelDiffOff c1 c2 _) -> add c1 $ add c2 set + _ -> set + add l s = pprTrace "CAF analysis saw label" (ppr l) $ + if hasCAF l then + pprTrace "has caf" (ppr l) $ addToFM s (cvtToClosureLbl l) () + else (pprTrace "no cafs" (ppr l) $ s) + +type CafFix a = FuelMonad (BackwardFixedPoint Middle Last CAFSet a) +cafAnal :: LGraph Middle Last -> FuelMonad CAFEnv +cafAnal g = liftM zdfFpFacts (res :: CafFix ()) + where res = zdfSolveFromL emptyBlockEnv "live CAF analysis" cafLattice + cafTransfers (fact_bot cafLattice) g + +----------------------------------------------------------------------- +-- Building the SRTs + +-- Description of the SRT for a given module. +-- Note that this SRT may grow as we greedily add new CAFs to it. +data TopSRT = TopSRT { lbl :: CLabel + , next_elt :: Int -- the next entry in the table + , rev_elts :: [CLabel] + , elt_map :: FiniteMap CLabel Int } + -- map: CLabel -> its last entry in the table +instance Outputable TopSRT where + ppr (TopSRT lbl next elts eltmap) = + text "TopSRT:" <+> ppr lbl <+> ppr next <+> ppr elts <+> ppr eltmap + +emptySRT :: MonadUnique m => m TopSRT +emptySRT = + do top_lbl <- getUniqueM >>= \ u -> return $ mkSRTLabel (mkFCallName u "srt") NoCafRefs + return TopSRT { lbl = top_lbl, next_elt = 0, rev_elts = [], elt_map = emptyFM } + +cafMember :: TopSRT -> CLabel -> Bool +cafMember srt lbl = elemFM lbl (elt_map srt) + +cafOffset :: TopSRT -> CLabel -> Maybe Int +cafOffset srt lbl = lookupFM (elt_map srt) lbl + +addCAF :: CLabel -> TopSRT -> TopSRT +addCAF caf srt = + srt { next_elt = last + 1 + , rev_elts = caf : rev_elts srt + , elt_map = addToFM (elt_map srt) caf last } + where last = next_elt srt + +srtToData :: TopSRT -> CmmZ +srtToData srt = Cmm [CmmData RelocatableReadOnlyData (CmmDataLabel (lbl srt) : tbl)] + where tbl = map (CmmStaticLit . CmmLabel) (reverse (rev_elts srt)) + +-- Once we have found the CAFs, we need to do two things: +-- 1. Build a table of all the CAFs used in the procedure. +-- 2. Compute the C_SRT describing the subset of CAFs live at each procpoint. +-- +-- When building the local view of the SRT, we first make sure that all the CAFs are +-- in the SRT. Then, if the number of CAFs is small enough to fit in a bitmap, +-- we make sure they're all close enough to the bottom of the table that the +-- bitmap will be able to cover all of them. +buildSRTs :: TopSRT -> FiniteMap CLabel CAFSet -> CAFSet -> + FuelMonad (TopSRT, Maybe CmmTopZ, C_SRT) +buildSRTs topSRT topCAFMap cafs = + -- This is surely the wrong way to get names, as in BlockId + do top_lbl <- getUniqueM >>= \ u -> return $ mkSRTLabel (mkFCallName u "srt") NoCafRefs + let liftCAF lbl () z = -- get CAFs for functions without static closures + case lookupFM topCAFMap lbl of Just cafs -> z `plusFM` cafs + Nothing -> addToFM z lbl () + sub_srt topSRT localCafs = + let cafs = keysFM (foldFM liftCAF emptyFM localCafs) + mkSRT topSRT = + do localSRTs <- procpointSRT (lbl topSRT) (elt_map topSRT) cafs + return (topSRT, localSRTs) + in pprTrace "cafs" (ppr cafs) $ + if length cafs > maxBmpSize then + mkSRT (foldl add_if_missing topSRT cafs) + else -- make sure all the cafs are near the bottom of the srt + mkSRT (add_if_too_far topSRT cafs) + add_if_missing srt caf = + if cafMember srt caf then srt else addCAF caf srt + -- If a CAF is more than maxBmpSize entries from the young end of the + -- SRT, then we add it to the SRT again. + -- (Note: Not in the SRT => infinitely far.) + add_if_too_far srt@(TopSRT {elt_map = m}) cafs = + add srt (sortBy farthestFst cafs) + where + farthestFst x y = case (lookupFM m x, lookupFM m y) of + (Nothing, Nothing) -> EQ + (Nothing, Just _) -> LT + (Just _, Nothing) -> GT + (Just d, Just d') -> compare d' d + add srt [] = srt + add srt@(TopSRT {next_elt = next}) (caf : rst) = + case cafOffset srt caf of + Just ix -> if next - ix > maxBmpSize then + add (addCAF caf srt) rst + else srt + Nothing -> add (addCAF caf srt) rst + (topSRT, subSRTs) <- sub_srt topSRT cafs + let (sub_tbls, blockSRTs) = subSRTs + return (topSRT, sub_tbls, blockSRTs) + +-- Construct an SRT bitmap. +-- Adapted from simpleStg/SRT.lhs, which expects Id's. +procpointSRT :: CLabel -> FiniteMap CLabel Int -> [CLabel] -> + FuelMonad (Maybe CmmTopZ, C_SRT) +procpointSRT top_srt top_table [] = + return (Nothing, NoC_SRT) +procpointSRT top_srt top_table entries = + do (top, srt) <- bitmap `seq` to_SRT top_srt offset len bitmap + return (top, srt) + where + ints = map (expectJust "constructSRT" . lookupFM top_table) entries + sorted_ints = sortLe (<=) ints + offset = head sorted_ints + bitmap_entries = map (subtract offset) sorted_ints + len = P.last bitmap_entries + 1 + bitmap = intsToBitmap len bitmap_entries + +maxBmpSize :: Int +maxBmpSize = widthInBits wordWidth `div` 2 + +-- Adapted from codeGen/StgCmmUtils, which converts from SRT to C_SRT. +to_SRT :: CLabel -> Int -> Int -> Bitmap -> FuelMonad (Maybe CmmTopZ, C_SRT) +to_SRT top_srt off len bmp + | len > maxBmpSize || bmp == [fromIntegral srt_escape] + = do id <- getUniqueM + let srt_desc_lbl = mkLargeSRTLabel id + tbl = CmmData RelocatableReadOnlyData $ + CmmDataLabel srt_desc_lbl : map CmmStaticLit + ( cmmLabelOffW top_srt off + : mkWordCLit (fromIntegral len) + : map mkWordCLit bmp) + return (Just tbl, C_SRT srt_desc_lbl 0 srt_escape) + | otherwise + = return (Nothing, C_SRT top_srt off (fromIntegral (head bmp))) + -- The fromIntegral converts to StgHalfWord + +-- Gather CAF info for a procedure, but only if the procedure +-- doesn't have a static closure. +-- (If it has a static closure, it will already have an SRT to +-- keep its CAFs live.) +localCAFInfo :: CAFEnv -> CmmTopZ -> Maybe (CLabel, CAFSet) +localCAFInfo _ t@(CmmData _ _) = Nothing +localCAFInfo cafEnv (CmmProc (CmmInfo _ _ infoTbl) top_l _ (LGraph entry _ _)) = + case infoTbl of + CmmInfoTable False _ _ _ -> + Just (cvtToClosureLbl top_l, + expectJust "maybeBindCAFs" $ lookupBlockEnv cafEnv entry) + _ -> Nothing + +-- Once we have the local CAF sets for some (possibly) mutually +-- recursive functions, we can create an environment mapping +-- each function to its set of CAFs. Note that a CAF may +-- be a reference to a function. If that function f does not have +-- a static closure, then we need to refer specifically +-- to the set of CAFs used by f. Of course, the set of CAFs +-- used by f must be included in the local CAF sets that are input to +-- this function. To minimize lookup time later, we return +-- the environment with every reference to f replaced by its set of CAFs. +-- To do this replacement efficiently, we gather strongly connected +-- components, then we sort the components in topological order. +mkTopCAFInfo :: [(CLabel, CAFSet)] -> FiniteMap CLabel CAFSet +mkTopCAFInfo localCAFs = foldl addToTop emptyFM g + where addToTop env (AcyclicSCC (l, cafset)) = + addToFM env l (flatten env cafset) + addToTop env (CyclicSCC nodes) = + let (lbls, cafsets) = unzip nodes + cafset = foldl plusFM emptyFM cafsets `delListFromFM` lbls + in foldl (\env l -> addToFM env l (flatten env cafset)) env lbls + flatten env cafset = foldFM (lookup env) emptyFM cafset + lookup env caf () cafset' = + case lookupFM env caf of Just cafs -> foldFM add cafset' cafs + Nothing -> add caf () cafset' + add caf () cafset' = addToFM cafset' caf () + g = stronglyConnCompFromEdgedVertices + (map (\n@(l, cafs) -> (n, l, keysFM cafs)) localCAFs) + +type StackLayout = [Maybe LocalReg] + +-- Construct the SRTs for the given procedure. +setInfoTableSRT :: CAFEnv -> FiniteMap CLabel CAFSet -> TopSRT -> + CmmTopForInfoTables -> FuelMonad (TopSRT, [CmmTopForInfoTables]) +setInfoTableSRT cafEnv topCAFMap topSRT t@(ProcInfoTable p procpoints) = + case blockSetToList procpoints of + [bid] -> setSRT cafEnv topCAFMap topSRT t bid + _ -> panic "setInfoTableStackMap: unexpect number of procpoints" + -- until we stop splitting the graphs at procpoints in the native path +setInfoTableSRT cafEnv topCAFMap topSRT t@(FloatingInfoTable info bid _) = + setSRT cafEnv topCAFMap topSRT t bid +setInfoTableSRT _ _ topSRT t@(NoInfoTable _) = return (topSRT, [t]) + +setSRT :: CAFEnv -> FiniteMap CLabel CAFSet -> TopSRT -> + CmmTopForInfoTables -> BlockId -> FuelMonad (TopSRT, [CmmTopForInfoTables]) +setSRT cafEnv topCAFMap topSRT t bid = + do (topSRT, cafTable, srt) <- buildSRTs topSRT topCAFMap + (expectJust "sub_srt" $ lookupBlockEnv cafEnv bid) + let t' = updInfo id (const srt) t + case cafTable of + Just tbl -> return (topSRT, [t', NoInfoTable tbl]) + Nothing -> return (topSRT, [t']) + +updInfo :: (StackLayout -> StackLayout) -> (C_SRT -> C_SRT) -> + CmmTopForInfoTables -> CmmTopForInfoTables +updInfo toVars toSrt (ProcInfoTable (CmmProc info top_l top_args g) procpoints) = + ProcInfoTable (CmmProc (updInfoTbl toVars toSrt info) top_l top_args g) procpoints +updInfo toVars toSrt (FloatingInfoTable info bid updfr_off) = + FloatingInfoTable (updInfoTbl toVars toSrt info) bid updfr_off +updInfo toVars toSrt (NoInfoTable _) = panic "can't update NoInfoTable" +updInfo _ _ _ = panic "unexpected arg to updInfo" + +updInfoTbl :: (StackLayout -> StackLayout) -> (C_SRT -> C_SRT) -> CmmInfo -> CmmInfo +updInfoTbl toVars toSrt (CmmInfo gc upd_fr (CmmInfoTable s p t typeinfo)) + = CmmInfo gc upd_fr (CmmInfoTable s p t typeinfo') + where typeinfo' = case typeinfo of + t@(ConstrInfo _ _ _) -> t + (FunInfo c s a d e) -> FunInfo c (toSrt s) a d e + (ThunkInfo c s) -> ThunkInfo c (toSrt s) + (ThunkSelectorInfo x s) -> ThunkSelectorInfo x (toSrt s) + (ContInfo v s) -> ContInfo (toVars v) (toSrt s) +updInfoTbl toVars toSrt t@(CmmInfo _ _ CmmNonInfoTable) = t + +-- Lower the CmmTopForInfoTables type down to good old CmmTopZ +-- by emitting info tables as data where necessary. +finishInfoTables :: CmmTopForInfoTables -> IO [CmmTopZ] +finishInfoTables (NoInfoTable t) = return [t] +finishInfoTables (ProcInfoTable p _) = return [p] +finishInfoTables (FloatingInfoTable (CmmInfo _ _ infotbl) bid _) = + do uniq_supply <- mkSplitUniqSupply 'i' + return $ mkBareInfoTable (retPtLbl bid) (uniqFromSupply uniq_supply) infotbl + +---------------------------------------------------------------- +-- Safe foreign calls: +-- Our analyses capture the dataflow facts at block boundaries, but we need +-- to extend the CAF and live-slot analyses to safe foreign calls as well, +-- which show up as middle nodes. +extendEnvsForSafeForeignCalls :: CAFEnv -> SlotEnv -> CmmGraph -> (CAFEnv, SlotEnv) +extendEnvsForSafeForeignCalls cafEnv slotEnv g = + fold_blocks block (cafEnv, slotEnv) g + where block b@(Block _ _ t) z = + tail ( bt_last_in cafTransfers (lookupFn cafEnv) l + , bt_last_in liveSlotTransfers (lookupFn slotEnv) l) + z head + where (head, last) = goto_end (G.unzip b) + l = case last of LastOther l -> l + LastExit -> panic "extendEnvs lastExit" + tail lives z (ZFirst _ _) = z + tail lives@(cafs, slots) (cafEnv, slotEnv) + (ZHead h m@(MidForeignCall (Safe bid updfr_off) _ _ _)) = + let slots' = removeLiveSlotDefs slots m + slotEnv' = extendBlockEnv slotEnv bid slots' + cafEnv' = extendBlockEnv cafEnv bid cafs + in tail (upd lives m) (cafEnv', slotEnv') h + tail lives z (ZHead h m) = tail (upd lives m) z h + lookupFn map k = expectJust "extendEnvsForSafeFCalls" $ lookupBlockEnv map k + upd (cafs, slots) m = + (bt_middle_in cafTransfers cafs m, bt_middle_in liveSlotTransfers slots m) + +-- Safe foreign calls: We need to insert the code that suspends and resumes +-- the thread before and after a safe foreign call. +-- Why do we do this so late in the pipeline? +-- Because we need this code to appear without interrruption: you can't rely on the +-- value of the stack pointer between the call and resetting the thread state; +-- you need to have an infotable on the young end of the stack both when +-- suspending the thread and making the foreign call. +-- All of this is much easier if we insert the suspend and resume calls here. + +-- At the same time, we prepare for the stages of the compiler that +-- build the proc points. We have to do this at the same time because +-- the safe foreign calls need special treatment with respect to infotables. +-- A safe foreign call needs an infotable even though it isn't +-- a procpoint. The following datatype captures the information +-- needed to generate the infotables along with the Cmm data and procedures. + +data CmmTopForInfoTables + = NoInfoTable CmmTopZ -- must be CmmData + | ProcInfoTable CmmTopZ BlockSet -- CmmProc; argument is its set of procpoints + | FloatingInfoTable CmmInfo BlockId UpdFrameOffset +instance Outputable CmmTopForInfoTables where + ppr (NoInfoTable t) = text "NoInfoTable: " <+> ppr t + ppr (ProcInfoTable t bids) = text "ProcInfoTable: " <+> ppr t <+> ppr bids + ppr (FloatingInfoTable info bid upd) = + text "FloatingInfoTable: " <+> ppr info <+> ppr bid <+> ppr upd + +-- The `safeState' record collects the info we update while lowering the +-- safe foreign calls in the graph. +data SafeState = State { s_blocks :: BlockEnv CmmBlock + , s_pps :: ProcPointSet + , s_safeCalls :: [CmmTopForInfoTables]} + +lowerSafeForeignCalls + :: ProcPointSet -> [[CmmTopForInfoTables]] -> + CmmTopZ -> FuelMonad [[CmmTopForInfoTables]] +lowerSafeForeignCalls _ rst t@(CmmData _ _) = return $ [NoInfoTable t] : rst +lowerSafeForeignCalls procpoints rst + t@(CmmProc info l args g@(LGraph entry off blocks)) = do + let init = return $ State emptyBlockEnv emptyBlockSet [] + let block b@(Block bid _ _) z = do + state@(State {s_pps = ppset, s_blocks = blocks}) <- z + let ppset' = if bid == entry then extendBlockSet ppset bid else ppset + state' = state { s_pps = ppset' } + if hasSafeForeignCall b + then lowerSafeCallBlock state' b + else return (state' { s_blocks = insertBlock b blocks }) + State blocks' g_procpoints safeCalls <- fold_blocks block init g + return $ (ProcInfoTable (CmmProc info l args (LGraph entry off blocks')) g_procpoints + : safeCalls) : rst + +-- Check for foreign calls -- if none, then we can avoid copying the block. +hasSafeForeignCall :: CmmBlock -> Bool +hasSafeForeignCall (Block _ _ t) = tail t + where tail (ZTail (MidForeignCall (Safe _ _) _ _ _) t) = True + tail (ZTail _ t) = tail t + tail (ZLast _) = False + +-- Lower each safe call in the block, update the CAF and slot environments +-- to include each of those calls, and insert the new block in the blockEnv. +lowerSafeCallBlock :: SafeState-> CmmBlock -> FuelMonad SafeState +lowerSafeCallBlock state b = tail (return state) (ZBlock head (ZLast last)) + where (head, last) = goto_end (G.unzip b) + tail s b@(ZBlock (ZFirst _ _) _) = + do state <- s + return $ state { s_blocks = insertBlock (G.zip b) (s_blocks state) } + tail s (ZBlock (ZHead h m@(MidForeignCall (Safe bid updfr_off) _ _ _)) t) = + do state <- s + let state' = state + { s_safeCalls = FloatingInfoTable emptyContInfoTable bid updfr_off : + s_safeCalls state } + (state'', t') <- lowerSafeForeignCall state' m t + tail (return state'') (ZBlock h t') + tail s (ZBlock (ZHead h m) t) = tail s (ZBlock h (ZTail m t)) + + +-- Late in the code generator, we want to insert the code necessary +-- to lower a safe foreign call to a sequence of unsafe calls. +lowerSafeForeignCall :: + SafeState -> Middle -> ZTail Middle Last -> FuelMonad (SafeState, ZTail Middle Last) +lowerSafeForeignCall state m@(MidForeignCall (Safe infotable updfr) _ _ _) tail = do + let newTemp rep = getUniqueM >>= \u -> return (LocalReg u rep) + -- Both 'id' and 'new_base' are KindNonPtr because they're + -- RTS-only objects and are not subject to garbage collection + id <- newTemp bWord + new_base <- newTemp (cmmRegType (CmmGlobal BaseReg)) + let (caller_save, caller_load) = callerSaveVolatileRegs + load_tso <- newTemp gcWord -- TODO FIXME NOW + let suspendThread = CmmLit (CmmLabel (mkRtsCodeLabel (sLit "suspendThread"))) + resumeThread = CmmLit (CmmLabel (mkRtsCodeLabel (sLit "resumeThread"))) + suspend = mkStore (CmmReg spReg) (CmmLit (CmmBlock infotable)) <*> + saveThreadState <*> + caller_save <*> + mkUnsafeCall (ForeignTarget suspendThread + (ForeignConvention CCallConv [AddrHint] [AddrHint])) + [id] [CmmReg (CmmGlobal BaseReg)] + resume = mkUnsafeCall (ForeignTarget resumeThread + (ForeignConvention CCallConv [AddrHint] [AddrHint])) + [new_base] [CmmReg (CmmLocal id)] <*> + -- Assign the result to BaseReg: we + -- might now have a different Capability! + mkAssign (CmmGlobal BaseReg) (CmmReg (CmmLocal new_base)) <*> + caller_load <*> + loadThreadState load_tso + Graph tail' blocks' <- + liftUniq (graphOfAGraph (suspend <*> mkMiddle m <*> resume <*> mkZTail tail)) + return (state {s_blocks = s_blocks state `plusBlockEnv` blocks'}, tail') +lowerSafeForeignCall _ _ _ = panic "lowerSafeForeignCall was passed something else" diff --git a/compiler/cmm/CmmStackLayout.hs b/compiler/cmm/CmmStackLayout.hs new file mode 100644 index 0000000..60f4b5c --- /dev/null +++ b/compiler/cmm/CmmStackLayout.hs @@ -0,0 +1,434 @@ +module CmmStackLayout + ( SlotEnv, liveSlotAnal, liveSlotTransfers, removeLiveSlotDefs + , layout, manifestSP, igraph, areaBuilder + , stubSlotsOnDeath ) -- to help crash early during debugging +where + +import Constants +import qualified Prelude as P +import Prelude hiding (zip, unzip, last) + +import BlockId +import CmmExpr +import CmmProcPointZ +import CmmTx +import DFMonad +import FiniteMap +import Maybes +import MkZipCfg +import MkZipCfgCmm hiding (CmmBlock, CmmGraph) +import Monad +import Outputable +import Panic +import ZipCfg +import ZipCfgCmmRep +import ZipDataflow + +------------------------------------------------------------------------ +-- Stack Layout -- +------------------------------------------------------------------------ + +-- | Before we lay out the stack, we need to know something about the +-- liveness of the stack slots. In particular, to decide whether we can +-- reuse a stack location to hold multiple stack slots, we need to know +-- when each of the stack slots is used. +-- Although tempted to use something simpler, we really need a full interference +-- graph. Consider the following case: +-- case <...> of +-- 1 -> ; // y is dead out +-- 2 -> ; // x is dead out +-- 3 -> +-- If we consider the arms in order and we use just the deadness information given by a +-- dataflow analysis, we might decide to allocate the stack slots for x and y +-- to the same stack location, which will lead to incorrect code in the third arm. +-- We won't make this mistake with an interference graph. + +-- First, the liveness analysis. +-- We represent a slot with an area, an offset into the area, and a width. +-- Tracking the live slots is a bit tricky because there may be loads and stores +-- into only a part of a stack slot (e.g. loading the low word of a 2-word long), +-- e.g. Slot A 0 8 overlaps with Slot A 4 4. +-- +-- The definition of a slot set is intended to reduce the number of overlap +-- checks we have to make. There's no reason to check for overlap between +-- slots in different areas, so we segregate the map by Area's. +-- We expect few slots in each Area, so we collect them in an unordered list. +-- To keep these lists short, any contiguous live slots are coalesced into +-- a single slot, on insertion. + +slotLattice :: DataflowLattice SubAreaSet +slotLattice = DataflowLattice "live slots" emptyFM add True + where add new old = case foldFM addArea (False, old) new of + (True, x) -> aTx x + (False, x) -> noTx x + addArea a newSlots z = foldr (addSlot a) z newSlots + addSlot a slot (changed, map) = + let (c, live) = liveGen slot $ lookupWithDefaultFM map [] a + in (c || changed, addToFM map a live) + +type SlotEnv = BlockEnv SubAreaSet +type SlotFix a = FuelMonad (BackwardFixedPoint Middle Last SubAreaSet a) + +liveSlotAnal :: LGraph Middle Last -> FuelMonad SlotEnv +liveSlotAnal g = liftM zdfFpFacts (res :: SlotFix ()) + where res = zdfSolveFromL emptyBlockEnv "live slot analysis" slotLattice + liveSlotTransfers (fact_bot slotLattice) g + +-- Add the subarea s to the subareas in the list-set (possibly coalescing it with +-- adjacent subareas), and also return whether s was a new addition. +liveGen :: SubArea -> [SubArea] -> (Bool, [SubArea]) +liveGen s set = liveGen' s set [] + where liveGen' s [] z = (True, s : z) + liveGen' s@(a, hi, w) (s'@(a', hi', w') : rst) z = + if a /= a' || hi < lo' || lo > hi' then -- no overlap + liveGen' s rst (s' : z) + else if s' `contains` s then -- old contains new + (False, set) + else -- overlap: coalesce the slots + let new_hi = max hi hi' + new_lo = min lo lo' + in liveGen' (a, new_hi, new_hi - new_lo) rst z + where lo = hi - w -- remember: areas grow down + lo' = hi' - w' + contains (a, hi, w) (a', hi', w') = + a == a' && hi >= hi' && hi - w <= hi' - w' + +liveKill :: SubArea -> [SubArea] -> [SubArea] +liveKill (a, hi, w) set = pprTrace "killing slots in area" (ppr a) $ liveKill' set [] + where liveKill' [] z = z + liveKill' (s'@(a', hi', w') : rst) z = + if a /= a' || hi < lo' || lo > hi' then -- no overlap + liveKill' rst (s' : z) + else -- overlap: split the old slot + let z' = if hi' > hi then (a, hi', hi' - hi) : z else z + z'' = if lo > lo' then (a, lo, lo - lo') : z' else z' + in liveKill' rst z'' + where lo = hi - w -- remember: areas grow down + lo' = hi' - w' + +-- Note: the stack slots that hold variables returned on the stack are not +-- considered live in to the block -- we treat the first node as a definition site. +-- BEWARE?: Am I being a little careless here in failing to check for the +-- entry Id (which would use the CallArea Old). +liveSlotTransfers :: BackwardTransfers Middle Last SubAreaSet +liveSlotTransfers = + BackwardTransfers first liveInSlots liveLastIn + where first live id = delFromFM live (CallArea (Young id)) + +-- Slot sets: adding slots, removing slots, and checking for membership. +liftToArea :: Area -> ([SubArea] -> [SubArea]) -> SubAreaSet -> SubAreaSet +addSlot, removeSlot :: SubAreaSet -> SubArea -> SubAreaSet +elemSlot :: SubAreaSet -> SubArea -> Bool +liftToArea a f map = addToFM map a $ f (lookupWithDefaultFM map [] a) +addSlot live (a, i, w) = liftToArea a (snd . liveGen (a, i, w)) live +removeSlot live (a, i, w) = liftToArea a (liveKill (a, i, w)) live +elemSlot live (a, i, w) = + not $ fst $ liveGen (a, i, w) (lookupWithDefaultFM live [] a) + +removeLiveSlotDefs :: (DefinerOfSlots s, UserOfSlots s) => SubAreaSet -> s -> SubAreaSet +removeLiveSlotDefs = foldSlotsDefd removeSlot + +liveInSlots :: (DefinerOfSlots s, UserOfSlots s) => SubAreaSet -> s -> SubAreaSet +liveInSlots live x = foldSlotsUsed addSlot (removeLiveSlotDefs live x) x + +liveLastIn :: (BlockId -> SubAreaSet) -> Last -> SubAreaSet +liveLastIn env l = liveInSlots (liveLastOut env l) l + +-- Don't forget to keep the outgoing parameters in the CallArea live, +-- as well as the update frame. +liveLastOut :: (BlockId -> SubAreaSet) -> Last -> SubAreaSet +liveLastOut env l = + case l of + LastCall _ Nothing n _ -> + add_area (CallArea Old) n out -- add outgoing args (includes upd frame) + LastCall _ (Just k) n _ -> add_area (CallArea (Young k)) n out + _ -> out + where out = joinOuts slotLattice env l + add_area _ n live | n == 0 = live + add_area a n live = + addToFM live a $ snd $ liveGen (a, n, n) $ lookupWithDefaultFM live [] a + +-- The liveness analysis must be precise: otherwise, we won't know if a definition +-- should really kill a live-out stack slot. +-- But the interference graph does not have to be precise -- it might decide that +-- any live areas interfere. To maintain both a precise analysis and an imprecise +-- interference graph, we need to convert the live-out stack slots to graph nodes +-- at each and every instruction; rather than reconstruct a new list of nodes +-- every time, I provide a function to fold over the nodes, which should be a +-- reasonably efficient approach for the implementations we envision. +-- Of course, it will probably be much easier to program if we just return a list... +type Set x = FiniteMap x () +data IGraphBuilder n = + Builder { foldNodes :: forall z. SubArea -> (n -> z -> z) -> z -> z + , _wordsOccupied :: AreaMap -> AreaMap -> n -> [Int] + } + +areaBuilder :: IGraphBuilder Area +areaBuilder = Builder fold words + where fold (a, _, _) f z = f a z + words areaSize areaMap a = + case lookupFM areaMap a of + Just addr -> [addr .. addr + (lookupFM areaSize a `orElse` + pprPanic "wordsOccupied: unknown area" (ppr a))] + Nothing -> [] + +--slotBuilder :: IGraphBuilder (Area, Int) +--slotBuilder = undefined + +-- Now, we can build the interference graph. +-- The usual story: a definition interferes with all live outs and all other +-- definitions. +type IGraph x = FiniteMap x (Set x) +type IGPair x = (IGraph x, IGraphBuilder x) +igraph :: (Ord x) => IGraphBuilder x -> SlotEnv -> LGraph Middle Last -> IGraph x +igraph builder env g = foldr interfere emptyFM (postorder_dfs g) + where foldN = foldNodes builder + interfere block igraph = + let (h, l) = goto_end (unzip block) + --heads :: ZHead Middle -> (IGraph x, SubAreaSet) -> IGraph x + heads (ZFirst _ _) (igraph, _) = igraph + heads (ZHead h m) (igraph, liveOut) = + heads h (addEdges igraph m liveOut, liveInSlots liveOut m) + -- add edges between a def and the other defs and liveouts + addEdges igraph i out = fst $ foldSlotsDefd addDef (igraph, out) i + addDef (igraph, out) def@(a, _, _) = + (foldN def (addDefN out) igraph, + addToFM out a (snd $ liveGen def (lookupWithDefaultFM out [] a))) + addDefN out n igraph = + let addEdgeNO o igraph = foldN o addEdgeNN igraph + addEdgeNN n' igraph = addEdgeNN' n n' $ addEdgeNN' n' n igraph + addEdgeNN' n n' igraph = addToFM igraph n (addToFM set n' ()) + where set = lookupWithDefaultFM igraph emptyFM n + in foldFM (\ _ os igraph -> foldr addEdgeNO igraph os) igraph out + env' bid = lookupBlockEnv env bid `orElse` panic "unknown blockId in igraph" + in heads h $ case l of LastExit -> (igraph, emptyFM) + LastOther l -> (addEdges igraph l $ liveLastOut env' l, + liveLastIn env' l) + +-- Before allocating stack slots, we need to collect one more piece of information: +-- what's the highest offset (in bytes) used in each Area? +-- We'll need to allocate that much space for each Area. +getAreaSize :: LGraph Middle Last -> AreaMap +getAreaSize g@(LGraph _ off _) = + fold_blocks (fold_fwd_block first add_regslots last) + (unitFM (CallArea Old) off) g + where first id (StackInfo {argBytes = Just off}) z = add z (CallArea (Young id)) off + first _ _ z = z + add_regslots i z = foldSlotsUsed addSlot (foldSlotsDefd addSlot z i) i + last l@(LastOther (LastCall _ Nothing off _)) z = + add_regslots l (add z (CallArea Old) off) + last l@(LastOther (LastCall _ (Just k) off _)) z = + add_regslots l (add z (CallArea (Young k)) off) + last l z = add_regslots l z + addSlot z (a@(RegSlot _), off, _) = add z a off + addSlot z _ = z + add z a off = addToFM z a (max off (lookupWithDefaultFM z 0 a)) + + +-- Find the Stack slots occupied by the subarea's conflicts +conflictSlots :: Ord x => IGPair x -> AreaMap -> AreaMap -> SubArea -> Set Int +conflictSlots (ig, Builder foldNodes wordsOccupied) areaSize areaMap subarea = + foldNodes subarea foldNode emptyFM + where foldNode n set = foldFM conflict set $ lookupWithDefaultFM ig emptyFM n + conflict n' () set = liveInSlots areaMap n' set + -- Add stack slots occupied by igraph node n + liveInSlots areaMap n set = foldr setAdd set (wordsOccupied areaSize areaMap n) + setAdd w s = addToFM s w () + +-- Find any open space on the stack, starting from the offset. +-- If the area is a CallArea or a spill slot for a pointer, then it must +-- be word-aligned. +freeSlotFrom :: Ord x => IGPair x -> AreaMap -> Int -> AreaMap -> Area -> Int +freeSlotFrom ig areaSize offset areaMap area = + let size = lookupFM areaSize area `orElse` 0 + conflicts = conflictSlots ig areaSize areaMap (area, size, size) + -- CallAreas and Ptrs need to be word-aligned (round up!) + align = case area of CallArea _ -> align' + RegSlot r | isGcPtrType (localRegType r) -> align' + RegSlot _ -> id + align' n = (n + (wORD_SIZE - 1)) `div` wORD_SIZE * wORD_SIZE + -- Find a space big enough to hold the area + findSpace curr 0 = curr + findSpace curr cnt = -- part of target slot, # of bytes left to check + if elemFM curr conflicts then + findSpace (align (curr + size)) size -- try the next (possibly) open space + else findSpace (curr - 1) (cnt - 1) + in findSpace (align (offset + size)) size + +-- Find an open space on the stack, and assign it to the area. +allocSlotFrom :: Ord x => IGPair x -> AreaMap -> Int -> AreaMap -> Area -> AreaMap +allocSlotFrom ig areaSize from areaMap area = + if elemFM area areaMap then areaMap + else addToFM areaMap area $ freeSlotFrom ig areaSize from areaMap area + +-- | Greedy stack layout. +-- Compute liveness, build the interference graph, and allocate slots for the areas. +-- We visit each basic block in a (generally) forward order. +-- At each instruction that names a register subarea r, we immediately allocate +-- any available slot on the stack by the following procedure: +-- 1. Find the nodes N' that conflict with r +-- 2. Find the stack slots used for N' +-- 3. Choose a contiguous stack space s not in N' (s must be large enough to hold r) +-- For a CallArea, we allocate the stack space only when we reach a function +-- call that returns to the CallArea's blockId. +-- We use a similar procedure, with one exception: the stack space +-- must be allocated below the youngest stack slot that is live out. + +-- Note: The stack pointer only has to be younger than the youngest live stack slot +-- at proc points. Otherwise, the stack pointer can point anywhere. +layout :: ProcPointSet -> SlotEnv -> LGraph Middle Last -> AreaMap +layout procPoints env g@(LGraph _ entrySp _) = + let builder = areaBuilder + ig = (igraph builder env g, builder) + env' bid = lookupBlockEnv env bid `orElse` panic "unknown blockId in igraph" + areaSize = getAreaSize g + -- Find the slots that are live-in to the block + live_in (ZTail m l) = liveInSlots (live_in l) m + live_in (ZLast (LastOther l)) = liveLastIn env' l + live_in (ZLast LastExit) = emptyFM + -- Find the youngest live stack slot + youngest_live areaMap live = fold_subareas young_slot live 0 + where young_slot (a, o, _) z = case lookupFM areaMap a of + Just top -> max z $ top + o + Nothing -> z + fold_subareas :: (SubArea -> z -> z) -> SubAreaSet -> z -> z + fold_subareas f m z = foldFM (\_ s z -> foldr f z s) z m + -- Allocate space for spill slots and call areas + allocVarSlot = allocSlotFrom ig areaSize 0 + allocCallSlot areaMap (Block id stackInfo t) + | elemBlockSet id procPoints = + let young = youngest_live areaMap $ live_in t + start = case returnOff stackInfo of Just b -> max b young + Nothing -> young + z = allocSlotFrom ig areaSize start areaMap (CallArea (Young id)) + in pprTrace "allocCallSlot for" (ppr id <+> ppr young <+> ppr (live_in t) <+> ppr z) z + allocCallSlot areaMap _ = areaMap + -- mid foreign calls need to have info tables placed on the stack + allocMidCall m@(MidForeignCall (Safe bid _) _ _ _) t areaMap = + let young = youngest_live areaMap $ removeLiveSlotDefs (live_in t) m + area = CallArea (Young bid) + areaSize' = addToFM areaSize area (widthInBytes (typeWidth gcWord)) + in allocSlotFrom ig areaSize' young areaMap area + allocMidCall _ _ areaMap = areaMap + alloc m t areaMap = + foldSlotsDefd alloc' (foldSlotsUsed alloc' (allocMidCall m t areaMap) m) m + where alloc' areaMap (a@(RegSlot _), _, _) = allocVarSlot areaMap a + alloc' areaMap _ = areaMap + layoutAreas areaMap b@(Block _ _ t) = layout areaMap t + where layout areaMap (ZTail m t) = layout (alloc m t areaMap) t + layout areaMap (ZLast _) = allocCallSlot areaMap b + areaMap = foldl layoutAreas (addToFM emptyFM (CallArea Old) 0) (postorder_dfs g) + in pprTrace "ProcPoints" (ppr procPoints) $ + pprTrace "Area SizeMap" (ppr areaSize) $ + pprTrace "Entry SP" (ppr entrySp) $ + pprTrace "Area Map" (ppr areaMap) $ areaMap + +-- After determining the stack layout, we can: +-- 1. Replace references to stack Areas with addresses relative to the stack +-- pointer. +-- 2. Insert adjustments to the stack pointer to ensure that it is at a +-- conventional location at each proc point. +-- Because we don't take interrupts on the execution stack, we only need the +-- stack pointer to be younger than the live values on the stack at proc points. +-- 3. Compute the maximum stack offset used in the procedure and replace +-- the stack high-water mark with that offset. +manifestSP :: ProcPointSet -> BlockEnv Status -> AreaMap -> + LGraph Middle Last -> FuelMonad (LGraph Middle Last) +manifestSP procPoints procMap areaMap g@(LGraph entry args blocks) = + liftM (LGraph entry args) blocks' + where blocks' = foldl replB (return emptyBlockEnv) (postorder_dfs g) + slot a = pprTrace "slot" (ppr a) $ + lookupFM areaMap a `orElse` panic "unallocated Area" + slot' (Just id) = slot $ CallArea (Young id) + slot' Nothing = slot $ CallArea Old + sp_high = maxSlot slot g + proc_entry_sp = slot (CallArea Old) + args + sp_on_entry id | id == entry = proc_entry_sp + sp_on_entry id = + case lookupBlockEnv blocks id of + Just (Block _ (StackInfo {argBytes = Just o}) _) -> slot' (Just id) + o + _ -> + case expectJust "sp_on_entry" (lookupBlockEnv procMap id) of + ReachedBy pp -> + case blockSetToList pp of + [id] -> sp_on_entry id + _ -> panic "block not reached by one proc point" + ProcPoint -> pprPanic "procpoint doesn't take any arguments?" + (ppr id <+> ppr g <+> ppr procPoints <+> ppr procMap) + + -- On entry to procpoints, the stack pointer is conventional; + -- otherwise, we check the SP set by predecessors. + replB :: FuelMonad (BlockEnv CmmBlock) -> CmmBlock -> FuelMonad (BlockEnv CmmBlock) + replB blocks (Block id o t) = + do bs <- replTail (Block id o) spIn t + pprTrace "spIn" (ppr id <+> ppr spIn)$ + liftM (flip (foldr insertBlock) bs) blocks + where spIn = sp_on_entry id + replTail :: (ZTail Middle Last -> CmmBlock) -> Int -> (ZTail Middle Last) -> + FuelMonad ([CmmBlock]) + replTail h spOff (ZTail m@(MidForeignCall (Safe bid _) _ _ _) t) = + replTail (\t' -> h (setSp spOff spOff' (ZTail (middle spOff m) t'))) spOff' t + where spOff' = slot' (Just bid) + widthInBytes (typeWidth gcWord) + replTail h spOff (ZTail m t) = replTail (h . ZTail (middle spOff m)) spOff t + replTail h spOff (ZLast (LastOther l)) = fixSp h spOff l + replTail h _ l@(ZLast LastExit) = return [h l] + middle spOff m = mapExpDeepMiddle (replSlot spOff) m + last spOff l = mapExpDeepLast (replSlot spOff) l + replSlot spOff (CmmStackSlot a i) = CmmRegOff (CmmGlobal Sp) (spOff - (slot a + i)) + replSlot spOff (CmmLit CmmHighStackMark) = -- replacing the high water mark + CmmLit (CmmInt (toInteger (max 0 (sp_high - proc_entry_sp))) (typeWidth bWord)) + replSlot _ e = e + -- The block must establish the SP expected at each successsor. + fixSp :: (ZTail Middle Last -> CmmBlock) -> Int -> Last -> FuelMonad ([CmmBlock]) + fixSp h spOff l@(LastCall _ k n _) = updSp h spOff (slot' k + n) l + fixSp h spOff l@(LastBranch k) = + let succSp = sp_on_entry k in + if succSp /= spOff then + pprTrace "updSp" (ppr k <> ppr spOff <> ppr (sp_on_entry k)) $ + updSp h spOff succSp l + else return $ [h (ZLast (LastOther (last spOff l)))] + fixSp h spOff l = liftM (uncurry (:)) $ fold_succs succ l $ return (b, []) + where b = h (ZLast (LastOther (last spOff l))) + succ succId z = + let succSp = sp_on_entry succId in + if succSp /= spOff then + do (b, bs) <- z + (b', bs') <- insertBetween b [setSpMid spOff succSp] succId + return (b', bs ++ bs') + else z + updSp h old new l = return [h $ setSp old new $ ZLast $ LastOther (last new l)] + setSpMid sp sp' = MidAssign (CmmGlobal Sp) e + where e = CmmMachOp (MO_Add wordWidth) [CmmReg (CmmGlobal Sp), off] + off = CmmLit $ CmmInt (toInteger $ sp - sp') wordWidth + setSp sp sp' t = if sp == sp' then t else ZTail (setSpMid sp sp') t + + +-- To compute the stack high-water mark, we fold over the graph and +-- compute the highest slot offset. +maxSlot :: (Area -> Int) -> CmmGraph -> Int +maxSlot slotOff g = fold_blocks (fold_fwd_block (\ _ _ x -> x) highSlot highSlot) 0 g + where highSlot i z = foldSlotsUsed add (foldSlotsDefd add z i) i + add z (a, i, w) = max z (slotOff a + i) + +----------------------------------------------------------------------------- +-- | Sanity check: stub pointers immediately after they die +----------------------------------------------------------------------------- +-- This will miss stack slots that are last used in a Last node, +-- but it should do pretty well... + +type StubPtrFix = FuelMonad (BackwardFixedPoint Middle Last SubAreaSet CmmGraph) + +stubSlotsOnDeath :: (LGraph Middle Last) -> FuelMonad (LGraph Middle Last) +stubSlotsOnDeath g = liftM zdfFpContents $ (res :: StubPtrFix) + where res = zdfBRewriteFromL RewriteShallow emptyBlockEnv "stub ptrs" slotLattice + liveSlotTransfers rewrites (fact_bot slotLattice) g + rewrites = BackwardRewrites first middle last Nothing + first _ _ = Nothing + last _ _ = Nothing + middle liveSlots m = foldSlotsUsed (stub liveSlots m) Nothing m + stub liveSlots m rst subarea@(a, off, w) = + if elemSlot liveSlots subarea then rst + else let store = mkStore (CmmStackSlot a off) + (stackStubExpr (widthFromBytes w)) + in case rst of Nothing -> Just (mkMiddle m <*> store) + Just g -> Just (g <*> store) -- 1.7.10.4