compiler/cmm/CmmProcPointZ.hs

   1
   2 module CmmProcPointZ
   3     ( callProcPoints, minimalProcPointSet
   4     , addProcPointProtocols
   5     , splitAtProcPoints
   6     )
   7 where
   8
   9 import Prelude hiding (zip, unzip, last)
  10
  11 import BlockId
  12 import CLabel
  13 --import ClosureInfo
  14 import Cmm hiding (blockId)
  15 import CmmExpr
  16 import CmmContFlowOpt
  17 import CmmLiveZ
  18 import CmmTx
  19 import DFMonad
  20 import FiniteMap
  21 import MachOp (MachHint(NoHint))
  22 import Maybes
  23 import MkZipCfgCmm hiding (CmmBlock, CmmGraph)
  24 import Monad
  25 import Name
  26 import Outputable
  27 import Panic
  28 import UniqFM
  29 import UniqSet
  30 import UniqSupply
  31 import ZipCfg
  32 import ZipCfgCmmRep
  33 import ZipDataflow
  34
  35 -- Compute a minimal set of proc points for a control-flow graph.
  36
  37 -- Determine a protocol for each proc point (which live variables will
  38 -- be passed as arguments and which will be on the stack).
  39
  40 {-
  41 A proc point is a basic block that, after CPS transformation, will
  42 start a new function.  The entry block of the original function is a
  43 proc point, as is the continuation of each function call.
  44 A third kind of proc point arises if we want to avoid copying code.
  45 Suppose we have code like the following:
  46
  47   f() {
  48     if (...) { ..1..; call foo(); ..2..}
  49     else     { ..3..; call bar(); ..4..}
  50     x = y + z;
  51     return x;
  52   }
  53
  54 The statement 'x = y + z' can be reached from two different proc
  55 points: the continuations of foo() and bar().  We would prefer not to
  56 put a copy in each continuation; instead we would like 'x = y + z' to
  57 be the start of a new procedure to which the continuations can jump:
  58
  59   f_cps () {
  60     if (...) { ..1..; push k_foo; jump foo_cps(); }
  61     else     { ..3..; push k_bar; jump bar_cps(); }
  62   }
  63   k_foo() { ..2..; jump k_join(y, z); }
  64   k_bar() { ..4..; jump k_join(y, z); }
  65   k_join(y, z) { x = y + z; return x; }
  66
  67 You might think then that a criterion to make a node a proc point is
  68 that it is directly reached by two distinct proc points.  (Note
  69 [Direct reachability].)  But this criterion is a bit two simple; for
  70 example, 'return x' is also reached by two proc points, yet there is
  71 no point in pulling it out of k_join.  A good criterion would be to
  72 say that a node should be made a proc point if it is reached by a set
  73 of proc points that is different than its immediate dominator.  NR
  74 believes this criterion can be shown to produce a minimum set of proc
  75 points, and given a dominator tree, the proc points can be chosen in
  76 time linear in the number of blocks.  Lacking a dominator analysis,
  77 however, we turn instead to an iterative solution, starting with no
  78 proc points and adding them according to these rules:
  79
  80   1. The entry block is a proc point.
  81   2. The continuation of a call is a proc point.
  82   3. A node is a proc point if it is directly reached by more proc
  83      points than one of its predecessors.
  84
  85 Because we don't understand the problem very well, we apply rule 3 at
  86 most once per iteration, then recompute the reachability information.
  87 (See Note [No simple dataflow].)  The choice of the new proc point is
  88 arbitrary, and I don't know if the choice affects the final solution,
  89 so I don't know if the number of proc points chosen is the
  90 minimum---but the set will be minimal.
  91 -}
  92
  93 type ProcPointSet = BlockSet
  94
  95 data Status
  96   = ReachedBy ProcPointSet  -- set of proc points that directly reach the block
  97   | ProcPoint               -- this block is itself a proc point
  98
  99 instance Outputable Status where
 100   ppr (ReachedBy ps)
 101       | isEmptyUniqSet ps = text "<not-reached>"
 102       | otherwise = text "reached by" <+>
 103                     (hsep $ punctuate comma $ map ppr $ uniqSetToList ps)
 104   ppr ProcPoint = text "<procpt>"
 105
 106
 107 lattice :: DataflowLattice Status
 108 lattice = DataflowLattice "direct proc-point reachability" unreached add_to False
 109     where unreached = ReachedBy emptyBlockSet
 110           add_to _ ProcPoint = noTx ProcPoint
 111           add_to ProcPoint _ = aTx ProcPoint -- aTx because of previous case again
 112           add_to (ReachedBy p) (ReachedBy p') =
 113               let union = unionUniqSets p p'
 114               in  if sizeUniqSet union > sizeUniqSet p' then
 115                       aTx (ReachedBy union)
 116                   else
 117                       noTx (ReachedBy p')
 118 --------------------------------------------------
 119 -- transfer equations
 120
 121 forward :: ForwardTransfers Middle Last Status
 122 forward = ForwardTransfers first middle last exit
 123     where first ProcPoint id = ReachedBy $ unitUniqSet id
 124           first  x _ = x
 125           middle x _ = x
 126           last _ (LastCall _ (Just id)) = LastOutFacts [(id, ProcPoint)]
 127           last x l = LastOutFacts $ map (\id -> (id, x)) (succs l)
 128           exit x   = x
 129
 130 -- It is worth distinguishing two sets of proc points:
 131 -- those that are induced by calls in the original graph
 132 -- and those that are introduced because they're reachable from multiple proc points.
 133 callProcPoints      :: CmmGraph -> ProcPointSet
 134 minimalProcPointSet :: ProcPointSet -> CmmGraph -> FuelMonad ProcPointSet
 135
 136 callProcPoints g = fold_blocks add entryPoint g
 137   where entryPoint = unitUniqSet (lg_entry g)
 138         add b set = case last $ unzip b of
 139                       LastOther (LastCall _ (Just k)) -> extendBlockSet set k
 140                       _ -> set
 141
 142 minimalProcPointSet callProcPoints g = extendPPSet g (postorder_dfs g) callProcPoints
 143
 144 type PPFix = FuelMonad (ForwardFixedPoint Middle Last Status ())
 145
 146 procPointAnalysis :: ProcPointSet -> CmmGraph -> FuelMonad PPFix
 147 procPointAnalysis procPoints g =
 148   let addPP env id = extendBlockEnv env id ProcPoint
 149       initProcPoints = foldl addPP emptyBlockEnv (uniqSetToList procPoints)
 150   in runDFM lattice $ -- init with old facts and solve
 151        return $ (zdfSolveFrom initProcPoints "proc-point reachability" lattice
 152                               forward (fact_bot lattice) $ graphOfLGraph g :: PPFix)
 153
 154 extendPPSet :: CmmGraph -> [CmmBlock] -> ProcPointSet -> FuelMonad ProcPointSet
 155 extendPPSet g blocks procPoints =
 156     do res <- procPointAnalysis procPoints g
 157        env <- liftM zdfFpFacts res
 158        let add block pps = let id = blockId block
 159                            in  case lookupBlockEnv env id of
 160                                  Just ProcPoint -> extendBlockSet pps id
 161                                  _ -> pps
 162            procPoints' = fold_blocks add emptyBlockSet g
 163            newPoint = listToMaybe (mapMaybe ppSuccessor blocks)
 164            ppSuccessor b@(Block id _) =
 165                let nreached id = case lookupBlockEnv env id `orElse` panic "no ppt" of
 166                                    ProcPoint -> 1
 167                                    ReachedBy ps -> sizeUniqSet ps
 168                    my_nreached = nreached id
 169                    -- | Looking for a successor of b that is reached by
 170                    -- more proc points than b and is not already a proc
 171                    -- point.  If found, it can become a proc point.
 172                    newId succ_id = not (elemBlockSet succ_id procPoints') &&
 173                                    nreached succ_id > my_nreached
 174                in  listToMaybe $ filter newId $ succs b
 175        case newPoint of Just id ->
 176                           if elemBlockSet id procPoints' then panic "added old proc pt"
 177                           else extendPPSet g blocks (extendBlockSet procPoints' id)
 178                         Nothing -> return procPoints'
 179
 180
 181
 182
 183 ------------------------------------------------------------------------
 184 --                    Computing Proc-Point Protocols                  --
 185 ------------------------------------------------------------------------
 186
 187 {-
 188
 189 There is one major trick, discovered by Michael Adams, which is that
 190 we want to choose protocols in a way that enables us to optimize away
 191 some continuations.  The optimization is very much like branch-chain
 192 elimination, except that it involves passing results as well as
 193 control.  The idea is that if a call's continuation k does nothing but
 194 CopyIn its results and then goto proc point P, the call's continuation
 195 may be changed to P, *provided* P's protocol is identical to the
 196 protocol for the CopyIn.  We choose protocols to make this so.
 197
 198 Here's an explanatory example; we begin with the source code (lines
 199 separate basic blocks):
 200
 201   ..1..;
 202   x, y = g();
 203   goto P;
 204   -------
 205   P: ..2..;
 206
 207 Zipperization converts this code as follows:
 208
 209   ..1..;
 210   call g() returns to k;
 211   -------
 212   k: CopyIn(x, y);
 213      goto P;
 214   -------
 215   P: ..2..;
 216
 217 What we'd like to do is assign P the same CopyIn protocol as k, so we
 218 can eliminate k:
 219
 220   ..1..;
 221   call g() returns to P;
 222   -------
 223   P: CopyIn(x, y); ..2..;
 224
 225 Of course, P may be the target of more than one continuation, and
 226 different continuations may have different protocols.  Michael Adams
 227 implemented a voting mechanism, but he thinks a simple greedy
 228 algorithm would be just as good, so that's what we do.
 229
 230 -}
 231
 232 data Protocol = Protocol Convention CmmFormals Area
 233   deriving Eq
 234 instance Outputable Protocol where
 235   ppr (Protocol c fs a) = text "Protocol" <+> ppr c <+> ppr fs <+> ppr a
 236
 237 -- | Function 'optimize_calls' chooses protocols only for those proc
 238 -- points that are relevant to the optimization explained above.
 239 -- The others are assigned by 'add_unassigned', which is not yet clever.
 240
 241 addProcPointProtocols :: ProcPointSet -> ProcPointSet -> CmmGraph -> FuelMonad CmmGraph
 242 addProcPointProtocols callPPs procPoints g =
 243   do liveness <- cmmLivenessZ g
 244      (protos, g') <- return $ optimize_calls liveness g
 245      blocks'' <- add_CopyOuts protos procPoints g'
 246      return $ LGraph (lg_entry g) blocks''
 247     where optimize_calls liveness g =  -- see Note [Separate Adams optimization]
 248               let (protos, blocks') =
 249                       fold_blocks maybe_add_call (init_protocols, emptyBlockEnv) g
 250                   protos' = add_unassigned liveness procPoints protos
 251                   g'  = LGraph (lg_entry g) $ add_CopyIns callPPs protos' blocks'
 252               in  (protos', runTx removeUnreachableBlocksZ g')
 253           maybe_add_call :: CmmBlock -> (BlockEnv Protocol, BlockEnv CmmBlock)
 254                          -> (BlockEnv Protocol, BlockEnv CmmBlock)
 255           -- ^ If the block is a call whose continuation goes to a proc point
 256           -- whose protocol either matches the continuation's or is not yet set,
 257           -- redirect the call (cf 'newblock') and set the protocol if necessary
 258           maybe_add_call block (protos, blocks) =
 259               case goto_end $ unzip block of
 260                 (h, LastOther (LastCall tgt (Just k)))
 261                     | Just proto <- lookupBlockEnv protos k,
 262                       Just pee   <- jumpsToProcPoint k
 263                     -> let newblock =
 264                                zipht h (tailOfLast (LastCall tgt (Just pee)))
 265                            changed_blocks   = insertBlock newblock blocks
 266                            unchanged_blocks = insertBlock block    blocks
 267                        in case lookupBlockEnv protos pee of
 268                             Nothing -> (extendBlockEnv protos pee proto,changed_blocks)
 269                             Just proto' ->
 270                               if proto == proto' then (protos, changed_blocks)
 271                               else (protos, unchanged_blocks)
 272                 _ -> (protos, insertBlock block blocks)
 273
 274           jumpsToProcPoint :: BlockId -> Maybe BlockId
 275           -- ^ Tells whether the named block is just a jump to a proc point
 276           jumpsToProcPoint id =
 277               let (Block _ t) = lookupBlockEnv (lg_blocks g) id `orElse`
 278                                 panic "jump out of graph"
 279               in case t of
 280                    ZTail (CopyIn {}) (ZLast (LastOther (LastBranch pee)))
 281                        | elemBlockSet pee procPoints -> Just pee
 282                    _ -> Nothing
 283           init_protocols = fold_blocks maybe_add_proto emptyBlockEnv g
 284           maybe_add_proto :: CmmBlock -> BlockEnv Protocol -> BlockEnv Protocol
 285           maybe_add_proto (Block id (ZTail (CopyIn c fs _srt) _)) env =
 286               extendBlockEnv env id (Protocol c fs $ toArea id fs)
 287           maybe_add_proto _ env = env
 288           toArea id fs = mkCallArea id fs $ Just fs
 289
 290 -- | For now, following a suggestion by Ben Lippmeier, we pass all
 291 -- live variables as arguments, hoping that a clever register
 292 -- allocator might help.
 293
 294 add_unassigned :: BlockEnv CmmLive -> ProcPointSet -> BlockEnv Protocol ->
 295                   BlockEnv Protocol
 296 add_unassigned = pass_live_vars_as_args
 297
 298 pass_live_vars_as_args :: BlockEnv CmmLive -> ProcPointSet ->
 299                           BlockEnv Protocol -> BlockEnv Protocol
 300 pass_live_vars_as_args liveness procPoints protos = protos'
 301   where protos' = foldUniqSet addLiveVars protos procPoints
 302         addLiveVars :: BlockId -> BlockEnv Protocol -> BlockEnv Protocol
 303         addLiveVars id protos =
 304             case lookupBlockEnv protos id of
 305               Just _  -> protos
 306               Nothing -> let live = lookupBlockEnv liveness id `orElse`
 307                                     panic ("no liveness at block " ++ show id)
 308                              formals = map (\x -> CmmKinded x NoHint) $ uniqSetToList live
 309                              prot = Protocol ConventionPrivate formals $
 310                                              mkCallArea id formals $ Just formals
 311                          in  extendBlockEnv protos id prot
 312
 313
 314 -- | Add copy-in instructions to each proc point that did not arise from a call
 315 -- instruction. (Proc-points that arise from calls already have their copy-in instructions.)
 316
 317 add_CopyIns :: ProcPointSet -> BlockEnv Protocol -> BlockEnv CmmBlock -> BlockEnv CmmBlock
 318 add_CopyIns callPPs protos = mapUFM maybe_insert_CopyIns
 319     where maybe_insert_CopyIns :: CmmBlock -> CmmBlock
 320           maybe_insert_CopyIns b@(Block id t) | not $ elementOfUniqSet id callPPs =
 321             case lookupBlockEnv protos id of
 322               Nothing -> b
 323               Just (Protocol c fs area) ->
 324                   case t of
 325                     --ZTail (CopyIn c' fs' _) _ ->
 326                     --  if c == c' && fs == fs' then b
 327                     --  else panic ("mismatched protocols for block " ++ show id)
 328                     _ -> Block id -- (ZTail (CopyIn c fs NoC_SRT) t)
 329                            $ foldr ZTail t (copyIn c area fs)
 330           maybe_insert_CopyIns b = b
 331
 332 -- | Add a CopyOut node before each procpoint.
 333 -- If the predecessor is a call, then the CopyOut should already exist (in the callee).
 334
 335 add_CopyOuts :: BlockEnv Protocol -> ProcPointSet -> CmmGraph ->
 336                 FuelMonad (BlockEnv CmmBlock)
 337 add_CopyOuts protos procPoints g = fold_blocks maybe_insert_CopyOut (return emptyBlockEnv) g
 338     where maybe_insert_CopyOut :: CmmBlock -> FuelMonad (BlockEnv CmmBlock) ->
 339                                   FuelMonad (BlockEnv CmmBlock)
 340           maybe_insert_CopyOut b@(Block bid _) blocks | bid == lg_entry g = skip b blocks
 341           maybe_insert_CopyOut b blocks =
 342             case last $ unzip b of
 343               LastOther (LastCall _ _) -> skip b blocks -- copy out done by callee
 344               _ -> maybe_insert_CopyOut' b blocks
 345           maybe_insert_CopyOut' b blocks = fold_succs trySucc b init >>= finish
 346             where init = blocks >>= (\bmap -> return (b, bmap))
 347                   trySucc succId z =
 348                     if elemBlockSet succId procPoints then
 349                       case lookupBlockEnv protos succId of
 350                         Nothing -> z
 351                         Just (Protocol c fs area) ->
 352                           insert z succId $ copyOut c area $ map fetch fs
 353                           -- CopyOut c $ map fetch fs
 354                     else z
 355                   fetch k = k {kindlessCmm = CmmReg $ CmmLocal $ kindlessCmm k}
 356                   insert z succId m =
 357                     do (b, bmap) <- z
 358                        (b, bs)   <- insertBetween b m succId
 359                        return $ (b, foldl (flip insertBlock) bmap bs)
 360                   finish (b@(Block bid _), bmap) = return $ extendBlockEnv bmap bid b
 361           skip b@(Block bid _) bs = bs >>= (\bmap -> return $ extendBlockEnv bmap bid b)
 362
 363
 364
 365 -- Input invariant: A block should only be reachable from a single ProcPoint.
 366 -- If you want to duplicate blocks, do it before this gets called.
 367 splitAtProcPoints :: CmmFormalsWithoutKinds -> CLabel -> ProcPointSet ->
 368                      CmmGraph -> FuelMonad [CmmGraph]
 369 splitAtProcPoints formals entry_label procPoints g@(LGraph entry _) =
 370   do let layout = layout_stack formals g
 371      pprTrace "stack layout" (ppr layout) $ return ()
 372      res <- procPointAnalysis procPoints g
 373      procMap <- liftM zdfFpFacts res
 374      let addBlock b@(Block bid _) graphEnv =
 375                case lookupBlockEnv procMap bid of
 376                  Just ProcPoint -> add graphEnv bid bid b
 377                  Just (ReachedBy set) ->
 378                    case uniqSetToList set of
 379                      []   -> graphEnv
 380                      [id] -> add graphEnv id bid b
 381                      _ -> panic "Each block should be reachable from only one ProcPoint"
 382                  Nothing -> panic "block not reached by a proc point?"
 383          add graphEnv procId bid b = extendBlockEnv graphEnv procId graph'
 384                where graph  = lookupBlockEnv graphEnv procId `orElse` emptyBlockEnv
 385                      graph' = extendBlockEnv graph bid b
 386      graphEnv <- return $ fold_blocks addBlock emptyBlockEnv g
 387      -- Build a map from proc point BlockId to labels for their new procedures
 388      let add_label map pp = clabel pp >>= (\l -> return $ (pp, l) : map)
 389          clabel procPoint = if procPoint == entry then return entry_label
 390                             else getUniqueM >>= return . to_label
 391          to_label u = mkEntryLabel (mkFCallName u "procpoint")
 392      procLabels <- foldM add_label [] (uniqSetToList procPoints)
 393      -- In each new graph, add blocks jumping off to the new procedures,
 394      -- and replace branches to procpoints with branches to the jump-off blocks
 395      let add_jump_block (env, bs) (pp, l) =
 396            do bid <- liftM mkBlockId getUniqueM
 397               let b = Block bid (ZLast (LastOther (LastJump $ CmmLit $ CmmLabel l)))
 398               return $ (extendBlockEnv env pp bid, b : bs)
 399          add_jumps newGraphEnv (guniq, blockEnv) =
 400            do (jumpEnv, jumpBlocks) <- foldM add_jump_block (emptyBlockEnv, []) procLabels
 401               let ppId = mkBlockId guniq
 402                   LGraph _ blockEnv' = replaceLabelsZ jumpEnv $ LGraph ppId blockEnv
 403                   blockEnv'' = foldl (flip insertBlock) blockEnv' jumpBlocks
 404               return $ extendBlockEnv newGraphEnv ppId $
 405                        runTx cmmCfgOptsZ $ LGraph ppId blockEnv''
 406      _ <- return $ replaceLabelsZ
 407      graphEnv <- foldM add_jumps emptyBlockEnv $ ufmToList graphEnv
 408      return $ pprTrace "procLabels" (ppr procLabels) $
 409               pprTrace "splitting graphs" (ppr graphEnv) $ [g]
 410
 411 ------------------------------------------------------------------------
 412 --                    Stack Layout (completely bogus for now)         --
 413 ------------------------------------------------------------------------
 414
 415 -- At some point, we'll do stack layout properly.
 416 -- But for now, we can move forward on generating code by just producing
 417 -- a brain dead layout, giving a separate slot to every variable,
 418 -- and (incorrectly) assuming that all parameters are passed on the stack.
 419
 420 -- For now, variables are placed at explicit offsets from a virtual
 421 -- frame pointer.
 422 -- We may want to use abstract stack slots at some point.
 423 data Placement = VFPMinus Int
 424
 425 instance Outputable Placement where
 426   ppr (VFPMinus k) = text "VFP - " <> int k
 427
 428 -- Build a map from registers to stack locations.
 429 -- Return that map along with the offset to the end of the block
 430 -- containing local registers.
 431 layout_stack ::CmmFormalsWithoutKinds -> CmmGraph ->
 432                (Int, FiniteMap LocalReg Placement, FiniteMap LocalReg Placement)
 433 layout_stack formals g = (ix', incomingMap, localMap)
 434     where (ix, incomingMap) = foldl (flip place) (1, emptyFM) formals -- IGNORES CC'S
 435                  -- 1 leaves space for the return infotable
 436           (ix', localMap) = foldUniqSet place (ix, emptyFM) regs
 437           place r (ix, map) = (ix', addToFM map r $ VFPMinus ix') where ix' = ix + 1
 438           regs = fold_blocks (fold_fwd_block (\_ y -> y) add addL) emptyRegSet g
 439           add  x y = foldRegsDefd extendRegSet y x
 440           addL (LastOther l) z = add l z
 441           addL LastExit      z = z
 442
 443
 444 ----------------------------------------------------------------
 445
 446 {-
 447 Note [Direct reachability]
 448
 449 Block B is directly reachable from proc point P iff control can flow
 450 from P to B without passing through an intervening proc point.
 451 -}
 452
 453 ----------------------------------------------------------------
 454
 455 {-
 456 Note [No simple dataflow]
 457
 458 Sadly, it seems impossible to compute the proc points using a single
 459 dataflow pass.  One might attempt to use this simple lattice:
 460
 461   data Location = Unknown
 462                 | InProc BlockId -- node is in procedure headed by the named proc point
 463                 | ProcPoint      -- node is itself a proc point
 464
 465 At a join, a node in two different blocks becomes a proc point.
 466 The difficulty is that the change of information during iterative
 467 computation may promote a node prematurely.  Here's a program that
 468 illustrates the difficulty:
 469
 470   f () {
 471   entry:
 472     ....
 473   L1:
 474     if (...) { ... }
 475     else { ... }
 476
 477   L2: if (...) { g(); goto L1; }
 478       return x + y;
 479   }
 480
 481 The only proc-point needed (besides the entry) is L1.  But in an
 482 iterative analysis, consider what happens to L2.  On the first pass
 483 through, it rises from Unknown to 'InProc entry', but when L1 is
 484 promoted to a proc point (because it's the successor of g()), L1's
 485 successors will be promoted to 'InProc L1'.  The problem hits when the
 486 new fact 'InProc L1' flows into L2 which is already bound to 'InProc entry'.
 487 The join operation makes it a proc point when in fact it needn't be,
 488 because its immediate dominator L1 is already a proc point and there
 489 are no other proc points that directly reach L2.
 490 -}
 491
 492
 493
 494 {- Note [Separate Adams optimization]
 495 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 496 It may be worthwhile to attempt the Adams optimization by rewriting
 497 the graph before the assignment of proc-point protocols.  Here are a
 498 couple of rules:
 499
 500   g() returns to k;                    g() returns to L;
 501   k: CopyIn c ress; goto L:
 502    ...                        ==>        ...
 503   L: // no CopyIn node here            L: CopyIn c ress;
 504
 505
 506 And when c == c' and ress == ress', this also:
 507
 508   g() returns to k;                    g() returns to L;
 509   k: CopyIn c ress; goto L:
 510    ...                        ==>        ...
 511   L: CopyIn c' ress'                   L: CopyIn c' ress' ;
 512
 513 In both cases the goal is to eliminate k.
 514 -}