compiler/cmm/CmmProcPointZ.hs

   1
   2 module CmmProcPointZ
   3     ( minimalProcPointSet
   4     , addProcPointProtocols
   5     )
   6 where
   7
   8 import Prelude hiding (zip, unzip)
   9
  10 import ClosureInfo
  11 import Cmm hiding (blockId)
  12 import CmmExpr
  13 import CmmContFlowOpt
  14 import CmmLiveZ
  15 import CmmTx
  16 import DFMonad
  17 import ForeignCall -- used in protocol for the entry point
  18 import MachOp (MachHint(NoHint))
  19 import Maybes
  20 import Outputable
  21 import Panic
  22 import UniqFM
  23 import UniqSet
  24 import ZipCfg
  25 import ZipCfgCmmRep
  26 import ZipDataflow0
  27
  28 -- Compute a minimal set of proc points for a control-flow graph.
  29
  30 -- Determine a protocol for each proc point (which live variables will
  31 -- be passed as arguments and which will be on the stack).
  32
  33 {-
  34 A proc point is a basic block that, after CPS transformation, will
  35 start a new function.  The entry block of the original function is a
  36 proc point, as is the continuation of each function call.
  37 A third kind of proc point arises if we want to avoid copying code.
  38 Suppose we have code like the following:
  39
  40   f() {
  41     if (...) { ..1..; call foo(); ..2..}
  42     else     { ..3..; call bar(); ..4..}
  43     x = y + z;
  44     return x;
  45   }
  46
  47 The statement 'x = y + z' can be reached from two different proc
  48 points: the continuations of foo() and bar().  We would prefer not to
  49 put a copy in each continuation; instead we would like 'x = y + z' to
  50 be the start of a new procedure to which the continuations can jump:
  51
  52   f_cps () {
  53     if (...) { ..1..; push k_foo; jump foo_cps(); }
  54     else     { ..3..; push k_bar; jump bar_cps(); }
  55   }
  56   k_foo() { ..2..; jump k_join(y, z); }
  57   k_bar() { ..4..; jump k_join(y, z); }
  58   k_join(y, z) { x = y + z; return x; }
  59
  60 You might think then that a criterion to make a node a proc point is
  61 that it is directly reached by two distinct proc points.  (Note
  62 [Direct reachability].)  But this criterion is a bit two simple; for
  63 example, 'return x' is also reached by two proc points, yet there is
  64 no point in pulling it out of k_join.  A good criterion would be to
  65 say that a node should be made a proc point if it is reached by a set
  66 of proc points that is different than its immediate dominator.  NR
  67 believes this criterion can be shown to produce a minimum set of proc
  68 points, and given a dominator tree, the proc points can be chosen in
  69 time linear in the number of blocks.  Lacking a dominator analysis,
  70 however, we turn instead to an iterative solution, starting with no
  71 proc points and adding them according to these rules:
  72
  73   1. The entry block is a proc point.
  74   2. The continuation of a call is a proc point.
  75   3. A node is a proc point if it is directly reached by more proc
  76      points than one of its predecessors.
  77
  78 Because we don't understand the problem very well, we apply rule 3 at
  79 most once per iteration, then recompute the reachability information.
  80 (See Note [No simple dataflow].)  The choice of the new proc point is
  81 arbitrary, and I don't know if the choice affects the final solution,
  82 so I don't know if the number of proc points chosen is the
  83 minimum---but the set will be minimal.
  84 -}
  85
  86 type ProcPointSet = BlockSet
  87
  88 data Status
  89   = ReachedBy ProcPointSet  -- set of proc points that directly reach the block
  90   | ProcPoint               -- this block is itself a proc point
  91
  92 instance Outputable Status where
  93   ppr (ReachedBy ps)
  94       | isEmptyUniqSet ps = text "<not-reached>"
  95       | otherwise = text "reached by" <+>
  96                     (hsep $ punctuate comma $ map ppr $ uniqSetToList ps)
  97   ppr ProcPoint = text "<procpt>"
  98
  99
 100 lattice :: DataflowLattice Status
 101 lattice = DataflowLattice "direct proc-point reachability" unreached add_to False
 102     where unreached = ReachedBy emptyBlockSet
 103           add_to _ ProcPoint = noTx ProcPoint
 104           add_to ProcPoint _ = aTx ProcPoint -- aTx because of previous case again
 105           add_to (ReachedBy p) (ReachedBy p') =
 106               let union = unionUniqSets p p'
 107               in  if sizeUniqSet union > sizeUniqSet p' then
 108                       aTx (ReachedBy union)
 109                   else
 110                       noTx (ReachedBy p')
 111 --------------------------------------------------
 112 -- transfer equations
 113
 114 forward :: FAnalysis Middle Last Status
 115 forward = FComp "proc-point reachability" first middle last exit
 116     where first ProcPoint id = ReachedBy $ unitUniqSet id
 117           first  x _ = x
 118           middle x _ = x
 119           last _ (LastCall _ (Just id)) = LastOutFacts [(id, ProcPoint)]
 120           last x l = LastOutFacts $ map (\id -> (id, x)) (succs l)
 121           exit x   = x
 122
 123 minimalProcPointSet :: CmmGraph -> ProcPointSet
 124 minimalProcPointSet g = extendPPSet g (postorder_dfs g) entryPoint
 125     where entryPoint = unitUniqSet (lg_entry g)
 126
 127 extendPPSet :: CmmGraph -> [CmmBlock] -> ProcPointSet -> ProcPointSet
 128 extendPPSet g blocks procPoints =
 129     case newPoint of Just id ->
 130                        if elemBlockSet id procPoints' then panic "added old proc pt"
 131                        else extendPPSet g blocks (extendBlockSet procPoints' id)
 132                      Nothing -> procPoints'
 133     where env = runDFA lattice $
 134                 do refine_f_anal forward g set_init_points
 135                    getAllFacts
 136           set_init_points = mapM_ (\id -> setFact id ProcPoint)
 137                             (uniqSetToList procPoints)
 138           procPoints' = fold_blocks add emptyBlockSet g
 139           add block pps = let id = blockId block
 140                           in  case lookupBlockEnv env id of
 141                                 Just ProcPoint -> extendBlockSet pps id
 142                                 _ -> pps
 143
 144           newPoint = listToMaybe (mapMaybe ppSuccessor blocks)
 145           ppSuccessor b@(Block id _) =
 146               let nreached id = case lookupBlockEnv env id `orElse` panic "no ppt" of
 147                                   ProcPoint -> 1
 148                                   ReachedBy ps -> sizeUniqSet ps
 149                   my_nreached = nreached id
 150                   -- | Looking for a successor of b that is reached by
 151                   -- more proc points than b and is not already a proc
 152                   -- point.  If found, it can become a proc point.
 153                   newId succ_id = not (elemBlockSet succ_id procPoints') &&
 154                                   nreached succ_id > my_nreached
 155               in  listToMaybe $ filter newId $ succs b
 156
 157
 158 ------------------------------------------------------------------------
 159 --                    Computing Proc-Point Protocols                  --
 160 ------------------------------------------------------------------------
 161
 162 {-
 163
 164 There is one major trick, discovered by Michael Adams, which is that
 165 we want to choose protocols in a way that enables us to optimize away
 166 some continuations.  The optimization is very much like branch-chain
 167 elimination, except that it involves passing results as well as
 168 control.  The idea is that if a call's continuation k does nothing but
 169 CopyIn its results and then goto proc point P, the call's continuation
 170 may be changed to P, *provided* P's protocol is identical to the
 171 protocol for the CopyIn.  We choose protocols to make this so.
 172
 173 Here's an explanatory example; we begin with the source code (lines
 174 separate basic blocks):
 175
 176   ..1..;
 177   x, y = g();
 178   goto P;
 179   -------
 180   P: ..2..;
 181
 182 Zipperization converts this code as follows:
 183
 184   ..1..;
 185   call g() returns to k;
 186   -------
 187   k: CopyIn(x, y);
 188      goto P;
 189   -------
 190   P: ..2..;
 191
 192 What we'd like to do is assign P the same CopyIn protocol as k, so we
 193 can eliminate k:
 194
 195   ..1..;
 196   call g() returns to P;
 197   -------
 198   P: CopyIn(x, y); ..2..;
 199
 200 Of course, P may be the target of more than one continuation, and
 201 different continuations may have different protocols.  Michael Adams
 202 implemented a voting mechanism, but he thinks a simple greedy
 203 algorithm would be just as good, so that's what we do.
 204
 205 -}
 206
 207 data Protocol = Protocol Convention CmmFormals
 208   deriving Eq
 209
 210 -- | Function 'optimize_calls' chooses protocols only for those proc
 211 -- points that are relevant to the optimization explained above.
 212 -- The others are assigned by 'add_unassigned', which is not yet clever.
 213
 214 addProcPointProtocols :: ProcPointSet -> CmmFormalsWithoutKinds -> CmmGraph -> CmmGraph
 215 addProcPointProtocols procPoints formals g =
 216        snd $ add_unassigned procPoints $ optimize_calls g
 217     where optimize_calls g =  -- see Note [Separate Adams optimization]
 218               let (protos, blocks') =
 219                       fold_blocks maybe_add_call (init_protocols, emptyBlockEnv) g
 220                   g' = LGraph (lg_entry g) (add_CopyIns protos blocks')
 221               in  (protos, runTx removeUnreachableBlocksZ g')
 222           maybe_add_call :: CmmBlock -> (BlockEnv Protocol, BlockEnv CmmBlock)
 223                          -> (BlockEnv Protocol, BlockEnv CmmBlock)
 224           -- ^ If the block is a call whose continuation goes to a proc point
 225           -- whose protocol either matches the continuation's or is not yet set,
 226           -- redirect the call (cf 'newblock') and set the protocol if necessary
 227           maybe_add_call block (protos, blocks) =
 228               case goto_end $ unzip block of
 229                 (h, LastOther (LastCall tgt (Just k)))
 230                     | Just proto <- lookupBlockEnv protos k,
 231                       Just pee <- jumpsToProcPoint k
 232                     -> let newblock =
 233                                zipht h (tailOfLast (LastCall tgt (Just pee)))
 234                            changed_blocks   = insertBlock newblock blocks
 235                            unchanged_blocks = insertBlock block    blocks
 236                        in case lookupBlockEnv protos pee of
 237                             Nothing -> (extendBlockEnv protos pee proto,changed_blocks)
 238                             Just proto' ->
 239                               if proto == proto' then (protos, changed_blocks)
 240                               else (protos, unchanged_blocks)
 241                 _ -> (protos, insertBlock block blocks)
 242
 243           jumpsToProcPoint :: BlockId -> Maybe BlockId
 244           -- ^ Tells whether the named block is just a jump to a proc point
 245           jumpsToProcPoint id =
 246               let (Block _ t) = lookupBlockEnv (lg_blocks g) id `orElse`
 247                                 panic "jump out of graph"
 248               in case t of
 249                    ZTail (CopyIn {}) (ZLast (LastOther (LastBranch pee)))
 250                        | elemBlockSet pee procPoints -> Just pee
 251                    _ -> Nothing
 252           init_protocols = fold_blocks maybe_add_proto emptyBlockEnv g
 253           maybe_add_proto :: CmmBlock -> BlockEnv Protocol -> BlockEnv Protocol
 254           maybe_add_proto (Block id (ZTail (CopyIn c fs _srt) _)) env =
 255               extendBlockEnv env id (Protocol c fs)
 256           maybe_add_proto (Block id _) env | id == lg_entry g =
 257               extendBlockEnv env id (Protocol stdArgConvention hinted_formals)
 258           maybe_add_proto _ env = env
 259           hinted_formals = map (\x -> CmmKinded x NoHint) formals
 260           stdArgConvention = ConventionStandard CmmCallConv Arguments
 261
 262 -- | For now, following a suggestion by Ben Lippmeier, we pass all
 263 -- live variables as arguments, hoping that a clever register
 264 -- allocator might help.
 265
 266 add_unassigned
 267     :: ProcPointSet -> (BlockEnv Protocol, CmmGraph) -> (BlockEnv Protocol, CmmGraph)
 268 add_unassigned = pass_live_vars_as_args
 269
 270 pass_live_vars_as_args
 271     :: ProcPointSet -> (BlockEnv Protocol, CmmGraph) -> (BlockEnv Protocol, CmmGraph)
 272 pass_live_vars_as_args procPoints (protos, g) = (protos', g')
 273   where liveness = cmmLivenessZ g
 274         protos' = foldUniqSet addLiveVars protos procPoints
 275         addLiveVars :: BlockId -> BlockEnv Protocol -> BlockEnv Protocol
 276         addLiveVars id protos =
 277             case lookupBlockEnv protos id of
 278               Just _ -> protos
 279               Nothing -> let live = lookupBlockEnv liveness id `orElse`
 280                                     emptyRegSet -- XXX there's a bug lurking!
 281                                     -- panic ("no liveness at block " ++ show id)
 282                              formals = map (\x -> CmmKinded x NoHint) $ uniqSetToList live
 283                          in  extendBlockEnv protos id (Protocol ConventionPrivate formals)
 284         g' = g { lg_blocks = add_CopyIns protos' (lg_blocks g) }
 285
 286
 287 -- | Add a CopyIn node to each block that has a protocol but lacks the
 288 -- appropriate CopyIn node.
 289
 290 add_CopyIns :: BlockEnv Protocol -> BlockEnv CmmBlock -> BlockEnv CmmBlock
 291 add_CopyIns protos = mapUFM (maybe_insert_CopyIn protos)
 292     where maybe_insert_CopyIn :: BlockEnv Protocol -> CmmBlock -> CmmBlock
 293           maybe_insert_CopyIn protos b@(Block id t) =
 294             case lookupBlockEnv protos id of
 295               Nothing -> b
 296               Just (Protocol c fs) ->
 297                   case t of
 298                     ZTail (CopyIn c' fs' _) _ ->
 299                       if c == c' && fs == fs' then b
 300                       else panic ("mismatched protocols for block " ++ show id)
 301                     _ -> Block id (ZTail (CopyIn c fs NoC_SRT) t)
 302
 303 -- XXX also need to add the relevant CopyOut nodes!!!
 304
 305 ----------------------------------------------------------------
 306
 307 {-
 308 Note [Direct reachability]
 309
 310 Block B is directly reachable from proc point P iff control can flow
 311 from P to B without passing through an intervening proc point.
 312 -}
 313
 314 ----------------------------------------------------------------
 315
 316 {-
 317 Note [No simple dataflow]
 318
 319 Sadly, it seems impossible to compute the proc points using a single
 320 dataflow pass.  One might attempt to use this simple lattice:
 321
 322   data Location = Unknown
 323                 | InProc BlockId -- node is in procedure headed by the named proc point
 324                 | ProcPoint      -- node is itself a proc point
 325
 326 At a join, a node in two different blocks becomes a proc point.
 327 The difficulty is that the change of information during iterative
 328 computation may promote a node prematurely.  Here's a program that
 329 illustrates the difficulty:
 330
 331   f () {
 332   entry:
 333     ....
 334   L1:
 335     if (...) { ... }
 336     else { ... }
 337
 338   L2: if (...) { g(); goto L1; }
 339       return x + y;
 340   }
 341
 342 The only proc-point needed (besides the entry) is L1.  But in an
 343 iterative analysis, consider what happens to L2.  On the first pass
 344 through, it rises from Unknown to 'InProc entry', but when L1 is
 345 promoted to a proc point (because it's the successor of g()), L1's
 346 successors will be promoted to 'InProc L1'.  The problem hits when the
 347 new fact 'InProc L1' flows into L2 which is already bound to 'InProc entry'.
 348 The join operation makes it a proc point when in fact it needn't be,
 349 because its immediate dominator L1 is already a proc point and there
 350 are no other proc points that directly reach L2.
 351 -}
 352
 353
 354
 355 {- Note [Separate Adams optimization]
 356 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 357 It may be worthwhile to attempt the Adams optimization by rewriting
 358 the graph before the assignment of proc-point protocols.  Here are a
 359 couple of rules:
 360
 361   g() returns to k;                    g() returns to L;
 362   k: CopyIn c ress; goto L:
 363    ...                        ==>        ...
 364   L: // no CopyIn node here            L: CopyIn c ress;
 365
 366
 367 And when c == c' and ress == ress', this also:
 368
 369   g() returns to k;                    g() returns to L;
 370   k: CopyIn c ress; goto L:
 371    ...                        ==>        ...
 372   L: CopyIn c' ress'                   L: CopyIn c' ress' ;
 373
 374 In both cases the goal is to eliminate k.
 375 -}