compiler/cmm/ZipCfg.hs

   1 {-# OPTIONS -Wall -fno-warn-name-shadowing #-}
   2 module ZipCfg
   3     ( BlockId(..), freshBlockId
   4     , BlockEnv, emptyBlockEnv, lookupBlockEnv, extendBlockEnv, insertBlock, mkBlockEnv
   5     , BlockSet, emptyBlockSet, elemBlockSet, extendBlockSet, mkBlockSet
   6     , Graph(..), LGraph(..), FGraph(..)
   7     , Block(..), ZBlock(..), ZHead(..), ZTail(..), ZLast(..)
   8     , HavingSuccessors, succs, fold_succs
   9     , LastNode, mkBranchNode, isBranchNode, branchNodeTarget
  10
  11         -- Observers and transformers
  12     , blockId, zip, unzip, last, goto_end, zipht, tailOfLast
  13     , remove_entry_label
  14     , splice_tail, splice_head, splice_head_only
  15     , of_block_list, to_block_list
  16     , map_nodes
  17     , postorder_dfs
  18     , fold_layout
  19     , fold_blocks
  20     , translate
  21
  22     , pprLgraph
  23
  24     {-
  25     -- the following functions might one day be useful and can be found
  26     -- either below or in ZipCfgExtras:
  27     , entry, exit, focus, focusp, unfocus
  28     , ht_to_first, ht_to_last,
  29     , splice_focus_entry, splice_focus_exit
  30     , fold_fwd_block, foldM_fwd_block
  31     -}
  32
  33     )
  34 where
  35
  36 import Outputable hiding (empty)
  37 import Panic
  38 import Prelude hiding (zip, unzip, last)
  39 import Unique
  40 import UniqFM
  41 import UniqSet
  42 import UniqSupply
  43
  44 -------------------------------------------------------------------------
  45 --               GENERIC ZIPPER-BASED CONTROL-FLOW GRAPH               --
  46 -------------------------------------------------------------------------
  47 {-
  48
  49 This module defines datatypes used to represent control-flow graphs,
  50 along with some functions for analyzing and splicing graphs.
  51 Functions for building graphs are found in a separate module 'MkZipCfg'.
  52
  53 Every graph has a distinguished entry point.  A graph has at least one
  54 exit; most exits are instructions (or statements) like 'jump' or
  55 'return', which transfer control to other procedures, but a graph may
  56 have up to one 'fall through' exit.  (A graph that represents an
  57 entire Haskell or C-- procedure does not have a 'fall through' exit.)
  58
  59 A graph is a collection of basic blocks.  A basic block begins with a
  60 label (unique id; see Note [Unique BlockId]) which is followed by a
  61 sequence of zero or more 'middle' nodes; the basic block ends with a
  62 'last' node.  Each 'middle' node is a single-entry, single-exit,
  63 uninterruptible computation.  A 'last' node is a single-entry,
  64 multiple-exit computation.  A last node may have zero or more successors,
  65 which are identified by their unique ids.
  66
  67 A special case of last node is the ``default exit,'' which represents
  68 'falling off the end' of the graph.  Such a node is always represented by
  69 the data constructor 'LastExit'.  A graph may contain at most one
  70 'LastExit' node, and a graph representing a full procedure should not
  71 contain any 'LastExit' nodes.  'LastExit' nodes are used only to splice
  72 graphs together, either during graph construction (see module 'MkZipCfg')
  73 or during optimization (see module 'ZipDataflow').
  74
  75 A graph is parameterized over the types of middle and last nodes.  Each of
  76 these types will typically be instantiated with a subset of C-- statements
  77 (see module 'ZipCfgCmmRep') or a subset of machine instructions (yet to be
  78 implemented as of August 2007).
  79
  80
  81
  82 This module exposes three representations of graphs.  In order of
  83 increasing complexity, they are:
  84
  85   Graph  m l      The basic graph with its distinguished entry point
  86
  87   LGraph m l      A graph with a *labelled* entry point
  88
  89   FGraph m l      A labelled graph with the *focus* on a particular edge
  90
  91 There are three types because each type offers a slightly different
  92 invariant or cost model.
  93
  94   * The distinguished entry of a Graph has no label.  Because labels must
  95     be unique, acquiring one requires a monadic operation ('freshBlockId').
  96     The primary advantage of the Graph representation is that we can build
  97     a small Graph purely functionally, without entering a monad.  For
  98     example, during optimization we can easily rewrite a single middle
  99     node into a Graph containing a sequence of two middle nodes followed by
 100     LastExit.
 101
 102   * In an LGraph, every basic block is labelled.  The primary advantage of
 103     this representation is its simplicity: each basic block can be treated
 104     like any other.  This representation is used for mapping, folding, and
 105     translation, as well as layout.
 106
 107     Like any graph, an LGraph still has a distinguished entry point,
 108     which you can discover using 'gr_entry'.
 109
 110   * An FGraph is an LGraph with the *focus* on one particular edge.  The
 111     primary advantage of this representation is that it provides
 112     constant-time access to the nodes connected by that edge, and it also
 113     allows constant-time, functional *replacement* of those nodes---in the
 114     style of Huet's 'zipper'.
 115
 116 None of these representations is ideally suited to the incremental
 117 construction of large graphs.  A separate module, 'MkZipCfg', provides a
 118 fourth representation that is asymptotically optimal for such construction.
 119
 120 -}
 121
 122 --------------- Representation --------------------
 123
 124 -- | A basic block is a [[first]] node, followed by zero or more [[middle]]
 125 -- nodes, followed by a [[last]] node.
 126
 127 -- eventually this module should probably replace the original Cmm, but for
 128 -- now we leave it to dynamic invariants what can be found where
 129
 130 data ZLast l
 131   = LastExit     -- fall through; used for the block that has no last node
 132                  -- LastExit is a device used only for graphs under
 133                  -- construction, or framgments of graph under optimisation,
 134                  -- so we don't want to pollute the 'l' type parameter with it
 135   | LastOther l
 136
 137 data ZHead m   = ZFirst BlockId  | ZHead (ZHead m) m
 138     -- ZHead is a (reversed) sequence of middle nodes labeled by a BlockId
 139 data ZTail m l = ZLast (ZLast l) | ZTail m (ZTail m l)
 140     -- ZTail is a sequence of middle nodes followed by a last node
 141
 142 -- | Blocks and flow graphs
 143 data Block m l = Block BlockId (ZTail m l)
 144
 145 data Graph m l = Graph (ZTail m l) (BlockEnv (Block m l))
 146
 147 data LGraph m l = LGraph  { gr_entry  :: BlockId
 148                           , gr_blocks :: BlockEnv (Block m l) }
 149
 150 -- | And now the zipper.  The focus is between the head and tail.
 151 -- Notice we cannot ever focus on an inter-block edge.
 152 data ZBlock m l = ZBlock (ZHead m) (ZTail m l)
 153 data FGraph m l = FGraph { zg_entry  :: BlockId
 154                          , zg_focus  :: ZBlock m l
 155                          , zg_others :: BlockEnv (Block m l) }
 156                     -- Invariant: the block represented by 'zg_focus' is *not*
 157                     -- in the map 'zg_others'
 158
 159 ----  Utility functions ---
 160
 161 blockId   :: Block  m l -> BlockId
 162 zip       :: ZBlock m l -> Block m l
 163 unzip     :: Block m l  -> ZBlock m l
 164
 165 last     :: ZBlock m l -> ZLast l
 166 goto_end :: ZBlock m l -> (ZHead m, ZLast l)
 167
 168 tailOfLast :: l -> ZTail m l
 169
 170 -- | Some ways to combine parts:
 171 ht_to_first :: ZHead m -> ZTail m l -> Block m l -- was (ZFirst, ZTail)
 172 ht_to_last  :: ZHead m -> ZTail m l -> (ZHead m, ZLast l)
 173
 174 zipht       :: ZHead m -> ZTail m l -> Block m l
 175
 176 -- | We can splice a single-entry, single-exit LGraph onto a head or a tail.
 177 -- For a head, we have a head~[[h]] followed by a LGraph~[[g]].
 178 -- The entry node of~[[g]] gets joined to~[[h]], forming the entry into
 179 -- the new LGraph.  The exit of~[[g]] becomes the new head.
 180 -- For both arguments and results, the order of values is the order of
 181 -- control flow: before splicing, the head flows into the LGraph; after
 182 -- splicing, the LGraph flows into the head.
 183 -- Splicing a tail is the dual operation.
 184 -- (In order to maintain the order-means-control-flow convention, the
 185 -- orders are reversed.)
 186
 187 splice_head :: ZHead m   -> LGraph m l -> (LGraph m l, ZHead m)
 188 splice_tail :: LGraph m l -> ZTail m l -> (ZTail m l, LGraph m l)
 189
 190 -- | We can also splice a single-entry, no-exit LGraph into a head.
 191 splice_head_only :: ZHead m -> LGraph m l -> LGraph m l
 192
 193 -- | Finally, we can remove the entry label of an LGraph and remove
 194 -- it, leaving a Graph:
 195 remove_entry_label :: LGraph m l -> Graph m l
 196
 197 of_block_list :: BlockId -> [Block m l] -> LGraph m l  -- N log N
 198 to_block_list :: LGraph m l -> [Block m l]  -- N log N
 199
 200 -- | Traversal: [[postorder_dfs]] returns a list of blocks reachable from
 201 -- the entry node.
 202 -- The postorder depth-first-search order means the list is in roughly
 203 -- first-to-last order, as suitable for use in a forward dataflow problem.
 204
 205 postorder_dfs :: LastNode l => LGraph m l -> [Block m l]
 206
 207 -- | For layout, we fold over pairs of [[Block m l]] and [[Maybe BlockId]]
 208 -- in layout order.  The [[BlockId]], if any, identifies the block that
 209 -- will be the layout successor of the current block.  This may be
 210 -- useful to help an emitter omit the final [[goto]] of a block that
 211 -- flows directly to its layout successor.
 212 fold_layout ::
 213     LastNode l => (Block m l -> Maybe BlockId -> a -> a) -> a -> LGraph m l-> a
 214
 215 -- | We can also fold and iterate over blocks.
 216 fold_blocks :: (Block m l -> a -> a) -> a -> LGraph m l -> a
 217
 218 map_nodes :: (BlockId -> BlockId) -> (m -> m') -> (l -> l') -> LGraph m l -> LGraph m' l'
 219    -- mapping includes the entry id!
 220 translate :: (m -> UniqSM (LGraph m' l')) -> (l -> UniqSM (LGraph m' l')) ->
 221              LGraph m l -> UniqSM (LGraph m' l')
 222
 223 {-
 224 translateA :: (m -> Agraph m' l') -> (l -> AGraph m' l') -> LGraph m l -> LGraph m' l'
 225 -}
 226
 227 ------------------- Last nodes
 228
 229 -- | We can't make a graph out of just any old 'last node' type.  A
 230 -- last node has to be able to find its successors, and we need to
 231 -- be able to create and identify unconditional branches.  We put
 232 -- these capabilities in a type class.
 233
 234 class HavingSuccessors b where
 235   succs :: b -> [BlockId]
 236   fold_succs :: (BlockId -> a -> a) -> b -> a -> a
 237
 238   fold_succs add l z = foldr add z $ succs l
 239
 240 class HavingSuccessors l => LastNode l where
 241   mkBranchNode :: BlockId -> l
 242   isBranchNode :: l -> Bool
 243   branchNodeTarget :: l -> BlockId  -- panics if not branch node
 244
 245 instance HavingSuccessors l => HavingSuccessors (ZLast l) where
 246     succs LastExit = []
 247     succs (LastOther l) = succs l
 248     fold_succs _ LastExit z = z
 249     fold_succs f (LastOther l) z = fold_succs f l z
 250
 251 instance LastNode l => LastNode (ZLast l) where
 252     mkBranchNode id = LastOther $ mkBranchNode id
 253     isBranchNode LastExit = False
 254     isBranchNode (LastOther l) = isBranchNode l
 255     branchNodeTarget LastExit = panic "branchNodeTarget LastExit"
 256     branchNodeTarget (LastOther l) = branchNodeTarget l
 257
 258 instance LastNode l => HavingSuccessors (ZBlock m l) where
 259     succs b = succs (last b)
 260
 261 instance LastNode l => HavingSuccessors (Block m l) where
 262     succs b = succs (unzip b)
 263
 264
 265 -- ================ IMPLEMENTATION ================--
 266
 267 blockId (Block id _) = id
 268
 269 -- | Convert block between forms.
 270 -- These functions are tail-recursive, so we can go as deep as we like
 271 -- without fear of stack overflow.
 272
 273 ht_to_first head tail = case head of
 274   ZFirst id -> Block id tail
 275   ZHead h m -> ht_to_first h (ZTail m tail)
 276
 277 head_id :: ZHead m -> BlockId
 278 head_id (ZFirst id) = id
 279 head_id (ZHead h _) = head_id h
 280
 281 zip (ZBlock h t) = ht_to_first h t
 282
 283 ht_to_last head (ZLast l)   = (head, l)
 284 ht_to_last head (ZTail m t) = ht_to_last (ZHead head m) t
 285
 286 goto_end (ZBlock h t) = ht_to_last h t
 287
 288 tailOfLast l = ZLast (LastOther l)
 289
 290 zipht = ht_to_first
 291 unzip (Block id t) = ZBlock (ZFirst id) t
 292
 293 last (ZBlock _ t) = lastt t
 294   where lastt (ZLast l) = l
 295         lastt (ZTail _ t) = lastt t
 296
 297 focus :: BlockId -> LGraph m l -> FGraph m l -- focus on edge out of node with id
 298 focus id (LGraph entry blocks) =
 299     case lookupBlockEnv blocks id of
 300       Just b -> FGraph entry (unzip b) (delFromUFM blocks id)
 301       Nothing -> panic "asked for nonexistent block in flow graph"
 302
 303 splitp_blocks :: (Block m l -> Bool) -> BlockEnv (Block m l) ->
 304                  Maybe (Block m l, BlockEnv (Block m l))
 305 splitp_blocks p blocks = lift $ foldUFM scan (Nothing, emptyBlockEnv) blocks
 306     where scan b (yes, no) =
 307               case yes of
 308                 Nothing | p b -> (Just b, no)
 309                         | otherwise -> (yes, insertBlock b no)
 310                 Just _ -> (yes, insertBlock b no)
 311           lift (Nothing, _) = Nothing
 312           lift (Just b, bs) = Just (b, bs)
 313
 314 is_exit :: Block m l -> Bool
 315 is_exit b = case last (unzip b) of { LastExit -> True; _ -> False }
 316
 317 -- | 'insertBlock' should not be used to *replace* an existing block
 318 -- but only to insert a new one
 319 insertBlock :: Block m l -> BlockEnv (Block m l) -> BlockEnv (Block m l)
 320 insertBlock b bs =
 321     case lookupBlockEnv bs id of
 322       Nothing -> extendBlockEnv bs id b
 323       Just _ -> panic ("duplicate labels " ++ show id ++ " in ZipCfg graph")
 324     where id = blockId b
 325
 326 check_single_exit :: LGraph l m -> a -> a
 327 check_single_exit g =
 328   let check block found = case last (unzip block) of
 329                             LastExit -> if found then panic "graph has multiple exits"
 330                                         else True
 331                             _ -> found
 332   in if not (foldUFM check False (gr_blocks g)) then
 333          panic "graph does not have an exit"
 334      else
 335          \a -> a
 336
 337 freshBlockId :: String -> UniqSM BlockId
 338 freshBlockId _ = do { u <- getUniqueUs; return $ BlockId u }
 339
 340 entry   :: LGraph m l -> FGraph m l         -- focus on edge out of entry node
 341 entry g@(LGraph eid _) = focus eid g
 342
 343
 344
 345 postorder_dfs g@(LGraph _ blocks) =
 346   let FGraph _ eblock _ = entry g
 347   in  vnode (zip eblock) (\acc _visited -> acc) [] emptyBlockSet
 348   where
 349     -- vnode :: Block m l -> ([Block m l] -> BlockSet -> a) -> [Block m l] -> BlockSet ->a
 350     vnode block@(Block id _) cont acc visited =
 351         if elemBlockSet id visited then
 352             cont acc visited
 353         else
 354             vchildren block (get_children block) cont acc (extendBlockSet visited id)
 355     vchildren block bs cont acc visited =
 356         let next children acc visited =
 357                 case children of []     -> cont (block : acc) visited
 358                                  (b:bs) -> vnode b (next bs) acc visited
 359         in next bs acc visited
 360     get_children block = foldl add_id [] (succs block)
 361     add_id rst id = case lookupBlockEnv blocks id of
 362                       Just b -> b : rst
 363                       Nothing -> rst
 364
 365 fold_layout f z g@(LGraph eid _) = fold (postorder_dfs g) z
 366   where fold blocks z =
 367             case blocks of [] -> z
 368                            [b] -> f b Nothing z
 369                            b1 : b2 : bs -> fold (b2 : bs) (f b1 (nextlabel b2) z)
 370         nextlabel (Block id _) =
 371             if id == eid then panic "entry as successor"
 372             else Just id
 373
 374 fold_blocks f z (LGraph _ blocks) = foldUFM f z blocks
 375
 376 map_nodes idm middle last (LGraph eid blocks) = LGraph (idm eid) (mapUFM block blocks)
 377     where block (Block id t) = Block (idm id) (tail t)
 378           tail (ZTail m t) = ZTail (middle m) (tail t)
 379           tail (ZLast LastExit) = ZLast LastExit
 380           tail (ZLast (LastOther l)) = ZLast (LastOther (last l))
 381
 382 of_block_list e blocks = LGraph e $ foldr insertBlock emptyBlockEnv blocks
 383 to_block_list (LGraph _ blocks) = eltsUFM blocks
 384
 385 {-
 386 \paragraph{Splicing support}
 387
 388 We want to be able to scrutinize a single-entry, single-exit LGraph for
 389 splicing purposes.
 390 There are two useful cases: the LGraph is a single block or it isn't.
 391 We use continuation-passing style.
 392 -}
 393
 394 prepare_for_splicing ::
 395   LGraph m l -> (ZTail m l -> a) -> (ZTail m l -> ZHead m -> BlockEnv (Block m l) -> a)
 396   -> a
 397 prepare_for_splicing g single multi =
 398   let FGraph _ gentry gblocks = entry g
 399       ZBlock _ etail = gentry
 400   in if isNullUFM gblocks then
 401          case last gentry of
 402            LastExit -> single etail
 403            _ -> panic "bad single block"
 404      else
 405        case splitp_blocks is_exit gblocks of
 406          Nothing -> panic "Can't find an exit block"
 407          Just (gexit, gblocks) ->
 408               let (gh, gl) = goto_end $ unzip gexit in
 409               case gl of LastExit -> multi etail gh gblocks
 410                          _ -> panic "exit is not exit?!"
 411
 412 splice_head head g =
 413   check_single_exit g $
 414   let eid = head_id head
 415       splice_one_block tail' =
 416           case ht_to_last head tail' of
 417             (head, LastExit) -> (LGraph eid emptyBlockEnv, head)
 418             _ -> panic "spliced LGraph without exit"
 419       splice_many_blocks entry exit others =
 420           (LGraph eid (insertBlock (zipht head entry) others), exit)
 421   in  prepare_for_splicing g splice_one_block splice_many_blocks
 422
 423 splice_tail g tail =
 424   check_single_exit g $
 425   let splice_one_block tail' =  -- return tail' .. tail
 426         case ht_to_last (ZFirst (gr_entry g)) tail' of
 427           (head', LastExit) ->
 428               case ht_to_first head' tail of
 429                  Block id t | id == gr_entry g -> (t, LGraph id emptyBlockEnv)
 430                  _ -> panic "entry in; garbage out"
 431           _ -> panic "spliced single block without Exit"
 432       splice_many_blocks entry exit others =
 433          (entry, LGraph (gr_entry g) (insertBlock (zipht exit tail) others))
 434   in  prepare_for_splicing g splice_one_block splice_many_blocks
 435
 436 splice_head_only head g =
 437   let FGraph eid gentry gblocks = entry g
 438   in case gentry of
 439        ZBlock (ZFirst _) tail -> LGraph eid (insertBlock (zipht head tail) gblocks)
 440        _ -> panic "entry not at start of block?!"
 441
 442 remove_entry_label g =
 443     let FGraph e eblock others = entry g
 444     in case eblock of
 445          ZBlock (ZFirst id) tail
 446              | id == e -> Graph tail others
 447          _ -> panic "id doesn't match on entry block"
 448
 449 --- Translation
 450
 451 translate txm txl (LGraph eid blocks) =
 452     do blocks' <- foldUFM txblock (return emptyBlockEnv) blocks
 453        return $ LGraph eid blocks'
 454     where
 455       -- txblock ::
 456       -- Block m l -> UniqSM (BlockEnv (Block m' l')) -> UniqSM (BlockEnv (Block m' l'))
 457       txblock (Block id t) expanded =
 458         do blocks' <- expanded
 459            txtail (ZFirst id) t blocks'
 460       -- txtail :: ZHead m' -> ZTail m l -> BlockEnv (Block m' l') ->
 461       --           UniqSM (BlockEnv (Block m' l'))
 462       txtail h (ZTail m t) blocks' =
 463         do m' <- txm m
 464            let (g, h') = splice_head h m'
 465            txtail h' t (plusUFM (gr_blocks g) blocks')
 466       txtail h (ZLast (LastOther l)) blocks' =
 467         do l' <- txl l
 468            return $ plusUFM (gr_blocks (splice_head_only h l')) blocks'
 469       txtail h (ZLast LastExit) blocks' =
 470         return $ insertBlock (zipht h (ZLast LastExit)) blocks'
 471
 472 ----------------------------------------------------------------
 473 --- Block Ids, their environments, and their sets
 474
 475 {- Note [Unique BlockId]
 476 ~~~~~~~~~~~~~~~~~~~~~~~~
 477 Although a 'BlockId' is a local label, for reasons of implementation,
 478 'BlockId's must be unique within an entire compilation unit.  The reason
 479 is that each local label is mapped to an assembly-language label, and in
 480 most assembly languages allow, a label is visible throughout the enitre
 481 compilation unit in which it appears.
 482 -}
 483
 484 newtype BlockId = BlockId Unique
 485   deriving (Eq,Ord)
 486
 487 instance Uniquable BlockId where
 488   getUnique (BlockId u) = u
 489
 490 instance Show BlockId where
 491   show (BlockId u) = show u
 492
 493 instance Outputable BlockId where
 494   ppr = ppr . getUnique
 495
 496
 497 type BlockEnv a = UniqFM {- BlockId -} a
 498 emptyBlockEnv :: BlockEnv a
 499 emptyBlockEnv = emptyUFM
 500 lookupBlockEnv :: BlockEnv a -> BlockId -> Maybe a
 501 lookupBlockEnv = lookupUFM
 502 extendBlockEnv :: BlockEnv a -> BlockId -> a -> BlockEnv a
 503 extendBlockEnv = addToUFM
 504 mkBlockEnv :: [(BlockId,a)] -> BlockEnv a
 505 mkBlockEnv = listToUFM
 506
 507 type BlockSet = UniqSet BlockId
 508 emptyBlockSet :: BlockSet
 509 emptyBlockSet = emptyUniqSet
 510 elemBlockSet :: BlockId -> BlockSet -> Bool
 511 elemBlockSet = elementOfUniqSet
 512 extendBlockSet :: BlockSet -> BlockId -> BlockSet
 513 extendBlockSet = addOneToUniqSet
 514 mkBlockSet :: [BlockId] -> BlockSet
 515 mkBlockSet = mkUniqSet
 516
 517 ----------------------------------------------------------------
 518 -- putting this code in PprCmmZ leads to circular imports :-(
 519
 520 instance (Outputable m, Outputable l) => Outputable (ZTail m l) where
 521     ppr = pprTail
 522
 523 -- | 'pprTail' is used for debugging only
 524 pprTail :: (Outputable m, Outputable l) => ZTail m l -> SDoc
 525 pprTail (ZTail m t) = ppr m $$ ppr t
 526 pprTail (ZLast LastExit) = text "<exit>"
 527 pprTail (ZLast (LastOther l)) = ppr l
 528
 529 pprLgraph :: (Outputable m, Outputable l, LastNode l) => LGraph m l -> SDoc
 530 pprLgraph g = text "{" $$ nest 2 (vcat $ map pprBlock blocks) $$ text "}"
 531     where pprBlock (Block id tail) = ppr id <> colon $$ ppr tail
 532           blocks = postorder_dfs g