1 {-# OPTIONS -Wall -fno-warn-name-shadowing #-}
3 ( BlockId(..), freshBlockId
4 , BlockEnv, emptyBlockEnv, lookupBlockEnv, extendBlockEnv, insertBlock, mkBlockEnv
5 , BlockSet, emptyBlockSet, elemBlockSet, extendBlockSet, mkBlockSet
6 , Graph(..), LGraph(..), FGraph(..)
7 , Block(..), ZBlock(..), ZHead(..), ZTail(..), ZLast(..)
8 , HavingSuccessors, succs, fold_succs
9 , LastNode, mkBranchNode, isBranchNode, branchNodeTarget
11 -- Observers and transformers
12 , entry, exit, focus, focusp, unfocus
13 , blockId, zip, unzip, last, goto_end, ht_to_first, ht_to_last, zipht
15 , splice_head, splice_tail, splice_head_only, splice_focus_entry
16 , splice_focus_exit, remove_entry_label
17 , of_block_list, to_block_list
19 , fold_layout, fold_blocks
20 , fold_fwd_block, foldM_fwd_block
21 , map_nodes, translate
28 import Outputable hiding (empty)
30 import Prelude hiding (zip, unzip, last)
36 -------------------------------------------------------------------------
37 -- GENERIC ZIPPER-BASED CONTROL-FLOW GRAPH --
38 -------------------------------------------------------------------------
41 This module defines datatypes used to represent control-flow graphs,
42 along with some functions for analyzing and splicing graphs.
43 Functions for building graphs are found in a separate module 'MkZipCfg'.
45 Every graph has a distinguished entry point. A graph has at least one
46 exit; most exits are instructions (or statements) like 'jump' or
47 'return', which transfer control to other procedures, but a graph may
48 have up to one 'fall through' exit. (A graph that represents an
49 entire Haskell or C-- procedure does not have a 'fall through' exit.)
51 A graph is a collection of basic blocks. A basic block begins with a
52 label (unique id; see Note [Unique BlockId]) which is followed by a
53 sequence of zero or more 'middle' nodes; the basic block ends with a
54 'last' node. Each 'middle' node is a single-entry, single-exit,
55 uninterruptible computation. A 'last' node is a single-entry,
56 multiple-exit computation. A last node may have zero or more successors,
57 which are identified by their unique ids.
59 A special case of last node is the ``default exit,'' which represents
60 'falling off the end' of the graph. Such a node is always represented by
61 the data constructor 'LastExit'. A graph may contain at most one
62 'LastExit' node, and a graph representing a full procedure should not
63 contain any 'LastExit' nodes. 'LastExit' nodes are used only to splice
64 graphs together, either during graph construction (see module 'MkZipCfg')
65 or during optimization (see module 'ZipDataflow').
67 A graph is parameterized over the types of middle and last nodes. Each of
68 these types will typically be instantiated with a subset of C-- statements
69 (see module 'ZipCfgCmmRep') or a subset of machine instructions (yet to be
70 implemented as of August 2007).
74 This module exposes three representations of graphs. In order of
75 increasing complexity, they are:
77 Graph m l The basic graph with its distinguished entry point
79 LGraph m l A graph with a *labelled* entry point
81 FGraph m l A labelled graph with the *focus* on a particular edge
83 There are three types because each type offers a slightly different
84 invariant or cost model.
86 * The distinguished entry of a Graph has no label. Because labels must
87 be unique, acquiring one requires a monadic operation ('freshBlockId').
88 The primary advantage of the Graph representation is that we can build
89 a small Graph purely functionally, without entering a monad. For
90 example, during optimization we can easily rewrite a single middle
91 node into a Graph containing a sequence of two middle nodes followed by
94 * In an LGraph, every basic block is labelled. The primary advantage of
95 this representation is its simplicity: each basic block can be treated
96 like any other. This representation is used for mapping, folding, and
97 translation, as well as layout.
99 Like any graph, an LGraph still has a distinguished entry point,
100 which you can discover using 'gr_entry'.
102 * An FGraph is an LGraph with the *focus* on one particular edge. The
103 primary advantage of this representation is that it provides
104 constant-time access to the nodes connected by that edge, and it also
105 allows constant-time, functional *replacement* of those nodes---in the
106 style of Huet's 'zipper'.
108 None of these representations is ideally suited to the incremental
109 construction of large graphs. A separate module, 'MkZipCfg', provides a
110 fourth representation that is asymptotically optimal for such construction.
114 entry :: LGraph m l -> FGraph m l -- focus on edge out of entry node
115 exit :: LGraph m l -> FGraph m l -- focus on edge into default exit node
116 -- (fails if there isn't one)
117 focus :: BlockId -> LGraph m l -> FGraph m l -- focus on edge out of node with id
118 focusp :: (Block m l -> Bool) -> LGraph m l -> Maybe (FGraph m l)
119 -- focus on start of block satisfying predicate
120 unfocus :: FGraph m l -> LGraph m l -- lose focus
122 -- | We can insert a single-entry, single-exit subgraph at
123 -- the current focus.
124 -- The new focus can be at either the entry edge or the exit edge.
126 splice_focus_entry :: FGraph m l -> LGraph m l -> FGraph m l
127 splice_focus_exit :: FGraph m l -> LGraph m l -> FGraph m l
129 --------------- Representation --------------------
131 -- | A basic block is a [[first]] node, followed by zero or more [[middle]]
132 -- nodes, followed by a [[last]] node.
134 -- eventually this module should probably replace the original Cmm, but for
135 -- now we leave it to dynamic invariants what can be found where
138 = LastExit -- fall through; used for the block that has no last node
139 -- LastExit is a device used only for graphs under
140 -- construction, or framgments of graph under optimisation,
141 -- so we don't want to pollute the 'l' type parameter with it
144 data ZHead m = ZFirst BlockId | ZHead (ZHead m) m
145 -- ZHead is a (reversed) sequence of middle nodes labeled by a BlockId
146 data ZTail m l = ZLast (ZLast l) | ZTail m (ZTail m l)
147 -- ZTail is a sequence of middle nodes followed by a last node
149 -- | Blocks and flow graphs
150 data Block m l = Block BlockId (ZTail m l)
152 data Graph m l = Graph (ZTail m l) (BlockEnv (Block m l))
154 data LGraph m l = LGraph { gr_entry :: BlockId
155 , gr_blocks :: BlockEnv (Block m l) }
157 -- | And now the zipper. The focus is between the head and tail.
158 -- Notice we cannot ever focus on an inter-block edge.
159 data ZBlock m l = ZBlock (ZHead m) (ZTail m l)
160 data FGraph m l = FGraph { zg_entry :: BlockId
161 , zg_focus :: ZBlock m l
162 , zg_others :: BlockEnv (Block m l) }
163 -- Invariant: the block represented by 'zg_focus' is *not*
164 -- in the map 'zg_others'
166 ---- Utility functions ---
168 blockId :: Block m l -> BlockId
169 zip :: ZBlock m l -> Block m l
170 unzip :: Block m l -> ZBlock m l
172 last :: ZBlock m l -> ZLast l
173 goto_end :: ZBlock m l -> (ZHead m, ZLast l)
175 tailOfLast :: l -> ZTail m l
177 -- | Some ways to combine parts:
178 ht_to_first :: ZHead m -> ZTail m l -> Block m l -- was (ZFirst, ZTail)
179 ht_to_last :: ZHead m -> ZTail m l -> (ZHead m, ZLast l)
181 zipht :: ZHead m -> ZTail m l -> Block m l
183 -- | We can splice a single-entry, single-exit LGraph onto a head or a tail.
184 -- For a head, we have a head~[[h]] followed by a LGraph~[[g]].
185 -- The entry node of~[[g]] gets joined to~[[h]], forming the entry into
186 -- the new LGraph. The exit of~[[g]] becomes the new head.
187 -- For both arguments and results, the order of values is the order of
188 -- control flow: before splicing, the head flows into the LGraph; after
189 -- splicing, the LGraph flows into the head.
190 -- Splicing a tail is the dual operation.
191 -- (In order to maintain the order-means-control-flow convention, the
192 -- orders are reversed.)
194 splice_head :: ZHead m -> LGraph m l -> (LGraph m l, ZHead m)
195 splice_tail :: LGraph m l -> ZTail m l -> (ZTail m l, LGraph m l)
197 -- | We can also splice a single-entry, no-exit LGraph into a head.
198 splice_head_only :: ZHead m -> LGraph m l -> LGraph m l
200 -- | Finally, we can remove the entry label of an LGraph and remove
201 -- it, leaving a Graph:
202 remove_entry_label :: LGraph m l -> Graph m l
204 of_block_list :: BlockId -> [Block m l] -> LGraph m l -- N log N
205 to_block_list :: LGraph m l -> [Block m l] -- N log N
207 -- | Traversal: [[postorder_dfs]] returns a list of blocks reachable from
209 -- The postorder depth-first-search order means the list is in roughly
210 -- first-to-last order, as suitable for use in a forward dataflow problem.
212 postorder_dfs :: LastNode l => LGraph m l -> [Block m l]
214 -- | For layout, we fold over pairs of [[Block m l]] and [[Maybe BlockId]]
215 -- in layout order. The [[BlockId]], if any, identifies the block that
216 -- will be the layout successor of the current block. This may be
217 -- useful to help an emitter omit the final [[goto]] of a block that
218 -- flows directly to its layout successor.
220 LastNode l => (Block m l -> Maybe BlockId -> a -> a) -> a -> LGraph m l-> a
222 -- | We can also fold and iterate over blocks.
223 fold_blocks :: (Block m l -> a -> a) -> a -> LGraph m l -> a
225 map_nodes :: (BlockId -> BlockId) -> (m -> m') -> (l -> l') -> LGraph m l -> LGraph m' l'
226 -- mapping includes the entry id!
227 translate :: (m -> UniqSM (LGraph m' l')) -> (l -> UniqSM (LGraph m' l')) ->
228 LGraph m l -> UniqSM (LGraph m' l')
231 translateA :: (m -> Agraph m' l') -> (l -> AGraph m' l') -> LGraph m l -> LGraph m' l'
234 ------------------- Last nodes
236 -- | We can't make a graph out of just any old 'last node' type. A
237 -- last node has to be able to find its successors, and we need to
238 -- be able to create and identify unconditional branches. We put
239 -- these capabilities in a type class.
241 class HavingSuccessors b where
242 succs :: b -> [BlockId]
243 fold_succs :: (BlockId -> a -> a) -> b -> a -> a
245 fold_succs add l z = foldr add z $ succs l
247 class HavingSuccessors l => LastNode l where
248 mkBranchNode :: BlockId -> l
249 isBranchNode :: l -> Bool
250 branchNodeTarget :: l -> BlockId -- panics if not branch node
252 instance HavingSuccessors l => HavingSuccessors (ZLast l) where
254 succs (LastOther l) = succs l
255 fold_succs _ LastExit z = z
256 fold_succs f (LastOther l) z = fold_succs f l z
258 instance LastNode l => LastNode (ZLast l) where
259 mkBranchNode id = LastOther $ mkBranchNode id
260 isBranchNode LastExit = False
261 isBranchNode (LastOther l) = isBranchNode l
262 branchNodeTarget LastExit = panic "branchNodeTarget LastExit"
263 branchNodeTarget (LastOther l) = branchNodeTarget l
265 instance LastNode l => HavingSuccessors (ZBlock m l) where
266 succs b = succs (last b)
268 instance LastNode l => HavingSuccessors (Block m l) where
269 succs b = succs (unzip b)
272 ------------------- Observing nodes
274 -- | Fold from first to last
276 (BlockId -> a -> a) -> (m -> a -> a) -> (ZLast l -> a -> a) ->
279 -- | iterate from first to last
281 Monad m => (BlockId -> a -> m a) -> (mid -> a -> m a) -> (ZLast l -> a -> m a) ->
282 Block mid l -> a -> m a
284 -- ================ IMPLEMENTATION ================--
286 blockId (Block id _) = id
288 -- | Convert block between forms.
289 -- These functions are tail-recursive, so we can go as deep as we like
290 -- without fear of stack overflow.
292 ht_to_first head tail = case head of
293 ZFirst id -> Block id tail
294 ZHead h m -> ht_to_first h (ZTail m tail)
296 head_id :: ZHead m -> BlockId
297 head_id (ZFirst id) = id
298 head_id (ZHead h _) = head_id h
300 zip (ZBlock h t) = ht_to_first h t
302 ht_to_last head (ZLast l) = (head, l)
303 ht_to_last head (ZTail m t) = ht_to_last (ZHead head m) t
305 goto_end (ZBlock h t) = ht_to_last h t
307 tailOfLast l = ZLast (LastOther l)
310 unzip (Block id t) = ZBlock (ZFirst id) t
312 last (ZBlock _ t) = lastt t
313 where lastt (ZLast l) = l
314 lastt (ZTail _ t) = lastt t
316 focus id (LGraph entry blocks) =
317 case lookupBlockEnv blocks id of
318 Just b -> FGraph entry (unzip b) (delFromUFM blocks id)
319 Nothing -> panic "asked for nonexistent block in flow graph"
321 focusp p (LGraph entry blocks) =
322 fmap (\(b, bs) -> FGraph entry (unzip b) bs) (splitp_blocks p blocks)
324 splitp_blocks :: (Block m l -> Bool) -> BlockEnv (Block m l) ->
325 Maybe (Block m l, BlockEnv (Block m l))
326 splitp_blocks p blocks = lift $ foldUFM scan (Nothing, emptyBlockEnv) blocks
327 where scan b (yes, no) =
329 Nothing | p b -> (Just b, no)
330 | otherwise -> (yes, insertBlock b no)
331 Just _ -> (yes, insertBlock b no)
332 lift (Nothing, _) = Nothing
333 lift (Just b, bs) = Just (b, bs)
335 entry g@(LGraph eid _) = focus eid g
337 exit g@(LGraph eid _) = FGraph eid (ZBlock h (ZLast l)) others
338 where FGraph _ b others = focusp is_exit g `orElse` panic "no exit in flow graph"
341 is_exit :: Block m l -> Bool
342 is_exit b = case last (unzip b) of { LastExit -> True; _ -> False }
344 -- | 'insertBlock' should not be used to *replace* an existing block
345 -- but only to insert a new one
346 insertBlock :: Block m l -> BlockEnv (Block m l) -> BlockEnv (Block m l)
348 case lookupBlockEnv bs id of
349 Nothing -> extendBlockEnv bs id b
350 Just _ -> panic ("duplicate labels " ++ show id ++ " in ZipCfg graph")
353 unfocus (FGraph e bz bs) = LGraph e (insertBlock (zip bz) bs)
355 check_single_exit :: LGraph l m -> a -> a
356 check_single_exit g =
357 let check block found = case last (unzip block) of
358 LastExit -> if found then panic "graph has multiple exits"
361 in if not (foldUFM check False (gr_blocks g)) then
362 panic "graph does not have an exit"
366 freshBlockId :: String -> UniqSM BlockId
367 freshBlockId _ = do { u <- getUniqueUs; return $ BlockId u }
369 postorder_dfs g@(LGraph _ blocks) =
370 let FGraph _ eblock _ = entry g
371 in vnode (zip eblock) (\acc _visited -> acc) [] emptyBlockSet
373 -- vnode :: Block m l -> ([Block m l] -> BlockSet -> a) -> [Block m l] -> BlockSet ->a
374 vnode block@(Block id _) cont acc visited =
375 if elemBlockSet id visited then
378 vchildren block (get_children block) cont acc (extendBlockSet visited id)
379 vchildren block bs cont acc visited =
380 let next children acc visited =
381 case children of [] -> cont (block : acc) visited
382 (b:bs) -> vnode b (next bs) acc visited
383 in next bs acc visited
384 get_children block = foldl add_id [] (succs block)
385 add_id rst id = case lookupBlockEnv blocks id of
389 fold_layout f z g@(LGraph eid _) = fold (postorder_dfs g) z
390 where fold blocks z =
391 case blocks of [] -> z
393 b1 : b2 : bs -> fold (b2 : bs) (f b1 (nextlabel b2) z)
394 nextlabel (Block id _) =
395 if id == eid then panic "entry as successor"
398 fold_fwd_block first middle last (Block id t) z = tail t (first id z)
399 where tail (ZTail m t) z = tail t (middle m z)
400 tail (ZLast l) z = last l z
402 foldM_fwd_block first middle last (Block id t) z = do { z <- first id z; tail t z }
403 where tail (ZTail m t) z = do { z <- middle m z; tail t z }
404 tail (ZLast l) z = last l z
406 fold_blocks f z (LGraph _ blocks) = foldUFM f z blocks
408 map_nodes idm middle last (LGraph eid blocks) = LGraph (idm eid) (mapUFM block blocks)
409 where block (Block id t) = Block (idm id) (tail t)
410 tail (ZTail m t) = ZTail (middle m) (tail t)
411 tail (ZLast LastExit) = ZLast LastExit
412 tail (ZLast (LastOther l)) = ZLast (LastOther (last l))
414 of_block_list e blocks = LGraph e $ foldr insertBlock emptyBlockEnv blocks
415 to_block_list (LGraph _ blocks) = eltsUFM blocks
418 \paragraph{Splicing support}
420 We want to be able to scrutinize a single-entry, single-exit LGraph for
422 There are two useful cases: the LGraph is a single block or it isn't.
423 We use continuation-passing style.
426 prepare_for_splicing ::
427 LGraph m l -> (ZTail m l -> a) -> (ZTail m l -> ZHead m -> BlockEnv (Block m l) -> a)
429 prepare_for_splicing g single multi =
430 let FGraph _ gentry gblocks = entry g
431 ZBlock _ etail = gentry
432 in if isNullUFM gblocks then
434 LastExit -> single etail
435 _ -> panic "bad single block"
437 case splitp_blocks is_exit gblocks of
438 Nothing -> panic "Can't find an exit block"
439 Just (gexit, gblocks) ->
440 let (gh, gl) = goto_end $ unzip gexit in
441 case gl of LastExit -> multi etail gh gblocks
442 _ -> panic "exit is not exit?!"
445 check_single_exit g $
446 let eid = head_id head
447 splice_one_block tail' =
448 case ht_to_last head tail' of
449 (head, LastExit) -> (LGraph eid emptyBlockEnv, head)
450 _ -> panic "spliced LGraph without exit"
451 splice_many_blocks entry exit others =
452 (LGraph eid (insertBlock (zipht head entry) others), exit)
453 in prepare_for_splicing g splice_one_block splice_many_blocks
456 check_single_exit g $
457 let splice_one_block tail' = -- return tail' .. tail
458 case ht_to_last (ZFirst (gr_entry g)) tail' of
460 case ht_to_first head' tail of
461 Block id t | id == gr_entry g -> (t, LGraph id emptyBlockEnv)
462 _ -> panic "entry in; garbage out"
463 _ -> panic "spliced single block without Exit"
464 splice_many_blocks entry exit others =
465 (entry, LGraph (gr_entry g) (insertBlock (zipht exit tail) others))
466 in prepare_for_splicing g splice_one_block splice_many_blocks
468 splice_focus_entry (FGraph eid (ZBlock head tail) blocks) g =
469 let (tail', g') = splice_tail g tail in
470 FGraph eid (ZBlock head tail') (plusUFM (gr_blocks g') blocks)
472 splice_focus_exit (FGraph eid (ZBlock head tail) blocks) g =
473 let (g', head') = splice_head head g in
474 FGraph eid (ZBlock head' tail) (plusUFM (gr_blocks g') blocks)
476 splice_head_only head g =
477 let FGraph eid gentry gblocks = entry g
479 ZBlock (ZFirst _) tail -> LGraph eid (insertBlock (zipht head tail) gblocks)
480 _ -> panic "entry not at start of block?!"
482 remove_entry_label g =
483 let FGraph e eblock others = entry g
485 ZBlock (ZFirst id) tail
486 | id == e -> Graph tail others
487 _ -> panic "id doesn't match on entry block"
491 translate txm txl (LGraph eid blocks) =
492 do blocks' <- foldUFM txblock (return emptyBlockEnv) blocks
493 return $ LGraph eid blocks'
496 -- Block m l -> UniqSM (BlockEnv (Block m' l')) -> UniqSM (BlockEnv (Block m' l'))
497 txblock (Block id t) expanded =
498 do blocks' <- expanded
499 txtail (ZFirst id) t blocks'
500 -- txtail :: ZHead m' -> ZTail m l -> BlockEnv (Block m' l') ->
501 -- UniqSM (BlockEnv (Block m' l'))
502 txtail h (ZTail m t) blocks' =
504 let (g, h') = splice_head h m'
505 txtail h' t (plusUFM (gr_blocks g) blocks')
506 txtail h (ZLast (LastOther l)) blocks' =
508 return $ plusUFM (gr_blocks (splice_head_only h l')) blocks'
509 txtail h (ZLast LastExit) blocks' =
510 return $ insertBlock (zipht h (ZLast LastExit)) blocks'
512 ----------------------------------------------------------------
513 --- Block Ids, their environments, and their sets
515 {- Note [Unique BlockId]
516 ~~~~~~~~~~~~~~~~~~~~~~~~
517 Although a 'BlockId' is a local label, for reasons of implementation,
518 'BlockId's must be unique within an entire compilation unit. The reason
519 is that each local label is mapped to an assembly-language label, and in
520 most assembly languages allow, a label is visible throughout the enitre
521 compilation unit in which it appears.
524 newtype BlockId = BlockId Unique
527 instance Uniquable BlockId where
528 getUnique (BlockId u) = u
530 instance Show BlockId where
531 show (BlockId u) = show u
533 instance Outputable BlockId where
534 ppr = ppr . getUnique
537 type BlockEnv a = UniqFM {- BlockId -} a
538 emptyBlockEnv :: BlockEnv a
539 emptyBlockEnv = emptyUFM
540 lookupBlockEnv :: BlockEnv a -> BlockId -> Maybe a
541 lookupBlockEnv = lookupUFM
542 extendBlockEnv :: BlockEnv a -> BlockId -> a -> BlockEnv a
543 extendBlockEnv = addToUFM
544 mkBlockEnv :: [(BlockId,a)] -> BlockEnv a
545 mkBlockEnv = listToUFM
547 type BlockSet = UniqSet BlockId
548 emptyBlockSet :: BlockSet
549 emptyBlockSet = emptyUniqSet
550 elemBlockSet :: BlockId -> BlockSet -> Bool
551 elemBlockSet = elementOfUniqSet
552 extendBlockSet :: BlockSet -> BlockId -> BlockSet
553 extendBlockSet = addOneToUniqSet
554 mkBlockSet :: [BlockId] -> BlockSet
555 mkBlockSet = mkUniqSet
557 ----------------------------------------------------------------
558 -- putting this code in PprCmmZ leads to circular imports :-(
560 instance (Outputable m, Outputable l) => Outputable (ZTail m l) where
563 -- | 'pprTail' is used for debugging only
564 pprTail :: (Outputable m, Outputable l) => ZTail m l -> SDoc
565 pprTail (ZTail m t) = ppr m $$ ppr t
566 pprTail (ZLast LastExit) = text "<exit>"
567 pprTail (ZLast (LastOther l)) = ppr l
569 pprLgraph :: (Outputable m, Outputable l, LastNode l) => LGraph m l -> SDoc
570 pprLgraph g = text "{" $$ nest 2 (vcat $ map pprBlock blocks) $$ text "}"
571 where pprBlock (Block id tail) = ppr id <> colon $$ ppr tail
572 blocks = postorder_dfs g