1 {-# LANGUAGE ScopedTypeVariables #-}
2 {-# OPTIONS -Wall -fno-warn-name-shadowing #-}
4 ( BlockId(..), freshBlockId
5 , BlockEnv, emptyBlockEnv, lookupBlockEnv, extendBlockEnv, insertBlock, mkBlockEnv
6 , BlockSet, emptyBlockSet, elemBlockSet, extendBlockSet, mkBlockSet
7 , Graph(..), LGraph(..), FGraph(..)
8 , Block(..), ZBlock(..), ZHead(..), ZTail(..), ZLast(..)
9 , HavingSuccessors, succs, fold_succs
10 , LastNode, mkBranchNode, isBranchNode, branchNodeTarget
12 -- Observers and transformers
13 , entry, exit, focus, focusp, unfocus
14 , blockId, zip, unzip, last, goto_end, ht_to_first, ht_to_last, zipht
16 , splice_head, splice_tail, splice_head_only, splice_focus_entry
17 , splice_focus_exit, remove_entry_label
18 , of_block_list, to_block_list
20 , fold_layout, fold_blocks
21 , fold_fwd_block, foldM_fwd_block
22 , map_nodes, translate
29 import Outputable hiding (empty)
31 import Prelude hiding (zip, unzip, last)
37 -------------------------------------------------------------------------
38 -- GENERIC ZIPPER-BASED CONTROL-FLOW GRAPH --
39 -------------------------------------------------------------------------
42 This module defines datatypes used to represent control-flow graphs,
43 along with some functions for analyzing and splicing graphs.
44 Functions for building graphs are found in a separate module 'MkZipCfg'.
46 Every graph has a distinguished entry point. A graph has at least one
47 exit; most exits are instructions (or statements) like 'jump' or
48 'return', which transfer control to other procedures, but a graph may
49 have up to one 'fall through' exit. (A graph that represents an
50 entire Haskell or C-- procedure does not have a 'fall through' exit.)
52 A graph is a collection of basic blocks. A basic block begins with a
53 label (unique id; see Note [Unique BlockId]) which is followed by a
54 sequence of zero or more 'middle' nodes; the basic block ends with a
55 'last' node. Each 'middle' node is a single-entry, single-exit,
56 uninterruptible computation. A 'last' node is a single-entry,
57 multiple-exit computation. A last node may have zero or more successors,
58 which are identified by their unique ids.
60 A special case of last node is the ``default exit,'' which represents
61 'falling off the end' of the graph. Such a node is always represented by
62 the data constructor 'LastExit'. A graph may contain at most one
63 'LastExit' node, and a graph representing a full procedure should not
64 contain any 'LastExit' nodes. 'LastExit' nodes are used only to splice
65 graphs together, either during graph construction (see module 'MkZipCfg')
66 or during optimization (see module 'ZipDataflow').
68 A graph is parameterized over the types of middle and last nodes. Each of
69 these types will typically be instantiated with a subset of C-- statements
70 (see module 'ZipCfgCmm') or a subset of machine instructions (yet to be
71 implemented as of August 2007).
75 This module exposes three representations of graphs. In order of
76 increasing complexity, they are:
78 Graph m l The basic graph with its distinguished entry point
80 LGraph m l A graph with a *labelled* entry point
82 FGraph m l A labelled graph with the *focus* on a particular edge
84 There are three types because each type offers a slightly different
85 invariant or cost model.
87 * The distinguished entry of a Graph has no label. Because labels must
88 be unique, acquiring one requires a monadic operation ('freshBlockId').
89 The primary advantage of the Graph representation is that we can build
90 a small Graph purely functionally, without entering a monad. For
91 example, during optimization we can easily rewrite a single middle
92 node into a Graph containing a sequence of two middle nodes followed by
95 * In an LGraph, every basic block is labelled. The primary advantage of
96 this representation is its simplicity: each basic block can be treated
97 like any other. This representation is used for mapping, folding, and
98 translation, as well as layout.
100 Like any graph, an LGraph still has a distinguished entry point,
101 which you can discover using 'gr_entry'.
103 * An FGraph is an LGraph with the *focus* on one particular edge. The
104 primary advantage of this representation is that it provides
105 constant-time access to the nodes connected by that edge, and it also
106 allows constant-time, functional *replacement* of those nodes---in the
107 style of Huet's 'zipper'.
109 None of these representations is ideally suited to the incremental
110 construction of large graphs. A separate module, 'MkZipCfg', provides a
111 fourth representation that is asymptotically optimal for such construction.
115 entry :: LGraph m l -> FGraph m l -- focus on edge out of entry node
116 exit :: LGraph m l -> FGraph m l -- focus on edge into default exit node
117 -- (fails if there isn't one)
118 focus :: BlockId -> LGraph m l -> FGraph m l -- focus on edge out of node with id
119 focusp :: (Block m l -> Bool) -> LGraph m l -> Maybe (FGraph m l)
120 -- focus on start of block satisfying predicate
121 unfocus :: FGraph m l -> LGraph m l -- lose focus
123 -- | We can insert a single-entry, single-exit subgraph at
124 -- the current focus.
125 -- The new focus can be at either the entry edge or the exit edge.
127 splice_focus_entry :: FGraph m l -> LGraph m l -> FGraph m l
128 splice_focus_exit :: FGraph m l -> LGraph m l -> FGraph m l
130 --------------- Representation --------------------
132 -- | A basic block is a [[first]] node, followed by zero or more [[middle]]
133 -- nodes, followed by a [[last]] node.
135 -- eventually this module should probably replace the original Cmm, but for
136 -- now we leave it to dynamic invariants what can be found where
139 = LastExit -- fall through; used for the block that has no last node
140 -- LastExit is a device used only for graphs under
141 -- construction, or framgments of graph under optimisation,
142 -- so we don't want to pollute the 'l' type parameter with it
145 data ZHead m = ZFirst BlockId | ZHead (ZHead m) m
146 -- ZHead is a (reversed) sequence of middle nodes labeled by a BlockId
147 data ZTail m l = ZLast (ZLast l) | ZTail m (ZTail m l)
148 -- ZTail is a sequence of middle nodes followed by a last node
150 -- | Blocks and flow graphs
151 data Block m l = Block BlockId (ZTail m l)
153 data Graph m l = Graph (ZTail m l) (BlockEnv (Block m l))
155 data LGraph m l = LGraph { gr_entry :: BlockId
156 , gr_blocks :: BlockEnv (Block m l) }
158 -- | And now the zipper. The focus is between the head and tail.
159 -- Notice we cannot ever focus on an inter-block edge.
160 data ZBlock m l = ZBlock (ZHead m) (ZTail m l)
161 data FGraph m l = FGraph { zg_entry :: BlockId
162 , zg_focus :: ZBlock m l
163 , zg_others :: BlockEnv (Block m l) }
164 -- Invariant: the block represented by 'zg_focus' is *not*
165 -- in the map 'zg_others'
167 ---- Utility functions ---
169 blockId :: Block m l -> BlockId
170 zip :: ZBlock m l -> Block m l
171 unzip :: Block m l -> ZBlock m l
173 last :: ZBlock m l -> ZLast l
174 goto_end :: ZBlock m l -> (ZHead m, ZLast l)
176 tailOfLast :: l -> ZTail m l
178 -- | Some ways to combine parts:
179 ht_to_first :: ZHead m -> ZTail m l -> Block m l -- was (ZFirst, ZTail)
180 ht_to_last :: ZHead m -> ZTail m l -> (ZHead m, ZLast l)
182 zipht :: ZHead m -> ZTail m l -> Block m l
184 -- | We can splice a single-entry, single-exit LGraph onto a head or a tail.
185 -- For a head, we have a head~[[h]] followed by a LGraph~[[g]].
186 -- The entry node of~[[g]] gets joined to~[[h]], forming the entry into
187 -- the new LGraph. The exit of~[[g]] becomes the new head.
188 -- For both arguments and results, the order of values is the order of
189 -- control flow: before splicing, the head flows into the LGraph; after
190 -- splicing, the LGraph flows into the head.
191 -- Splicing a tail is the dual operation.
192 -- (In order to maintain the order-means-control-flow convention, the
193 -- orders are reversed.)
195 splice_head :: ZHead m -> LGraph m l -> (LGraph m l, ZHead m)
196 splice_tail :: LGraph m l -> ZTail m l -> (ZTail m l, LGraph m l)
198 -- | We can also splice a single-entry, no-exit LGraph into a head.
199 splice_head_only :: ZHead m -> LGraph m l -> LGraph m l
201 -- | Finally, we can remove the entry label of an LGraph and remove
202 -- it, leaving a Graph:
203 remove_entry_label :: LGraph m l -> Graph m l
205 of_block_list :: BlockId -> [Block m l] -> LGraph m l -- N log N
206 to_block_list :: LGraph m l -> [Block m l] -- N log N
208 -- | Traversal: [[postorder_dfs]] returns a list of blocks reachable from
210 -- The postorder depth-first-search order means the list is in roughly
211 -- first-to-last order, as suitable for use in a forward dataflow problem.
213 postorder_dfs :: forall m l . LastNode l => LGraph m l -> [Block m l]
215 -- | For layout, we fold over pairs of [[Block m l]] and [[Maybe BlockId]]
216 -- in layout order. The [[BlockId]], if any, identifies the block that
217 -- will be the layout successor of the current block. This may be
218 -- useful to help an emitter omit the final [[goto]] of a block that
219 -- flows directly to its layout successor.
221 LastNode l => (Block m l -> Maybe BlockId -> a -> a) -> a -> LGraph m l-> a
223 -- | We can also fold and iterate over blocks.
224 fold_blocks :: (Block m l -> a -> a) -> a -> LGraph m l -> a
226 map_nodes :: (BlockId -> BlockId) -> (m -> m') -> (l -> l') -> LGraph m l -> LGraph m' l'
227 -- mapping includes the entry id!
228 translate :: forall m l m' l' .
229 (m -> UniqSM (LGraph m' l')) -> (l -> UniqSM (LGraph m' l')) ->
230 LGraph m l -> UniqSM (LGraph m' l')
233 translateA :: forall m l m' l' .
234 (m -> Agraph m' l') -> (l -> AGraph m' l') -> LGraph m l -> LGraph m' l'
237 ------------------- Last nodes
239 -- | We can't make a graph out of just any old 'last node' type. A
240 -- last node has to be able to find its successors, and we need to
241 -- be able to create and identify unconditional branches. We put
242 -- these capabilities in a type class.
244 class HavingSuccessors b where
245 succs :: b -> [BlockId]
246 fold_succs :: (BlockId -> a -> a) -> b -> a -> a
248 fold_succs add l z = foldr add z $ succs l
250 class HavingSuccessors l => LastNode l where
251 mkBranchNode :: BlockId -> l
252 isBranchNode :: l -> Bool
253 branchNodeTarget :: l -> BlockId -- panics if not branch node
255 instance HavingSuccessors l => HavingSuccessors (ZLast l) where
257 succs (LastOther l) = succs l
258 fold_succs _ LastExit z = z
259 fold_succs f (LastOther l) z = fold_succs f l z
261 instance LastNode l => LastNode (ZLast l) where
262 mkBranchNode id = LastOther $ mkBranchNode id
263 isBranchNode LastExit = False
264 isBranchNode (LastOther l) = isBranchNode l
265 branchNodeTarget LastExit = panic "branchNodeTarget LastExit"
266 branchNodeTarget (LastOther l) = branchNodeTarget l
268 instance LastNode l => HavingSuccessors (ZBlock m l) where
269 succs b = succs (last b)
271 instance LastNode l => HavingSuccessors (Block m l) where
272 succs b = succs (unzip b)
275 ------------------- Observing nodes
277 -- | Fold from first to last
279 (BlockId -> a -> a) -> (m -> a -> a) -> (ZLast l -> a -> a) ->
282 -- | iterate from first to last
284 Monad m => (BlockId -> a -> m a) -> (mid -> a -> m a) -> (ZLast l -> a -> m a) ->
285 Block mid l -> a -> m a
287 -- ================ IMPLEMENTATION ================--
289 blockId (Block id _) = id
291 -- | Convert block between forms.
292 -- These functions are tail-recursive, so we can go as deep as we like
293 -- without fear of stack overflow.
295 ht_to_first head tail = case head of
296 ZFirst id -> Block id tail
297 ZHead h m -> ht_to_first h (ZTail m tail)
299 head_id :: ZHead m -> BlockId
300 head_id (ZFirst id) = id
301 head_id (ZHead h _) = head_id h
303 zip (ZBlock h t) = ht_to_first h t
305 ht_to_last head (ZLast l) = (head, l)
306 ht_to_last head (ZTail m t) = ht_to_last (ZHead head m) t
308 goto_end (ZBlock h t) = ht_to_last h t
310 tailOfLast l = ZLast (LastOther l)
313 unzip (Block id t) = ZBlock (ZFirst id) t
315 last (ZBlock _ t) = lastt t
316 where lastt (ZLast l) = l
317 lastt (ZTail _ t) = lastt t
319 focus id (LGraph entry blocks) =
320 case lookupBlockEnv blocks id of
321 Just b -> FGraph entry (unzip b) (delFromUFM blocks id)
322 Nothing -> panic "asked for nonexistent block in flow graph"
324 focusp p (LGraph entry blocks) =
325 fmap (\(b, bs) -> FGraph entry (unzip b) bs) (splitp_blocks p blocks)
327 splitp_blocks :: (Block m l -> Bool) -> BlockEnv (Block m l) ->
328 Maybe (Block m l, BlockEnv (Block m l))
329 splitp_blocks p blocks = lift $ foldUFM scan (Nothing, emptyBlockEnv) blocks
330 where scan b (yes, no) =
332 Nothing | p b -> (Just b, no)
333 | otherwise -> (yes, insertBlock b no)
334 Just _ -> (yes, insertBlock b no)
335 lift (Nothing, _) = Nothing
336 lift (Just b, bs) = Just (b, bs)
338 entry g@(LGraph eid _) = focus eid g
340 exit g@(LGraph eid _) = FGraph eid (ZBlock h (ZLast l)) others
341 where FGraph _ b others = focusp is_exit g `orElse` panic "no exit in flow graph"
344 is_exit :: Block m l -> Bool
345 is_exit b = case last (unzip b) of { LastExit -> True; _ -> False }
347 -- | 'insertBlock' should not be used to *replace* an existing block
348 -- but only to insert a new one
349 insertBlock :: Block m l -> BlockEnv (Block m l) -> BlockEnv (Block m l)
351 case lookupBlockEnv bs id of
352 Nothing -> extendBlockEnv bs id b
353 Just _ -> panic ("duplicate labels " ++ show id ++ " in ZipCfg graph")
356 unfocus (FGraph e bz bs) = LGraph e (insertBlock (zip bz) bs)
358 check_single_exit :: LGraph l m -> a -> a
359 check_single_exit g =
360 let check block found = case last (unzip block) of
361 LastExit -> if found then panic "graph has multiple exits"
364 in if not (foldUFM check False (gr_blocks g)) then
365 panic "graph does not have an exit"
369 freshBlockId :: String -> UniqSM BlockId
370 freshBlockId _ = do { u <- getUniqueUs; return $ BlockId u }
372 postorder_dfs g@(LGraph _ blocks) =
373 let FGraph _ eblock _ = entry g
374 in vnode (zip eblock) (\acc _visited -> acc) [] emptyBlockSet
376 vnode :: Block m l -> ([Block m l] -> BlockSet -> a) -> [Block m l] -> BlockSet ->a
377 vnode block@(Block id _) cont acc visited =
378 if elemBlockSet id visited then
381 vchildren block (get_children block) cont acc (extendBlockSet visited id)
382 vchildren block bs cont acc visited =
383 let next children acc visited =
384 case children of [] -> cont (block : acc) visited
385 (b:bs) -> vnode b (next bs) acc visited
386 in next bs acc visited
387 get_children block = foldl add_id [] (succs block)
388 add_id rst id = case lookupBlockEnv blocks id of
392 fold_layout f z g@(LGraph eid _) = fold (postorder_dfs g) z
393 where fold blocks z =
394 case blocks of [] -> z
396 b1 : b2 : bs -> fold (b2 : bs) (f b1 (nextlabel b2) z)
397 nextlabel (Block id _) =
398 if id == eid then panic "entry as successor"
401 fold_fwd_block first middle last (Block id t) z = tail t (first id z)
402 where tail (ZTail m t) z = tail t (middle m z)
403 tail (ZLast l) z = last l z
405 foldM_fwd_block first middle last (Block id t) z = do { z <- first id z; tail t z }
406 where tail (ZTail m t) z = do { z <- middle m z; tail t z }
407 tail (ZLast l) z = last l z
409 fold_blocks f z (LGraph _ blocks) = foldUFM f z blocks
411 map_nodes idm middle last (LGraph eid blocks) = LGraph (idm eid) (mapUFM block blocks)
412 where block (Block id t) = Block (idm id) (tail t)
413 tail (ZTail m t) = ZTail (middle m) (tail t)
414 tail (ZLast LastExit) = ZLast LastExit
415 tail (ZLast (LastOther l)) = ZLast (LastOther (last l))
417 of_block_list e blocks = LGraph e $ foldr insertBlock emptyBlockEnv blocks
418 to_block_list (LGraph _ blocks) = eltsUFM blocks
421 \paragraph{Splicing support}
423 We want to be able to scrutinize a single-entry, single-exit LGraph for
425 There are two useful cases: the LGraph is a single block or it isn't.
426 We use continuation-passing style.
429 prepare_for_splicing ::
430 LGraph m l -> (ZTail m l -> a) -> (ZTail m l -> ZHead m -> BlockEnv (Block m l) -> a)
432 prepare_for_splicing g single multi =
433 let FGraph _ gentry gblocks = entry g
434 ZBlock _ etail = gentry
435 in if isNullUFM gblocks then
437 LastExit -> single etail
438 _ -> panic "bad single block"
440 case splitp_blocks is_exit gblocks of
441 Nothing -> panic "Can't find an exit block"
442 Just (gexit, gblocks) ->
443 let (gh, gl) = goto_end $ unzip gexit in
444 case gl of LastExit -> multi etail gh gblocks
445 _ -> panic "exit is not exit?!"
448 check_single_exit g $
449 let eid = head_id head
450 splice_one_block tail' =
451 case ht_to_last head tail' of
452 (head, LastExit) -> (LGraph eid emptyBlockEnv, head)
453 _ -> panic "spliced LGraph without exit"
454 splice_many_blocks entry exit others =
455 (LGraph eid (insertBlock (zipht head entry) others), exit)
456 in prepare_for_splicing g splice_one_block splice_many_blocks
459 check_single_exit g $
460 let splice_one_block tail' = -- return tail' .. tail
461 case ht_to_last (ZFirst (gr_entry g)) tail' of
463 case ht_to_first head' tail of
464 Block id t | id == gr_entry g -> (t, LGraph id emptyBlockEnv)
465 _ -> panic "entry in; garbage out"
466 _ -> panic "spliced single block without Exit"
467 splice_many_blocks entry exit others =
468 (entry, LGraph (gr_entry g) (insertBlock (zipht exit tail) others))
469 in prepare_for_splicing g splice_one_block splice_many_blocks
471 splice_focus_entry (FGraph eid (ZBlock head tail) blocks) g =
472 let (tail', g') = splice_tail g tail in
473 FGraph eid (ZBlock head tail') (plusUFM (gr_blocks g') blocks)
475 splice_focus_exit (FGraph eid (ZBlock head tail) blocks) g =
476 let (g', head') = splice_head head g in
477 FGraph eid (ZBlock head' tail) (plusUFM (gr_blocks g') blocks)
479 splice_head_only head g =
480 let FGraph eid gentry gblocks = entry g
482 ZBlock (ZFirst _) tail -> LGraph eid (insertBlock (zipht head tail) gblocks)
483 _ -> panic "entry not at start of block?!"
485 remove_entry_label g =
486 let FGraph e eblock others = entry g
488 ZBlock (ZFirst id) tail
489 | id == e -> Graph tail others
490 _ -> panic "id doesn't match on entry block"
494 translate txm txl (LGraph eid blocks) =
495 do blocks' <- foldUFM txblock (return emptyBlockEnv) blocks
496 return $ LGraph eid blocks'
499 Block m l -> UniqSM (BlockEnv (Block m' l')) -> UniqSM (BlockEnv (Block m' l'))
500 txblock (Block id t) expanded =
501 do blocks' <- expanded
502 txtail (ZFirst id) t blocks'
503 txtail :: ZHead m' -> ZTail m l -> BlockEnv (Block m' l') ->
504 UniqSM (BlockEnv (Block m' l'))
505 txtail h (ZTail m t) blocks' =
507 let (g, h') = splice_head h m'
508 txtail h' t (plusUFM (gr_blocks g) blocks')
509 txtail h (ZLast (LastOther l)) blocks' =
511 return $ plusUFM (gr_blocks (splice_head_only h l')) blocks'
512 txtail h (ZLast LastExit) blocks' =
513 return $ insertBlock (zipht h (ZLast LastExit)) blocks'
515 ----------------------------------------------------------------
516 --- Block Ids, their environments, and their sets
518 {- Note [Unique BlockId]
519 ~~~~~~~~~~~~~~~~~~~~~~~~
520 Although a 'BlockId' is a local label, for reasons of implementation,
521 'BlockId's must be unique within an entire compilation unit. The reason
522 is that each local label is mapped to an assembly-language label, and in
523 most assembly languages allow, a label is visible throughout the enitre
524 compilation unit in which it appears.
527 newtype BlockId = BlockId Unique
530 instance Uniquable BlockId where
531 getUnique (BlockId u) = u
533 instance Show BlockId where
534 show (BlockId u) = show u
536 instance Outputable BlockId where
537 ppr = ppr . getUnique
540 type BlockEnv a = UniqFM {- BlockId -} a
541 emptyBlockEnv :: BlockEnv a
542 emptyBlockEnv = emptyUFM
543 lookupBlockEnv :: BlockEnv a -> BlockId -> Maybe a
544 lookupBlockEnv = lookupUFM
545 extendBlockEnv :: BlockEnv a -> BlockId -> a -> BlockEnv a
546 extendBlockEnv = addToUFM
547 mkBlockEnv :: [(BlockId,a)] -> BlockEnv a
548 mkBlockEnv = listToUFM
550 type BlockSet = UniqSet BlockId
551 emptyBlockSet :: BlockSet
552 emptyBlockSet = emptyUniqSet
553 elemBlockSet :: BlockId -> BlockSet -> Bool
554 elemBlockSet = elementOfUniqSet
555 extendBlockSet :: BlockSet -> BlockId -> BlockSet
556 extendBlockSet = addOneToUniqSet
557 mkBlockSet :: [BlockId] -> BlockSet
558 mkBlockSet = mkUniqSet
560 ----------------------------------------------------------------
561 -- putting this code in PprCmmZ leads to circular imports :-(
563 instance (Outputable m, Outputable l) => Outputable (ZTail m l) where
566 -- | 'pprTail' is used for debugging only
567 pprTail :: (Outputable m, Outputable l) => ZTail m l -> SDoc
568 pprTail (ZTail m t) = ppr m $$ ppr t
569 pprTail (ZLast LastExit) = text "<exit>"
570 pprTail (ZLast (LastOther l)) = ppr l
572 pprLgraph :: (Outputable m, Outputable l, LastNode l) => LGraph m l -> SDoc
573 pprLgraph g = text "{" $$ nest 2 (vcat $ map pprBlock blocks) $$ text "}"
574 where pprBlock (Block id tail) = ppr id <> colon $$ ppr tail
575 blocks = postorder_dfs g