1 {-# OPTIONS -Wall -fno-warn-name-shadowing #-}
3 ( BlockId(..), freshBlockId
4 , BlockEnv, emptyBlockEnv, lookupBlockEnv, extendBlockEnv, insertBlock, mkBlockEnv
5 , BlockSet, emptyBlockSet, elemBlockSet, extendBlockSet, mkBlockSet
6 , Graph(..), LGraph(..), FGraph(..)
7 , Block(..), ZBlock(..), ZHead(..), ZTail(..), ZLast(..)
8 , HavingSuccessors, succs, fold_succs
9 , LastNode, mkBranchNode, isBranchNode, branchNodeTarget
11 -- Observers and transformers
12 , blockId, zip, unzip, last, goto_end, zipht, tailOfLast
14 , splice_tail, splice_head, splice_head_only
15 , of_block_list, to_block_list
25 -- the following functions might one day be useful and can be found
26 -- either below or in ZipCfgExtras:
27 , entry, exit, focus, focusp, unfocus
28 , ht_to_first, ht_to_last,
29 , splice_focus_entry, splice_focus_exit
30 , fold_fwd_block, foldM_fwd_block
36 import Outputable hiding (empty)
38 import Prelude hiding (zip, unzip, last)
44 -------------------------------------------------------------------------
45 -- GENERIC ZIPPER-BASED CONTROL-FLOW GRAPH --
46 -------------------------------------------------------------------------
49 This module defines datatypes used to represent control-flow graphs,
50 along with some functions for analyzing and splicing graphs.
51 Functions for building graphs are found in a separate module 'MkZipCfg'.
53 Every graph has a distinguished entry point. A graph has at least one
54 exit; most exits are instructions (or statements) like 'jump' or
55 'return', which transfer control to other procedures, but a graph may
56 have up to one 'fall through' exit. (A graph that represents an
57 entire Haskell or C-- procedure does not have a 'fall through' exit.)
59 A graph is a collection of basic blocks. A basic block begins with a
60 label (unique id; see Note [Unique BlockId]) which is followed by a
61 sequence of zero or more 'middle' nodes; the basic block ends with a
62 'last' node. Each 'middle' node is a single-entry, single-exit,
63 uninterruptible computation. A 'last' node is a single-entry,
64 multiple-exit computation. A last node may have zero or more successors,
65 which are identified by their unique ids.
67 A special case of last node is the ``default exit,'' which represents
68 'falling off the end' of the graph. Such a node is always represented by
69 the data constructor 'LastExit'. A graph may contain at most one
70 'LastExit' node, and a graph representing a full procedure should not
71 contain any 'LastExit' nodes. 'LastExit' nodes are used only to splice
72 graphs together, either during graph construction (see module 'MkZipCfg')
73 or during optimization (see module 'ZipDataflow').
75 A graph is parameterized over the types of middle and last nodes. Each of
76 these types will typically be instantiated with a subset of C-- statements
77 (see module 'ZipCfgCmmRep') or a subset of machine instructions (yet to be
78 implemented as of August 2007).
82 This module exposes three representations of graphs. In order of
83 increasing complexity, they are:
85 Graph m l The basic graph with its distinguished entry point
87 LGraph m l A graph with a *labelled* entry point
89 FGraph m l A labelled graph with the *focus* on a particular edge
91 There are three types because each type offers a slightly different
92 invariant or cost model.
94 * The distinguished entry of a Graph has no label. Because labels must
95 be unique, acquiring one requires a monadic operation ('freshBlockId').
96 The primary advantage of the Graph representation is that we can build
97 a small Graph purely functionally, without entering a monad. For
98 example, during optimization we can easily rewrite a single middle
99 node into a Graph containing a sequence of two middle nodes followed by
102 * In an LGraph, every basic block is labelled. The primary advantage of
103 this representation is its simplicity: each basic block can be treated
104 like any other. This representation is used for mapping, folding, and
105 translation, as well as layout.
107 Like any graph, an LGraph still has a distinguished entry point,
108 which you can discover using 'gr_entry'.
110 * An FGraph is an LGraph with the *focus* on one particular edge. The
111 primary advantage of this representation is that it provides
112 constant-time access to the nodes connected by that edge, and it also
113 allows constant-time, functional *replacement* of those nodes---in the
114 style of Huet's 'zipper'.
116 None of these representations is ideally suited to the incremental
117 construction of large graphs. A separate module, 'MkZipCfg', provides a
118 fourth representation that is asymptotically optimal for such construction.
122 --------------- Representation --------------------
124 -- | A basic block is a [[first]] node, followed by zero or more [[middle]]
125 -- nodes, followed by a [[last]] node.
127 -- eventually this module should probably replace the original Cmm, but for
128 -- now we leave it to dynamic invariants what can be found where
131 = LastExit -- fall through; used for the block that has no last node
132 -- LastExit is a device used only for graphs under
133 -- construction, or framgments of graph under optimisation,
134 -- so we don't want to pollute the 'l' type parameter with it
137 data ZHead m = ZFirst BlockId | ZHead (ZHead m) m
138 -- ZHead is a (reversed) sequence of middle nodes labeled by a BlockId
139 data ZTail m l = ZLast (ZLast l) | ZTail m (ZTail m l)
140 -- ZTail is a sequence of middle nodes followed by a last node
142 -- | Blocks and flow graphs
143 data Block m l = Block BlockId (ZTail m l)
145 data Graph m l = Graph (ZTail m l) (BlockEnv (Block m l))
147 data LGraph m l = LGraph { gr_entry :: BlockId
148 , gr_blocks :: BlockEnv (Block m l) }
150 -- | And now the zipper. The focus is between the head and tail.
151 -- Notice we cannot ever focus on an inter-block edge.
152 data ZBlock m l = ZBlock (ZHead m) (ZTail m l)
153 data FGraph m l = FGraph { zg_entry :: BlockId
154 , zg_focus :: ZBlock m l
155 , zg_others :: BlockEnv (Block m l) }
156 -- Invariant: the block represented by 'zg_focus' is *not*
157 -- in the map 'zg_others'
159 ---- Utility functions ---
161 blockId :: Block m l -> BlockId
162 zip :: ZBlock m l -> Block m l
163 unzip :: Block m l -> ZBlock m l
165 last :: ZBlock m l -> ZLast l
166 goto_end :: ZBlock m l -> (ZHead m, ZLast l)
168 tailOfLast :: l -> ZTail m l
170 -- | Some ways to combine parts:
171 ht_to_first :: ZHead m -> ZTail m l -> Block m l -- was (ZFirst, ZTail)
172 ht_to_last :: ZHead m -> ZTail m l -> (ZHead m, ZLast l)
174 zipht :: ZHead m -> ZTail m l -> Block m l
176 -- | We can splice a single-entry, single-exit LGraph onto a head or a tail.
177 -- For a head, we have a head~[[h]] followed by a LGraph~[[g]].
178 -- The entry node of~[[g]] gets joined to~[[h]], forming the entry into
179 -- the new LGraph. The exit of~[[g]] becomes the new head.
180 -- For both arguments and results, the order of values is the order of
181 -- control flow: before splicing, the head flows into the LGraph; after
182 -- splicing, the LGraph flows into the head.
183 -- Splicing a tail is the dual operation.
184 -- (In order to maintain the order-means-control-flow convention, the
185 -- orders are reversed.)
187 splice_head :: ZHead m -> LGraph m l -> (LGraph m l, ZHead m)
188 splice_tail :: LGraph m l -> ZTail m l -> (ZTail m l, LGraph m l)
190 -- | We can also splice a single-entry, no-exit LGraph into a head.
191 splice_head_only :: ZHead m -> LGraph m l -> LGraph m l
193 -- | Finally, we can remove the entry label of an LGraph and remove
194 -- it, leaving a Graph:
195 remove_entry_label :: LGraph m l -> Graph m l
197 of_block_list :: BlockId -> [Block m l] -> LGraph m l -- N log N
198 to_block_list :: LGraph m l -> [Block m l] -- N log N
200 -- | Traversal: [[postorder_dfs]] returns a list of blocks reachable from
202 -- The postorder depth-first-search order means the list is in roughly
203 -- first-to-last order, as suitable for use in a forward dataflow problem.
205 postorder_dfs :: LastNode l => LGraph m l -> [Block m l]
207 -- | For layout, we fold over pairs of [[Block m l]] and [[Maybe BlockId]]
208 -- in layout order. The [[BlockId]], if any, identifies the block that
209 -- will be the layout successor of the current block. This may be
210 -- useful to help an emitter omit the final [[goto]] of a block that
211 -- flows directly to its layout successor.
213 LastNode l => (Block m l -> Maybe BlockId -> a -> a) -> a -> LGraph m l-> a
215 -- | We can also fold and iterate over blocks.
216 fold_blocks :: (Block m l -> a -> a) -> a -> LGraph m l -> a
218 map_nodes :: (BlockId -> BlockId) -> (m -> m') -> (l -> l') -> LGraph m l -> LGraph m' l'
219 -- mapping includes the entry id!
220 translate :: (m -> UniqSM (LGraph m' l')) -> (l -> UniqSM (LGraph m' l')) ->
221 LGraph m l -> UniqSM (LGraph m' l')
224 translateA :: (m -> Agraph m' l') -> (l -> AGraph m' l') -> LGraph m l -> LGraph m' l'
227 ------------------- Last nodes
229 -- | We can't make a graph out of just any old 'last node' type. A
230 -- last node has to be able to find its successors, and we need to
231 -- be able to create and identify unconditional branches. We put
232 -- these capabilities in a type class.
234 class HavingSuccessors b where
235 succs :: b -> [BlockId]
236 fold_succs :: (BlockId -> a -> a) -> b -> a -> a
238 fold_succs add l z = foldr add z $ succs l
240 class HavingSuccessors l => LastNode l where
241 mkBranchNode :: BlockId -> l
242 isBranchNode :: l -> Bool
243 branchNodeTarget :: l -> BlockId -- panics if not branch node
245 instance HavingSuccessors l => HavingSuccessors (ZLast l) where
247 succs (LastOther l) = succs l
248 fold_succs _ LastExit z = z
249 fold_succs f (LastOther l) z = fold_succs f l z
251 instance LastNode l => LastNode (ZLast l) where
252 mkBranchNode id = LastOther $ mkBranchNode id
253 isBranchNode LastExit = False
254 isBranchNode (LastOther l) = isBranchNode l
255 branchNodeTarget LastExit = panic "branchNodeTarget LastExit"
256 branchNodeTarget (LastOther l) = branchNodeTarget l
258 instance LastNode l => HavingSuccessors (ZBlock m l) where
259 succs b = succs (last b)
261 instance LastNode l => HavingSuccessors (Block m l) where
262 succs b = succs (unzip b)
265 -- ================ IMPLEMENTATION ================--
267 blockId (Block id _) = id
269 -- | Convert block between forms.
270 -- These functions are tail-recursive, so we can go as deep as we like
271 -- without fear of stack overflow.
273 ht_to_first head tail = case head of
274 ZFirst id -> Block id tail
275 ZHead h m -> ht_to_first h (ZTail m tail)
277 head_id :: ZHead m -> BlockId
278 head_id (ZFirst id) = id
279 head_id (ZHead h _) = head_id h
281 zip (ZBlock h t) = ht_to_first h t
283 ht_to_last head (ZLast l) = (head, l)
284 ht_to_last head (ZTail m t) = ht_to_last (ZHead head m) t
286 goto_end (ZBlock h t) = ht_to_last h t
288 tailOfLast l = ZLast (LastOther l)
291 unzip (Block id t) = ZBlock (ZFirst id) t
293 last (ZBlock _ t) = lastt t
294 where lastt (ZLast l) = l
295 lastt (ZTail _ t) = lastt t
297 focus :: BlockId -> LGraph m l -> FGraph m l -- focus on edge out of node with id
298 focus id (LGraph entry blocks) =
299 case lookupBlockEnv blocks id of
300 Just b -> FGraph entry (unzip b) (delFromUFM blocks id)
301 Nothing -> panic "asked for nonexistent block in flow graph"
303 splitp_blocks :: (Block m l -> Bool) -> BlockEnv (Block m l) ->
304 Maybe (Block m l, BlockEnv (Block m l))
305 splitp_blocks p blocks = lift $ foldUFM scan (Nothing, emptyBlockEnv) blocks
306 where scan b (yes, no) =
308 Nothing | p b -> (Just b, no)
309 | otherwise -> (yes, insertBlock b no)
310 Just _ -> (yes, insertBlock b no)
311 lift (Nothing, _) = Nothing
312 lift (Just b, bs) = Just (b, bs)
314 is_exit :: Block m l -> Bool
315 is_exit b = case last (unzip b) of { LastExit -> True; _ -> False }
317 -- | 'insertBlock' should not be used to *replace* an existing block
318 -- but only to insert a new one
319 insertBlock :: Block m l -> BlockEnv (Block m l) -> BlockEnv (Block m l)
321 case lookupBlockEnv bs id of
322 Nothing -> extendBlockEnv bs id b
323 Just _ -> panic ("duplicate labels " ++ show id ++ " in ZipCfg graph")
326 check_single_exit :: LGraph l m -> a -> a
327 check_single_exit g =
328 let check block found = case last (unzip block) of
329 LastExit -> if found then panic "graph has multiple exits"
332 in if not (foldUFM check False (gr_blocks g)) then
333 panic "graph does not have an exit"
337 freshBlockId :: String -> UniqSM BlockId
338 freshBlockId _ = do { u <- getUniqueUs; return $ BlockId u }
340 entry :: LGraph m l -> FGraph m l -- focus on edge out of entry node
341 entry g@(LGraph eid _) = focus eid g
345 postorder_dfs g@(LGraph _ blocks) =
346 let FGraph _ eblock _ = entry g
347 in vnode (zip eblock) (\acc _visited -> acc) [] emptyBlockSet
349 -- vnode :: Block m l -> ([Block m l] -> BlockSet -> a) -> [Block m l] -> BlockSet ->a
350 vnode block@(Block id _) cont acc visited =
351 if elemBlockSet id visited then
354 vchildren block (get_children block) cont acc (extendBlockSet visited id)
355 vchildren block bs cont acc visited =
356 let next children acc visited =
357 case children of [] -> cont (block : acc) visited
358 (b:bs) -> vnode b (next bs) acc visited
359 in next bs acc visited
360 get_children block = foldl add_id [] (succs block)
361 add_id rst id = case lookupBlockEnv blocks id of
365 fold_layout f z g@(LGraph eid _) = fold (postorder_dfs g) z
366 where fold blocks z =
367 case blocks of [] -> z
369 b1 : b2 : bs -> fold (b2 : bs) (f b1 (nextlabel b2) z)
370 nextlabel (Block id _) =
371 if id == eid then panic "entry as successor"
374 fold_blocks f z (LGraph _ blocks) = foldUFM f z blocks
376 map_nodes idm middle last (LGraph eid blocks) = LGraph (idm eid) (mapUFM block blocks)
377 where block (Block id t) = Block (idm id) (tail t)
378 tail (ZTail m t) = ZTail (middle m) (tail t)
379 tail (ZLast LastExit) = ZLast LastExit
380 tail (ZLast (LastOther l)) = ZLast (LastOther (last l))
382 of_block_list e blocks = LGraph e $ foldr insertBlock emptyBlockEnv blocks
383 to_block_list (LGraph _ blocks) = eltsUFM blocks
386 \paragraph{Splicing support}
388 We want to be able to scrutinize a single-entry, single-exit LGraph for
390 There are two useful cases: the LGraph is a single block or it isn't.
391 We use continuation-passing style.
394 prepare_for_splicing ::
395 LGraph m l -> (ZTail m l -> a) -> (ZTail m l -> ZHead m -> BlockEnv (Block m l) -> a)
397 prepare_for_splicing g single multi =
398 let FGraph _ gentry gblocks = entry g
399 ZBlock _ etail = gentry
400 in if isNullUFM gblocks then
402 LastExit -> single etail
403 _ -> panic "bad single block"
405 case splitp_blocks is_exit gblocks of
406 Nothing -> panic "Can't find an exit block"
407 Just (gexit, gblocks) ->
408 let (gh, gl) = goto_end $ unzip gexit in
409 case gl of LastExit -> multi etail gh gblocks
410 _ -> panic "exit is not exit?!"
413 check_single_exit g $
414 let eid = head_id head
415 splice_one_block tail' =
416 case ht_to_last head tail' of
417 (head, LastExit) -> (LGraph eid emptyBlockEnv, head)
418 _ -> panic "spliced LGraph without exit"
419 splice_many_blocks entry exit others =
420 (LGraph eid (insertBlock (zipht head entry) others), exit)
421 in prepare_for_splicing g splice_one_block splice_many_blocks
424 check_single_exit g $
425 let splice_one_block tail' = -- return tail' .. tail
426 case ht_to_last (ZFirst (gr_entry g)) tail' of
428 case ht_to_first head' tail of
429 Block id t | id == gr_entry g -> (t, LGraph id emptyBlockEnv)
430 _ -> panic "entry in; garbage out"
431 _ -> panic "spliced single block without Exit"
432 splice_many_blocks entry exit others =
433 (entry, LGraph (gr_entry g) (insertBlock (zipht exit tail) others))
434 in prepare_for_splicing g splice_one_block splice_many_blocks
436 splice_head_only head g =
437 let FGraph eid gentry gblocks = entry g
439 ZBlock (ZFirst _) tail -> LGraph eid (insertBlock (zipht head tail) gblocks)
440 _ -> panic "entry not at start of block?!"
442 remove_entry_label g =
443 let FGraph e eblock others = entry g
445 ZBlock (ZFirst id) tail
446 | id == e -> Graph tail others
447 _ -> panic "id doesn't match on entry block"
451 translate txm txl (LGraph eid blocks) =
452 do blocks' <- foldUFM txblock (return emptyBlockEnv) blocks
453 return $ LGraph eid blocks'
456 -- Block m l -> UniqSM (BlockEnv (Block m' l')) -> UniqSM (BlockEnv (Block m' l'))
457 txblock (Block id t) expanded =
458 do blocks' <- expanded
459 txtail (ZFirst id) t blocks'
460 -- txtail :: ZHead m' -> ZTail m l -> BlockEnv (Block m' l') ->
461 -- UniqSM (BlockEnv (Block m' l'))
462 txtail h (ZTail m t) blocks' =
464 let (g, h') = splice_head h m'
465 txtail h' t (plusUFM (gr_blocks g) blocks')
466 txtail h (ZLast (LastOther l)) blocks' =
468 return $ plusUFM (gr_blocks (splice_head_only h l')) blocks'
469 txtail h (ZLast LastExit) blocks' =
470 return $ insertBlock (zipht h (ZLast LastExit)) blocks'
472 ----------------------------------------------------------------
473 --- Block Ids, their environments, and their sets
475 {- Note [Unique BlockId]
476 ~~~~~~~~~~~~~~~~~~~~~~~~
477 Although a 'BlockId' is a local label, for reasons of implementation,
478 'BlockId's must be unique within an entire compilation unit. The reason
479 is that each local label is mapped to an assembly-language label, and in
480 most assembly languages allow, a label is visible throughout the enitre
481 compilation unit in which it appears.
484 newtype BlockId = BlockId Unique
487 instance Uniquable BlockId where
488 getUnique (BlockId u) = u
490 instance Show BlockId where
491 show (BlockId u) = show u
493 instance Outputable BlockId where
494 ppr = ppr . getUnique
497 type BlockEnv a = UniqFM {- BlockId -} a
498 emptyBlockEnv :: BlockEnv a
499 emptyBlockEnv = emptyUFM
500 lookupBlockEnv :: BlockEnv a -> BlockId -> Maybe a
501 lookupBlockEnv = lookupUFM
502 extendBlockEnv :: BlockEnv a -> BlockId -> a -> BlockEnv a
503 extendBlockEnv = addToUFM
504 mkBlockEnv :: [(BlockId,a)] -> BlockEnv a
505 mkBlockEnv = listToUFM
507 type BlockSet = UniqSet BlockId
508 emptyBlockSet :: BlockSet
509 emptyBlockSet = emptyUniqSet
510 elemBlockSet :: BlockId -> BlockSet -> Bool
511 elemBlockSet = elementOfUniqSet
512 extendBlockSet :: BlockSet -> BlockId -> BlockSet
513 extendBlockSet = addOneToUniqSet
514 mkBlockSet :: [BlockId] -> BlockSet
515 mkBlockSet = mkUniqSet
517 ----------------------------------------------------------------
518 -- putting this code in PprCmmZ leads to circular imports :-(
520 instance (Outputable m, Outputable l) => Outputable (ZTail m l) where
523 -- | 'pprTail' is used for debugging only
524 pprTail :: (Outputable m, Outputable l) => ZTail m l -> SDoc
525 pprTail (ZTail m t) = ppr m $$ ppr t
526 pprTail (ZLast LastExit) = text "<exit>"
527 pprTail (ZLast (LastOther l)) = ppr l
529 pprLgraph :: (Outputable m, Outputable l, LastNode l) => LGraph m l -> SDoc
530 pprLgraph g = text "{" $$ nest 2 (vcat $ map pprBlock blocks) $$ text "}"
531 where pprBlock (Block id tail) = ppr id <> colon $$ ppr tail
532 blocks = postorder_dfs g