1 {-# LANGUAGE ScopedTypeVariables #-}
3 ( BlockId(..), freshBlockId
4 , BlockEnv, emptyBlockEnv, lookupBlockEnv, extendBlockEnv, insertBlock, mkBlockEnv
5 , BlockSet, emptyBlockSet, elemBlockSet, extendBlockSet, mkBlockSet
6 , Graph(..), LGraph(..), FGraph(..)
7 , Block(..), ZBlock(..), ZHead(..), ZTail(..), ZLast(..)
8 , HavingSuccessors, succs, fold_succs
9 , LastNode, mkBranchNode, isBranchNode, branchNodeTarget
11 -- Observers and transformers
12 , blockId, zip, unzip, last, goto_end, zipht, tailOfLast
14 , splice_tail, splice_head, splice_head_only
15 , of_block_list, to_block_list
25 -- the following functions might one day be useful and can be found
26 -- either below or in ZipCfgExtras:
27 , entry, exit, focus, focusp, unfocus
28 , ht_to_block, ht_to_last,
29 , splice_focus_entry, splice_focus_exit
30 , fold_fwd_block, foldM_fwd_block
36 #include "HsVersions.h"
38 import Outputable hiding (empty)
46 import Prelude hiding (zip, unzip, last)
48 -------------------------------------------------------------------------
49 -- GENERIC ZIPPER-BASED CONTROL-FLOW GRAPH --
50 -------------------------------------------------------------------------
53 This module defines datatypes used to represent control-flow graphs,
54 along with some functions for analyzing and splicing graphs.
55 Functions for building graphs are found in a separate module 'MkZipCfg'.
57 Every graph has a distinguished entry point. A graph has at least one
58 exit; most exits are instructions (or statements) like 'jump' or
59 'return', which transfer control to other procedures, but a graph may
60 have up to one 'fall through' exit. (A graph that represents an
61 entire Haskell or C-- procedure does not have a 'fall through' exit.)
63 A graph is a collection of basic blocks. A basic block begins with a
64 label (unique id; see Note [Unique BlockId]) which is followed by a
65 sequence of zero or more 'middle' nodes; the basic block ends with a
66 'last' node. Each 'middle' node is a single-entry, single-exit,
67 uninterruptible computation. A 'last' node is a single-entry,
68 multiple-exit computation. A last node may have zero or more successors,
69 which are identified by their unique ids.
71 A special case of last node is the ``default exit,'' which represents
72 'falling off the end' of the graph. Such a node is always represented by
73 the data constructor 'LastExit'. A graph may contain at most one
74 'LastExit' node, and a graph representing a full procedure should not
75 contain any 'LastExit' nodes. 'LastExit' nodes are used only to splice
76 graphs together, either during graph construction (see module 'MkZipCfg')
77 or during optimization (see module 'ZipDataflow').
79 A graph is parameterized over the types of middle and last nodes. Each of
80 these types will typically be instantiated with a subset of C-- statements
81 (see module 'ZipCfgCmmRep') or a subset of machine instructions (yet to be
82 implemented as of August 2007).
86 This module exposes three representations of graphs. In order of
87 increasing complexity, they are:
89 Graph m l The basic graph with its distinguished entry point
91 LGraph m l A graph with a *labelled* entry point
93 FGraph m l A labelled graph with the *focus* on a particular edge
95 There are three types because each type offers a slightly different
96 invariant or cost model.
98 * The distinguished entry of a Graph has no label. Because labels must
99 be unique, acquiring one requires a monadic operation ('freshBlockId').
100 The primary advantage of the Graph representation is that we can build
101 a small Graph purely functionally, without entering a monad. For
102 example, during optimization we can easily rewrite a single middle
103 node into a Graph containing a sequence of two middle nodes followed by
106 * In an LGraph, every basic block is labelled. The primary advantage of
107 this representation is its simplicity: each basic block can be treated
108 like any other. This representation is used for mapping, folding, and
109 translation, as well as layout.
111 Like any graph, an LGraph still has a distinguished entry point,
112 which you can discover using 'lg_entry'.
114 * An FGraph is an LGraph with the *focus* on one particular edge. The
115 primary advantage of this representation is that it provides
116 constant-time access to the nodes connected by that edge, and it also
117 allows constant-time, functional *replacement* of those nodes---in the
118 style of Huet's 'zipper'.
120 None of these representations is ideally suited to the incremental
121 construction of large graphs. A separate module, 'MkZipCfg', provides a
122 fourth representation that is asymptotically optimal for such construction.
126 --------------- Representation --------------------
128 -- | A basic block is a 'first' node, followed by zero or more 'middle'
129 -- nodes, followed by a 'last' node.
131 -- eventually this module should probably replace the original Cmm, but for
132 -- now we leave it to dynamic invariants what can be found where
135 = LastExit -- fall through; used for the block that has no last node
136 -- LastExit is a device used only for graphs under
137 -- construction, or framgments of graph under optimisation,
138 -- so we don't want to pollute the 'l' type parameter with it
141 data ZHead m = ZFirst BlockId | ZHead (ZHead m) m
142 -- ZHead is a (reversed) sequence of middle nodes labeled by a BlockId
143 data ZTail m l = ZLast (ZLast l) | ZTail m (ZTail m l)
144 -- ZTail is a sequence of middle nodes followed by a last node
146 -- | Blocks and flow graphs
147 data Block m l = Block BlockId (ZTail m l)
149 data Graph m l = Graph (ZTail m l) (BlockEnv (Block m l))
151 data LGraph m l = LGraph { lg_entry :: BlockId
152 , lg_blocks :: BlockEnv (Block m l) }
154 -- | And now the zipper. The focus is between the head and tail.
155 -- We cannot ever focus on an inter-block edge.
156 data ZBlock m l = ZBlock (ZHead m) (ZTail m l)
157 data FGraph m l = FGraph { fg_entry :: BlockId
158 , fg_focus :: ZBlock m l
159 , fg_others :: BlockEnv (Block m l) }
160 -- Invariant: the block represented by 'fg_focus' is *not*
161 -- in the map 'fg_others'
163 ---- Utility functions ---
165 blockId :: Block m l -> BlockId
166 zip :: ZBlock m l -> Block m l
167 unzip :: Block m l -> ZBlock m l
169 last :: ZBlock m l -> ZLast l
170 goto_end :: ZBlock m l -> (ZHead m, ZLast l)
172 tailOfLast :: l -> ZTail m l
174 -- | Take a head and tail and go to beginning or end. The asymmetry
175 -- in the types and names is a bit unfortunate, but 'Block m l' is
176 -- effectively '(BlockId, ZTail m l)' and is accepted in many more places.
178 ht_to_block, zipht :: ZHead m -> ZTail m l -> Block m l
179 ht_to_last :: ZHead m -> ZTail m l -> (ZHead m, ZLast l)
181 -- | We can splice a single-entry, single-exit LGraph onto a head or a tail.
182 -- For a head, we have a head 'h' followed by a LGraph 'g'.
183 -- The entry node of 'g' gets joined to 'h', forming the entry into
184 -- the new LGraph. The exit of 'g' becomes the new head.
185 -- For both arguments and results, the order of values is the order of
186 -- control flow: before splicing, the head flows into the LGraph; after
187 -- splicing, the LGraph flows into the head.
188 -- Splicing a tail is the dual operation.
189 -- (In order to maintain the order-means-control-flow convention, the
190 -- orders are reversed.)
192 splice_head :: ZHead m -> LGraph m l -> (LGraph m l, ZHead m)
193 splice_tail :: LGraph m l -> ZTail m l -> (ZTail m l, LGraph m l)
195 -- | We can also splice a single-entry, no-exit LGraph into a head.
196 splice_head_only :: ZHead m -> LGraph m l -> LGraph m l
198 -- | Finally, we can remove the entry label of an LGraph and remove
199 -- it, leaving a Graph:
200 remove_entry_label :: LGraph m l -> Graph m l
202 -- | Conversion to and from the environment form is convenient. For
203 -- layout or dataflow, however, one will want to use 'postorder_dfs'
204 -- in order to get the blocks in an order that relates to the control
205 -- flow in the procedure.
206 of_block_list :: BlockId -> [Block m l] -> LGraph m l -- N log N
207 to_block_list :: LGraph m l -> [Block m l] -- N log N
209 -- | Traversal: 'postorder_dfs' returns a list of blocks reachable
210 -- from the entry node. The postorder depth-first-search order means
211 -- the list is in roughly first-to-last order, as suitable for use in
212 -- a forward dataflow problem. For a backward problem, simply reverse
213 -- the list. ('postorder_dfs' is sufficiently trick to implement that
214 -- one doesn't want to try and maintain both forward and backward
217 postorder_dfs :: LastNode l => LGraph m l -> [Block m l]
219 -- | For layout, we fold over pairs of 'Block m l' and 'Maybe BlockId'
220 -- in layout order. The 'Maybe BlockId', if present, identifies the
221 -- block that will be the layout successor of the current block. This
222 -- may be useful to help an emitter omit the final 'goto' of a block
223 -- that flows directly to its layout successor.
225 LastNode l => (Block m l -> Maybe BlockId -> a -> a) -> a -> LGraph m l-> a
227 -- | We can also fold over blocks in an unspecified order. The
228 -- 'ZipCfgExtras' module provides a monadic version, which we
229 -- haven't needed (else it would be here).
230 fold_blocks :: (Block m l -> a -> a) -> a -> LGraph m l -> a
232 map_nodes :: (BlockId -> BlockId) -> (m -> m') -> (l -> l') -> LGraph m l -> LGraph m' l'
233 -- mapping includes the entry id!
235 -- | These translation functions are speculative. I hope eventually
236 -- they will be used in the native-code back ends ---NR
237 translate :: (m -> UniqSM (LGraph m' l')) ->
238 (l -> UniqSM (LGraph m' l')) ->
239 (LGraph m l -> UniqSM (LGraph m' l'))
242 -- | It's possible that another form of translation would be more suitable:
243 translateA :: (m -> Agraph m' l') -> (l -> AGraph m' l') -> LGraph m l -> LGraph m' l'
246 ------------------- Last nodes
248 -- | We can't make a graph out of just any old 'last node' type. A last node
249 -- has to be able to find its successors, and we need to be able to create and
250 -- identify unconditional branches. We put these capabilities in a type class.
251 -- Moreover, the property of having successors is also shared by 'Block's and
252 -- 'ZTails', so it is useful to have that property in a type class of its own.
254 class HavingSuccessors b where
255 succs :: b -> [BlockId]
256 fold_succs :: (BlockId -> a -> a) -> b -> a -> a
258 fold_succs add l z = foldr add z $ succs l
260 class HavingSuccessors l => LastNode l where
261 mkBranchNode :: BlockId -> l
262 isBranchNode :: l -> Bool
263 branchNodeTarget :: l -> BlockId -- panics if not branch node
264 -- ^ N.B. This interface seems to make for more congenial clients than a
265 -- single function of type 'l -> Maybe BlockId'
267 instance HavingSuccessors l => HavingSuccessors (ZLast l) where
269 succs (LastOther l) = succs l
270 fold_succs _ LastExit z = z
271 fold_succs f (LastOther l) z = fold_succs f l z
273 instance LastNode l => LastNode (ZLast l) where
274 mkBranchNode id = LastOther $ mkBranchNode id
275 isBranchNode LastExit = False
276 isBranchNode (LastOther l) = isBranchNode l
277 branchNodeTarget LastExit = panic "branchNodeTarget LastExit"
278 branchNodeTarget (LastOther l) = branchNodeTarget l
280 instance LastNode l => HavingSuccessors (ZBlock m l) where
281 succs b = succs (last b)
283 instance LastNode l => HavingSuccessors (Block m l) where
284 succs b = succs (unzip b)
287 -- ================ IMPLEMENTATION ================--
289 ----- block manipulations
291 blockId (Block id _) = id
293 -- | The string argument was originally helpful in debugging the Quick C--
294 -- compiler, so I have kept it here even though at present it is thrown away at
295 -- this spot---there's no reason a BlockId couldn't one day carry a string.
297 freshBlockId :: String -> UniqSM BlockId
298 freshBlockId _ = do { u <- getUniqueUs; return $ BlockId u }
300 -- | Convert block between forms.
301 -- These functions are tail-recursive, so we can go as deep as we like
302 -- without fear of stack overflow.
304 ht_to_block head tail = case head of
305 ZFirst id -> Block id tail
306 ZHead h m -> ht_to_block h (ZTail m tail)
308 ht_to_last head (ZLast l) = (head, l)
309 ht_to_last head (ZTail m t) = ht_to_last (ZHead head m) t
311 zipht h t = ht_to_block h t
312 zip (ZBlock h t) = ht_to_block h t
313 goto_end (ZBlock h t) = ht_to_last h t
315 unzip (Block id t) = ZBlock (ZFirst id) t
317 head_id :: ZHead m -> BlockId
318 head_id (ZFirst id) = id
319 head_id (ZHead h _) = head_id h
321 last (ZBlock _ t) = lastt t
322 where lastt (ZLast l) = l
323 lastt (ZTail _ t) = lastt t
325 tailOfLast l = ZLast (LastOther l) -- ^ tedious to write in every client
328 ------------------ simple graph manipulations
330 focus :: BlockId -> LGraph m l -> FGraph m l -- focus on edge out of node with id
331 focus id (LGraph entry blocks) =
332 case lookupBlockEnv blocks id of
333 Just b -> FGraph entry (unzip b) (delFromUFM blocks id)
334 Nothing -> panic "asked for nonexistent block in flow graph"
336 entry :: LGraph m l -> FGraph m l -- focus on edge out of entry node
337 entry g@(LGraph eid _) = focus eid g
339 -- | pull out a block satisfying the predicate, if any
340 splitp_blocks :: (Block m l -> Bool) -> BlockEnv (Block m l) ->
341 Maybe (Block m l, BlockEnv (Block m l))
342 splitp_blocks p blocks = lift $ foldUFM scan (Nothing, emptyBlockEnv) blocks
343 where scan b (yes, no) =
345 Nothing | p b -> (Just b, no)
346 | otherwise -> (yes, insertBlock b no)
347 Just _ -> (yes, insertBlock b no)
348 lift (Nothing, _) = Nothing
349 lift (Just b, bs) = Just (b, bs)
351 -- | 'insertBlock' should not be used to *replace* an existing block
352 -- but only to insert a new one
353 insertBlock :: Block m l -> BlockEnv (Block m l) -> BlockEnv (Block m l)
355 ASSERT (isNothing $ lookupBlockEnv bs id)
356 extendBlockEnv bs id b
359 -- | Used in assertions; tells if a graph has exactly one exit
360 single_exit :: LGraph l m -> Bool
361 single_exit g = foldUFM check 0 (lg_blocks g) == 1
362 where check block count = case last (unzip block) of
363 LastExit -> count + (1 :: Int)
366 ------------------ graph traversals
368 -- | This is the most important traversal over this data structure. It drops
369 -- unreachable code and puts blocks in an order that is good for solving forward
370 -- dataflow problems quickly. The reverse order is good for solving backward
371 -- dataflow problems quickly. The forward order is also reasonably good for
372 -- emitting instructions, except that it will not usually exploit Forrest
373 -- Baskett's trick of eliminating the unconditional branch from a loop. For
374 -- that you would need a more serious analysis, probably based on dominators, to
375 -- identify loop headers.
377 -- The ubiquity of 'postorder_dfs' is one reason for the ubiquity of the 'LGraph'
378 -- representation, when for most purposes the plain 'Graph' representation is
379 -- more mathematically elegant (but results in more complicated code).
381 postorder_dfs g@(LGraph _ blocks) =
382 let FGraph _ eblock _ = entry g
383 in vnode (zip eblock) (\acc _visited -> acc) [] emptyBlockSet
386 -- Block m l -> ([Block m l] -> BlockSet -> a) -> [Block m l] -> BlockSet -> a
387 vnode block@(Block id _) cont acc visited =
388 if elemBlockSet id visited then
391 vchildren block (get_children block) cont acc (extendBlockSet visited id)
392 vchildren block bs cont acc visited =
393 let next children acc visited =
394 case children of [] -> cont (block : acc) visited
395 (b:bs) -> vnode b (next bs) acc visited
396 in next bs acc visited
397 get_children block = foldl add_id [] (succs block)
398 add_id rst id = case lookupBlockEnv blocks id of
403 -- | Slightly more complicated than the usual fold because we want to tell block
404 -- 'b1' what its inline successor is going to be, so that if 'b1' ends with
405 -- 'goto b2', the goto can be omitted.
407 fold_layout f z g@(LGraph eid _) = fold (postorder_dfs g) z
408 where fold blocks z =
409 case blocks of [] -> z
411 b1 : b2 : bs -> fold (b2 : bs) (f b1 (nextlabel b2) z)
412 nextlabel (Block id _) =
413 if id == eid then panic "entry as successor"
416 -- | The rest of the traversals are straightforward
418 map_nodes idm middle last (LGraph eid blocks) = LGraph (idm eid) (mapUFM block blocks)
419 where block (Block id t) = Block (idm id) (tail t)
420 tail (ZTail m t) = ZTail (middle m) (tail t)
421 tail (ZLast LastExit) = ZLast LastExit
422 tail (ZLast (LastOther l)) = ZLast (LastOther (last l))
424 fold_blocks f z (LGraph _ blocks) = foldUFM f z blocks
426 of_block_list e blocks = LGraph e $ foldr insertBlock emptyBlockEnv blocks
427 to_block_list (LGraph _ blocks) = eltsUFM blocks
432 -- We want to be able to scrutinize a single-entry, single-exit 'LGraph' for
433 -- splicing purposes. There are two useful cases: the 'LGraph' is a single block
434 -- or it isn't. We use continuation-passing style.
436 prepare_for_splicing ::
437 LGraph m l -> (ZTail m l -> a) -> (ZTail m l -> ZHead m -> BlockEnv (Block m l) -> a)
439 prepare_for_splicing g single multi =
440 let FGraph _ gentry gblocks = entry g
441 ZBlock _ etail = gentry
442 in if isNullUFM gblocks then
444 LastExit -> single etail
445 _ -> panic "bad single block"
447 case splitp_blocks is_exit gblocks of
448 Nothing -> panic "Can't find an exit block"
449 Just (gexit, gblocks) ->
450 let (gh, gl) = goto_end $ unzip gexit in
451 case gl of LastExit -> multi etail gh gblocks
452 _ -> panic "exit is not exit?!"
454 is_exit :: Block m l -> Bool
455 is_exit b = case last (unzip b) of { LastExit -> True; _ -> False }
458 ASSERT (single_exit g) prepare_for_splicing g splice_one_block splice_many_blocks
459 where eid = head_id head
460 splice_one_block tail' =
461 case ht_to_last head tail' of
462 (head, LastExit) -> (LGraph eid emptyBlockEnv, head)
463 _ -> panic "spliced LGraph without exit"
464 splice_many_blocks entry exit others =
465 (LGraph eid (insertBlock (zipht head entry) others), exit)
468 ASSERT (single_exit g) prepare_for_splicing g splice_one_block splice_many_blocks
469 where splice_one_block tail' = -- return tail' .. tail
470 case ht_to_last (ZFirst (lg_entry g)) tail' of
472 case ht_to_block head' tail of
473 Block id t | id == lg_entry g -> (t, LGraph id emptyBlockEnv)
474 _ -> panic "entry in; garbage out"
475 _ -> panic "spliced single block without Exit"
476 splice_many_blocks entry exit others =
477 (entry, LGraph (lg_entry g) (insertBlock (zipht exit tail) others))
479 splice_head_only head g =
480 let FGraph eid gentry gblocks = entry g
482 ZBlock (ZFirst _) tail -> LGraph eid (insertBlock (zipht head tail) gblocks)
483 _ -> panic "entry not at start of block?!"
485 remove_entry_label g =
486 let FGraph e eblock others = entry g
488 ZBlock (ZFirst id) tail
489 | id == e -> Graph tail others
490 _ -> panic "id doesn't match on entry block"
494 translate txm txl (LGraph eid blocks) =
495 do blocks' <- foldUFM txblock (return emptyBlockEnv) blocks
496 return $ LGraph eid blocks'
499 -- Block m l -> UniqSM (BlockEnv (Block m' l')) -> UniqSM (BlockEnv (Block m' l'))
500 txblock (Block id t) expanded =
501 do blocks' <- expanded
502 txtail (ZFirst id) t blocks'
503 -- txtail :: ZHead m' -> ZTail m l -> BlockEnv (Block m' l') ->
504 -- UniqSM (BlockEnv (Block m' l'))
505 txtail h (ZTail m t) blocks' =
507 let (g, h') = splice_head h m'
508 txtail h' t (plusUFM (lg_blocks g) blocks')
509 txtail h (ZLast (LastOther l)) blocks' =
511 return $ plusUFM (lg_blocks (splice_head_only h l')) blocks'
512 txtail h (ZLast LastExit) blocks' =
513 return $ insertBlock (zipht h (ZLast LastExit)) blocks'
515 ----------------------------------------------------------------
516 --- Block Ids, their environments, and their sets
518 {- Note [Unique BlockId]
519 ~~~~~~~~~~~~~~~~~~~~~~~~
520 Although a 'BlockId' is a local label, for reasons of implementation,
521 'BlockId's must be unique within an entire compilation unit. The reason
522 is that each local label is mapped to an assembly-language label, and in
523 most assembly languages allow, a label is visible throughout the enitre
524 compilation unit in which it appears.
527 newtype BlockId = BlockId Unique
530 instance Uniquable BlockId where
531 getUnique (BlockId u) = u
533 instance Show BlockId where
534 show (BlockId u) = show u
536 instance Outputable BlockId where
537 ppr = ppr . getUnique
540 type BlockEnv a = UniqFM {- BlockId -} a
541 emptyBlockEnv :: BlockEnv a
542 emptyBlockEnv = emptyUFM
543 lookupBlockEnv :: BlockEnv a -> BlockId -> Maybe a
544 lookupBlockEnv = lookupUFM
545 extendBlockEnv :: BlockEnv a -> BlockId -> a -> BlockEnv a
546 extendBlockEnv = addToUFM
547 mkBlockEnv :: [(BlockId,a)] -> BlockEnv a
548 mkBlockEnv = listToUFM
550 type BlockSet = UniqSet BlockId
551 emptyBlockSet :: BlockSet
552 emptyBlockSet = emptyUniqSet
553 elemBlockSet :: BlockId -> BlockSet -> Bool
554 elemBlockSet = elementOfUniqSet
555 extendBlockSet :: BlockSet -> BlockId -> BlockSet
556 extendBlockSet = addOneToUniqSet
557 mkBlockSet :: [BlockId] -> BlockSet
558 mkBlockSet = mkUniqSet
560 ----------------------------------------------------------------
562 ----------------------------------------------------------------
564 -- putting this code in PprCmmZ leads to circular imports :-(
566 instance (Outputable m, Outputable l) => Outputable (ZTail m l) where
569 pprTail :: (Outputable m, Outputable l) => ZTail m l -> SDoc
570 pprTail (ZTail m t) = ppr m $$ ppr t
571 pprTail (ZLast LastExit) = text "<exit>"
572 pprTail (ZLast (LastOther l)) = ppr l
574 pprLgraph :: (Outputable m, Outputable l, LastNode l) => LGraph m l -> SDoc
575 pprLgraph g = text "{" $$ nest 2 (vcat $ map pprBlock blocks) $$ text "}"
576 where pprBlock (Block id tail) = ppr id <> colon $$ ppr tail
577 blocks = postorder_dfs g
579 _unused :: FS.FastString