1 {-# LANGUAGE ScopedTypeVariables #-}
3 ( -- These data types and names are carefully thought out
4 BlockId(..), freshBlockId -- ToDo: BlockId should be abstract,
6 , BlockEnv, emptyBlockEnv, lookupBlockEnv, extendBlockEnv, insertBlock, mkBlockEnv
7 , BlockSet, emptyBlockSet, elemBlockSet, extendBlockSet, mkBlockSet
8 , Graph(..), LGraph(..), FGraph(..)
9 , Block(..), ZBlock(..), ZHead(..), ZTail(..), ZLast(..)
10 , HavingSuccessors, succs, fold_succs
11 , LastNode, mkBranchNode, isBranchNode, branchNodeTarget
13 -- Observers and transformers
14 -- (open to renaming suggestions here)
15 , blockId, zip, unzip, last, goto_end, zipht, tailOfLast
17 , splice_tail, splice_head, splice_head_only
18 , of_block_list, to_block_list
28 -- the following functions might one day be useful and can be found
29 -- either below or in ZipCfgExtras:
30 , entry, exit, focus, focusp, unfocus
31 , ht_to_block, ht_to_last,
32 , splice_focus_entry, splice_focus_exit
33 , fold_fwd_block, foldM_fwd_block
39 #include "HsVersions.h"
41 import Outputable hiding (empty)
49 import Prelude hiding (zip, unzip, last)
51 -------------------------------------------------------------------------
52 -- GENERIC ZIPPER-BASED CONTROL-FLOW GRAPH --
53 -------------------------------------------------------------------------
56 This module defines datatypes used to represent control-flow graphs,
57 along with some functions for analyzing and splicing graphs.
58 Functions for building graphs are found in a separate module 'MkZipCfg'.
60 Every graph has a distinguished entry point. A graph has at least one
61 exit; most exits are instructions (or statements) like 'jump' or
62 'return', which transfer control to other procedures, but a graph may
63 have up to one 'fall through' exit. (A graph that represents an
64 entire Haskell or C-- procedure does not have a 'fall through' exit.)
66 A graph is a collection of basic blocks. A basic block begins with a
67 label (unique id; see Note [Unique BlockId]) which is followed by a
68 sequence of zero or more 'middle' nodes; the basic block ends with a
69 'last' node. Each 'middle' node is a single-entry, single-exit,
70 uninterruptible computation. A 'last' node is a single-entry,
71 multiple-exit computation. A last node may have zero or more successors,
72 which are identified by their unique ids.
74 A special case of last node is the ``default exit,'' which represents
75 'falling off the end' of the graph. Such a node is always represented by
76 the data constructor 'LastExit'. A graph may contain at most one
77 'LastExit' node, and a graph representing a full procedure should not
78 contain any 'LastExit' nodes. 'LastExit' nodes are used only to splice
79 graphs together, either during graph construction (see module 'MkZipCfg')
80 or during optimization (see module 'ZipDataflow').
82 A graph is parameterized over the types of middle and last nodes. Each of
83 these types will typically be instantiated with a subset of C-- statements
84 (see module 'ZipCfgCmmRep') or a subset of machine instructions (yet to be
85 implemented as of August 2007).
88 Note [Kinds of Graphs]
89 ~~~~~~~~~~~~~~~~~~~~~~
90 This module exposes three representations of graphs. In order of
91 increasing complexity, they are:
93 Graph m l The basic graph with its distinguished entry point
95 LGraph m l A graph with a *labelled* entry point
97 FGraph m l A labelled graph with the *focus* on a particular edge
99 There are three types because each type offers a slightly different
100 invariant or cost model.
102 * The distinguished entry of a Graph has no label. Because labels must
103 be unique, acquiring one requires a monadic operation ('freshBlockId').
104 The primary advantage of the Graph representation is that we can build
105 a small Graph purely functionally, without entering a monad. For
106 example, during optimization we can easily rewrite a single middle
107 node into a Graph containing a sequence of two middle nodes followed by
110 * In an LGraph, every basic block is labelled. The primary advantage of
111 this representation is its simplicity: each basic block can be treated
112 like any other. This representation is used for mapping, folding, and
113 translation, as well as layout.
115 Like any graph, an LGraph still has a distinguished entry point,
116 which you can discover using 'lg_entry'.
118 * An FGraph is an LGraph with the *focus* on one particular edge. The
119 primary advantage of this representation is that it provides
120 constant-time access to the nodes connected by that edge, and it also
121 allows constant-time, functional *replacement* of those nodes---in the
122 style of Huet's 'zipper'.
124 None of these representations is ideally suited to the incremental
125 construction of large graphs. A separate module, 'MkZipCfg', provides a
126 fourth representation that is asymptotically optimal for such construction.
130 --------------- Representation --------------------
132 -- | A basic block is a 'first' node, followed by zero or more 'middle'
133 -- nodes, followed by a 'last' node.
135 -- eventually this module should probably replace the original Cmm, but for
136 -- now we leave it to dynamic invariants what can be found where
139 = LastExit -- fall through; used for the block that has no last node
140 -- LastExit is a device used only for graphs under
141 -- construction, or framgments of graph under optimisation,
142 -- so we don't want to pollute the 'l' type parameter with it
145 data ZHead m = ZFirst BlockId | ZHead (ZHead m) m
146 -- ZHead is a (reversed) sequence of middle nodes labeled by a BlockId
147 data ZTail m l = ZLast (ZLast l) | ZTail m (ZTail m l)
148 -- ZTail is a sequence of middle nodes followed by a last node
150 -- | Blocks and flow graphs; see Note [Kinds of graphs]
151 data Block m l = Block BlockId (ZTail m l)
153 data Graph m l = Graph (ZTail m l) (BlockEnv (Block m l))
155 data LGraph m l = LGraph { lg_entry :: BlockId
156 , lg_blocks :: BlockEnv (Block m l) }
157 -- Invariant: lg_entry is in domain( lg_blocks )
159 -- | And now the zipper. The focus is between the head and tail.
160 -- We cannot ever focus on an inter-block edge.
161 data ZBlock m l = ZBlock (ZHead m) (ZTail m l)
162 data FGraph m l = FGraph { fg_entry :: BlockId
163 , fg_focus :: ZBlock m l
164 , fg_others :: BlockEnv (Block m l) }
165 -- Invariant: the block represented by 'fg_focus' is *not*
166 -- in the map 'fg_others'
168 ---- Utility functions ---
170 -- | The string argument to 'freshBlockId' was originally helpful in debugging the Quick C--
171 -- compiler, so I have kept it here even though at present it is thrown away at
172 -- this spot---there's no reason a BlockId couldn't one day carry a string.
173 freshBlockId :: String -> UniqSM BlockId
175 blockId :: Block m l -> BlockId
176 zip :: ZBlock m l -> Block m l
177 unzip :: Block m l -> ZBlock m l
179 last :: ZBlock m l -> ZLast l
180 goto_end :: ZBlock m l -> (ZHead m, ZLast l)
182 tailOfLast :: l -> ZTail m l
184 -- | Take a head and tail and go to beginning or end. The asymmetry
185 -- in the types and names is a bit unfortunate, but 'Block m l' is
186 -- effectively '(BlockId, ZTail m l)' and is accepted in many more places.
188 ht_to_block, zipht :: ZHead m -> ZTail m l -> Block m l
189 ht_to_last :: ZHead m -> ZTail m l -> (ZHead m, ZLast l)
191 -- | We can splice a single-entry, single-exit LGraph onto a head or a tail.
192 -- For a head, we have a head 'h' followed by a LGraph 'g'.
193 -- The entry node of 'g' gets joined to 'h', forming the entry into
194 -- the new LGraph. The exit of 'g' becomes the new head.
195 -- For both arguments and results, the order of values is the order of
196 -- control flow: before splicing, the head flows into the LGraph; after
197 -- splicing, the LGraph flows into the head.
198 -- Splicing a tail is the dual operation.
199 -- (In order to maintain the order-means-control-flow convention, the
200 -- orders are reversed.)
202 -- For example, assume
204 -- grph = (M, [M: <stuff>,
206 -- N: y:=x; LastExit])
207 -- tail = [return (y,x)]
209 -- Then splice_head head grph
210 -- = ((L, [L: x:=0; goto M,
215 -- Then splice_tail grph tail
217 -- , (???, [<blocks>,
218 -- N: y:=x; return (y,x)])
220 splice_head :: ZHead m -> LGraph m l -> (LGraph m l, ZHead m)
221 splice_tail :: LGraph m l -> ZTail m l -> (ZTail m l, LGraph m l)
223 -- | We can also splice a single-entry, no-exit LGraph into a head.
224 splice_head_only :: ZHead m -> LGraph m l -> LGraph m l
226 -- | Finally, we can remove the entry label of an LGraph and remove
227 -- it, leaving a Graph:
228 remove_entry_label :: LGraph m l -> Graph m l
230 -- | Conversion to and from the environment form is convenient. For
231 -- layout or dataflow, however, one will want to use 'postorder_dfs'
232 -- in order to get the blocks in an order that relates to the control
233 -- flow in the procedure.
234 of_block_list :: BlockId -> [Block m l] -> LGraph m l -- N log N
235 to_block_list :: LGraph m l -> [Block m l] -- N log N
237 -- | Traversal: 'postorder_dfs' returns a list of blocks reachable
238 -- from the entry node. This list has the following property:
240 -- Say a "back reference" exists if one of a block's
241 -- control-flow successors precedes it in the output list
243 -- Then there are as few back references as possible
245 -- The output is suitable for use in
246 -- a forward dataflow problem. For a backward problem, simply reverse
247 -- the list. ('postorder_dfs' is sufficiently tricky to implement that
248 -- one doesn't want to try and maintain both forward and backward
251 postorder_dfs :: LastNode l => LGraph m l -> [Block m l]
253 -- | For layout, we fold over pairs of 'Block m l' and 'Maybe BlockId'
254 -- in layout order. The 'Maybe BlockId', if present, identifies the
255 -- block that will be the layout successor of the current block. This
256 -- may be useful to help an emitter omit the final 'goto' of a block
257 -- that flows directly to its layout successor.
259 -- For example: fold_layout f z [ L1:B1, L2:B2, L3:B3 ]
260 -- = z <$> f (L1:B1) (Just L2)
261 -- <$> f (L2:B2) (Just L3)
262 -- <$> f (L3:B3) Nothing
263 -- where a <$> f = f a
265 LastNode l => (Block m l -> Maybe BlockId -> a -> a) -> a -> LGraph m l-> a
267 -- | We can also fold over blocks in an unspecified order. The
268 -- 'ZipCfgExtras' module provides a monadic version, which we
269 -- haven't needed (else it would be here).
270 fold_blocks :: (Block m l -> a -> a) -> a -> LGraph m l -> a
272 map_nodes :: (BlockId -> BlockId) -> (m -> m') -> (l -> l') -> LGraph m l -> LGraph m' l'
273 -- mapping includes the entry id!
275 -- | These translation functions are speculative. I hope eventually
276 -- they will be used in the native-code back ends ---NR
277 translate :: (m -> UniqSM (LGraph m' l')) ->
278 (l -> UniqSM (LGraph m' l')) ->
279 (LGraph m l -> UniqSM (LGraph m' l'))
282 -- | It's possible that another form of translation would be more suitable:
283 translateA :: (m -> Agraph m' l') -> (l -> AGraph m' l') -> LGraph m l -> LGraph m' l'
286 ------------------- Last nodes
288 -- | We can't make a graph out of just any old 'last node' type. A last node
289 -- has to be able to find its successors, and we need to be able to create and
290 -- identify unconditional branches. We put these capabilities in a type class.
291 -- Moreover, the property of having successors is also shared by 'Block's and
292 -- 'ZTails', so it is useful to have that property in a type class of its own.
294 class HavingSuccessors b where
295 succs :: b -> [BlockId]
296 fold_succs :: (BlockId -> a -> a) -> b -> a -> a
298 fold_succs add l z = foldr add z $ succs l
300 class HavingSuccessors l => LastNode l where
301 mkBranchNode :: BlockId -> l
302 isBranchNode :: l -> Bool
303 branchNodeTarget :: l -> BlockId -- panics if not branch node
304 -- ^ N.B. This interface seems to make for more congenial clients than a
305 -- single function of type 'l -> Maybe BlockId'
307 instance HavingSuccessors l => HavingSuccessors (ZLast l) where
309 succs (LastOther l) = succs l
310 fold_succs _ LastExit z = z
311 fold_succs f (LastOther l) z = fold_succs f l z
313 instance LastNode l => LastNode (ZLast l) where
314 mkBranchNode id = LastOther $ mkBranchNode id
315 isBranchNode LastExit = False
316 isBranchNode (LastOther l) = isBranchNode l
317 branchNodeTarget LastExit = panic "branchNodeTarget LastExit"
318 branchNodeTarget (LastOther l) = branchNodeTarget l
320 instance LastNode l => HavingSuccessors (ZBlock m l) where
321 succs b = succs (last b)
323 instance LastNode l => HavingSuccessors (Block m l) where
324 succs b = succs (unzip b)
327 -- ================ IMPLEMENTATION ================--
329 ----- block manipulations
331 blockId (Block id _) = id
333 freshBlockId _ = do { u <- getUniqueUs; return $ BlockId u }
335 -- | Convert block between forms.
336 -- These functions are tail-recursive, so we can go as deep as we like
337 -- without fear of stack overflow.
339 ht_to_block head tail = case head of
340 ZFirst id -> Block id tail
341 ZHead h m -> ht_to_block h (ZTail m tail)
343 ht_to_last head (ZLast l) = (head, l)
344 ht_to_last head (ZTail m t) = ht_to_last (ZHead head m) t
346 zipht h t = ht_to_block h t
347 zip (ZBlock h t) = ht_to_block h t
348 goto_end (ZBlock h t) = ht_to_last h t
350 unzip (Block id t) = ZBlock (ZFirst id) t
352 head_id :: ZHead m -> BlockId
353 head_id (ZFirst id) = id
354 head_id (ZHead h _) = head_id h
356 last (ZBlock _ t) = lastt t
357 where lastt (ZLast l) = l
358 lastt (ZTail _ t) = lastt t
360 tailOfLast l = ZLast (LastOther l) -- ^ tedious to write in every client
363 ------------------ simple graph manipulations
365 focus :: BlockId -> LGraph m l -> FGraph m l -- focus on edge out of node with id
366 focus id (LGraph entry blocks) =
367 case lookupBlockEnv blocks id of
368 Just b -> FGraph entry (unzip b) (delFromUFM blocks id)
369 Nothing -> panic "asked for nonexistent block in flow graph"
371 entry :: LGraph m l -> FGraph m l -- focus on edge out of entry node
372 entry g@(LGraph eid _) = focus eid g
374 -- | pull out a block satisfying the predicate, if any
375 splitp_blocks :: (Block m l -> Bool) -> BlockEnv (Block m l) ->
376 Maybe (Block m l, BlockEnv (Block m l))
377 splitp_blocks p blocks = lift $ foldUFM scan (Nothing, emptyBlockEnv) blocks
378 where scan b (yes, no) =
380 Nothing | p b -> (Just b, no)
381 | otherwise -> (yes, insertBlock b no)
382 Just _ -> (yes, insertBlock b no)
383 lift (Nothing, _) = Nothing
384 lift (Just b, bs) = Just (b, bs)
386 -- | 'insertBlock' should not be used to *replace* an existing block
387 -- but only to insert a new one
388 insertBlock :: Block m l -> BlockEnv (Block m l) -> BlockEnv (Block m l)
390 ASSERT (isNothing $ lookupBlockEnv bs id)
391 extendBlockEnv bs id b
394 -- | Used in assertions; tells if a graph has exactly one exit
395 single_exit :: LGraph l m -> Bool
396 single_exit g = foldUFM check 0 (lg_blocks g) == 1
397 where check block count = case last (unzip block) of
398 LastExit -> count + (1 :: Int)
401 ------------------ graph traversals
403 -- | This is the most important traversal over this data structure. It drops
404 -- unreachable code and puts blocks in an order that is good for solving forward
405 -- dataflow problems quickly. The reverse order is good for solving backward
406 -- dataflow problems quickly. The forward order is also reasonably good for
407 -- emitting instructions, except that it will not usually exploit Forrest
408 -- Baskett's trick of eliminating the unconditional branch from a loop. For
409 -- that you would need a more serious analysis, probably based on dominators, to
410 -- identify loop headers.
412 -- The ubiquity of 'postorder_dfs' is one reason for the ubiquity of the 'LGraph'
413 -- representation, when for most purposes the plain 'Graph' representation is
414 -- more mathematically elegant (but results in more complicated code).
416 -- Here's an easy way to go wrong! Consider
420 -- Then ordinary dfs would give [A,B,D,C] which has a back ref from C to D.
421 -- Better to geot [A,B,C,D]
423 -- postorder_dfs :: LastNode l => LGraph m l -> [Block m l]
424 postorder_dfs g@(LGraph _ blocks) =
425 let FGraph _ eblock _ = entry g
426 in vnode (zip eblock) (\acc _visited -> acc) [] emptyBlockSet
429 -- Block m l -> ([Block m l] -> BlockSet -> a) -> [Block m l] -> BlockSet -> a
430 vnode block@(Block id _) cont acc visited =
431 if elemBlockSet id visited then
434 vchildren block (get_children block) cont acc (extendBlockSet visited id)
435 vchildren block bs cont acc visited =
436 let next children acc visited =
437 case children of [] -> cont (block : acc) visited
438 (b:bs) -> vnode b (next bs) acc visited
439 in next bs acc visited
440 get_children block = foldl add_id [] (succs block)
441 add_id rst id = case lookupBlockEnv blocks id of
446 -- | Slightly more complicated than the usual fold because we want to tell block
447 -- 'b1' what its inline successor is going to be, so that if 'b1' ends with
448 -- 'goto b2', the goto can be omitted.
450 fold_layout f z g@(LGraph eid _) = fold (postorder_dfs g) z
451 where fold blocks z =
452 case blocks of [] -> z
454 b1 : b2 : bs -> fold (b2 : bs) (f b1 (nextlabel b2) z)
455 nextlabel (Block id _) =
456 if id == eid then panic "entry as successor"
459 -- | The rest of the traversals are straightforward
461 map_nodes idm middle last (LGraph eid blocks) = LGraph (idm eid) (mapUFM block blocks)
462 where block (Block id t) = Block (idm id) (tail t)
463 tail (ZTail m t) = ZTail (middle m) (tail t)
464 tail (ZLast LastExit) = ZLast LastExit
465 tail (ZLast (LastOther l)) = ZLast (LastOther (last l))
467 fold_blocks f z (LGraph _ blocks) = foldUFM f z blocks
469 of_block_list e blocks = LGraph e $ foldr insertBlock emptyBlockEnv blocks
470 to_block_list (LGraph _ blocks) = eltsUFM blocks
475 -- We want to be able to scrutinize a single-entry, single-exit 'LGraph' for
476 -- splicing purposes. There are two useful cases: the 'LGraph' is a single block
477 -- or it isn't. We use continuation-passing style.
479 prepare_for_splicing ::
480 LGraph m l -> (ZTail m l -> a) -> (ZTail m l -> ZHead m -> BlockEnv (Block m l) -> a)
482 prepare_for_splicing g single multi =
483 let FGraph _ gentry gblocks = entry g
484 ZBlock _ etail = gentry
485 in if isNullUFM gblocks then
487 LastExit -> single etail
488 _ -> panic "bad single block"
490 case splitp_blocks is_exit gblocks of
491 Nothing -> panic "Can't find an exit block"
492 Just (gexit, gblocks) ->
493 let (gh, gl) = goto_end $ unzip gexit in
494 case gl of LastExit -> multi etail gh gblocks
495 _ -> panic "exit is not exit?!"
497 is_exit :: Block m l -> Bool
498 is_exit b = case last (unzip b) of { LastExit -> True; _ -> False }
501 ASSERT (single_exit g) prepare_for_splicing g splice_one_block splice_many_blocks
502 where eid = head_id head
503 splice_one_block tail' =
504 case ht_to_last head tail' of
505 (head, LastExit) -> (LGraph eid emptyBlockEnv, head)
506 _ -> panic "spliced LGraph without exit"
507 splice_many_blocks entry exit others =
508 (LGraph eid (insertBlock (zipht head entry) others), exit)
511 ASSERT (single_exit g) prepare_for_splicing g splice_one_block splice_many_blocks
512 where splice_one_block tail' = -- return tail' .. tail
513 case ht_to_last (ZFirst (lg_entry g)) tail' of
515 case ht_to_block head' tail of
516 Block id t | id == lg_entry g -> (t, LGraph id emptyBlockEnv)
517 _ -> panic "entry in; garbage out"
518 _ -> panic "spliced single block without Exit"
519 splice_many_blocks entry exit others =
520 (entry, LGraph (lg_entry g) (insertBlock (zipht exit tail) others))
522 splice_head_only head g =
523 let FGraph eid gentry gblocks = entry g
525 ZBlock (ZFirst _) tail -> LGraph eid (insertBlock (zipht head tail) gblocks)
526 _ -> panic "entry not at start of block?!"
528 remove_entry_label g =
529 let FGraph e eblock others = entry g
531 ZBlock (ZFirst id) tail
532 | id == e -> Graph tail others
533 _ -> panic "id doesn't match on entry block"
537 translate txm txl (LGraph eid blocks) =
538 do blocks' <- foldUFM txblock (return emptyBlockEnv) blocks
539 return $ LGraph eid blocks'
542 -- Block m l -> UniqSM (BlockEnv (Block m' l')) -> UniqSM (BlockEnv (Block m' l'))
543 txblock (Block id t) expanded =
544 do blocks' <- expanded
545 txtail (ZFirst id) t blocks'
546 -- txtail :: ZHead m' -> ZTail m l -> BlockEnv (Block m' l') ->
547 -- UniqSM (BlockEnv (Block m' l'))
548 txtail h (ZTail m t) blocks' =
550 let (g, h') = splice_head h m'
551 txtail h' t (plusUFM (lg_blocks g) blocks')
552 txtail h (ZLast (LastOther l)) blocks' =
554 return $ plusUFM (lg_blocks (splice_head_only h l')) blocks'
555 txtail h (ZLast LastExit) blocks' =
556 return $ insertBlock (zipht h (ZLast LastExit)) blocks'
558 ----------------------------------------------------------------
559 --- Block Ids, their environments, and their sets
561 {- Note [Unique BlockId]
562 ~~~~~~~~~~~~~~~~~~~~~~~~
563 Although a 'BlockId' is a local label, for reasons of implementation,
564 'BlockId's must be unique within an entire compilation unit. The reason
565 is that each local label is mapped to an assembly-language label, and in
566 most assembly languages allow, a label is visible throughout the enitre
567 compilation unit in which it appears.
570 newtype BlockId = BlockId Unique
573 instance Uniquable BlockId where
574 getUnique (BlockId u) = u
576 instance Show BlockId where
577 show (BlockId u) = show u
579 instance Outputable BlockId where
580 ppr = ppr . getUnique
583 type BlockEnv a = UniqFM {- BlockId -} a
584 emptyBlockEnv :: BlockEnv a
585 emptyBlockEnv = emptyUFM
586 lookupBlockEnv :: BlockEnv a -> BlockId -> Maybe a
587 lookupBlockEnv = lookupUFM
588 extendBlockEnv :: BlockEnv a -> BlockId -> a -> BlockEnv a
589 extendBlockEnv = addToUFM
590 mkBlockEnv :: [(BlockId,a)] -> BlockEnv a
591 mkBlockEnv = listToUFM
593 type BlockSet = UniqSet BlockId
594 emptyBlockSet :: BlockSet
595 emptyBlockSet = emptyUniqSet
596 elemBlockSet :: BlockId -> BlockSet -> Bool
597 elemBlockSet = elementOfUniqSet
598 extendBlockSet :: BlockSet -> BlockId -> BlockSet
599 extendBlockSet = addOneToUniqSet
600 mkBlockSet :: [BlockId] -> BlockSet
601 mkBlockSet = mkUniqSet
603 ----------------------------------------------------------------
605 ----------------------------------------------------------------
607 -- putting this code in PprCmmZ leads to circular imports :-(
609 instance (Outputable m, Outputable l) => Outputable (ZTail m l) where
612 pprTail :: (Outputable m, Outputable l) => ZTail m l -> SDoc
613 pprTail (ZTail m t) = ppr m $$ ppr t
614 pprTail (ZLast LastExit) = text "<exit>"
615 pprTail (ZLast (LastOther l)) = ppr l
617 pprLgraph :: (Outputable m, Outputable l, LastNode l) => LGraph m l -> SDoc
618 pprLgraph g = text "{" $$ nest 2 (vcat $ map pprBlock blocks) $$ text "}"
619 where pprBlock (Block id tail) = ppr id <> colon $$ ppr tail
620 blocks = postorder_dfs g
622 _unused :: FS.FastString