core expression with (hopefully) improved usage information.
\begin{code}
+{-# OPTIONS -w #-}
+-- The above warning supression flag is a temporary kludge.
+-- While working on this module you are encouraged to remove it and fix
+-- any warnings in the module. See
+-- http://hackage.haskell.org/trac/ghc/wiki/Commentary/CodingStyle#Warnings
+-- for details
+
module OccurAnal (
occurAnalysePgm, occurAnalyseExpr
) where
import CoreSyn
import CoreFVs ( idRuleVars )
import CoreUtils ( exprIsTrivial, isDefaultAlt )
-import Id ( isDataConWorkId, isOneShotBndr, setOneShotLambda,
- idOccInfo, setIdOccInfo, isLocalId,
- isExportedId, idArity, idHasRules,
- idUnique, Id
- )
+import Id
+import IdInfo
import BasicTypes ( OccInfo(..), isOneOcc, InterestingCxt )
import VarSet
import PrelNames ( buildIdKey, foldrIdKey, runSTRepIdKey, augmentIdKey )
import Unique ( Unique )
import UniqFM ( keysUFM, intersectsUFM )
-import Util ( mapAndUnzip, mapAccumL )
+import Util ( mapAndUnzip )
import Outputable
+
+import Data.List
\end{code}
\begin{code}
occAnalBind env (Rec pairs) body_usage
- = foldr (_scc_ "occAnalBind.dofinal" do_final_bind) (body_usage, []) sccs
+ = foldr ({-# SCC "occAnalBind.dofinal" #-} do_final_bind) (body_usage, []) sccs
where
analysed_pairs :: [Details]
analysed_pairs = [ (bndr, rhs_usage, rhs')
]
sccs :: [SCC (Node Details)]
- sccs = _scc_ "occAnalBind.scc" stronglyConnCompR edges
+ sccs = {-# SCC "occAnalBind.scc" #-} stronglyConnCompR edges
---- stuff for dependency analysis of binds -------------------------------
edges :: [Node Details]
- edges = _scc_ "occAnalBind.assoc"
+ edges = {-# SCC "occAnalBind.assoc" #-}
[ (details, idUnique id, edges_from id rhs_usage)
| details@(id, rhs_usage, rhs) <- analysed_pairs
]
-- which has n**2 cost, and this meant that edges_from alone
-- consumed 10% of total runtime!
edges_from :: Id -> UsageDetails -> [Unique]
- edges_from bndr rhs_usage = _scc_ "occAnalBind.edges_from"
+ edges_from bndr rhs_usage = {-# SCC "occAnalBind.edges_from" #-}
keysUFM (addRuleUsage rhs_usage bndr)
---- Stuff to "re-constitute" bindings from dependency-analysis info ------
score :: Node Details -> Int -- Higher score => less likely to be picked as loop breaker
score ((bndr, _, rhs), _, _)
+ | workerExists (idWorkerInfo bndr) = 10
+ -- Note [Worker inline loop]
+
| exprIsTrivial rhs = 4 -- Practically certain to be inlined
-- Used to have also: && not (isExportedId bndr)
-- But I found this sometimes cost an extra iteration when we have
-- where df is the exported dictionary. Then df makes a really
-- bad choice for loop breaker
- | is_con_app rhs = 3 -- Data types help with cases
- -- This used to have a lower score than inlineCandidate, but
- -- it's *really* helpful if dictionaries get inlined fast,
- -- so I'm experimenting with giving higher priority to data-typed things
-
- | inlineCandidate bndr rhs = 2 -- Likely to be inlined
-
- | idHasRules bndr = 1
+ | idHasRules bndr = 3
-- Avoid things with specialisations; we'd like
-- to take advantage of them in the subsequent bindings
+ -- Also vital to avoid risk of divergence:
+ -- Note [Recursive rules]
+
+ | is_con_app rhs = 2 -- Data types help with cases
+ -- Note [conapp]
+
+ | inlineCandidate bndr rhs = 1 -- Likely to be inlined
+ -- Note [Inline candidates]
| otherwise = 0
inlineCandidate id (Note InlineMe _) = True
inlineCandidate id rhs = isOneOcc (idOccInfo id)
- -- Real example (the Enum Ordering instance from PrelBase):
+ -- Note [conapp]
+ --
+ -- It's really really important to inline dictionaries. Real
+ -- example (the Enum Ordering instance from GHC.Base):
+ --
-- rec f = \ x -> case d of (p,q,r) -> p x
-- g = \ x -> case d of (p,q,r) -> q x
-- d = (v, f, g)
-- On the other hand we *could* simplify those case expressions if
-- we didn't stupidly choose d as the loop breaker.
-- But we won't because constructor args are marked "Many".
+ -- Inlining dictionaries is really essential to unravelling
+ -- the loops in static numeric dictionaries, see GHC.Float.
-- Cheap and cheerful; the simplifer moves casts out of the way
-- The lambda case is important to spot x = /\a. C (f a)
rules_only = bndrs `intersectsUFM` rhs_usg
\end{code}
+Note [Worker inline loop]
+~~~~~~~~~~~~~~~~~~~~~~~~
+Never choose a wrapper as the loop breaker! Because
+wrappers get auto-generated inlinings when importing, and
+that can lead to an infinite inlining loop. For example:
+ rec {
+ $wfoo x = ....foo x....
+
+ {-loop brk-} foo x = ...$wfoo x...
+ }
+
+The interface file sees the unfolding for $wfoo, and sees that foo is
+strict (and hence it gets an auto-generated wrapper). Result: an
+infinite inlining in the importing scope. So be a bit careful if you
+change this. A good example is Tree.repTree in
+nofib/spectral/minimax. If the repTree wrapper is chosen as the loop
+breaker then compiling Game.hs goes into an infinite loop (this
+happened when we gave is_con_app a lower score than inline candidates).
+
+Note [Recursive rules]
+~~~~~~~~~~~~~~~~~~~~~~
+Consider this group, which is typical of what SpecConstr builds:
+
+ fs a = ....f (C a)....
+ f x = ....f (C a)....
+ {-# RULE f (C a) = fs a #-}
+
+So 'f' and 'fs' are mutually recursive. If we choose 'fs' as the loop breaker,
+all is well; the RULE is applied, and 'fs' becomes self-recursive.
+
+But if we choose 'f' as the loop breaker, we may get an infinite loop:
+ - the RULE is applied in f's RHS (see Note [Self-recursive rules] in Simplify
+ - fs is inlined (say it's small)
+ - now there's another opportunity to apply the RULE
+
+So it's very important not to choose the RULE-variable as the loop breaker.
+This showed up when compiling Control.Concurrent.Chan.getChanContents.
+
Note [Closure conversion]
~~~~~~~~~~~~~~~~~~~~~~~~~
We treat (\x. C p q) as a high-score candidate in the letrec scoring algorithm.