Add -fwarn-lazy-unlifted-bindings to the list of flags

[ghc-hetmet.git] / compiler / stranal / WorkWrap.lhs
diff --git a/compiler/stranal/WorkWrap.lhs b/compiler/stranal/WorkWrap.lhs

index 438afd6..05c3148 100644 (file)
--- a/compiler/stranal/WorkWrap.lhs
+++ b/compiler/stranal/WorkWrap.lhs
@@ -4,42 +4,27 @@
  \section[WorkWrap]{Worker/wrapper-generating back-end of strictness analyser}
  
  \begin{code}
-{-# OPTIONS -w #-}
--- The above warning supression flag is a temporary kludge.
--- While working on this module you are encouraged to remove it and fix
--- any warnings in the module. See
---     http://hackage.haskell.org/trac/ghc/wiki/Commentary/CodingStyle#Warnings
--- for details
-
  module WorkWrap ( wwTopBinds, mkWrapper ) where
  
-#include "HsVersions.h"
-
  import CoreSyn
-import CoreUnfold      ( certainlyWillInline )
-import CoreLint                ( showPass, endPass )
-import CoreUtils       ( exprType, exprIsHNF, exprArity )
-import Id              ( Id, idType, isOneShotLambda, 
-                         setIdNewStrictness, mkWorkerId,
-                         setIdWorkerInfo, setInlinePragma,
-                         setIdArity, idInfo )
-import MkId            ( lazyIdKey, lazyIdUnfolding )
+import CoreUnfold      ( certainlyWillInline, mkInlineUnfolding, mkWwInlineRule )
+import CoreUtils       ( exprType, exprIsHNF )
+import CoreArity       ( exprArity )
+import Var
+import Id
  import Type            ( Type )
-import IdInfo          ( WorkerInfo(..), arityInfo,
-                         newDemandInfo, newStrictnessInfo, unfoldingInfo, inlinePragInfo
-                       )
-import NewDemand        ( Demand(..), StrictSig(..), DmdType(..), DmdResult(..), 
-                         Demands(..), mkTopDmdType, isBotRes, returnsCPR, topSig, isAbsent
-                       )
+import IdInfo
+import Demand
  import UniqSupply
-import Unique          ( hasKey )
-import BasicTypes      ( RecFlag(..), isNonRec, isNeverActive )
+import BasicTypes
  import VarEnv          ( isEmptyVarEnv )
  import Maybes          ( orElse )
  import WwLib
  import Util            ( lengthIs, notNull )
  import Outputable
  import MonadUtils
+
+#include "HsVersions.h"
  \end{code}
  
  We take Core bindings whose binders have:
@@ -114,16 +99,9 @@ matching by looking for strict arguments of the correct type.
  \begin{code}
  wwExpr :: CoreExpr -> UniqSM CoreExpr
  
-wwExpr e@(Type _)             = return e
-wwExpr e@(Lit _)              = return e
-wwExpr e@(Note InlineMe expr) = return e
-       -- Don't w/w inside InlineMe's
-
-wwExpr e@(Var v)
-  | v `hasKey` lazyIdKey = return lazyIdUnfolding
-  | otherwise            = return e
-       -- HACK alert: Inline 'lazy' after strictness analysis
-       -- (but not inside InlineMe's)
+wwExpr e@(Type {}) = return e
+wwExpr e@(Lit  {}) = return e
+wwExpr e@(Var  {}) = return e
  
  wwExpr (Lam binder expr)
    = Lam binder <$> wwExpr expr
@@ -161,35 +139,83 @@ wwExpr (Case expr binder ty alts) = do
  front-end into the proper form, then calls @mkWwBodies@ to do
  the business.
  
-We have to BE CAREFUL that we don't worker-wrapperize an Id that has
-already been w-w'd!  (You can end up with several liked-named Ids
-bouncing around at the same time---absolute mischief.)  So the
-criterion we use is: if an Id already has an unfolding (for whatever
-reason), then we don't w-w it.
-
  The only reason this is monadised is for the unique supply.
  
-Note [Don't w/w inline things]
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-It's very important to refrain from w/w-ing an INLINE function
-If we do so by mistake we transform
-       f = __inline (\x -> E)
-into
-       f = __inline (\x -> case x of (a,b) -> fw E)
-       fw = \ab -> (__inline (\x -> E)) (a,b)
-and the original __inline now vanishes, so E is no longer
-inside its __inline wrapper.  Death!  Disaster!
+Note [Don't w/w INLINE things]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+It's very important to refrain from w/w-ing an INLINE function (ie one
+with an InlineRule) because the wrapper will then overwrite the
+InlineRule unfolding.
  
  Furthermore, if the programmer has marked something as INLINE, 
  we may lose by w/w'ing it.
  
  If the strictness analyser is run twice, this test also prevents
-wrappers (which are INLINEd) from being re-done.
+wrappers (which are INLINEd) from being re-done.  (You can end up with
+several liked-named Ids bouncing around at the same time---absolute
+mischief.)  
  
  Notice that we refrain from w/w'ing an INLINE function even if it is
  in a recursive group.  It might not be the loop breaker.  (We could
  test for loop-breaker-hood, but I'm not sure that ever matters.)
  
+Note [Don't w/w INLINABLE things]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+If we have
+  {-# INLINABLE f #-}
+  f x y = ....
+then in principle we might get a more efficient loop by w/w'ing f.
+But that would make a new unfolding which would overwrite the old
+one.  So we leave INLINABLE things alone too.
+
+This is a slight infelicity really, because it means that adding
+an INLINABLE pragma could make a program a bit less efficient,
+because you lose the worker/wrapper stuff.  But I don't see a way 
+to avoid that.
+
+Note [Don't w/w inline small non-loop-breker things]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+In general, we refrain from w/w-ing *small* functions, which are not
+loop breakers, because they'll inline anyway.  But we must take care:
+it may look small now, but get to be big later after other inlining
+has happened.  So we take the precaution of adding an INLINE pragma to
+any such functions.
+
+I made this change when I observed a big function at the end of
+compilation with a useful strictness signature but no w-w.  When 
+I measured it on nofib, it didn't make much difference; just a few
+percent improved allocation on one benchmark (bspt/Euclid.space).  
+But nothing got worse.
+
+Note [Wrapper activation]
+~~~~~~~~~~~~~~~~~~~~~~~~~
+When should the wrapper inlining be active?  It must not be active
+earlier than the current Activation of the Id (eg it might have a
+NOINLINE pragma).  But in fact strictness analysis happens fairly
+late in the pipeline, and we want to prioritise specialisations over
+strictness.  Eg if we have 
+  module Foo where
+    f :: Num a => a -> Int -> a
+    f n 0 = n                     -- Strict in the Int, hence wrapper
+    f n x = f (n+n) (x-1)
+
+    g :: Int -> Int
+    g x = f x x                   -- Provokes a specialisation for f
+
+  module Bsr where
+    import Foo
+
+    h :: Int -> Int
+    h x = f 3 x
+
+Then we want the specialisation for 'f' to kick in before the wrapper does.
+
+Now in fact the 'gentle' simplification pass encourages this, by
+having rules on, but inlinings off.  But that's kind of lucky. It seems 
+more robust to give the wrapper an Activation of (ActiveAfter 0),
+so that it becomes active in an importing module at the same time that
+it appears in the first place in the defining module.
+
  \begin{code}
  tryWW  :: RecFlag
         -> Id                           -- The fn binder
@@ -201,34 +227,35 @@ tryWW     :: RecFlag
                                         -- if two, then a worker and a
                                         -- wrapper.
  tryWW is_rec fn_id rhs
-  |  -- isNonRec is_rec &&     -- Now omitted: see Note [Don't w/w inline things]
-     certainlyWillInline unfolding
-
-  || isNeverActive inline_prag
+  | isNeverActive inline_act
         -- No point in worker/wrappering if the thing is never inlined!
         -- Because the no-inline prag will prevent the wrapper ever
         -- being inlined at a call site. 
-  = return [ (new_fn_id, rhs) ]
+       -- 
+       -- Furthermore, don't even expose strictness info
+  = return [ (fn_id, rhs) ]
  
    | is_thunk && worthSplittingThunk maybe_fn_dmd res_info
+       -- See Note [Thunk splitting]
    = ASSERT2( isNonRec is_rec, ppr new_fn_id )  -- The thunk must be non-recursive
+    checkSize new_fn_id rhs $ 
      splitThunk new_fn_id rhs
  
    | is_fun && worthSplittingFun wrap_dmds res_info
-  = splitFun new_fn_id fn_info wrap_dmds res_info inline_prag rhs
+  = checkSize new_fn_id rhs $
+    splitFun new_fn_id fn_info wrap_dmds res_info rhs
  
    | otherwise
    = return [ (new_fn_id, rhs) ]
  
    where
      fn_info     = idInfo fn_id
-    maybe_fn_dmd = newDemandInfo fn_info
-    unfolding   = unfoldingInfo fn_info
-    inline_prag  = inlinePragInfo fn_info
+    maybe_fn_dmd = demandInfo fn_info
+    inline_act   = inlinePragmaActivation (inlinePragInfo fn_info)
  
         -- In practice it always will have a strictness 
         -- signature, even if it's a uninformative one
-    strict_sig  = newStrictnessInfo fn_info `orElse` topSig
+    strict_sig  = strictnessInfo fn_info `orElse` topSig
      StrictSig (DmdType env wrap_dmds res_info) = strict_sig
  
         -- new_fn_id has the DmdEnv zapped.  
@@ -237,14 +264,37 @@ tryWW is_rec fn_id rhs
         --      (c) it becomes incorrect as things are cloned, because
         --          we don't push the substitution into it
      new_fn_id | isEmptyVarEnv env = fn_id
-             | otherwise         = fn_id `setIdNewStrictness` 
+             | otherwise         = fn_id `setIdStrictness` 
                                      StrictSig (mkTopDmdType wrap_dmds res_info)
  
      is_fun    = notNull wrap_dmds
      is_thunk  = not is_fun && not (exprIsHNF rhs)
  
  ---------------------
-splitFun fn_id fn_info wrap_dmds res_info inline_prag rhs
+checkSize :: Id -> CoreExpr
+         -> UniqSM [(Id,CoreExpr)] -> UniqSM [(Id,CoreExpr)]
+checkSize fn_id rhs thing_inside
+  | isStableUnfolding (realIdUnfolding fn_id)
+  = return [ (fn_id, rhs) ]
+      -- See Note [Don't w/w INLINABLE things]
+      -- and Note [Don't w/w INLINABLABLE things]
+      -- NB: use realIdUnfolding because we want to see the unfolding
+      --     even if it's a loop breaker!
+
+  | certainlyWillInline (idUnfolding fn_id)
+  = return [ (fn_id `setIdUnfolding` inline_rule, rhs) ]
+       -- Note [Don't w/w inline small non-loop-breaker things]
+       -- NB: use idUnfolding because we don't want to apply
+       --     this criterion to a loop breaker!
+
+  | otherwise = thing_inside
+  where
+    inline_rule = mkInlineUnfolding Nothing rhs
+
+---------------------
+splitFun :: Id -> IdInfo -> [Demand] -> DmdResult -> Expr Var
+         -> UniqSM [(Id, CoreExpr)]
+splitFun fn_id fn_info wrap_dmds res_info rhs
    = WARN( not (wrap_dmds `lengthIs` arity), ppr fn_id <+> (ppr arity $$ ppr wrap_dmds $$ ppr res_info) ) 
      (do {
         -- The arity should match the signature
@@ -253,31 +303,55 @@ splitFun fn_id fn_info wrap_dmds res_info inline_prag rhs
      ; let
         work_rhs = work_fn rhs
         work_id  = mkWorkerId work_uniq fn_id (exprType work_rhs) 
-                       `setInlinePragma` inline_prag
-                               -- Any inline pragma (which sets when inlining is active) 
-                               -- on the original function is duplicated on the worker and wrapper
+                       `setIdOccInfo` occInfo fn_info
+                               -- Copy over occurrence info from parent
+                               -- Notably whether it's a loop breaker
+                               -- Doesn't matter much, since we will simplify next, but
+                               -- seems right-er to do so
+
+                       `setInlineActivation` (inlinePragmaActivation inl_prag)
+                               -- Any inline activation (which sets when inlining is active) 
+                               -- on the original function is duplicated on the worker
                                 -- It *matters* that the pragma stays on the wrapper
                                 -- It seems sensible to have it on the worker too, although we
                                 -- can't think of a compelling reason. (In ptic, INLINE things are 
-                               -- not w/wd)
-                       `setIdNewStrictness` StrictSig (mkTopDmdType work_demands work_res_info)
+                               -- not w/wd). However, the RuleMatchInfo is not transferred since
+                                -- it does not make sense for workers to be constructorlike.
+
+                       `setIdStrictness` StrictSig (mkTopDmdType work_demands work_res_info)
                                 -- Even though we may not be at top level, 
                                 -- it's ok to give it an empty DmdEnv
+
                          `setIdArity` (exprArity work_rhs)
                                  -- Set the arity so that the Core Lint check that the 
                                  -- arity is consistent with the demand type goes through
  
-       wrap_rhs = wrap_fn work_id
-       wrap_id  = fn_id `setIdWorkerInfo` HasWorker work_id arity
+       wrap_rhs  = wrap_fn work_id
+       wrap_prag = InlinePragma { inl_inline = Inline
+                                 , inl_sat    = Nothing
+                                 , inl_act    = ActiveAfter 0
+                                 , inl_rule   = rule_match_info }
+               -- See Note [Wrapper activation]
+               -- The RuleMatchInfo is (and must be) unaffected
+               -- The inl_inline is bound to be False, else we would not be
+               --    making a wrapper
+
+       wrap_id   = fn_id `setIdUnfolding` mkWwInlineRule work_id wrap_rhs arity
+                         `setInlinePragma` wrap_prag
+                         `setIdOccInfo` NoOccInfo
+                               -- Zap any loop-breaker-ness, to avoid bleating from Lint
+                               -- about a loop breaker with an INLINE rule
  
      ; return ([(work_id, work_rhs), (wrap_id, wrap_rhs)]) })
         -- Worker first, because wrapper mentions it
         -- mkWwBodies has already built a wrap_rhs with an INLINE pragma wrapped around it
    where
-    fun_ty = idType fn_id
-
-    arity  = arityInfo fn_info -- The arity is set by the simplifier using exprEtaExpandArity
-                               -- So it may be more than the number of top-level-visible lambdas
+    fun_ty          = idType fn_id
+    inl_prag        = inlinePragInfo fn_info
+    rule_match_info = inlinePragmaRuleMatchInfo inl_prag
+    arity           = arityInfo fn_info        
+                   -- The arity is set by the simplifier using exprEtaExpandArity
+                   -- So it may be more than the number of top-level-visible lambdas
  
      work_res_info | isBotRes res_info = BotRes -- Cpr stuff done by wrapper
                   | otherwise         = TopRes
@@ -288,15 +362,16 @@ splitFun fn_id fn_info wrap_dmds res_info inline_prag rhs
  -- make the wrapper and worker have corresponding one-shot arguments too.
  -- Otherwise we spuriously float stuff out of case-expression join points,
  -- which is very annoying.
+get_one_shots :: Expr Var -> [Bool]
  get_one_shots (Lam b e)
-  | isIdVar b = isOneShotLambda b : get_one_shots e
+  | isId b    = isOneShotLambda b : get_one_shots e
    | otherwise = get_one_shots e
  get_one_shots (Note _ e) = get_one_shots e
-get_one_shots other     = noOneShotInfo
+get_one_shots _         = noOneShotInfo
  \end{code}
  
-Thunk splitting
-~~~~~~~~~~~~~~~
+Note [Thunk splitting]
+~~~~~~~~~~~~~~~~~~~~~~
  Suppose x is used strictly (never mind whether it has the CPR
  property).  
  
@@ -330,6 +405,7 @@ function, so that if x's demand is deeper (say U(U(L,L),L))
  then the splitting will go deeper too.
  
  \begin{code}
+-- See Note [Thunk splitting]
  -- splitThunk converts the *non-recursive* binding
  --     x = e
  -- into
@@ -338,6 +414,7 @@ then the splitting will go deeper too.
  --              I# y -> let x = I# y in x }
  -- See comments above. Is it not beautifully short?
  
+splitThunk :: Var -> Expr Var -> UniqSM [(Var, Expr Var)]
  splitThunk fn_id rhs = do
      (_, wrap_fn, work_fn) <- mkWWstr [fn_id]
      return [ (fn_id, Let (NonRec fn_id rhs) (wrap_fn (work_fn (Var fn_id)))) ]
@@ -360,8 +437,8 @@ worthSplittingFun ds res
    -- See Note [Worker-wrapper for bottoming functions]
    where
      worth_it Abs             = True    -- Absent arg
-    worth_it (Eval (Prod ds)) = True   -- Product arg to evaluate
-    worth_it other           = False
+    worth_it (Eval (Prod _)) = True    -- Product arg to evaluate
+    worth_it _               = False
  
  worthSplittingThunk :: Maybe Demand    -- Demand on the thunk
                     -> DmdResult        -- CPR info for the thunk
@@ -371,7 +448,7 @@ worthSplittingThunk maybe_dmd res
    where
         -- Split if the thing is unpacked
      worth_it (Just (Eval (Prod ds))) = not (all isAbsent ds)
-    worth_it other                  = False
+    worth_it _                      = False
  \end{code}
  
  Note [Worker-wrapper for bottoming functions]
@@ -407,5 +484,6 @@ mkWrapper fun_ty (StrictSig (DmdType _ demands res_info)) = do
      (_, wrap_fn, _) <- mkWwBodies fun_ty demands res_info noOneShotInfo
      return wrap_fn
  
+noOneShotInfo :: [Bool]
  noOneShotInfo = repeat False
  \end{code}