compiler/stranal/WorkWrap.lhs

   1 %
   2 % (c) The GRASP/AQUA Project, Glasgow University, 1993-1998
   3 %
   4 \section[WorkWrap]{Worker/wrapper-generating back-end of strictness analyser}
   5
   6 \begin{code}
   7 module WorkWrap ( wwTopBinds, mkWrapper ) where
   8
   9 import CoreSyn
  10 import CoreUnfold       ( certainlyWillInline, mkInlineRule, mkWwInlineRule )
  11 import CoreUtils        ( exprType, exprIsHNF )
  12 import CoreArity        ( exprArity )
  13 import Var
  14 import Id
  15 import Type             ( Type )
  16 import IdInfo
  17 import Demand           ( Demand(..), StrictSig(..), DmdType(..), DmdResult(..),
  18                           Demands(..), mkTopDmdType, isBotRes, returnsCPR, topSig, isAbsent
  19                         )
  20 import UniqSupply
  21 import BasicTypes       ( RecFlag(..), isNonRec, isNeverActive,
  22                           Activation(..), InlinePragma(..),
  23                           inlinePragmaActivation, inlinePragmaRuleMatchInfo )
  24 import VarEnv           ( isEmptyVarEnv )
  25 import Maybes           ( orElse )
  26 import WwLib
  27 import Util             ( lengthIs, notNull )
  28 import Outputable
  29 import MonadUtils
  30
  31 #include "HsVersions.h"
  32 \end{code}
  33
  34 We take Core bindings whose binders have:
  35
  36 \begin{enumerate}
  37
  38 \item Strictness attached (by the front-end of the strictness
  39 analyser), and / or
  40
  41 \item Constructed Product Result information attached by the CPR
  42 analysis pass.
  43
  44 \end{enumerate}
  45
  46 and we return some ``plain'' bindings which have been
  47 worker/wrapper-ified, meaning:
  48
  49 \begin{enumerate}
  50
  51 \item Functions have been split into workers and wrappers where
  52 appropriate.  If a function has both strictness and CPR properties
  53 then only one worker/wrapper doing both transformations is produced;
  54
  55 \item Binders' @IdInfos@ have been updated to reflect the existence of
  56 these workers/wrappers (this is where we get STRICTNESS and CPR pragma
  57 info for exported values).
  58 \end{enumerate}
  59
  60 \begin{code}
  61 wwTopBinds :: UniqSupply -> [CoreBind] -> [CoreBind]
  62 -- Worker/wrapper each top-level binding in turn.  One input binding can
  63 -- come back as several, so the per-binding results are concatenated.
  64 wwTopBinds us top_binds
  65   = initUs_ us (concatMapM wwBind top_binds)
  66         -- wwBind runs in UniqSM; initUs_ is given the supply 'us' for it
  67 \end{code}
  68
  69 %************************************************************************
  70 %*                                                                      *
  71 \subsection[wwBind-wwExpr]{@wwBind@ and @wwExpr@}
  72 %*                                                                      *
  73 %************************************************************************
  74
  75 @wwBind@ works on a binding, trying each \tr{(binder, expr)} pair in
  76 turn.  Non-recursive case first, then recursive...
  77
  78 \begin{code}
  79 wwBind  :: CoreBind
  80         -> UniqSM [CoreBind]    -- the replacement bindings, in order; a
  81                                 -- single input binding can come back as
  82                                 -- several (see the NonRec case).
  83
  84 wwBind (NonRec bndr rhs)
  85   = do { rhs'  <- wwExpr rhs
  86        ; pairs <- tryWW NonRecursive bndr rhs'
  87        ; return [ NonRec b' e' | (b', e') <- pairs ] }
  88       -- Every generated pair gets a NonRec binding of its own: the
  89       -- original binding was non-recursive, so these must be too.
  90
  91 wwBind (Rec pairs)
  92   = do { pairs' <- concatMapM ww_pair pairs
  93        ; return [Rec pairs'] }
  94   where     -- every generated pair stays inside the one Rec group
  95     ww_pair (bndr, rhs) = wwExpr rhs >>= tryWW Recursive bndr
  96 \end{code}
  97
  98 @wwExpr@ basically just walks the tree, looking for appropriate
  99 annotations that can be used. Remember it is @wwBind@ that does the
 100 matching by looking for strict arguments of the correct type.
 101 @wwExpr@ is a version that just returns the ``Plain'' Tree.
 102
 103 \begin{code}
 104 wwExpr :: CoreExpr -> UniqSM CoreExpr
 105 -- Leaves contain no bindings, so they are returned as they are.
 106 wwExpr e@(Type {}) = return e
 107 wwExpr e@(Lit  {}) = return e
 108 wwExpr e@(Var  {}) = return e
 109
 110 -- Single-child nodes: rebuild the same node around the rewritten child.
 111 wwExpr (Lam bndr body)  = fmap (Lam bndr)  (wwExpr body)
 112 wwExpr (Note note body) = fmap (Note note) (wwExpr body)
 113 wwExpr (Cast body co)   = fmap (`Cast` co) (wwExpr body)
 114
 115 wwExpr (App fun arg) = do
 116     fun' <- wwExpr fun
 117     arg' <- wwExpr arg
 118     return (App fun' arg')
 119
 120 -- The binding is handed to wwBind, which may return several bindings;
 121 -- mkLets wraps them around the rewritten body.
 122 wwExpr (Let bind body) = do
 123     binds' <- wwBind bind
 124     body'  <- wwExpr body
 125     return (mkLets binds' body')
 126
 127 -- Scrutinee first, then the right-hand side of each alternative.
 128 wwExpr (Case scrut bndr ty alts) = do
 129     scrut' <- wwExpr scrut
 130     alts'  <- mapM ww_alt alts
 131     return (Case scrut' bndr ty alts')
 132   where
 133     ww_alt (con, bndrs, rhs) = fmap ((,,) con bndrs) (wwExpr rhs)
 134 \end{code}
 135
 136 %************************************************************************
 137 %*                                                                      *
 138 \subsection[tryWW]{@tryWW@: attempt a worker/wrapper pair}
 139 %*                                                                      *
 140 %************************************************************************
 141
 142 @tryWW@ just accumulates arguments, converts strictness info from the
 143 front-end into the proper form, then calls @mkWwBodies@ to do
 144 the business.
 145
 146 The only reason this is monadised is for the unique supply.
 147
 148 Note [Don't w/w inline things (a)]
 149 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 150
 151 It's very important to refrain from w/w-ing an INLINE function (ie one
 152 with an InlineRule) because the wrapper will then overwrite the
 153 InlineRule unfolding.
 154
 155 Furthermore, if the programmer has marked something as INLINE,
 156 we may lose by w/w'ing it.
 157
 158 If the strictness analyser is run twice, this test also prevents
 159 wrappers (which are INLINEd) from being re-done.  (You can end up with
 160 several like-named Ids bouncing around at the same time---absolute
 161 mischief.)
 162
 163 Notice that we refrain from w/w'ing an INLINE function even if it is
 164 in a recursive group.  It might not be the loop breaker.  (We could
 165 test for loop-breaker-hood, but I'm not sure that ever matters.)
 166
 167 Note [Don't w/w inline things (b)]
 168 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 169 In general, we refrain from w/w-ing *small* functions, because they'll
 170 inline anyway.  But we must take care: it may look small now, but get
 171 to be big later after other inlining has happened.  So we take the
 172 precaution of adding an INLINE pragma to any such functions.
 173
 174 I made this change when I observed a big function at the end of
 175 compilation with a useful strictness signature but no w-w.  When
 176 I measured it on nofib, it didn't make much difference; just a few
 177 percent improved allocation on one benchmark (bspt/Euclid.space).
 178 But nothing got worse.
 179
 180 Note [Wrapper activation]
 181 ~~~~~~~~~~~~~~~~~~~~~~~~~
 182 When should the wrapper inlining be active?  It must not be active
 183 earlier than the current Activation of the Id (eg it might have a
 184 NOINLINE pragma).  But in fact strictness analysis happens fairly
 185 late in the pipeline, and we want to prioritise specialisations over
 186 strictness.  Eg if we have
 187   module Foo where
 188     f :: Num a => a -> Int -> a
 189     f n 0 = n              -- Strict in the Int, hence wrapper
 190     f n x = f (n+n) (x-1)
 191
 192     g :: Int -> Int
 193     g x = f x x            -- Provokes a specialisation for f
 194
 195   module Bsr where
 196     import Foo
 197
 198     h :: Int -> Int
 199     h x = f 3 x
 200
 201 Then we want the specialisation for 'f' to kick in before the wrapper does.
 202
 203 Now in fact the 'gentle' simplification pass encourages this, by
 204 having rules on, but inlinings off.  But that's kind of lucky. It seems
 205 more robust to give the wrapper an Activation of (ActiveAfter 0),
 206 so that it becomes active in an importing module at the same time that
 207 it appears in the first place in the defining module.
 208
 209 \begin{code}
 210 tryWW   :: RecFlag                      -- Only consulted by the ASSERT2 below
 211         -> Id                           -- The fn binder
 212         -> CoreExpr                     -- The bound rhs; its innards
 213                                         --   are already ww'd
 214         -> UniqSM [(Id, CoreExpr)]      -- either *one* or *two* pairs;
 215                                         -- if one, then no worker (only
 216                                         -- the orig "wrapper" lives on);
 217                                         -- if two, then a worker and a
 218                                         -- wrapper.
 219 tryWW is_rec fn_id rhs
 220   | isNeverActive inline_act
 221         -- No point in worker/wrappering if the thing is never inlined!
 222         -- Because the no-inline prag will prevent the wrapper ever
 223         -- being inlined at a call site.
 224         --
 225         -- Furthermore, don't even expose strictness info
 226   = return [ (fn_id, rhs) ]
 227         -- (note: the branch above returns the original fn_id, not new_fn_id)
 228   | is_thunk && worthSplittingThunk maybe_fn_dmd res_info
 229   = ASSERT2( isNonRec is_rec, ppr new_fn_id )   -- The thunk must be non-recursive
 230     checkSize new_fn_id rhs $
 231     splitThunk new_fn_id rhs
 232         -- Both splitting branches go through checkSize, which may veto the split
 233   | is_fun && worthSplittingFun wrap_dmds res_info
 234   = checkSize new_fn_id rhs $
 235     splitFun new_fn_id fn_info wrap_dmds res_info rhs
 236         -- Nothing worth splitting: keep the single binding, on new_fn_id
 237   | otherwise
 238   = return [ (new_fn_id, rhs) ]
 239
 240   where
 241     fn_info      = idInfo fn_id
 242     maybe_fn_dmd = demandInfo fn_info
 243     inline_act   = inlinePragmaActivation (inlinePragInfo fn_info)
 244
 245         -- In practice it always will have a strictness
 246         -- signature, even if it's a uninformative one
 247     strict_sig  = strictnessInfo fn_info `orElse` topSig
 248     StrictSig (DmdType env wrap_dmds res_info) = strict_sig
 249
 250         -- new_fn_id has the DmdEnv zapped.
 251         --      (a) it is never used again
 252         --      (b) it wastes space
 253         --      (c) it becomes incorrect as things are cloned, because
 254         --          we don't push the substitution into it
 255     new_fn_id | isEmptyVarEnv env = fn_id
 256               | otherwise         = fn_id `setIdStrictness`
 257                                      StrictSig (mkTopDmdType wrap_dmds res_info)
 258         -- Classification used by the guards above:
 259     is_fun    = notNull wrap_dmds                   -- has argument demands
 260     is_thunk  = not is_fun && not (exprIsHNF rhs)   -- no arg demands, rhs not in HNF
 261
 262 ---------------------
 263 checkSize :: Id -> CoreExpr
 264           -> UniqSM [(Id,CoreExpr)] -> UniqSM [(Id,CoreExpr)]
 265  -- Decides whether a proposed split should happen at all; see
 266  -- Note [Don't w/w inline things (a)] and Note [Don't w/w inline things (b)]
 267 checkSize fn_id rhs thing_inside
 268   | isStableUnfolding unf       -- (a) stable unfolding (DFuns, INLINE things):
 269   = unsplit fn_id               --     leave binding and unfolding alone
 270
 271   | certainlyWillInline unf     -- (b) will inline anyway: don't split, but
 272   = unsplit (setIdUnfolding fn_id (mkInlineRule rhs Nothing))
 273                                 --     pin an InlineRule built from this rhs
 274
 275   | otherwise = thing_inside    -- otherwise run the split we were handed
 276   where
 277     unf       = idUnfolding fn_id
 278     unsplit v = return [ (v, rhs) ]
 279
 280 ---------------------
 281 splitFun :: Id -> IdInfo -> [Demand] -> DmdResult -> Expr Var
 282          -> UniqSM [(Id, CoreExpr)]     -- [(worker, work_rhs), (wrapper, wrap_rhs)]
 283 splitFun fn_id fn_info wrap_dmds res_info rhs
 284   = WARN( not (wrap_dmds `lengthIs` arity), ppr fn_id <+> (ppr arity $$ ppr wrap_dmds $$ ppr res_info) )
 285     (do {
 286         -- The arity should match the signature (that is what the WARN above checks)
 287       (work_demands, wrap_fn, work_fn) <- mkWwBodies fun_ty wrap_dmds res_info one_shots
 288     ; work_uniq <- getUniqueM
 289     ; let
 290         work_rhs = work_fn rhs
 291         work_id  = mkWorkerId work_uniq fn_id (exprType work_rhs)
 292                         `setIdOccInfo` occInfo fn_info
 293                                 -- Copy over occurrence info from parent
 294                                 -- Notably whether it's a loop breaker
 295                                 -- Doesn't matter much, since we will simplify next, but
 296                                 -- seems right-er to do so
 297
 298                         `setInlineActivation` (inlinePragmaActivation inl_prag)
 299                                 -- Any inline activation (which sets when inlining is active)
 300                                 -- on the original function is duplicated on the worker
 301                                 -- It *matters* that the pragma stays on the wrapper
 302                                 -- It seems sensible to have it on the worker too, although we
 303                                 -- can't think of a compelling reason. (In practice, INLINE things
 304                                 -- are not w/wd). However, the RuleMatchInfo is not transferred since
 305                                 -- it does not make sense for workers to be constructorlike.
 306
 307                         `setIdStrictness` StrictSig (mkTopDmdType work_demands work_res_info)
 308                                 -- Even though we may not be at top level,
 309                                 -- it's ok to give it an empty DmdEnv
 310
 311                         `setIdArity` (exprArity work_rhs)
 312                                 -- Set the arity so that the Core Lint check that the
 313                                 -- arity is consistent with the demand type goes through
 314
 315         wrap_rhs  = wrap_fn work_id
 316         wrap_prag = InlinePragma { inl_inline = True
 317                                  , inl_sat    = Nothing
 318                                  , inl_act    = ActiveAfter 0
 319                                  , inl_rule   = rule_match_info }
 320                 -- See Note [Wrapper activation]
 321                 -- The RuleMatchInfo is (and must be) unaffected
 322                 -- The inl_inline of the *original* function is bound to be False,
 323                 --    else we would not be making a wrapper
 324
 325         wrap_id   = fn_id `setIdUnfolding` mkWwInlineRule work_id wrap_rhs arity
 326                           `setInlinePragma` wrap_prag
 327                           `setIdOccInfo` NoOccInfo
 328                                 -- Zap any loop-breaker-ness, to avoid bleating from Lint
 329                                 -- about a loop breaker with an INLINE rule
 330
 331     ; return ([(work_id, work_rhs), (wrap_id, wrap_rhs)]) })
 332         -- Worker first, because wrapper mentions it
 333         -- mkWwBodies has already built a wrap_rhs with an INLINE pragma wrapped around it
 334   where
 335     fun_ty          = idType fn_id
 336     inl_prag        = inlinePragInfo fn_info
 337     rule_match_info = inlinePragmaRuleMatchInfo inl_prag
 338     arity           = arityInfo fn_info
 339                     -- The arity is set by the simplifier using exprEtaExpandArity
 340                     -- So it may be more than the number of top-level-visible lambdas
 341                     -- Result info for the worker signature: only bottoming-ness is kept
 342     work_res_info | isBotRes res_info = BotRes  -- Cpr stuff done by wrapper
 343                   | otherwise         = TopRes
 344                     -- One-shot flags read off the lambdas of rhs, handed to mkWwBodies
 345     one_shots = get_one_shots rhs
 346
 347 -- Collect the one-shot flag of each Id-binding lambda at the head of an
 348 -- expression, looking through Notes and skipping non-Id binders; the list
 349 -- is then continued with noOneShotInfo.  Worker and wrapper must inherit
 350 -- these flags, or we spuriously float stuff out of case join points.
 351 get_one_shots :: Expr Var -> [Bool]
 352 get_one_shots expr = case expr of
 353     Lam bndr body | isId bndr -> isOneShotLambda bndr : get_one_shots body
 354                   | otherwise -> get_one_shots body
 355     Note _ body               -> get_one_shots body
 356     _                         -> noOneShotInfo
 357 \end{code}
 358
 359 Thunk splitting
 360 ~~~~~~~~~~~~~~~
 361 Suppose x is used strictly (never mind whether it has the CPR
 362 property).
 363
 364       let
 365         x* = x-rhs
 366       in body
 367
 368 splitThunk transforms like this:
 369
 370       let
 371         x* = case x-rhs of { I# a -> I# a }
 372       in body
 373
 374 Now simplifier will transform to
 375
 376       case x-rhs of
 377         I# a -> let x* = I# a
 378                 in body
 379
 380 which is what we want. Now suppose x-rhs is itself a case:
 381
 382         x-rhs = case e of { T -> I# a; F -> I# b }
 383
 384 The join point will abstract over a, rather than over x (which is
 385 what would have happened before), which is fine.
 386
 387 Notice that x certainly has the CPR property now!
 388
 389 In fact, splitThunk uses the function argument w/w splitting
 390 function, so that if x's demand is deeper (say U(U(L,L),L))
 391 then the splitting will go deeper too.
 392
 393 \begin{code}
 394 -- splitThunk rewrites the *non-recursive* binding
 395 --      x = e
 396 -- into
 397 --      x = let x = e
 398 --          in case x of
 399 --               I# y -> let x = I# y in x
 400 -- It calls mkWWstr on [x] and composes the wrapper and worker parts it
 401 -- gets back around the variable x itself; the first component is unused.
 402 splitThunk :: Var -> Expr Var -> UniqSM [(Var, Expr Var)]
 403 splitThunk fn_id rhs
 404   = do { (_, wrap_part, work_part) <- mkWWstr [fn_id]
 405        ; return [ (fn_id, Let (NonRec fn_id rhs) (wrap_part (work_part (Var fn_id)))) ] }
 406 \end{code}
 407
 408
 409 %************************************************************************
 410 %*                                                                      *
 411 \subsection{Functions over Demands}
 412 %*                                                                      *
 413 %************************************************************************
 414
 415 \begin{code}
 416 worthSplittingFun :: [Demand] -> DmdResult -> Bool
 417                 -- True <=> the wrapper would not be an identity function
 418 worthSplittingFun ds res
 419   | any useful ds = True            -- some argument is absent or a product
 420   | otherwise     = returnsCPR res  -- else only a CPR result justifies it
 421         -- An empty demand list fails the first guard, so a binding with
 422         -- no argument demands is split only when it has the CPR property.
 423   -- See Note [Worker-wrapper for bottoming functions]
 424   where
 425     useful dmd = case dmd of { Abs -> True; Eval (Prod _) -> True; _ -> False }
 426         -- Abs: absent argument;  Eval (Prod _): product argument to evaluate
 427
 428 worthSplittingThunk :: Maybe Demand     -- Demand on the thunk
 429                     -> DmdResult        -- CPR info for the thunk
 430                     -> Bool
 431 worthSplittingThunk maybe_dmd res
 432   = case maybe_dmd of
 433       Just (Eval (Prod ds))             -- evaluated product: split when at
 434         | any (not . isAbsent) ds -> True   -- least one component is used
 435       _                           -> returnsCPR res
 436         -- any other demand, or none: only the CPR property justifies it
 437 \end{code}
 438
 439 Note [Worker-wrapper for bottoming functions]
 440 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 441 We used not to split if the result is bottom.
 442 [Justification:  there's no efficiency to be gained.]
 443
 444 But it's sometimes bad not to make a wrapper.  Consider
 445         fw = \x# -> let x = I# x# in case e of
 446                                         p1 -> error_fn x
 447                                         p2 -> error_fn x
 448                                         p3 -> the real stuff
 449 The re-boxing code won't go away unless error_fn gets a wrapper too.
 450 [We don't do reboxing now, but in general it's better to pass an
 451 unboxed thing to f, and have it reboxed in the error cases....]
 452
 453
 454 %************************************************************************
 455 %*                                                                      *
 456 \subsection{The worker wrapper core}
 457 %*                                                                      *
 458 %************************************************************************
 459
 460 @mkWrapper@ is called when importing a function.  We have the type of
 461 the function and the name of its worker, and we want to make its body (the wrapper).
 462
 463 \begin{code}
 464 mkWrapper :: Type               -- Wrapper type
 465           -> StrictSig          -- Wrapper strictness info
 466           -> UniqSM (Id -> CoreExpr)    -- Wrapper body, missing worker Id
 467 -- Runs mkWwBodies with no one-shot info and keeps only its wrapper part.
 468 mkWrapper fun_ty (StrictSig (DmdType _ demands res_info))
 469   = do { (_, wrapper_body, _) <- mkWwBodies fun_ty demands res_info noOneShotInfo
 470        ; return wrapper_body }
 471
 472 noOneShotInfo :: [Bool]        -- endless list of False: no one-shot flags set
 473 noOneShotInfo = repeat False
 474 \end{code}