compiler/stranal/WorkWrap.lhs

   1 %
   2 % (c) The GRASP/AQUA Project, Glasgow University, 1993-1998
   3 %
   4 \section[WorkWrap]{Worker/wrapper-generating back-end of strictness analyser}
   5
   6 \begin{code}
   7 module WorkWrap ( wwTopBinds, mkWrapper ) where
   8
   9 import CoreSyn
  10 import CoreUnfold       ( certainlyWillInline, mkInlineRule, mkWwInlineRule )
  11 import CoreUtils        ( exprType, exprIsHNF )
  12 import CoreArity        ( exprArity )
  13 import Var
  14 import Id
  15 import Type             ( Type )
  16 import IdInfo
  17 import Demand           ( Demand(..), StrictSig(..), DmdType(..), DmdResult(..),
  18                           Demands(..), mkTopDmdType, isBotRes, returnsCPR, topSig, isAbsent
  19                         )
  20 import UniqSupply
  21 import BasicTypes       ( RecFlag(..), isNonRec, isNeverActive,
  22                           Activation(..), InlinePragma(..),
  23                           inlinePragmaActivation, inlinePragmaRuleMatchInfo )
  24 import VarEnv           ( isEmptyVarEnv )
  25 import Maybes           ( orElse )
  26 import WwLib
  27 import Util             ( lengthIs, notNull )
  28 import Outputable
  29 import MonadUtils
  30
  31 #include "HsVersions.h"
  32 \end{code}
  33
  34 We take Core bindings whose binders have:
  35
  36 \begin{enumerate}
  37
  38 \item Strictness attached (by the front-end of the strictness
  39 analyser), and / or
  40
  41 \item Constructed Product Result information attached by the CPR
  42 analysis pass.
  43
  44 \end{enumerate}
  45
  46 and we return some ``plain'' bindings which have been
  47 worker/wrapper-ified, meaning:
  48
  49 \begin{enumerate}
  50
  51 \item Functions have been split into workers and wrappers where
  52 appropriate.  If a function has both strictness and CPR properties
  53 then only one worker/wrapper doing both transformations is produced;
  54
  55 \item Binders' @IdInfos@ have been updated to reflect the existence of
  56 these workers/wrappers (this is where we get STRICTNESS and CPR pragma
  57 info for exported values).
  58 \end{enumerate}
  59
  60 \begin{code}
  61 wwTopBinds :: UniqSupply -> [CoreBind] -> [CoreBind]
  62 -- Worker/wrapper every top-level binding, drawing uniques from the given supply.
  63 wwTopBinds us top_binds
  64   = initUs_ us $ do
  65     top_binds' <- mapM wwBind top_binds   -- one list of result bindings per input binding
  66     return (concat top_binds')            -- flatten, keeping the original order
  67 \end{code}
  68
  69 %************************************************************************
  70 %*                                                                      *
  71 \subsection[wwBind-wwExpr]{@wwBind@ and @wwExpr@}
  72 %*                                                                      *
  73 %************************************************************************
  74
  75 @wwBind@ works on a binding, trying each \tr{(binder, expr)} pair in
  76 turn.  Non-recursive case first, then recursive...
  77
  78 \begin{code}
  79 wwBind  :: CoreBind
  80         -> UniqSM [CoreBind]    -- The bindings that replace the input:
  81                                 -- one NonRec per pair returned by tryWW for a
  82                                 -- NonRec input, a single Rec group for a Rec input.
  83
  84 wwBind (NonRec binder rhs) = do
  85     new_rhs <- wwExpr rhs           -- w/w the innards of the rhs first
  86     new_pairs <- tryWW NonRecursive binder new_rhs
  87     return [NonRec b e | (b,e) <- new_pairs]
  88       -- Generated bindings must be non-recursive
  89       -- because the original binding was.
  90 -- Recursive group: all resulting pairs stay in one Rec.  The `return` below is the list one, so the result is a singleton [Rec ...].
  91 wwBind (Rec pairs)
  92   = return . Rec <$> concatMapM do_one pairs
  93   where
  94     do_one (binder, rhs) = do new_rhs <- wwExpr rhs
  95                               tryWW Recursive binder new_rhs
  96 \end{code}
  97
  98 @wwExpr@ basically just walks the tree, looking for appropriate
  99 annotations that can be used. Remember it is @wwBind@ that does the
 100 matching by looking for strict arguments of the correct type.
 101 @wwExpr@ is a version that just returns the ``Plain'' Tree.
 102
 103 \begin{code}
 104 wwExpr :: CoreExpr -> UniqSM CoreExpr
 105 -- Leaves (types, literals, variables) contain no bindings and are returned unchanged.
 106 wwExpr e@(Type {}) = return e
 107 wwExpr e@(Lit  {}) = return e
 108 wwExpr e@(Var  {}) = return e
 109 -- Every other constructor is rebuilt around its recursively processed children.
 110 wwExpr (Lam binder expr)
 111   = Lam binder <$> wwExpr expr
 112
 113 wwExpr (App f a)
 114   = App <$> wwExpr f <*> wwExpr a
 115
 116 wwExpr (Note note expr)
 117   = Note note <$> wwExpr expr
 118
 119 wwExpr (Cast expr co) = do
 120     new_expr <- wwExpr expr
 121     return (Cast new_expr co)
 122 -- Let: wwBind returns a list of bindings, which mkLets combines with the processed body.
 123 wwExpr (Let bind expr)
 124   = mkLets <$> wwBind bind <*> wwExpr expr
 125 -- Case: process the scrutinee and each alternative's rhs; binder, type, constructors and pattern binders are kept as-is.
 126 wwExpr (Case expr binder ty alts) = do
 127     new_expr <- wwExpr expr
 128     new_alts <- mapM ww_alt alts
 129     return (Case new_expr binder ty new_alts)
 130   where
 131     ww_alt (con, binders, rhs) = do
 132         new_rhs <- wwExpr rhs
 133         return (con, binders, new_rhs)
 134 \end{code}
 135
 136 %************************************************************************
 137 %*                                                                      *
 138 \subsection[tryWW]{@tryWW@: attempt a worker/wrapper pair}
 139 %*                                                                      *
 140 %************************************************************************
 141
 142 @tryWW@ just accumulates arguments, converts strictness info from the
 143 front-end into the proper form, then calls @mkWwBodies@ to do
 144 the business.
 145
 146 The only reason this is monadised is for the unique supply.
 147
 148 Note [Don't w/w inline things (a)]
 149 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 150
 151 It's very important to refrain from w/w-ing an INLINE function (ie one
 152 with an InlineRule) because the wrapper will then overwrite the
 153 InlineRule unfolding.
 154
 155 Furthermore, if the programmer has marked something as INLINE,
 156 we may lose by w/w'ing it.
 157
 158 If the strictness analyser is run twice, this test also prevents
 159 wrappers (which are INLINEd) from being re-done.  (You can end up with
 160 several like-named Ids bouncing around at the same time---absolute
 161 mischief.)
 162
 163 Notice that we refrain from w/w'ing an INLINE function even if it is
 164 in a recursive group.  It might not be the loop breaker.  (We could
 165 test for loop-breaker-hood, but I'm not sure that ever matters.)
 166
 167 Note [Don't w/w inline things (b)]
 168 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 169 In general, we refrain from w/w-ing *small* functions, because they'll
 170 inline anyway.  But we must take care: it may look small now, but get
 171 to be big later after other inlining has happened.  So we take the
 172 precaution of adding an INLINE pragma to any such functions.
 173
 174 I made this change when I observed a big function at the end of
 175 compilation with a useful strictness signature but no w-w.  When
 176 I measured it on nofib, it didn't make much difference; just a few
 177 percent improved allocation on one benchmark (bspt/Euclid.space).
 178 But nothing got worse.
 179
 180 Note [Wrapper activation]
 181 ~~~~~~~~~~~~~~~~~~~~~~~~~
 182 When should the wrapper inlining be active?  It must not be active
 183 earlier than the current Activation of the Id (eg it might have a
 184 NOINLINE pragma).  But in fact strictness analysis happens fairly
 185 late in the pipeline, and we want to prioritise specialisations over
 186 strictness.  Eg if we have
 187   module Foo where
 188     f :: Num a => a -> Int -> a
 189     f n 0 = n              -- Strict in the Int, hence wrapper
 190     f n x = f (n+n) (x-1)
 191
 192     g :: Int -> Int
 193     g x = f x x            -- Provokes a specialisation for f
 194
 195   module Bsr where
 196     import Foo
 197
 198     h :: Int -> Int
 199     h x = f 3 x
 200
 201 Then we want the specialisation for 'f' to kick in before the wrapper does.
 202
 203 Now in fact the 'gentle' simplification pass encourages this, by
 204 having rules on, but inlinings off.  But that's kind of lucky. It seems
 205 more robust to give the wrapper an Activation of (ActiveAfter 0),
 206 so that it becomes active in an importing module at the same time that
 207 it appears in the first place in the defining module.
 208
 209 \begin{code}
 210 tryWW   :: RecFlag
 211         -> Id                           -- The fn binder
 212         -> CoreExpr                     -- The bound rhs; its innards
 213                                         --   are already ww'd
 214         -> UniqSM [(Id, CoreExpr)]      -- either *one* or *two* pairs;
 215                                         -- if one, then no worker (only
 216                                         -- the orig "wrapper" lives on);
 217                                         -- if two, then a worker and a
 218                                         -- wrapper.
 219 tryWW is_rec fn_id rhs
 220   | isNeverActive inline_act
 221         -- No point in worker/wrappering if the thing is never inlined!
 222         -- Because the no-inline prag will prevent the wrapper ever
 223         -- being inlined at a call site.
 224         --
 225         -- Furthermore, don't even expose strictness info
 226   = return [ (fn_id, rhs) ]
 227
 228   | is_thunk && worthSplittingThunk maybe_fn_dmd res_info
 229         -- See Note [Thunk splitting]
 230   = ASSERT2( isNonRec is_rec, ppr new_fn_id )   -- The thunk must be non-recursive
 231     checkSize new_fn_id rhs $
 232     splitThunk new_fn_id rhs
 233
 234   | is_fun && worthSplittingFun wrap_dmds res_info
 235   = checkSize new_fn_id rhs $
 236     splitFun new_fn_id fn_info wrap_dmds res_info rhs
 237 -- Nothing worth splitting: keep the rhs, but return the Id with its DmdEnv zapped.
 238   | otherwise
 239   = return [ (new_fn_id, rhs) ]
 240
 241   where
 242     fn_info      = idInfo fn_id
 243     maybe_fn_dmd = demandInfo fn_info
 244     inline_act   = inlinePragmaActivation (inlinePragInfo fn_info)
 245
 246         -- In practice it always will have a strictness
 247         -- signature, even if it's an uninformative one
 248     strict_sig  = strictnessInfo fn_info `orElse` topSig
 249     StrictSig (DmdType env wrap_dmds res_info) = strict_sig
 250
 251         -- new_fn_id has the DmdEnv zapped.
 252         --      (a) it is never used again
 253         --      (b) it wastes space
 254         --      (c) it becomes incorrect as things are cloned, because
 255         --          we don't push the substitution into it
 256     new_fn_id | isEmptyVarEnv env = fn_id
 257               | otherwise         = fn_id `setIdStrictness`
 258                                      StrictSig (mkTopDmdType wrap_dmds res_info)
 259 -- A binder with at least one argument demand is treated as a function; otherwise it is a thunk unless exprIsHNF accepts the rhs.
 260     is_fun    = notNull wrap_dmds
 261     is_thunk  = not is_fun && not (exprIsHNF rhs)
 262
 263 ---------------------
 264 checkSize :: Id -> CoreExpr
 265           -> UniqSM [(Id,CoreExpr)] -> UniqSM [(Id,CoreExpr)]
 266  -- See Note [Don't w/w inline things (a)] and Note [Don't w/w inline things (b)]
 267 checkSize fn_id rhs thing_inside
 268   | isStableUnfolding unfolding    -- For DFuns and INLINE things, leave their
 269   = return [ (fn_id, rhs) ]        -- unfolding unchanged; but still attach
 270                                    -- strictness info to the Id
 271 -- Will inline anyway: do not split, but attach an InlineRule built from the current rhs.
 272   | certainlyWillInline unfolding
 273   = return [ (fn_id `setIdUnfolding` inline_rule, rhs) ]
 274                 -- Note [Don't w/w inline things (b)]
 275 -- Otherwise run the split that the caller supplied.
 276   | otherwise = thing_inside
 277   where
 278     unfolding   = idUnfolding fn_id
 279     inline_rule = mkInlineRule rhs Nothing
 280
 281 ---------------------
 282 splitFun :: Id -> IdInfo -> [Demand] -> DmdResult -> Expr Var
 283          -> UniqSM [(Id, CoreExpr)]   -- [(worker, worker rhs), (wrapper, wrapper rhs)]
 284 splitFun fn_id fn_info wrap_dmds res_info rhs
 285   = WARN( not (wrap_dmds `lengthIs` arity), ppr fn_id <+> (ppr arity $$ ppr wrap_dmds $$ ppr res_info) )
 286     (do {
 287         -- The arity should match the signature
 288       (work_demands, wrap_fn, work_fn) <- mkWwBodies fun_ty wrap_dmds res_info one_shots
 289     ; work_uniq <- getUniqueM
 290     ; let
 291         work_rhs = work_fn rhs
 292         work_id  = mkWorkerId work_uniq fn_id (exprType work_rhs)
 293                         `setIdOccInfo` occInfo fn_info
 294                                 -- Copy over occurrence info from parent
 295                                 -- Notably whether it's a loop breaker
 296                                 -- Doesn't matter much, since we will simplify next, but
 297                                 -- seems right-er to do so
 298
 299                         `setInlineActivation` (inlinePragmaActivation inl_prag)
 300                                 -- Any inline activation (which sets when inlining is active)
 301                                 -- on the original function is duplicated on the worker
 302                                 -- It *matters* that the pragma stays on the wrapper
 303                                 -- It seems sensible to have it on the worker too, although we
 304                                 -- can't think of a compelling reason. (In particular, INLINE things are
 305                                 -- not w/wd). However, the RuleMatchInfo is not transferred since
 306                                 -- it does not make sense for workers to be constructorlike.
 307
 308                         `setIdStrictness` StrictSig (mkTopDmdType work_demands work_res_info)
 309                                 -- Even though we may not be at top level,
 310                                 -- it's ok to give it an empty DmdEnv
 311
 312                         `setIdArity` (exprArity work_rhs)
 313                                 -- Set the arity so that the Core Lint check that the
 314                                 -- arity is consistent with the demand type goes through
 315
 316         wrap_rhs  = wrap_fn work_id
 317         wrap_prag = InlinePragma { inl_inline = True
 318                                  , inl_sat    = Nothing
 319                                  , inl_act    = ActiveAfter 0
 320                                  , inl_rule   = rule_match_info }
 321                 -- See Note [Wrapper activation]
 322                 -- The RuleMatchInfo is (and must be) unaffected
 323                 -- The original Id's inl_inline is presumably False, else we would not be
 324                 --    making a wrapper; the wrapper's own inl_inline is set to True above
 325
 326         wrap_id   = fn_id `setIdUnfolding` mkWwInlineRule work_id wrap_rhs arity
 327                           `setInlinePragma` wrap_prag
 328                           `setIdOccInfo` NoOccInfo
 329                                 -- Zap any loop-breaker-ness, to avoid bleating from Lint
 330                                 -- about a loop breaker with an INLINE rule
 331
 332     ; return ([(work_id, work_rhs), (wrap_id, wrap_rhs)]) })
 333         -- Worker first, because wrapper mentions it
 334         -- mkWwBodies has already built a wrap_rhs with an INLINE pragma wrapped around it
 335   where
 336     fun_ty          = idType fn_id
 337     inl_prag        = inlinePragInfo fn_info
 338     rule_match_info = inlinePragmaRuleMatchInfo inl_prag
 339     arity           = arityInfo fn_info
 340                     -- The arity is set by the simplifier using exprEtaExpandArity
 341                     -- So it may be more than the number of top-level-visible lambdas
 342 -- The worker keeps only bottom-ness from the result info; any CPR result is reduced to TopRes here.
 343     work_res_info | isBotRes res_info = BotRes  -- Cpr stuff done by wrapper
 344                   | otherwise         = TopRes
 345
 346     one_shots = get_one_shots rhs
 347
 348 -- If the original function has one-shot arguments, it is important to
 349 -- make the wrapper and worker have corresponding one-shot arguments too.
 350 -- Otherwise we spuriously float stuff out of case-expression join points,
 351 -- which is very annoying.
 352 get_one_shots :: Expr Var -> [Bool]
 353 get_one_shots (Lam b e)
 354   | isId b    = isOneShotLambda b : get_one_shots e   -- Id binder: record its one-shot flag
 355   | otherwise = get_one_shots e                       -- non-Id binder: contributes no entry
 356 get_one_shots (Note _ e) = get_one_shots e            -- look through Notes
 357 get_one_shots _          = noOneShotInfo              -- anything else (including Cast) ends the scan
 358 \end{code}
 359
 360 Note [Thunk splitting]
 361 ~~~~~~~~~~~~~~~~~~~~~~
 362 Suppose x is used strictly (never mind whether it has the CPR
 363 property).
 364
 365       let
 366         x* = x-rhs
 367       in body
 368
 369 splitThunk transforms like this:
 370
 371       let
 372         x* = case x-rhs of { I# a -> I# a }
 373       in body
 374
 375 Now simplifier will transform to
 376
 377       case x-rhs of
 378         I# a -> let x* = I# a
 379                 in body
 380
 381 which is what we want. Now suppose x-rhs is itself a case:
 382
 383         x-rhs = case e of { T -> I# a; F -> I# b }
 384
 385 The join point will abstract over a, rather than over the boxed x (which is
 386 what would have happened before), which is fine.
 387
 388 Notice that x certainly has the CPR property now!
 389
 390 In fact, splitThunk uses the function argument w/w splitting
 391 function, so that if x's demand is deeper (say U(U(L,L),L))
 392 then the splitting will go deeper too.
 393
 394 \begin{code}
 395 -- See Note [Thunk splitting]
 396 -- splitThunk converts the *non-recursive* binding
 397 --      x = e
 398 -- into
 399 --      x = let x = e
 400 --          in case x of
 401 --               I# y -> let x = I# y in x
 402 -- See comments above. Is it not beautifully short?
 403 -- The inner Let re-binds the same Id to the original rhs, so (Var fn_id) in the body refers to that inner binding.
 404 splitThunk :: Var -> Expr Var -> UniqSM [(Var, Expr Var)]
 405 splitThunk fn_id rhs = do
 406     (_, wrap_fn, work_fn) <- mkWWstr [fn_id]
 407     return [ (fn_id, Let (NonRec fn_id rhs) (wrap_fn (work_fn (Var fn_id)))) ]
 408 \end{code}
 409
 410
 411 %************************************************************************
 412 %*                                                                      *
 413 \subsection{Functions over Demands}
 414 %*                                                                      *
 415 %************************************************************************
 416
 417 \begin{code}
 418 worthSplittingFun :: [Demand] -> DmdResult -> Bool
 419                 -- True <=> the wrapper would not be an identity function
 420 worthSplittingFun ds res
 421   = any worth_it ds || returnsCPR res
 422         -- `any worth_it []` is False, so with no argument demands
 423         -- the answer depends solely on whether the result has the CPR property
 424   -- See Note [Worker-wrapper for bottoming functions]
 425   where
 426     worth_it Abs              = True    -- Absent arg
 427     worth_it (Eval (Prod _)) = True     -- Product arg to evaluate
 428     worth_it _                = False
 429
 430 worthSplittingThunk :: Maybe Demand     -- Demand on the thunk
 431                     -> DmdResult        -- CPR info for the thunk
 432                     -> Bool
 433 worthSplittingThunk maybe_dmd res
 434   = worth_it maybe_dmd || returnsCPR res
 435   where
 436         -- Split if the demand is Eval (Prod ds) and at least one component of ds is not absent
 437     worth_it (Just (Eval (Prod ds))) = not (all isAbsent ds)
 438     worth_it _                       = False
 439 \end{code}
 440
 441 Note [Worker-wrapper for bottoming functions]
 442 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 443 We used not to split if the result is bottom.
 444 [Justification:  there's no efficiency to be gained.]
 445
 446 But it's sometimes bad not to make a wrapper.  Consider
 447         fw = \x# -> let x = I# x# in case e of
 448                                         p1 -> error_fn x
 449                                         p2 -> error_fn x
 450                                         p3 -> the real stuff
 451 The re-boxing code won't go away unless error_fn gets a wrapper too.
 452 [We don't do reboxing now, but in general it's better to pass an
 453 unboxed thing to f, and have it reboxed in the error cases....]
 454
 455
 456 %************************************************************************
 457 %*                                                                      *
 458 \subsection{The worker wrapper core}
 459 %*                                                                      *
 460 %************************************************************************
 461
 462 @mkWrapper@ is called when importing a function.  We have the type of
 463 the function and the name of its worker, and we want to make its body (the wrapper).
 464
 465 \begin{code}
 466 mkWrapper :: Type               -- Wrapper type
 467           -> StrictSig          -- Wrapper strictness info
 468           -> UniqSM (Id -> CoreExpr)    -- Wrapper body, missing worker Id
 469 -- Only the wrapper-building function from mkWwBodies is kept; the DmdEnv of the signature is ignored and no one-shot info is supplied.
 470 mkWrapper fun_ty (StrictSig (DmdType _ demands res_info)) = do
 471     (_, wrap_fn, _) <- mkWwBodies fun_ty demands res_info noOneShotInfo
 472     return wrap_fn
 473
 474 noOneShotInfo :: [Bool]
 475 noOneShotInfo = repeat False   -- infinite list of False: no argument is flagged one-shot
 476 \end{code}