compiler/stranal/WorkWrap.lhs

   1 %
   2 % (c) The GRASP/AQUA Project, Glasgow University, 1993-1998
   3 %
   4 \section[WorkWrap]{Worker/wrapper-generating back-end of strictness analyser}
   5
   6 \begin{code}
   7 module WorkWrap ( wwTopBinds, mkWrapper ) where
   8
   9 import CoreSyn
  10 import CoreUnfold       ( certainlyWillInline, mkInlineUnfolding, mkWwInlineRule )
  11 import CoreUtils        ( exprType, exprIsHNF )
  12 import CoreArity        ( exprArity )
  13 import Var
  14 import Id
  15 import Type             ( Type )
  16 import IdInfo
  17 import Demand
  18 import UniqSupply
  19 import BasicTypes
  20 import VarEnv           ( isEmptyVarEnv )
  21 import Maybes           ( orElse )
  22 import WwLib
  23 import Util             ( lengthIs, notNull )
  24 import Outputable
  25 import MonadUtils
  26
  27 #include "HsVersions.h"
  28 \end{code}
  29
  30 We take Core bindings whose binders have:
  31
  32 \begin{enumerate}
  33
  34 \item Strictness attached (by the front-end of the strictness
  35 analyser), and / or
  36
  37 \item Constructed Product Result information attached by the CPR
  38 analysis pass.
  39
  40 \end{enumerate}
  41
  42 and we return some ``plain'' bindings which have been
  43 worker/wrapper-ified, meaning:
  44
  45 \begin{enumerate}
  46
  47 \item Functions have been split into workers and wrappers where
  48 appropriate.  If a function has both strictness and CPR properties
  49 then only one worker/wrapper doing both transformations is produced;
  50
  51 \item Binders' @IdInfos@ have been updated to reflect the existence of
  52 these workers/wrappers (this is where we get STRICTNESS and CPR pragma
  53 info for exported values).
  54 \end{enumerate}
  55
  56 \begin{code}
  57 wwTopBinds :: UniqSupply -> [CoreBind] -> [CoreBind]
  58
  59 wwTopBinds us top_binds
  60   = initUs_ us $ do
  61     top_binds' <- mapM wwBind top_binds
  62     return (concat top_binds')
  63 \end{code}
  64
  65 %************************************************************************
  66 %*                                                                      *
  67 \subsection[wwBind-wwExpr]{@wwBind@ and @wwExpr@}
  68 %*                                                                      *
  69 %************************************************************************
  70
  71 @wwBind@ works on a binding, trying each \tr{(binder, expr)} pair in
  72 turn.  Non-recursive case first, then recursive...
  73
  74 \begin{code}
  75 wwBind  :: CoreBind
  76         -> UniqSM [CoreBind]    -- returns a WwBinding intermediate form;
  77                                 -- the caller will convert to Expr/Binding,
  78                                 -- as appropriate.
  79
  80 wwBind (NonRec binder rhs) = do
  81     new_rhs <- wwExpr rhs
  82     new_pairs <- tryWW NonRecursive binder new_rhs
  83     return [NonRec b e | (b,e) <- new_pairs]
  84       -- Generated bindings must be non-recursive
  85       -- because the original binding was.
  86
  87 wwBind (Rec pairs)
  88   = return . Rec <$> concatMapM do_one pairs
  89   where
  90     do_one (binder, rhs) = do new_rhs <- wwExpr rhs
  91                               tryWW Recursive binder new_rhs
  92 \end{code}
  93
  94 @wwExpr@ basically just walks the tree, looking for appropriate
  95 annotations that can be used. Remember it is @wwBind@ that does the
  96 matching by looking for strict arguments of the correct type.
  97 @wwExpr@ is a version that just returns the ``Plain'' Tree.
  98
  99 \begin{code}
 100 wwExpr :: CoreExpr -> UniqSM CoreExpr
 101
 102 wwExpr e@(Type {}) = return e
 103 wwExpr e@(Coercion {}) = return e
 104 wwExpr e@(Lit  {}) = return e
 105 wwExpr e@(Var  {}) = return e
 106
 107 wwExpr (Lam binder expr)
 108   = Lam binder <$> wwExpr expr
 109
 110 wwExpr (App f a)
 111   = App <$> wwExpr f <*> wwExpr a
 112
 113 wwExpr (Note note expr)
 114   = Note note <$> wwExpr expr
 115
 116 wwExpr (Cast expr co) = do
 117     new_expr <- wwExpr expr
 118     return (Cast new_expr co)
 119
 120 wwExpr (Let bind expr)
 121   = mkLets <$> wwBind bind <*> wwExpr expr
 122
 123 wwExpr (Case expr binder ty alts) = do
 124     new_expr <- wwExpr expr
 125     new_alts <- mapM ww_alt alts
 126     return (Case new_expr binder ty new_alts)
 127   where
 128     ww_alt (con, binders, rhs) = do
 129         new_rhs <- wwExpr rhs
 130         return (con, binders, new_rhs)
 131 \end{code}
 132
 133 %************************************************************************
 134 %*                                                                      *
 135 \subsection[tryWW]{@tryWW@: attempt a worker/wrapper pair}
 136 %*                                                                      *
 137 %************************************************************************
 138
 139 @tryWW@ just accumulates arguments, converts strictness info from the
 140 front-end into the proper form, then calls @mkWwBodies@ to do
 141 the business.
 142
 143 The only reason this is monadised is for the unique supply.
 144
 145 Note [Don't w/w INLINE things]
 146 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 147 It's very important to refrain from w/w-ing an INLINE function (ie one
 148 with an InlineRule) because the wrapper will then overwrite the
 149 InlineRule unfolding.
 150
 151 Furthermore, if the programmer has marked something as INLINE,
 152 we may lose by w/w'ing it.
 153
 154 If the strictness analyser is run twice, this test also prevents
 155 wrappers (which are INLINEd) from being re-done.  (You can end up with
 156 several like-named Ids bouncing around at the same time---absolute
 157 mischief.)
 158
 159 Notice that we refrain from w/w'ing an INLINE function even if it is
 160 in a recursive group.  It might not be the loop breaker.  (We could
 161 test for loop-breaker-hood, but I'm not sure that ever matters.)
 162
 163 Note [Don't w/w INLINABLE things]
 164 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 165 If we have
 166   {-# INLINABLE f #-}
 167   f x y = ....
 168 then in principle we might get a more efficient loop by w/w'ing f.
 169 But that would make a new unfolding which would overwrite the old
 170 one.  So we leave INLINABLE things alone too.
 171
 172 This is a slight infelicity really, because it means that adding
 173 an INLINABLE pragma could make a program a bit less efficient,
 174 because you lose the worker/wrapper stuff.  But I don't see a way
 175 to avoid that.
 176
 177 Note [Don't w/w inline small non-loop-breaker things]
 178 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 179 In general, we refrain from w/w-ing *small* functions, which are not
 180 loop breakers, because they'll inline anyway.  But we must take care:
 181 it may look small now, but get to be big later after other inlining
 182 has happened.  So we take the precaution of adding an INLINE pragma to
 183 any such functions.
 184
 185 I made this change when I observed a big function at the end of
 186 compilation with a useful strictness signature but no w-w.  (It was
 187 small during demand analysis, we refrained from w/w, and then got big
 188 when something was inlined in its rhs.) When I measured it on nofib,
 189 it didn't make much difference; just a few percent improved allocation
 190 on one benchmark (bspt/Euclid.space).  But nothing got worse.
 191
 192 There is an infelicity though.  We may get something like
 193       f = g val
 194 ==>
 195       g x = case gw x of r -> I# r
 196
 197       f {- InlineStable, Template = g val -}
 198       f = case gw val of r -> I# r
 199
 200 The code for f duplicates that for g, without any real benefit. It
 201 won't really be executed, because calls to f will go via the inlining.
 202
 203 Note [Wrapper activation]
 204 ~~~~~~~~~~~~~~~~~~~~~~~~~
 205 When should the wrapper inlining be active?  It must not be active
 206 earlier than the current Activation of the Id (eg it might have a
 207 NOINLINE pragma).  But in fact strictness analysis happens fairly
 208 late in the pipeline, and we want to prioritise specialisations over
 209 strictness.  Eg if we have
 210   module Foo where
 211     f :: Num a => a -> Int -> a
 212     f n 0 = n              -- Strict in the Int, hence wrapper
 213     f n x = f (n+n) (x-1)
 214
 215     g :: Int -> Int
 216     g x = f x x            -- Provokes a specialisation for f
 217
 218   module Bar where
 219     import Foo
 220
 221     h :: Int -> Int
 222     h x = f 3 x
 223
 224 Then we want the specialisation for 'f' to kick in before the wrapper does.
 225
 226 Now in fact the 'gentle' simplification pass encourages this, by
 227 having rules on, but inlinings off.  But that's kind of lucky. It seems
 228 more robust to give the wrapper an Activation of (ActiveAfter 0),
 229 so that it becomes active in an importing module at the same time that
 230 it appears in the first place in the defining module.
 231
 232 \begin{code}
 233 tryWW   :: RecFlag
 234         -> Id                           -- The fn binder
 235         -> CoreExpr                     -- The bound rhs; its innards
 236                                         --   are already ww'd
 237         -> UniqSM [(Id, CoreExpr)]      -- either *one* or *two* pairs;
 238                                         -- if one, then no worker (only
 239                                         -- the orig "wrapper" lives on);
 240                                         -- if two, then a worker and a
 241                                         -- wrapper.
 242 tryWW is_rec fn_id rhs
 243   | isNeverActive inline_act
 244         -- No point in worker/wrappering if the thing is never inlined!
 245         -- Because the no-inline prag will prevent the wrapper ever
 246         -- being inlined at a call site.
 247         --
 248         -- Furthermore, don't even expose strictness info
 249   = return [ (fn_id, rhs) ]
 250
 251   | is_thunk && worthSplittingThunk maybe_fn_dmd res_info
 252         -- See Note [Thunk splitting]
 253   = ASSERT2( isNonRec is_rec, ppr new_fn_id )   -- The thunk must be non-recursive
 254     checkSize new_fn_id rhs $
 255     splitThunk new_fn_id rhs
 256
 257   | is_fun && worthSplittingFun wrap_dmds res_info
 258   = checkSize new_fn_id rhs $
 259     splitFun new_fn_id fn_info wrap_dmds res_info rhs
 260
 261   | otherwise
 262   = return [ (new_fn_id, rhs) ]
 263
 264   where
 265     fn_info      = idInfo fn_id
 266     maybe_fn_dmd = demandInfo fn_info
 267     inline_act   = inlinePragmaActivation (inlinePragInfo fn_info)
 268
 269         -- In practice it always will have a strictness
 270         -- signature, even if it's a uninformative one
 271     strict_sig  = strictnessInfo fn_info `orElse` topSig
 272     StrictSig (DmdType env wrap_dmds res_info) = strict_sig
 273
 274         -- new_fn_id has the DmdEnv zapped.
 275         --      (a) it is never used again
 276         --      (b) it wastes space
 277         --      (c) it becomes incorrect as things are cloned, because
 278         --          we don't push the substitution into it
 279     new_fn_id | isEmptyVarEnv env = fn_id
 280               | otherwise         = fn_id `setIdStrictness`
 281                                      StrictSig (mkTopDmdType wrap_dmds res_info)
 282
 283     is_fun    = notNull wrap_dmds
 284     is_thunk  = not is_fun && not (exprIsHNF rhs)
 285
 286 ---------------------
 287 checkSize :: Id -> CoreExpr
 288           -> UniqSM [(Id,CoreExpr)] -> UniqSM [(Id,CoreExpr)]
 289 checkSize fn_id rhs thing_inside
 290   | isStableUnfolding (realIdUnfolding fn_id)
 291   = return [ (fn_id, rhs) ]
 292       -- See Note [Don't w/w INLINABLE things]
 293       -- and Note [Don't w/w INLINABLABLE things]
 294       -- NB: use realIdUnfolding because we want to see the unfolding
 295       --     even if it's a loop breaker!
 296
 297   | certainlyWillInline (idUnfolding fn_id)
 298   = return [ (fn_id `setIdUnfolding` inline_rule, rhs) ]
 299         -- Note [Don't w/w inline small non-loop-breaker things]
 300         -- NB: use idUnfolding because we don't want to apply
 301         --     this criterion to a loop breaker!
 302
 303   | otherwise = thing_inside
 304   where
 305     inline_rule = mkInlineUnfolding Nothing rhs
 306
 307 ---------------------
 308 splitFun :: Id -> IdInfo -> [Demand] -> DmdResult -> Expr Var
 309          -> UniqSM [(Id, CoreExpr)]
 310 splitFun fn_id fn_info wrap_dmds res_info rhs
 311   = WARN( not (wrap_dmds `lengthIs` arity), ppr fn_id <+> (ppr arity $$ ppr wrap_dmds $$ ppr res_info) )
 312     (do {
 313         -- The arity should match the signature
 314       (work_demands, wrap_fn, work_fn) <- mkWwBodies fun_ty wrap_dmds res_info one_shots
 315     ; work_uniq <- getUniqueM
 316     ; let
 317         work_rhs = work_fn rhs
 318         work_id  = mkWorkerId work_uniq fn_id (exprType work_rhs)
 319                         `setIdOccInfo` occInfo fn_info
 320                                 -- Copy over occurrence info from parent
 321                                 -- Notably whether it's a loop breaker
 322                                 -- Doesn't matter much, since we will simplify next, but
 323                                 -- seems right-er to do so
 324
 325                         `setInlineActivation` (inlinePragmaActivation inl_prag)
 326                                 -- Any inline activation (which sets when inlining is active)
 327                                 -- on the original function is duplicated on the worker
 328                                 -- It *matters* that the pragma stays on the wrapper
 329                                 -- It seems sensible to have it on the worker too, although we
 330                                 -- can't think of a compelling reason. (In ptic, INLINE things are
 331                                 -- not w/wd). However, the RuleMatchInfo is not transferred since
 332                                 -- it does not make sense for workers to be constructorlike.
 333
 334                         `setIdStrictness` StrictSig (mkTopDmdType work_demands work_res_info)
 335                                 -- Even though we may not be at top level,
 336                                 -- it's ok to give it an empty DmdEnv
 337
 338                         `setIdArity` (exprArity work_rhs)
 339                                 -- Set the arity so that the Core Lint check that the
 340                                 -- arity is consistent with the demand type goes through
 341
 342         wrap_rhs  = wrap_fn work_id
 343         wrap_prag = InlinePragma { inl_inline = Inline
 344                                  , inl_sat    = Nothing
 345                                  , inl_act    = ActiveAfter 0
 346                                  , inl_rule   = rule_match_info }
 347                 -- See Note [Wrapper activation]
 348                 -- The RuleMatchInfo is (and must be) unaffected
 349                 -- The inl_inline is bound to be False, else we would not be
 350                 --    making a wrapper
 351
 352         wrap_id   = fn_id `setIdUnfolding` mkWwInlineRule work_id wrap_rhs arity
 353                           `setInlinePragma` wrap_prag
 354                           `setIdOccInfo` NoOccInfo
 355                                 -- Zap any loop-breaker-ness, to avoid bleating from Lint
 356                                 -- about a loop breaker with an INLINE rule
 357
 358     ; return ([(work_id, work_rhs), (wrap_id, wrap_rhs)]) })
 359         -- Worker first, because wrapper mentions it
 360         -- mkWwBodies has already built a wrap_rhs with an INLINE pragma wrapped around it
 361   where
 362     fun_ty          = idType fn_id
 363     inl_prag        = inlinePragInfo fn_info
 364     rule_match_info = inlinePragmaRuleMatchInfo inl_prag
 365     arity           = arityInfo fn_info
 366                     -- The arity is set by the simplifier using exprEtaExpandArity
 367                     -- So it may be more than the number of top-level-visible lambdas
 368
 369     work_res_info | isBotRes res_info = BotRes  -- Cpr stuff done by wrapper
 370                   | otherwise         = TopRes
 371
 372     one_shots = get_one_shots rhs
 373
 374 -- If the original function has one-shot arguments, it is important to
 375 -- make the wrapper and worker have corresponding one-shot arguments too.
 376 -- Otherwise we spuriously float stuff out of case-expression join points,
 377 -- which is very annoying.
 378 get_one_shots :: Expr Var -> [Bool]
 379 get_one_shots (Lam b e)
 380   | isId b    = isOneShotLambda b : get_one_shots e
 381   | otherwise = get_one_shots e
 382 get_one_shots (Note _ e) = get_one_shots e
 383 get_one_shots _          = noOneShotInfo
 384 \end{code}
 385
 386 Note [Thunk splitting]
 387 ~~~~~~~~~~~~~~~~~~~~~~
 388 Suppose x is used strictly (never mind whether it has the CPR
 389 property).
 390
 391       let
 392         x* = x-rhs
 393       in body
 394
 395 splitThunk transforms like this:
 396
 397       let
 398         x* = case x-rhs of { I# a -> I# a }
 399       in body
 400
 401 Now simplifier will transform to
 402
 403       case x-rhs of
 404         I# a -> let x* = I# a
 405                 in body
 406
 407 which is what we want. Now suppose x-rhs is itself a case:
 408
 409         x-rhs = case e of { T -> I# a; F -> I# b }
 410
 411 The join point will abstract over a, rather than over x (which is
 412 what would have happened before), which is fine.
 413
 414 Notice that x certainly has the CPR property now!
 415
 416 In fact, splitThunk uses the function argument w/w splitting
 417 function, so that if x's demand is deeper (say U(U(L,L),L))
 418 then the splitting will go deeper too.
 419
 420 \begin{code}
 421 -- See Note [Thunk splitting]
 422 -- splitThunk converts the *non-recursive* binding
 423 --      x = e
 424 -- into
 425 --      x = let x = e
 426 --          in case x of
 427 --               I# y -> let x = I# y in x }
 428 -- See comments above. Is it not beautifully short?
 429 -- Moreover, it works just as well when there are
 430 -- several binders, and if the binders are lifted
 431 -- E.g.     x = e
 432 --     -->  x = let x = e in
 433 --              case x of (a,b) -> let x = (a,b)  in x
 434
 435 splitThunk :: Var -> Expr Var -> UniqSM [(Var, Expr Var)]
 436 splitThunk fn_id rhs = do
 437     (_, wrap_fn, work_fn) <- mkWWstr [fn_id]
 438     return [ (fn_id, Let (NonRec fn_id rhs) (wrap_fn (work_fn (Var fn_id)))) ]
 439 \end{code}
 440
 441
 442 %************************************************************************
 443 %*                                                                      *
 444 \subsection{Functions over Demands}
 445 %*                                                                      *
 446 %************************************************************************
 447
 448 \begin{code}
 449 worthSplittingFun :: [Demand] -> DmdResult -> Bool
 450                 -- True <=> the wrapper would not be an identity function
 451 worthSplittingFun ds res
 452   = any worth_it ds || returnsCPR res
 453         -- worthSplitting returns False for an empty list of demands,
 454         -- and hence do_strict_ww is False if arity is zero and there is no CPR
 455   -- See Note [Worker-wrapper for bottoming functions]
 456   where
 457     worth_it Abs              = True    -- Absent arg
 458     worth_it (Eval (Prod _)) = True     -- Product arg to evaluate
 459     worth_it _                = False
 460
 461 worthSplittingThunk :: Maybe Demand     -- Demand on the thunk
 462                     -> DmdResult        -- CPR info for the thunk
 463                     -> Bool
 464 worthSplittingThunk maybe_dmd res
 465   = worth_it maybe_dmd || returnsCPR res
 466   where
 467         -- Split if the thing is unpacked
 468     worth_it (Just (Eval (Prod ds))) = not (all isAbsent ds)
 469     worth_it _                       = False
 470 \end{code}
 471
 472 Note [Worker-wrapper for bottoming functions]
 473 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 474 We used not to split if the result is bottom.
 475 [Justification:  there's no efficiency to be gained.]
 476
 477 But it's sometimes bad not to make a wrapper.  Consider
 478         fw = \x# -> let x = I# x# in case e of
 479                                         p1 -> error_fn x
 480                                         p2 -> error_fn x
 481                                         p3 -> the real stuff
 482 The re-boxing code won't go away unless error_fn gets a wrapper too.
 483 [We don't do reboxing now, but in general it's better to pass an
 484 unboxed thing to f, and have it reboxed in the error cases....]
 485
 486
 487 %************************************************************************
 488 %*                                                                      *
 489 \subsection{The worker wrapper core}
 490 %*                                                                      *
 491 %************************************************************************
 492
 493 @mkWrapper@ is called when importing a function.  We have the type of
 494 the function and the name of its worker, and we want to make its body (the wrapper).
 495
 496 \begin{code}
 497 mkWrapper :: Type               -- Wrapper type
 498           -> StrictSig          -- Wrapper strictness info
 499           -> UniqSM (Id -> CoreExpr)    -- Wrapper body, missing worker Id
 500
 501 mkWrapper fun_ty (StrictSig (DmdType _ demands res_info)) = do
 502     (_, wrap_fn, _) <- mkWwBodies fun_ty demands res_info noOneShotInfo
 503     return wrap_fn
 504
 505 noOneShotInfo :: [Bool]
 506 noOneShotInfo = repeat False
 507 \end{code}