From 1d7a3cf332532b1f9d798b44e76c4be6f0c74dcf Mon Sep 17 00:00:00 2001
From: "simonpj@microsoft.com" <unknown>
Date: Tue, 21 Dec 2010 16:58:00 +0000
Subject: [PATCH] Add a simple arity analyser

I've wanted to do this for ages, but never gotten around to
it.  The main notes are in Note [Arity analysis] in SimplUtils.

The motivating example for arity analysis is this:

  f = \x. let g = f (x+1)
          in \y. ...g...

What arity does f have?  Really it should have arity 2, but a naive
look at the RHS won't see that.  You need a fixpoint analysis which
says it has arity "infinity" the first time round.

This makes things more robust to the way in which you write code.  For
example, see Trac #4474 which is fixed by this change.

Not a huge difference, but worth while:

        Program           Size    Allocs   Runtime   Elapsed
--------------------------------------------------------------------------------
            Min          -0.4%     -2.2%    -10.0%    -10.0%
            Max          +2.7%     +0.3%     +7.1%     +6.9%
 Geometric Mean          -0.3%     -0.2%     -2.1%     -2.2%

I don't really believe the runtime numbers, because the machine was
busy, but the bottom line is that not much changes, and what does
change reliably (allocation and size) is in the right direction.
---
 compiler/coreSyn/CoreArity.lhs    |   88 ++++++++++++++--------------------
 compiler/coreSyn/CoreUtils.lhs    |   11 +++--
 compiler/simplCore/SimplUtils.lhs |   95 ++++++++++++++++++++++++++++++++++++-
 3 files changed, 136 insertions(+), 58 deletions(-)
diff --git a/compiler/coreSyn/CoreArity.lhs b/compiler/coreSyn/CoreArity.lhs
index 46cf255..678c961 100644
--- a/compiler/coreSyn/CoreArity.lhs
+++ b/compiler/coreSyn/CoreArity.lhs
@@ -9,7 +9,7 @@
 -- | Arit and eta expansion
 module CoreArity (
 	manifestArity, exprArity, exprBotStrictness_maybe,
-	exprEtaExpandArity, etaExpand
+	exprEtaExpandArity, CheapFun, etaExpand
     ) where
 
 #include "HsVersions.h"
@@ -24,12 +24,10 @@ import VarEnv
 import Id
 import Type
 import TyCon	( isRecursiveTyCon, isClassTyCon )
-import TcType	( isDictLikeTy )
 import Coercion
 import BasicTypes
 import Unique
 import Outputable
-import DynFlags
 import FastString
 \end{code}
 
@@ -120,9 +118,11 @@ exprBotStrictness_maybe :: CoreExpr -> Maybe (Arity, StrictSig)
 -- and gives them a suitable strictness signatures.  It's used during
 -- float-out
 exprBotStrictness_maybe e
-  = case getBotArity (arityType False e) of
+  = case getBotArity (arityType is_cheap e) of
 	Nothing -> Nothing
 	Just ar -> Just (ar, mkStrictSig (mkTopDmdType (replicate ar topDmd) BotRes))
+  where
+    is_cheap _ _ = False  -- Irrelevant for this purpose
 \end{code}
 
 Note [exprArity invariant]
@@ -436,18 +436,17 @@ vanillaArityType = ATop []	-- Totally uninformative
 
 -- ^ The Arity returned is the number of value args the [_$_]
 -- expression can be applied to without doing much work
-exprEtaExpandArity :: DynFlags -> CoreExpr -> Arity
+exprEtaExpandArity :: CheapFun -> CoreExpr -> Arity
 -- exprEtaExpandArity is used when eta expanding
 -- 	e  ==>  \xy -> e x y
-exprEtaExpandArity dflags e
-  = case (arityType dicts_cheap e) of
+exprEtaExpandArity cheap_fun e
+  = case (arityType cheap_fun e) of
       ATop (os:oss) 
         | os || has_lam e -> 1 + length oss	-- Note [Eta expanding thunks]
         | otherwise       -> 0
       ATop []             -> 0
       ABot n              -> n
   where
-    dicts_cheap = dopt Opt_DictsCheap dflags
     has_lam (Note _ e) = has_lam e
     has_lam (Lam b e)  = isId b || has_lam e
     has_lam _          = False
@@ -484,13 +483,13 @@ floatIn True  (ATop as) = ATop as
 floatIn False (ATop as) = ATop (takeWhile id as)
    -- If E is not cheap, keep arity only for one-shots
 
-arityApp :: ArityType -> CoreExpr -> ArityType
+arityApp :: ArityType -> Bool -> ArityType
 -- Processing (fun arg) where at is the ArityType of fun,
 -- Knock off an argument and behave like 'let'
-arityApp (ABot 0)      _   = ABot 0
-arityApp (ABot n)      _   = ABot (n-1)
-arityApp (ATop [])     _   = ATop []
-arityApp (ATop (_:as)) arg = floatIn (exprIsCheap arg) (ATop as)
+arityApp (ABot 0)      _     = ABot 0
+arityApp (ABot n)      _     = ABot (n-1)
+arityApp (ATop [])     _     = ATop []
+arityApp (ATop (_:as)) cheap = floatIn cheap (ATop as)
 
 andArityType :: ArityType -> ArityType -> ArityType   -- Used for branches of a 'case'
 andArityType (ABot n1) (ABot n2) 
@@ -527,7 +526,12 @@ lambda wasn't one-shot we don't want to do this.
 
 \begin{code}
 ---------------------------
-arityType :: Bool -> CoreExpr -> ArityType
+type CheapFun = CoreExpr -> Maybe Type -> Bool
+     	-- How to decide if an expression is cheap
+	-- If the Maybe is Just, the type is the type
+	-- of the expression; Nothing means "don't know"
+
+arityType :: CheapFun -> CoreExpr -> ArityType
 arityType _ (Var v)
   | Just strict_sig <- idStrictness_maybe v
   , (ds, res) <- splitStrictSig strict_sig
@@ -541,15 +545,15 @@ arityType _ (Var v)
     one_shots = typeArity (idType v)
 
 	-- Lambdas; increase arity
-arityType dicts_cheap (Lam x e)
-  | isId x    = arityLam x (arityType dicts_cheap e)
-  | otherwise = arityType dicts_cheap e
+arityType cheap_fn (Lam x e)
+  | isId x    = arityLam x (arityType cheap_fn e)
+  | otherwise = arityType cheap_fn e
 
 	-- Applications; decrease arity
-arityType dicts_cheap (App fun (Type _))
-   = arityType dicts_cheap fun
-arityType dicts_cheap (App fun arg )
-   = arityApp (arityType dicts_cheap fun) arg 
+arityType cheap_fn (App fun (Type _))
+   = arityType cheap_fn fun
+arityType cheap_fn (App fun arg )
+   = arityApp (arityType cheap_fn fun) (cheap_fn arg Nothing) 
 
 	-- Case/Let; keep arity if either the expression is cheap
 	-- or it's a 1-shot lambda
@@ -558,41 +562,21 @@ arityType dicts_cheap (App fun arg )
 	--  ===>
 	--	f x y = case x of { (a,b) -> e }
 	-- The difference is observable using 'seq'
-arityType dicts_cheap (Case scrut _ _ alts)
-  = floatIn (exprIsCheap scrut)
-	      (foldr1 andArityType [arityType dicts_cheap rhs | (_,_,rhs) <- alts])
+arityType cheap_fn (Case scrut bndr _ alts)
+  = floatIn (cheap_fn scrut (Just (idType bndr)))
+	    (foldr1 andArityType [arityType cheap_fn rhs | (_,_,rhs) <- alts])
 
-arityType dicts_cheap (Let b e) 
-  = floatIn (cheap_bind b) (arityType dicts_cheap e)
+arityType cheap_fn (Let b e) 
+  = floatIn (cheap_bind b) (arityType cheap_fn e)
   where
     cheap_bind (NonRec b e) = is_cheap (b,e)
     cheap_bind (Rec prs)    = all is_cheap prs
-    is_cheap (b,e) = (dicts_cheap && isDictLikeTy (idType b))
-		   || exprIsCheap e
-	-- If the experimental -fdicts-cheap flag is on, we eta-expand through
-	-- dictionary bindings.  This improves arities. Thereby, it also
-	-- means that full laziness is less prone to floating out the
-  	-- application of a function to its dictionary arguments, which
-	-- can thereby lose opportunities for fusion.  Example:
-	-- 	foo :: Ord a => a -> ...
-	--	foo = /\a \(d:Ord a). let d' = ...d... in \(x:a). ....
-	--		-- So foo has arity 1
-	--
-	--	f = \x. foo dInt $ bar x
-	--
-	-- The (foo DInt) is floated out, and makes ineffective a RULE 
-	--	foo (bar x) = ...
-  	--
-	-- One could go further and make exprIsCheap reply True to any
-	-- dictionary-typed expression, but that's more work.
-	-- 
-	-- See Note [Dictionary-like types] in TcType.lhs for why we use
-	-- isDictLikeTy here rather than isDictTy
-
-arityType dicts_cheap (Note n e) 
-  | notSccNote n                 = arityType dicts_cheap e
-arityType dicts_cheap (Cast e _) = arityType dicts_cheap e
-arityType _           _          = vanillaArityType
+    is_cheap (b,e) = cheap_fn e (Just (idType b))
+
+arityType cheap_fn (Note n e) 
+  | notSccNote n              = arityType cheap_fn e
+arityType cheap_fn (Cast e _) = arityType cheap_fn e
+arityType _           _       = vanillaArityType
 \end{code}
   
   
diff --git a/compiler/coreSyn/CoreUtils.lhs b/compiler/coreSyn/CoreUtils.lhs
index 72977be..3b7f1af 100644
--- a/compiler/coreSyn/CoreUtils.lhs
+++ b/compiler/coreSyn/CoreUtils.lhs
@@ -25,7 +25,8 @@ module CoreUtils (
 
 	-- * Properties of expressions
 	exprType, coreAltType, coreAltsType,
-	exprIsDupable, exprIsTrivial, exprIsCheap, exprIsExpandable,
+	exprIsDupable, exprIsTrivial, 
+        exprIsCheap, exprIsExpandable, exprIsCheap', CheapAppFun,
 	exprIsHNF, exprOkForSpeculation, exprIsBig, exprIsConLike,
 	rhsIsStatic, isCheapApp, isExpandableApp,
 
@@ -513,8 +514,8 @@ exprIsCheap = exprIsCheap' isCheapApp
 exprIsExpandable :: CoreExpr -> Bool
 exprIsExpandable = exprIsCheap' isExpandableApp	-- See Note [CONLIKE pragma] in BasicTypes
 
-
-exprIsCheap' :: (Id -> Int -> Bool) -> CoreExpr -> Bool
+type CheapAppFun = Id -> Int -> Bool
+exprIsCheap' :: CheapAppFun -> CoreExpr -> Bool
 exprIsCheap' _          (Lit _)   = True
 exprIsCheap' _          (Type _)  = True
 exprIsCheap' _          (Var _)   = True
@@ -582,12 +583,12 @@ exprIsCheap' good_app other_expr 	-- Applications and variables
   		-- BUT: Take care with (sel d x)!  The (sel d) might be cheap, but
   		--	there's no guarantee that (sel d x) will be too.  Hence (n_val_args == 1)
 
-isCheapApp :: Id -> Int -> Bool
+isCheapApp :: CheapAppFun
 isCheapApp fn n_val_args
   = isDataConWorkId fn 
   || n_val_args < idArity fn
 
-isExpandableApp :: Id -> Int -> Bool
+isExpandableApp :: CheapAppFun
 isExpandableApp fn n_val_args
   =  isConLikeId fn
   || n_val_args < idArity fn
diff --git a/compiler/simplCore/SimplUtils.lhs b/compiler/simplCore/SimplUtils.lhs
index a2fe28d..76ce1f9 100644
--- a/compiler/simplCore/SimplUtils.lhs
+++ b/compiler/simplCore/SimplUtils.lhs
@@ -45,6 +45,7 @@ import Id
 import Var
 import Demand
 import SimplMonad
+import TcType	( isDictLikeTy )
 import Type	hiding( substTy )
 import Coercion ( coercionKind )
 import TyCon
@@ -1069,6 +1070,9 @@ because the latter is not well-kinded.
 %*									*
 %************************************************************************
 
+When we meet a let-binding we try eta-expansion.  To find the 
+arity of the RHS we use a little fixpoint analysis; see Note [Arity analysis]
+
 \begin{code}
 tryEtaExpand :: SimplEnv -> OutId -> OutExpr -> SimplM (Arity, OutExpr)
 -- See Note [Eta-expanding at let bindings]
@@ -1085,7 +1089,8 @@ tryEtaExpand env bndr rhs
     try_expand dflags
       | sm_eta_expand (getMode env)      -- Provided eta-expansion is on
       , not (exprIsTrivial rhs)
-      , let new_arity = exprEtaExpandArity dflags rhs
+      , let dicts_cheap = dopt Opt_DictsCheap dflags
+            new_arity   = findArity dicts_cheap bndr rhs old_arity
       , new_arity > rhs_arity
       = do { tick (EtaExpansion bndr)
            ; return (new_arity, etaExpand new_arity rhs) }
@@ -1095,6 +1100,67 @@ tryEtaExpand env bndr rhs
     rhs_arity  = exprArity rhs
     old_arity  = idArity bndr
     _dmd_arity = length $ fst $ splitStrictSig $ idStrictness bndr
+
+findArity :: Bool -> Id -> CoreExpr -> Arity -> Arity
+-- This implements the fixpoint loop for arity analysis
+-- See Note [Arity analysis]
+findArity dicts_cheap bndr rhs old_arity
+  = go (exprEtaExpandArity (mk_cheap_fn dicts_cheap init_cheap_app) rhs)
+       -- We always call exprEtaExpandArity once, but usually 
+       -- that produces a result equal to old_arity, and then
+       -- we stop right away (since arities should not decrease)
+       -- Result: the common case is that there is just one iteration
+  where
+    go :: Arity -> Arity
+    go cur_arity
+      | cur_arity <= old_arity = cur_arity	
+      | new_arity == cur_arity = cur_arity
+      | otherwise = ASSERT( new_arity < cur_arity )
+                    pprTrace "Exciting arity" 
+                       (vcat [ ppr bndr <+> ppr cur_arity <+> ppr new_arity
+                             , ppr rhs])
+                    go new_arity
+      where
+        new_arity = exprEtaExpandArity (mk_cheap_fn dicts_cheap cheap_app) rhs
+      
+        cheap_app :: CheapAppFun
+        cheap_app fn n_val_args
+          | fn == bndr = n_val_args < cur_arity
+          | otherwise  = isCheapApp fn n_val_args
+
+    init_cheap_app :: CheapAppFun
+    init_cheap_app fn n_val_args
+      | fn == bndr = True
+      | otherwise  = isCheapApp fn n_val_args
+ 
+mk_cheap_fn :: Bool -> CheapAppFun -> CheapFun
+mk_cheap_fn dicts_cheap cheap_app
+  | not dicts_cheap
+  = \e _     -> exprIsCheap' cheap_app e
+  | otherwise
+  = \e mb_ty -> exprIsCheap' cheap_app e
+             || case mb_ty of
+                  Nothing -> False
+                  Just ty -> isDictLikeTy ty
+	-- If the experimental -fdicts-cheap flag is on, we eta-expand through
+	-- dictionary bindings.  This improves arities. Thereby, it also
+	-- means that full laziness is less prone to floating out the
+	-- application of a function to its dictionary arguments, which
+	-- can thereby lose opportunities for fusion.  Example:
+	-- 	foo :: Ord a => a -> ...
+	--	foo = /\a \(d:Ord a). let d' = ...d... in \(x:a). ....
+	--		-- So foo has arity 1
+	--
+	--	f = \x. foo dInt $ bar x
+	--
+	-- The (foo DInt) is floated out, and makes ineffective a RULE 
+	--	foo (bar x) = ...
+	--
+	-- One could go further and make exprIsCheap reply True to any
+	-- dictionary-typed expression, but that's more work.
+	-- 
+	-- See Note [Dictionary-like types] in TcType.lhs for why we use
+  	-- isDictLikeTy here rather than isDictTy
 \end{code}
 
 Note [Eta-expanding at let bindings]
@@ -1120,6 +1186,33 @@ because then 'genMap' will inline, and it really shouldn't: at least
 as far as the programmer is concerned, it's not applied to two
 arguments!
 
+Note [Arity analysis]
+~~~~~~~~~~~~~~~~~~~~~
+The motivating example for arity analysis is this:
+ 
+  f = \x. let g = f (x+1) 
+          in \y. ...g...
+
+What arity does f have?  Really it should have arity 2, but a naive
+look at the RHS won't see that.  You need a fixpoint analysis which
+says it has arity "infinity" the first time round.
+
+This example happens a lot; it first showed up in Andy Gill's thesis,
+fifteen years ago!  It also shows up in the code for 'rnf' on lists
+in Trac #4138.
+
+The analysis is easy to achieve because exprEtaExpandArity takes an
+argument
+     type CheapFun = CoreExpr -> Maybe Type -> Bool
+used to decide if an expression is cheap enough to push inside a 
+lambda.  And exprIsCheap' in turn takes an argument
+     type CheapAppFun = Id -> Int -> Bool
+which tells when an application is cheap. This makes it easy to
+write the analysis loop.
+
+The analysis is cheap-and-cheerful because it doesn't deal with
+mutual recursion.  But the self-recursive case is the important one.
+
 
 %************************************************************************
 %*									*
-- 
1.7.10.4