[project @ 2004-11-10 13:24:06 by simonmar]

[ghc-hetmet.git] / ghc / compiler / simplCore / SetLevels.lhs
diff --git a/ghc/compiler/simplCore/SetLevels.lhs b/ghc/compiler/simplCore/SetLevels.lhs

index 91dbe75..08f3d84 100644 (file)
--- a/ghc/compiler/simplCore/SetLevels.lhs
+++ b/ghc/compiler/simplCore/SetLevels.lhs
@@ -7,59 +7,74 @@
                         Overview
                 ***************************
  
-* We attach binding levels to Core bindings, in preparation for floating
-  outwards (@FloatOut@).
+1. We attach binding levels to Core bindings, in preparation for floating
+   outwards (@FloatOut@).
  
-* We also let-ify many expressions (notably case scrutinees), so they
-  will have a fighting chance of being floated sensible.
+2. We also let-ify many expressions (notably case scrutinees), so they
+   will have a fighting chance of being floated sensible.
  
-* We clone the binders of any floatable let-binding, so that when it is
-  floated out it will be unique.  (This used to be done by the simplifier
-  but the latter now only ensures that there's no shadowing.)
-  NOTE: Very tiresomely, we must apply this substitution to
-       the rules stored inside a variable too.
+3. We clone the binders of any floatable let-binding, so that when it is
+   floated out it will be unique.  (This used to be done by the simplifier
+   but the latter now only ensures that there's no shadowing; indeed, even 
+   that may not be true.)
  
-  We do *not* clone top-level bindings, because some of them must not change,
-  but we *do* clone bindings that are heading for the top level
+   NOTE: this can't be done using the uniqAway idea, because the variable
+        must be unique in the whole program, not just its current scope,
+        because two variables in different scopes may float out to the
+        same top level place
  
-* In the expression
+   NOTE: Very tiresomely, we must apply this substitution to
+        the rules stored inside a variable too.
+
+   We do *not* clone top-level bindings, because some of them must not change,
+   but we *do* clone bindings that are heading for the top level
+
+4. In the expression
         case x of wild { p -> ...wild... }
-  we substitute x for wild in the RHS of the case alternatives:
+   we substitute x for wild in the RHS of the case alternatives:
         case x of wild { p -> ...x... }
-  This means that a sub-expression involving x is not "trapped" inside the RHS.
-  And it's not inconvenient because we already have a substitution.
+   This means that a sub-expression involving x is not "trapped" inside the RHS.
+   And it's not inconvenient because we already have a substitution.
+
+  Note that this is EXACTLY BACKWARDS from the what the simplifier does.
+  The simplifier tries to get rid of occurrences of x, in favour of wild,
+  in the hope that there will only be one remaining occurrence of x, namely
+  the scrutinee of the case, and we can inline it.  
  
  \begin{code}
  module SetLevels (
-       setLevels,
+       setLevels, 
  
         Level(..), tOP_LEVEL,
+       LevelledBind, LevelledExpr,
  
-       incMinorLvl, ltMajLvl, ltLvl, isTopLvl
+       incMinorLvl, ltMajLvl, ltLvl, isTopLvl, isInlineCtxt
      ) where
  
  #include "HsVersions.h"
  
  import CoreSyn
  
-import CoreUtils       ( exprType, exprIsTrivial, exprIsBottom, mkPiType )
+import CmdLineOpts     ( FloatOutSwitches(..) )
+import CoreUtils       ( exprType, exprIsTrivial, exprIsCheap, mkPiTypes )
  import CoreFVs         -- all of it
  import Subst
-import Id              ( Id, idType, idFreeTyVars, mkSysLocal, isOneShotLambda, modifyIdInfo, 
+import Id              ( Id, idType, mkSysLocalUnencoded, 
+                         isOneShotLambda, zapDemandIdInfo,
                           idSpecialisation, idWorkerInfo, setIdInfo
                         )
-import IdInfo          ( workerExists, vanillaIdInfo, demandInfo, setDemandInfo )
-import Var             ( Var, setVarUnique )
+import IdInfo          ( workerExists, vanillaIdInfo, )
+import Var             ( Var )
  import VarSet
  import VarEnv
  import Name            ( getOccName )
  import OccName         ( occNameUserString )
  import Type            ( isUnLiftedType, Type )
  import BasicTypes      ( TopLevelFlag(..) )
-import Demand          ( isStrict, wwLazy )
  import UniqSupply
-import Util            ( sortLt, isSingleton, count )
+import Util            ( sortLe, isSingleton, count )
  import Outputable
+import FastString
  \end{code}
  
  %************************************************************************
@@ -69,7 +84,9 @@ import Outputable
  %************************************************************************
  
  \begin{code}
-data Level = Level Int -- Level number of enclosing lambdas
+data Level = InlineCtxt        -- A level that's used only for
+                       -- the context parameter ctxt_lvl
+          | Level Int  -- Level number of enclosing lambdas
                    Int  -- Number of big-lambda and/or case expressions between
                         -- here and the nearest enclosing lambda
  \end{code}
@@ -94,46 +111,95 @@ The main function @lvlExpr@ carries a ``context level'' (@ctxt_lvl@).
  That's meant to be the level number of the enclosing binder in the
  final (floated) program.  If the level number of a sub-expression is
  less than that of the context, then it might be worth let-binding the
-sub-expression so that it will indeed float. This context level starts
-at @Level 0 0@.
+sub-expression so that it will indeed float.  
+
+If you can float to level @Level 0 0@ worth doing so because then your
+allocation becomes static instead of dynamic.  We always start with
+context @Level 0 0@.  
+
+
+InlineCtxt
+~~~~~~~~~~
+@InlineCtxt@ very similar to @Level 0 0@, but is used for one purpose:
+to say "don't float anything out of here".  That's exactly what we
+want for the body of an INLINE, where we don't want to float anything
+out at all.  See notes with lvlMFE below.
+
+But, check this out:
+
+-- At one time I tried the effect of not float anything out of an InlineMe,
+-- but it sometimes works badly.  For example, consider PrelArr.done.  It
+-- has the form        __inline (\d. e)
+-- where e doesn't mention d.  If we float this to 
+--     __inline (let x = e in \d. x)
+-- things are bad.  The inliner doesn't even inline it because it doesn't look
+-- like a head-normal form.  So it seems a lesser evil to let things float.
+-- In SetLevels we do set the context to (Level 0 0) when we get to an InlineMe
+-- which discourages floating out.
+
+So the conclusion is: don't do any floating at all inside an InlineMe.
+(In the above example, don't float the {x=e} out of the \d.)
+
+One particular case is that of workers: we don't want to float the
+call to the worker outside the wrapper, otherwise the worker might get
+inlined into the floated expression, and an importing module won't see
+the worker at all.
  
  \begin{code}
  type LevelledExpr  = TaggedExpr Level
-type LevelledArg   = TaggedArg Level
  type LevelledBind  = TaggedBind Level
  
-tOP_LEVEL = Level 0 0
+tOP_LEVEL   = Level 0 0
+iNLINE_CTXT = InlineCtxt
  
  incMajorLvl :: Level -> Level
+-- For InlineCtxt we ignore any inc's; we don't want
+-- to do any floating at all; see notes above
+incMajorLvl InlineCtxt         = InlineCtxt
  incMajorLvl (Level major minor) = Level (major+1) 0
  
  incMinorLvl :: Level -> Level
+incMinorLvl InlineCtxt         = InlineCtxt
  incMinorLvl (Level major minor) = Level major (minor+1)
  
  maxLvl :: Level -> Level -> Level
+maxLvl InlineCtxt l2  = l2
+maxLvl l1  InlineCtxt = l1
  maxLvl l1@(Level maj1 min1) l2@(Level maj2 min2)
    | (maj1 > maj2) || (maj1 == maj2 && min1 > min2) = l1
    | otherwise                                     = l2
  
  ltLvl :: Level -> Level -> Bool
+ltLvl any_lvl   InlineCtxt  = False
+ltLvl InlineCtxt (Level _ _) = True
  ltLvl (Level maj1 min1) (Level maj2 min2)
    = (maj1 < maj2) || (maj1 == maj2 && min1 < min2)
  
  ltMajLvl :: Level -> Level -> Bool
      -- Tells if one level belongs to a difft *lambda* level to another
-    -- But it returns True regardless if l1 is the top level
-    -- We always like to float to the top!     
-ltMajLvl (Level 0 0)    _             = True
+ltMajLvl any_lvl       InlineCtxt     = False
+ltMajLvl InlineCtxt    (Level maj2 _) = 0 < maj2
  ltMajLvl (Level maj1 _) (Level maj2 _) = maj1 < maj2
  
  isTopLvl :: Level -> Bool
  isTopLvl (Level 0 0) = True
-isTopLvl other       = False
+isTopLvl other      = False
+
+isInlineCtxt :: Level -> Bool
+isInlineCtxt InlineCtxt = True
+isInlineCtxt other     = False
  
  instance Outputable Level where
+  ppr InlineCtxt      = text "<INLINE>"
    ppr (Level maj min) = hcat [ char '<', int maj, char ',', int min, char '>' ]
+
+instance Eq Level where
+  InlineCtxt       == InlineCtxt        = True
+  (Level maj1 min1) == (Level maj2 min2) = maj1==maj2 && min1==min2
+  l1               == l2                = False
  \end{code}
  
+
  %************************************************************************
  %*                                                                     *
  \subsection{Main level-setting code}
@@ -141,7 +207,7 @@ instance Outputable Level where
  %************************************************************************
  
  \begin{code}
-setLevels :: Bool              -- True <=> float lambdas to top level
+setLevels :: FloatOutSwitches
           -> [CoreBind]
           -> UniqSupply
           -> [LevelledBind]
@@ -208,14 +274,15 @@ lvlExpr ctxt_lvl env (_, AnnApp fun arg)
      lvlMFE  False ctxt_lvl env arg     `thenLvl` \ arg' ->
      returnLvl (App fun' arg')
    where
-    lvl_fun (_, AnnCase _ _ _) = lvlMFE True ctxt_lvl env fun
+-- gaw 2004
+    lvl_fun (_, AnnCase _ _ _ _) = lvlMFE True ctxt_lvl env fun
      lvl_fun other             = lvlExpr ctxt_lvl env fun
         -- We don't do MFE on partial applications generally,
         -- but we do if the function is big and hairy, like a case
  
  lvlExpr ctxt_lvl env (_, AnnNote InlineMe expr)
-       -- Don't float anything out of an InlineMe
-  = lvlExpr tOP_LEVEL env expr                 `thenLvl` \ expr' ->
+-- Don't float anything out of an InlineMe; hence the iNLINE_CTXT
+  = lvlExpr iNLINE_CTXT env expr       `thenLvl` \ expr' ->
      returnLvl (Note InlineMe expr')
  
  lvlExpr ctxt_lvl env (_, AnnNote note expr)
@@ -231,54 +298,69 @@ lvlExpr ctxt_lvl env (_, AnnNote note expr)
  
  lvlExpr ctxt_lvl env expr@(_, AnnLam bndr rhs)
    = lvlMFE True new_lvl new_env body   `thenLvl` \ new_body ->
-    returnLvl (glue_binders new_bndrs expr new_body)
+    returnLvl (mkLams new_bndrs new_body)
    where 
-    (bndrs, body)       = collect_binders expr
+    (bndrs, body)       = collectAnnBndrs expr
      (new_lvl, new_bndrs) = lvlLamBndrs ctxt_lvl bndrs
      new_env             = extendLvlEnv env new_bndrs
+       -- At one time we called a special verion of collectBinders,
+       -- which ignored coercions, because we don't want to split
+       -- a lambda like this (\x -> coerce t (\s -> ...))
+       -- This used to happen quite a bit in state-transformer programs,
+       -- but not nearly so much now non-recursive newtypes are transparent.
+       -- [See SetLevels rev 1.50 for a version with this approach.]
+
+lvlExpr ctxt_lvl env (_, AnnLet (AnnNonRec bndr rhs) body)
+  | isUnLiftedType (idType bndr)
+       -- Treat unlifted let-bindings (let x = b in e) just like (case b of x -> e)
+       -- That is, leave it exactly where it is
+       -- We used to float unlifted bindings too (e.g. to get a cheap primop
+       -- outside a lambda (to see how, look at lvlBind in rev 1.58)
+       -- but an unrelated change meant that these unlifed bindings
+       -- could get to the top level which is bad.  And there's not much point;
+       -- unlifted bindings are always cheap, and so hardly worth floating.
+  = lvlExpr ctxt_lvl env rhs           `thenLvl` \ rhs' ->
+    lvlExpr incd_lvl env' body         `thenLvl` \ body' ->
+    returnLvl (Let (NonRec bndr' rhs') body')
+  where
+    incd_lvl = incMinorLvl ctxt_lvl
+    bndr' = TB bndr incd_lvl
+    env'  = extendLvlEnv env [bndr']
  
  lvlExpr ctxt_lvl env (_, AnnLet bind body)
    = lvlBind NotTopLevel ctxt_lvl env bind      `thenLvl` \ (bind', new_env) ->
      lvlExpr ctxt_lvl new_env body              `thenLvl` \ body' ->
      returnLvl (Let bind' body')
  
-lvlExpr ctxt_lvl env (_, AnnCase expr case_bndr alts)
+-- gaw 2004
+lvlExpr ctxt_lvl env (_, AnnCase expr case_bndr ty alts)
    = lvlMFE True ctxt_lvl env expr      `thenLvl` \ expr' ->
      let
         alts_env = extendCaseBndrLvlEnv env expr' case_bndr incd_lvl
      in
      mapLvl (lvl_alt alts_env) alts     `thenLvl` \ alts' ->
-    returnLvl (Case expr' (case_bndr, incd_lvl) alts')
+    returnLvl (Case expr' (TB case_bndr incd_lvl) ty alts')
    where
-      expr_type = exprType (deAnnotate expr)
        incd_lvl  = incMinorLvl ctxt_lvl
  
        lvl_alt alts_env (con, bs, rhs)
         = lvlMFE True incd_lvl new_env rhs      `thenLvl` \ rhs' ->
           returnLvl (con, bs', rhs')
         where
-         bs'     = [ (b, incd_lvl) | b <- bs ]
+         bs'     = [ TB b incd_lvl | b <- bs ]
           new_env = extendLvlEnv alts_env bs'
-
-collect_binders lam
-  = go [] lam
-  where
-    go rev_bndrs (_, AnnLam b e)  = go (b:rev_bndrs) e
-    go rev_bndrs (_, AnnNote n e) = go rev_bndrs e
-    go rev_bndrs rhs             = (reverse rev_bndrs, rhs)
-       -- Ignore notes, because we don't want to split
-       -- a lambda like this (\x -> coerce t (\s -> ...))
-       -- This happens quite a bit in state-transformer programs
-
-       -- glue_binders puts the lambda back together
-glue_binders (b:bs) (_, AnnLam _ e)  body = Lam b (glue_binders bs e body)
-glue_binders bs            (_, AnnNote n e) body = Note n (glue_binders bs e body)
-glue_binders []            e                body = body
  \end{code}
  
  @lvlMFE@ is just like @lvlExpr@, except that it might let-bind
  the expression, so that it can itself be floated.
  
+[NOTE: unlifted MFEs]
+We don't float unlifted MFEs, which potentially loses big opportunites.
+For example:
+       \x -> f (h y)
+where h :: Int -> Int# is expensive. We'd like to float the (h y) outside
+the \x, but we don't because it's unboxed.  Possible solution: box it.
+
  \begin{code}
  lvlMFE ::  Bool                        -- True <=> strict context [body of case or let]
         -> Level                -- Level of innermost enclosing lambda/tylam
@@ -289,18 +371,19 @@ lvlMFE ::  Bool                   -- True <=> strict context [body of case or let]
  lvlMFE strict_ctxt ctxt_lvl env (_, AnnType ty)
    = returnLvl (Type ty)
  
+
  lvlMFE strict_ctxt ctxt_lvl env ann_expr@(fvs, _)
-  |  isUnLiftedType ty                         -- Can't let-bind it
+  |  isUnLiftedType ty                 -- Can't let-bind it; see [NOTE: unlifted MFEs]
+  || isInlineCtxt ctxt_lvl             -- Don't float out of an __inline__ context
+  || exprIsTrivial expr                        -- Never float if it's trivial
    || not good_destination
-  || exprIsTrivial expr                                -- Is trivial
-  || (strict_ctxt && exprIsBottom expr)                -- Strict context and is bottom
    =    -- Don't float it out
      lvlExpr ctxt_lvl env ann_expr
  
    | otherwise  -- Float it out!
    = lvlFloatRhs abs_vars dest_lvl env ann_expr `thenLvl` \ expr' ->
      newLvlVar "lvl" abs_vars ty                        `thenLvl` \ var ->
-    returnLvl (Let (NonRec (var,dest_lvl) expr') 
+    returnLvl (Let (NonRec (TB var dest_lvl) expr') 
                    (mkVarApps (Var var) abs_vars))
    where
      expr     = deAnnotate ann_expr
@@ -308,16 +391,42 @@ lvlMFE strict_ctxt ctxt_lvl env ann_expr@(fvs, _)
      dest_lvl = destLevel env fvs (isFunction ann_expr)
      abs_vars = abstractVars dest_lvl env fvs
  
-    good_destination =  dest_lvl `ltMajLvl` ctxt_lvl           -- Escapes a value lambda
-                    || (isTopLvl dest_lvl && not strict_ctxt)  -- Goes to the top
         -- A decision to float entails let-binding this thing, and we only do 
         -- that if we'll escape a value lambda, or will go to the top level.
-       -- But beware
-       --      concat = /\ a -> foldr ..a.. (++) []
-       -- was getting turned into
-       --      concat = /\ a -> lvl a
-       --      lvl    = /\ a -> foldr ..a.. (++) []
-       -- which is pretty stupid.  Hence the strict_ctxt test
+    good_destination 
+       | dest_lvl `ltMajLvl` ctxt_lvl          -- Escapes a value lambda
+       = not (exprIsCheap expr) || isTopLvl dest_lvl
+         -- Even if it escapes a value lambda, we only
+         -- float if it's not cheap (unless it'll get all the
+         -- way to the top).  I've seen cases where we
+         -- float dozens of tiny free expressions, which cost
+         -- more to allocate than to evaluate.
+         -- NB: exprIsCheap is also true of bottom expressions, which
+         --     is good; we don't want to share them
+         --
+         -- It's only Really Bad to float a cheap expression out of a
+         -- strict context, because that builds a thunk that otherwise
+         -- would never be built.  So another alternative would be to
+         -- add 
+         --    || (strict_ctxt && not (exprIsBottom expr))
+         -- to the condition above. We should really try this out.
+
+       | otherwise             -- Does not escape a value lambda
+       = isTopLvl dest_lvl     -- Only float if we are going to the top level
+       && floatConsts env      --   and the floatConsts flag is on
+       && not strict_ctxt      -- Don't float from a strict context    
+         -- We are keen to float something to the top level, even if it does not
+         -- escape a lambda, because then it needs no allocation.  But it's controlled
+         -- by a flag, because doing this too early loses opportunities for RULES
+         -- which (needless to say) are important in some nofib programs
+         -- (gcd is an example).
+         --
+         -- Beware:
+         --    concat = /\ a -> foldr ..a.. (++) []
+         -- was getting turned into
+         --    concat = /\ a -> lvl a
+         --    lvl    = /\ a -> foldr ..a.. (++) []
+         -- which is pretty stupid.  Hence the strict_ctxt test
  \end{code}
  
  
@@ -338,36 +447,39 @@ lvlBind :: TopLevelFlag           -- Used solely to decide whether to clone
         -> LvlM (LevelledBind, LevelEnv)
  
  lvlBind top_lvl ctxt_lvl env (AnnNonRec bndr rhs@(rhs_fvs,_))
+  | isInlineCtxt ctxt_lvl              -- Don't do anything inside InlineMe
+  = lvlExpr ctxt_lvl env rhs                   `thenLvl` \ rhs' ->
+    returnLvl (NonRec (TB bndr ctxt_lvl) rhs', env)
+
    | null abs_vars
    =    -- No type abstraction; clone existing binder
-    lvlExpr ctxt_lvl env rhs                   `thenLvl` \ rhs' ->
+    lvlExpr dest_lvl env rhs                   `thenLvl` \ rhs' ->
      cloneVar top_lvl env bndr ctxt_lvl dest_lvl        `thenLvl` \ (env', bndr') ->
-    returnLvl (NonRec (bndr', dest_lvl) rhs', env') 
+    returnLvl (NonRec (TB bndr' dest_lvl) rhs', env') 
  
    | otherwise
    = -- Yes, type abstraction; create a new binder, extend substitution, etc
      lvlFloatRhs abs_vars dest_lvl env rhs      `thenLvl` \ rhs' ->
      newPolyBndrs dest_lvl env abs_vars [bndr]  `thenLvl` \ (env', [bndr']) ->
-    returnLvl (NonRec (bndr', dest_lvl) rhs', env')
+    returnLvl (NonRec (TB bndr' dest_lvl) rhs', env')
  
    where
      bind_fvs = rhs_fvs `unionVarSet` idFreeVars bndr
      abs_vars = abstractVars dest_lvl env bind_fvs
-
-    dest_lvl | isUnLiftedType (idType bndr) = destLevel env bind_fvs False `maxLvl` Level 1 0
-            | otherwise                    = destLevel env bind_fvs (isFunction rhs)
-       -- Hack alert!  We do have some unlifted bindings, for cheap primops, and 
-       -- it is ok to float them out; but not to the top level.  If they would otherwise
-       -- go to the top level, we pin them inside the topmost lambda
+    dest_lvl = destLevel env bind_fvs (isFunction rhs)
  \end{code}
  
  
  \begin{code}
  lvlBind top_lvl ctxt_lvl env (AnnRec pairs)
+  | isInlineCtxt ctxt_lvl      -- Don't do anything inside InlineMe
+  = mapLvl (lvlExpr ctxt_lvl env) rhss                 `thenLvl` \ rhss' ->
+    returnLvl (Rec ([TB b ctxt_lvl | b <- bndrs] `zip` rhss'), env)
+
    | null abs_vars
-  = cloneVars top_lvl env bndrs ctxt_lvl dest_lvl      `thenLvl` \ (new_env, new_bndrs) ->
+  = cloneRecVars top_lvl env bndrs ctxt_lvl dest_lvl   `thenLvl` \ (new_env, new_bndrs) ->
      mapLvl (lvlExpr ctxt_lvl new_env) rhss             `thenLvl` \ new_rhss ->
-    returnLvl (Rec ((new_bndrs `zip` repeat dest_lvl) `zip` new_rhss), new_env)
+    returnLvl (Rec ([TB b dest_lvl | b <- new_bndrs] `zip` new_rhss), new_env)
  
    | isSingleton pairs && count isId abs_vars > 1
    =    -- Special case for self recursion where there are
@@ -387,22 +499,23 @@ lvlBind top_lvl ctxt_lvl env (AnnRec pairs)
      in
      cloneVar NotTopLevel rhs_env bndr rhs_lvl rhs_lvl  `thenLvl` \ (rhs_env', new_bndr) ->
      let
-       (lam_bndrs, rhs_body)     = collect_binders rhs
+       (lam_bndrs, rhs_body)     = collectAnnBndrs rhs
          (body_lvl, new_lam_bndrs) = lvlLamBndrs rhs_lvl lam_bndrs
         body_env                  = extendLvlEnv rhs_env' new_lam_bndrs
      in
      lvlExpr body_lvl body_env rhs_body         `thenLvl` \ new_rhs_body ->
      newPolyBndrs dest_lvl env abs_vars [bndr]  `thenLvl` \ (poly_env, [poly_bndr]) ->
-    returnLvl (Rec [((poly_bndr,dest_lvl), mkLams abs_vars_w_lvls $
-                                          glue_binders new_lam_bndrs rhs $
-                                          Let (Rec [((new_bndr,rhs_lvl), mkLams new_lam_bndrs new_rhs_body)]) 
-                                               (mkVarApps (Var new_bndr) lam_bndrs))],
+    returnLvl (Rec [(TB poly_bndr dest_lvl, 
+              mkLams abs_vars_w_lvls $
+              mkLams new_lam_bndrs $
+              Let (Rec [(TB new_bndr rhs_lvl, mkLams new_lam_bndrs new_rhs_body)]) 
+                  (mkVarApps (Var new_bndr) lam_bndrs))],
                poly_env)
  
-  | otherwise
+  | otherwise  -- Non-null abs_vars
    = newPolyBndrs dest_lvl env abs_vars bndrs           `thenLvl` \ (new_env, new_bndrs) ->
      mapLvl (lvlFloatRhs abs_vars dest_lvl new_env) rhss `thenLvl` \ new_rhss ->
-    returnLvl (Rec ((new_bndrs `zip` repeat dest_lvl) `zip` new_rhss), new_env)
+    returnLvl (Rec ([TB b dest_lvl | b <- new_bndrs] `zip` new_rhss), new_env)
  
    where
      (bndrs,rhss) = unzip pairs
@@ -435,8 +548,10 @@ lvlFloatRhs abs_vars dest_lvl env rhs
  %************************************************************************
  
  \begin{code}
-lvlLamBndrs :: Level -> [CoreBndr] -> (Level, [(CoreBndr, Level)])
+lvlLamBndrs :: Level -> [CoreBndr] -> (Level, [TaggedBndr Level])
  -- Compute the levels for the binders of a lambda group
+-- The binders returned are exactly the same as the ones passed,
+-- but they are now paired with a level
  lvlLamBndrs lvl [] 
    = (lvl, [])
  
@@ -449,10 +564,10 @@ lvlLamBndrs lvl bndrs
         | isId bndr &&                  -- Go to the next major level if this is a value binder,
           not bumped_major &&           -- and we havn't already gone to the next level (one jump per group)
           not (isOneShotLambda bndr)    -- and it isn't a one-shot lambda
-       = go new_lvl True ((bndr,new_lvl) : rev_lvld_bndrs) bndrs
+       = go new_lvl True (TB bndr new_lvl : rev_lvld_bndrs) bndrs
  
         | otherwise
-       = go old_lvl bumped_major ((bndr,old_lvl) : rev_lvld_bndrs) bndrs
+       = go old_lvl bumped_major (TB bndr old_lvl : rev_lvld_bndrs) bndrs
  
         where
           new_lvl = incMajorLvl old_lvl
@@ -464,25 +579,6 @@ lvlLamBndrs lvl bndrs
  \end{code}
  
  \begin{code}
-abstractVars :: Level -> LevelEnv -> VarSet -> [Var]
-       -- Find the variables in fvs, free vars of the target expresion,
-       -- whose level is less than than the supplied level
-       -- These are the ones we are going to abstract out
-abstractVars dest_lvl env fvs
-  = uniq (sortLt lt [var | fv <- varSetElems fvs, var <- absVarsOf dest_lvl env fv])
-  where
-       -- Sort the variables so we don't get 
-       -- mixed-up tyvars and Ids; it's just messy
-    v1 `lt` v2 = case (isId v1, isId v2) of
-                  (True, False) -> False
-                  (False, True) -> True
-                  other         -> v1 < v2     -- Same family
-    uniq :: [Var] -> [Var]
-       -- Remove adjacent duplicates; the sort will have brought them together
-    uniq (v1:v2:vs) | v1 == v2  = uniq (v2:vs)
-                   | otherwise = v1 : uniq (v2:vs)
-    uniq vs = vs
-
    -- Destintion level is the max Id level of the expression
    -- (We'll abstract the type variables, if any.)
  destLevel :: LevelEnv -> VarSet -> Bool -> Level
@@ -521,16 +617,18 @@ isFunction other                 = False
  %************************************************************************
  
  \begin{code}
-type LevelEnv = (Bool,                                 -- True <=> Float lambdas too
+type LevelEnv = (FloatOutSwitches,
                  VarEnv Level,                  -- Domain is *post-cloned* TyVars and Ids
-                SubstEnv,                      -- Domain is pre-cloned Ids
+                Subst,                         -- Domain is pre-cloned Ids; tracks the in-scope set
+                                               --      so that subtitution is capture-avoiding
                  IdEnv ([Var], LevelledExpr))   -- Domain is pre-cloned Ids
         -- We clone let-bound variables so that they are still
         -- distinct when floated out; hence the SubstEnv/IdEnv.
+        -- (see point 3 of the module overview comment).
         -- We also use these envs when making a variable polymorphic
         -- because we want to float it out past a big lambda.
         --
-       -- The two Envs always implement the same mapping, but the
+       -- The SubstEnv and IdEnv always implement the same mapping, but the
         -- SubstEnv maps to CoreExpr and the IdEnv to LevelledExpr
         -- Since the range is always a variable or type application,
         -- there is never any difference between the two, but sadly
@@ -545,44 +643,69 @@ type LevelEnv = (Bool,                            -- True <=> Float lambdas too
         --
         -- The domain of the VarEnv Level is the *post-cloned* Ids
  
-initialEnv :: Bool -> LevelEnv
-initialEnv float_lams = (float_lams, emptyVarEnv, emptySubstEnv, emptyVarEnv)
+initialEnv :: FloatOutSwitches -> LevelEnv
+initialEnv float_lams = (float_lams, emptyVarEnv, emptySubst, emptyVarEnv)
  
  floatLams :: LevelEnv -> Bool
-floatLams (float_lams, _, _, _) = float_lams
+floatLams (FloatOutSw float_lams _, _, _, _) = float_lams
+
+floatConsts :: LevelEnv -> Bool
+floatConsts (FloatOutSw _ float_consts, _, _, _) = float_consts
  
-extendLvlEnv :: LevelEnv -> [(Var,Level)] -> LevelEnv
-       -- Used when *not* cloning
-extendLvlEnv (float_lams, lvl_env, subst_env, id_env) prs
-  = (float_lams, foldl add lvl_env prs, subst_env, id_env)
+extendLvlEnv :: LevelEnv -> [TaggedBndr Level] -> LevelEnv
+-- Used when *not* cloning
+extendLvlEnv (float_lams, lvl_env, subst, id_env) prs
+  = (float_lams,
+     foldl add_lvl lvl_env prs,
+     foldl del_subst subst prs,
+     foldl del_id id_env prs)
    where
-    add env (v,l) = extendVarEnv env v l
+    add_lvl   env (TB v l) = extendVarEnv env v l
+    del_subst env (TB v _) = extendInScope env v
+    del_id    env (TB v _) = delVarEnv env v
+  -- We must remove any clone for this variable name in case of
+  -- shadowing.  This bit me in the following case
+  -- (in nofib/real/gg/Spark.hs):
+  -- 
+  --   case ds of wild {
+  --     ... -> case e of wild {
+  --              ... -> ... wild ...
+  --            }
+  --   }
+  -- 
+  -- The inside occurrence of @wild@ was being replaced with @ds@,
+  -- incorrectly, because the SubstEnv was still lying around.  Ouch!
+  -- KSW 2000-07.
  
  -- extendCaseBndrLvlEnv adds the mapping case-bndr->scrut-var if it can
+-- (see point 4 of the module overview comment)
+extendCaseBndrLvlEnv (float_lams, lvl_env, subst, id_env) (Var scrut_var) case_bndr lvl
+  = (float_lams,
+     extendVarEnv lvl_env case_bndr lvl,
+     extendIdSubst subst case_bndr (DoneEx (Var scrut_var)),
+     extendVarEnv id_env case_bndr ([scrut_var], Var scrut_var))
+     
  extendCaseBndrLvlEnv env scrut case_bndr lvl
-  = case scrut of
-       Var v -> extendCloneLvlEnv lvl env [(case_bndr, v)]
-       other -> extendLvlEnv          env [(case_bndr,lvl)]
+  = extendLvlEnv          env [TB case_bndr lvl]
  
-extendPolyLvlEnv dest_lvl (float_lams, lvl_env, subst_env, id_env) abs_vars bndr_pairs
+extendPolyLvlEnv dest_lvl (float_lams, lvl_env, subst, id_env) abs_vars bndr_pairs
    = (float_lams,
-     foldl add_lvl   lvl_env   bndr_pairs,
-     foldl add_subst subst_env bndr_pairs,
-     foldl add_id    id_env    bndr_pairs)
+     foldl add_lvl   lvl_env bndr_pairs,
+     foldl add_subst subst   bndr_pairs,
+     foldl add_id    id_env  bndr_pairs)
    where
-     add_lvl   env (v,v') = extendVarEnv   env v' dest_lvl
-     add_subst env (v,v') = extendSubstEnv env v (DoneEx (mkVarApps (Var v') abs_vars))
-     add_id    env (v,v') = extendVarEnv   env v ((v':abs_vars), mkVarApps (Var v') abs_vars)
+     add_lvl   env (v,v') = extendVarEnv env v' dest_lvl
+     add_subst env (v,v') = extendIdSubst env v (DoneEx (mkVarApps (Var v') abs_vars))
+     add_id    env (v,v') = extendVarEnv env v ((v':abs_vars), mkVarApps (Var v') abs_vars)
  
-extendCloneLvlEnv lvl (float_lams, lvl_env, subst_env, id_env) bndr_pairs
+extendCloneLvlEnv lvl (float_lams, lvl_env, _, id_env) new_subst bndr_pairs
    = (float_lams,
-     foldl add_lvl lvl_env bndr_pairs,
-     foldl add_subst subst_env bndr_pairs,
-     foldl add_id    id_env    bndr_pairs)
+     foldl add_lvl   lvl_env bndr_pairs,
+     new_subst,
+     foldl add_id    id_env  bndr_pairs)
    where
-     add_lvl   env (v,v') = extendVarEnv   env v' lvl
-     add_subst env (v,v') = extendSubstEnv env v (DoneEx (Var v'))
-     add_id    env (v,v') = extendVarEnv   env v ([v'], Var v')
+     add_lvl   env (v,v') = extendVarEnv env v' lvl
+     add_id    env (v,v') = extendVarEnv env v ([v'], Var v')
  
  
  maxIdLevel :: LevelEnv -> VarSet -> Level
@@ -604,13 +727,33 @@ lookupVar (_, _, _, id_env) v = case lookupVarEnv id_env v of
                                        Just (_, expr) -> expr
                                        other          -> Var v
  
+abstractVars :: Level -> LevelEnv -> VarSet -> [Var]
+       -- Find the variables in fvs, free vars of the target expresion,
+       -- whose level is greater than the destination level
+       -- These are the ones we are going to abstract out
+abstractVars dest_lvl env fvs
+  = uniq (sortLe le [var | fv <- varSetElems fvs, var <- absVarsOf dest_lvl env fv])
+  where
+       -- Sort the variables so we don't get 
+       -- mixed-up tyvars and Ids; it's just messy
+    v1 `le` v2 = case (isId v1, isId v2) of
+                  (True, False) -> False
+                  (False, True) -> True
+                  other         -> v1 <= v2    -- Same family
+
+    uniq :: [Var] -> [Var]
+       -- Remove adjacent duplicates; the sort will have brought them together
+    uniq (v1:v2:vs) | v1 == v2  = uniq (v2:vs)
+                   | otherwise = v1 : uniq (v2:vs)
+    uniq vs = vs
+
  absVarsOf :: Level -> LevelEnv -> Var -> [Var]
-       -- If f is free in the exression, and f maps to poly_f a b c in the
+       -- If f is free in the expression, and f maps to poly_f a b c in the
         -- current substitution, then we must report a b c as candidate type
         -- variables
  absVarsOf dest_lvl (_, lvl_env, _, id_env) v 
    | isId v
-  = [final_av | av <- lookup_avs v, abstract_me av, final_av <- add_tyvars av]
+  = [zap av2 | av1 <- lookup_avs v, av2 <- add_tyvars av1, abstract_me av2]
  
    | otherwise
    = if abstract_me v then [v] else []
@@ -624,15 +767,16 @@ absVarsOf dest_lvl (_, lvl_env, _, id_env) v
                         Just (abs_vars, _) -> abs_vars
                         Nothing            -> [v]
  
-       -- We are going to lambda-abstract, so nuke any IdInfo,
-       -- and add the tyvars of the Id
-    add_tyvars v | isId v    =  zap v  : varSetElems (idFreeTyVars v)
+    add_tyvars v | isId v    = v : varSetElems (idFreeTyVars v)
                  | otherwise = [v]
  
-    zap v = WARN( workerExists (idWorkerInfo v)
-                 || not (isEmptyCoreRules (idSpecialisation v)),
-                 text "absVarsOf: discarding info on" <+> ppr v )
-           setIdInfo v vanillaIdInfo
+       -- We are going to lambda-abstract, so nuke any IdInfo,
+       -- and add the tyvars of the Id (if necessary)
+    zap v | isId v = WARN( workerExists (idWorkerInfo v) ||
+                          not (isEmptyCoreRules (idSpecialisation v)),
+                          text "absVarsOf: discarding info on" <+> ppr v )
+                    setIdInfo v vanillaIdInfo
+         | otherwise = v
  \end{code}
  
  \begin{code}
@@ -646,16 +790,16 @@ mapLvl            = mapUs
  
  \begin{code}
  newPolyBndrs dest_lvl env abs_vars bndrs
-  = getUniquesUs (length bndrs)                `thenLvl` \ uniqs ->
+  = getUniquesUs               `thenLvl` \ uniqs ->
      let
         new_bndrs = zipWith mk_poly_bndr bndrs uniqs
      in
      returnLvl (extendPolyLvlEnv dest_lvl env abs_vars (bndrs `zip` new_bndrs), new_bndrs)
    where
-    mk_poly_bndr bndr uniq = mkSysLocal (_PK_ str) uniq poly_ty
+    mk_poly_bndr bndr uniq = mkSysLocalUnencoded (mkFastString str) uniq poly_ty
                            where
                              str     = "poly_" ++ occNameUserString (getOccName bndr)
-                            poly_ty = foldr mkPiType (idType bndr) abs_vars
+                            poly_ty = mkPiTypes abs_vars (idType bndr)
         
  
  newLvlVar :: String 
@@ -663,7 +807,7 @@ newLvlVar :: String
           -> LvlM Id
  newLvlVar str vars body_ty     
    = getUniqueUs        `thenLvl` \ uniq ->
-    returnUs (mkSysLocal (_PK_ str) uniq (foldr mkPiType body_ty vars))
+    returnUs (mkSysLocalUnencoded (mkFastString str) uniq (mkPiTypes vars body_ty))
      
  -- The deeply tiresome thing is that we have to apply the substitution
  -- to the rules inside each Id.  Grr.  But it matters.
@@ -671,42 +815,33 @@ newLvlVar str vars body_ty
  cloneVar :: TopLevelFlag -> LevelEnv -> Id -> Level -> Level -> LvlM (LevelEnv, Id)
  cloneVar TopLevel env v ctxt_lvl dest_lvl
    = returnUs (env, v)  -- Don't clone top level things
-cloneVar NotTopLevel env v ctxt_lvl dest_lvl
+cloneVar NotTopLevel env@(_,_,subst,_) v ctxt_lvl dest_lvl
    = ASSERT( isId v )
-    getUniqueUs        `thenLvl` \ uniq ->
+    getUs      `thenLvl` \ us ->
      let
-      v'        = setVarUnique v uniq
-      v''       = subst_id_info env ctxt_lvl dest_lvl v'
-      env'      = extendCloneLvlEnv dest_lvl env [(v,v'')]
+      (subst', v1) = substAndCloneId subst us v
+      v2          = zap_demand ctxt_lvl dest_lvl v1
+      env'        = extendCloneLvlEnv dest_lvl env subst' [(v,v2)]
      in
-    returnUs (env', v'')
+    returnUs (env', v2)
  
-cloneVars :: TopLevelFlag -> LevelEnv -> [Id] -> Level -> Level -> LvlM (LevelEnv, [Id])
-cloneVars TopLevel env vs ctxt_lvl dest_lvl 
+cloneRecVars :: TopLevelFlag -> LevelEnv -> [Id] -> Level -> Level -> LvlM (LevelEnv, [Id])
+cloneRecVars TopLevel env vs ctxt_lvl dest_lvl 
    = returnUs (env, vs) -- Don't clone top level things
-cloneVars NotTopLevel env vs ctxt_lvl dest_lvl
+cloneRecVars NotTopLevel env@(_,_,subst,_) vs ctxt_lvl dest_lvl
    = ASSERT( all isId vs )
-    getUniquesUs (length vs)   `thenLvl` \ uniqs ->
+    getUs                      `thenLvl` \ us ->
      let
-      vs'       = zipWith setVarUnique vs uniqs
-      vs''      = map (subst_id_info env' ctxt_lvl dest_lvl) vs'
-      env'      = extendCloneLvlEnv dest_lvl env (vs `zip` vs'')
+      (subst', vs1) = substAndCloneRecIds subst us vs
+      vs2          = map (zap_demand ctxt_lvl dest_lvl) vs1
+      env'         = extendCloneLvlEnv dest_lvl env subst' (vs `zip` vs2)
      in
-    returnUs (env', vs'')
-
-subst_id_info (_, _, subst_env, _) ctxt_lvl dest_lvl v
-    = modifyIdInfo (\info -> substIdInfo subst info (zap_dmd info)) v
-  where
-    subst = mkSubst emptyVarSet subst_env
+    returnUs (env', vs2)
  
         -- VERY IMPORTANT: we must zap the demand info 
         -- if the thing is going to float out past a lambda
-    zap_dmd info
-       | float_past_lam && isStrict (demandInfo info)
-       = setDemandInfo info wwLazy
-       | otherwise
-       = info
-
-    float_past_lam = ctxt_lvl `ltMajLvl` dest_lvl
+zap_demand dest_lvl ctxt_lvl id
+  | ctxt_lvl == dest_lvl = id                  -- Stays put
+  | otherwise           = zapDemandIdInfo id   -- Floats out
  \end{code}