getCoreToDo, dumpSimplPhase,
-- * Counting
- SimplCount, doSimplTick, doFreeSimplTick,
+ SimplCount, doSimplTick, doFreeSimplTick, simplCountN,
pprSimplCount, plusSimplCount, zeroSimplCount, isZeroSimplCount, Tick(..),
-- * The monad
import Maybes
import UniqSupply
import UniqFM ( UniqFM, mapUFM, filterUFM )
-import FiniteMap
+import MonadUtils
import Util ( split )
import Data.List ( intersperse )
import Data.Dynamic
import Data.IORef
+import Data.Map (Map)
+import qualified Data.Map as Map
import Data.Word
import Control.Monad
; Err.ghcExit dflags 1 }
| not (isEmptyBag warns)
+ , not (case pass of { CoreDesugar -> True; _ -> False })
+ -- Suppress warnings after desugaring pass because some
+ -- are legitimate. Notably, the desugarer generates instance
+ -- methods with INLINE pragmas that form a mutually recursive
+ -- group. Only afer a round of simplification are they unravelled.
, not opt_NoDebugOutput
, showLintWarnings pass
= printDump (banner "warnings" $$ Err.pprMessageBag warns)
\begin{code}
data FloatOutSwitches = FloatOutSwitches {
floatOutLambdas :: Bool, -- ^ True <=> float lambdas to top level
- floatOutConstants :: Bool -- ^ True <=> float constants to top level,
+ floatOutConstants :: Bool, -- ^ True <=> float constants to top level,
-- even if they do not escape a lambda
+ floatOutPartialApplications :: Bool -- ^ True <=> float out partial applications
+ -- based on arity information.
}
instance Outputable FloatOutSwitches where
ppr = pprFloatOutSwitches
-- | Switches that specify the minimum amount of floating out
-- gentleFloatOutSwitches :: FloatOutSwitches
-- gentleFloatOutSwitches = FloatOutSwitches False False
-
--- | Switches that do not specify floating out of lambdas, just of constants
-constantsOnlyFloatOutSwitches :: FloatOutSwitches
-constantsOnlyFloatOutSwitches = FloatOutSwitches False True
\end{code}
-- initial simplify: mk specialiser happy: minimum effort please
simpl_gently = CoreDoSimplify
(SimplGently { sm_rules = True, sm_inline = False })
+ -- See Note [Gentle mode] and
+ -- Note [RULEs enabled in SimplGently] in SimplUtils
max_iter
[
- -- Simplify "gently"
- -- Don't inline anything till full laziness has bitten
- -- In particular, inlining wrappers inhibits floating
- -- e.g. ...(case f x of ...)...
- -- ==> ...(case (case x of I# x# -> fw x#) of ...)...
- -- ==> ...(case x of I# x# -> case fw x# of ...)...
- -- and now the redex (f x) isn't floatable any more
- -- Similarly, don't apply any rules until after full
- -- laziness. Notably, list fusion can prevent floating.
+
NoCaseOfCase -- Don't do case-of-case transformations.
-- This makes full laziness work better
-- so that overloaded functions have all their dictionary lambdas manifest
runWhen do_specialise CoreDoSpecialising,
- runWhen full_laziness (CoreDoFloatOutwards constantsOnlyFloatOutSwitches),
+ runWhen full_laziness $
+ CoreDoFloatOutwards FloatOutSwitches {
+ floatOutLambdas = False,
+ floatOutConstants = True,
+ floatOutPartialApplications = False },
-- Was: gentleFloatOutSwitches
- -- I have no idea why, but not floating constants to top level is
- -- very bad in some cases.
+ --
+ -- I have no idea why, but not floating constants to
+ -- top level is very bad in some cases.
+ --
-- Notably: p_ident in spectral/rewrite
- -- Changing from "gentle" to "constantsOnly" improved
- -- rewrite's allocation by 19%, and made 0.0% difference
- -- to any other nofib benchmark
+ -- Changing from "gentle" to "constantsOnly"
+ -- improved rewrite's allocation by 19%, and
+ -- made 0.0% difference to any other nofib
+ -- benchmark
+ --
+ -- Not doing floatOutPartialApplications yet, we'll do
+ -- that later on when we've had a chance to get more
+ -- accurate arity information. In fact it makes no
+ -- difference at all to performance if we do it here,
+ -- but maybe we save some unnecessary to-and-fro in
+ -- the simplifier.
runWhen do_float_in CoreDoFloatInwards,
simpl_phase 0 ["post-worker-wrapper"] max_iter
]),
- runWhen full_laziness
- (CoreDoFloatOutwards constantsOnlyFloatOutSwitches),
+ runWhen full_laziness $
+ CoreDoFloatOutwards FloatOutSwitches {
+ floatOutLambdas = False,
+ floatOutConstants = True,
+ floatOutPartialApplications = True },
-- nofib/spectral/hartel/wang doubles in speed if you
-- do full laziness late in the day. It only happens
-- after fusion and other stuff, so the early pass doesn't
\begin{code}
data SimplCount
- = VerySimplZero -- These two are used when
- | VerySimplNonZero -- we are only interested in
- -- termination info
+ = VerySimplCount !Int -- Used when don't want detailed stats
| SimplCount {
ticks :: !Int, -- Total ticks
-- recent history reasonably efficiently
}
-type TickCounts = FiniteMap Tick Int
+type TickCounts = Map Tick Int
+
+simplCountN :: SimplCount -> Int
+simplCountN (VerySimplCount n) = n
+simplCountN (SimplCount { ticks = n }) = n
zeroSimplCount dflags
-- This is where we decide whether to do
-- the VerySimpl version or the full-stats version
| dopt Opt_D_dump_simpl_stats dflags
- = SimplCount {ticks = 0, details = emptyFM,
+ = SimplCount {ticks = 0, details = Map.empty,
n_log = 0, log1 = [], log2 = []}
| otherwise
- = VerySimplZero
+ = VerySimplCount 0
-isZeroSimplCount VerySimplZero = True
-isZeroSimplCount (SimplCount { ticks = 0 }) = True
-isZeroSimplCount _ = False
+isZeroSimplCount (VerySimplCount n) = n==0
+isZeroSimplCount (SimplCount { ticks = n }) = n==0
doFreeSimplTick tick sc@SimplCount { details = dts }
= sc { details = dts `addTick` tick }
where
sc1 = sc { ticks = tks+1, details = dts `addTick` tick }
-doSimplTick _ _ = VerySimplNonZero -- The very simple case
+doSimplTick _ (VerySimplCount n) = VerySimplCount (n+1)
--- Don't use plusFM_C because that's lazy, and we want to
+-- Don't use Map.unionWith because that's lazy, and we want to
-- be pretty strict here!
addTick :: TickCounts -> Tick -> TickCounts
-addTick fm tick = case lookupFM fm tick of
- Nothing -> addToFM fm tick 1
- Just n -> n1 `seq` addToFM fm tick n1
+addTick fm tick = case Map.lookup tick fm of
+ Nothing -> Map.insert tick 1 fm
+ Just n -> n1 `seq` Map.insert tick n1 fm
where
n1 = n+1
plusSimplCount sc1@(SimplCount { ticks = tks1, details = dts1 })
sc2@(SimplCount { ticks = tks2, details = dts2 })
- = log_base { ticks = tks1 + tks2, details = plusFM_C (+) dts1 dts2 }
+ = log_base { ticks = tks1 + tks2, details = Map.unionWith (+) dts1 dts2 }
where
-- A hackish way of getting recent log info
log_base | null (log1 sc2) = sc1 -- Nothing at all in sc2
| null (log2 sc2) = sc2 { log2 = log1 sc1 }
| otherwise = sc2
-plusSimplCount VerySimplZero VerySimplZero = VerySimplZero
-plusSimplCount _ _ = VerySimplNonZero
+plusSimplCount (VerySimplCount n) (VerySimplCount m) = VerySimplCount (n+m)
+plusSimplCount _ _ = panic "plusSimplCount"
+ -- We use one or the other consistently
-pprSimplCount VerySimplZero = ptext (sLit "Total ticks: ZERO!")
-pprSimplCount VerySimplNonZero = ptext (sLit "Total ticks: NON-ZERO!")
+pprSimplCount (VerySimplCount n) = ptext (sLit "Total ticks:") <+> int n
pprSimplCount (SimplCount { ticks = tks, details = dts, log1 = l1, log2 = l2 })
= vcat [ptext (sLit "Total ticks: ") <+> int tks,
blankLine,
- pprTickCounts (fmToList dts),
+ pprTickCounts (Map.toList dts),
if verboseSimplStats then
vcat [blankLine,
ptext (sLit "Log (most recent first)"),