+data SimplifierMode -- See comments in SimplMonad
+ = SimplMode
+ { sm_names :: [String] -- Name(s) of the phase
+ , sm_phase :: CompilerPhase
+ , sm_rules :: Bool -- Whether RULES are enabled
+ , sm_inline :: Bool -- Whether inlining is enabled
+ , sm_case_case :: Bool -- Whether case-of-case is enabled
+ , sm_eta_expand :: Bool -- Whether eta-expansion is enabled
+ }
+
+instance Outputable SimplifierMode where
+ ppr (SimplMode { sm_phase = p, sm_names = ss
+ , sm_rules = r, sm_inline = i
+ , sm_eta_expand = eta, sm_case_case = cc })
+ = ptext (sLit "SimplMode") <+> braces (
+ sep [ ptext (sLit "Phase =") <+> ppr p <+>
+ brackets (text (concat $ intersperse "," ss)) <> comma
+ , pp_flag i (sLit "inline") <> comma
+ , pp_flag r (sLit "rules") <> comma
+ , pp_flag eta (sLit "eta-expand") <> comma
+ , pp_flag cc (sLit "case-of-case") ])
+ where
+ pp_flag f s = ppUnless f (ptext (sLit "no")) <+> ptext s
+\end{code}
+
+
+\begin{code}
+data FloatOutSwitches = FloatOutSwitches {
+ floatOutLambdas :: Maybe Int, -- ^ Just n <=> float lambdas to top level, if
+ -- doing so will abstract over n or fewer
+ -- value variables
+ -- Nothing <=> float all lambdas to top level,
+ -- regardless of how many free variables
+ -- Just 0 is the vanilla case: float a lambda
+ -- iff it has no free vars
+
+ floatOutConstants :: Bool, -- ^ True <=> float constants to top level,
+ -- even if they do not escape a lambda
+ floatOutPartialApplications :: Bool -- ^ True <=> float out partial applications
+ -- based on arity information.
+ }
+instance Outputable FloatOutSwitches where
+ ppr = pprFloatOutSwitches
+
+pprFloatOutSwitches :: FloatOutSwitches -> SDoc
+pprFloatOutSwitches sw
+ = ptext (sLit "FOS") <+> (braces $
+ sep $ punctuate comma $
+ [ ptext (sLit "Lam =") <+> ppr (floatOutLambdas sw)
+ , ptext (sLit "Consts =") <+> ppr (floatOutConstants sw)
+ , ptext (sLit "PAPs =") <+> ppr (floatOutPartialApplications sw) ])
+\end{code}
+
+
+%************************************************************************
+%* *
+ Generating the main optimisation pipeline
+%* *
+%************************************************************************
+
+\begin{code}
+getCoreToDo :: DynFlags -> [CoreToDo]
+getCoreToDo dflags
+ = core_todo
+ where
+ opt_level = optLevel dflags
+ phases = simplPhases dflags
+ max_iter = maxSimplIterations dflags
+ rule_check = ruleCheck dflags
+ strictness = dopt Opt_Strictness dflags
+ full_laziness = dopt Opt_FullLaziness dflags
+ do_specialise = dopt Opt_Specialise dflags
+ do_float_in = dopt Opt_FloatIn dflags
+ cse = dopt Opt_CSE dflags
+ spec_constr = dopt Opt_SpecConstr dflags
+ liberate_case = dopt Opt_LiberateCase dflags
+ static_args = dopt Opt_StaticArgumentTransformation dflags
+ rules_on = dopt Opt_EnableRewriteRules dflags
+ eta_expand_on = dopt Opt_DoLambdaEtaExpansion dflags
+
+ maybe_rule_check phase = runMaybe rule_check (CoreDoRuleCheck phase)
+
+ maybe_strictness_before phase
+ = runWhen (phase `elem` strictnessBefore dflags) CoreDoStrictness
+
+ base_mode = SimplMode { sm_phase = panic "base_mode"
+ , sm_names = []
+ , sm_rules = rules_on
+ , sm_eta_expand = eta_expand_on
+ , sm_inline = True
+ , sm_case_case = True }
+
+ simpl_phase phase names iter
+ = CoreDoPasses
+ $ [ maybe_strictness_before phase
+ , CoreDoSimplify iter
+ (base_mode { sm_phase = Phase phase
+ , sm_names = names })
+
+ , maybe_rule_check (Phase phase) ]
+
+ -- Vectorisation can introduce a fair few common sub expressions involving
+ -- DPH primitives. For example, see the Reverse test from dph-examples.
+ -- We need to eliminate these common sub expressions before their definitions
+ -- are inlined in phase 2. The CSE introduces lots of v1 = v2 bindings,
+ -- so we also run simpl_gently to inline them.
+ ++ (if dopt Opt_Vectorise dflags && phase == 3
+ then [CoreCSE, simpl_gently]
+ else [])
+
+ vectorisation
+ = runWhen (dopt Opt_Vectorise dflags) $
+ CoreDoPasses [ simpl_gently, CoreDoVectorisation ]
+
+ -- By default, we have 2 phases before phase 0.
+
+ -- Want to run with inline phase 2 after the specialiser to give
+ -- maximum chance for fusion to work before we inline build/augment
+ -- in phase 1. This made a difference in 'ansi' where an
+ -- overloaded function wasn't inlined till too late.
+
+ -- Need phase 1 so that build/augment get
+ -- inlined. I found that spectral/hartel/genfft lost some useful
+ -- strictness in the function sumcode' if augment is not inlined
+ -- before strictness analysis runs
+ simpl_phases = CoreDoPasses [ simpl_phase phase ["main"] max_iter
+ | phase <- [phases, phases-1 .. 1] ]
+
+
+ -- initial simplify: mk specialiser happy: minimum effort please
+ simpl_gently = CoreDoSimplify max_iter
+ (base_mode { sm_phase = InitialPhase
+ , sm_names = ["Gentle"]
+ , sm_rules = rules_on -- Note [RULEs enabled in SimplGently]
+ , sm_inline = False
+ , sm_case_case = False })
+ -- Don't do case-of-case transformations.
+ -- This makes full laziness work better
+
+ core_todo =
+ if opt_level == 0 then
+ [vectorisation,
+ simpl_phase 0 ["final"] max_iter]
+ else {- opt_level >= 1 -} [
+
+ -- We want to do the static argument transform before full laziness as it
+ -- may expose extra opportunities to float things outwards. However, to fix
+ -- up the output of the transformation we need at do at least one simplify
+ -- after this before anything else
+ runWhen static_args (CoreDoPasses [ simpl_gently, CoreDoStaticArgs ]),
+
+ -- We run vectorisation here for now, but we might also try to run
+ -- it later
+ vectorisation,
+
+ -- initial simplify: mk specialiser happy: minimum effort please
+ simpl_gently,
+
+ -- Specialisation is best done before full laziness
+ -- so that overloaded functions have all their dictionary lambdas manifest
+ runWhen do_specialise CoreDoSpecialising,
+
+ runWhen full_laziness $
+ CoreDoFloatOutwards FloatOutSwitches {
+ floatOutLambdas = Just 0,
+ floatOutConstants = True,
+ floatOutPartialApplications = False },
+ -- Was: gentleFloatOutSwitches
+ --
+ -- I have no idea why, but not floating constants to
+ -- top level is very bad in some cases.
+ --
+ -- Notably: p_ident in spectral/rewrite
+ -- Changing from "gentle" to "constantsOnly"
+ -- improved rewrite's allocation by 19%, and
+ -- made 0.0% difference to any other nofib
+ -- benchmark
+ --
+ -- Not doing floatOutPartialApplications yet, we'll do
+ -- that later on when we've had a chance to get more
+ -- accurate arity information. In fact it makes no
+ -- difference at all to performance if we do it here,
+ -- but maybe we save some unnecessary to-and-fro in
+ -- the simplifier.
+
+ runWhen do_float_in CoreDoFloatInwards,
+
+ simpl_phases,
+
+ -- Phase 0: allow all Ids to be inlined now
+ -- This gets foldr inlined before strictness analysis
+
+ -- At least 3 iterations because otherwise we land up with
+ -- huge dead expressions because of an infelicity in the
+ -- simpifier.
+ -- let k = BIG in foldr k z xs
+ -- ==> let k = BIG in letrec go = \xs -> ...(k x).... in go xs
+ -- ==> let k = BIG in letrec go = \xs -> ...(BIG x).... in go xs
+ -- Don't stop now!
+ simpl_phase 0 ["main"] (max max_iter 3),
+
+ runWhen strictness (CoreDoPasses [
+ CoreDoStrictness,
+ CoreDoWorkerWrapper,
+ CoreDoGlomBinds,
+ simpl_phase 0 ["post-worker-wrapper"] max_iter
+ ]),
+
+ runWhen full_laziness $
+ CoreDoFloatOutwards FloatOutSwitches {
+ floatOutLambdas = floatLamArgs dflags,
+ floatOutConstants = True,
+ floatOutPartialApplications = True },
+ -- nofib/spectral/hartel/wang doubles in speed if you
+ -- do full laziness late in the day. It only happens
+ -- after fusion and other stuff, so the early pass doesn't
+ -- catch it. For the record, the redex is
+ -- f_el22 (f_el21 r_midblock)
+
+
+ runWhen cse CoreCSE,
+ -- We want CSE to follow the final full-laziness pass, because it may
+ -- succeed in commoning up things floated out by full laziness.
+ -- CSE used to rely on the no-shadowing invariant, but it doesn't any more
+
+ runWhen do_float_in CoreDoFloatInwards,
+
+ maybe_rule_check (Phase 0),
+
+ -- Case-liberation for -O2. This should be after
+ -- strictness analysis and the simplification which follows it.
+ runWhen liberate_case (CoreDoPasses [
+ CoreLiberateCase,
+ simpl_phase 0 ["post-liberate-case"] max_iter
+ ]), -- Run the simplifier after LiberateCase to vastly
+ -- reduce the possiblility of shadowing
+ -- Reason: see Note [Shadowing] in SpecConstr.lhs
+
+ runWhen spec_constr CoreDoSpecConstr,
+
+ maybe_rule_check (Phase 0),
+
+ -- Final clean-up simplification:
+ simpl_phase 0 ["final"] max_iter
+ ]
+
+-- The core-to-core pass ordering is derived from the DynFlags:
+runWhen :: Bool -> CoreToDo -> CoreToDo
+runWhen True do_this = do_this
+runWhen False _ = CoreDoNothing
+
+runMaybe :: Maybe a -> (a -> CoreToDo) -> CoreToDo
+runMaybe (Just x) f = f x
+runMaybe Nothing _ = CoreDoNothing
+
+dumpSimplPhase :: DynFlags -> SimplifierMode -> Bool
+dumpSimplPhase dflags mode
+ | Just spec_string <- shouldDumpSimplPhase dflags
+ = match_spec spec_string
+ | otherwise
+ = dopt Opt_D_verbose_core2core dflags
+
+ where
+ match_spec :: String -> Bool
+ match_spec spec_string
+ = or $ map (and . map match . split ':')
+ $ split ',' spec_string
+
+ match :: String -> Bool
+ match "" = True
+ match s = case reads s of
+ [(n,"")] -> phase_num n
+ _ -> phase_name s
+
+ phase_num :: Int -> Bool
+ phase_num n = case sm_phase mode of
+ Phase k -> n == k
+ _ -> False
+
+ phase_name :: String -> Bool
+ phase_name s = s `elem` sm_names mode
+\end{code}
+
+
+Note [RULEs enabled in SimplGently]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+RULES are enabled when doing "gentle" simplification. Two reasons:
+
+ * We really want the class-op cancellation to happen:
+ op (df d1 d2) --> $cop3 d1 d2
+ because this breaks the mutual recursion between 'op' and 'df'
+
+ * I wanted the RULE
+ lift String ===> ...
+ to work in Template Haskell when simplifying
+ splices, so we get simpler code for literal strings
+
+But watch out: list fusion can prevent floating. So use phase control
+to switch off those rules until after floating.
+
+
+%************************************************************************
+%* *
+ Counting and logging
+%* *
+%************************************************************************
+
+\begin{code}
+verboseSimplStats :: Bool
+verboseSimplStats = opt_PprStyle_Debug -- For now, anyway
+
+zeroSimplCount :: DynFlags -> SimplCount
+isZeroSimplCount :: SimplCount -> Bool
+pprSimplCount :: SimplCount -> SDoc
+doSimplTick, doFreeSimplTick :: Tick -> SimplCount -> SimplCount
+plusSimplCount :: SimplCount -> SimplCount -> SimplCount
+\end{code}
+
+\begin{code}
+data SimplCount
+ = VerySimplCount !Int -- Used when don't want detailed stats
+
+ | SimplCount {
+ ticks :: !Int, -- Total ticks
+ details :: !TickCounts, -- How many of each type
+
+ n_log :: !Int, -- N
+ log1 :: [Tick], -- Last N events; <= opt_HistorySize,
+ -- most recent first
+ log2 :: [Tick] -- Last opt_HistorySize events before that
+ -- Having log1, log2 lets us accumulate the
+ -- recent history reasonably efficiently
+ }
+
+type TickCounts = Map Tick Int
+
+simplCountN :: SimplCount -> Int
+simplCountN (VerySimplCount n) = n
+simplCountN (SimplCount { ticks = n }) = n
+
+zeroSimplCount dflags
+ -- This is where we decide whether to do
+ -- the VerySimpl version or the full-stats version
+ | dopt Opt_D_dump_simpl_stats dflags
+ = SimplCount {ticks = 0, details = Map.empty,
+ n_log = 0, log1 = [], log2 = []}
+ | otherwise
+ = VerySimplCount 0
+
+isZeroSimplCount (VerySimplCount n) = n==0
+isZeroSimplCount (SimplCount { ticks = n }) = n==0
+
+doFreeSimplTick tick sc@SimplCount { details = dts }
+ = sc { details = dts `addTick` tick }
+doFreeSimplTick _ sc = sc
+
+doSimplTick tick sc@SimplCount { ticks = tks, details = dts, n_log = nl, log1 = l1 }
+ | nl >= opt_HistorySize = sc1 { n_log = 1, log1 = [tick], log2 = l1 }
+ | otherwise = sc1 { n_log = nl+1, log1 = tick : l1 }
+ where
+ sc1 = sc { ticks = tks+1, details = dts `addTick` tick }
+
+doSimplTick _ (VerySimplCount n) = VerySimplCount (n+1)
+
+
+-- Don't use Map.unionWith because that's lazy, and we want to
+-- be pretty strict here!
+addTick :: TickCounts -> Tick -> TickCounts
+addTick fm tick = case Map.lookup tick fm of
+ Nothing -> Map.insert tick 1 fm
+ Just n -> n1 `seq` Map.insert tick n1 fm
+ where
+ n1 = n+1
+
+
+plusSimplCount sc1@(SimplCount { ticks = tks1, details = dts1 })
+ sc2@(SimplCount { ticks = tks2, details = dts2 })
+ = log_base { ticks = tks1 + tks2, details = Map.unionWith (+) dts1 dts2 }
+ where
+ -- A hackish way of getting recent log info
+ log_base | null (log1 sc2) = sc1 -- Nothing at all in sc2
+ | null (log2 sc2) = sc2 { log2 = log1 sc1 }
+ | otherwise = sc2
+
+plusSimplCount (VerySimplCount n) (VerySimplCount m) = VerySimplCount (n+m)
+plusSimplCount _ _ = panic "plusSimplCount"
+ -- We use one or the other consistently
+
+pprSimplCount (VerySimplCount n) = ptext (sLit "Total ticks:") <+> int n
+pprSimplCount (SimplCount { ticks = tks, details = dts, log1 = l1, log2 = l2 })
+ = vcat [ptext (sLit "Total ticks: ") <+> int tks,
+ blankLine,
+ pprTickCounts (Map.toList dts),
+ if verboseSimplStats then
+ vcat [blankLine,
+ ptext (sLit "Log (most recent first)"),
+ nest 4 (vcat (map ppr l1) $$ vcat (map ppr l2))]
+ else empty
+ ]
+
+pprTickCounts :: [(Tick,Int)] -> SDoc
+pprTickCounts [] = empty
+pprTickCounts ((tick1,n1):ticks)
+ = vcat [int tot_n <+> text (tickString tick1),
+ pprTCDetails real_these,
+ pprTickCounts others
+ ]
+ where
+ tick1_tag = tickToTag tick1
+ (these, others) = span same_tick ticks
+ real_these = (tick1,n1):these
+ same_tick (tick2,_) = tickToTag tick2 == tick1_tag
+ tot_n = sum [n | (_,n) <- real_these]
+
+pprTCDetails :: [(Tick, Int)] -> SDoc
+pprTCDetails ticks
+ = nest 4 (vcat [int n <+> pprTickCts tick | (tick,n) <- ticks])
+\end{code}
+
+
+\begin{code}
+data Tick
+ = PreInlineUnconditionally Id
+ | PostInlineUnconditionally Id
+
+ | UnfoldingDone Id
+ | RuleFired FastString -- Rule name
+
+ | LetFloatFromLet
+ | EtaExpansion Id -- LHS binder
+ | EtaReduction Id -- Binder on outer lambda
+ | BetaReduction Id -- Lambda binder
+
+
+ | CaseOfCase Id -- Bndr on *inner* case
+ | KnownBranch Id -- Case binder
+ | CaseMerge Id -- Binder on outer case
+ | AltMerge Id -- Case binder
+ | CaseElim Id -- Case binder
+ | CaseIdentity Id -- Case binder
+ | FillInCaseDefault Id -- Case binder
+
+ | BottomFound
+ | SimplifierDone -- Ticked at each iteration of the simplifier
+
+instance Outputable Tick where
+ ppr tick = text (tickString tick) <+> pprTickCts tick
+
+instance Eq Tick where
+ a == b = case a `cmpTick` b of
+ EQ -> True
+ _ -> False
+
+instance Ord Tick where
+ compare = cmpTick
+
+tickToTag :: Tick -> Int
+tickToTag (PreInlineUnconditionally _) = 0
+tickToTag (PostInlineUnconditionally _) = 1
+tickToTag (UnfoldingDone _) = 2
+tickToTag (RuleFired _) = 3
+tickToTag LetFloatFromLet = 4
+tickToTag (EtaExpansion _) = 5
+tickToTag (EtaReduction _) = 6
+tickToTag (BetaReduction _) = 7
+tickToTag (CaseOfCase _) = 8
+tickToTag (KnownBranch _) = 9
+tickToTag (CaseMerge _) = 10
+tickToTag (CaseElim _) = 11
+tickToTag (CaseIdentity _) = 12
+tickToTag (FillInCaseDefault _) = 13
+tickToTag BottomFound = 14
+tickToTag SimplifierDone = 16
+tickToTag (AltMerge _) = 17
+
+tickString :: Tick -> String
+tickString (PreInlineUnconditionally _) = "PreInlineUnconditionally"
+tickString (PostInlineUnconditionally _)= "PostInlineUnconditionally"
+tickString (UnfoldingDone _) = "UnfoldingDone"
+tickString (RuleFired _) = "RuleFired"
+tickString LetFloatFromLet = "LetFloatFromLet"
+tickString (EtaExpansion _) = "EtaExpansion"
+tickString (EtaReduction _) = "EtaReduction"
+tickString (BetaReduction _) = "BetaReduction"
+tickString (CaseOfCase _) = "CaseOfCase"
+tickString (KnownBranch _) = "KnownBranch"
+tickString (CaseMerge _) = "CaseMerge"
+tickString (AltMerge _) = "AltMerge"
+tickString (CaseElim _) = "CaseElim"
+tickString (CaseIdentity _) = "CaseIdentity"
+tickString (FillInCaseDefault _) = "FillInCaseDefault"
+tickString BottomFound = "BottomFound"
+tickString SimplifierDone = "SimplifierDone"
+
+pprTickCts :: Tick -> SDoc
+pprTickCts (PreInlineUnconditionally v) = ppr v
+pprTickCts (PostInlineUnconditionally v)= ppr v
+pprTickCts (UnfoldingDone v) = ppr v
+pprTickCts (RuleFired v) = ppr v
+pprTickCts LetFloatFromLet = empty
+pprTickCts (EtaExpansion v) = ppr v
+pprTickCts (EtaReduction v) = ppr v
+pprTickCts (BetaReduction v) = ppr v
+pprTickCts (CaseOfCase v) = ppr v
+pprTickCts (KnownBranch v) = ppr v
+pprTickCts (CaseMerge v) = ppr v
+pprTickCts (AltMerge v) = ppr v
+pprTickCts (CaseElim v) = ppr v
+pprTickCts (CaseIdentity v) = ppr v
+pprTickCts (FillInCaseDefault v) = ppr v
+pprTickCts _ = empty
+
+cmpTick :: Tick -> Tick -> Ordering
+cmpTick a b = case (tickToTag a `compare` tickToTag b) of
+ GT -> GT
+ EQ -> cmpEqTick a b
+ LT -> LT
+
+cmpEqTick :: Tick -> Tick -> Ordering
+cmpEqTick (PreInlineUnconditionally a) (PreInlineUnconditionally b) = a `compare` b
+cmpEqTick (PostInlineUnconditionally a) (PostInlineUnconditionally b) = a `compare` b
+cmpEqTick (UnfoldingDone a) (UnfoldingDone b) = a `compare` b
+cmpEqTick (RuleFired a) (RuleFired b) = a `compare` b
+cmpEqTick (EtaExpansion a) (EtaExpansion b) = a `compare` b
+cmpEqTick (EtaReduction a) (EtaReduction b) = a `compare` b
+cmpEqTick (BetaReduction a) (BetaReduction b) = a `compare` b
+cmpEqTick (CaseOfCase a) (CaseOfCase b) = a `compare` b
+cmpEqTick (KnownBranch a) (KnownBranch b) = a `compare` b
+cmpEqTick (CaseMerge a) (CaseMerge b) = a `compare` b
+cmpEqTick (AltMerge a) (AltMerge b) = a `compare` b
+cmpEqTick (CaseElim a) (CaseElim b) = a `compare` b
+cmpEqTick (CaseIdentity a) (CaseIdentity b) = a `compare` b
+cmpEqTick (FillInCaseDefault a) (FillInCaseDefault b) = a `compare` b
+cmpEqTick _ _ = EQ
+\end{code}
+
+
+%************************************************************************
+%* *
+ Monad and carried data structure definitions
+%* *
+%************************************************************************
+
+\begin{code}
+newtype CoreState = CoreState {
+ cs_uniq_supply :: UniqSupply