X-Git-Url: http://git.megacz.com/?p=ghc-hetmet.git;a=blobdiff_plain;f=compiler%2FsimplCore%2FCoreMonad.lhs;h=67a0991ec0b43d7518ee46e6776b78b626637b99;hp=f80608956273be8db329c19dc5acadd1a25eaa82;hb=5909e9a896d40a18b4bcf6abb95e0b071bfd7db2;hpb=72462499b891d5779c19f3bda03f96e24f9554ae diff --git a/compiler/simplCore/CoreMonad.lhs b/compiler/simplCore/CoreMonad.lhs index f806089..67a0991 100644 --- a/compiler/simplCore/CoreMonad.lhs +++ b/compiler/simplCore/CoreMonad.lhs @@ -7,11 +7,21 @@ {-# LANGUAGE UndecidableInstances #-} module CoreMonad ( + -- * Configuration of the core-to-core passes + CoreToDo(..), + SimplifierMode(..), + FloatOutSwitches(..), + getCoreToDo, dumpSimplPhase, + + -- * Counting + SimplCount, doSimplTick, doFreeSimplTick, simplCountN, + pprSimplCount, plusSimplCount, zeroSimplCount, isZeroSimplCount, Tick(..), + -- * The monad CoreM, runCoreM, -- ** Reading from the monad - getHscEnv, getAnnEnv, getRuleBase, getModule, + getHscEnv, getRuleBase, getModule, getDynFlags, getOrigNameCache, -- ** Writing to the monad @@ -22,16 +32,18 @@ module CoreMonad ( liftIO1, liftIO2, liftIO3, liftIO4, -- ** Dealing with annotations - findAnnotations, deserializeAnnotations, addAnnotation, + getAnnotations, getFirstAnnotations, -- ** Debug output - endPass, endPassIf, + showPass, endPass, endIteration, dumpIfSet, -- ** Screen output putMsg, putMsgS, errorMsg, errorMsgS, fatalErrorMsg, fatalErrorMsgS, debugTraceMsg, debugTraceMsgS, - dumpIfSet_dyn, + dumpIfSet_dyn, + + lookupOrigCoreM, #ifdef GHCI -- * Getting 'Name's @@ -49,11 +61,12 @@ import CoreLint ( lintCoreBindings ) import PrelNames ( iNTERACTIVE ) import HscTypes import Module ( Module ) -import DynFlags ( DynFlags, DynFlag ) -import SimplMonad ( SimplCount, plusSimplCount, zeroSimplCount ) +import DynFlags +import StaticFlags import Rules ( RuleBase ) +import BasicTypes ( CompilerPhase(..) ) import Annotations -import Serialized +import Id ( Id ) import IOEnv hiding ( liftIO, failM, failWithM ) import qualified IOEnv ( liftIO ) @@ -63,16 +76,27 @@ import TcRnMonad ( TcM, initTc ) import Outputable import FastString import qualified ErrUtils as Err +import Bag import Maybes import UniqSupply -import LazyUniqFM ( UniqFM ) +import UniqFM ( UniqFM, mapUFM, filterUFM ) +import MonadUtils +import Util ( split ) +import Data.List ( intersperse ) import Data.Dynamic import Data.IORef +import Data.Map (Map) +import qualified Data.Map as Map import Data.Word import Control.Monad import Prelude hiding ( read ) +import OccName +import IfaceEnv +import Name +import SrcLoc +import Control.Exception.Base #ifdef GHCI import {-# SOURCE #-} TcSplice ( lookupThName_maybe ) @@ -91,31 +115,708 @@ be, and it makes a conveneint place. place for them. They print out stuff before and after core passes, and do Core Lint when necessary. \begin{code} -endPass :: DynFlags -> String -> DynFlag -> [CoreBind] -> [CoreRule] -> IO () -endPass = dumpAndLint Err.dumpIfSet_core +showPass :: DynFlags -> CoreToDo -> IO () +showPass dflags pass = Err.showPass dflags (showSDoc (ppr pass)) + +endPass :: DynFlags -> CoreToDo -> [CoreBind] -> [CoreRule] -> IO () +endPass dflags pass = dumpAndLint dflags True pass empty (coreDumpFlag pass) + +-- Same as endPass but doesn't dump Core even with -dverbose-core2core +endIteration :: DynFlags -> CoreToDo -> Int -> [CoreBind] -> [CoreRule] -> IO () +endIteration dflags pass n + = dumpAndLint dflags False pass (ptext (sLit "iteration=") <> int n) + (Just Opt_D_dump_simpl_iterations) -endPassIf :: Bool -> DynFlags -> String -> DynFlag -> [CoreBind] -> [CoreRule] -> IO () -endPassIf cond = dumpAndLint (Err.dumpIf_core cond) +dumpIfSet :: Bool -> CoreToDo -> SDoc -> SDoc -> IO () +dumpIfSet dump_me pass extra_info doc + = Err.dumpIfSet dump_me (showSDoc (ppr pass <+> extra_info)) doc -dumpAndLint :: (DynFlags -> DynFlag -> String -> SDoc -> IO ()) - -> DynFlags -> String -> DynFlag +dumpAndLint :: DynFlags -> Bool -> CoreToDo -> SDoc -> Maybe DynFlag -> [CoreBind] -> [CoreRule] -> IO () -dumpAndLint dump dflags pass_name dump_flag binds rules +-- The "show_all" parameter says to print dump if -dverbose-core2core is on +dumpAndLint dflags show_all pass extra_info mb_dump_flag binds rules = do { -- Report result size if required -- This has the side effect of forcing the intermediate to be evaluated ; Err.debugTraceMsg dflags 2 $ (text " Result size =" <+> int (coreBindsSize binds)) -- Report verbosely, if required - ; dump dflags dump_flag pass_name - (pprCoreBindings binds $$ ppUnless (null rules) pp_rules) + ; let pass_name = showSDoc (ppr pass <+> extra_info) + dump_doc = pprCoreBindings binds + $$ ppUnless (null rules) pp_rules + + ; case mb_dump_flag of + Nothing -> return () + Just dump_flag -> Err.dumpIfSet_dyn_or dflags dump_flags pass_name dump_doc + where + dump_flags | show_all = [dump_flag, Opt_D_verbose_core2core] + | otherwise = [dump_flag] -- Type check - ; lintCoreBindings dflags pass_name binds } + ; when (dopt Opt_DoCoreLinting dflags) $ + do { let (warns, errs) = lintCoreBindings binds + ; Err.showPass dflags ("Core Linted result of " ++ pass_name) + ; displayLintResults dflags pass warns errs binds } } where pp_rules = vcat [ blankLine , ptext (sLit "------ Local rules for imported ids --------") , pprRules rules ] + +displayLintResults :: DynFlags -> CoreToDo + -> Bag Err.Message -> Bag Err.Message -> [CoreBind] + -> IO () +displayLintResults dflags pass warns errs binds + | not (isEmptyBag errs) + = do { printDump (vcat [ banner "errors", Err.pprMessageBag errs + , ptext (sLit "*** Offending Program ***") + , pprCoreBindings binds + , ptext (sLit "*** End of Offense ***") ]) + ; Err.ghcExit dflags 1 } + + | not (isEmptyBag warns) + , not (case pass of { CoreDesugar -> True; _ -> False }) + -- Suppress warnings after desugaring pass because some + -- are legitimate. Notably, the desugarer generates instance + -- methods with INLINE pragmas that form a mutually recursive + -- group. Only afer a round of simplification are they unravelled. + , not opt_NoDebugOutput + , showLintWarnings pass + = printDump (banner "warnings" $$ Err.pprMessageBag warns) + + | otherwise = return () + where + banner string = ptext (sLit "*** Core Lint") <+> text string + <+> ptext (sLit ": in result of") <+> ppr pass + <+> ptext (sLit "***") + +showLintWarnings :: CoreToDo -> Bool +-- Disable Lint warnings on the first simplifier pass, because +-- there may be some INLINE knots still tied, which is tiresomely noisy +showLintWarnings (CoreDoSimplify _ (SimplMode { sm_phase = InitialPhase })) = False +showLintWarnings _ = True +\end{code} + + +%************************************************************************ +%* * + The CoreToDo type and related types + Abstraction of core-to-core passes to run. +%* * +%************************************************************************ + +\begin{code} +data CoreToDo -- These are diff core-to-core passes, + -- which may be invoked in any order, + -- as many times as you like. + + = CoreDoSimplify -- The core-to-core simplifier. + Int -- Max iterations + SimplifierMode + + | CoreDoFloatInwards + | CoreDoFloatOutwards FloatOutSwitches + | CoreLiberateCase + | CoreDoPrintCore + | CoreDoStaticArgs + | CoreDoStrictness + | CoreDoWorkerWrapper + | CoreDoSpecialising + | CoreDoSpecConstr + | CoreDoGlomBinds + | CoreCSE + | CoreDoRuleCheck CompilerPhase String -- Check for non-application of rules + -- matching this string + | CoreDoVectorisation + | CoreDoNothing -- Useful when building up + | CoreDoPasses [CoreToDo] -- lists of these things + + | CoreDesugar -- Not strictly a core-to-core pass, but produces + -- Core output, and hence useful to pass to endPass + + | CoreTidy + | CorePrep + +coreDumpFlag :: CoreToDo -> Maybe DynFlag +coreDumpFlag (CoreDoSimplify {}) = Just Opt_D_dump_simpl_phases +coreDumpFlag CoreDoFloatInwards = Just Opt_D_verbose_core2core +coreDumpFlag (CoreDoFloatOutwards {}) = Just Opt_D_verbose_core2core +coreDumpFlag CoreLiberateCase = Just Opt_D_verbose_core2core +coreDumpFlag CoreDoStaticArgs = Just Opt_D_verbose_core2core +coreDumpFlag CoreDoStrictness = Just Opt_D_dump_stranal +coreDumpFlag CoreDoWorkerWrapper = Just Opt_D_dump_worker_wrapper +coreDumpFlag CoreDoSpecialising = Just Opt_D_dump_spec +coreDumpFlag CoreDoSpecConstr = Just Opt_D_dump_spec +coreDumpFlag CoreCSE = Just Opt_D_dump_cse +coreDumpFlag CoreDoVectorisation = Just Opt_D_dump_vect +coreDumpFlag CoreDesugar = Just Opt_D_dump_ds +coreDumpFlag CoreTidy = Just Opt_D_dump_simpl +coreDumpFlag CorePrep = Just Opt_D_dump_prep + +coreDumpFlag CoreDoPrintCore = Nothing +coreDumpFlag (CoreDoRuleCheck {}) = Nothing +coreDumpFlag CoreDoNothing = Nothing +coreDumpFlag CoreDoGlomBinds = Nothing +coreDumpFlag (CoreDoPasses {}) = Nothing + +instance Outputable CoreToDo where + ppr (CoreDoSimplify n md) = ptext (sLit "Simplifier") + <+> ppr md + <+> ptext (sLit "max-iterations=") <> int n + ppr CoreDoFloatInwards = ptext (sLit "Float inwards") + ppr (CoreDoFloatOutwards f) = ptext (sLit "Float out") <> parens (ppr f) + ppr CoreLiberateCase = ptext (sLit "Liberate case") + ppr CoreDoStaticArgs = ptext (sLit "Static argument") + ppr CoreDoStrictness = ptext (sLit "Demand analysis") + ppr CoreDoWorkerWrapper = ptext (sLit "Worker Wrapper binds") + ppr CoreDoSpecialising = ptext (sLit "Specialise") + ppr CoreDoSpecConstr = ptext (sLit "SpecConstr") + ppr CoreCSE = ptext (sLit "Common sub-expression") + ppr CoreDoVectorisation = ptext (sLit "Vectorisation") + ppr CoreDesugar = ptext (sLit "Desugar") + ppr CoreTidy = ptext (sLit "Tidy Core") + ppr CorePrep = ptext (sLit "CorePrep") + ppr CoreDoPrintCore = ptext (sLit "Print core") + ppr (CoreDoRuleCheck {}) = ptext (sLit "Rule check") + ppr CoreDoGlomBinds = ptext (sLit "Glom binds") + ppr CoreDoNothing = ptext (sLit "CoreDoNothing") + ppr (CoreDoPasses {}) = ptext (sLit "CoreDoPasses") +\end{code} + +\begin{code} +data SimplifierMode -- See comments in SimplMonad + = SimplMode + { sm_names :: [String] -- Name(s) of the phase + , sm_phase :: CompilerPhase + , sm_rules :: Bool -- Whether RULES are enabled + , sm_inline :: Bool -- Whether inlining is enabled + , sm_case_case :: Bool -- Whether case-of-case is enabled + , sm_eta_expand :: Bool -- Whether eta-expansion is enabled + } + +instance Outputable SimplifierMode where + ppr (SimplMode { sm_phase = p, sm_names = ss + , sm_rules = r, sm_inline = i + , sm_eta_expand = eta, sm_case_case = cc }) + = ptext (sLit "SimplMode") <+> braces ( + sep [ ptext (sLit "Phase =") <+> ppr p <+> + brackets (text (concat $ intersperse "," ss)) <> comma + , pp_flag i (sLit "inline") <> comma + , pp_flag r (sLit "rules") <> comma + , pp_flag eta (sLit "eta-expand") <> comma + , pp_flag cc (sLit "case-of-case") ]) + where + pp_flag f s = ppUnless f (ptext (sLit "no")) <+> ptext s +\end{code} + + +\begin{code} +data FloatOutSwitches = FloatOutSwitches { + floatOutLambdas :: Maybe Int, -- ^ Just n <=> float lambdas to top level, if + -- doing so will abstract over n or fewer + -- value variables + -- Nothing <=> float all lambdas to top level, + -- regardless of how many free variables + -- Just 0 is the vanilla case: float a lambda + -- iff it has no free vars + + floatOutConstants :: Bool, -- ^ True <=> float constants to top level, + -- even if they do not escape a lambda + floatOutPartialApplications :: Bool -- ^ True <=> float out partial applications + -- based on arity information. + } +instance Outputable FloatOutSwitches where + ppr = pprFloatOutSwitches + +pprFloatOutSwitches :: FloatOutSwitches -> SDoc +pprFloatOutSwitches sw + = ptext (sLit "FOS") <+> (braces $ + sep $ punctuate comma $ + [ ptext (sLit "Lam =") <+> ppr (floatOutLambdas sw) + , ptext (sLit "Consts =") <+> ppr (floatOutConstants sw) + , ptext (sLit "PAPs =") <+> ppr (floatOutPartialApplications sw) ]) +\end{code} + + +%************************************************************************ +%* * + Generating the main optimisation pipeline +%* * +%************************************************************************ + +\begin{code} +getCoreToDo :: DynFlags -> [CoreToDo] +getCoreToDo dflags + = core_todo + where + opt_level = optLevel dflags + phases = simplPhases dflags + max_iter = maxSimplIterations dflags + rule_check = ruleCheck dflags + strictness = dopt Opt_Strictness dflags + full_laziness = dopt Opt_FullLaziness dflags + do_specialise = dopt Opt_Specialise dflags + do_float_in = dopt Opt_FloatIn dflags + cse = dopt Opt_CSE dflags + spec_constr = dopt Opt_SpecConstr dflags + liberate_case = dopt Opt_LiberateCase dflags + static_args = dopt Opt_StaticArgumentTransformation dflags + rules_on = dopt Opt_EnableRewriteRules dflags + eta_expand_on = dopt Opt_DoLambdaEtaExpansion dflags + + maybe_rule_check phase = runMaybe rule_check (CoreDoRuleCheck phase) + + maybe_strictness_before phase + = runWhen (phase `elem` strictnessBefore dflags) CoreDoStrictness + + base_mode = SimplMode { sm_phase = panic "base_mode" + , sm_names = [] + , sm_rules = rules_on + , sm_eta_expand = eta_expand_on + , sm_inline = True + , sm_case_case = True } + + simpl_phase phase names iter + = CoreDoPasses + $ [ maybe_strictness_before phase + , CoreDoSimplify iter + (base_mode { sm_phase = Phase phase + , sm_names = names }) + + , maybe_rule_check (Phase phase) ] + + -- Vectorisation can introduce a fair few common sub expressions involving + -- DPH primitives. For example, see the Reverse test from dph-examples. + -- We need to eliminate these common sub expressions before their definitions + -- are inlined in phase 2. The CSE introduces lots of v1 = v2 bindings, + -- so we also run simpl_gently to inline them. + ++ (if dopt Opt_Vectorise dflags && phase == 3 + then [CoreCSE, simpl_gently] + else []) + + vectorisation + = runWhen (dopt Opt_Vectorise dflags) $ + CoreDoPasses [ simpl_gently, CoreDoVectorisation ] + + -- By default, we have 2 phases before phase 0. + + -- Want to run with inline phase 2 after the specialiser to give + -- maximum chance for fusion to work before we inline build/augment + -- in phase 1. This made a difference in 'ansi' where an + -- overloaded function wasn't inlined till too late. + + -- Need phase 1 so that build/augment get + -- inlined. I found that spectral/hartel/genfft lost some useful + -- strictness in the function sumcode' if augment is not inlined + -- before strictness analysis runs + simpl_phases = CoreDoPasses [ simpl_phase phase ["main"] max_iter + | phase <- [phases, phases-1 .. 1] ] + + + -- initial simplify: mk specialiser happy: minimum effort please + simpl_gently = CoreDoSimplify max_iter + (base_mode { sm_phase = InitialPhase + , sm_names = ["Gentle"] + , sm_rules = rules_on -- Note [RULEs enabled in SimplGently] + , sm_inline = False + , sm_case_case = False }) + -- Don't do case-of-case transformations. + -- This makes full laziness work better + + core_todo = + if opt_level == 0 then + [vectorisation, + simpl_phase 0 ["final"] max_iter] + else {- opt_level >= 1 -} [ + + -- We want to do the static argument transform before full laziness as it + -- may expose extra opportunities to float things outwards. However, to fix + -- up the output of the transformation we need at do at least one simplify + -- after this before anything else + runWhen static_args (CoreDoPasses [ simpl_gently, CoreDoStaticArgs ]), + + -- We run vectorisation here for now, but we might also try to run + -- it later + vectorisation, + + -- initial simplify: mk specialiser happy: minimum effort please + simpl_gently, + + -- Specialisation is best done before full laziness + -- so that overloaded functions have all their dictionary lambdas manifest + runWhen do_specialise CoreDoSpecialising, + + runWhen full_laziness $ + CoreDoFloatOutwards FloatOutSwitches { + floatOutLambdas = Just 0, + floatOutConstants = True, + floatOutPartialApplications = False }, + -- Was: gentleFloatOutSwitches + -- + -- I have no idea why, but not floating constants to + -- top level is very bad in some cases. + -- + -- Notably: p_ident in spectral/rewrite + -- Changing from "gentle" to "constantsOnly" + -- improved rewrite's allocation by 19%, and + -- made 0.0% difference to any other nofib + -- benchmark + -- + -- Not doing floatOutPartialApplications yet, we'll do + -- that later on when we've had a chance to get more + -- accurate arity information. In fact it makes no + -- difference at all to performance if we do it here, + -- but maybe we save some unnecessary to-and-fro in + -- the simplifier. + + runWhen do_float_in CoreDoFloatInwards, + + simpl_phases, + + -- Phase 0: allow all Ids to be inlined now + -- This gets foldr inlined before strictness analysis + + -- At least 3 iterations because otherwise we land up with + -- huge dead expressions because of an infelicity in the + -- simpifier. + -- let k = BIG in foldr k z xs + -- ==> let k = BIG in letrec go = \xs -> ...(k x).... in go xs + -- ==> let k = BIG in letrec go = \xs -> ...(BIG x).... in go xs + -- Don't stop now! + simpl_phase 0 ["main"] (max max_iter 3), + + runWhen strictness (CoreDoPasses [ + CoreDoStrictness, + CoreDoWorkerWrapper, + CoreDoGlomBinds, + simpl_phase 0 ["post-worker-wrapper"] max_iter + ]), + + runWhen full_laziness $ + CoreDoFloatOutwards FloatOutSwitches { + floatOutLambdas = floatLamArgs dflags, + floatOutConstants = True, + floatOutPartialApplications = True }, + -- nofib/spectral/hartel/wang doubles in speed if you + -- do full laziness late in the day. It only happens + -- after fusion and other stuff, so the early pass doesn't + -- catch it. For the record, the redex is + -- f_el22 (f_el21 r_midblock) + + + runWhen cse CoreCSE, + -- We want CSE to follow the final full-laziness pass, because it may + -- succeed in commoning up things floated out by full laziness. + -- CSE used to rely on the no-shadowing invariant, but it doesn't any more + + runWhen do_float_in CoreDoFloatInwards, + + maybe_rule_check (Phase 0), + + -- Case-liberation for -O2. This should be after + -- strictness analysis and the simplification which follows it. + runWhen liberate_case (CoreDoPasses [ + CoreLiberateCase, + simpl_phase 0 ["post-liberate-case"] max_iter + ]), -- Run the simplifier after LiberateCase to vastly + -- reduce the possiblility of shadowing + -- Reason: see Note [Shadowing] in SpecConstr.lhs + + runWhen spec_constr CoreDoSpecConstr, + + maybe_rule_check (Phase 0), + + -- Final clean-up simplification: + simpl_phase 0 ["final"] max_iter + ] + +-- The core-to-core pass ordering is derived from the DynFlags: +runWhen :: Bool -> CoreToDo -> CoreToDo +runWhen True do_this = do_this +runWhen False _ = CoreDoNothing + +runMaybe :: Maybe a -> (a -> CoreToDo) -> CoreToDo +runMaybe (Just x) f = f x +runMaybe Nothing _ = CoreDoNothing + +dumpSimplPhase :: DynFlags -> SimplifierMode -> Bool +dumpSimplPhase dflags mode + | Just spec_string <- shouldDumpSimplPhase dflags + = match_spec spec_string + | otherwise + = dopt Opt_D_verbose_core2core dflags + + where + match_spec :: String -> Bool + match_spec spec_string + = or $ map (and . map match . split ':') + $ split ',' spec_string + + match :: String -> Bool + match "" = True + match s = case reads s of + [(n,"")] -> phase_num n + _ -> phase_name s + + phase_num :: Int -> Bool + phase_num n = case sm_phase mode of + Phase k -> n == k + _ -> False + + phase_name :: String -> Bool + phase_name s = s `elem` sm_names mode +\end{code} + + +Note [RULEs enabled in SimplGently] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +RULES are enabled when doing "gentle" simplification. Two reasons: + + * We really want the class-op cancellation to happen: + op (df d1 d2) --> $cop3 d1 d2 + because this breaks the mutual recursion between 'op' and 'df' + + * I wanted the RULE + lift String ===> ... + to work in Template Haskell when simplifying + splices, so we get simpler code for literal strings + +But watch out: list fusion can prevent floating. So use phase control +to switch off those rules until after floating. + + +%************************************************************************ +%* * + Counting and logging +%* * +%************************************************************************ + +\begin{code} +verboseSimplStats :: Bool +verboseSimplStats = opt_PprStyle_Debug -- For now, anyway + +zeroSimplCount :: DynFlags -> SimplCount +isZeroSimplCount :: SimplCount -> Bool +pprSimplCount :: SimplCount -> SDoc +doSimplTick, doFreeSimplTick :: Tick -> SimplCount -> SimplCount +plusSimplCount :: SimplCount -> SimplCount -> SimplCount +\end{code} + +\begin{code} +data SimplCount + = VerySimplCount !Int -- Used when don't want detailed stats + + | SimplCount { + ticks :: !Int, -- Total ticks + details :: !TickCounts, -- How many of each type + + n_log :: !Int, -- N + log1 :: [Tick], -- Last N events; <= opt_HistorySize, + -- most recent first + log2 :: [Tick] -- Last opt_HistorySize events before that + -- Having log1, log2 lets us accumulate the + -- recent history reasonably efficiently + } + +type TickCounts = Map Tick Int + +simplCountN :: SimplCount -> Int +simplCountN (VerySimplCount n) = n +simplCountN (SimplCount { ticks = n }) = n + +zeroSimplCount dflags + -- This is where we decide whether to do + -- the VerySimpl version or the full-stats version + | dopt Opt_D_dump_simpl_stats dflags + = SimplCount {ticks = 0, details = Map.empty, + n_log = 0, log1 = [], log2 = []} + | otherwise + = VerySimplCount 0 + +isZeroSimplCount (VerySimplCount n) = n==0 +isZeroSimplCount (SimplCount { ticks = n }) = n==0 + +doFreeSimplTick tick sc@SimplCount { details = dts } + = sc { details = dts `addTick` tick } +doFreeSimplTick _ sc = sc + +doSimplTick tick sc@SimplCount { ticks = tks, details = dts, n_log = nl, log1 = l1 } + | nl >= opt_HistorySize = sc1 { n_log = 1, log1 = [tick], log2 = l1 } + | otherwise = sc1 { n_log = nl+1, log1 = tick : l1 } + where + sc1 = sc { ticks = tks+1, details = dts `addTick` tick } + +doSimplTick _ (VerySimplCount n) = VerySimplCount (n+1) + + +-- Don't use Map.unionWith because that's lazy, and we want to +-- be pretty strict here! +addTick :: TickCounts -> Tick -> TickCounts +addTick fm tick = case Map.lookup tick fm of + Nothing -> Map.insert tick 1 fm + Just n -> n1 `seq` Map.insert tick n1 fm + where + n1 = n+1 + + +plusSimplCount sc1@(SimplCount { ticks = tks1, details = dts1 }) + sc2@(SimplCount { ticks = tks2, details = dts2 }) + = log_base { ticks = tks1 + tks2, details = Map.unionWith (+) dts1 dts2 } + where + -- A hackish way of getting recent log info + log_base | null (log1 sc2) = sc1 -- Nothing at all in sc2 + | null (log2 sc2) = sc2 { log2 = log1 sc1 } + | otherwise = sc2 + +plusSimplCount (VerySimplCount n) (VerySimplCount m) = VerySimplCount (n+m) +plusSimplCount _ _ = panic "plusSimplCount" + -- We use one or the other consistently + +pprSimplCount (VerySimplCount n) = ptext (sLit "Total ticks:") <+> int n +pprSimplCount (SimplCount { ticks = tks, details = dts, log1 = l1, log2 = l2 }) + = vcat [ptext (sLit "Total ticks: ") <+> int tks, + blankLine, + pprTickCounts (Map.toList dts), + if verboseSimplStats then + vcat [blankLine, + ptext (sLit "Log (most recent first)"), + nest 4 (vcat (map ppr l1) $$ vcat (map ppr l2))] + else empty + ] + +pprTickCounts :: [(Tick,Int)] -> SDoc +pprTickCounts [] = empty +pprTickCounts ((tick1,n1):ticks) + = vcat [int tot_n <+> text (tickString tick1), + pprTCDetails real_these, + pprTickCounts others + ] + where + tick1_tag = tickToTag tick1 + (these, others) = span same_tick ticks + real_these = (tick1,n1):these + same_tick (tick2,_) = tickToTag tick2 == tick1_tag + tot_n = sum [n | (_,n) <- real_these] + +pprTCDetails :: [(Tick, Int)] -> SDoc +pprTCDetails ticks + = nest 4 (vcat [int n <+> pprTickCts tick | (tick,n) <- ticks]) +\end{code} + + +\begin{code} +data Tick + = PreInlineUnconditionally Id + | PostInlineUnconditionally Id + + | UnfoldingDone Id + | RuleFired FastString -- Rule name + + | LetFloatFromLet + | EtaExpansion Id -- LHS binder + | EtaReduction Id -- Binder on outer lambda + | BetaReduction Id -- Lambda binder + + + | CaseOfCase Id -- Bndr on *inner* case + | KnownBranch Id -- Case binder + | CaseMerge Id -- Binder on outer case + | AltMerge Id -- Case binder + | CaseElim Id -- Case binder + | CaseIdentity Id -- Case binder + | FillInCaseDefault Id -- Case binder + + | BottomFound + | SimplifierDone -- Ticked at each iteration of the simplifier + +instance Outputable Tick where + ppr tick = text (tickString tick) <+> pprTickCts tick + +instance Eq Tick where + a == b = case a `cmpTick` b of + EQ -> True + _ -> False + +instance Ord Tick where + compare = cmpTick + +tickToTag :: Tick -> Int +tickToTag (PreInlineUnconditionally _) = 0 +tickToTag (PostInlineUnconditionally _) = 1 +tickToTag (UnfoldingDone _) = 2 +tickToTag (RuleFired _) = 3 +tickToTag LetFloatFromLet = 4 +tickToTag (EtaExpansion _) = 5 +tickToTag (EtaReduction _) = 6 +tickToTag (BetaReduction _) = 7 +tickToTag (CaseOfCase _) = 8 +tickToTag (KnownBranch _) = 9 +tickToTag (CaseMerge _) = 10 +tickToTag (CaseElim _) = 11 +tickToTag (CaseIdentity _) = 12 +tickToTag (FillInCaseDefault _) = 13 +tickToTag BottomFound = 14 +tickToTag SimplifierDone = 16 +tickToTag (AltMerge _) = 17 + +tickString :: Tick -> String +tickString (PreInlineUnconditionally _) = "PreInlineUnconditionally" +tickString (PostInlineUnconditionally _)= "PostInlineUnconditionally" +tickString (UnfoldingDone _) = "UnfoldingDone" +tickString (RuleFired _) = "RuleFired" +tickString LetFloatFromLet = "LetFloatFromLet" +tickString (EtaExpansion _) = "EtaExpansion" +tickString (EtaReduction _) = "EtaReduction" +tickString (BetaReduction _) = "BetaReduction" +tickString (CaseOfCase _) = "CaseOfCase" +tickString (KnownBranch _) = "KnownBranch" +tickString (CaseMerge _) = "CaseMerge" +tickString (AltMerge _) = "AltMerge" +tickString (CaseElim _) = "CaseElim" +tickString (CaseIdentity _) = "CaseIdentity" +tickString (FillInCaseDefault _) = "FillInCaseDefault" +tickString BottomFound = "BottomFound" +tickString SimplifierDone = "SimplifierDone" + +pprTickCts :: Tick -> SDoc +pprTickCts (PreInlineUnconditionally v) = ppr v +pprTickCts (PostInlineUnconditionally v)= ppr v +pprTickCts (UnfoldingDone v) = ppr v +pprTickCts (RuleFired v) = ppr v +pprTickCts LetFloatFromLet = empty +pprTickCts (EtaExpansion v) = ppr v +pprTickCts (EtaReduction v) = ppr v +pprTickCts (BetaReduction v) = ppr v +pprTickCts (CaseOfCase v) = ppr v +pprTickCts (KnownBranch v) = ppr v +pprTickCts (CaseMerge v) = ppr v +pprTickCts (AltMerge v) = ppr v +pprTickCts (CaseElim v) = ppr v +pprTickCts (CaseIdentity v) = ppr v +pprTickCts (FillInCaseDefault v) = ppr v +pprTickCts _ = empty + +cmpTick :: Tick -> Tick -> Ordering +cmpTick a b = case (tickToTag a `compare` tickToTag b) of + GT -> GT + EQ -> cmpEqTick a b + LT -> LT + +cmpEqTick :: Tick -> Tick -> Ordering +cmpEqTick (PreInlineUnconditionally a) (PreInlineUnconditionally b) = a `compare` b +cmpEqTick (PostInlineUnconditionally a) (PostInlineUnconditionally b) = a `compare` b +cmpEqTick (UnfoldingDone a) (UnfoldingDone b) = a `compare` b +cmpEqTick (RuleFired a) (RuleFired b) = a `compare` b +cmpEqTick (EtaExpansion a) (EtaExpansion b) = a `compare` b +cmpEqTick (EtaReduction a) (EtaReduction b) = a `compare` b +cmpEqTick (BetaReduction a) (BetaReduction b) = a `compare` b +cmpEqTick (CaseOfCase a) (CaseOfCase b) = a `compare` b +cmpEqTick (KnownBranch a) (KnownBranch b) = a `compare` b +cmpEqTick (CaseMerge a) (CaseMerge b) = a `compare` b +cmpEqTick (AltMerge a) (AltMerge b) = a `compare` b +cmpEqTick (CaseElim a) (CaseElim b) = a `compare` b +cmpEqTick (CaseIdentity a) (CaseIdentity b) = a `compare` b +cmpEqTick (FillInCaseDefault a) (FillInCaseDefault b) = a `compare` b +cmpEqTick _ _ = EQ \end{code} @@ -126,9 +827,8 @@ dumpAndLint dump dflags pass_name dump_flag binds rules %************************************************************************ \begin{code} -data CoreState = CoreState { - cs_uniq_supply :: UniqSupply, - cs_ann_env :: AnnEnv +newtype CoreState = CoreState { + cs_uniq_supply :: UniqSupply } data CoreReader = CoreReader { @@ -187,13 +887,12 @@ instance MonadUnique CoreM where return us1 runCoreM :: HscEnv - -> AnnEnv -> RuleBase -> UniqSupply -> Module -> CoreM a -> IO (a, SimplCount) -runCoreM hsc_env ann_env rule_base us mod m = +runCoreM hsc_env rule_base us mod m = liftM extract $ runIOEnv reader $ unCoreM m state where reader = CoreReader { @@ -202,8 +901,7 @@ runCoreM hsc_env ann_env rule_base us mod m = cr_module = mod } state = CoreState { - cs_uniq_supply = us, - cs_ann_env = ann_env + cs_uniq_supply = us } extract :: (a, CoreState, CoreWriter) -> (a, SimplCount) @@ -268,9 +966,6 @@ liftIOWithCount what = liftIO what >>= (\(count, x) -> addSimplCount count >> re getHscEnv :: CoreM HscEnv getHscEnv = read cr_hsc_env -getAnnEnv :: CoreM AnnEnv -getAnnEnv = getS cs_ann_env - getRuleBase :: CoreM RuleBase getRuleBase = read cr_rule_base @@ -302,38 +997,45 @@ getOrigNameCache = do %************************************************************************ \begin{code} - --- | Find all the annotations we currently know about for the given target. Note that no --- annotations will be returned if we haven't loaded information about the particular target --- you are inquiring about: by default, only those modules that have been imported by the --- program being compiled will have been loaded in this way. +-- | Get all annotations of a given type. This happens lazily, that is +-- no deserialization will take place until the [a] is actually demanded and +-- the [a] can also be empty (the UniqFM is not filtered). -- --- To load the information from additional modules, you can use the functions 'DynamicLoading.forceLoadModuleInterfaces' --- and 'DynamicLoading.forceLoadNameModuleInterface', but be aware that doing this indiscriminantly --- will impose a performance penalty. +-- This should be done once at the start of a Core-to-Core pass that uses +-- annotations. -- --- If no deserialization function is supplied, only transient annotations will be returned. -findAnnotations :: Typeable a => ([Word8] -> a) -> CoreAnnTarget -> CoreM [a] -findAnnotations deserialize target = do - ann_env <- getAnnEnv - return (findAnns deserialize ann_env target) - --- | Deserialize all annotations of a given type. This happens lazily, that is --- no deserialization will take place until the [a] is actually demanded and --- the [a] can also be empty (the UniqFM is not filtered). -deserializeAnnotations :: Typeable a => ([Word8] -> a) -> CoreM (UniqFM [a]) -deserializeAnnotations deserialize = do - ann_env <- getAnnEnv +-- See Note [Annotations] +getAnnotations :: Typeable a => ([Word8] -> a) -> ModGuts -> CoreM (UniqFM [a]) +getAnnotations deserialize guts = do + hsc_env <- getHscEnv + ann_env <- liftIO $ prepareAnnotations hsc_env (Just guts) return (deserializeAnns deserialize ann_env) -addAnnotation :: Typeable a => (a -> [Word8]) -> CoreAnnTarget -> a -> CoreM () -addAnnotation serialize target what = addAnnotationToEnv $ Annotation { ann_target = target, ann_value = toSerialized serialize what } - -addAnnotationToEnv :: Annotation -> CoreM () -addAnnotationToEnv annotation = modifyS (\state -> state { cs_ann_env = extendAnnEnvList (cs_ann_env state) [annotation] }) - +-- | Get at most one annotation of a given type per Unique. +getFirstAnnotations :: Typeable a => ([Word8] -> a) -> ModGuts -> CoreM (UniqFM a) +getFirstAnnotations deserialize guts + = liftM (mapUFM head . filterUFM (not . null)) + $ getAnnotations deserialize guts + \end{code} +Note [Annotations] +~~~~~~~~~~~~~~~~~~ +A Core-to-Core pass that wants to make use of annotations calls +getAnnotations or getFirstAnnotations at the beginning to obtain a UniqFM with +annotations of a specific type. This produces all annotations from interface +files read so far. However, annotations from interface files read during the +pass will not be visible until getAnnotations is called again. This is similar +to how rules work and probably isn't too bad. + +The current implementation could be optimised a bit: when looking up +annotations for a thing from the HomePackageTable, we could search directly in +the module where the thing is defined rather than building one UniqFM which +contains all annotations we know of. This would work because annotations can +only be given to things defined in the same module. However, since we would +only want to deserialise every annotation once, we would have to build a cache +for every module in the HTP. In the end, it's probably not worth it as long as +we aren't using annotations heavily. %************************************************************************ %* * @@ -426,3 +1128,29 @@ thNameToGhcName th_name = do liftIO $ initTcForLookup hsc_env (lookupThName_maybe th_name) #endif \end{code} + +\begin{code} +updNameCache' :: (NameCache -> (NameCache, a)) -> CoreM a +updNameCache' upd_fn = do + HscEnv { hsc_NC = nc_var } <- getHscEnv + r <- liftIO $ atomicModifyIORef nc_var upd_fn + r' <- liftIO $ readIORef nc_var + _ <- liftIO $ evaluate r' + return r + +-- cut-and-pasted from IfaceEnv, where it lives in the TcRn monad rather than CoreM +lookupOrigCoreM :: Module -> OccName -> CoreM Name +lookupOrigCoreM mod occ + = do { mod `seq` occ `seq` return () + ; updNameCache' $ \name_cache -> + case lookupOrigNameCache (nsNames name_cache) mod occ of { + Just name -> (name_cache, name); + Nothing -> + case takeUniqFromSupply (nsUniqs name_cache) of { + (uniq, us) -> + let + name = mkExternalName uniq mod occ noSrcSpan + new_cache = extendNameCache (nsNames name_cache) mod occ name + in (name_cache{ nsUniqs = us, nsNames = new_cache }, name) + }}} +\end{code} \ No newline at end of file