{-# LANGUAGE UndecidableInstances #-}
module CoreMonad (
+ -- * Configuration of the core-to-core passes
+ CoreToDo(..),
+ SimplifierMode(..),
+ SimplifierSwitch(..),
+ FloatOutSwitches(..),
+ getCoreToDo, dumpSimplPhase,
+
+ -- * Counting
+ SimplCount, doSimplTick, doFreeSimplTick, simplCountN,
+ pprSimplCount, plusSimplCount, zeroSimplCount, isZeroSimplCount, Tick(..),
+
-- * The monad
CoreM, runCoreM,
-- ** Reading from the monad
- getHscEnv, getAnnEnv, getRuleBase, getModule,
+ getHscEnv, getRuleBase, getModule,
getDynFlags, getOrigNameCache,
-- ** Writing to the monad
liftIO1, liftIO2, liftIO3, liftIO4,
-- ** Dealing with annotations
- findAnnotations, deserializeAnnotations, addAnnotation,
+ getAnnotations, getFirstAnnotations,
-- ** Debug output
- endPass, endPassIf, endIteration,
+ showPass, endPass, endIteration, dumpIfSet,
-- ** Screen output
putMsg, putMsgS, errorMsg, errorMsgS,
fatalErrorMsg, fatalErrorMsgS,
debugTraceMsg, debugTraceMsgS,
- dumpIfSet_dyn,
+ dumpIfSet_dyn,
#ifdef GHCI
-- * Getting 'Name's
import CoreLint ( lintCoreBindings )
import PrelNames ( iNTERACTIVE )
import HscTypes
-import Module ( Module )
-import DynFlags ( DynFlags, DynFlag )
-import SimplMonad ( SimplCount, plusSimplCount, zeroSimplCount )
+import Module ( PackageId, Module )
+import DynFlags
+import StaticFlags
import Rules ( RuleBase )
+import BasicTypes ( CompilerPhase )
import Annotations
-import Serialized
+import Id ( Id )
import IOEnv hiding ( liftIO, failM, failWithM )
import qualified IOEnv ( liftIO )
import Outputable
import FastString
import qualified ErrUtils as Err
+import Bag
import Maybes
import UniqSupply
-import LazyUniqFM ( UniqFM )
+import UniqFM ( UniqFM, mapUFM, filterUFM )
+import Util ( split )
+import Data.List ( intersperse )
import Data.Dynamic
import Data.IORef
+import Data.Map (Map)
+import qualified Data.Map as Map
import Data.Word
import Control.Monad
stuff before and after core passes, and do Core Lint when necessary.
\begin{code}
-endPass :: DynFlags -> String -> DynFlag -> [CoreBind] -> [CoreRule] -> IO ()
-endPass = dumpAndLint Err.dumpIfSet_core
+showPass :: DynFlags -> CoreToDo -> IO ()
+showPass dflags pass = Err.showPass dflags (showSDoc (ppr pass))
-endPassIf :: Bool -> DynFlags -> String -> DynFlag -> [CoreBind] -> [CoreRule] -> IO ()
-endPassIf cond = dumpAndLint (Err.dumpIf_core cond)
+endPass :: DynFlags -> CoreToDo -> [CoreBind] -> [CoreRule] -> IO ()
+endPass dflags pass = dumpAndLint dflags True pass empty (coreDumpFlag pass)
-- Same as endPass but doesn't dump Core even with -dverbose-core2core
-endIteration :: DynFlags -> String -> DynFlag -> [CoreBind] -> [CoreRule] -> IO ()
-endIteration = dumpAndLint Err.dumpIfSet_dyn
+endIteration :: DynFlags -> CoreToDo -> Int -> [CoreBind] -> [CoreRule] -> IO ()
+endIteration dflags pass n
+ = dumpAndLint dflags False pass (ptext (sLit "iteration=") <> int n)
+ (Just Opt_D_dump_simpl_iterations)
+
+dumpIfSet :: Bool -> CoreToDo -> SDoc -> SDoc -> IO ()
+dumpIfSet dump_me pass extra_info doc
+ = Err.dumpIfSet dump_me (showSDoc (ppr pass <+> extra_info)) doc
-dumpAndLint :: (DynFlags -> DynFlag -> String -> SDoc -> IO ())
- -> DynFlags -> String -> DynFlag
+dumpAndLint :: DynFlags -> Bool -> CoreToDo -> SDoc -> Maybe DynFlag
-> [CoreBind] -> [CoreRule] -> IO ()
-dumpAndLint dump dflags pass_name dump_flag binds rules
+-- The "show_all" parameter says to print dump if -dverbose-core2core is on
+dumpAndLint dflags show_all pass extra_info mb_dump_flag binds rules
= do { -- Report result size if required
-- This has the side effect of forcing the intermediate to be evaluated
; Err.debugTraceMsg dflags 2 $
(text " Result size =" <+> int (coreBindsSize binds))
-- Report verbosely, if required
- ; dump dflags dump_flag pass_name
- (pprCoreBindings binds $$ ppUnless (null rules) pp_rules)
+ ; let pass_name = showSDoc (ppr pass <+> extra_info)
+ dump_doc = pprCoreBindings binds
+ $$ ppUnless (null rules) pp_rules
+
+ ; case mb_dump_flag of
+ Nothing -> return ()
+ Just dump_flag -> Err.dumpIfSet_dyn_or dflags dump_flags pass_name dump_doc
+ where
+ dump_flags | show_all = [dump_flag, Opt_D_verbose_core2core]
+ | otherwise = [dump_flag]
-- Type check
- ; lintCoreBindings dflags pass_name binds }
+ ; when (dopt Opt_DoCoreLinting dflags) $
+ do { let (warns, errs) = lintCoreBindings binds
+ ; Err.showPass dflags ("Core Linted result of " ++ pass_name)
+ ; displayLintResults dflags pass warns errs binds } }
where
pp_rules = vcat [ blankLine
, ptext (sLit "------ Local rules for imported ids --------")
, pprRules rules ]
+
+displayLintResults :: DynFlags -> CoreToDo
+ -> Bag Err.Message -> Bag Err.Message -> [CoreBind]
+ -> IO ()
+displayLintResults dflags pass warns errs binds
+ | not (isEmptyBag errs)
+ = do { printDump (vcat [ banner "errors", Err.pprMessageBag errs
+ , ptext (sLit "*** Offending Program ***")
+ , pprCoreBindings binds
+ , ptext (sLit "*** End of Offense ***") ])
+ ; Err.ghcExit dflags 1 }
+
+ | not (isEmptyBag warns)
+ , not (case pass of { CoreDesugar -> True; _ -> False })
+ -- Suppress warnings after desugaring pass because some
+ -- are legitimate. Notably, the desugarer generates instance
+ -- methods with INLINE pragmas that form a mutually recursive
+ -- group. Only afer a round of simplification are they unravelled.
+ , not opt_NoDebugOutput
+ , showLintWarnings pass
+ = printDump (banner "warnings" $$ Err.pprMessageBag warns)
+
+ | otherwise = return ()
+ where
+ banner string = ptext (sLit "*** Core Lint") <+> text string
+ <+> ptext (sLit ": in result of") <+> ppr pass
+ <+> ptext (sLit "***")
+
+showLintWarnings :: CoreToDo -> Bool
+-- Disable Lint warnings on the first simplifier pass, because
+-- there may be some INLINE knots still tied, which is tiresomely noisy
+showLintWarnings (CoreDoSimplify (SimplGently {}) _ _) = False
+showLintWarnings _ = True
+\end{code}
+
+
+%************************************************************************
+%* *
+ The CoreToDo type and related types
+ Abstraction of core-to-core passes to run.
+%* *
+%************************************************************************
+
+\begin{code}
+data CoreToDo -- These are diff core-to-core passes,
+ -- which may be invoked in any order,
+ -- as many times as you like.
+
+ = CoreDoSimplify -- The core-to-core simplifier.
+ SimplifierMode
+ Int -- Max iterations
+ [SimplifierSwitch] -- Each run of the simplifier can take a different
+ -- set of simplifier-specific flags.
+ | CoreDoFloatInwards
+ | CoreDoFloatOutwards FloatOutSwitches
+ | CoreLiberateCase
+ | CoreDoPrintCore
+ | CoreDoStaticArgs
+ | CoreDoStrictness
+ | CoreDoWorkerWrapper
+ | CoreDoSpecialising
+ | CoreDoSpecConstr
+ | CoreDoGlomBinds
+ | CoreCSE
+ | CoreDoRuleCheck CompilerPhase String -- Check for non-application of rules
+ -- matching this string
+ | CoreDoVectorisation PackageId
+ | CoreDoNothing -- Useful when building up
+ | CoreDoPasses [CoreToDo] -- lists of these things
+
+ | CoreDesugar -- Not strictly a core-to-core pass, but produces
+ -- Core output, and hence useful to pass to endPass
+
+ | CoreTidy
+ | CorePrep
+
+coreDumpFlag :: CoreToDo -> Maybe DynFlag
+coreDumpFlag (CoreDoSimplify {}) = Just Opt_D_dump_simpl_phases
+coreDumpFlag CoreDoFloatInwards = Just Opt_D_verbose_core2core
+coreDumpFlag (CoreDoFloatOutwards {}) = Just Opt_D_verbose_core2core
+coreDumpFlag CoreLiberateCase = Just Opt_D_verbose_core2core
+coreDumpFlag CoreDoStaticArgs = Just Opt_D_verbose_core2core
+coreDumpFlag CoreDoStrictness = Just Opt_D_dump_stranal
+coreDumpFlag CoreDoWorkerWrapper = Just Opt_D_dump_worker_wrapper
+coreDumpFlag CoreDoSpecialising = Just Opt_D_dump_spec
+coreDumpFlag CoreDoSpecConstr = Just Opt_D_dump_spec
+coreDumpFlag CoreCSE = Just Opt_D_dump_cse
+coreDumpFlag (CoreDoVectorisation {}) = Just Opt_D_dump_vect
+coreDumpFlag CoreDesugar = Just Opt_D_dump_ds
+coreDumpFlag CoreTidy = Just Opt_D_dump_simpl
+coreDumpFlag CorePrep = Just Opt_D_dump_prep
+
+coreDumpFlag CoreDoPrintCore = Nothing
+coreDumpFlag (CoreDoRuleCheck {}) = Nothing
+coreDumpFlag CoreDoNothing = Nothing
+coreDumpFlag CoreDoGlomBinds = Nothing
+coreDumpFlag (CoreDoPasses {}) = Nothing
+
+instance Outputable CoreToDo where
+ ppr (CoreDoSimplify md n _) = ptext (sLit "Simplifier")
+ <+> ppr md
+ <+> ptext (sLit "max-iterations=") <> int n
+ ppr CoreDoFloatInwards = ptext (sLit "Float inwards")
+ ppr (CoreDoFloatOutwards f) = ptext (sLit "Float out") <> parens (ppr f)
+ ppr CoreLiberateCase = ptext (sLit "Liberate case")
+ ppr CoreDoStaticArgs = ptext (sLit "Static argument")
+ ppr CoreDoStrictness = ptext (sLit "Demand analysis")
+ ppr CoreDoWorkerWrapper = ptext (sLit "Worker Wrapper binds")
+ ppr CoreDoSpecialising = ptext (sLit "Specialise")
+ ppr CoreDoSpecConstr = ptext (sLit "SpecConstr")
+ ppr CoreCSE = ptext (sLit "Common sub-expression")
+ ppr (CoreDoVectorisation {}) = ptext (sLit "Vectorisation")
+ ppr CoreDesugar = ptext (sLit "Desugar")
+ ppr CoreTidy = ptext (sLit "Tidy Core")
+ ppr CorePrep = ptext (sLit "CorePrep")
+ ppr CoreDoPrintCore = ptext (sLit "Print core")
+ ppr (CoreDoRuleCheck {}) = ptext (sLit "Rule check")
+ ppr CoreDoGlomBinds = ptext (sLit "Glom binds")
+ ppr CoreDoNothing = ptext (sLit "CoreDoNothing")
+ ppr (CoreDoPasses {}) = ptext (sLit "CoreDoPasses")
+\end{code}
+
+\begin{code}
+data SimplifierMode -- See comments in SimplMonad
+ = SimplGently
+ { sm_rules :: Bool -- Whether RULES are enabled
+ , sm_inline :: Bool } -- Whether inlining is enabled
+
+ | SimplPhase
+ { sm_num :: Int -- Phase number; counts downward so 0 is last phase
+ , sm_names :: [String] } -- Name(s) of the phase
+
+instance Outputable SimplifierMode where
+ ppr (SimplPhase { sm_num = n, sm_names = ss })
+ = ptext (sLit "Phase") <+> int n <+> brackets (text (concat $ intersperse "," ss))
+ ppr (SimplGently { sm_rules = r, sm_inline = i })
+ = ptext (sLit "gentle") <>
+ brackets (pp_flag r (sLit "rules") <> comma <>
+ pp_flag i (sLit "inline"))
+ where
+ pp_flag f s = ppUnless f (ptext (sLit "no")) <+> ptext s
+
+data SimplifierSwitch
+ = NoCaseOfCase
+\end{code}
+
+
+\begin{code}
+data FloatOutSwitches = FloatOutSwitches {
+ floatOutLambdas :: Bool, -- ^ True <=> float lambdas to top level
+ floatOutConstants :: Bool -- ^ True <=> float constants to top level,
+ -- even if they do not escape a lambda
+ }
+instance Outputable FloatOutSwitches where
+ ppr = pprFloatOutSwitches
+
+pprFloatOutSwitches :: FloatOutSwitches -> SDoc
+pprFloatOutSwitches sw = pp_not (floatOutLambdas sw) <+> text "lambdas" <> comma
+ <+> pp_not (floatOutConstants sw) <+> text "constants"
+ where
+ pp_not True = empty
+ pp_not False = text "not"
+
+-- | Switches that specify the minimum amount of floating out
+-- gentleFloatOutSwitches :: FloatOutSwitches
+-- gentleFloatOutSwitches = FloatOutSwitches False False
+
+-- | Switches that do not specify floating out of lambdas, just of constants
+constantsOnlyFloatOutSwitches :: FloatOutSwitches
+constantsOnlyFloatOutSwitches = FloatOutSwitches False True
+\end{code}
+
+
+%************************************************************************
+%* *
+ Generating the main optimisation pipeline
+%* *
+%************************************************************************
+
+\begin{code}
+getCoreToDo :: DynFlags -> [CoreToDo]
+getCoreToDo dflags
+ = core_todo
+ where
+ opt_level = optLevel dflags
+ phases = simplPhases dflags
+ max_iter = maxSimplIterations dflags
+ strictness = dopt Opt_Strictness dflags
+ full_laziness = dopt Opt_FullLaziness dflags
+ do_specialise = dopt Opt_Specialise dflags
+ do_float_in = dopt Opt_FloatIn dflags
+ cse = dopt Opt_CSE dflags
+ spec_constr = dopt Opt_SpecConstr dflags
+ liberate_case = dopt Opt_LiberateCase dflags
+ rule_check = ruleCheck dflags
+ static_args = dopt Opt_StaticArgumentTransformation dflags
+
+ maybe_rule_check phase = runMaybe rule_check (CoreDoRuleCheck phase)
+
+ maybe_strictness_before phase
+ = runWhen (phase `elem` strictnessBefore dflags) CoreDoStrictness
+
+ simpl_phase phase names iter
+ = CoreDoPasses
+ [ maybe_strictness_before phase
+ , CoreDoSimplify (SimplPhase phase names)
+ iter []
+ , maybe_rule_check phase
+ ]
+
+ vectorisation
+ = runWhen (dopt Opt_Vectorise dflags)
+ $ CoreDoPasses [ simpl_gently, CoreDoVectorisation (dphPackage dflags) ]
+
+
+ -- By default, we have 2 phases before phase 0.
+
+ -- Want to run with inline phase 2 after the specialiser to give
+ -- maximum chance for fusion to work before we inline build/augment
+ -- in phase 1. This made a difference in 'ansi' where an
+ -- overloaded function wasn't inlined till too late.
+
+ -- Need phase 1 so that build/augment get
+ -- inlined. I found that spectral/hartel/genfft lost some useful
+ -- strictness in the function sumcode' if augment is not inlined
+ -- before strictness analysis runs
+ simpl_phases = CoreDoPasses [ simpl_phase phase ["main"] max_iter
+ | phase <- [phases, phases-1 .. 1] ]
+
+
+ -- initial simplify: mk specialiser happy: minimum effort please
+ simpl_gently = CoreDoSimplify
+ (SimplGently { sm_rules = True, sm_inline = False })
+ -- See Note [Gentle mode] and
+ -- Note [RULEs enabled in SimplGently] in SimplUtils
+ max_iter
+ [
+
+
+ NoCaseOfCase -- Don't do case-of-case transformations.
+ -- This makes full laziness work better
+ ]
+
+ core_todo =
+ if opt_level == 0 then
+ [vectorisation,
+ simpl_phase 0 ["final"] max_iter]
+ else {- opt_level >= 1 -} [
+
+ -- We want to do the static argument transform before full laziness as it
+ -- may expose extra opportunities to float things outwards. However, to fix
+ -- up the output of the transformation we need at do at least one simplify
+ -- after this before anything else
+ runWhen static_args (CoreDoPasses [ simpl_gently, CoreDoStaticArgs ]),
+
+ -- We run vectorisation here for now, but we might also try to run
+ -- it later
+ vectorisation,
+
+ -- initial simplify: mk specialiser happy: minimum effort please
+ simpl_gently,
+
+ -- Specialisation is best done before full laziness
+ -- so that overloaded functions have all their dictionary lambdas manifest
+ runWhen do_specialise CoreDoSpecialising,
+
+ runWhen full_laziness (CoreDoFloatOutwards constantsOnlyFloatOutSwitches),
+ -- Was: gentleFloatOutSwitches
+ -- I have no idea why, but not floating constants to top level is
+ -- very bad in some cases.
+ -- Notably: p_ident in spectral/rewrite
+ -- Changing from "gentle" to "constantsOnly" improved
+ -- rewrite's allocation by 19%, and made 0.0% difference
+ -- to any other nofib benchmark
+
+ runWhen do_float_in CoreDoFloatInwards,
+
+ simpl_phases,
+
+ -- Phase 0: allow all Ids to be inlined now
+ -- This gets foldr inlined before strictness analysis
+
+ -- At least 3 iterations because otherwise we land up with
+ -- huge dead expressions because of an infelicity in the
+ -- simpifier.
+ -- let k = BIG in foldr k z xs
+ -- ==> let k = BIG in letrec go = \xs -> ...(k x).... in go xs
+ -- ==> let k = BIG in letrec go = \xs -> ...(BIG x).... in go xs
+ -- Don't stop now!
+ simpl_phase 0 ["main"] (max max_iter 3),
+
+ runWhen strictness (CoreDoPasses [
+ CoreDoStrictness,
+ CoreDoWorkerWrapper,
+ CoreDoGlomBinds,
+ simpl_phase 0 ["post-worker-wrapper"] max_iter
+ ]),
+
+ runWhen full_laziness
+ (CoreDoFloatOutwards constantsOnlyFloatOutSwitches),
+ -- nofib/spectral/hartel/wang doubles in speed if you
+ -- do full laziness late in the day. It only happens
+ -- after fusion and other stuff, so the early pass doesn't
+ -- catch it. For the record, the redex is
+ -- f_el22 (f_el21 r_midblock)
+
+
+ runWhen cse CoreCSE,
+ -- We want CSE to follow the final full-laziness pass, because it may
+ -- succeed in commoning up things floated out by full laziness.
+ -- CSE used to rely on the no-shadowing invariant, but it doesn't any more
+
+ runWhen do_float_in CoreDoFloatInwards,
+
+ maybe_rule_check 0,
+
+ -- Case-liberation for -O2. This should be after
+ -- strictness analysis and the simplification which follows it.
+ runWhen liberate_case (CoreDoPasses [
+ CoreLiberateCase,
+ simpl_phase 0 ["post-liberate-case"] max_iter
+ ]), -- Run the simplifier after LiberateCase to vastly
+ -- reduce the possiblility of shadowing
+ -- Reason: see Note [Shadowing] in SpecConstr.lhs
+
+ runWhen spec_constr CoreDoSpecConstr,
+
+ maybe_rule_check 0,
+
+ -- Final clean-up simplification:
+ simpl_phase 0 ["final"] max_iter
+ ]
+
+-- The core-to-core pass ordering is derived from the DynFlags:
+runWhen :: Bool -> CoreToDo -> CoreToDo
+runWhen True do_this = do_this
+runWhen False _ = CoreDoNothing
+
+runMaybe :: Maybe a -> (a -> CoreToDo) -> CoreToDo
+runMaybe (Just x) f = f x
+runMaybe Nothing _ = CoreDoNothing
+
+dumpSimplPhase :: DynFlags -> SimplifierMode -> Bool
+dumpSimplPhase dflags mode
+ | Just spec_string <- shouldDumpSimplPhase dflags
+ = match_spec spec_string
+ | otherwise
+ = dopt Opt_D_verbose_core2core dflags
+
+ where
+ match_spec :: String -> Bool
+ match_spec spec_string
+ = or $ map (and . map match . split ':')
+ $ split ',' spec_string
+
+ match :: String -> Bool
+ match "" = True
+ match s = case reads s of
+ [(n,"")] -> phase_num n
+ _ -> phase_name s
+
+ phase_num :: Int -> Bool
+ phase_num n = case mode of
+ SimplPhase k _ -> n == k
+ _ -> False
+
+ phase_name :: String -> Bool
+ phase_name s = case mode of
+ SimplGently {} -> s == "gentle"
+ SimplPhase { sm_names = ss } -> s `elem` ss
+\end{code}
+
+
+%************************************************************************
+%* *
+ Counting and logging
+%* *
+%************************************************************************
+
+\begin{code}
+verboseSimplStats :: Bool
+verboseSimplStats = opt_PprStyle_Debug -- For now, anyway
+
+zeroSimplCount :: DynFlags -> SimplCount
+isZeroSimplCount :: SimplCount -> Bool
+pprSimplCount :: SimplCount -> SDoc
+doSimplTick, doFreeSimplTick :: Tick -> SimplCount -> SimplCount
+plusSimplCount :: SimplCount -> SimplCount -> SimplCount
+\end{code}
+
+\begin{code}
+data SimplCount
+ = VerySimplCount !Int -- Used when don't want detailed stats
+
+ | SimplCount {
+ ticks :: !Int, -- Total ticks
+ details :: !TickCounts, -- How many of each type
+
+ n_log :: !Int, -- N
+ log1 :: [Tick], -- Last N events; <= opt_HistorySize,
+ -- most recent first
+ log2 :: [Tick] -- Last opt_HistorySize events before that
+ -- Having log1, log2 lets us accumulate the
+ -- recent history reasonably efficiently
+ }
+
+type TickCounts = Map Tick Int
+
+simplCountN :: SimplCount -> Int
+simplCountN (VerySimplCount n) = n
+simplCountN (SimplCount { ticks = n }) = n
+
+zeroSimplCount dflags
+ -- This is where we decide whether to do
+ -- the VerySimpl version or the full-stats version
+ | dopt Opt_D_dump_simpl_stats dflags
+ = SimplCount {ticks = 0, details = Map.empty,
+ n_log = 0, log1 = [], log2 = []}
+ | otherwise
+ = VerySimplCount 0
+
+isZeroSimplCount (VerySimplCount n) = n==0
+isZeroSimplCount (SimplCount { ticks = n }) = n==0
+
+doFreeSimplTick tick sc@SimplCount { details = dts }
+ = sc { details = dts `addTick` tick }
+doFreeSimplTick _ sc = sc
+
+doSimplTick tick sc@SimplCount { ticks = tks, details = dts, n_log = nl, log1 = l1 }
+ | nl >= opt_HistorySize = sc1 { n_log = 1, log1 = [tick], log2 = l1 }
+ | otherwise = sc1 { n_log = nl+1, log1 = tick : l1 }
+ where
+ sc1 = sc { ticks = tks+1, details = dts `addTick` tick }
+
+doSimplTick _ (VerySimplCount n) = VerySimplCount (n+1)
+
+
+-- Don't use Map.unionWith because that's lazy, and we want to
+-- be pretty strict here!
+addTick :: TickCounts -> Tick -> TickCounts
+addTick fm tick = case Map.lookup tick fm of
+ Nothing -> Map.insert tick 1 fm
+ Just n -> n1 `seq` Map.insert tick n1 fm
+ where
+ n1 = n+1
+
+
+plusSimplCount sc1@(SimplCount { ticks = tks1, details = dts1 })
+ sc2@(SimplCount { ticks = tks2, details = dts2 })
+ = log_base { ticks = tks1 + tks2, details = Map.unionWith (+) dts1 dts2 }
+ where
+ -- A hackish way of getting recent log info
+ log_base | null (log1 sc2) = sc1 -- Nothing at all in sc2
+ | null (log2 sc2) = sc2 { log2 = log1 sc1 }
+ | otherwise = sc2
+
+plusSimplCount (VerySimplCount n) (VerySimplCount m) = VerySimplCount (n+m)
+plusSimplCount _ _ = panic "plusSimplCount"
+ -- We use one or the other consistently
+
+pprSimplCount (VerySimplCount n) = ptext (sLit "Total ticks:") <+> int n
+pprSimplCount (SimplCount { ticks = tks, details = dts, log1 = l1, log2 = l2 })
+ = vcat [ptext (sLit "Total ticks: ") <+> int tks,
+ blankLine,
+ pprTickCounts (Map.toList dts),
+ if verboseSimplStats then
+ vcat [blankLine,
+ ptext (sLit "Log (most recent first)"),
+ nest 4 (vcat (map ppr l1) $$ vcat (map ppr l2))]
+ else empty
+ ]
+
+pprTickCounts :: [(Tick,Int)] -> SDoc
+pprTickCounts [] = empty
+pprTickCounts ((tick1,n1):ticks)
+ = vcat [int tot_n <+> text (tickString tick1),
+ pprTCDetails real_these,
+ pprTickCounts others
+ ]
+ where
+ tick1_tag = tickToTag tick1
+ (these, others) = span same_tick ticks
+ real_these = (tick1,n1):these
+ same_tick (tick2,_) = tickToTag tick2 == tick1_tag
+ tot_n = sum [n | (_,n) <- real_these]
+
+pprTCDetails :: [(Tick, Int)] -> SDoc
+pprTCDetails ticks
+ = nest 4 (vcat [int n <+> pprTickCts tick | (tick,n) <- ticks])
+\end{code}
+
+
+\begin{code}
+data Tick
+ = PreInlineUnconditionally Id
+ | PostInlineUnconditionally Id
+
+ | UnfoldingDone Id
+ | RuleFired FastString -- Rule name
+
+ | LetFloatFromLet
+ | EtaExpansion Id -- LHS binder
+ | EtaReduction Id -- Binder on outer lambda
+ | BetaReduction Id -- Lambda binder
+
+
+ | CaseOfCase Id -- Bndr on *inner* case
+ | KnownBranch Id -- Case binder
+ | CaseMerge Id -- Binder on outer case
+ | AltMerge Id -- Case binder
+ | CaseElim Id -- Case binder
+ | CaseIdentity Id -- Case binder
+ | FillInCaseDefault Id -- Case binder
+
+ | BottomFound
+ | SimplifierDone -- Ticked at each iteration of the simplifier
+
+instance Outputable Tick where
+ ppr tick = text (tickString tick) <+> pprTickCts tick
+
+instance Eq Tick where
+ a == b = case a `cmpTick` b of
+ EQ -> True
+ _ -> False
+
+instance Ord Tick where
+ compare = cmpTick
+
+tickToTag :: Tick -> Int
+tickToTag (PreInlineUnconditionally _) = 0
+tickToTag (PostInlineUnconditionally _) = 1
+tickToTag (UnfoldingDone _) = 2
+tickToTag (RuleFired _) = 3
+tickToTag LetFloatFromLet = 4
+tickToTag (EtaExpansion _) = 5
+tickToTag (EtaReduction _) = 6
+tickToTag (BetaReduction _) = 7
+tickToTag (CaseOfCase _) = 8
+tickToTag (KnownBranch _) = 9
+tickToTag (CaseMerge _) = 10
+tickToTag (CaseElim _) = 11
+tickToTag (CaseIdentity _) = 12
+tickToTag (FillInCaseDefault _) = 13
+tickToTag BottomFound = 14
+tickToTag SimplifierDone = 16
+tickToTag (AltMerge _) = 17
+
+tickString :: Tick -> String
+tickString (PreInlineUnconditionally _) = "PreInlineUnconditionally"
+tickString (PostInlineUnconditionally _)= "PostInlineUnconditionally"
+tickString (UnfoldingDone _) = "UnfoldingDone"
+tickString (RuleFired _) = "RuleFired"
+tickString LetFloatFromLet = "LetFloatFromLet"
+tickString (EtaExpansion _) = "EtaExpansion"
+tickString (EtaReduction _) = "EtaReduction"
+tickString (BetaReduction _) = "BetaReduction"
+tickString (CaseOfCase _) = "CaseOfCase"
+tickString (KnownBranch _) = "KnownBranch"
+tickString (CaseMerge _) = "CaseMerge"
+tickString (AltMerge _) = "AltMerge"
+tickString (CaseElim _) = "CaseElim"
+tickString (CaseIdentity _) = "CaseIdentity"
+tickString (FillInCaseDefault _) = "FillInCaseDefault"
+tickString BottomFound = "BottomFound"
+tickString SimplifierDone = "SimplifierDone"
+
+pprTickCts :: Tick -> SDoc
+pprTickCts (PreInlineUnconditionally v) = ppr v
+pprTickCts (PostInlineUnconditionally v)= ppr v
+pprTickCts (UnfoldingDone v) = ppr v
+pprTickCts (RuleFired v) = ppr v
+pprTickCts LetFloatFromLet = empty
+pprTickCts (EtaExpansion v) = ppr v
+pprTickCts (EtaReduction v) = ppr v
+pprTickCts (BetaReduction v) = ppr v
+pprTickCts (CaseOfCase v) = ppr v
+pprTickCts (KnownBranch v) = ppr v
+pprTickCts (CaseMerge v) = ppr v
+pprTickCts (AltMerge v) = ppr v
+pprTickCts (CaseElim v) = ppr v
+pprTickCts (CaseIdentity v) = ppr v
+pprTickCts (FillInCaseDefault v) = ppr v
+pprTickCts _ = empty
+
+cmpTick :: Tick -> Tick -> Ordering
+cmpTick a b = case (tickToTag a `compare` tickToTag b) of
+ GT -> GT
+ EQ -> cmpEqTick a b
+ LT -> LT
+
+cmpEqTick :: Tick -> Tick -> Ordering
+cmpEqTick (PreInlineUnconditionally a) (PreInlineUnconditionally b) = a `compare` b
+cmpEqTick (PostInlineUnconditionally a) (PostInlineUnconditionally b) = a `compare` b
+cmpEqTick (UnfoldingDone a) (UnfoldingDone b) = a `compare` b
+cmpEqTick (RuleFired a) (RuleFired b) = a `compare` b
+cmpEqTick (EtaExpansion a) (EtaExpansion b) = a `compare` b
+cmpEqTick (EtaReduction a) (EtaReduction b) = a `compare` b
+cmpEqTick (BetaReduction a) (BetaReduction b) = a `compare` b
+cmpEqTick (CaseOfCase a) (CaseOfCase b) = a `compare` b
+cmpEqTick (KnownBranch a) (KnownBranch b) = a `compare` b
+cmpEqTick (CaseMerge a) (CaseMerge b) = a `compare` b
+cmpEqTick (AltMerge a) (AltMerge b) = a `compare` b
+cmpEqTick (CaseElim a) (CaseElim b) = a `compare` b
+cmpEqTick (CaseIdentity a) (CaseIdentity b) = a `compare` b
+cmpEqTick (FillInCaseDefault a) (FillInCaseDefault b) = a `compare` b
+cmpEqTick _ _ = EQ
\end{code}
%************************************************************************
\begin{code}
-data CoreState = CoreState {
- cs_uniq_supply :: UniqSupply,
- cs_ann_env :: AnnEnv
+newtype CoreState = CoreState {
+ cs_uniq_supply :: UniqSupply
}
data CoreReader = CoreReader {
return us1
runCoreM :: HscEnv
- -> AnnEnv
-> RuleBase
-> UniqSupply
-> Module
-> CoreM a
-> IO (a, SimplCount)
-runCoreM hsc_env ann_env rule_base us mod m =
+runCoreM hsc_env rule_base us mod m =
liftM extract $ runIOEnv reader $ unCoreM m state
where
reader = CoreReader {
cr_module = mod
}
state = CoreState {
- cs_uniq_supply = us,
- cs_ann_env = ann_env
+ cs_uniq_supply = us
}
extract :: (a, CoreState, CoreWriter) -> (a, SimplCount)
getHscEnv :: CoreM HscEnv
getHscEnv = read cr_hsc_env
-getAnnEnv :: CoreM AnnEnv
-getAnnEnv = getS cs_ann_env
-
getRuleBase :: CoreM RuleBase
getRuleBase = read cr_rule_base
%************************************************************************
\begin{code}
-
--- | Find all the annotations we currently know about for the given target. Note that no
--- annotations will be returned if we haven't loaded information about the particular target
--- you are inquiring about: by default, only those modules that have been imported by the
--- program being compiled will have been loaded in this way.
+-- | Get all annotations of a given type. This happens lazily, that is
+-- no deserialization will take place until the [a] is actually demanded and
+-- the [a] can also be empty (the UniqFM is not filtered).
--
--- To load the information from additional modules, you can use the functions 'DynamicLoading.forceLoadModuleInterfaces'
--- and 'DynamicLoading.forceLoadNameModuleInterface', but be aware that doing this indiscriminantly
--- will impose a performance penalty.
+-- This should be done once at the start of a Core-to-Core pass that uses
+-- annotations.
--
--- If no deserialization function is supplied, only transient annotations will be returned.
-findAnnotations :: Typeable a => ([Word8] -> a) -> CoreAnnTarget -> CoreM [a]
-findAnnotations deserialize target = do
- ann_env <- getAnnEnv
- return (findAnns deserialize ann_env target)
-
--- | Deserialize all annotations of a given type. This happens lazily, that is
--- no deserialization will take place until the [a] is actually demanded and
--- the [a] can also be empty (the UniqFM is not filtered).
-deserializeAnnotations :: Typeable a => ([Word8] -> a) -> CoreM (UniqFM [a])
-deserializeAnnotations deserialize = do
- ann_env <- getAnnEnv
+-- See Note [Annotations]
+getAnnotations :: Typeable a => ([Word8] -> a) -> ModGuts -> CoreM (UniqFM [a])
+getAnnotations deserialize guts = do
+ hsc_env <- getHscEnv
+ ann_env <- liftIO $ prepareAnnotations hsc_env (Just guts)
return (deserializeAnns deserialize ann_env)
-addAnnotation :: Typeable a => (a -> [Word8]) -> CoreAnnTarget -> a -> CoreM ()
-addAnnotation serialize target what = addAnnotationToEnv $ Annotation { ann_target = target, ann_value = toSerialized serialize what }
-
-addAnnotationToEnv :: Annotation -> CoreM ()
-addAnnotationToEnv annotation = modifyS (\state -> state { cs_ann_env = extendAnnEnvList (cs_ann_env state) [annotation] })
-
+-- | Get at most one annotation of a given type per Unique.
+getFirstAnnotations :: Typeable a => ([Word8] -> a) -> ModGuts -> CoreM (UniqFM a)
+getFirstAnnotations deserialize guts
+ = liftM (mapUFM head . filterUFM (not . null))
+ $ getAnnotations deserialize guts
+
\end{code}
+Note [Annotations]
+~~~~~~~~~~~~~~~~~~
+A Core-to-Core pass that wants to make use of annotations calls
+getAnnotations or getFirstAnnotations at the beginning to obtain a UniqFM with
+annotations of a specific type. This produces all annotations from interface
+files read so far. However, annotations from interface files read during the
+pass will not be visible until getAnnotations is called again. This is similar
+to how rules work and probably isn't too bad.
+
+The current implementation could be optimised a bit: when looking up
+annotations for a thing from the HomePackageTable, we could search directly in
+the module where the thing is defined rather than building one UniqFM which
+contains all annotations we know of. This would work because annotations can
+only be given to things defined in the same module. However, since we would
+only want to deserialise every annotation once, we would have to build a cache
+for every module in the HTP. In the end, it's probably not worth it as long as
+we aren't using annotations heavily.
%************************************************************************
%* *