From: Jose Pedro Magalhaes Date: Fri, 20 May 2011 17:17:13 +0000 (+0200) Subject: Merge branch 'ghc-generics' of http://darcs.haskell.org/ghc into ghc-generics X-Git-Url: http://git.megacz.com/?p=ghc-hetmet.git;a=commitdiff_plain;h=6ad311b7965a7af86f3b931b134215dff76f5fbb;hp=d7fb8d371d3228774331a67db8da805b2d68f1c4 Merge branch 'ghc-generics' of darcs.haskell.org/ghc into ghc-generics --- diff --git a/compiler/basicTypes/BasicTypes.lhs b/compiler/basicTypes/BasicTypes.lhs index f077882..7ea66e1 100644 --- a/compiler/basicTypes/BasicTypes.lhs +++ b/compiler/basicTypes/BasicTypes.lhs @@ -72,13 +72,16 @@ module BasicTypes( inlinePragmaActivation, inlinePragmaRuleMatchInfo, setInlinePragmaActivation, setInlinePragmaRuleMatchInfo, - SuccessFlag(..), succeeded, failed, successIf + SuccessFlag(..), succeeded, failed, successIf, + + FractionalLit(..), negateFractionalLit, integralFractionalLit ) where import FastString import Outputable import Data.Data hiding (Fixity) +import Data.Function (on) \end{code} %************************************************************************ @@ -862,3 +865,36 @@ isEarlyActive (ActiveBefore {}) = True isEarlyActive _ = False \end{code} + + +\begin{code} +-- Used (instead of Rational) to represent exactly the floating point literal that we +-- encountered in the user's source program. This allows us to pretty-print exactly what +-- the user wrote, which is important e.g. for floating point numbers that can't represented +-- as Doubles (we used to via Double for pretty-printing). See also #2245. +data FractionalLit + = FL { fl_text :: String -- How the value was written in the source + , fl_value :: Rational -- Numeric value of the literal + } + deriving (Data, Typeable, Show) + -- The Show instance is required for the derived Lexer.x:Token instance when DEBUG is on + +negateFractionalLit :: FractionalLit -> FractionalLit +negateFractionalLit (FL { fl_text = '-':text, fl_value = value }) = FL { fl_text = text, fl_value = negate value } +negateFractionalLit (FL { fl_text = text, fl_value = value }) = FL { fl_text = '-':text, fl_value = negate value } + +integralFractionalLit :: Integer -> FractionalLit +integralFractionalLit i = FL { fl_text = show i, fl_value = fromInteger i } + +-- Comparison operations are needed when grouping literals +-- for compiling pattern-matching (module MatchLit) + +instance Eq FractionalLit where + (==) = (==) `on` fl_value + +instance Ord FractionalLit where + compare = compare `on` fl_value + +instance Outputable FractionalLit where + ppr = text . fl_text +\end{code} diff --git a/compiler/cmm/Cmm.hs b/compiler/cmm/Cmm.hs index 54b4b11..a6b215b 100644 --- a/compiler/cmm/Cmm.hs +++ b/compiler/cmm/Cmm.hs @@ -11,7 +11,7 @@ module Cmm ( CmmGraph, GenCmmGraph(..), CmmBlock , CmmStackInfo(..), CmmTopInfo(..), Cmm, CmmTop - , CmmReplGraph, CmmFwdRewrite, CmmBwdRewrite + , CmmReplGraph, GenCmmReplGraph, CmmFwdRewrite, CmmBwdRewrite , modifyGraph , lastNode, replaceLastNode, insertBetween @@ -46,7 +46,8 @@ type CmmGraph = GenCmmGraph CmmNode data GenCmmGraph n = CmmGraph { g_entry :: BlockId, g_graph :: Graph n C C } type CmmBlock = Block CmmNode C C -type CmmReplGraph e x = FuelUniqSM (Maybe (Graph CmmNode e x)) +type CmmReplGraph e x = GenCmmReplGraph CmmNode e x +type GenCmmReplGraph n e x = FuelUniqSM (Maybe (Graph n e x)) type CmmFwdRewrite f = FwdRewrite FuelUniqSM CmmNode f type CmmBwdRewrite f = BwdRewrite FuelUniqSM CmmNode f diff --git a/compiler/cmm/CmmCPS.hs b/compiler/cmm/CmmCPS.hs index 6e97100..35eabb3 100644 --- a/compiler/cmm/CmmCPS.hs +++ b/compiler/cmm/CmmCPS.hs @@ -67,49 +67,47 @@ mutable reference cells in an 'HscEnv' and are global to one compiler session. -} +-- EZY: It might be helpful to have an easy way of dumping the "pre" +-- input for any given phase, besides just turning it all on with +-- -ddump-cmmz + cpsTop :: HscEnv -> CmmTop -> IO ([(CLabel, CAFSet)], [(CAFSet, CmmTop)]) cpsTop _ p@(CmmData {}) = return ([], [(Map.empty, p)]) cpsTop hsc_env (CmmProc h@(TopInfo {stack_info=StackInfo {arg_space=entry_off}}) l g) = do - -- Why bother doing it this early? - -- g <- dual_rewrite run Opt_D_dump_cmmz "spills and reloads" - -- (dualLivenessWithInsertion callPPs) g - -- g <- run $ insertLateReloads g -- Duplicate reloads just before uses - -- g <- dual_rewrite runOptimization Opt_D_dump_cmmz "Dead Assignment Elimination" - -- (removeDeadAssignmentsAndReloads callPPs) g - dump Opt_D_dump_cmmz "Pre common block elimination" g - g <- return $ elimCommonBlocks g - dump Opt_D_dump_cmmz "Post common block elimination" g + -- Why bother doing these early: dualLivenessWithInsertion, + -- insertLateReloads, rewriteAssignments? + ----------- Eliminate common blocks ------------------- + g <- return $ elimCommonBlocks g + dump Opt_D_dump_cmmz_cbe "Post common block elimination" g -- Any work storing block Labels must be performed _after_ elimCommonBlocks ----------- Proc points ------------------- let callPPs = callProcPoints g procPoints <- run $ minimalProcPointSet callPPs g g <- run $ addProcPointProtocols callPPs procPoints g - dump Opt_D_dump_cmmz "Post Proc Points Added" g + dump Opt_D_dump_cmmz_proc "Post Proc Points Added" g ----------- Spills and reloads ------------------- - g <- - -- pprTrace "pre Spills" (ppr g) $ - dual_rewrite run Opt_D_dump_cmmz "spills and reloads" - (dualLivenessWithInsertion procPoints) g - -- Insert spills at defns; reloads at return points - g <- - -- pprTrace "pre insertLateReloads" (ppr g) $ - runOptimization $ insertLateReloads g -- Duplicate reloads just before uses - dump Opt_D_dump_cmmz "Post late reloads" g - g <- - -- pprTrace "post insertLateReloads" (ppr g) $ - dual_rewrite runOptimization Opt_D_dump_cmmz "Dead Assignment Elimination" - (removeDeadAssignmentsAndReloads procPoints) g - -- Remove redundant reloads (and any other redundant asst) - - ----------- Debug only: add code to put zero in dead stack slots---- - -- Debugging: stubbing slots on death can cause crashes early - g <- -- trace "post dead-assign elim" $ - if opt_StubDeadValues then run $ stubSlotsOnDeath g else return g + g <- run $ dualLivenessWithInsertion procPoints g + dump Opt_D_dump_cmmz_spills "Post spills and reloads" g + ----------- Sink and inline assignments ------------------- + g <- runOptimization $ rewriteAssignments g + dump Opt_D_dump_cmmz_rewrite "Post rewrite assignments" g + + ----------- Eliminate dead assignments ------------------- + -- Remove redundant reloads (and any other redundant asst) + g <- runOptimization $ removeDeadAssignmentsAndReloads procPoints g + dump Opt_D_dump_cmmz_dead "Post Dead Assignment Elimination" g + + ----------- Zero dead stack slots (Debug only) --------------- + -- Debugging: stubbing slots on death can cause crashes early + g <- if opt_StubDeadValues + then run $ stubSlotsOnDeath g + else return g + dump Opt_D_dump_cmmz_stub "Post stub dead stack slots" g --------------- Stack layout ---------------- slotEnv <- run $ liveSlotAnal g @@ -120,16 +118,16 @@ cpsTop hsc_env (CmmProc h@(TopInfo {stack_info=StackInfo {arg_space=entry_off}}) ------------ Manifest the stack pointer -------- g <- run $ manifestSP spEntryMap areaMap entry_off g - dump Opt_D_dump_cmmz "after manifestSP" g + dump Opt_D_dump_cmmz_sp "Post manifestSP" g -- UGH... manifestSP can require updates to the procPointMap. -- We can probably do something quicker here for the update... ------------- Split into separate procedures ------------ procPointMap <- run $ procPointAnalysis procPoints g - dump Opt_D_dump_cmmz "procpoint map" procPointMap + dump Opt_D_dump_cmmz_procmap "procpoint map" procPointMap gs <- run $ splitAtProcPoints l callPPs procPoints procPointMap (CmmProc h l g) - mapM_ (dump Opt_D_dump_cmmz "after splitting") gs + mapM_ (dump Opt_D_dump_cmmz_split "Post splitting") gs ------------- More CAFs and foreign calls ------------ cafEnv <- run $ cafAnal g @@ -137,30 +135,29 @@ cpsTop hsc_env (CmmProc h@(TopInfo {stack_info=StackInfo {arg_space=entry_off}}) mbpprTrace "localCAFs" (ppr localCAFs) $ return () gs <- run $ mapM (lowerSafeForeignCalls areaMap) gs - mapM_ (dump Opt_D_dump_cmmz "after lowerSafeForeignCalls") gs + mapM_ (dump Opt_D_dump_cmmz_lower "Post lowerSafeForeignCalls") gs -- NO MORE GRAPH TRANSFORMATION AFTER HERE -- JUST MAKING INFOTABLES - let gs' = map (setInfoTableStackMap slotEnv areaMap) gs - mapM_ (dump Opt_D_dump_cmmz "after setInfoTableStackMap") gs' - let gs'' = map (bundleCAFs cafEnv) gs' - mapM_ (dump Opt_D_dump_cmmz "after bundleCAFs") gs'' - return (localCAFs, gs'') + gs <- return $ map (setInfoTableStackMap slotEnv areaMap) gs + mapM_ (dump Opt_D_dump_cmmz_info "after setInfoTableStackMap") gs + gs <- return $ map (bundleCAFs cafEnv) gs + mapM_ (dump Opt_D_dump_cmmz_cafs "after bundleCAFs") gs + return (localCAFs, gs) where dflags = hsc_dflags hsc_env mbpprTrace x y z = if dopt Opt_D_dump_cmmz dflags then pprTrace x y z else z - dump f txt g = dumpIfSet_dyn dflags f txt (ppr g) + dump f txt g = do + -- ToDo: No easy way of say "dump all the cmmz, *and* split + -- them into files." Also, -ddump-cmmz doesn't play nicely + -- with -ddump-to-file, since the headers get omitted. + dumpIfSet_dyn dflags f txt (ppr g) + when (not (dopt f dflags)) $ + dumpIfSet_dyn dflags Opt_D_dump_cmmz txt (ppr g) -- Runs a required transformation/analysis run = runInfiniteFuelIO (hsc_OptFuel hsc_env) -- Runs an optional transformation/analysis (and should -- thus be subject to optimization fuel) runOptimization = runFuelIO (hsc_OptFuel hsc_env) - -- pass 'run' or 'runOptimization' for 'r' - dual_rewrite r flag txt pass g = - do dump flag ("Pre " ++ txt) g - g <- r $ pass g - dump flag ("Post " ++ txt) $ g - return g - -- This probably belongs in CmmBuildInfoTables? -- We're just finishing the job here: once we know what CAFs are defined -- in non-static closures, we can build the SRTs. diff --git a/compiler/cmm/CmmNode.hs b/compiler/cmm/CmmNode.hs index ee948fe..7d50d9a 100644 --- a/compiler/cmm/CmmNode.hs +++ b/compiler/cmm/CmmNode.hs @@ -10,7 +10,7 @@ module CmmNode ( CmmNode(..) , UpdFrameOffset, Convention(..), ForeignConvention(..), ForeignTarget(..) - , mapExp, mapExpDeep, foldExp, foldExpDeep + , mapExp, mapExpDeep, wrapRecExp, foldExp, foldExpDeep, wrapRecExpf ) where @@ -46,7 +46,9 @@ data CmmNode e x where CmmActuals -> -- zero or more arguments CmmNode O O -- Semantics: kills only result regs; all other regs (both GlobalReg - -- and LocalReg) are preserved + -- and LocalReg) are preserved. But there is a current + -- bug for what can be put in arguments, see + -- Note [Register Parameter Passing] CmmBranch :: Label -> CmmNode O C -- Goto another block in the same procedure @@ -73,7 +75,8 @@ data CmmNode e x where -- moment of the call. Later stages can use this to give liveness -- everywhere, which in turn guides register allocation. -- It is the companion of cml_args; cml_args says which stack words --- hold parameters, while cml_arg_regs says which global regs hold parameters +-- hold parameters, while cml_arg_regs says which global regs hold parameters. +-- But do note [Register parameter passing] cml_args :: ByteOff, -- Byte offset, from the *old* end of the Area associated with @@ -103,7 +106,7 @@ data CmmNode e x where -- Always the last node of a block tgt :: ForeignTarget, -- call target and convention res :: CmmFormals, -- zero or more results - args :: CmmActuals, -- zero or more arguments + args :: CmmActuals, -- zero or more arguments; see Note [Register parameter passing] succ :: Label, -- Label of continuation updfr :: UpdFrameOffset, -- where the update frame is (for building infotable) intrbl:: Bool -- whether or not the call is interruptible @@ -113,9 +116,11 @@ data CmmNode e x where ~~~~~~~~~~~~~~~~~~~~~~~ A CmmUnsafeForeignCall is used for *unsafe* foreign calls; a CmmForeignCall call is used for *safe* foreign calls. -Unsafe ones are easy: think of them as a "fat machine instruction". -In particular, they do *not* kill all live registers (there was a bit -of code in GHC that conservatively assumed otherwise.) + +Unsafe ones are mostly easy: think of them as a "fat machine +instruction". In particular, they do *not* kill all live registers, +just the registers they return to (there was a bit of code in GHC that +conservatively assumed otherwise.) However, see [Register parameter passing]. Safe ones are trickier. A safe foreign call r = f(x) @@ -138,6 +143,21 @@ constructors do *not* (currently) know the foreign call conventions. Note that a safe foreign call needs an info table. -} +{- Note [Register parameter passing] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +On certain architectures, some registers are utilized for parameter +passing in the C calling convention. For example, in x86-64 Linux +convention, rdi, rsi, rdx and rcx (as well as r8 and r9) may be used for +argument passing. These are registers R3-R6, which our generated +code may also be using; as a result, it's necessary to save these +values before doing a foreign call. This is done during initial +code generation in callerSaveVolatileRegs in StgCmmUtils.hs. However, +one result of doing this is that the contents of these registers +may mysteriously change if referenced inside the arguments. This +is dangerous, so you'll need to disable inlining much in the same +way is done in cmm/CmmOpt.hs currently. We should fix this! +-} + --------------------------------------------- -- Eq instance of CmmNode -- It is a shame GHC cannot infer it by itself :( diff --git a/compiler/cmm/CmmSpillReload.hs b/compiler/cmm/CmmSpillReload.hs index 17364ad..2dcfb02 100644 --- a/compiler/cmm/CmmSpillReload.hs +++ b/compiler/cmm/CmmSpillReload.hs @@ -1,7 +1,8 @@ -{-# LANGUAGE GADTs,NoMonoLocalBinds #-} +{-# LANGUAGE GADTs, NoMonoLocalBinds, FlexibleContexts, ViewPatterns #-} -- Norman likes local bindings -- If this module lives on I'd like to get rid of this flag in due course +{-# OPTIONS_GHC -fno-warn-warnings-deprecations #-} {-# OPTIONS_GHC -fno-warn-incomplete-patterns #-} #if __GLASGOW_HASKELL__ >= 701 -- GHC 7.0.1 improved incomplete pattern warnings with GADTs @@ -14,9 +15,7 @@ module CmmSpillReload --, insertSpillsAndReloads --- XXX todo check live-in at entry against formals , dualLivenessWithInsertion - , availRegsLattice - , cmmAvailableReloads - , insertLateReloads + , rewriteAssignments , removeDeadAssignmentsAndReloads ) where @@ -26,13 +25,16 @@ import Cmm import CmmExpr import CmmLive import OptimizationFuel +import StgCmmUtils import Control.Monad import Outputable hiding (empty) import qualified Outputable as PP import UniqSet +import UniqFM +import Unique -import Compiler.Hoopl +import Compiler.Hoopl hiding (Unique) import Data.Maybe import Prelude hiding (succ, zip) @@ -172,11 +174,6 @@ insertSpillAndReloadRewrites graph procPoints = deepBwdRw3 first middle nothing text "after"{-, ppr m-}]) $ Just $ mkMiddles $ [m, spill reg] else Nothing - middle m@(CmmUnsafeForeignCall _ fs _) live = return $ - case map spill (filter (flip elemRegSet (on_stack live)) fs) ++ - map reload (uniqSetToList (kill fs (in_regs live))) of - [] -> Nothing - reloads -> Just $ mkMiddles (m : reloads) middle _ _ = return Nothing nothing _ _ = return Nothing @@ -188,91 +185,6 @@ spill, reload :: LocalReg -> CmmNode O O spill r = CmmStore (regSlot r) (CmmReg $ CmmLocal r) reload r = CmmAssign (CmmLocal r) (CmmLoad (regSlot r) $ localRegType r) ----------------------------------------------------------------- ---- sinking reloads - --- The idea is to compute at each point the set of registers such that --- on every path to the point, the register is defined by a Reload --- instruction. Then, if a use appears at such a point, we can safely --- insert a Reload right before the use. Finally, we can eliminate --- the early reloads along with other dead assignments. - -data AvailRegs = UniverseMinus RegSet - | AvailRegs RegSet - - -availRegsLattice :: DataflowLattice AvailRegs -availRegsLattice = DataflowLattice "register gotten from reloads" empty add - where empty = UniverseMinus emptyRegSet - -- | compute in the Tx monad to track whether anything has changed - add _ (OldFact old) (NewFact new) = - if join `smallerAvail` old then (SomeChange, join) else (NoChange, old) - where join = interAvail new old - - -interAvail :: AvailRegs -> AvailRegs -> AvailRegs -interAvail (UniverseMinus s) (UniverseMinus s') = UniverseMinus (s `plusRegSet` s') -interAvail (AvailRegs s) (AvailRegs s') = AvailRegs (s `timesRegSet` s') -interAvail (AvailRegs s) (UniverseMinus s') = AvailRegs (s `minusRegSet` s') -interAvail (UniverseMinus s) (AvailRegs s') = AvailRegs (s' `minusRegSet` s ) - -smallerAvail :: AvailRegs -> AvailRegs -> Bool -smallerAvail (AvailRegs _) (UniverseMinus _) = True -smallerAvail (UniverseMinus _) (AvailRegs _) = False -smallerAvail (AvailRegs s) (AvailRegs s') = sizeUniqSet s < sizeUniqSet s' -smallerAvail (UniverseMinus s) (UniverseMinus s') = sizeUniqSet s > sizeUniqSet s' - -extendAvail :: AvailRegs -> LocalReg -> AvailRegs -extendAvail (UniverseMinus s) r = UniverseMinus (deleteFromRegSet s r) -extendAvail (AvailRegs s) r = AvailRegs (extendRegSet s r) - -delFromAvail :: AvailRegs -> LocalReg -> AvailRegs -delFromAvail (UniverseMinus s) r = UniverseMinus (extendRegSet s r) -delFromAvail (AvailRegs s) r = AvailRegs (deleteFromRegSet s r) - -elemAvail :: AvailRegs -> LocalReg -> Bool -elemAvail (UniverseMinus s) r = not $ elemRegSet r s -elemAvail (AvailRegs s) r = elemRegSet r s - -cmmAvailableReloads :: CmmGraph -> FuelUniqSM (BlockEnv AvailRegs) -cmmAvailableReloads g = - liftM snd $ dataflowPassFwd g [(g_entry g, fact_bot availRegsLattice)] $ - analFwd availRegsLattice availReloadsTransfer - -availReloadsTransfer :: FwdTransfer CmmNode AvailRegs -availReloadsTransfer = mkFTransfer3 (flip const) middleAvail ((mkFactBase availRegsLattice .) . lastAvail) - -middleAvail :: CmmNode O O -> AvailRegs -> AvailRegs -middleAvail (CmmAssign (CmmLocal r) (CmmLoad l _)) avail - | l `isStackSlotOf` r = extendAvail avail r -middleAvail (CmmAssign lhs _) avail = foldRegsDefd delFromAvail avail lhs -middleAvail (CmmStore l (CmmReg (CmmLocal r))) avail - | l `isStackSlotOf` r = avail -middleAvail (CmmStore (CmmStackSlot (RegSlot r) _) _) avail = delFromAvail avail r -middleAvail (CmmStore {}) avail = avail -middleAvail (CmmUnsafeForeignCall {}) _ = AvailRegs emptyRegSet -middleAvail (CmmComment {}) avail = avail - -lastAvail :: CmmNode O C -> AvailRegs -> [(Label, AvailRegs)] -lastAvail (CmmCall _ (Just k) _ _ _) _ = [(k, AvailRegs emptyRegSet)] -lastAvail (CmmForeignCall {succ=k}) _ = [(k, AvailRegs emptyRegSet)] -lastAvail l avail = map (\id -> (id, avail)) $ successors l - -insertLateReloads :: CmmGraph -> FuelUniqSM CmmGraph -insertLateReloads g = - liftM fst $ dataflowPassFwd g [(g_entry g, fact_bot availRegsLattice)] $ - analRewFwd availRegsLattice availReloadsTransfer rewrites - where rewrites = mkFRewrite3 first middle last - first _ _ = return Nothing - middle m avail = return $ maybe_reload_before avail m (mkMiddle m) - last l avail = return $ maybe_reload_before avail l (mkLast l) - maybe_reload_before avail node tail = - let used = filterRegsUsed (elemAvail avail) node - in if isEmptyUniqSet used then Nothing - else Just $ reloadTail used tail - reloadTail regset t = foldl rel t $ uniqSetToList regset - where rel t r = mkMiddle (reload r) <*> t - removeDeadAssignmentsAndReloads :: BlockSet -> CmmGraph -> FuelUniqSM CmmGraph removeDeadAssignmentsAndReloads procPoints g = liftM fst $ dataflowPassBwd g [] $ analRewBwd dualLiveLattice @@ -283,10 +195,464 @@ removeDeadAssignmentsAndReloads procPoints g = -- but GHC panics while compiling, see bug #4045. middle :: CmmNode O O -> Fact O DualLive -> CmmReplGraph O O middle (CmmAssign (CmmLocal reg') _) live | not (reg' `elemRegSet` in_regs live) = return $ Just emptyGraph + -- XXX maybe this should be somewhere else... + middle (CmmAssign lhs (CmmReg rhs)) _ | lhs == rhs = return $ Just emptyGraph + middle (CmmStore lhs (CmmLoad rhs _)) _ | lhs == rhs = return $ Just emptyGraph middle _ _ = return Nothing nothing _ _ = return Nothing +---------------------------------------------------------------- +--- Usage information + +-- We decorate all register assignments with usage information, +-- that is, the maximum number of times the register is referenced +-- while it is live along all outgoing control paths. There are a few +-- subtleties here: +-- +-- - If a register goes dead, and then becomes live again, the usages +-- of the disjoint live range don't count towards the original range. +-- +-- a = 1; // used once +-- b = a; +-- a = 2; // used once +-- c = a; +-- +-- - A register may be used multiple times, but these all reside in +-- different control paths, such that any given execution only uses +-- it once. In that case, the usage count may still be 1. +-- +-- a = 1; // used once +-- if (b) { +-- c = a + 3; +-- } else { +-- c = a + 1; +-- } +-- +-- This policy corresponds to an inlining strategy that does not +-- duplicate computation but may increase binary size. +-- +-- - If we naively implement a usage count, we have a counting to +-- infinity problem across joins. Furthermore, knowing that +-- something is used 2 or more times in one runtime execution isn't +-- particularly useful for optimizations (inlining may be beneficial, +-- but there's no way of knowing that without register pressure +-- information.) +-- +-- while (...) { +-- // first iteration, b used once +-- // second iteration, b used twice +-- // third iteration ... +-- a = b; +-- } +-- // b used zero times +-- +-- There is an orthogonal question, which is that for every runtime +-- execution, the register may be used only once, but if we inline it +-- in every conditional path, the binary size might increase a lot. +-- But tracking this information would be tricky, because it violates +-- the finite lattice restriction Hoopl requires for termination; +-- we'd thus need to supply an alternate proof, which is probably +-- something we should defer until we actually have an optimization +-- that would take advantage of this. (This might also interact +-- strangely with liveness information.) +-- +-- a = ...; +-- // a is used one time, but in X different paths +-- case (b) of +-- 1 -> ... a ... +-- 2 -> ... a ... +-- 3 -> ... a ... +-- ... +-- +-- This analysis is very similar to liveness analysis; we just keep a +-- little extra info. (Maybe we should move it to CmmLive, and subsume +-- the old liveness analysis.) + +data RegUsage = SingleUse | ManyUse + deriving (Ord, Eq, Show) +-- Absence in map = ZeroUse + +{- +-- minBound is bottom, maxBound is top, least-upper-bound is max +-- ToDo: Put this in Hoopl. Note that this isn't as useful as I +-- originally hoped, because you usually want to leave out the bottom +-- element when you have things like this put in maps. Maybe f is +-- useful on its own as a combining function. +boundedOrdLattice :: (Bounded a, Ord a) => String -> DataflowLattice a +boundedOrdLattice n = DataflowLattice n minBound f + where f _ (OldFact x) (NewFact y) + | x >= y = (NoChange, x) + | otherwise = (SomeChange, y) +-} + +-- Custom node type we'll rewrite to. CmmAssign nodes to local +-- registers are replaced with AssignLocal nodes. +data WithRegUsage n e x where + Plain :: n e x -> WithRegUsage n e x + AssignLocal :: LocalReg -> CmmExpr -> RegUsage -> WithRegUsage n O O + +instance UserOfLocalRegs (n e x) => UserOfLocalRegs (WithRegUsage n e x) where + foldRegsUsed f z (Plain n) = foldRegsUsed f z n + foldRegsUsed f z (AssignLocal _ e _) = foldRegsUsed f z e + +instance DefinerOfLocalRegs (n e x) => DefinerOfLocalRegs (WithRegUsage n e x) where + foldRegsDefd f z (Plain n) = foldRegsDefd f z n + foldRegsDefd f z (AssignLocal r _ _) = foldRegsDefd f z r + +instance NonLocal n => NonLocal (WithRegUsage n) where + entryLabel (Plain n) = entryLabel n + successors (Plain n) = successors n + +liftRegUsage :: Graph n e x -> Graph (WithRegUsage n) e x +liftRegUsage = mapGraph Plain + +eraseRegUsage :: Graph (WithRegUsage CmmNode) e x -> Graph CmmNode e x +eraseRegUsage = mapGraph f + where f :: WithRegUsage CmmNode e x -> CmmNode e x + f (AssignLocal l e _) = CmmAssign (CmmLocal l) e + f (Plain n) = n + +type UsageMap = UniqFM RegUsage + +usageLattice :: DataflowLattice UsageMap +usageLattice = DataflowLattice "usage counts for registers" emptyUFM (joinUFM f) + where f _ (OldFact x) (NewFact y) + | x >= y = (NoChange, x) + | otherwise = (SomeChange, y) + +-- We reuse the names 'gen' and 'kill', although we're doing something +-- slightly different from the Dragon Book +usageTransfer :: BwdTransfer (WithRegUsage CmmNode) UsageMap +usageTransfer = mkBTransfer3 first middle last + where first _ f = f + middle :: WithRegUsage CmmNode O O -> UsageMap -> UsageMap + middle n f = gen_kill n f + last :: WithRegUsage CmmNode O C -> FactBase UsageMap -> UsageMap + -- Checking for CmmCall/CmmForeignCall is unnecessary, because + -- spills/reloads have already occurred by the time we do this + -- analysis. + -- XXX Deprecated warning is puzzling: what label are we + -- supposed to use? + -- ToDo: With a bit more cleverness here, we can avoid + -- disappointment and heartbreak associated with the inability + -- to inline into CmmCall and CmmForeignCall by + -- over-estimating the usage to be ManyUse. + last n f = gen_kill n (joinOutFacts usageLattice n f) + gen_kill a = gen a . kill a + gen a f = foldRegsUsed increaseUsage f a + kill a f = foldRegsDefd delFromUFM f a + increaseUsage f r = addToUFM_C combine f r SingleUse + where combine _ _ = ManyUse + +usageRewrite :: BwdRewrite FuelUniqSM (WithRegUsage CmmNode) UsageMap +usageRewrite = mkBRewrite3 first middle last + where first _ _ = return Nothing + middle :: Monad m => WithRegUsage CmmNode O O -> UsageMap -> m (Maybe (Graph (WithRegUsage CmmNode) O O)) + middle (Plain (CmmAssign (CmmLocal l) e)) f + = return . Just + $ case lookupUFM f l of + Nothing -> emptyGraph + Just usage -> mkMiddle (AssignLocal l e usage) + middle _ _ = return Nothing + last _ _ = return Nothing + +type CmmGraphWithRegUsage = GenCmmGraph (WithRegUsage CmmNode) +annotateUsage :: CmmGraph -> FuelUniqSM (CmmGraphWithRegUsage) +annotateUsage vanilla_g = + let g = modifyGraph liftRegUsage vanilla_g + in liftM fst $ dataflowPassBwd g [(g_entry g, fact_bot usageLattice)] $ + analRewBwd usageLattice usageTransfer usageRewrite + +---------------------------------------------------------------- +--- Assignment tracking + +-- The idea is to maintain a map of local registers do expressions, +-- such that the value of that register is the same as the value of that +-- expression at any given time. We can then do several things, +-- as described by Assignment. + +-- Assignment describes the various optimizations that are valid +-- at a given point in the program. +data Assignment = +-- This assignment can always be inlined. It is cheap or single-use. + AlwaysInline CmmExpr +-- This assignment should be sunk down to its first use. (This will +-- increase code size if the register is used in multiple control flow +-- paths, but won't increase execution time, and the reduction of +-- register pressure is worth it.) + | AlwaysSink CmmExpr +-- We cannot safely optimize occurrences of this local register. (This +-- corresponds to top in the lattice structure.) + | NeverOptimize + +-- Extract the expression that is being assigned to +xassign :: Assignment -> Maybe CmmExpr +xassign (AlwaysInline e) = Just e +xassign (AlwaysSink e) = Just e +xassign NeverOptimize = Nothing + +-- Extracts the expression, but only if they're the same constructor +xassign2 :: (Assignment, Assignment) -> Maybe (CmmExpr, CmmExpr) +xassign2 (AlwaysInline e, AlwaysInline e') = Just (e, e') +xassign2 (AlwaysSink e, AlwaysSink e') = Just (e, e') +xassign2 _ = Nothing + +-- Note: We'd like to make decisions about "not optimizing" as soon as +-- possible, because this will make running the transfer function more +-- efficient. +type AssignmentMap = UniqFM Assignment + +assignmentLattice :: DataflowLattice AssignmentMap +assignmentLattice = DataflowLattice "assignments for registers" emptyUFM (joinUFM add) + where add _ (OldFact old) (NewFact new) + = case (old, new) of + (NeverOptimize, _) -> (NoChange, NeverOptimize) + (_, NeverOptimize) -> (SomeChange, NeverOptimize) + (xassign2 -> Just (e, e')) + | e == e' -> (NoChange, old) + | otherwise -> (SomeChange, NeverOptimize) + _ -> (SomeChange, NeverOptimize) + +-- Deletes sinks from assignment map, because /this/ is the place +-- where it will be sunk to. +deleteSinks :: UserOfLocalRegs n => n -> AssignmentMap -> AssignmentMap +deleteSinks n m = foldRegsUsed (adjustUFM f) m n + where f (AlwaysSink _) = NeverOptimize + f old = old + +-- Invalidates any expressions that use a register. +invalidateUsersOf :: CmmReg -> AssignmentMap -> AssignmentMap +-- foldUFM_Directly :: (Unique -> elt -> a -> a) -> a -> UniqFM elt -> a +invalidateUsersOf reg m = foldUFM_Directly f m m -- [foldUFM performance] + where f u (xassign -> Just e) m | reg `regUsedIn` e = addToUFM_Directly m u NeverOptimize + f _ _ m = m +{- This requires the entire spine of the map to be continually rebuilt, + - which causes crazy memory usage! +invalidateUsersOf reg = mapUFM (invalidateUsers' reg) + where invalidateUsers' reg (xassign -> Just e) | reg `regUsedIn` e = NeverOptimize + invalidateUsers' _ old = old +-} + +-- Note [foldUFM performance] +-- These calls to fold UFM no longer leak memory, but they do cause +-- pretty killer amounts of allocation. So they'll be something to +-- optimize; we need an algorithmic change to prevent us from having to +-- traverse the /entire/ map continually. + +middleAssignment :: WithRegUsage CmmNode O O -> AssignmentMap -> AssignmentMap + +-- Algorithm for annotated assignments: +-- 1. Delete any sinking assignments that were used by this instruction +-- 2. Add the assignment to our list of valid local assignments with +-- the correct optimization policy. +-- 3. Look for all assignments that reference that register and +-- invalidate them. +middleAssignment n@(AssignLocal r e usage) assign + = invalidateUsersOf (CmmLocal r) . add . deleteSinks n $ assign + where add m = addToUFM m r + $ case usage of + SingleUse -> AlwaysInline e + ManyUse -> decide e + decide CmmLit{} = AlwaysInline e + decide CmmReg{} = AlwaysInline e + decide CmmLoad{} = AlwaysSink e + decide CmmStackSlot{} = AlwaysSink e + decide CmmMachOp{} = AlwaysSink e + -- We'll always inline simple operations on the global + -- registers, to reduce register pressure: Sp - 4 or Hp - 8 + -- EZY: Justify this optimization more carefully. + decide CmmRegOff{} = AlwaysInline e + +-- Algorithm for unannotated assignments of global registers: +-- 1. Delete any sinking assignments that were used by this instruction +-- 2. Look for all assignments that reference this register and +-- invalidate them. +middleAssignment (Plain n@(CmmAssign reg@(CmmGlobal _) _)) assign + = invalidateUsersOf reg . deleteSinks n $ assign + +-- Algorithm for unannotated assignments of *local* registers: do +-- nothing (it's a reload, so no state should have changed) +middleAssignment (Plain (CmmAssign (CmmLocal _) _)) assign = assign + +-- Algorithm for stores: +-- 1. Delete any sinking assignments that were used by this instruction +-- 2. Look for all assignments that load from memory locations that +-- were clobbered by this store and invalidate them. +middleAssignment (Plain n@(CmmStore lhs rhs)) assign + = let m = deleteSinks n assign + in foldUFM_Directly f m m -- [foldUFM performance] + where f u (xassign -> Just x) m | (lhs, rhs) `clobbers` (u, x) = addToUFM_Directly m u NeverOptimize + f _ _ m = m +{- Also leaky + = mapUFM_Directly p . deleteSinks n $ assign + -- ToDo: There's a missed opportunity here: even if a memory + -- access we're attempting to sink gets clobbered at some + -- location, it's still /better/ to sink it to right before the + -- point where it gets clobbered. How might we do this? + -- Unfortunately, it's too late to change the assignment... + where p r (xassign -> Just x) | (lhs, rhs) `clobbers` (r, x) = NeverOptimize + p _ old = old +-} + +-- Assumption: Unsafe foreign calls don't clobber memory +-- Since foreign calls clobber caller saved registers, we need +-- invalidate any assignments that reference those global registers. +-- This is kind of expensive. (One way to optimize this might be to +-- store extra information about expressions that allow this and other +-- checks to be done cheaply.) +middleAssignment (Plain n@(CmmUnsafeForeignCall{})) assign + = deleteCallerSaves (foldRegsDefd (\m r -> addToUFM m r NeverOptimize) (deleteSinks n assign) n) + where deleteCallerSaves m = foldUFM_Directly f m m + f u (xassign -> Just x) m | wrapRecExpf g x False = addToUFM_Directly m u NeverOptimize + f _ _ m = m + g (CmmReg (CmmGlobal r)) _ | callerSaves r = True + g (CmmRegOff (CmmGlobal r) _) _ | callerSaves r = True + g _ b = b + +middleAssignment (Plain (CmmComment {})) assign + = assign + +-- Assumptions: +-- * Writes using Hp do not overlap with any other memory locations +-- (An important invariant being relied on here is that we only ever +-- use Hp to allocate values on the heap, which appears to be the +-- case given hpReg usage, and that our heap writing code doesn't +-- do anything stupid like overlapping writes.) +-- * Stack slots do not overlap with any other memory locations +-- * Stack slots for different areas do not overlap +-- * Stack slots within the same area and different offsets may +-- overlap; we need to do a size check (see 'overlaps'). +-- * Register slots only overlap with themselves. (But this shouldn't +-- happen in practice, because we'll fail to inline a reload across +-- the next spill.) +-- * Non stack-slot stores always conflict with each other. (This is +-- not always the case; we could probably do something special for Hp) +clobbers :: (CmmExpr, CmmExpr) -- (lhs, rhs) of clobbering CmmStore + -> (Unique, CmmExpr) -- (register, expression) that may be clobbered + -> Bool +clobbers (CmmRegOff (CmmGlobal Hp) _, _) (_, _) = False +clobbers (CmmReg (CmmGlobal Hp), _) (_, _) = False +-- ToDo: Also catch MachOp case +clobbers (ss@CmmStackSlot{}, CmmReg (CmmLocal r)) (u, CmmLoad (ss'@CmmStackSlot{}) _) + | getUnique r == u, ss == ss' = False -- No-op on the stack slot (XXX: Do we need this special case?) +clobbers (CmmStackSlot (CallArea a) o, rhs) (_, expr) = f expr + where f (CmmLoad (CmmStackSlot (CallArea a') o') t) + = (a, o, widthInBytes (cmmExprWidth rhs)) `overlaps` (a', o', widthInBytes (typeWidth t)) + f (CmmLoad e _) = containsStackSlot e + f (CmmMachOp _ es) = or (map f es) + f _ = False + -- Maybe there's an invariant broken if this actually ever + -- returns True + containsStackSlot (CmmLoad{}) = True -- load of a load, all bets off + containsStackSlot (CmmMachOp _ es) = or (map containsStackSlot es) + containsStackSlot (CmmStackSlot{}) = True + containsStackSlot _ = False +clobbers (CmmStackSlot (RegSlot l) _, _) (_, expr) = f expr + where f (CmmLoad (CmmStackSlot (RegSlot l') _) _) = l == l' + f _ = False +clobbers _ (_, e) = f e + where f (CmmLoad (CmmStackSlot _ _) _) = False + f (CmmLoad{}) = True -- conservative + f (CmmMachOp _ es) = or (map f es) + f _ = False + +-- Check for memory overlapping. +-- Diagram: +-- 4 8 12 +-- s -w- o +-- [ I32 ] +-- [ F64 ] +-- s' -w'- o' +type CallSubArea = (AreaId, Int, Int) -- area, offset, width +overlaps :: CallSubArea -> CallSubArea -> Bool +overlaps (a, _, _) (a', _, _) | a /= a' = False +overlaps (_, o, w) (_, o', w') = + let s = o - w + s' = o' - w' + in (s' < o) && (s < o) -- Not LTE, because [ I32 ][ I32 ] is OK + +lastAssignment :: WithRegUsage CmmNode O C -> AssignmentMap -> [(Label, AssignmentMap)] +-- Variables are dead across calls, so invalidating all mappings is justified +lastAssignment (Plain (CmmCall _ (Just k) _ _ _)) assign = [(k, mapUFM (const NeverOptimize) assign)] +lastAssignment (Plain (CmmForeignCall {succ=k})) assign = [(k, mapUFM (const NeverOptimize) assign)] +lastAssignment l assign = map (\id -> (id, deleteSinks l assign)) $ successors l + +assignmentTransfer :: FwdTransfer (WithRegUsage CmmNode) AssignmentMap +assignmentTransfer = mkFTransfer3 (flip const) middleAssignment ((mkFactBase assignmentLattice .) . lastAssignment) + +assignmentRewrite :: FwdRewrite FuelUniqSM (WithRegUsage CmmNode) AssignmentMap +assignmentRewrite = mkFRewrite3 first middle last + where + first _ _ = return Nothing + middle :: WithRegUsage CmmNode O O -> AssignmentMap -> GenCmmReplGraph (WithRegUsage CmmNode) O O + middle (Plain m) assign = return $ rewrite assign (precompute assign m) mkMiddle m + middle (AssignLocal l e u) assign = return $ rewriteLocal assign (precompute assign (CmmAssign (CmmLocal l) e)) mkMiddle l e u + last (Plain l) assign = return $ rewrite assign (precompute assign l) mkLast l + -- Tuple is (inline?, reloads) + precompute assign n = foldRegsUsed f (False, []) n -- duplicates are harmless + where f (i, l) r = case lookupUFM assign r of + Just (AlwaysSink e) -> (i, (Plain (CmmAssign (CmmLocal r) e)):l) + Just (AlwaysInline _) -> (True, l) + Just NeverOptimize -> (i, l) + -- This case can show up when we have + -- limited optimization fuel. + Nothing -> (i, l) + rewrite _ (False, []) _ _ = Nothing + -- Note [CmmCall Inline Hack] + -- Conservative hack: don't do any inlining on what will + -- be translated into an OldCmm CmmCalls, since the code + -- produced here tends to be unproblematic and I need to write + -- lint passes to ensure that we don't put anything in the + -- arguments that could be construed as a global register by + -- some later translation pass. (For example, slots will turn + -- into dereferences of Sp). See [Register parameter passing]. + -- ToDo: Fix this up to only bug out if all inlines were for + -- CmmExprs with global registers (we can't use the + -- straightforward mapExpDeep call, in this case.) ToDo: We miss + -- an opportunity here, where all possible inlinings should + -- instead be sunk. + rewrite _ (True, []) _ n | not (inlinable n) = Nothing -- see [CmmCall Inline Hack] + rewrite assign (i, xs) mk n = Just $ mkMiddles xs <*> mk (Plain (inline i assign n)) + + rewriteLocal _ (False, []) _ _ _ _ = Nothing + rewriteLocal assign (i, xs) mk l e u = Just $ mkMiddles xs <*> mk n' + where n' = AssignLocal l e' u + e' = if i then wrapRecExp (inlineExp assign) e else e + -- inlinable check omitted, since we can always inline into + -- assignments. + + inline :: Bool -> AssignmentMap -> CmmNode e x -> CmmNode e x + inline False _ n = n + inline True _ n | not (inlinable n) = n -- see [CmmCall Inline Hack] + inline True assign n = mapExpDeep (inlineExp assign) n + + inlineExp assign old@(CmmReg (CmmLocal r)) + = case lookupUFM assign r of + Just (AlwaysInline x) -> x + _ -> old + inlineExp assign old@(CmmRegOff (CmmLocal r) i) + = case lookupUFM assign r of + Just (AlwaysInline x) -> + case x of + (CmmRegOff r' i') -> CmmRegOff r' (i + i') + _ -> CmmMachOp (MO_Add rep) [x, CmmLit (CmmInt (fromIntegral i) rep)] + where rep = typeWidth (localRegType r) + _ -> old + inlineExp _ old = old + + inlinable :: CmmNode e x -> Bool + inlinable (CmmCall{}) = False + inlinable (CmmForeignCall{}) = False + inlinable (CmmUnsafeForeignCall{}) = False + inlinable _ = True + +rewriteAssignments :: CmmGraph -> FuelUniqSM CmmGraph +rewriteAssignments g = do + g' <- annotateUsage g + g'' <- liftM fst $ dataflowPassFwd g' [(g_entry g, fact_bot assignmentLattice)] $ + analRewFwd assignmentLattice assignmentTransfer assignmentRewrite + return (modifyGraph eraseRegUsage g'') --------------------- -- prettyprinting @@ -305,11 +671,7 @@ instance Outputable DualLive where if isEmptyUniqSet stack then PP.empty else (ppr_regs "live on stack =" stack)] -instance Outputable AvailRegs where - ppr (UniverseMinus s) = if isEmptyUniqSet s then text "" - else ppr_regs "available = all but" s - ppr (AvailRegs s) = if isEmptyUniqSet s then text "" - else ppr_regs "available = " s +-- ToDo: Outputable instance for UsageMap and AssignmentMap my_trace :: String -> SDoc -> a -> a my_trace = if False then pprTrace else \_ _ a -> a diff --git a/compiler/cmm/OldCmm.hs b/compiler/cmm/OldCmm.hs index 57d458c..f5c0817 100644 --- a/compiler/cmm/OldCmm.hs +++ b/compiler/cmm/OldCmm.hs @@ -144,12 +144,14 @@ data CmmStmt -- Old-style | CmmStore CmmExpr CmmExpr -- Assign to memory location. Size is -- given by cmmExprType of the rhs. - | CmmCall -- A call (forign, native or primitive), with + | CmmCall -- A call (foreign, native or primitive), with CmmCallTarget HintedCmmFormals -- zero or more results HintedCmmActuals -- zero or more arguments CmmSafety -- whether to build a continuation CmmReturnInfo + -- Some care is necessary when handling the arguments of these, see + -- [Register parameter passing] and the hack in cmm/CmmOpt.hs | CmmBranch BlockId -- branch to another BB in this fn diff --git a/compiler/codeGen/StgCmmUtils.hs b/compiler/codeGen/StgCmmUtils.hs index 48416e3..d917811 100644 --- a/compiler/codeGen/StgCmmUtils.hs +++ b/compiler/codeGen/StgCmmUtils.hs @@ -340,6 +340,23 @@ emitRtsCall' res pkg fun args _vols safe -- * Regs.h claims that BaseReg should be saved last and loaded first -- * This might not have been tickled before since BaseReg is callee save -- * Regs.h saves SparkHd, ParkT1, SparkBase and SparkLim +-- +-- This code isn't actually used right now, because callerSaves +-- only ever returns true in the current universe for registers NOT in +-- system_regs (just do a grep for CALLER_SAVES in +-- includes/stg/MachRegs.h). It's all one giant no-op, and for +-- good reason: having to save system registers on every foreign call +-- would be very expensive, so we avoid assigning them to those +-- registers when we add support for an architecture. +-- +-- Note that the old code generator actually does more work here: it +-- also saves other global registers. We can't (nor want) to do that +-- here, as we don't have liveness information. And really, we +-- shouldn't be doing the workaround at this point in the pipeline, see +-- Note [Register parameter passing] and the ToDo on CmmCall in +-- cmm/CmmNode.hs. Right now the workaround is to avoid inlining across +-- unsafe foreign calls in rewriteAssignments, but this is strictly +-- temporary. callerSaveVolatileRegs :: (CmmAGraph, CmmAGraph) callerSaveVolatileRegs = (caller_save, caller_load) where @@ -396,6 +413,51 @@ callerSaves :: GlobalReg -> Bool #ifdef CALLER_SAVES_Base callerSaves BaseReg = True #endif +#ifdef CALLER_SAVES_R1 +callerSaves (VanillaReg 1 _) = True +#endif +#ifdef CALLER_SAVES_R2 +callerSaves (VanillaReg 2 _) = True +#endif +#ifdef CALLER_SAVES_R3 +callerSaves (VanillaReg 3 _) = True +#endif +#ifdef CALLER_SAVES_R4 +callerSaves (VanillaReg 4 _) = True +#endif +#ifdef CALLER_SAVES_R5 +callerSaves (VanillaReg 5 _) = True +#endif +#ifdef CALLER_SAVES_R6 +callerSaves (VanillaReg 6 _) = True +#endif +#ifdef CALLER_SAVES_R7 +callerSaves (VanillaReg 7 _) = True +#endif +#ifdef CALLER_SAVES_R8 +callerSaves (VanillaReg 8 _) = True +#endif +#ifdef CALLER_SAVES_F1 +callerSaves (FloatReg 1) = True +#endif +#ifdef CALLER_SAVES_F2 +callerSaves (FloatReg 2) = True +#endif +#ifdef CALLER_SAVES_F3 +callerSaves (FloatReg 3) = True +#endif +#ifdef CALLER_SAVES_F4 +callerSaves (FloatReg 4) = True +#endif +#ifdef CALLER_SAVES_D1 +callerSaves (DoubleReg 1) = True +#endif +#ifdef CALLER_SAVES_D2 +callerSaves (DoubleReg 2) = True +#endif +#ifdef CALLER_SAVES_L1 +callerSaves (LongReg 1) = True +#endif #ifdef CALLER_SAVES_Sp callerSaves Sp = True #endif diff --git a/compiler/deSugar/Check.lhs b/compiler/deSugar/Check.lhs index 47fbf2e..59c102f 100644 --- a/compiler/deSugar/Check.lhs +++ b/compiler/deSugar/Check.lhs @@ -30,6 +30,7 @@ import Type import SrcLoc import UniqSet import Util +import BasicTypes import Outputable import FastString \end{code} @@ -436,14 +437,14 @@ get_lit :: Pat id -> Maybe HsLit -- It doesn't matter which one, because they will only be compared -- with other HsLits gotten in the same way get_lit (LitPat lit) = Just lit -get_lit (NPat (OverLit { ol_val = HsIntegral i}) mb _) = Just (HsIntPrim (mb_neg mb i)) -get_lit (NPat (OverLit { ol_val = HsFractional f }) mb _) = Just (HsFloatPrim (mb_neg mb f)) +get_lit (NPat (OverLit { ol_val = HsIntegral i}) mb _) = Just (HsIntPrim (mb_neg negate mb i)) +get_lit (NPat (OverLit { ol_val = HsFractional f }) mb _) = Just (HsFloatPrim (mb_neg negateFractionalLit mb f)) get_lit (NPat (OverLit { ol_val = HsIsString s }) _ _) = Just (HsStringPrim s) get_lit _ = Nothing -mb_neg :: Num a => Maybe b -> a -> a -mb_neg Nothing v = v -mb_neg (Just _) v = -v +mb_neg :: (a -> a) -> Maybe b -> a -> a +mb_neg _ Nothing v = v +mb_neg negate (Just _) v = negate v get_unused_cons :: [Pat Id] -> [DataCon] get_unused_cons used_cons = ASSERT( not (null used_cons) ) unused_cons diff --git a/compiler/deSugar/DsMeta.hs b/compiler/deSugar/DsMeta.hs index 3a2dda8..a4b47ee 100644 --- a/compiler/deSugar/DsMeta.hs +++ b/compiler/deSugar/DsMeta.hs @@ -1583,7 +1583,7 @@ repLiteral lit mk_integer :: Integer -> DsM HsLit mk_integer i = do integer_ty <- lookupType integerTyConName return $ HsInteger i integer_ty -mk_rational :: Rational -> DsM HsLit +mk_rational :: FractionalLit -> DsM HsLit mk_rational r = do rat_ty <- lookupType rationalTyConName return $ HsRat r rat_ty mk_string :: FastString -> DsM HsLit diff --git a/compiler/deSugar/MatchLit.lhs b/compiler/deSugar/MatchLit.lhs index be112e0..0bd2538 100644 --- a/compiler/deSugar/MatchLit.lhs +++ b/compiler/deSugar/MatchLit.lhs @@ -33,6 +33,7 @@ import Literal import SrcLoc import Data.Ratio import Outputable +import BasicTypes import Util import FastString \end{code} @@ -64,8 +65,8 @@ dsLit (HsStringPrim s) = return (Lit (MachStr s)) dsLit (HsCharPrim c) = return (Lit (MachChar c)) dsLit (HsIntPrim i) = return (Lit (MachInt i)) dsLit (HsWordPrim w) = return (Lit (MachWord w)) -dsLit (HsFloatPrim f) = return (Lit (MachFloat f)) -dsLit (HsDoublePrim d) = return (Lit (MachDouble d)) +dsLit (HsFloatPrim f) = return (Lit (MachFloat (fl_value f))) +dsLit (HsDoublePrim d) = return (Lit (MachDouble (fl_value d))) dsLit (HsChar c) = return (mkCharExpr c) dsLit (HsString str) = mkStringExprFS str @@ -73,8 +74,8 @@ dsLit (HsInteger i _) = mkIntegerExpr i dsLit (HsInt i) = return (mkIntExpr i) dsLit (HsRat r ty) = do - num <- mkIntegerExpr (numerator r) - denom <- mkIntegerExpr (denominator r) + num <- mkIntegerExpr (numerator (fl_value r)) + denom <- mkIntegerExpr (denominator (fl_value r)) return (mkConApp ratio_data_con [Type integer_ty, num, denom]) where (ratio_data_con, integer_ty) @@ -112,8 +113,8 @@ hsLitKey (HsIntPrim i) = mkMachInt i hsLitKey (HsWordPrim w) = mkMachWord w hsLitKey (HsCharPrim c) = MachChar c hsLitKey (HsStringPrim s) = MachStr s -hsLitKey (HsFloatPrim f) = MachFloat f -hsLitKey (HsDoublePrim d) = MachDouble d +hsLitKey (HsFloatPrim f) = MachFloat (fl_value f) +hsLitKey (HsDoublePrim d) = MachDouble (fl_value d) hsLitKey (HsString s) = MachStr s hsLitKey l = pprPanic "hsLitKey" (ppr l) @@ -124,8 +125,8 @@ hsOverLitKey (OverLit { ol_val = l }) neg = litValKey l neg litValKey :: OverLitVal -> Bool -> Literal litValKey (HsIntegral i) False = MachInt i litValKey (HsIntegral i) True = MachInt (-i) -litValKey (HsFractional r) False = MachFloat r -litValKey (HsFractional r) True = MachFloat (-r) +litValKey (HsFractional r) False = MachFloat (fl_value r) +litValKey (HsFractional r) True = MachFloat (negate (fl_value r)) litValKey (HsIsString s) neg = ASSERT( not neg) MachStr s \end{code} @@ -186,12 +187,12 @@ tidyNPat tidy_lit_pat (OverLit val False _ ty) mb_neg _ (Just _, HsIntegral i) -> Just (-i) _ -> Nothing - mb_rat_lit :: Maybe Rational + mb_rat_lit :: Maybe FractionalLit mb_rat_lit = case (mb_neg, val) of - (Nothing, HsIntegral i) -> Just (fromInteger i) - (Just _, HsIntegral i) -> Just (fromInteger (-i)) + (Nothing, HsIntegral i) -> Just (integralFractionalLit (fromInteger i)) + (Just _, HsIntegral i) -> Just (integralFractionalLit (fromInteger (-i))) (Nothing, HsFractional f) -> Just f - (Just _, HsFractional f) -> Just (-f) + (Just _, HsFractional f) -> Just (negateFractionalLit f) _ -> Nothing mb_str_lit :: Maybe FastString diff --git a/compiler/ghci/ObjLink.lhs b/compiler/ghci/ObjLink.lhs index 310ddb5..cd593f7 100644 --- a/compiler/ghci/ObjLink.lhs +++ b/compiler/ghci/ObjLink.lhs @@ -28,6 +28,8 @@ import Control.Monad ( when ) import Foreign.C import Foreign ( nullPtr ) import GHC.Exts ( Ptr(..) ) +import GHC.IO.Encoding ( fileSystemEncoding ) +import qualified GHC.Foreign as GHC @@ -35,17 +37,21 @@ import GHC.Exts ( Ptr(..) ) -- RTS Linker Interface -- --------------------------------------------------------------------------- +-- UNICODE FIXME: Unicode object/archive/DLL file names on Windows will only work in the right code page +withFileCString :: FilePath -> (CString -> IO a) -> IO a +withFileCString = GHC.withCString fileSystemEncoding + insertSymbol :: String -> String -> Ptr a -> IO () insertSymbol obj_name key symbol = let str = prefixUnderscore key - in withCString obj_name $ \c_obj_name -> - withCString str $ \c_str -> + in withFileCString obj_name $ \c_obj_name -> + withCAString str $ \c_str -> c_insertSymbol c_obj_name c_str symbol lookupSymbol :: String -> IO (Maybe (Ptr a)) lookupSymbol str_in = do let str = prefixUnderscore str_in - withCString str $ \c_str -> do + withCAString str $ \c_str -> do addr <- c_lookupSymbol c_str if addr == nullPtr then return Nothing @@ -60,7 +66,7 @@ loadDLL :: String -> IO (Maybe String) -- Nothing => success -- Just err_msg => failure loadDLL str = do - maybe_errmsg <- withCString str $ \dll -> c_addDLL dll + maybe_errmsg <- withFileCString str $ \dll -> c_addDLL dll if maybe_errmsg == nullPtr then return Nothing else do str <- peekCString maybe_errmsg @@ -68,19 +74,19 @@ loadDLL str = do loadArchive :: String -> IO () loadArchive str = do - withCString str $ \c_str -> do + withFileCString str $ \c_str -> do r <- c_loadArchive c_str when (r == 0) (panic ("loadArchive " ++ show str ++ ": failed")) loadObj :: String -> IO () loadObj str = do - withCString str $ \c_str -> do + withFileCString str $ \c_str -> do r <- c_loadObj c_str when (r == 0) (panic ("loadObj " ++ show str ++ ": failed")) unloadObj :: String -> IO () unloadObj str = - withCString str $ \c_str -> do + withFileCString str $ \c_str -> do r <- c_unloadObj c_str when (r == 0) (panic ("unloadObj " ++ show str ++ ": failed")) diff --git a/compiler/hsSyn/Convert.lhs b/compiler/hsSyn/Convert.lhs index 5933e9d..492f255 100644 --- a/compiler/hsSyn/Convert.lhs +++ b/compiler/hsSyn/Convert.lhs @@ -568,7 +568,7 @@ cvtOverLit :: Lit -> CvtM (HsOverLit RdrName) cvtOverLit (IntegerL i) = do { force i; return $ mkHsIntegral i placeHolderType} cvtOverLit (RationalL r) - = do { force r; return $ mkHsFractional r placeHolderType} + = do { force r; return $ mkHsFractional (cvtFractionalLit r) placeHolderType} cvtOverLit (StringL s) = do { let { s' = mkFastString s } ; force s' @@ -602,8 +602,8 @@ allCharLs xs cvtLit :: Lit -> CvtM HsLit cvtLit (IntPrimL i) = do { force i; return $ HsIntPrim i } cvtLit (WordPrimL w) = do { force w; return $ HsWordPrim w } -cvtLit (FloatPrimL f) = do { force f; return $ HsFloatPrim f } -cvtLit (DoublePrimL f) = do { force f; return $ HsDoublePrim f } +cvtLit (FloatPrimL f) = do { force f; return $ HsFloatPrim (cvtFractionalLit f) } +cvtLit (DoublePrimL f) = do { force f; return $ HsDoublePrim (cvtFractionalLit f) } cvtLit (CharL c) = do { force c; return $ HsChar c } cvtLit (StringL s) = do { let { s' = mkFastString s } ; force s' @@ -768,6 +768,9 @@ overloadedLit _ = False void :: Type.Type void = placeHolderType +cvtFractionalLit :: Rational -> FractionalLit +cvtFractionalLit r = FL { fl_text = show (fromRational r :: Double), fl_value = r } + -------------------------------------------------------------------- -- Turning Name back into RdrName -------------------------------------------------------------------- diff --git a/compiler/hsSyn/HsBinds.lhs b/compiler/hsSyn/HsBinds.lhs index 81b5f18..5871914 100644 --- a/compiler/hsSyn/HsBinds.lhs +++ b/compiler/hsSyn/HsBinds.lhs @@ -69,23 +69,23 @@ data HsLocalBindsLR idL idR -- Bindings in a 'let' expression type HsValBinds id = HsValBindsLR id id data HsValBindsLR idL idR -- Value bindings (not implicit parameters) - = ValBindsIn -- Before renaming + = ValBindsIn -- Before renaming RHS; idR is always RdrName (LHsBindsLR idL idR) [LSig idR] -- Not dependency analysed -- Recursive by default - | ValBindsOut -- After renaming + | ValBindsOut -- After renaming RHS; idR can be Name or Id [(RecFlag, LHsBinds idL)] -- Dependency analysed, later bindings -- in the list may depend on earlier -- ones. [LSig Name] deriving (Data, Typeable) -type LHsBinds id = Bag (LHsBind id) -type LHsBind id = Located (HsBind id) -type HsBind id = HsBindLR id id +type LHsBind id = LHsBindLR id id +type LHsBinds id = LHsBindsLR id id +type HsBind id = HsBindLR id id -type LHsBindLR idL idR = Located (HsBindLR idL idR) type LHsBindsLR idL idR = Bag (LHsBindLR idL idR) +type LHsBindLR idL idR = Located (HsBindLR idL idR) data HsBindLR idL idR = -- | FunBind is used for both functions @f x = e@ diff --git a/compiler/hsSyn/HsLit.lhs b/compiler/hsSyn/HsLit.lhs index 4a565ff..2cda103 100644 --- a/compiler/hsSyn/HsLit.lhs +++ b/compiler/hsSyn/HsLit.lhs @@ -12,7 +12,8 @@ module HsLit where #include "HsVersions.h" import {-# SOURCE #-} HsExpr( SyntaxExpr, pprExpr ) -import HsTypes (PostTcType) +import BasicTypes ( FractionalLit(..) ) +import HsTypes ( PostTcType ) import Type ( Type ) import Outputable import FastString @@ -40,10 +41,10 @@ data HsLit | HsWordPrim Integer -- Unboxed Word | HsInteger Integer Type -- Genuinely an integer; arises only from TRANSLATION -- (overloaded literals are done with HsOverLit) - | HsRat Rational Type -- Genuinely a rational; arises only from TRANSLATION + | HsRat FractionalLit Type -- Genuinely a rational; arises only from TRANSLATION -- (overloaded literals are done with HsOverLit) - | HsFloatPrim Rational -- Unboxed Float - | HsDoublePrim Rational -- Unboxed Double + | HsFloatPrim FractionalLit -- Unboxed Float + | HsDoublePrim FractionalLit -- Unboxed Double deriving (Data, Typeable) instance Eq HsLit where @@ -70,7 +71,7 @@ data HsOverLit id -- An overloaded literal data OverLitVal = HsIntegral !Integer -- Integer-looking literals; - | HsFractional !Rational -- Frac-looking literals + | HsFractional !FractionalLit -- Frac-looking literals | HsIsString !FastString -- String-looking literals deriving (Data, Typeable) @@ -142,9 +143,9 @@ instance Outputable HsLit where ppr (HsStringPrim s) = pprHsString s <> char '#' ppr (HsInt i) = integer i ppr (HsInteger i _) = integer i - ppr (HsRat f _) = rational f - ppr (HsFloatPrim f) = rational f <> char '#' - ppr (HsDoublePrim d) = rational d <> text "##" + ppr (HsRat f _) = ppr f + ppr (HsFloatPrim f) = ppr f <> char '#' + ppr (HsDoublePrim d) = ppr d <> text "##" ppr (HsIntPrim i) = integer i <> char '#' ppr (HsWordPrim w) = integer w <> text "##" @@ -155,6 +156,6 @@ instance OutputableBndr id => Outputable (HsOverLit id) where instance Outputable OverLitVal where ppr (HsIntegral i) = integer i - ppr (HsFractional f) = rational f + ppr (HsFractional f) = ppr f ppr (HsIsString s) = pprHsString s \end{code} diff --git a/compiler/hsSyn/HsUtils.lhs b/compiler/hsSyn/HsUtils.lhs index dfb3dd5..cc57e05 100644 --- a/compiler/hsSyn/HsUtils.lhs +++ b/compiler/hsSyn/HsUtils.lhs @@ -84,7 +84,6 @@ import NameSet import BasicTypes import SrcLoc import FastString -import Outputable import Util import Bag @@ -188,7 +187,7 @@ mkSimpleHsAlt pat expr -- See RnEnv.lookupSyntaxName mkHsIntegral :: Integer -> PostTcType -> HsOverLit id -mkHsFractional :: Rational -> PostTcType -> HsOverLit id +mkHsFractional :: FractionalLit -> PostTcType -> HsOverLit id mkHsIsString :: FastString -> PostTcType -> HsOverLit id mkHsDo :: HsStmtContext Name -> [LStmt id] -> HsExpr id mkHsComp :: HsStmtContext Name -> [LStmt id] -> LHsExpr id -> HsExpr id @@ -664,11 +663,15 @@ lStmtsImplicits = hs_lstmts hsValBindsImplicits :: HsValBindsLR Name idR -> NameSet hsValBindsImplicits (ValBindsOut binds _) - = unionManyNameSets [foldBag unionNameSets (hs_bind . unLoc) emptyNameSet hs_binds | (_rec, hs_binds) <- binds] + = foldr (unionNameSets . lhsBindsImplicits . snd) emptyNameSet binds +hsValBindsImplicits (ValBindsIn binds _) + = lhsBindsImplicits binds + +lhsBindsImplicits :: LHsBindsLR Name idR -> NameSet +lhsBindsImplicits = foldBag unionNameSets lhs_bind emptyNameSet where - hs_bind (PatBind { pat_lhs = lpat }) = lPatImplicits lpat - hs_bind _ = emptyNameSet -hsValBindsImplicits (ValBindsIn {}) = pprPanic "hsValBindsImplicits: ValBindsIn" empty + lhs_bind (L _ (PatBind { pat_lhs = lpat })) = lPatImplicits lpat + lhs_bind _ = emptyNameSet lPatImplicits :: LPat Name -> NameSet lPatImplicits = hs_lpat diff --git a/compiler/main/DynFlags.hs b/compiler/main/DynFlags.hs index 87bf391..d80d2a6 100644 --- a/compiler/main/DynFlags.hs +++ b/compiler/main/DynFlags.hs @@ -108,6 +108,8 @@ import Data.Char import Data.List import Data.Map (Map) import qualified Data.Map as Map +import Data.Set (Set) +import qualified Data.Set as Set import System.FilePath import System.IO ( stderr, hPutChar ) @@ -122,6 +124,21 @@ data DynFlag | Opt_D_dump_raw_cmm | Opt_D_dump_cmmz | Opt_D_dump_cmmz_pretty + -- All of the cmmz subflags (there are a lot!) Automatically + -- enabled if you run -ddump-cmmz + | Opt_D_dump_cmmz_cbe + | Opt_D_dump_cmmz_proc + | Opt_D_dump_cmmz_spills + | Opt_D_dump_cmmz_rewrite + | Opt_D_dump_cmmz_dead + | Opt_D_dump_cmmz_stub + | Opt_D_dump_cmmz_sp + | Opt_D_dump_cmmz_procmap + | Opt_D_dump_cmmz_split + | Opt_D_dump_cmmz_lower + | Opt_D_dump_cmmz_info + | Opt_D_dump_cmmz_cafs + -- end cmmz subflags | Opt_D_dump_cps_cmm | Opt_D_dump_cvt_cmm | Opt_D_dump_asm @@ -481,6 +498,11 @@ data DynFlags = DynFlags { filesToClean :: IORef [FilePath], dirsToClean :: IORef (Map FilePath FilePath), + -- Names of files which were generated from -ddump-to-file; used to + -- track which ones we need to truncate because it's our first run + -- through + generatedDumps :: IORef (Set FilePath), + -- hsc dynamic flags flags :: [DynFlag], -- Don't change this without updating extensionFlags: @@ -717,12 +739,14 @@ initDynFlags dflags = do ways <- readIORef v_Ways refFilesToClean <- newIORef [] refDirsToClean <- newIORef Map.empty + refGeneratedDumps <- newIORef Set.empty return dflags{ ways = ways, buildTag = mkBuildTag (filter (not . wayRTSOnly) ways), rtsBuildTag = mkBuildTag ways, filesToClean = refFilesToClean, - dirsToClean = refDirsToClean + dirsToClean = refDirsToClean, + generatedDumps = refGeneratedDumps } -- | The normal 'DynFlags'. Note that they is not suitable for use in this form @@ -798,6 +822,7 @@ defaultDynFlags mySettings = -- end of ghc -M values filesToClean = panic "defaultDynFlags: No filesToClean", dirsToClean = panic "defaultDynFlags: No dirsToClean", + generatedDumps = panic "defaultDynFlags: No generatedDumps", haddockOptions = Nothing, flags = defaultFlags, language = Nothing, @@ -1187,8 +1212,8 @@ dynamic_flags = [ , Flag "dylib-install-name" (hasArg setDylibInstallName) ------- Libraries --------------------------------------------------- - , Flag "L" (Prefix addLibraryPath) - , Flag "l" (AnySuffix (upd . addOptl)) + , Flag "L" (Prefix addLibraryPath) + , Flag "l" (hasArg (addOptl . ("-l" ++))) ------- Frameworks -------------------------------------------------- -- -framework-path should really be -F ... @@ -1256,6 +1281,18 @@ dynamic_flags = [ , Flag "ddump-raw-cmm" (setDumpFlag Opt_D_dump_raw_cmm) , Flag "ddump-cmmz" (setDumpFlag Opt_D_dump_cmmz) , Flag "ddump-cmmz-pretty" (setDumpFlag Opt_D_dump_cmmz_pretty) + , Flag "ddump-cmmz-cbe" (setDumpFlag Opt_D_dump_cmmz_cbe) + , Flag "ddump-cmmz-spills" (setDumpFlag Opt_D_dump_cmmz_spills) + , Flag "ddump-cmmz-proc" (setDumpFlag Opt_D_dump_cmmz_proc) + , Flag "ddump-cmmz-rewrite" (setDumpFlag Opt_D_dump_cmmz_rewrite) + , Flag "ddump-cmmz-dead" (setDumpFlag Opt_D_dump_cmmz_dead) + , Flag "ddump-cmmz-stub" (setDumpFlag Opt_D_dump_cmmz_stub) + , Flag "ddump-cmmz-sp" (setDumpFlag Opt_D_dump_cmmz_sp) + , Flag "ddump-cmmz-procmap" (setDumpFlag Opt_D_dump_cmmz_procmap) + , Flag "ddump-cmmz-split" (setDumpFlag Opt_D_dump_cmmz_split) + , Flag "ddump-cmmz-lower" (setDumpFlag Opt_D_dump_cmmz_lower) + , Flag "ddump-cmmz-info" (setDumpFlag Opt_D_dump_cmmz_info) + , Flag "ddump-cmmz-cafs" (setDumpFlag Opt_D_dump_cmmz_cafs) , Flag "ddump-core-stats" (setDumpFlag Opt_D_dump_core_stats) , Flag "ddump-cps-cmm" (setDumpFlag Opt_D_dump_cps_cmm) , Flag "ddump-cvt-cmm" (setDumpFlag Opt_D_dump_cvt_cmm) diff --git a/compiler/main/ErrUtils.lhs b/compiler/main/ErrUtils.lhs index b6297a2..1c7a389 100644 --- a/compiler/main/ErrUtils.lhs +++ b/compiler/main/ErrUtils.lhs @@ -41,6 +41,9 @@ import StaticFlags ( opt_ErrorSpans ) import System.Exit ( ExitCode(..), exitWith ) import Data.List +import qualified Data.Set as Set +import Data.IORef +import Control.Monad import System.IO -- ----------------------------------------------------------------------------- @@ -208,19 +211,26 @@ mkDumpDoc hdr doc -- otherwise emit to stdout. dumpSDoc :: DynFlags -> DynFlag -> String -> SDoc -> IO () dumpSDoc dflags dflag hdr doc - = do let mFile = chooseDumpFile dflags dflag - case mFile of - -- write the dump to a file - -- don't add the header in this case, we can see what kind - -- of dump it is from the filename. - Just fileName - -> do handle <- openFile fileName AppendMode - hPrintDump handle doc - hClose handle - - -- write the dump to stdout - Nothing - -> do printDump (mkDumpDoc hdr doc) + = do let mFile = chooseDumpFile dflags dflag + case mFile of + -- write the dump to a file + -- don't add the header in this case, we can see what kind + -- of dump it is from the filename. + Just fileName + -> do + let gdref = generatedDumps dflags + gd <- readIORef gdref + let append = Set.member fileName gd + mode = if append then AppendMode else WriteMode + when (not append) $ + writeIORef gdref (Set.insert fileName gd) + handle <- openFile fileName mode + hPrintDump handle doc + hClose handle + + -- write the dump to stdout + Nothing + -> printDump (mkDumpDoc hdr doc) -- | Choose where to put a dump file based on DynFlags diff --git a/compiler/main/SysTools.lhs b/compiler/main/SysTools.lhs index 436cfa6..497a938 100644 --- a/compiler/main/SysTools.lhs +++ b/compiler/main/SysTools.lhs @@ -822,14 +822,15 @@ getBaseDir :: IO (Maybe String) #if defined(mingw32_HOST_OS) -- Assuming we are running ghc, accessed by path $(stuff)/bin/ghc.exe, -- return the path $(stuff)/lib. -getBaseDir = do let len = (2048::Int) -- plenty, PATH_MAX is 512 under Win32. - buf <- mallocArray len - ret <- getModuleFileName nullPtr buf len - if ret == 0 then free buf >> return Nothing - else do s <- peekCString buf - free buf - return (Just (rootDir s)) +getBaseDir = try_size 2048 -- plenty, PATH_MAX is 512 under Win32. where + try_size size = allocaArray (fromIntegral size) $ \buf -> do + ret <- c_GetModuleFileName nullPtr buf size + case ret of + 0 -> return Nothing + _ | ret < size -> fmap (Just . rootDir) $ peekCWString buf + | otherwise -> try_size (size * 2) + rootDir s = case splitFileName $ normalise s of (d, ghc_exe) | lower ghc_exe `elem` ["ghc.exe", @@ -844,8 +845,8 @@ getBaseDir = do let len = (2048::Int) -- plenty, PATH_MAX is 512 under Win32. where fail = panic ("can't decompose ghc.exe path: " ++ show s) lower = map toLower -foreign import stdcall unsafe "GetModuleFileNameA" - getModuleFileName :: Ptr () -> CString -> Int -> IO Int32 +foreign import stdcall unsafe "windows.h GetModuleFileNameW" + c_GetModuleFileName :: Ptr () -> CWString -> Word32 -> IO Word32 #else getBaseDir = return Nothing #endif diff --git a/compiler/nativeGen/AsmCodeGen.lhs b/compiler/nativeGen/AsmCodeGen.lhs index 07acbbb..57faa6f 100644 --- a/compiler/nativeGen/AsmCodeGen.lhs +++ b/compiler/nativeGen/AsmCodeGen.lhs @@ -50,6 +50,7 @@ import qualified RegAlloc.Graph.TrivColorable as Color import TargetReg import Platform +import Config import Instruction import PIC import Reg @@ -68,7 +69,6 @@ import UniqSupply import DynFlags import StaticFlags import Util -import Config import Digraph import qualified Pretty @@ -451,14 +451,12 @@ makeImportsDoc dflags imports -- stack so add the note in: Pretty.$$ Pretty.text ".section .note.GNU-stack,\"\",@progbits" #endif -#if !defined(darwin_TARGET_OS) -- And just because every other compiler does, lets stick in -- an identifier directive: .ident "GHC x.y.z" - Pretty.$$ let compilerIdent = Pretty.text "GHC" Pretty.<+> + Pretty.$$ let compilerIdent = Pretty.text "GHC" Pretty.<+> Pretty.text cProjectVersion in Pretty.text ".ident" Pretty.<+> Pretty.doubleQuotes compilerIdent -#endif where -- Generate "symbol stubs" for all external symbols that might diff --git a/compiler/parser/Lexer.x b/compiler/parser/Lexer.x index bf34ee7..a55a631 100644 --- a/compiler/parser/Lexer.x +++ b/compiler/parser/Lexer.x @@ -68,7 +68,7 @@ import UniqFM import DynFlags import Module import Ctype -import BasicTypes ( InlineSpec(..), RuleMatchInfo(..) ) +import BasicTypes ( InlineSpec(..), RuleMatchInfo(..), FractionalLit(..) ) import Util ( readRational ) import Control.Monad @@ -536,14 +536,14 @@ data Token | ITchar Char | ITstring FastString | ITinteger Integer - | ITrational Rational + | ITrational FractionalLit | ITprimchar Char | ITprimstring FastString | ITprimint Integer | ITprimword Integer - | ITprimfloat Rational - | ITprimdouble Rational + | ITprimfloat FractionalLit + | ITprimdouble FractionalLit -- Template Haskell extension tokens | ITopenExpQuote -- [| or [e| @@ -1056,9 +1056,12 @@ hexadecimal = (16,hexDigit) -- readRational can understand negative rationals, exponents, everything. tok_float, tok_primfloat, tok_primdouble :: String -> Token -tok_float str = ITrational $! readRational str -tok_primfloat str = ITprimfloat $! readRational str -tok_primdouble str = ITprimdouble $! readRational str +tok_float str = ITrational $! readFractionalLit str +tok_primfloat str = ITprimfloat $! readFractionalLit str +tok_primdouble str = ITprimdouble $! readFractionalLit str + +readFractionalLit :: String -> FractionalLit +readFractionalLit str = (FL $! str) $! readRational str -- ----------------------------------------------------------------------------- -- Layout processing diff --git a/compiler/typecheck/TcHsSyn.lhs b/compiler/typecheck/TcHsSyn.lhs index cd2cadf..12b50ac 100644 --- a/compiler/typecheck/TcHsSyn.lhs +++ b/compiler/typecheck/TcHsSyn.lhs @@ -121,7 +121,7 @@ shortCutLit (HsIntegral i) ty | isIntTy ty && inIntRange i = Just (HsLit (HsInt i)) | isWordTy ty && inWordRange i = Just (mkLit wordDataCon (HsWordPrim i)) | isIntegerTy ty = Just (HsLit (HsInteger i ty)) - | otherwise = shortCutLit (HsFractional (fromInteger i)) ty + | otherwise = shortCutLit (HsFractional (integralFractionalLit i)) ty -- The 'otherwise' case is important -- Consider (3 :: Float). Syntactically it looks like an IntLit, -- so we'll call shortCutIntLit, but of course it's a float diff --git a/compiler/typecheck/TcSplice.lhs b/compiler/typecheck/TcSplice.lhs index d6517a6..3cc2eb5 100644 --- a/compiler/typecheck/TcSplice.lhs +++ b/compiler/typecheck/TcSplice.lhs @@ -1204,7 +1204,7 @@ reifyClassInstance i reifyType :: TypeRep.Type -> TcM TH.Type -- Monadic only because of failure reifyType ty@(ForAllTy _ _) = reify_for_all ty -reifyType ty@(PredTy {} `FunTy` _) = reify_for_all ty -- Types like ((?x::Int) => Char -> Char) +reifyType ty@(PredTy {} `FunTy` _) = reify_for_all ty -- Types like ((?x::Int) => Char -> Char) reifyType (TyVarTy tv) = return (TH.VarT (reifyName tv)) reifyType (TyConApp tc tys) = reify_tc_app tc tys -- Do not expand type synonyms here reifyType (AppTy t1 t2) = do { [r1,r2] <- reifyTypes [t1,t2] ; return (r1 `TH.AppT` r2) } diff --git a/compiler/vectorise/Vectorise/Builtins/Base.hs b/compiler/vectorise/Vectorise/Builtins/Base.hs index 165dbda..8456d34 100644 --- a/compiler/vectorise/Vectorise/Builtins/Base.hs +++ b/compiler/vectorise/Vectorise/Builtins/Base.hs @@ -13,7 +13,7 @@ module Vectorise.Builtins.Base ( indexBuiltin, -- * Projections - selTy, + selTy, selReplicate, selPick, selTags, diff --git a/compiler/vectorise/Vectorise/Builtins/Initialise.hs b/compiler/vectorise/Vectorise/Builtins/Initialise.hs index ecb8a98..5a6cf88 100644 --- a/compiler/vectorise/Vectorise/Builtins/Initialise.hs +++ b/compiler/vectorise/Vectorise/Builtins/Initialise.hs @@ -40,26 +40,62 @@ initBuiltins initBuiltins pkg = do mapM_ load dph_Orphans - -- From dph-common:Data.Array.Parallel.Lifted.PArray - parrayTyCon <- externalTyCon dph_PArray (fsLit "PArray") - let [parrayDataCon] = tyConDataCons parrayTyCon + -- From dph-common:Data.Array.Parallel.PArray.PData + -- PData is a type family that maps an element type onto the type + -- we use to hold an array of those elements. + pdataTyCon <- externalTyCon dph_PArray_PData (fsLit "PData") + + -- PR is a type class that holds the primitive operators we can + -- apply to array data. Its functions take arrays in terms of PData types. + prClass <- externalClass dph_PArray_PData (fsLit "PR") + let prTyCon = classTyCon prClass + [prDataCon] = tyConDataCons prTyCon - pdataTyCon <- externalTyCon dph_PArray (fsLit "PData") - paClass <- externalClass dph_PArray (fsLit "PA") + + -- From dph-common:Data.Array.Parallel.PArray.PRepr + preprTyCon <- externalTyCon dph_PArray_PRepr (fsLit "PRepr") + paClass <- externalClass dph_PArray_PRepr (fsLit "PA") let paTyCon = classTyCon paClass [paDataCon] = tyConDataCons paTyCon paPRSel = classSCSelId paClass 0 - preprTyCon <- externalTyCon dph_PArray (fsLit "PRepr") - prClass <- externalClass dph_PArray (fsLit "PR") - let prTyCon = classTyCon prClass - [prDataCon] = tyConDataCons prTyCon + replicatePDVar <- externalVar dph_PArray_PRepr (fsLit "replicatePD") + emptyPDVar <- externalVar dph_PArray_PRepr (fsLit "emptyPD") + packByTagPDVar <- externalVar dph_PArray_PRepr (fsLit "packByTagPD") + combines <- mapM (externalVar dph_PArray_PRepr) + [mkFastString ("combine" ++ show i ++ "PD") + | i <- [2..mAX_DPH_COMBINE]] + + let combinePDVars = listArray (2, mAX_DPH_COMBINE) combines + - closureTyCon <- externalTyCon dph_Closure (fsLit ":->") + -- From dph-common:Data.Array.Parallel.PArray.Scalar + -- Scalar is the class of scalar values. + -- The dictionary contains functions to coerce U.Arrays of scalars + -- to and from the PData representation. + scalarClass <- externalClass dph_PArray_Scalar (fsLit "Scalar") + + + -- From dph-common:Data.Array.Parallel.Lifted.PArray + -- A PArray (Parallel Array) holds the array length and some array elements + -- represented by the PData type family. + parrayTyCon <- externalTyCon dph_PArray_Base (fsLit "PArray") + let [parrayDataCon] = tyConDataCons parrayTyCon + + -- From dph-common:Data.Array.Parallel.PArray.Types + voidTyCon <- externalTyCon dph_PArray_Types (fsLit "Void") + voidVar <- externalVar dph_PArray_Types (fsLit "void") + fromVoidVar <- externalVar dph_PArray_Types (fsLit "fromVoid") + wrapTyCon <- externalTyCon dph_PArray_Types (fsLit "Wrap") + sum_tcs <- mapM (externalTyCon dph_PArray_Types) (numbered "Sum" 2 mAX_DPH_SUM) + + -- from dph-common:Data.Array.Parallel.PArray.PDataInstances + pvoidVar <- externalVar dph_PArray_PDataInstances (fsLit "pvoid") + punitVar <- externalVar dph_PArray_PDataInstances (fsLit "punit") + + + closureTyCon <- externalTyCon dph_Closure (fsLit ":->") - -- From dph-common:Data.Array.Parallel.Lifted.Repr - voidTyCon <- externalTyCon dph_Repr (fsLit "Void") - wrapTyCon <- externalTyCon dph_Repr (fsLit "Wrap") -- From dph-common:Data.Array.Parallel.Lifted.Unboxed sel_tys <- mapM (externalType dph_Unboxed) @@ -77,8 +113,6 @@ initBuiltins pkg sel_els <- mapM mk_elements [(i,j) | i <- [2..mAX_DPH_SUM], j <- [0..i-1]] - sum_tcs <- mapM (externalTyCon dph_Repr) - (numbered "Sum" 2 mAX_DPH_SUM) let selTys = listArray (2, mAX_DPH_SUM) sel_tys selReplicates = listArray (2, mAX_DPH_SUM) sel_replicates @@ -88,26 +122,14 @@ initBuiltins pkg sumTyCons = listArray (2, mAX_DPH_SUM) sum_tcs - voidVar <- externalVar dph_Repr (fsLit "void") - pvoidVar <- externalVar dph_Repr (fsLit "pvoid") - fromVoidVar <- externalVar dph_Repr (fsLit "fromVoid") - punitVar <- externalVar dph_Repr (fsLit "punit") + closureVar <- externalVar dph_Closure (fsLit "closure") applyVar <- externalVar dph_Closure (fsLit "$:") liftedClosureVar <- externalVar dph_Closure (fsLit "liftedClosure") liftedApplyVar <- externalVar dph_Closure (fsLit "liftedApply") - replicatePDVar <- externalVar dph_PArray (fsLit "replicatePD") - emptyPDVar <- externalVar dph_PArray (fsLit "emptyPD") - packByTagPDVar <- externalVar dph_PArray (fsLit "packByTagPD") - - combines <- mapM (externalVar dph_PArray) - [mkFastString ("combine" ++ show i ++ "PD") - | i <- [2..mAX_DPH_COMBINE]] - let combinePDVars = listArray (2, mAX_DPH_COMBINE) combines - scalarClass <- externalClass dph_PArray (fsLit "Scalar") scalar_map <- externalVar dph_Scalar (fsLit "scalar_map") - scalar_zip2 <- externalVar dph_Scalar (fsLit "scalar_zipWith") + scalar_zip2 <- externalVar dph_Scalar (fsLit "scalar_zipWith") scalar_zips <- mapM (externalVar dph_Scalar) (numbered "scalar_zipWith" 3 mAX_DPH_SCALAR_ARGS) @@ -162,13 +184,20 @@ initBuiltins pkg , liftingContext = liftingContext } where - mods@(Modules { - dph_PArray = dph_PArray - , dph_Repr = dph_Repr - , dph_Closure = dph_Closure - , dph_Scalar = dph_Scalar - , dph_Unboxed = dph_Unboxed - }) + -- Extract out all the modules we'll use. + -- These are the modules from the DPH base library that contain + -- the primitive array types and functions that vectorised code uses. + mods@(Modules + { dph_PArray_Base = dph_PArray_Base + , dph_PArray_Scalar = dph_PArray_Scalar + , dph_PArray_PRepr = dph_PArray_PRepr + , dph_PArray_PData = dph_PArray_PData + , dph_PArray_PDataInstances = dph_PArray_PDataInstances + , dph_PArray_Types = dph_PArray_Types + , dph_Closure = dph_Closure + , dph_Scalar = dph_Scalar + , dph_Unboxed = dph_Unboxed + }) = dph_Modules pkg load get_mod = dsLoadModule doc mod @@ -248,13 +277,13 @@ initBuiltinDataCons _ -- | Get the names of all buildin instance functions for the PA class. initBuiltinPAs :: Builtins -> (InstEnv, InstEnv) -> DsM [(Name, Var)] initBuiltinPAs (Builtins { dphModules = mods }) insts - = liftM (initBuiltinDicts insts) (externalClass (dph_PArray mods) (fsLit "PA")) + = liftM (initBuiltinDicts insts) (externalClass (dph_PArray_PRepr mods) (fsLit "PA")) -- | Get the names of all builtin instance functions for the PR class. initBuiltinPRs :: Builtins -> (InstEnv, InstEnv) -> DsM [(Name, Var)] initBuiltinPRs (Builtins { dphModules = mods }) insts - = liftM (initBuiltinDicts insts) (externalClass (dph_PArray mods) (fsLit "PR")) + = liftM (initBuiltinDicts insts) (externalClass (dph_PArray_PData mods) (fsLit "PR")) -- | Get the names of all DPH instance functions for this class. diff --git a/compiler/vectorise/Vectorise/Builtins/Modules.hs b/compiler/vectorise/Vectorise/Builtins/Modules.hs index d5b10cb..6ea3595 100644 --- a/compiler/vectorise/Vectorise/Builtins/Modules.hs +++ b/compiler/vectorise/Vectorise/Builtins/Modules.hs @@ -10,45 +10,61 @@ import FastString -- | Ids of the modules that contain our DPH builtins. data Modules - = Modules - { dph_PArray :: Module - , dph_Repr :: Module - , dph_Closure :: Module - , dph_Unboxed :: Module - , dph_Instances :: Module - , dph_Combinators :: Module - , dph_Scalar :: Module - , dph_Prelude_PArr :: Module - , dph_Prelude_Int :: Module - , dph_Prelude_Word8 :: Module - , dph_Prelude_Double :: Module - , dph_Prelude_Bool :: Module - , dph_Prelude_Tuple :: Module - } + = Modules + { dph_PArray_Base :: Module + , dph_PArray_Scalar :: Module + , dph_PArray_ScalarInstances :: Module + , dph_PArray_PRepr :: Module + , dph_PArray_PReprInstances :: Module + , dph_PArray_PData :: Module + , dph_PArray_PDataInstances :: Module + , dph_PArray_Types :: Module + + , dph_Closure :: Module + , dph_Unboxed :: Module + , dph_Combinators :: Module + , dph_Scalar :: Module + + , dph_Prelude_Int :: Module + , dph_Prelude_Word8 :: Module + , dph_Prelude_Double :: Module + , dph_Prelude_Bool :: Module + , dph_Prelude_Tuple :: Module + } -- | The locations of builtins in the current DPH library. dph_Modules :: PackageId -> Modules dph_Modules pkg - = Modules - { dph_PArray = mk (fsLit "Data.Array.Parallel.Lifted.PArray") - , dph_Repr = mk (fsLit "Data.Array.Parallel.Lifted.Repr") - , dph_Closure = mk (fsLit "Data.Array.Parallel.Lifted.Closure") - , dph_Unboxed = mk (fsLit "Data.Array.Parallel.Lifted.Unboxed") - , dph_Instances = mk (fsLit "Data.Array.Parallel.Lifted.Instances") - , dph_Combinators = mk (fsLit "Data.Array.Parallel.Lifted.Combinators") - , dph_Scalar = mk (fsLit "Data.Array.Parallel.Lifted.Scalar") + = Modules + { dph_PArray_Base = mk (fsLit "Data.Array.Parallel.PArray.Base") + , dph_PArray_Scalar = mk (fsLit "Data.Array.Parallel.PArray.Scalar") + , dph_PArray_ScalarInstances = mk (fsLit "Data.Array.Parallel.PArray.ScalarInstances") + , dph_PArray_PRepr = mk (fsLit "Data.Array.Parallel.PArray.PRepr") + , dph_PArray_PReprInstances = mk (fsLit "Data.Array.Parallel.PArray.PReprInstances") + , dph_PArray_PData = mk (fsLit "Data.Array.Parallel.PArray.PData") + , dph_PArray_PDataInstances = mk (fsLit "Data.Array.Parallel.PArray.PDataInstances") + , dph_PArray_Types = mk (fsLit "Data.Array.Parallel.PArray.Types") + + , dph_Closure = mk (fsLit "Data.Array.Parallel.Lifted.Closure") + , dph_Unboxed = mk (fsLit "Data.Array.Parallel.Lifted.Unboxed") + , dph_Combinators = mk (fsLit "Data.Array.Parallel.Lifted.Combinators") + , dph_Scalar = mk (fsLit "Data.Array.Parallel.Lifted.Scalar") - , dph_Prelude_PArr = mk (fsLit "Data.Array.Parallel.Prelude.Base.PArr") - , dph_Prelude_Int = mk (fsLit "Data.Array.Parallel.Prelude.Base.Int") - , dph_Prelude_Word8 = mk (fsLit "Data.Array.Parallel.Prelude.Base.Word8") - , dph_Prelude_Double = mk (fsLit "Data.Array.Parallel.Prelude.Base.Double") - , dph_Prelude_Bool = mk (fsLit "Data.Array.Parallel.Prelude.Base.Bool") - , dph_Prelude_Tuple = mk (fsLit "Data.Array.Parallel.Prelude.Base.Tuple") - } - where mk = mkModule pkg . mkModuleNameFS + , dph_Prelude_Int = mk (fsLit "Data.Array.Parallel.Prelude.Base.Int") + , dph_Prelude_Word8 = mk (fsLit "Data.Array.Parallel.Prelude.Base.Word8") + , dph_Prelude_Double = mk (fsLit "Data.Array.Parallel.Prelude.Base.Double") + , dph_Prelude_Bool = mk (fsLit "Data.Array.Parallel.Prelude.Base.Bool") + , dph_Prelude_Tuple = mk (fsLit "Data.Array.Parallel.Prelude.Base.Tuple") + } + where mk = mkModule pkg . mkModuleNameFS --- | Project out ids of modules that contain orphan instances that we need to load. dph_Orphans :: [Modules -> Module] -dph_Orphans = [dph_Repr, dph_Instances] +dph_Orphans + = [ dph_PArray_Scalar + , dph_PArray_ScalarInstances + , dph_PArray_PReprInstances + , dph_PArray_PDataInstances + , dph_Scalar + ] diff --git a/compiler/vectorise/Vectorise/Builtins/Prelude.hs b/compiler/vectorise/Vectorise/Builtins/Prelude.hs index b0f305d..51b3d14 100644 --- a/compiler/vectorise/Vectorise/Builtins/Prelude.hs +++ b/compiler/vectorise/Vectorise/Builtins/Prelude.hs @@ -25,36 +25,18 @@ preludeVars :: Modules -> [( Module, FastString -- Maps the original variable to the one in the DPH , Module, FastString)] -- packages that it should be rewritten to. preludeVars (Modules { dph_Combinators = _dph_Combinators - , dph_PArray = _dph_PArray , dph_Prelude_Int = dph_Prelude_Int , dph_Prelude_Word8 = dph_Prelude_Word8 , dph_Prelude_Double = dph_Prelude_Double , dph_Prelude_Bool = dph_Prelude_Bool - , dph_Prelude_PArr = _dph_Prelude_PArr }) - -- Functions that work on whole PArrays, defined in GHC.PArr - = [ {- mk gHC_PARR' (fsLit "mapP") dph_Combinators (fsLit "mapPA") - , mk gHC_PARR' (fsLit "zipWithP") dph_Combinators (fsLit "zipWithPA") - , mk gHC_PARR' (fsLit "zipP") dph_Combinators (fsLit "zipPA") - , mk gHC_PARR' (fsLit "unzipP") dph_Combinators (fsLit "unzipPA") - , mk gHC_PARR' (fsLit "filterP") dph_Combinators (fsLit "filterPA") - , mk gHC_PARR' (fsLit "lengthP") dph_Combinators (fsLit "lengthPA") - , mk gHC_PARR' (fsLit "replicateP") dph_Combinators (fsLit "replicatePA") - , mk gHC_PARR' (fsLit "!:") dph_Combinators (fsLit "indexPA") - , mk gHC_PARR' (fsLit "sliceP") dph_Combinators (fsLit "slicePA") - , mk gHC_PARR' (fsLit "crossMapP") dph_Combinators (fsLit "crossMapPA") - , mk gHC_PARR' (fsLit "singletonP") dph_Combinators (fsLit "singletonPA") - , mk gHC_PARR' (fsLit "concatP") dph_Combinators (fsLit "concatPA") - , mk gHC_PARR' (fsLit "+:+") dph_Combinators (fsLit "appPA") - , mk gHC_PARR' (fsLit "emptyP") dph_PArray (fsLit "emptyPA") - + = [ -- Map scalar functions to versions using closures. - , -} mk' dph_Prelude_Int "div" "divV" + mk' dph_Prelude_Int "div" "divV" , mk' dph_Prelude_Int "mod" "modV" , mk' dph_Prelude_Int "sqrt" "sqrtV" , mk' dph_Prelude_Int "enumFromToP" "enumFromToPA" - -- , mk' dph_Prelude_Int "upToP" "upToPA" ] ++ vars_Ord dph_Prelude_Int ++ vars_Num dph_Prelude_Int @@ -80,17 +62,7 @@ preludeVars (Modules { dph_Combinators = _dph_Combinators , mk gHC_CLASSES (fsLit "not") dph_Prelude_Bool (fsLit "notV") , mk gHC_CLASSES (fsLit "&&") dph_Prelude_Bool (fsLit "andV") , mk gHC_CLASSES (fsLit "||") dph_Prelude_Bool (fsLit "orV") - -{- - -- FIXME: temporary - , mk dph_Prelude_PArr (fsLit "fromPArrayP") dph_Prelude_PArr (fsLit "fromPArrayPA") - , mk dph_Prelude_PArr (fsLit "toPArrayP") dph_Prelude_PArr (fsLit "toPArrayPA") - , mk dph_Prelude_PArr (fsLit "fromNestedPArrayP") dph_Prelude_PArr (fsLit "fromNestedPArrayPA") - , mk dph_Prelude_PArr (fsLit "combineP") dph_Combinators (fsLit "combine2PA") - , mk dph_Prelude_PArr (fsLit "updateP") dph_Combinators (fsLit "updatePA") - , mk dph_Prelude_PArr (fsLit "bpermuteP") dph_Combinators (fsLit "bpermutePA") - , mk dph_Prelude_PArr (fsLit "indexedP") dph_Combinators (fsLit "indexedPA") --} ] + ] where mk = (,,,) mk' mod v v' = mk mod (fsLit v) mod (fsLit v') diff --git a/includes/Rts.h b/includes/Rts.h index 51351fa..3a6c6f2 100644 --- a/includes/Rts.h +++ b/includes/Rts.h @@ -219,6 +219,12 @@ DLL_IMPORT_RTS extern char **prog_argv; /* so we can get at these from Haskell * DLL_IMPORT_RTS extern int prog_argc; DLL_IMPORT_RTS extern char *prog_name; +#ifdef mingw32_HOST_OS +// We need these two from Haskell too +void getWin32ProgArgv(int *argc, wchar_t **argv[]); +void setWin32ProgArgv(int argc, wchar_t *argv[]); +#endif + void stackOverflow(void); void stg_exit(int n) GNU_ATTRIBUTE(__noreturn__); diff --git a/includes/stg/MachRegs.h b/includes/stg/MachRegs.h index cd98666..6b1d319 100644 --- a/includes/stg/MachRegs.h +++ b/includes/stg/MachRegs.h @@ -67,6 +67,11 @@ Caller-saves regs have to be saved around C-calls made from STG land, so this file defines CALLER_SAVES_ for each that is designated caller-saves in that machine's C calling convention. + + As it stands, the only registers that are ever marked caller saves + are the RX, FX, DX and USER registers; as a result, if you + decide to caller save a system register (e.g. SP, HP, etc), note that + this code path is completely untested! -- EZY -------------------------------------------------------------------------- */ /* ----------------------------------------------------------------------------- diff --git a/rts/Linker.c b/rts/Linker.c index c1310b0..28ba9a0 100644 --- a/rts/Linker.c +++ b/rts/Linker.c @@ -387,6 +387,8 @@ typedef struct _RtsSymbolVal { SymI_HasProto(stg_asyncReadzh) \ SymI_HasProto(stg_asyncWritezh) \ SymI_HasProto(stg_asyncDoProczh) \ + SymI_HasProto(getWin32ProgArgv) \ + SymI_HasProto(setWin32ProgArgv) \ SymI_HasProto(memset) \ SymI_HasProto(inet_ntoa) \ SymI_HasProto(inet_addr) \ @@ -2335,6 +2337,7 @@ unloadObj( char *path ) // stgFree(oc->image); // #endif stgFree(oc->fileName); + stgFree(oc->archiveMemberName); stgFree(oc->symbols); stgFree(oc->sections); stgFree(oc); @@ -3680,31 +3683,6 @@ PLTSize(void) * Generic ELF functions */ -static char * -findElfSection ( void* objImage, Elf_Word sh_type ) -{ - char* ehdrC = (char*)objImage; - Elf_Ehdr* ehdr = (Elf_Ehdr*)ehdrC; - Elf_Shdr* shdr = (Elf_Shdr*)(ehdrC + ehdr->e_shoff); - char* sh_strtab = ehdrC + shdr[ehdr->e_shstrndx].sh_offset; - char* ptr = NULL; - int i; - - for (i = 0; i < ehdr->e_shnum; i++) { - if (shdr[i].sh_type == sh_type - /* Ignore the section header's string table. */ - && i != ehdr->e_shstrndx - /* Ignore string tables named .stabstr, as they contain - debugging info. */ - && 0 != memcmp(".stabstr", sh_strtab + shdr[i].sh_name, 8) - ) { - ptr = ehdrC + shdr[i].sh_offset; - break; - } - } - return ptr; -} - static int ocVerifyImage_ELF ( ObjectCode* oc ) { @@ -3712,7 +3690,6 @@ ocVerifyImage_ELF ( ObjectCode* oc ) Elf_Sym* stab; int i, j, nent, nstrtab, nsymtabs; char* sh_strtab; - char* strtab; char* ehdrC = (char*)(oc->image); Elf_Ehdr* ehdr = (Elf_Ehdr*)ehdrC; @@ -3794,20 +3771,64 @@ ocVerifyImage_ELF ( ObjectCode* oc ) ehdrC + shdr[i].sh_offset, ehdrC + shdr[i].sh_offset + shdr[i].sh_size - 1)); - if (shdr[i].sh_type == SHT_REL) { - IF_DEBUG(linker,debugBelch("Rel " )); - } else if (shdr[i].sh_type == SHT_RELA) { - IF_DEBUG(linker,debugBelch("RelA " )); - } else { - IF_DEBUG(linker,debugBelch(" ")); +#define SECTION_INDEX_VALID(ndx) (ndx > SHN_UNDEF && ndx < ehdr->e_shnum) + + switch (shdr[i].sh_type) { + + case SHT_REL: + case SHT_RELA: + IF_DEBUG(linker,debugBelch( shdr[i].sh_type == SHT_REL ? "Rel " : "RelA ")); + + if (!SECTION_INDEX_VALID(shdr[i].sh_link)) { + if (shdr[i].sh_link == SHN_UNDEF) + errorBelch("\n%s: relocation section #%d has no symbol table\n" + "This object file has probably been fully striped. " + "Such files cannot be linked.\n", + oc->archiveMemberName ? oc->archiveMemberName : oc->fileName, i); + else + errorBelch("\n%s: relocation section #%d has an invalid link field (%d)\n", + oc->archiveMemberName ? oc->archiveMemberName : oc->fileName, + i, shdr[i].sh_link); + return 0; + } + if (shdr[shdr[i].sh_link].sh_type != SHT_SYMTAB) { + errorBelch("\n%s: relocation section #%d does not link to a symbol table\n", + oc->archiveMemberName ? oc->archiveMemberName : oc->fileName, i); + return 0; + } + if (!SECTION_INDEX_VALID(shdr[i].sh_info)) { + errorBelch("\n%s: relocation section #%d has an invalid info field (%d)\n", + oc->archiveMemberName ? oc->archiveMemberName : oc->fileName, + i, shdr[i].sh_info); + return 0; + } + + break; + case SHT_SYMTAB: + IF_DEBUG(linker,debugBelch("Sym ")); + + if (!SECTION_INDEX_VALID(shdr[i].sh_link)) { + errorBelch("\n%s: symbol table section #%d has an invalid link field (%d)\n", + oc->archiveMemberName ? oc->archiveMemberName : oc->fileName, + i, shdr[i].sh_link); + return 0; + } + if (shdr[shdr[i].sh_link].sh_type != SHT_STRTAB) { + errorBelch("\n%s: symbol table section #%d does not link to a string table\n", + oc->archiveMemberName ? oc->archiveMemberName : oc->fileName, i); + + return 0; + } + break; + case SHT_STRTAB: IF_DEBUG(linker,debugBelch("Str ")); break; + default: IF_DEBUG(linker,debugBelch(" ")); break; } if (sh_strtab) { IF_DEBUG(linker,debugBelch("sname=%s\n", sh_strtab + shdr[i].sh_name )); } } - IF_DEBUG(linker,debugBelch( "\nString tables" )); - strtab = NULL; + IF_DEBUG(linker,debugBelch( "\nString tables\n" )); nstrtab = 0; for (i = 0; i < ehdr->e_shnum; i++) { if (shdr[i].sh_type == SHT_STRTAB @@ -3817,18 +3838,16 @@ ocVerifyImage_ELF ( ObjectCode* oc ) debugging info. */ && 0 != memcmp(".stabstr", sh_strtab + shdr[i].sh_name, 8) ) { - IF_DEBUG(linker,debugBelch(" section %d is a normal string table", i )); - strtab = ehdrC + shdr[i].sh_offset; + IF_DEBUG(linker,debugBelch(" section %d is a normal string table\n", i )); nstrtab++; } } - if (nstrtab != 1) { - errorBelch("%s: no string tables, or too many", oc->fileName); - return 0; + if (nstrtab == 0) { + IF_DEBUG(linker,debugBelch(" no normal string tables (potentially, but not necessarily a problem)\n")); } nsymtabs = 0; - IF_DEBUG(linker,debugBelch( "\nSymbol tables" )); + IF_DEBUG(linker,debugBelch( "Symbol tables\n" )); for (i = 0; i < ehdr->e_shnum; i++) { if (shdr[i].sh_type != SHT_SYMTAB) continue; IF_DEBUG(linker,debugBelch( "section %d is a symbol table\n", i )); @@ -3870,13 +3889,17 @@ ocVerifyImage_ELF ( ObjectCode* oc ) } IF_DEBUG(linker,debugBelch(" " )); - IF_DEBUG(linker,debugBelch("name=%s\n", strtab + stab[j].st_name )); + IF_DEBUG(linker,debugBelch("name=%s\n", + ehdrC + shdr[shdr[i].sh_link].sh_offset + + stab[j].st_name )); } } if (nsymtabs == 0) { - errorBelch("%s: didn't find any symbol tables", oc->fileName); - return 0; + // Not having a symbol table is not in principle a problem. + // When an object file has no symbols then the 'strip' program + // typically will remove the symbol table entirely. + IF_DEBUG(linker,debugBelch(" no symbol tables (potentially, but not necessarily a problem)\n")); } return 1; @@ -3923,16 +3946,11 @@ ocGetNames_ELF ( ObjectCode* oc ) char* ehdrC = (char*)(oc->image); Elf_Ehdr* ehdr = (Elf_Ehdr*)ehdrC; - char* strtab = findElfSection ( ehdrC, SHT_STRTAB ); + char* strtab; Elf_Shdr* shdr = (Elf_Shdr*) (ehdrC + ehdr->e_shoff); ASSERT(symhash != NULL); - if (!strtab) { - errorBelch("%s: no strtab", oc->fileName); - return 0; - } - k = 0; for (i = 0; i < ehdr->e_shnum; i++) { /* Figure out what kind of section it is. Logic derived from @@ -3965,12 +3983,16 @@ ocGetNames_ELF ( ObjectCode* oc ) /* copy stuff into this module's object symbol table */ stab = (Elf_Sym*) (ehdrC + shdr[i].sh_offset); + strtab = ehdrC + shdr[shdr[i].sh_link].sh_offset; nent = shdr[i].sh_size / sizeof(Elf_Sym); oc->n_symbols = nent; oc->symbols = stgMallocBytes(oc->n_symbols * sizeof(char*), "ocGetNames_ELF(oc->symbols)"); + //TODO: we ignore local symbols anyway right? So we can use the + // shdr[i].sh_info to get the index of the first non-local symbol + // ie we should use j = shdr[i].sh_info for (j = 0; j < nent; j++) { char isLocal = FALSE; /* avoids uninit-var warning */ @@ -4068,21 +4090,24 @@ ocGetNames_ELF ( ObjectCode* oc ) relocations appear to be of this form. */ static int do_Elf_Rel_relocations ( ObjectCode* oc, char* ehdrC, - Elf_Shdr* shdr, int shnum, - Elf_Sym* stab, char* strtab ) + Elf_Shdr* shdr, int shnum ) { int j; char *symbol; Elf_Word* targ; Elf_Rel* rtab = (Elf_Rel*) (ehdrC + shdr[shnum].sh_offset); + Elf_Sym* stab; + char* strtab; int nent = shdr[shnum].sh_size / sizeof(Elf_Rel); int target_shndx = shdr[shnum].sh_info; int symtab_shndx = shdr[shnum].sh_link; + int strtab_shndx = shdr[symtab_shndx].sh_link; stab = (Elf_Sym*) (ehdrC + shdr[ symtab_shndx ].sh_offset); + strtab= (char*) (ehdrC + shdr[ strtab_shndx ].sh_offset); targ = (Elf_Word*)(ehdrC + shdr[ target_shndx ].sh_offset); - IF_DEBUG(linker,debugBelch( "relocations for section %d using symtab %d\n", - target_shndx, symtab_shndx )); + IF_DEBUG(linker,debugBelch( "relocations for section %d using symtab %d and strtab %d\n", + target_shndx, symtab_shndx, strtab_shndx )); /* Skip sections that we're not interested in. */ { @@ -4168,18 +4193,21 @@ do_Elf_Rel_relocations ( ObjectCode* oc, char* ehdrC, sparc-solaris relocations appear to be of this form. */ static int do_Elf_Rela_relocations ( ObjectCode* oc, char* ehdrC, - Elf_Shdr* shdr, int shnum, - Elf_Sym* stab, char* strtab ) + Elf_Shdr* shdr, int shnum ) { int j; char *symbol = NULL; Elf_Addr targ; Elf_Rela* rtab = (Elf_Rela*) (ehdrC + shdr[shnum].sh_offset); + Elf_Sym* stab; + char* strtab; int nent = shdr[shnum].sh_size / sizeof(Elf_Rela); int target_shndx = shdr[shnum].sh_info; int symtab_shndx = shdr[shnum].sh_link; + int strtab_shndx = shdr[symtab_shndx].sh_link; stab = (Elf_Sym*) (ehdrC + shdr[ symtab_shndx ].sh_offset); + strtab= (char*) (ehdrC + shdr[ strtab_shndx ].sh_offset); targ = (Elf_Addr) (ehdrC + shdr[ target_shndx ].sh_offset); IF_DEBUG(linker,debugBelch( "relocations for section %d using symtab %d\n", target_shndx, symtab_shndx )); @@ -4448,35 +4476,20 @@ do_Elf_Rela_relocations ( ObjectCode* oc, char* ehdrC, static int ocResolve_ELF ( ObjectCode* oc ) { - char *strtab; int shnum, ok; - Elf_Sym* stab = NULL; char* ehdrC = (char*)(oc->image); Elf_Ehdr* ehdr = (Elf_Ehdr*) ehdrC; Elf_Shdr* shdr = (Elf_Shdr*) (ehdrC + ehdr->e_shoff); - /* first find "the" symbol table */ - stab = (Elf_Sym*) findElfSection ( ehdrC, SHT_SYMTAB ); - - /* also go find the string table */ - strtab = findElfSection ( ehdrC, SHT_STRTAB ); - - if (stab == NULL || strtab == NULL) { - errorBelch("%s: can't find string or symbol table", oc->fileName); - return 0; - } - /* Process the relocation sections. */ for (shnum = 0; shnum < ehdr->e_shnum; shnum++) { if (shdr[shnum].sh_type == SHT_REL) { - ok = do_Elf_Rel_relocations ( oc, ehdrC, shdr, - shnum, stab, strtab ); + ok = do_Elf_Rel_relocations ( oc, ehdrC, shdr, shnum ); if (!ok) return ok; } else if (shdr[shnum].sh_type == SHT_RELA) { - ok = do_Elf_Rela_relocations ( oc, ehdrC, shdr, - shnum, stab, strtab ); + ok = do_Elf_Rela_relocations ( oc, ehdrC, shdr, shnum ); if (!ok) return ok; } } @@ -4509,8 +4522,12 @@ static int ocAllocateSymbolExtras_ELF( ObjectCode *oc ) if( i == ehdr->e_shnum ) { - errorBelch( "This ELF file contains no symtab" ); - return 0; + // Not having a symbol table is not in principle a problem. + // When an object file has no symbols then the 'strip' program + // typically will remove the symbol table entirely. + IF_DEBUG(linker, debugBelch( "The ELF file %s contains no symtab\n", + oc->archiveMemberName ? oc->archiveMemberName : oc->fileName )); + return 1; } if( shdr[i].sh_entsize != sizeof( Elf_Sym ) ) diff --git a/rts/RtsFlags.c b/rts/RtsFlags.c index 408e1c7..1408070 100644 --- a/rts/RtsFlags.c +++ b/rts/RtsFlags.c @@ -34,6 +34,14 @@ char **full_prog_argv = NULL; char *prog_name = NULL; /* 'basename' of prog_argv[0] */ int rts_argc = 0; /* ditto */ char *rts_argv[MAX_RTS_ARGS]; +#if defined(mingw32_HOST_OS) +// On Windows, we want to use GetCommandLineW rather than argc/argv, +// but we need to mutate the command line arguments for withProgName and +// friends. The System.Environment module achieves that using this bit of +// shared state: +int win32_prog_argc = 0; +wchar_t **win32_prog_argv = NULL; +#endif /* * constants, used later @@ -1536,3 +1544,53 @@ freeFullProgArgv (void) full_prog_argc = 0; full_prog_argv = NULL; } + +#if defined(mingw32_HOST_OS) +void freeWin32ProgArgv (void); + +void +freeWin32ProgArgv (void) +{ + int i; + + if (win32_prog_argv != NULL) { + for (i = 0; i < win32_prog_argc; i++) { + stgFree(win32_prog_argv[i]); + } + stgFree(win32_prog_argv); + } + + win32_prog_argc = 0; + win32_prog_argv = NULL; +} + +void +getWin32ProgArgv(int *argc, wchar_t **argv[]) +{ + *argc = win32_prog_argc; + *argv = win32_prog_argv; +} + +void +setWin32ProgArgv(int argc, wchar_t *argv[]) +{ + int i; + + freeWin32ProgArgv(); + + win32_prog_argc = argc; + if (argv == NULL) { + win32_prog_argv = NULL; + return; + } + + win32_prog_argv = stgCallocBytes(argc + 1, sizeof (wchar_t *), + "setWin32ProgArgv 1"); + for (i = 0; i < argc; i++) { + win32_prog_argv[i] = stgMallocBytes((wcslen(argv[i]) + 1) * sizeof(wchar_t), + "setWin32ProgArgv 2"); + wcscpy(win32_prog_argv[i], argv[i]); + } + win32_prog_argv[argc] = NULL; +} +#endif diff --git a/utils/ghc-pkg/Main.hs b/utils/ghc-pkg/Main.hs index 1cec56a..74f761b 100644 --- a/utils/ghc-pkg/Main.hs +++ b/utils/ghc-pkg/Main.hs @@ -1487,16 +1487,17 @@ getExecDir cmd = removeCmdSuffix = return . Just . initN (length cmd) . unDosifyPath getExecPath :: IO (Maybe String) -getExecPath = - allocaArray len $ \buf -> do - ret <- getModuleFileName nullPtr buf len - if ret == 0 then return Nothing - else liftM Just $ peekCString buf - where len = 2048 -- Plenty, PATH_MAX is 512 under Win32. - -foreign import stdcall unsafe "GetModuleFileNameA" - getModuleFileName :: Ptr () -> CString -> Int -> IO Int32 - +getExecPath = try_size 2048 -- plenty, PATH_MAX is 512 under Win32. + where + try_size size = allocaArray (fromIntegral size) $ \buf -> do + ret <- c_GetModuleFileName nullPtr buf size + case ret of + 0 -> return Nothing + _ | ret < size -> fmap Just $ peekCWString buf + | otherwise -> try_size (size * 2) + +foreign import stdcall unsafe "windows.h GetModuleFileNameW" + c_GetModuleFileName :: Ptr () -> CWString -> Word32 -> IO Word32 #else getLibDir :: IO (Maybe String) getLibDir = return Nothing diff --git a/utils/runghc/runghc.hs b/utils/runghc/runghc.hs index ab49513..4424c96 100644 --- a/utils/runghc/runghc.hs +++ b/utils/runghc/runghc.hs @@ -149,15 +149,17 @@ dieProg msg = do getExecPath :: IO (Maybe String) #if defined(mingw32_HOST_OS) -getExecPath = - allocaArray len $ \buf -> do - ret <- getModuleFileName nullPtr buf len - if ret == 0 then return Nothing - else liftM Just $ peekCString buf - where len = 2048 -- Plenty, PATH_MAX is 512 under Win32. - -foreign import stdcall unsafe "GetModuleFileNameA" - getModuleFileName :: Ptr () -> CString -> Int -> IO Int32 +getExecPath = try_size 2048 -- plenty, PATH_MAX is 512 under Win32. + where + try_size size = allocaArray (fromIntegral size) $ \buf -> do + ret <- c_GetModuleFileName nullPtr buf size + case ret of + 0 -> return Nothing + _ | ret < size -> fmap Just $ peekCWString buf + | otherwise -> try_size (size * 2) + +foreign import stdcall unsafe "windows.h GetModuleFileNameW" + c_GetModuleFileName :: Ptr () -> CWString -> Word32 -> IO Word32 #else getExecPath = return Nothing #endif