From 78c209010058cd7669781de92068b64dd32caaea Mon Sep 17 00:00:00 2001 From: Tim Chevalier Date: Fri, 12 Sep 2008 02:15:35 +0000 Subject: [PATCH] ext-core library: Add code for merging multiple Core modules into a single module I added a new module, Merge, to the ext-core library that combines a list of ext-core modules into a new, uniquely renamed module. See comments in Merge.hs for more details. --- utils/ext-core/Language/Core/CoreUtils.hs | 84 +++++++++++++++++ utils/ext-core/Language/Core/Merge.hs | 147 +++++++++++++++++++++++++++++ utils/ext-core/Language/Core/Utils.hs | 76 +++++++++++++++ utils/ext-core/extcore.cabal | 6 +- 4 files changed, 310 insertions(+), 3 deletions(-) create mode 100644 utils/ext-core/Language/Core/CoreUtils.hs create mode 100644 utils/ext-core/Language/Core/Merge.hs create mode 100644 utils/ext-core/Language/Core/Utils.hs diff --git a/utils/ext-core/Language/Core/CoreUtils.hs b/utils/ext-core/Language/Core/CoreUtils.hs new file mode 100644 index 0000000..afe4039 --- /dev/null +++ b/utils/ext-core/Language/Core/CoreUtils.hs @@ -0,0 +1,84 @@ +module Language.Core.CoreUtils where + +import Language.Core.Core +import Language.Core.Utils + +import Data.Generics +import Data.List + +splitDataConApp_maybe :: Exp -> Maybe (Qual Dcon, [Ty], [Exp]) +splitDataConApp_maybe (Dcon d) = Just (d, [], []) +splitDataConApp_maybe (Appt rator t) = + case splitDataConApp_maybe rator of + Just (r, ts, rs) -> Just (r, ts ++ [t], rs) + Nothing -> Nothing +splitDataConApp_maybe (App rator rand) = + case splitDataConApp_maybe rator of + Just (r, ts, rs) -> Just (r, ts, rs++[rand]) + Nothing -> Nothing +splitDataConApp_maybe _ = Nothing + +splitApp :: Exp -> (Exp, [Exp]) +splitApp (Appt rator _) = splitApp rator +splitApp (App rator rand) = + case splitApp rator of + (r, rs) -> (r, rs++[rand]) +splitApp e = (e, []) + +splitAppIgnoreCasts :: Exp -> (Exp, [Exp]) +splitAppIgnoreCasts (Appt rator _) = splitApp rator +splitAppIgnoreCasts (App (Cast rator _) rand) = splitApp (App rator rand) +splitAppIgnoreCasts (App rator rand) = + case splitApp rator of + (r, rs) -> (r, rs++[rand]) +splitAppIgnoreCasts e = (e, []) + +splitFunTy_maybe :: Ty -> Maybe ([Ty], Ty) +splitFunTy_maybe (Tforall _ t) = splitFunTy_maybe t +splitFunTy_maybe t = + case splitFunTy2_maybe t of + Just (rator, rand) -> case splitFunTy_maybe rand of + Just (r,s) -> Just (rator:r, s) + Nothing -> Just ([rator], rand) + Nothing -> Nothing + +splitFunTy2_maybe :: Ty -> Maybe (Ty,Ty) +splitFunTy2_maybe (Tapp (Tapp (Tcon c) t) u) | c == tcArrow = Just (t, u) +splitFunTy2_maybe _ = Nothing + +vdefNamesQ :: [Vdef] -> [Qual Var] +vdefNamesQ = map (\ (Vdef (v,_,_)) -> v) + +vdefNames :: [Vdef] -> [Var] +vdefNames = snd . unzip . vdefNamesQ + +vdefTys :: [Vdef] -> [Ty] +vdefTys = map (\ (Vdef (_,t,_)) -> t) + +vdefgNames :: Vdefg -> [Var] +vdefgNames (Rec vds) = map (\ (Vdef ((_,v),_,_)) -> v) vds +vdefgNames (Nonrec (Vdef ((_,v),_,_))) = [v] +vdefgTys :: Vdefg -> [Ty] +vdefgTys (Rec vds) = map (\ (Vdef (_,t,_)) -> t) vds +vdefgTys (Nonrec (Vdef (_,t,_))) = [t] + +vbNames :: [Vbind] -> [Var] +vbNames = fst . unzip + +-- assumes v is not bound in e +substIn :: Data a => Var -> Var -> a -> a +substIn v newV = everywhereExcept (mkT frob) + where frob (Var (Nothing,v1)) | v == v1 = Var (Nothing,newV) + frob e = e + +substVars :: Data a => [Var] -> [Var] -> a -> a +substVars oldVars newVars e = foldl' (\ e1 (old,new) -> substIn old new e1) + e (zip oldVars newVars) + + +tdefNames :: [Tdef] -> [Qual Var] +tdefNames = concatMap doOne + where doOne (Data qtc _ cds) = qtc:(concatMap doCdef cds) + doOne (Newtype qtc qtc1 _ _) = [qtc, qtc1] + doCdef (Constr qdc _ _) = [qdc] + diff --git a/utils/ext-core/Language/Core/Merge.hs b/utils/ext-core/Language/Core/Merge.hs new file mode 100644 index 0000000..b5ffd05 --- /dev/null +++ b/utils/ext-core/Language/Core/Merge.hs @@ -0,0 +1,147 @@ +{- + This module combines multiple External Core modules into + a single module, including both datatype and value definitions. +-} +module Language.Core.Merge(merge) where + +import Language.Core.Core +import Language.Core.CoreUtils +import Language.Core.Utils + +import Data.Char +import Data.Generics +import Data.List + +{- + merge turns a group of (possibly mutually recursive) modules + into a single module, which should be called main:Main. + + This doesn't handle dependency-finding; you have to hand it all + the modules that your main module depends on (transitively). + Language.Core.Dependencies does automatic dependency-finding, + but that code is a bit moldy. + + merge takes an extra argument that is a variable substitution. + This is because you may want to treat some defined names specially + rather than dumping their definitions into the Main module. For + example, if my back-end tool defines a new primop that has + the type IO (), it's easiest for me if I can consider IO and () as + primitive type constructors, though they are not. Thus, I pass in + a substitution that says to replace GHC.IOBase.IO with GHC.Prim.IO, + and GHC.Base.() with GHC.Prim.(). Of course, I am responsible for + providing a type environment defining those names if I want to be + able to type the resulting program. + + You can pass in the empty list if you don't understand what the + purpose of the substitution is. +-} + +merge :: [(Qual Var, Qual Var)] -> [Module] -> Module +merge subst ms = + zapNames subst topNames (Module mainMname newTdefs [Rec topBinds]) + where -- note: dead code elimination will later remove any names + -- that were in the domain of the substitution + newTdefs = finishTdefs deadIds $ concat allTdefs + (allTdefs, allVdefgs) = unzip $ map (\ (Module _ tds vdefgs) + -> (tds, vdefgs)) ms + (deadIds,_) = unzip subst + topNames = uniqueNamesIn topBinds (concat allTdefs) + topBinds = finishVdefs deadIds $ flattenBinds (concat allVdefgs) + +{- + This function finds all of the names in the given group of vdefs and + tdefs that are only defined by one module. This is because if function + quux is only defined in module foo:Bar.Blat, we want to call it + main:Main.quux in the final module, and not main:Main.foo_Bar_Blat_quux, + for file size and readability's sake. + + Possible improvements: + * take into account that tcons/dcons are separate namespaces + * restructure the whole thing to shorten names *after* dead code elim. + (Both of those would allow for more names to be shortened, but aren't + strictly necessary.) +-} +uniqueNamesIn :: [Vdef] -> [Tdef] -> [Qual Var] +uniqueNamesIn topBinds allTdefs = res + where allNames = vdefNamesQ topBinds ++ tdefNames allTdefs + dups = dupsUnqual allNames + res = allNames \\ dups + +-- This takes each top-level name of the form Foo.Bar.blah and +-- renames it to FoozuBarzublah (note we *don't* make it exported! +-- This is so we know which names were in the original program and +-- which were dumped in from other modules, and thus can eliminate +-- dead code.) +zapNames :: Data a => [(Qual Var, Qual Var)] -> [Qual Var] -> a -> a +zapNames subst qvs = everywhereBut (mkQ False (\ (_::String) -> True)) + (mkT (fixupName subst qvs)) + +-- also need version for type and data constructors +-- don't forget to *not* zap if something has the primitive module name +-- We hope and pray there are no top-level unqualified names that are used in +-- more than one module. (Can we assume this?) (I think so, b/c -fext-core +-- attaches uniques to things. But could still perhaps go wrong if we fed +-- in .hcr files that were generated in diff. compilation sessions...) +-- (This wouldn't be too hard to fix, but should state the assumption, +-- and how to remove it.) + +fixupName :: [(Qual Var, Qual Var)] -> [Qual Var] -> Qual Var -> Qual Var +-- For a variable in the domain of the substitution, just +-- apply the substitution. +fixupName subst _ oldVar | Just newVar <- lookup oldVar subst = newVar +-- We don't alter unqualified names, since we just need to make sure +-- everything can go in the Main module. +fixupName _ _ vr@(Nothing,_) = vr +-- Nor do we alter anything defined in the Main module +-- or in the primitive or Bool modules +-- (because we basically treat the Bool type as primitive.) +fixupName _ _ vr@(Just mn, _) | mn == mainMname || mn == wrapperMainMname || + mn == primMname || mn == boolMname = vr +-- For a variable that is defined by only one module in scope, we +-- give it a name that is just its unqualified name, without the original +-- module and package names. +fixupName _ uniqueNames (_, v) | okay = + (mkMname v, v) + where okay = any (\ (_,v1) -> v == v1) uniqueNames +-- This is the case for a name that is defined in more than one +-- module. In this case, we have to give it a unique name to disambiguate +-- it from other definitions of the same name. We combine the package and +-- module name to give a unique prefix. +fixupName _ _ (Just (M (P pname, hierNames, leafName)), varName) = + (mkMname varName, -- see comment for zapNames + (if isUpperStr varName then capitalize else id) $ + intercalate "zu" (pname:(hierNames ++ [leafName, varName]))) + where capitalize (ch:rest) = (toUpper ch):rest + capitalize "" = "" + +mkMname :: Var -> Mname +-- icky hack :-( +-- necessary b/c tycons and datacons have to be qualified, +-- but we want to write fixupName as a generic transformation on vars. +mkMname v = if isUpperStr v then Just mainMname else Nothing + +isUpperStr :: String -> Bool +isUpperStr (c:_) = isUpper c +isUpperStr [] = False + +dupsUnqual :: [Qual Var] -> [Qual Var] +dupsUnqual = dupsBy (\ (_,v1) (_,v2) -> v1 == v2) + +-- We remove any declarations for tcons/dcons that are in +-- the domain of the substitution. Why? Because we assume that +-- the substitution maps anything in its domain onto something +-- with a different module name from the main one. If you want +-- to substitute Main-module-defined things for Main-module-defined +-- things, you can do that before merging modules. +finishTdefs :: [Qual Var] -> [Tdef] -> [Tdef] +finishTdefs namesToDrop = filter isOkay + where isOkay (Newtype qtc qtc1 _ _) = + qtc `notElem` namesToDrop + && qtc1 `notElem` namesToDrop + isOkay (Data qtc _ cdefs) = + qtc `notElem` namesToDrop + && cdefsOkay cdefs + cdefsOkay = all cdefOkay + cdefOkay (Constr qdc _ _) = qdc `notElem` namesToDrop +finishVdefs :: [Qual Var] -> [Vdef] -> [Vdef] +finishVdefs namesToDrop = filter (\ (Vdef (qv,_,_)) -> qv `notElem` namesToDrop) diff --git a/utils/ext-core/Language/Core/Utils.hs b/utils/ext-core/Language/Core/Utils.hs new file mode 100644 index 0000000..3ffabf2 --- /dev/null +++ b/utils/ext-core/Language/Core/Utils.hs @@ -0,0 +1,76 @@ +module Language.Core.Utils + (everywhereExcept, everywhereExceptM, noNames, notNull, + expectJust, fixedPointBy, applyPasses, varsIn, dupsBy, + everywhere'Except, everywhere'But, wordsBy) where + +import Data.Generics +import Data.List +import Data.Maybe +import qualified Data.Set as S + +everywhereExcept :: Data a => GenericT -> a -> a +everywhereExcept = everywhereBut (mkQ False (\ (_::String) -> True)) + +everywhere'Except :: Data a => GenericT -> a -> a +everywhere'Except = everywhere'But (mkQ False (\ (_::String) -> True)) + +everywhereExceptM :: (Data a, Monad m) => GenericM m -> a -> m a +everywhereExceptM = everywhereButM (mkQ False (\ (_::String) -> True)) + + +noNames :: Data a => r -> (r -> r -> r) -> GenericQ r -> a -> r +noNames e c = everythingBut e c (mkQ False (\ (_::String) -> True)) + +everythingBut :: r -> (r -> r -> r) -> GenericQ Bool + -> GenericQ r -> GenericQ r +everythingBut empty combine q q1 x + | q x = empty + | otherwise = q1 x `combine` + (foldl' combine empty + (gmapQ (everythingBut empty combine q q1) x)) + +everywhere'But :: GenericQ Bool -> GenericT -> GenericT +-- Guarded to let traversal cease if predicate q holds for x +everywhere'But q f x + | q x = x + | otherwise = let top = gmapT f x in + top `seq` (gmapT (everywhere'But q f) top) + +everywhereButM :: Monad m => GenericQ Bool -> GenericM m -> GenericM m +everywhereButM q f x + | q x = return x + | otherwise = (gmapM (everywhereButM q f) x) >>= f + +notNull :: [a] -> Bool +notNull = not . null + +expectJust :: String -> Maybe a -> a +expectJust s = fromMaybe (error s) + +fixedPointBy :: (a -> a -> Bool) -> (a -> a) -> a -> a +fixedPointBy done trans start = go start + where go v = + let next = trans v in + if done v next then + next + else + go next + +applyPasses :: [a -> a] -> a -> a +applyPasses passes p = -- trace ("p = " ++ show p) $ + foldl' (\ p' nextF -> nextF p') p passes + +varsIn :: (Ord b, Typeable b, Data a) => a -> S.Set b +varsIn = noNames S.empty S.union + (mkQ S.empty (\ v -> S.singleton v)) + +dupsBy :: (a -> a -> Bool) -> [a] -> [a] +dupsBy (~=) xs = filter (\ x -> length (filter (~= x) xs) > 1) xs + +wordsBy :: Eq a => a -> [a] -> [[a]] +wordsBy _ [] = [[]] +wordsBy y (x:xs) | y == x = [x]:(wordsBy y xs) +wordsBy y (x:xs) = + case wordsBy y xs of + (z:zs) -> (x:z):zs + [] -> [[y]] diff --git a/utils/ext-core/extcore.cabal b/utils/ext-core/extcore.cabal index fd1e2df..ee8f45e 100644 --- a/utils/ext-core/extcore.cabal +++ b/utils/ext-core/extcore.cabal @@ -9,9 +9,9 @@ author: Andrew Tolmach, Tim Chevalier, The GHC Team maintainer: chevalier@alum.wellesley.edu stability: alpha build-depends: base, containers, directory, filepath, mtl, parsec, pretty -exposed-modules: Language.Core.Check, Language.Core.Dependencies, Language.Core.Core, Language.Core.Interp, Language.Core.Overrides, Language.Core.ParsecParser, Language.Core.Prep, Language.Core.Prims, Language.Core.Printer -other-modules: Language.Core.Encoding, Language.Core.Env,Language.Core.PrimCoercions, Language.Core.PrimEnv -extensions: DeriveDataTypeable PatternGuards +exposed-modules: Language.Core.Check, Language.Core.Dependencies, Language.Core.Core, Language.Core.Interp, Language.Core.Overrides, Language.Core.ParsecParser, Language.Core.Prep, Language.Core.Prims, Language.Core.Printer, Language.Core.Merge +other-modules: Language.Core.Encoding, Language.Core.Env,Language.Core.PrimCoercions, Language.Core.PrimEnv, Language.Core.Utils, Language.Core.CoreUtils +extensions: DeriveDataTypeable PatternGuards PatternSignatures ghc-options: -Wall -O2 tested-with: GHC ==6.8.2 data-files: README -- 1.7.10.4