X-Git-Url: http://git.megacz.com/?a=blobdiff_plain;f=utils%2Fghctags%2FGhcTags.hs;h=5237bbc53bba638f9d0343cdef887e943bc55b04;hb=7fb94f753f166563ee3f3b43e7ec5064e82971d3;hp=2defe7515fcf10ac2f7247a73b000a188e2f4dff;hpb=56b37cae901f6a013f6fe8b29d7db9e7c896d6f7;p=ghc-hetmet.git diff --git a/utils/ghctags/GhcTags.hs b/utils/ghctags/GhcTags.hs index 2defe75..5237bbc 100644 --- a/utils/ghctags/GhcTags.hs +++ b/utils/ghctags/GhcTags.hs @@ -1,19 +1,43 @@ +{-# OPTIONS_GHC -XCPP -XPatternGuards -XScopedTypeVariables -Wall #-} module Main where + +import Prelude hiding ( mod, id, mapM ) +import GHC hiding (flags) +--import Packages +import HscTypes ( isBootSummary ) +import BasicTypes +import Digraph ( flattenSCCs ) +import DriverPhases ( isHaskellSrcFilename ) +import HscTypes ( msHsFilePath ) +import Name ( getOccString ) +--import ErrUtils ( printBagOfErrors ) +import DynFlags ( defaultDynFlags ) +import SrcLoc import Bag -import Char -import DynFlags(GhcMode, defaultDynFlags) +import Exception -- ( ghandle ) import FastString -import GHC -import HscTypes (msHsFilePath) -import List -import IO -import Name -import Outputable -import SrcLoc +import MonadUtils ( liftIO ) + +-- Every GHC comes with Cabal anyways, so this is not a bad new dependency +import Distribution.Simple.GHC ( ghcOptions ) +import Distribution.Simple.Configure ( getPersistBuildConfig ) +import Distribution.PackageDescription ( library, libBuildInfo ) +import Distribution.Simple.LocalBuildInfo ( localPkgDescr, buildDir ) + +import Control.Monad hiding (mapM) import System.Environment import System.Console.GetOpt import System.Exit +import Data.Char +import System.IO +import Data.List as List hiding ( group ) +import Data.Maybe +import Data.Traversable (mapM) +import Data.Map ( Map ) +import qualified Data.Map as M +--import UniqFM +--import Debug.Trace -- search for definitions of things -- we do this by parsing the source and grabbing top-level definitions @@ -21,227 +45,324 @@ import System.Exit -- We generate both CTAGS and ETAGS format tags files -- The former is for use in most sensible editors, while EMACS uses ETAGS -{- -placateGhc :: IO () -placateGhc = defaultErrorHandler defaultDynFlags $ do - GHC.init (Just "/usr/local/lib/ghc-6.5") -- or your build tree! - s <- newSession mode --} +---------------------------------- +---- CENTRAL DATA TYPES ---------- + +type FileName = String +type ThingName = String -- name of a defined entity in a Haskell program + +-- A definition we have found (we know its containing module, name, and location) +data FoundThing = FoundThing ModuleName ThingName SrcLoc + +-- Data we have obtained from a file (list of things we found) +data FileData = FileData FileName [FoundThing] (Map Int String) +--- invariant (not checked): every found thing has a source location in that file? + + +------------------------------ +-------- MAIN PROGRAM -------- main :: IO () main = do - progName <- getProgName - let usageString = - "Usage: " ++ progName ++ " [OPTION...] [-- GHC OPTION... --] [files...]" - args <- getArgs - let (ghcArgs, ourArgs, unbalanced) = splitArgs args - let (modes, filenames, errs) = getOpt Permute options ourArgs - if unbalanced || errs /= [] || elem Help modes || filenames == [] - then do - putStr $ unlines errs - putStr $ usageInfo usageString options - exitWith (ExitFailure 1) - else return () - let mode = getMode (Append `delete` modes) - let openFileMode = if elem Append modes - then AppendMode - else WriteMode - GHC.init (Just "/usr/local/lib/ghc-6.5") - GHC.defaultErrorHandler defaultDynFlags $ do - session <- newSession JustTypecheck - print "created a session" - flags <- getSessionDynFlags session - (pflags, _) <- parseDynamicFlags flags ghcArgs - let flags = pflags { hscTarget = HscNothing } - GHC.defaultCleanupHandler flags $ do - flags <- initPackages flags - setSessionDynFlags session flags - setTargets session (map fileTarget filenames) - print "set targets" - success <- load session LoadAllTargets --- bring module graph up to date - filedata <- case success of - Failed -> do { putStr "Load failed"; exitWith (ExitFailure 2) } - Succeeded -> do - print "loaded all targets" - graph <- getModuleGraph session - print "got modules graph" - graphData session graph - if mode == BothTags || mode == CTags - then do - ctagsfile <- openFile "tags" openFileMode - writectagsfile ctagsfile filedata - hClose ctagsfile - else return () - if mode == BothTags || mode == ETags - then do - etagsfile <- openFile "TAGS" openFileMode - writeetagsfile etagsfile filedata - hClose etagsfile - else return () - --- | getMode takes a list of modes and extract the mode with the --- highest precedence. These are as follows: Both, CTags, ETags --- The default case is Both. -getMode :: [Mode] -> Mode -getMode [] = BothTags -getMode [x] = x -getMode (x:xs) = max x (getMode xs) - + progName <- getProgName + let usageString = + "Usage: " ++ progName ++ " [OPTION...] [-- GHC OPTION... --] [files...]" + args <- getArgs + let (ghcArgs', ourArgs, unbalanced) = splitArgs args + let (flags, filenames, errs) = getOpt Permute options ourArgs + let (hsfiles, otherfiles) = List.partition isHaskellSrcFilename filenames + + let ghc_topdir = case [ d | FlagTopDir d <- flags ] of + [] -> "" + (x:_) -> x + mapM_ (\n -> putStr $ "Warning: ignoring non-Haskellish file " ++ n ++ "\n") + otherfiles + if unbalanced || errs /= [] || elem FlagHelp flags || hsfiles == [] + then do + putStr $ unlines errs + putStr $ usageInfo usageString options + exitWith (ExitFailure 1) + else return () + + ghcArgs <- case [ d | FlagUseCabalConfig d <- flags ] of + [distPref] -> do + cabalOpts <- flagsFromCabal distPref + return (ghcArgs' ++ cabalOpts) + [] -> + return ghcArgs' + _ -> error "Too many --use-cabal-config flags" + print ghcArgs + + let modes = getMode flags + let openFileMode = if elem FlagAppend flags + then AppendMode + else WriteMode + ctags_hdl <- if CTags `elem` modes + then Just `liftM` openFile "tags" openFileMode + else return Nothing + etags_hdl <- if ETags `elem` modes + then Just `liftM` openFile "TAGS" openFileMode + else return Nothing + + GHC.defaultErrorHandler defaultDynFlags $ + runGhc (Just ghc_topdir) $ do + --liftIO $ print "starting up session" + dflags <- getSessionDynFlags + (pflags, unrec, warns) <- parseDynamicFlags dflags{ verbosity=1 } + (map noLoc ghcArgs) + unless (null unrec) $ + liftIO $ putStrLn $ "Unrecognised options:\n" ++ show (map unLoc unrec) + liftIO $ mapM_ putStrLn (map unLoc warns) + let dflags2 = pflags { hscTarget = HscNothing } -- don't generate anything + -- liftIO $ print ("pkgDB", case (pkgDatabase dflags2) of Nothing -> 0 + -- Just m -> sizeUFM m) + setSessionDynFlags dflags2 + --liftIO $ print (length pkgs) + + GHC.defaultCleanupHandler dflags2 $ do + + targetsAtOneGo hsfiles (ctags_hdl,etags_hdl) + mapM_ (mapM (liftIO . hClose)) [ctags_hdl, etags_hdl] + +---------------------------------------------- +---------- ARGUMENT PROCESSING -------------- + +data Flag + = FlagETags + | FlagCTags + | FlagBoth + | FlagAppend + | FlagHelp + | FlagTopDir FilePath + | FlagUseCabalConfig FilePath + | FlagFilesFromCabal + deriving (Ord, Eq, Show) + -- ^Represents options passed to the program + +data Mode = ETags | CTags deriving Eq + +getMode :: [Flag] -> [Mode] +getMode fs = go (concatMap modeLike fs) + where go [] = [ETags,CTags] + go [x] = [x] + go more = nub more + + modeLike FlagETags = [ETags] + modeLike FlagCTags = [CTags] + modeLike FlagBoth = [ETags,CTags] + modeLike _ = [] splitArgs :: [String] -> ([String], [String], Bool) --- pull out arguments between -- for GHC -splitArgs args = split [] [] False args +-- ^Pull out arguments between -- for GHC +splitArgs args0 = split [] [] False args0 where split ghc' tags' unbal ("--" : args) = split tags' ghc' (not unbal) args split ghc' tags' unbal (arg : args) = split ghc' (arg:tags') unbal args split ghc' tags' unbal [] = (reverse ghc', reverse tags', unbal) -data Mode = ETags | CTags | BothTags | Append | Help deriving (Ord, Eq, Show) - -options :: [OptDescr Mode] -options = [ Option "c" ["ctags"] - (NoArg CTags) "generate CTAGS file (ctags)" +options :: [OptDescr Flag] +-- supports getopt +options = [ Option "" ["topdir"] + (ReqArg FlagTopDir "DIR") "root of GHC installation (optional)" + , Option "c" ["ctags"] + (NoArg FlagCTags) "generate CTAGS file (ctags)" , Option "e" ["etags"] - (NoArg ETags) "generate ETAGS file (etags)" + (NoArg FlagETags) "generate ETAGS file (etags)" , Option "b" ["both"] - (NoArg BothTags) ("generate both CTAGS and ETAGS") + (NoArg FlagBoth) ("generate both CTAGS and ETAGS") , Option "a" ["append"] - (NoArg Append) ("append to existing CTAGS and/or ETAGS file(s)") - , Option "h" ["help"] (NoArg Help) "This help" + (NoArg FlagAppend) ("append to existing CTAGS and/or ETAGS file(s)") + , Option "" ["use-cabal-config"] + (ReqArg FlagUseCabalConfig "DIR") "use local cabal configuration from dist dir" + , Option "" ["files-from-cabal"] + (NoArg FlagFilesFromCabal) "use files from cabal" + , Option "h" ["help"] (NoArg FlagHelp) "This help" ] -type FileName = String - -type ThingName = String - --- The position of a token or definition -data Pos = Pos - FileName -- file name - Int -- line number - Int -- token number - String -- string that makes up that line - deriving Show - -srcLocToPos :: SrcLoc -> Pos -srcLocToPos loc = - Pos (unpackFS $ srcLocFile loc) (srcLocLine loc) (srcLocCol loc) "bogus" +flagsFromCabal :: FilePath -> IO [String] +flagsFromCabal distPref = do + lbi <- getPersistBuildConfig distPref + let pd = localPkgDescr lbi + case library pd of + Nothing -> error "no library" + Just lib -> + let bi = libBuildInfo lib + odir = buildDir lbi + opts = ghcOptions lbi bi odir + in return opts + +---------------------------------------------------------------- +--- LOADING HASKELL SOURCE +--- (these bits actually run the compiler and produce abstract syntax) + +safeLoad :: LoadHowMuch -> Ghc SuccessFlag +-- like GHC.load, but does not stop process on exception +safeLoad mode = do + _dflags <- getSessionDynFlags + ghandle (\(e :: SomeException) -> liftIO (print e) >> return Failed ) $ + handleSourceError (\e -> printExceptionAndWarnings e >> return Failed) $ + load mode + + +targetsAtOneGo :: [FileName] -> (Maybe Handle, Maybe Handle) -> Ghc () +-- load a list of targets +targetsAtOneGo hsfiles handles = do + targets <- mapM (\f -> guessTarget f Nothing) hsfiles + setTargets targets + modgraph <- depanal [] False + let mods = flattenSCCs $ topSortModuleGraph False modgraph Nothing + graphData mods handles --- A definition we have found -data FoundThing = FoundThing ThingName Pos - deriving Show +fileTarget :: FileName -> Target +fileTarget filename = Target (TargetFile filename Nothing) True Nothing --- Data we have obtained from a file -data FileData = FileData FileName [FoundThing] +--------------------------------------------------------------- +----- CRAWLING ABSTRACT SYNTAX TO SNAFFLE THE DEFINITIONS ----- -data Token = Token String Pos - deriving Show +graphData :: ModuleGraph -> (Maybe Handle, Maybe Handle) -> Ghc () +graphData graph handles = do + mapM_ foundthings graph + where foundthings ms = + let filename = msHsFilePath ms + modname = moduleName $ ms_mod ms + in handleSourceError (\e -> do + printExceptionAndWarnings e + liftIO $ exitWith (ExitFailure 1)) $ + do liftIO $ putStrLn ("loading " ++ filename) + mod <- loadModule =<< typecheckModule =<< parseModule ms + case mod of + _ | isBootSummary ms -> return () + _ | Just s <- renamedSource mod -> + liftIO (writeTagsData handles =<< fileData filename modname s) + _otherwise -> + liftIO $ exitWith (ExitFailure 1) + +fileData :: FileName -> ModuleName -> RenamedSource -> IO FileData +fileData filename modname (group, _imports, _lie, _doc, _haddock) = do + -- lie is related to type checking and so is irrelevant + -- imports contains import declarations and no definitions + -- doc and haddock seem haddock-related; let's hope to ignore them + ls <- lines `fmap` readFile filename + let line_map = M.fromAscList $ zip [1..] ls + evaluate line_map + return $ FileData filename (boundValues modname group) line_map + +boundValues :: ModuleName -> HsGroup Name -> [FoundThing] +-- ^Finds all the top-level definitions in a module +boundValues mod group = + let vals = case hs_valds group of + ValBindsOut nest _sigs -> + [ x | (_rec, binds) <- nest, bind <- bagToList binds, + x <- boundThings mod bind ] + _other -> error "boundValues" + tys = concat $ map tyBound (hs_tyclds group) + where tyBound ltcd = case unLoc ltcd of + ForeignType { tcdLName = n } -> [found n] + TyData { tcdLName = tycon, tcdCons = cons } -> + dataNames tycon cons + TySynonym { tcdLName = n } -> [found n] + ClassDecl { tcdLName = n } -> [found n] + _ -> error "boundValues: tys" + fors = concat $ map forBound (hs_fords group) + where forBound lford = case unLoc lford of + ForeignImport n _ _ -> [found n] + ForeignExport { } -> [] + in vals ++ tys ++ fors + where dataNames tycon cons = found tycon : map conName cons + conName td = found $ con_name $ unLoc td + found = foundOfLName mod + +startOfLocated :: Located a -> SrcLoc +startOfLocated lHs = srcSpanStart $ getLoc lHs + +foundOfLName :: ModuleName -> Located Name -> FoundThing +foundOfLName mod id = FoundThing mod (getOccString $ unLoc id) (startOfLocated id) + +boundThings :: ModuleName -> LHsBind Name -> [FoundThing] +boundThings modname lbinding = + case unLoc lbinding of + FunBind { fun_id = id } -> [thing id] + PatBind { pat_lhs = lhs } -> patThings lhs [] + VarBind { var_id = id } -> [FoundThing modname (getOccString id) (startOfLocated lbinding)] + AbsBinds { } -> [] -- nothing interesting in a type abstraction + where thing = foundOfLName modname + patThings lpat tl = + let loc = startOfLocated lpat + lid id = FoundThing modname (getOccString id) loc + in case unLoc lpat of + WildPat _ -> tl + VarPat name -> lid name : tl + VarPatOut name _ -> lid name : tl -- XXX need help here + LazyPat p -> patThings p tl + AsPat id p -> patThings p (thing id : tl) + ParPat p -> patThings p tl + BangPat p -> patThings p tl + ListPat ps _ -> foldr patThings tl ps + TuplePat ps _ _ -> foldr patThings tl ps + PArrPat ps _ -> foldr patThings tl ps + ConPatIn _ conargs -> conArgs conargs tl + ConPatOut _ _ _ _ conargs _ -> conArgs conargs tl + LitPat _ -> tl +#if __GLASGOW_HASKELL__ > 608 + NPat _ _ _ -> tl -- form of literal pattern? +#else + NPat _ _ _ _ -> tl -- form of literal pattern? +#endif + NPlusKPat id _ _ _ -> thing id : tl + TypePat _ -> tl -- XXX need help here + SigPatIn p _ -> patThings p tl + SigPatOut p _ -> patThings p tl + _ -> error "boundThings" + conArgs (PrefixCon ps) tl = foldr patThings tl ps + conArgs (RecCon (HsRecFields { rec_flds = flds })) tl + = foldr (\f tl' -> patThings (hsRecFieldArg f) tl') tl flds + conArgs (InfixCon p1 p2) tl = patThings p1 $ patThings p2 tl -- stuff for dealing with ctags output format -writectagsfile :: Handle -> [FileData] -> IO () +writeTagsData :: (Maybe Handle, Maybe Handle) -> FileData -> IO () +writeTagsData (mb_ctags_hdl, mb_etags_hdl) fd = do + maybe (return ()) (\hdl -> writectagsfile hdl fd) mb_ctags_hdl + maybe (return ()) (\hdl -> writeetagsfile hdl fd) mb_etags_hdl + +writectagsfile :: Handle -> FileData -> IO () writectagsfile ctagsfile filedata = do - let things = concat $ map getfoundthings filedata - mapM_ (\x -> hPutStrLn ctagsfile $ dumpthing x) things + let things = getfoundthings filedata + mapM_ (\x -> hPutStrLn ctagsfile $ dumpthing False x) things + mapM_ (\x -> hPutStrLn ctagsfile $ dumpthing True x) things getfoundthings :: FileData -> [FoundThing] -getfoundthings (FileData filename things) = things - -dumpthing :: FoundThing -> String -dumpthing (FoundThing name (Pos filename line _ _)) = - name ++ "\t" ++ filename ++ "\t" ++ (show $ line + 1) +getfoundthings (FileData _filename things _src_lines) = things +dumpthing :: Bool -> FoundThing -> String +dumpthing showmod (FoundThing modname name loc) = + fullname ++ "\t" ++ filename ++ "\t" ++ (show $ line + 1) + where line = srcLocLine loc + filename = unpackFS $ srcLocFile loc + fullname = if showmod then moduleNameString modname ++ "." ++ name + else name -- stuff for dealing with etags output format -writeetagsfile :: Handle -> [FileData] -> IO () -writeetagsfile etagsfile filedata = do - mapM_ (\x -> hPutStr etagsfile $ e_dumpfiledata x) filedata +writeetagsfile :: Handle -> FileData -> IO () +writeetagsfile etagsfile = hPutStr etagsfile . e_dumpfiledata e_dumpfiledata :: FileData -> String -e_dumpfiledata (FileData filename things) = +e_dumpfiledata (FileData filename things line_map) = "\x0c\n" ++ filename ++ "," ++ (show thingslength) ++ "\n" ++ thingsdump where - thingsdump = concat $ map e_dumpthing things + thingsdump = concat $ map (e_dumpthing line_map) things thingslength = length thingsdump -e_dumpthing :: FoundThing -> String -e_dumpthing (FoundThing name (Pos filename line token fullline)) = - ---- (concat $ take (token + 1) $ spacedwords fullline) - name - ++ "\x7f" ++ (show line) ++ "," ++ (show $ line+1) ++ "\n" - - --- like "words", but keeping the whitespace, and so letting us build --- accurate prefixes - -spacedwords :: String -> [String] -spacedwords [] = [] -spacedwords xs = (blanks ++ wordchars):(spacedwords rest2) - where - (blanks,rest) = span Char.isSpace xs - (wordchars,rest2) = span (\x -> not $ Char.isSpace x) rest - - --- Find the definitions in a file - -modsummary :: ModuleGraph -> FileName -> Maybe ModSummary -modsummary graph n = - List.find matches graph - where matches ms = n == msHsFilePath ms - -modname :: ModSummary -> ModuleName -modname summary = moduleName $ ms_mod $ summary - -fileTarget :: FileName -> Target -fileTarget filename = Target (TargetFile filename Nothing) Nothing - -graphData :: Session -> ModuleGraph -> IO [FileData] -graphData session graph = - mapM foundthings graph - where foundthings ms = - let filename = msHsFilePath ms - in do mod <- checkModule session (moduleName $ ms_mod ms) - return $ case mod of - Nothing -> FileData filename [] - Just m -> case renamedSource m of - Nothing -> FileData filename [] - Just s -> fileData filename s - - -fileData :: FileName -> RenamedSource -> FileData -fileData filename (group, imports, lie) = - -- lie is related to type checking and so is irrelevant - -- imports contains import declarations and no definitions - FileData filename (boundValues group) - -boundValues :: HsGroup Name -> [FoundThing] -boundValues group = - let vals = case hs_valds group of - ValBindsOut nest sigs -> - [ x | (_rec, binds) <- nest, bind <- bagToList binds, x <- boundThings bind ] - tys = concat $ map tyBound (hs_tyclds group) - where tyBound ltcd = case unLoc ltcd of - ForeignType { tcdLName = n } -> [foundOfLName n] - TyData { tcdLName = n } -> [foundOfLName n] - TySynonym { tcdLName = n } -> [foundOfLName n] - ClassDecl { tcdLName = n } -> [foundOfLName n] - fors = concat $ map forBound (hs_fords group) - where forBound lford = case unLoc lford of - ForeignImport n _ _ -> [foundOfLName n] - ForeignExport { } -> [] - in vals ++ tys ++ fors - -posOfLocated :: Located a -> Pos -posOfLocated lHs = srcLocToPos $ srcSpanStart $ getLoc lHs - -foundOfLName :: Located Name -> FoundThing -foundOfLName id = FoundThing (getOccString $ unLoc id) (posOfLocated id) - -boundThings :: LHsBind Name -> [FoundThing] -boundThings lbinding = - let thing = foundOfLName - in case unLoc lbinding of - FunBind { fun_id = id } -> [thing id] - PatBind { pat_lhs = lhs } -> panic "Pattern at top level" - VarBind { var_id = id } -> [FoundThing (getOccString id) (posOfLocated lbinding)] - AbsBinds { } -> [] -- nothing interesting in a type abstraction +e_dumpthing :: Map Int String -> FoundThing -> String +e_dumpthing src_lines (FoundThing modname name loc) = + tagline name ++ tagline (moduleNameString modname ++ "." ++ name) + where tagline n = src_code ++ "\x7f" + ++ n ++ "\x01" + ++ (show line) ++ "," ++ (show $ column) ++ "\n" + line = srcLocLine loc + column = srcLocCol loc + src_code = case M.lookup line src_lines of + Just l -> take (column + length name) l + Nothing -> --trace (show ("not found: ", moduleNameString modname, name, line, column)) + name