+{-# OPTIONS_GHC -XCPP -XPatternGuards -XScopedTypeVariables -Wall #-}
module Main where
+
+import Prelude hiding ( mod, id, mapM )
+import GHC hiding (flags)
+--import Packages
+import HscTypes ( isBootSummary )
+import BasicTypes
+import Digraph ( flattenSCCs )
+import DriverPhases ( isHaskellSrcFilename )
+import HscTypes ( msHsFilePath )
+import Name ( getOccString )
+--import ErrUtils ( printBagOfErrors )
+import DynFlags ( defaultDynFlags )
+import SrcLoc
import Bag
-import Char
-import DynFlags(GhcMode, defaultDynFlags)
+import Exception -- ( ghandle )
import FastString
-import GHC
-import HscTypes (msHsFilePath)
-import List
-import IO
-import Name
-import Outputable
-import SrcLoc
+import MonadUtils ( liftIO )
+
+-- Every GHC comes with Cabal anyways, so this is not a bad new dependency
+import Distribution.Simple.GHC ( ghcOptions )
+import Distribution.Simple.Configure ( getPersistBuildConfig )
+import Distribution.PackageDescription ( library, libBuildInfo )
+import Distribution.Simple.LocalBuildInfo ( localPkgDescr, buildDir )
+
+import Control.Monad hiding (mapM)
import System.Environment
import System.Console.GetOpt
import System.Exit
+import Data.Char
+import System.IO
+import Data.List as List hiding ( group )
+import Data.Maybe
+import Data.Traversable (mapM)
+import Data.Map ( Map )
+import qualified Data.Map as M
+--import UniqFM
+--import Debug.Trace
-- search for definitions of things
-- we do this by parsing the source and grabbing top-level definitions
-- We generate both CTAGS and ETAGS format tags files
-- The former is for use in most sensible editors, while EMACS uses ETAGS
-{-
-placateGhc :: IO ()
-placateGhc = defaultErrorHandler defaultDynFlags $ do
- GHC.init (Just "/usr/local/lib/ghc-6.5") -- or your build tree!
- s <- newSession mode
--}
+----------------------------------
+---- CENTRAL DATA TYPES ----------
+
+type FileName = String
+type ThingName = String -- name of a defined entity in a Haskell program
+
+-- A definition we have found (we know its containing module, name, and location)
+data FoundThing = FoundThing ModuleName ThingName SrcLoc
+
+-- Data we have obtained from a file (list of things we found)
+data FileData = FileData FileName [FoundThing] (Map Int String)
+--- invariant (not checked): every found thing has a source location in that file?
+
+
+------------------------------
+-------- MAIN PROGRAM --------
main :: IO ()
main = do
- progName <- getProgName
- let usageString =
- "Usage: " ++ progName ++ " [OPTION...] [-- GHC OPTION... --] [files...]"
- args <- getArgs
- let (ghcArgs, ourArgs, unbalanced) = splitArgs args
- let (modes, filenames, errs) = getOpt Permute options ourArgs
- if unbalanced || errs /= [] || elem Help modes || filenames == []
- then do
- putStr $ unlines errs
- putStr $ usageInfo usageString options
- exitWith (ExitFailure 1)
- else return ()
- let mode = getMode (Append `delete` modes)
- let openFileMode = if elem Append modes
- then AppendMode
- else WriteMode
- GHC.init (Just "/usr/local/lib/ghc-6.5")
- GHC.defaultErrorHandler defaultDynFlags $ do
- session <- newSession JustTypecheck
- print "created a session"
- flags <- getSessionDynFlags session
- (pflags, _) <- parseDynamicFlags flags ghcArgs
- let flags = pflags { hscTarget = HscNothing }
- GHC.defaultCleanupHandler flags $ do
- flags <- initPackages flags
- setSessionDynFlags session flags
- setTargets session (map fileTarget filenames)
- print "set targets"
- success <- load session LoadAllTargets --- bring module graph up to date
- filedata <- case success of
- Failed -> do { putStr "Load failed"; exitWith (ExitFailure 2) }
- Succeeded -> do
- print "loaded all targets"
- graph <- getModuleGraph session
- print "got modules graph"
- graphData session graph
- if mode == BothTags || mode == CTags
- then do
- ctagsfile <- openFile "tags" openFileMode
- writectagsfile ctagsfile filedata
- hClose ctagsfile
- else return ()
- if mode == BothTags || mode == ETags
- then do
- etagsfile <- openFile "TAGS" openFileMode
- writeetagsfile etagsfile filedata
- hClose etagsfile
- else return ()
-
--- | getMode takes a list of modes and extract the mode with the
--- highest precedence. These are as follows: Both, CTags, ETags
--- The default case is Both.
-getMode :: [Mode] -> Mode
-getMode [] = BothTags
-getMode [x] = x
-getMode (x:xs) = max x (getMode xs)
-
+ progName <- getProgName
+ let usageString =
+ "Usage: " ++ progName ++ " [OPTION...] [-- GHC OPTION... --] [files...]"
+ args <- getArgs
+ let (ghcArgs', ourArgs, unbalanced) = splitArgs args
+ let (flags, filenames, errs) = getOpt Permute options ourArgs
+ let (hsfiles, otherfiles) = List.partition isHaskellSrcFilename filenames
+
+ let ghc_topdir = case [ d | FlagTopDir d <- flags ] of
+ [] -> ""
+ (x:_) -> x
+ mapM_ (\n -> putStr $ "Warning: ignoring non-Haskellish file " ++ n ++ "\n")
+ otherfiles
+ if unbalanced || errs /= [] || elem FlagHelp flags || hsfiles == []
+ then do
+ putStr $ unlines errs
+ putStr $ usageInfo usageString options
+ exitWith (ExitFailure 1)
+ else return ()
+
+ ghcArgs <- case [ d | FlagUseCabalConfig d <- flags ] of
+ [distPref] -> do
+ cabalOpts <- flagsFromCabal distPref
+ return (ghcArgs' ++ cabalOpts)
+ [] ->
+ return ghcArgs'
+ _ -> error "Too many --use-cabal-config flags"
+ print ghcArgs
+
+ let modes = getMode flags
+ let openFileMode = if elem FlagAppend flags
+ then AppendMode
+ else WriteMode
+ ctags_hdl <- if CTags `elem` modes
+ then Just `liftM` openFile "tags" openFileMode
+ else return Nothing
+ etags_hdl <- if ETags `elem` modes
+ then Just `liftM` openFile "TAGS" openFileMode
+ else return Nothing
+
+ GHC.defaultErrorHandler defaultDynFlags $
+ runGhc (Just ghc_topdir) $ do
+ --liftIO $ print "starting up session"
+ dflags <- getSessionDynFlags
+ (pflags, unrec, warns) <- parseDynamicFlags dflags{ verbosity=1 }
+ (map noLoc ghcArgs)
+ unless (null unrec) $
+ liftIO $ putStrLn $ "Unrecognised options:\n" ++ show (map unLoc unrec)
+ liftIO $ mapM_ putStrLn (map unLoc warns)
+ let dflags2 = pflags { hscTarget = HscNothing } -- don't generate anything
+ -- liftIO $ print ("pkgDB", case (pkgDatabase dflags2) of Nothing -> 0
+ -- Just m -> sizeUFM m)
+ setSessionDynFlags dflags2
+ --liftIO $ print (length pkgs)
+
+ GHC.defaultCleanupHandler dflags2 $ do
+
+ targetsAtOneGo hsfiles (ctags_hdl,etags_hdl)
+ mapM_ (mapM (liftIO . hClose)) [ctags_hdl, etags_hdl]
+
+----------------------------------------------
+---------- ARGUMENT PROCESSING --------------
+
+data Flag
+ = FlagETags
+ | FlagCTags
+ | FlagBoth
+ | FlagAppend
+ | FlagHelp
+ | FlagTopDir FilePath
+ | FlagUseCabalConfig FilePath
+ | FlagFilesFromCabal
+ deriving (Ord, Eq, Show)
+ -- ^Represents options passed to the program
+
+data Mode = ETags | CTags deriving Eq
+
+getMode :: [Flag] -> [Mode]
+getMode fs = go (concatMap modeLike fs)
+ where go [] = [ETags,CTags]
+ go [x] = [x]
+ go more = nub more
+
+ modeLike FlagETags = [ETags]
+ modeLike FlagCTags = [CTags]
+ modeLike FlagBoth = [ETags,CTags]
+ modeLike _ = []
splitArgs :: [String] -> ([String], [String], Bool)
--- pull out arguments between -- for GHC
-splitArgs args = split [] [] False args
+-- ^Pull out arguments between -- for GHC
+splitArgs args0 = split [] [] False args0
where split ghc' tags' unbal ("--" : args) = split tags' ghc' (not unbal) args
split ghc' tags' unbal (arg : args) = split ghc' (arg:tags') unbal args
split ghc' tags' unbal [] = (reverse ghc', reverse tags', unbal)
-data Mode = ETags | CTags | BothTags | Append | Help deriving (Ord, Eq, Show)
-
-options :: [OptDescr Mode]
-options = [ Option "c" ["ctags"]
- (NoArg CTags) "generate CTAGS file (ctags)"
+options :: [OptDescr Flag]
+-- supports getopt
+options = [ Option "" ["topdir"]
+ (ReqArg FlagTopDir "DIR") "root of GHC installation (optional)"
+ , Option "c" ["ctags"]
+ (NoArg FlagCTags) "generate CTAGS file (ctags)"
, Option "e" ["etags"]
- (NoArg ETags) "generate ETAGS file (etags)"
+ (NoArg FlagETags) "generate ETAGS file (etags)"
, Option "b" ["both"]
- (NoArg BothTags) ("generate both CTAGS and ETAGS")
+ (NoArg FlagBoth) ("generate both CTAGS and ETAGS")
, Option "a" ["append"]
- (NoArg Append) ("append to existing CTAGS and/or ETAGS file(s)")
- , Option "h" ["help"] (NoArg Help) "This help"
+ (NoArg FlagAppend) ("append to existing CTAGS and/or ETAGS file(s)")
+ , Option "" ["use-cabal-config"]
+ (ReqArg FlagUseCabalConfig "DIR") "use local cabal configuration from dist dir"
+ , Option "" ["files-from-cabal"]
+ (NoArg FlagFilesFromCabal) "use files from cabal"
+ , Option "h" ["help"] (NoArg FlagHelp) "This help"
]
-type FileName = String
-
-type ThingName = String
+flagsFromCabal :: FilePath -> IO [String]
+flagsFromCabal distPref = do
+ lbi <- getPersistBuildConfig distPref
+ let pd = localPkgDescr lbi
+ case library pd of
+ Nothing -> error "no library"
+ Just lib ->
+ let bi = libBuildInfo lib
+ odir = buildDir lbi
+ opts = ghcOptions lbi bi odir
+ in return opts
+
+----------------------------------------------------------------
+--- LOADING HASKELL SOURCE
+--- (these bits actually run the compiler and produce abstract syntax)
+
+safeLoad :: LoadHowMuch -> Ghc SuccessFlag
+-- like GHC.load, but does not stop process on exception
+safeLoad mode = do
+ _dflags <- getSessionDynFlags
+ ghandle (\(e :: SomeException) -> liftIO (print e) >> return Failed ) $
+ handleSourceError (\e -> printExceptionAndWarnings e >> return Failed) $
+ load mode
+
+
+targetsAtOneGo :: [FileName] -> (Maybe Handle, Maybe Handle) -> Ghc ()
+-- load a list of targets
+targetsAtOneGo hsfiles handles = do
+ targets <- mapM (\f -> guessTarget f Nothing) hsfiles
+ setTargets targets
+ modgraph <- depanal [] False
+ let mods = flattenSCCs $ topSortModuleGraph False modgraph Nothing
+ graphData mods handles
--- The position of a token or definition
-data Pos = Pos
- FileName -- file name
- Int -- line number
- Int -- token number
- String -- string that makes up that line
- deriving Show
-
-srcLocToPos :: SrcLoc -> Pos
-srcLocToPos loc =
- Pos (unpackFS $ srcLocFile loc) (srcLocLine loc) (srcLocCol loc) "bogus"
-
--- A definition we have found
-data FoundThing = FoundThing ThingName Pos
- deriving Show
+fileTarget :: FileName -> Target
+fileTarget filename = Target (TargetFile filename Nothing) True Nothing
--- Data we have obtained from a file
-data FileData = FileData FileName [FoundThing]
+---------------------------------------------------------------
+----- CRAWLING ABSTRACT SYNTAX TO SNAFFLE THE DEFINITIONS -----
-data Token = Token String Pos
- deriving Show
+graphData :: ModuleGraph -> (Maybe Handle, Maybe Handle) -> Ghc ()
+graphData graph handles = do
+ mapM_ foundthings graph
+ where foundthings ms =
+ let filename = msHsFilePath ms
+ modname = moduleName $ ms_mod ms
+ in handleSourceError (\e -> do
+ printExceptionAndWarnings e
+ liftIO $ exitWith (ExitFailure 1)) $
+ do liftIO $ putStrLn ("loading " ++ filename)
+ mod <- loadModule =<< typecheckModule =<< parseModule ms
+ case mod of
+ _ | isBootSummary ms -> return ()
+ _ | Just s <- renamedSource mod ->
+ liftIO (writeTagsData handles =<< fileData filename modname s)
+ _otherwise ->
+ liftIO $ exitWith (ExitFailure 1)
+
+fileData :: FileName -> ModuleName -> RenamedSource -> IO FileData
+fileData filename modname (group, _imports, _lie, _doc, _haddock) = do
+ -- lie is related to type checking and so is irrelevant
+ -- imports contains import declarations and no definitions
+ -- doc and haddock seem haddock-related; let's hope to ignore them
+ ls <- lines `fmap` readFile filename
+ let line_map = M.fromAscList $ zip [1..] ls
+ evaluate line_map
+ return $ FileData filename (boundValues modname group) line_map
+
+boundValues :: ModuleName -> HsGroup Name -> [FoundThing]
+-- ^Finds all the top-level definitions in a module
+boundValues mod group =
+ let vals = case hs_valds group of
+ ValBindsOut nest _sigs ->
+ [ x | (_rec, binds) <- nest
+ , bind <- bagToList binds
+ , x <- boundThings mod bind ]
+ _other -> error "boundValues"
+ tys = [ n | ns <- map (tyClDeclNames . unLoc) (hs_tyclds group)
+ , n <- map found ns ]
+ fors = concat $ map forBound (hs_fords group)
+ where forBound lford = case unLoc lford of
+ ForeignImport n _ _ -> [found n]
+ ForeignExport { } -> []
+ in vals ++ tys ++ fors
+ where dataNames tycon cons = found tycon : map conName cons
+ conName td = found $ con_name $ unLoc td
+ found = foundOfLName mod
+
+startOfLocated :: Located a -> SrcLoc
+startOfLocated lHs = srcSpanStart $ getLoc lHs
+
+foundOfLName :: ModuleName -> Located Name -> FoundThing
+foundOfLName mod id = FoundThing mod (getOccString $ unLoc id) (startOfLocated id)
+
+boundThings :: ModuleName -> LHsBind Name -> [FoundThing]
+boundThings modname lbinding =
+ case unLoc lbinding of
+ FunBind { fun_id = id } -> [thing id]
+ PatBind { pat_lhs = lhs } -> patThings lhs []
+ VarBind { var_id = id } -> [FoundThing modname (getOccString id) (startOfLocated lbinding)]
+ AbsBinds { } -> [] -- nothing interesting in a type abstraction
+ where thing = foundOfLName modname
+ patThings lpat tl =
+ let loc = startOfLocated lpat
+ lid id = FoundThing modname (getOccString id) loc
+ in case unLoc lpat of
+ WildPat _ -> tl
+ VarPat name -> lid name : tl
+ VarPatOut name _ -> lid name : tl -- XXX need help here
+ LazyPat p -> patThings p tl
+ AsPat id p -> patThings p (thing id : tl)
+ ParPat p -> patThings p tl
+ BangPat p -> patThings p tl
+ ListPat ps _ -> foldr patThings tl ps
+ TuplePat ps _ _ -> foldr patThings tl ps
+ PArrPat ps _ -> foldr patThings tl ps
+ ConPatIn _ conargs -> conArgs conargs tl
+ ConPatOut _ _ _ _ conargs _ -> conArgs conargs tl
+ LitPat _ -> tl
+#if __GLASGOW_HASKELL__ > 608
+ NPat _ _ _ -> tl -- form of literal pattern?
+#else
+ NPat _ _ _ _ -> tl -- form of literal pattern?
+#endif
+ NPlusKPat id _ _ _ -> thing id : tl
+ TypePat _ -> tl -- XXX need help here
+ SigPatIn p _ -> patThings p tl
+ SigPatOut p _ -> patThings p tl
+ _ -> error "boundThings"
+ conArgs (PrefixCon ps) tl = foldr patThings tl ps
+ conArgs (RecCon (HsRecFields { rec_flds = flds })) tl
+ = foldr (\f tl' -> patThings (hsRecFieldArg f) tl') tl flds
+ conArgs (InfixCon p1 p2) tl = patThings p1 $ patThings p2 tl
-- stuff for dealing with ctags output format
-writectagsfile :: Handle -> [FileData] -> IO ()
+writeTagsData :: (Maybe Handle, Maybe Handle) -> FileData -> IO ()
+writeTagsData (mb_ctags_hdl, mb_etags_hdl) fd = do
+ maybe (return ()) (\hdl -> writectagsfile hdl fd) mb_ctags_hdl
+ maybe (return ()) (\hdl -> writeetagsfile hdl fd) mb_etags_hdl
+
+writectagsfile :: Handle -> FileData -> IO ()
writectagsfile ctagsfile filedata = do
- let things = concat $ map getfoundthings filedata
- mapM_ (\x -> hPutStrLn ctagsfile $ dumpthing x) things
+ let things = getfoundthings filedata
+ mapM_ (\x -> hPutStrLn ctagsfile $ dumpthing False x) things
+ mapM_ (\x -> hPutStrLn ctagsfile $ dumpthing True x) things
getfoundthings :: FileData -> [FoundThing]
-getfoundthings (FileData filename things) = things
-
-dumpthing :: FoundThing -> String
-dumpthing (FoundThing name (Pos filename line _ _)) =
- name ++ "\t" ++ filename ++ "\t" ++ (show $ line + 1)
+getfoundthings (FileData _filename things _src_lines) = things
+dumpthing :: Bool -> FoundThing -> String
+dumpthing showmod (FoundThing modname name loc) =
+ fullname ++ "\t" ++ filename ++ "\t" ++ (show line)
+ where line = srcLocLine loc
+ filename = unpackFS $ srcLocFile loc
+ fullname = if showmod then moduleNameString modname ++ "." ++ name
+ else name
-- stuff for dealing with etags output format
-writeetagsfile :: Handle -> [FileData] -> IO ()
-writeetagsfile etagsfile filedata = do
- mapM_ (\x -> hPutStr etagsfile $ e_dumpfiledata x) filedata
+writeetagsfile :: Handle -> FileData -> IO ()
+writeetagsfile etagsfile = hPutStr etagsfile . e_dumpfiledata
e_dumpfiledata :: FileData -> String
-e_dumpfiledata (FileData filename things) =
+e_dumpfiledata (FileData filename things line_map) =
"\x0c\n" ++ filename ++ "," ++ (show thingslength) ++ "\n" ++ thingsdump
where
- thingsdump = concat $ map e_dumpthing things
+ thingsdump = concat $ map (e_dumpthing line_map) things
thingslength = length thingsdump
-e_dumpthing :: FoundThing -> String
-e_dumpthing (FoundThing name (Pos filename line token fullline)) =
- ---- (concat $ take (token + 1) $ spacedwords fullline)
- name
- ++ "\x7f" ++ (show line) ++ "," ++ (show $ line+1) ++ "\n"
-
-
--- like "words", but keeping the whitespace, and so letting us build
--- accurate prefixes
-
-spacedwords :: String -> [String]
-spacedwords [] = []
-spacedwords xs = (blanks ++ wordchars):(spacedwords rest2)
- where
- (blanks,rest) = span Char.isSpace xs
- (wordchars,rest2) = span (\x -> not $ Char.isSpace x) rest
-
-
--- Find the definitions in a file
-
-modsummary :: ModuleGraph -> FileName -> Maybe ModSummary
-modsummary graph n =
- List.find matches graph
- where matches ms = n == msHsFilePath ms
-
-modname :: ModSummary -> ModuleName
-modname summary = moduleName $ ms_mod $ summary
-
-fileTarget :: FileName -> Target
-fileTarget filename = Target (TargetFile filename Nothing) Nothing
-
-graphData :: Session -> ModuleGraph -> IO [FileData]
-graphData session graph =
- mapM foundthings graph
- where foundthings ms =
- let filename = msHsFilePath ms
- in do mod <- checkModule session (moduleName $ ms_mod ms)
- return $ maybe (FileData filename []) id $ do
- m <- mod
- s <- renamedSource m
- return $ fileData filename s
-
-fileData :: FileName -> RenamedSource -> FileData
-fileData filename (group, imports, lie) =
- -- lie is related to type checking and so is irrelevant
- -- imports contains import declarations and no definitions
- FileData filename (boundValues group)
-
-boundValues :: HsGroup Name -> [FoundThing]
-boundValues group =
- let vals = case hs_valds group of
- ValBindsOut nest sigs ->
- [ x | (_rec, binds) <- nest, bind <- bagToList binds, x <- boundThings bind ]
- tys = concat $ map tyBound (hs_tyclds group)
- where tyBound ltcd = case unLoc ltcd of
- ForeignType { tcdLName = n } -> [foundOfLName n]
- TyData { tcdLName = n } -> [foundOfLName n]
- TySynonym { tcdLName = n } -> [foundOfLName n]
- ClassDecl { tcdLName = n } -> [foundOfLName n]
- fors = concat $ map forBound (hs_fords group)
- where forBound lford = case unLoc lford of
- ForeignImport n _ _ -> [foundOfLName n]
- ForeignExport { } -> []
- in vals ++ tys ++ fors
-
-posOfLocated :: Located a -> Pos
-posOfLocated lHs = srcLocToPos $ srcSpanStart $ getLoc lHs
-
-foundOfLName :: Located Name -> FoundThing
-foundOfLName id = FoundThing (getOccString $ unLoc id) (posOfLocated id)
-
-boundThings :: LHsBind Name -> [FoundThing]
-boundThings lbinding =
- let thing = foundOfLName
- in case unLoc lbinding of
- FunBind { fun_id = id } -> [thing id]
- PatBind { pat_lhs = lhs } -> panic "Pattern at top level"
- VarBind { var_id = id } -> [FoundThing (getOccString id) (posOfLocated lbinding)]
- AbsBinds { } -> [] -- nothing interesting in a type abstraction
+e_dumpthing :: Map Int String -> FoundThing -> String
+e_dumpthing src_lines (FoundThing modname name loc) =
+ tagline name ++ tagline (moduleNameString modname ++ "." ++ name)
+ where tagline n = src_code ++ "\x7f"
+ ++ n ++ "\x01"
+ ++ (show line) ++ "," ++ (show $ column) ++ "\n"
+ line = srcLocLine loc
+ column = srcLocCol loc
+ src_code = case M.lookup line src_lines of
+ Just l -> take (column + length name) l
+ Nothing -> --trace (show ("not found: ", moduleNameString modname, name, line, column))
+ name