utils/hasktags/HaskTags.hs

   1 module Main where
   2 import Char
   3 import List
   4 import IO
   5 import System.Environment
   6 import System.Console.GetOpt
   7 import System.Exit
   8
   9
  10 -- search for definitions of things
  11 -- we do this by looking for the following patterns:
  12 -- data XXX = ...      giving a datatype location
  13 -- newtype XXX = ...   giving a newtype location
  14 -- bla :: ...          giving a function location
  15 --
  16 -- by doing it this way, we avoid picking up local definitions
  17 --              (whether this is good or not is a matter for debate)
  18 --
  19
  20 -- We generate both CTAGS and ETAGS format tags files
  21 -- The former is for use in most sensible editors, while EMACS uses ETAGS
  22
  23 --
  24 -- TODO add tag categories
  25 -- alternatives: http://haskell.org/haskellwiki/Tags
  26
  27 main :: IO ()
  28 main = do
  29         progName <- getProgName
  30         args <- getArgs
  31         let usageString = "Usage: " ++ progName ++ " [OPTION...] [files...]"
  32         let (modes, filenames, errs) = getOpt Permute options args
  33         if errs /= [] || elem Help modes || filenames == []
  34          then do
  35            putStr $ unlines errs
  36            putStr $ usageInfo usageString options
  37            exitWith (ExitFailure 1)
  38          else return ()
  39         let mode = getMode (Append `delete` modes)
  40         let openFileMode = if elem Append modes
  41                            then AppendMode
  42                            else WriteMode
  43         filedata <- mapM findthings filenames
  44         if mode == BothTags || mode == CTags
  45          then do
  46            ctagsfile <- openFile "tags" openFileMode
  47            writectagsfile ctagsfile filedata
  48            hClose ctagsfile
  49          else return ()
  50         if mode == BothTags || mode == ETags
  51          then do
  52            etagsfile <- openFile "TAGS" openFileMode
  53            writeetagsfile etagsfile filedata
  54            hClose etagsfile
  55          else return ()
  56
  57 -- | getMode takes a list of modes and extract the mode with the
  58 --   highest precedence.  These are as follows: Both, CTags, ETags
  59 --   The default case is Both.
  60 getMode :: [Mode] -> Mode
  61 getMode [] = BothTags
  62 getMode [x] = x
  63 getMode (x:xs) = max x (getMode xs)
  64
  65
  66 data Mode = ETags | CTags | BothTags | Append | Help deriving (Ord, Eq, Show)
  67
  68 options :: [OptDescr Mode]
  69 options = [ Option "c" ["ctags"]
  70             (NoArg CTags) "generate CTAGS file (ctags)"
  71           , Option "e" ["etags"]
  72             (NoArg ETags) "generate ETAGS file (etags)"
  73           , Option "b" ["both"]
  74             (NoArg BothTags) ("generate both CTAGS and ETAGS")
  75           , Option "a" ["append"]
  76             (NoArg Append) ("append to existing CTAGS and/or ETAGS file(s)")
  77           , Option "h" ["help"] (NoArg Help) "This help"
  78           ]
  79
  80 type FileName = String
  81
  82 type ThingName = String
  83
  84 -- The position of a token or definition
  85 data Pos = Pos
  86                 FileName        -- file name
  87                 Int                     -- line number
  88                 Int             -- token number
  89                 String          -- string that makes up that line
  90         deriving (Show, Eq)
  91
  92 -- A definition we have found
  93 data FoundThing = FoundThing ThingName Pos
  94         deriving (Show, Eq)
  95
  96 -- Data we have obtained from a file
  97 data FileData = FileData FileName [FoundThing]
  98
  99 data Token = Token String Pos
 100         deriving Show
 101
 102
 103 -- stuff for dealing with ctags output format
 104
 105 writectagsfile :: Handle -> [FileData] -> IO ()
 106 writectagsfile ctagsfile filedata = do
 107         let things = concat $ map getfoundthings filedata
 108         mapM_ (\x -> hPutStrLn ctagsfile $ dumpthing x) (sortThings things)
 109
 110 sortThings = sortBy (\(FoundThing a _) (FoundThing b _) -> compare a b)
 111
 112 getfoundthings :: FileData -> [FoundThing]
 113 getfoundthings (FileData filename things) = things
 114
 115 dumpthing :: FoundThing -> String
 116 dumpthing (FoundThing name (Pos filename line _ _)) =
 117         name ++ "\t" ++ filename ++ "\t" ++ (show $ line + 1)
 118
 119
 120 -- stuff for dealing with etags output format
 121
 122 writeetagsfile :: Handle -> [FileData] -> IO ()
 123 writeetagsfile etagsfile filedata = do
 124         mapM_ (\x -> hPutStr etagsfile $ e_dumpfiledata x) filedata
 125
 126 e_dumpfiledata :: FileData -> String
 127 e_dumpfiledata (FileData filename things) =
 128         "\x0c\n" ++ filename ++ "," ++ (show thingslength) ++ "\n" ++ thingsdump
 129         where
 130                 thingsdump = concat $ map e_dumpthing things
 131                 thingslength = length thingsdump
 132
 133 e_dumpthing :: FoundThing -> String
 134 e_dumpthing (FoundThing name (Pos filename line token fullline)) =
 135         (concat $ take (token + 1) $ spacedwords fullline)
 136         ++ "\x7f" ++ (show line) ++ "," ++ (show $ line+1) ++ "\n"
 137
 138
 139 -- like "words", but keeping the whitespace, and so letting us build
 140 -- accurate prefixes
 141
 142 spacedwords :: String -> [String]
 143 spacedwords [] = []
 144 spacedwords xs = (blanks ++ wordchars):(spacedwords rest2)
 145         where
 146                 (blanks,rest) = span Char.isSpace xs
 147                 (wordchars,rest2) = span (\x -> not $ Char.isSpace x) rest
 148
 149
 150 -- Find the definitions in a file
 151
 152 findthings :: FileName -> IO FileData
 153 findthings filename = do
 154     text <- readFile filename
 155     evaluate text -- forces evaluation of text
 156                   -- too many files were being opened otherwise since
 157                   -- readFile is lazy
 158     let aslines = lines text
 159     let wordlines = map mywords aslines
 160     let noslcoms = map stripslcomments wordlines
 161     let tokens = concat $ zipWith3 (withline filename) noslcoms aslines [0 ..]
 162     -- there are some tokens with "" (don't know why yet) this filter fixes it
 163     let tokens' = filter (\(Token s _ ) -> (not .  null) s ) tokens
 164     let nocoms = stripblockcomments tokens'
 165     -- using nub because getcons and findstuff are parsing parts of the file twice
 166     return $ FileData filename $ nub $ findstuff nocoms
 167   where evaluate [] = return ()
 168         evaluate (c:cs) = c `seq` evaluate cs
 169         -- my words is mainly copied from Data.List.
 170         -- difference abc::def is split into three words instead of one.
 171         -- We should really be lexing Haskell properly here rather
 172         -- than using hacks like this. In the future we expect hasktags
 173         -- to be replaced by something using the GHC API.
 174         mywords :: String -> [String]
 175         mywords (':':':':xs) = "::" : mywords xs
 176         mywords s =  case dropWhile isSpace s of
 177                          "" -> []
 178                          s' -> w : mywords s''
 179                              where (w, s'') = myBreak s'
 180                                    myBreak [] = ([],[])
 181                                    myBreak (':':':':xs) = ([], "::"++xs)
 182                                    myBreak (' ':xs) = ([],xs);
 183                                    myBreak (x:xs) = let (a,b) = myBreak xs
 184                                                     in  (x:a,b)
 185
 186 -- Create tokens from words, by recording their line number
 187 -- and which token they are through that line
 188
 189 withline :: FileName -> [String] -> String -> Int -> [Token]
 190 withline filename words fullline i =
 191         zipWith (\w t -> Token w (Pos filename i t fullline)) words $ [0 ..]
 192
 193 -- comments stripping
 194
 195 stripslcomments :: [String] -> [String]
 196 stripslcomments ("--":xs) = []
 197 stripslcomments (x:xs) = x : stripslcomments xs
 198 stripslcomments [] = []
 199
 200 stripblockcomments :: [Token] -> [Token]
 201 stripblockcomments ((Token "\\end{code}" _):xs) = afterlitend xs
 202 stripblockcomments ((Token "{-" _):xs) = afterblockcomend xs
 203 stripblockcomments (x:xs) = x:stripblockcomments xs
 204 stripblockcomments [] = []
 205
 206 afterlitend2 :: [Token] -> [Token]
 207 afterlitend2 (x:xs) = afterlitend xs
 208 afterlitend2 [] = []
 209
 210 afterlitend :: [Token] -> [Token]
 211 afterlitend ((Token "\\begin{code}" _):xs) = xs
 212 afterlitend (x:xs) = afterlitend xs
 213 afterlitend [] = []
 214
 215 afterblockcomend :: [Token] -> [Token]
 216 afterblockcomend ((Token token _):xs) | contains "-}" token = xs
 217                                                 | otherwise = afterblockcomend xs
 218 afterblockcomend [] = []
 219
 220
 221 -- does one string contain another string
 222
 223 contains :: Eq a => [a] -> [a] -> Bool
 224 contains sub full = any (isPrefixOf sub) $ tails full
 225
 226 ints :: Int -> [Int]
 227 ints i = i:(ints $ i+1)
 228
 229
 230 -- actually pick up definitions
 231
 232 findstuff :: [Token] -> [FoundThing]
 233 findstuff ((Token "module" _):(Token name pos):xs) =
 234         FoundThing name pos : (getcons xs) ++ (findstuff xs)
 235 findstuff ((Token "data" _):(Token name pos):xs) =
 236         FoundThing name pos : (getcons xs) ++ (findstuff xs)
 237 findstuff ((Token "newtype" _):(Token name pos):xs) =
 238         FoundThing name pos : findstuff xs
 239 findstuff ((Token "type" _):(Token name pos):xs) =
 240         FoundThing name pos : findstuff xs
 241 findstuff ((Token "class" _):xs) = findClassName xs
 242 findstuff ((Token name pos):(Token "::" _):xs) =
 243         FoundThing name pos : findstuff xs
 244 findstuff (x:xs) = findstuff xs
 245 findstuff [] = []
 246
 247 findClassName :: [Token] -> [FoundThing]
 248 findClassName []  = []
 249 findClassName [Token n p]  = [FoundThing n p]
 250 findClassName xs = (\((Token n pos):xs) -> FoundThing n pos : findstuff xs) . drop2 . dropParens 0 $ xs
 251 dropParens n ((Token "(" _ ):xs) = dropParens (n+1) xs
 252 dropParens 0 (x:xs) = x:xs
 253 dropParens 1 ((Token ")" _ ):xs) = xs
 254 dropParens n ((Token ")" _ ):xs) = dropParens (n-1) xs
 255 dropParens n (x:xs) = dropParens n xs
 256 -- dropsEverything till token "=>" (if it is on the same line as the first token. if not return tokens)
 257 drop2 tokens@(x@(Token _ (Pos _ line_nr _ _ )):xs) =
 258   let (line, following) = span (\(Token s (Pos _ l _ _)) -> l == line_nr) tokens
 259       (_, following_in_line) = span (\(Token n _) -> n /= "=>") line
 260   in case following_in_line of
 261           (Token "=>" _:xs) ->  xs ++ following
 262           _ -> tokens
 263 drop2 xs = xs
 264
 265 -- get the constructor definitions, knowing that a datatype has just started
 266
 267 getcons :: [Token] -> [FoundThing]
 268 getcons ((Token "=" _):(Token name pos):xs) =
 269         FoundThing name pos : getcons2 xs
 270 getcons (x:xs) = getcons xs
 271 getcons [] = []
 272
 273
 274 getcons2 ((Token "=" _):xs) = []
 275 getcons2 ((Token "|" _):(Token name pos):xs) =
 276         FoundThing name pos : getcons2 xs
 277 getcons2 (x:xs) = getcons2 xs
 278 getcons2 [] = []
 279