utils/hasktags/HaskTags.hs

   1
   2 module Main (main) where
   3
   4 import Char
   5 import List
   6 import IO
   7 import System.Environment
   8 import System.Console.GetOpt
   9 import System.Exit
  10
  11
  12 -- search for definitions of things
  13 -- we do this by looking for the following patterns:
  14 -- data XXX = ...      giving a datatype location
  15 -- newtype XXX = ...   giving a newtype location
  16 -- bla :: ...          giving a function location
  17 --
  18 -- by doing it this way, we avoid picking up local definitions
  19 --              (whether this is good or not is a matter for debate)
  20 --
  21
  22 -- We generate both CTAGS and ETAGS format tags files
  23 -- The former is for use in most sensible editors, while EMACS uses ETAGS
  24
  25 --
  26 -- TODO add tag categories
  27 -- alternatives: http://haskell.org/haskellwiki/Tags
  28
  29 main :: IO ()
  30 main = do
  31         progName <- getProgName
  32         args <- getArgs
  33         let usageString = "Usage: " ++ progName ++ " [OPTION...] [files...]"
  34         let (modes, filenames, errs) = getOpt Permute options args
  35         if errs /= [] || elem Help modes || filenames == []
  36          then do
  37            putStr $ unlines errs
  38            putStr $ usageInfo usageString options
  39            exitWith (ExitFailure 1)
  40          else return ()
  41         let mode = getMode (Append `delete` modes)
  42         let openFileMode = if elem Append modes
  43                            then AppendMode
  44                            else WriteMode
  45         filedata <- mapM findthings filenames
  46         if mode == BothTags || mode == CTags
  47          then do
  48            ctagsfile <- openFile "tags" openFileMode
  49            writectagsfile ctagsfile filedata
  50            hClose ctagsfile
  51          else return ()
  52         if mode == BothTags || mode == ETags
  53          then do
  54            etagsfile <- openFile "TAGS" openFileMode
  55            writeetagsfile etagsfile filedata
  56            hClose etagsfile
  57          else return ()
  58
  59 -- | getMode takes a list of modes and extract the mode with the
  60 --   highest precedence.  These are as follows: Both, CTags, ETags
  61 --   The default case is Both.
  62 getMode :: [Mode] -> Mode
  63 getMode [] = BothTags
  64 getMode [x] = x
  65 getMode (x:xs) = max x (getMode xs)
  66
  67
  68 data Mode = ETags | CTags | BothTags | Append | Help deriving (Ord, Eq, Show)
  69
  70 options :: [OptDescr Mode]
  71 options = [ Option "c" ["ctags"]
  72             (NoArg CTags) "generate CTAGS file (ctags)"
  73           , Option "e" ["etags"]
  74             (NoArg ETags) "generate ETAGS file (etags)"
  75           , Option "b" ["both"]
  76             (NoArg BothTags) ("generate both CTAGS and ETAGS")
  77           , Option "a" ["append"]
  78             (NoArg Append) ("append to existing CTAGS and/or ETAGS file(s)")
  79           , Option "h" ["help"] (NoArg Help) "This help"
  80           ]
  81
  82 type FileName = String
  83
  84 type ThingName = String
  85
  86 -- The position of a token or definition
  87 data Pos = Pos
  88                 FileName -- file name
  89                 Int      -- line number
  90                 Int      -- token number
  91                 String   -- string that makes up that line
  92     deriving (Show, Eq)
  93
  94 -- A definition we have found
  95 data FoundThing = FoundThing ThingName Pos
  96     deriving (Show, Eq)
  97
  98 -- Data we have obtained from a file
  99 data FileData = FileData FileName [FoundThing]
 100
 101 data Token = Token String Pos
 102     deriving Show
 103
 104
 105 -- stuff for dealing with ctags output format
 106
 107 writectagsfile :: Handle -> [FileData] -> IO ()
 108 writectagsfile ctagsfile filedata = do
 109     let things = concat $ map getfoundthings filedata
 110     mapM_ (\x -> hPutStrLn ctagsfile $ dumpthing x) (sortThings things)
 111
 112 sortThings :: [FoundThing] -> [FoundThing]
 113 sortThings = sortBy (\(FoundThing a _) (FoundThing b _) -> compare a b)
 114
 115 getfoundthings :: FileData -> [FoundThing]
 116 getfoundthings (FileData _ things) = things
 117
 118 dumpthing :: FoundThing -> String
 119 dumpthing (FoundThing name (Pos filename line _ _)) =
 120     name ++ "\t" ++ filename ++ "\t" ++ (show $ line + 1)
 121
 122
 123 -- stuff for dealing with etags output format
 124
 125 writeetagsfile :: Handle -> [FileData] -> IO ()
 126 writeetagsfile etagsfile filedata = do
 127     mapM_ (\x -> hPutStr etagsfile $ e_dumpfiledata x) filedata
 128
 129 e_dumpfiledata :: FileData -> String
 130 e_dumpfiledata (FileData filename things) =
 131     "\x0c\n" ++ filename ++ "," ++ (show thingslength) ++ "\n" ++ thingsdump
 132     where thingsdump = concat $ map e_dumpthing things
 133           thingslength = length thingsdump
 134
 135 e_dumpthing :: FoundThing -> String
 136 e_dumpthing (FoundThing _ (Pos _ line token fullline)) =
 137     (concat $ take (token + 1) $ spacedwords fullline)
 138  ++ "\x7f" ++ (show line) ++ "," ++ (show $ line+1) ++ "\n"
 139
 140
 141 -- like "words", but keeping the whitespace, and so letting us build
 142 -- accurate prefixes
 143
 144 spacedwords :: String -> [String]
 145 spacedwords [] = []
 146 spacedwords xs = (blanks ++ wordchars):(spacedwords rest2)
 147     where (blanks,rest) = span Char.isSpace xs
 148           (wordchars,rest2) = span (\x -> not $ Char.isSpace x) rest
 149
 150
 151 -- Find the definitions in a file
 152
 153 findthings :: FileName -> IO FileData
 154 findthings filename = do
 155     text <- readFile filename
 156     evaluate text -- forces evaluation of text
 157                   -- too many files were being opened otherwise since
 158                   -- readFile is lazy
 159     let aslines = lines text
 160     let wordlines = map mywords aslines
 161     let noslcoms = map stripslcomments wordlines
 162     let tokens = concat $ zipWith3 (withline filename) noslcoms aslines [0 ..]
 163     -- there are some tokens with "" (don't know why yet) this filter fixes it
 164     let tokens' = filter (\(Token s _ ) -> (not .  null) s ) tokens
 165     let nocoms = stripblockcomments tokens'
 166     -- using nub because getcons and findstuff are parsing parts of the file twice
 167     return $ FileData filename $ nub $ findstuff nocoms
 168   where evaluate [] = return ()
 169         evaluate (c:cs) = c `seq` evaluate cs
 170         -- my words is mainly copied from Data.List.
 171         -- difference abc::def is split into three words instead of one.
 172         -- We should really be lexing Haskell properly here rather
 173         -- than using hacks like this. In the future we expect hasktags
 174         -- to be replaced by something using the GHC API.
 175         mywords :: String -> [String]
 176         mywords (':':':':xs) = "::" : mywords xs
 177         mywords s =  case dropWhile isSpace s of
 178                          "" -> []
 179                          s' -> w : mywords s''
 180                              where (w, s'') = myBreak s'
 181                                    myBreak [] = ([],[])
 182                                    myBreak (':':':':xs) = ([], "::"++xs)
 183                                    myBreak (' ':xs) = ([],xs);
 184                                    myBreak (x:xs) = let (a,b) = myBreak xs
 185                                                     in  (x:a,b)
 186
 187 -- Create tokens from words, by recording their line number
 188 -- and which token they are through that line
 189
 190 withline :: FileName -> [String] -> String -> Int -> [Token]
 191 withline filename theWords fullline i =
 192     zipWith (\w t -> Token w (Pos filename i t fullline)) theWords $ [0 ..]
 193
 194 -- comments stripping
 195
 196 stripslcomments :: [String] -> [String]
 197 stripslcomments ("--" : _) = []
 198 stripslcomments (x : xs) = x : stripslcomments xs
 199 stripslcomments [] = []
 200
 201 stripblockcomments :: [Token] -> [Token]
 202 stripblockcomments ((Token "\\end{code}" _):xs) = afterlitend xs
 203 stripblockcomments ((Token "{-" _):xs) = afterblockcomend xs
 204 stripblockcomments (x:xs) = x:stripblockcomments xs
 205 stripblockcomments [] = []
 206
 207 afterlitend :: [Token] -> [Token]
 208 afterlitend (Token "\\begin{code}" _ : xs) = xs
 209 afterlitend (_ : xs) = afterlitend xs
 210 afterlitend [] = []
 211
 212 afterblockcomend :: [Token] -> [Token]
 213 afterblockcomend ((Token token _):xs)
 214  | contains "-}" token = xs
 215  | otherwise           = afterblockcomend xs
 216 afterblockcomend [] = []
 217
 218
 219 -- does one string contain another string
 220
 221 contains :: Eq a => [a] -> [a] -> Bool
 222 contains sub full = any (isPrefixOf sub) $ tails full
 223
 224 -- actually pick up definitions
 225
 226 findstuff :: [Token] -> [FoundThing]
 227 findstuff ((Token "module" _):(Token name pos):xs) =
 228     FoundThing name pos : (getcons xs) ++ (findstuff xs)
 229 findstuff ((Token "data" _):(Token name pos):xs) =
 230     FoundThing name pos : (getcons xs) ++ (findstuff xs)
 231 findstuff ((Token "newtype" _):(Token name pos):xs) =
 232     FoundThing name pos : findstuff xs
 233 findstuff ((Token "type" _):(Token name pos):xs) =
 234     FoundThing name pos : findstuff xs
 235 findstuff ((Token "class" _):xs) = findClassName xs
 236 findstuff ((Token name pos):(Token "::" _):xs) =
 237     FoundThing name pos : findstuff xs
 238 findstuff (_ : xs) = findstuff xs
 239 findstuff [] = []
 240
 241 findClassName :: [Token] -> [FoundThing]
 242 findClassName []  = []
 243 findClassName [Token n p]  = [FoundThing n p]
 244 findClassName xs = (\(Token n pos : xs') -> FoundThing n pos : findstuff xs') . drop2 . dropParens 0 $ xs
 245
 246 dropParens :: Integer -> [Token] -> [Token]
 247 dropParens n (Token "(" _ : xs) = dropParens (n + 1) xs
 248 dropParens 0 (x           : xs) = x : xs
 249 dropParens 1 (Token ")" _ : xs) = xs
 250 dropParens n (Token ")" _ : xs) = dropParens (n - 1) xs
 251 dropParens n (_           : xs) = dropParens n xs
 252 dropParens _ []                 = [] -- Shouldn't happen on correct source
 253
 254 -- dropsEverything till token "=>" (if it is on the same line as the
 255 -- first token. if not return tokens)
 256 drop2 :: [Token] -> [Token]
 257 drop2 tokens@(Token _ (Pos _ line_nr _ _ ) : _) =
 258   let (line, following) = span (\(Token _ (Pos _ l _ _)) -> l == line_nr) tokens
 259       (_, following_in_line) = span (\(Token n _) -> n /= "=>") line
 260   in case following_in_line of
 261           (Token "=>" _ : xs) -> xs ++ following
 262           _ -> tokens
 263 drop2 xs = xs
 264
 265 -- get the constructor definitions, knowing that a datatype has just started
 266
 267 getcons :: [Token] -> [FoundThing]
 268 getcons (Token "=" _ : Token name pos : xs) =
 269     FoundThing name pos : getcons2 xs
 270 getcons (_ : xs) = getcons xs
 271 getcons [] = []
 272
 273 getcons2 :: [Token] -> [FoundThing]
 274 getcons2 (Token "=" _ : _) = []
 275 getcons2 (Token "|" _ : Token name pos : xs) =
 276     FoundThing name pos : getcons2 xs
 277 getcons2 (_:xs) = getcons2 xs
 278 getcons2 [] = []
 279