2 module Main (main) where
import Control.Monad (when)
import System.Console.GetOpt
import System.Environment
import System.IO (IOMode (..), withFile)
12 -- search for definitions of things
13 -- we do this by looking for the following patterns:
14 -- data XXX = ... giving a datatype location
15 -- newtype XXX = ... giving a newtype location
16 -- bla :: ... giving a function location
18 -- by doing it this way, we avoid picking up local definitions
19 -- (whether this is good or not is a matter for debate)
22 -- We generate both CTAGS and ETAGS format tags files
23 -- The former is for use in most sensible editors, while EMACS uses ETAGS
26 -- TODO add tag categories
27 -- alternatives: http://haskell.org/haskellwiki/Tags
-- | Program entry point.
--
-- Parses the command line, prints the usage text and exits with status 1
-- when parsing failed, help was requested, or no input files were given.
-- Otherwise every input file is scanned and the @tags@ and/or @TAGS@ file
-- is written, depending on the selected mode.
main :: IO ()
main = do
  progName <- getProgName
  args <- getArgs
  let usageString = "Usage: " ++ progName ++ " [OPTION...] [files...]"
      (modes, filenames, errs) = getOpt Permute options args
  when (not (null errs) || Help `elem` modes || null filenames) $ do
    -- getOpt's messages are already newline-terminated.
    putStr (concat errs)
    putStr $ usageInfo usageString options
    exitWith (ExitFailure 1)
  -- 'Append' only selects how the output files are opened; it is not an
  -- output format. Remove every occurrence (not just the first one), so a
  -- repeated "-a" cannot win the precedence contest in 'getMode' and
  -- silently suppress all output.
  let mode = getMode (filter (/= Append) modes)
      openFileMode = if Append `elem` modes then AppendMode else WriteMode
  filedata <- mapM findthings filenames
  -- 'withFile' closes the handle even if writing fails.
  when (mode == BothTags || mode == CTags) $
    withFile "tags" openFileMode $ \ctagsfile ->
      writectagsfile ctagsfile filedata
  when (mode == BothTags || mode == ETags) $
    withFile "TAGS" openFileMode $ \etagsfile ->
      writeetagsfile etagsfile filedata
-- | Pick the mode with the highest precedence from a list of modes.
--
-- Precedence is the derived 'Ord' instance of 'Mode' (declaration order),
-- so among the output formats 'BothTags' beats 'CTags', which beats
-- 'ETags'. An empty list yields the default, 'BothTags'.
--
-- Note that 'Append' and 'Help' rank above the output formats; callers are
-- expected to remove them before asking for the mode.
getMode :: [Mode] -> Mode
getMode [] = BothTags
getMode modes = maximum modes
-- | Flags accepted on the command line.
--
-- The constructor order matters: the derived 'Ord' instance is used to
-- decide which flag takes precedence.
data Mode
  = ETags -- ^ write the Emacs-style @TAGS@ file
  | CTags -- ^ write the vi-style @tags@ file
  | BothTags -- ^ write both files
  | Append -- ^ append to existing files instead of overwriting them
  | Help -- ^ show the usage text
  deriving (Ord, Eq, Show)
-- | Command-line option table handed to 'getOpt' and 'usageInfo'.
--
-- None of the flags takes an argument; each one simply contributes its
-- 'Mode' constructor to the parsed result.
options :: [OptDescr Mode]
options =
  [ Option "c" ["ctags"] (NoArg CTags) "generate CTAGS file (ctags)"
  , Option "e" ["etags"] (NoArg ETags) "generate ETAGS file (etags)"
  , Option "b" ["both"] (NoArg BothTags) "generate both CTAGS and ETAGS"
  , Option "a" ["append"] (NoArg Append) "append to existing CTAGS and/or ETAGS file(s)"
  , Option "h" ["help"] (NoArg Help) "This help"
  ]
-- | Path of a source file, as given on the command line.
type FileName = String
-- | Name of a tagged definition (module, type, constructor, function, ...).
type ThingName = String
-- | The position of a token or definition.
--
-- 'Eq' is required because found definitions are de-duplicated with 'nub'.
data Pos
  = Pos
      FileName -- file the token was read from
      Int -- line number, counted from 0
      Int -- index of the token within its line, counted from 0
      String -- string that makes up that line
  deriving (Show, Eq)
-- | A definition we have found: its name and where it was seen.
--
-- 'Eq' is derived because the list of found definitions is passed through
-- 'nub' to drop duplicates.
data FoundThing = FoundThing ThingName Pos
  deriving (Show, Eq)
-- | Everything obtained from one source file: the file's name together
-- with the definitions found in it.
data FileData
  = FileData FileName [FoundThing]
-- | A single word taken from the source text, paired with its position.
data Token
  = Token String Pos
105 -- stuff for dealing with ctags output format
-- | Write all found definitions to the given handle in ctags format,
-- one per line, sorted by name across all files.
writectagsfile :: Handle -> [FileData] -> IO ()
writectagsfile ctagsfile filedata =
  mapM_ (hPutStrLn ctagsfile . dumpthing) sortedThings
  where
    sortedThings = sortThings (concatMap getfoundthings filedata)
-- | Sort definitions by name; positions play no part in the ordering.
sortThings :: [FoundThing] -> [FoundThing]
sortThings = sortBy byName
  where
    byName (FoundThing left _) (FoundThing right _) = left `compare` right
-- | Project the list of found definitions out of a 'FileData'.
getfoundthings :: FileData -> [FoundThing]
getfoundthings fileData = case fileData of
  FileData _ found -> found
-- | Render one definition as a ctags line: name, file name and line
-- number, separated by tabs. The stored line number is incremented by one
-- before being printed.
dumpthing :: FoundThing -> String
dumpthing (FoundThing name (Pos filename line _ _)) =
  concat [name, "\t", filename, "\t", show (line + 1)]
123 -- stuff for dealing with etags output format
-- | Write one etags section per input file to the given handle, in the
-- order the files were given.
writeetagsfile :: Handle -> [FileData] -> IO ()
writeetagsfile etagsfile = mapM_ (hPutStr etagsfile . e_dumpfiledata)
-- | Render the etags section for one file: a form feed line, a header
-- line with the file name and the length of the section body, and then
-- the body itself (one entry per definition).
--
-- The length is the number of characters in the rendered body.
e_dumpfiledata :: FileData -> String
e_dumpfiledata (FileData filename things) =
  concat ["\x0c\n", filename, ",", show (length body), "\n", body]
  where
    body = concatMap e_dumpthing things
-- | Render one etags entry: the source line up to and including the
-- definition's token, a DEL character, and then two numbers separated by
-- a comma (the stored line number and the stored line number plus one).
--
-- NOTE(review): the ctags writer prints @line + 1@ as the line number,
-- while the first number here is the raw stored value -- confirm which
-- one etags consumers expect.
e_dumpthing :: FoundThing -> String
e_dumpthing (FoundThing _ (Pos _ line token fullline)) =
  prefix ++ "\x7f" ++ show line ++ "," ++ show (line + 1) ++ "\n"
  where
    prefix = concat (take (token + 1) (spacedwords fullline))
-- | Like 'words', but each word keeps the whitespace that precedes it, so
-- concatenating the result gives back the original string. Trailing
-- whitespace ends up in a final chunk of its own.
spacedwords :: String -> [String]
-- Without this case the recursion never ends and the result is infinite.
spacedwords [] = []
spacedwords xs = (blanks ++ wordchars) : spacedwords rest2
  where
    (blanks, rest) = span Char.isSpace xs
    (wordchars, rest2) = break Char.isSpace rest
151 -- Find the definitions in a file
-- | Find the definitions in a file.
--
-- The file is read completely and split into words line by line. Line
-- comments, block comments and the text between literate code sections
-- are removed, and the remaining tokens are scanned for definitions.
-- Duplicates are dropped from the result.
findthings :: FileName -> IO FileData
findthings filename = do
  text <- readFile filename
  -- 'readFile' is lazy: force the whole contents now, since too many
  -- files were being opened at once otherwise.
  evaluate text
  let aslines = lines text
      wordlines = map mywords aslines
      noslcoms = map stripslcomments wordlines
      tokens = concat $ zipWith3 (withline filename) noslcoms aslines [0 ..]
      -- 'mywords' yields an empty word when "::" directly follows leading
      -- whitespace; drop those after the token indices have been assigned.
      tokens' = filter (\(Token s _) -> not (null s)) tokens
      nocoms = stripblockcomments tokens'
  -- using nub because getcons and findstuff are parsing parts of the file twice
  return $ FileData filename $ nub $ findstuff nocoms
  where
    -- Walk the string, forcing every character.
    evaluate :: String -> IO ()
    evaluate [] = return ()
    evaluate (c : cs) = c `seq` evaluate cs

    -- Mainly copied from 'words' in Data.List. The difference is that
    -- abc::def is split into three words instead of one.
    -- We should really be lexing Haskell properly here rather than using
    -- hacks like this. In the future we expect hasktags to be replaced by
    -- something using the GHC API.
    mywords :: String -> [String]
    mywords (':' : ':' : xs) = "::" : mywords xs
    mywords s = case dropWhile isSpace s of
      "" -> []
      s' -> let (w, s'') = myBreak s' in w : mywords s''

    -- Split off the next word. It ends at the first whitespace character
    -- (which is consumed) or just before a "::" (which is kept). Any
    -- whitespace ends a word, not only ' ', so tab-separated words are
    -- split as well.
    myBreak :: String -> (String, String)
    myBreak [] = ([], [])
    myBreak (':' : ':' : xs) = ([], "::" ++ xs)
    myBreak (x : xs)
      | isSpace x = ([], xs)
      | otherwise = let (a, b) = myBreak xs in (x : a, b)
-- | Turn the words of one line into tokens. Every token records the file
-- name, the line number it was given, its own index within the line
-- (counted from 0) and the full text of the line.
withline :: FileName -> [String] -> String -> Int -> [Token]
withline filename theWords fullline i =
  [Token w (Pos filename i t fullline) | (w, t) <- zip theWords [0 ..]]
194 -- comments stripping
-- | Drop a trailing line comment from the words of one line.
--
-- A word starts a comment when it begins with two or more dashes that are
-- not immediately followed by a symbol character. So @--@, @---@ and
-- @--foo@ start a comment, while operators such as @-->@ do not. The
-- comment word and everything after it are removed.
stripslcomments :: [String] -> [String]
stripslcomments = takeWhile (not . startsLineComment)
  where
    startsLineComment w = case span (== '-') w of
      (dashes, rest) -> length dashes >= 2 && not (startsWithSymbol rest)
    -- ASCII symbol characters that would make the dashes part of an
    -- operator instead of a comment marker.
    startsWithSymbol (c : _) = c `elem` "!#$%&*+./<=>?@\\^|~:"
    startsWithSymbol [] = False
-- | Remove block comments and the text between literate code sections.
--
-- A @\\end{code}@ token discards everything up to and including the next
-- @\\begin{code}@; a @{-@ token discards everything up to and including
-- the next token that contains @-}@. Stripping then continues with the
-- remaining tokens, so every such region is removed, not only the first.
stripblockcomments :: [Token] -> [Token]
stripblockcomments (Token "\\end{code}" _ : xs) =
  stripblockcomments (afterlitend xs)
stripblockcomments (Token "{-" _ : xs) =
  stripblockcomments (afterblockcomend xs)
stripblockcomments (x : xs) = x : stripblockcomments xs
stripblockcomments [] = []
-- | Skip tokens up to and including the next @\\begin{code}@ token and
-- return what follows. If no such token exists, nothing is left.
afterlitend :: [Token] -> [Token]
afterlitend (Token "\\begin{code}" _ : xs) = xs
afterlitend (_ : xs) = afterlitend xs
-- A file may end outside a code section.
afterlitend [] = []
-- | Skip tokens up to and including the first one that contains @-}@ and
-- return what follows. If no token closes the comment, nothing is left.
afterblockcomend :: [Token] -> [Token]
afterblockcomend = drop 1 . dropWhile (not . closesComment)
  where
    closesComment (Token token _) = contains "-}" token
-- | Does the second list contain the first one as a contiguous sublist?
-- The empty list is contained in every list.
contains :: Eq a => [a] -> [a] -> Bool
contains sub = go
  where
    go full
      | sub `isPrefixOf` full = True
      | otherwise = case full of
          [] -> False
          _ : rest -> go rest
224 -- actually pick up definitions
-- | Scan a token stream for definitions.
--
-- Recognised patterns: @module X@, @data X@ (plus its constructors),
-- @newtype X@, @type X@, @class ... X@ and @x ::@. Everything else is
-- skipped one token at a time.
findstuff :: [Token] -> [FoundThing]
-- A module header has no constructors; looking for them here would tag
-- whatever follows the first "=" in the file.
findstuff (Token "module" _ : Token name pos : xs) =
  FoundThing name pos : findstuff xs
findstuff (Token "data" _ : Token name pos : xs) =
  FoundThing name pos : getcons xs ++ findstuff xs
findstuff (Token "newtype" _ : Token name pos : xs) =
  FoundThing name pos : findstuff xs
findstuff (Token "type" _ : Token name pos : xs) =
  FoundThing name pos : findstuff xs
findstuff (Token "class" _ : xs) = findClassName xs
findstuff (Token name pos : Token "::" _ : xs) =
  FoundThing name pos : findstuff xs
findstuff (_ : xs) = findstuff xs
-- End of input.
findstuff [] = []
-- | Find the name of a class, given the tokens after the @class@ keyword.
--
-- A parenthesised group at the start and, after it, everything up to a
-- @=>@ on the same line are skipped; the next token is taken as the class
-- name and scanning continues with 'findstuff'. If nothing remains after
-- skipping, no definition is reported.
findClassName :: [Token] -> [FoundThing]
findClassName [] = []
findClassName [Token n p] = [FoundThing n p]
findClassName xs = case drop2 (dropParens 0 xs) of
  Token n pos : rest -> FoundThing n pos : findstuff rest
  -- e.g. unbalanced parentheses consumed the rest of the input
  [] -> []
-- | Skip a leading parenthesised group of tokens.
--
-- The first argument is the current nesting depth. At depth 0 any token
-- other than @(@ ends the skipping and the list is returned unchanged
-- from there; the @)@ that closes depth 1 is dropped and the rest is
-- returned.
dropParens :: Integer -> [Token] -> [Token]
dropParens depth tokens = case tokens of
  [] -> [] -- Shouldn't happen on correct source
  Token "(" _ : rest -> dropParens (depth + 1) rest
  _ | depth == 0 -> tokens
  Token ")" _ : rest
    | depth == 1 -> rest
    | otherwise -> dropParens (depth - 1) rest
  _ : rest -> dropParens depth rest
-- | Drop everything up to and including the token @=>@, provided it is on
-- the same line as the first token. If there is no such token, the tokens
-- are returned unchanged.
drop2 :: [Token] -> [Token]
drop2 tokens@(Token _ (Pos _ line_nr _ _) : _) =
  case dropWhile (\(Token n _) -> n /= "=>") line of
    Token "=>" _ : xs -> xs ++ following
    _ -> tokens
  where
    -- tokens on the first token's line, and everything after that line
    (line, following) = span (\(Token _ (Pos _ l _ _)) -> l == line_nr) tokens
drop2 [] = []
-- | Get the constructor definitions, knowing that a datatype has just
-- started: skip ahead to the first @=@, report the token after it, and
-- let 'getcons2' collect the remaining alternatives.
getcons :: [Token] -> [FoundThing]
getcons (Token "=" _ : Token name pos : xs) =
  FoundThing name pos : getcons2 xs
getcons (_ : xs) = getcons xs
-- No "=" before the end of input: no constructors.
getcons [] = []
-- | Collect the remaining constructors of a datatype: every token that
-- directly follows a @|@ is reported. Collection stops at the next @=@,
-- which is taken to start a different definition, or at the end of input.
getcons2 :: [Token] -> [FoundThing]
getcons2 (Token "=" _ : _) = []
getcons2 (Token "|" _ : Token name pos : xs) =
  FoundThing name pos : getcons2 xs
getcons2 (_ : xs) = getcons2 xs
getcons2 [] = []