2 % (c) The GRASP/AQUA Project, Glasgow University, 1992-1996
4 \section[Lexical analysis]{Lexical analysis}
7 #include "HsVersions.h"
11 isLexCon, isLexVar, isLexId, isLexSym,
12 isLexConId, isLexConSym, isLexVarId, isLexVarSym,
16 IfaceToken(..), lexIface, SYN_IE(IfM), thenIf, returnIf, happyError
21 IMPORT_1_3(Char(isDigit, isAlpha, isAlphanum, isUpper))
23 import Demand ( Demand {- instance Read -} )
24 import FiniteMap ( FiniteMap, listToFM, lookupFM )
25 import Maybes ( Maybe(..), MaybeErr(..) )
27 import CharSeq ( CSeq )
28 import ErrUtils ( Error(..) )
29 import Outputable ( Outputable(..) )
30 import PprStyle ( PprStyle(..) )
31 import Util ( nOfThem, panic )
35 %************************************************************************
37 \subsection{Lexical categories}
39 %************************************************************************
41 These functions test strings to see if they fit the lexical categories
42 defined in the Haskell report. Normally applied as in e.g. @isCon
46 isLexCon, isLexVar, isLexId, isLexSym, isLexConId, isLexConSym,
47 isLexVarId, isLexVarSym :: FAST_STRING -> Bool
49 isLexCon cs = isLexConId cs || isLexConSym cs
50 isLexVar cs = isLexVarId cs || isLexVarSym cs
52 isLexId cs = isLexConId cs || isLexVarId cs
53 isLexSym cs = isLexConSym cs || isLexVarSym cs
59 | cs == SLIT("[]") = True
60 | c == '(' = True -- (), (,), (,,), ...
61 | otherwise = isUpper c || isUpperISO c
67 | otherwise = isLower c || isLowerISO c
73 | otherwise = c == ':'
80 | otherwise = isSymbolASCII c
86 isSymbolASCII c = c `elem` "!#$%&*+./<=>?@\\^|~-"
87 isSymbolISO c = ord c `elem` (0xd7 : 0xf7 : [0xa1 .. 0xbf])
88 isUpperISO c = 0xc0 <= oc && oc <= 0xde && oc /= 0xd7 where oc = ord c
89 isLowerISO c = 0xdf <= oc && oc <= 0xff && oc /= 0xf7 where oc = ord c
93 %************************************************************************
95 \subsection{Tuple strings -- ugh!}
97 %************************************************************************
100 mkTupNameStr 0 = SLIT("()")
101 mkTupNameStr 1 = panic "Name.mkTupNameStr: 1 ???"
102 mkTupNameStr 2 = _PK_ "(,)" -- not strictly necessary
103 mkTupNameStr 3 = _PK_ "(,,)" -- ditto
104 mkTupNameStr 4 = _PK_ "(,,,)" -- ditto
105 mkTupNameStr n = _PK_ ("(" ++ nOfThem (n-1) ',' ++ ")")
110 %************************************************************************
112 \subsection{Data types}
114 %************************************************************************
118 = ITinterface -- keywords
138 | ITbang -- magic symbols
155 | ITinteger Integer -- numbers and names
156 | ITvarid FAST_STRING
157 | ITconid FAST_STRING
158 | ITvarsym FAST_STRING
159 | ITconsym FAST_STRING
160 | ITqvarid (FAST_STRING,FAST_STRING)
161 | ITqconid (FAST_STRING,FAST_STRING)
162 | ITqvarsym (FAST_STRING,FAST_STRING)
163 | ITqconsym (FAST_STRING,FAST_STRING)
165 -- Stuff for reading unfoldings
166 | ITarity | ITstrict | ITunfold
167 | ITdemand [Demand] | ITbottom
168 | ITlam | ITbiglam | ITcase | ITlet | ITletrec | ITin | ITof
169 | ITcoerce_in | ITcoerce_out
170 | ITchar Char | ITstring FAST_STRING
171 deriving Text -- debugging
174 %************************************************************************
176 \subsection{The lexical analyser}
178 %************************************************************************
181 lexIface :: String -> [IfaceToken]
188 -- whitespace and comments
189 ' ' : cs -> lexIface cs
190 '\t' : cs -> lexIface cs
191 '\n' : cs -> lexIface cs
192 '-' : '-' : cs -> lex_comment cs
194 -- Leave out nested comments for now; danger of finding { and - juxtaposed by mistake?
195 -- '{' : '-' : cs -> lex_nested_comment 1{-one seen-} cs
197 '(' : '.' : '.' : ')' : cs -> ITdotdot : lexIface cs
198 '{' : cs -> ITocurly : lexIface cs
199 '}' : cs -> ITccurly : lexIface cs
200 '(' : ',' : cs -> lex_tuple Nothing cs
201 '(' : ')' : cs -> ITconid SLIT("()") : lexIface cs
202 '(' : cs -> IToparen : lexIface cs
203 ')' : cs -> ITcparen : lexIface cs
204 '[' : ']' : cs -> ITconid SLIT("[]") : lexIface cs
205 '[' : cs -> ITobrack : lexIface cs
206 ']' : cs -> ITcbrack : lexIface cs
207 ',' : cs -> ITcomma : lexIface cs
208 ':' : ':' : cs -> ITdcolon : lexIface cs
209 ';' : cs -> ITsemi : lexIface cs
210 '\"' : cs -> case read input of
211 ((str, rest) : _) -> ITstring (_PK_ (str::String)) : lexIface rest
212 '\'' : cs -> case read input of
213 ((ch, rest) : _) -> ITchar ch : lexIface rest
215 '_' : 'S' : '_' : cs -> ITstrict : lex_demand cs
216 '_' : cs -> lex_keyword cs
218 c : cs | isDigit c -> lex_num input
219 | otherwise -> lex_id input
221 other -> error ("lexing:"++other)
224 = case (span ((/=) '\n') str) of { (junk, rest) ->
228 lex_demand (c:cs) | isSpace c = lex_demand cs
229 | otherwise = case readList (c:cs) of
230 ((demand,rest) : _) -> ITdemand demand : lexIface rest
233 = case (span isDigit str) of { (num, rest) ->
234 ITinteger (read num) : lexIface rest }
238 = case (span is_kwd_mod_char str) of { (kw, rest) ->
239 case (lookupFM ifaceKeywordsFM kw) of
240 Nothing -> panic ("lex_keyword:"++str)
241 Just xx -> xx : lexIface rest
244 is_kwd_mod_char '_' = True
245 is_kwd_mod_char c = isAlphanum c
248 lex_tuple module_dot orig_cs = go 2 orig_cs
250 go n (',':cs) = go (n+1) cs
251 go n (')':cs) = end_lex_id module_dot (ITconid (mkTupNameStr n)) cs
252 go n other = panic ("lex_tuple" ++ orig_cs)
254 -- NB: ':' isn't valid inside an identifier, only at the start.
255 -- otherwise we get confused by a::t!
256 is_id_char c = isAlphanum c || c `elem` "_'!#$%&*+./<=>?@\\^|-~" -- ToDo: add ISOgraphic
260 go xs (f :cs) | is_kwd_mod_char f = go (f : xs) cs
261 go xs ('.':cs) | not (null xs) = lex_id2 (Just (_PK_ (reverse xs))) [] cs
262 go xs cs = lex_id2 Nothing xs cs
264 -- Dealt with the Module.part
265 lex_id2 module_dot [] ('[': ']': cs) = end_lex_id module_dot (ITconid SLIT("[]")) cs
266 lex_id2 module_dot [] ('(': ')': cs) = end_lex_id module_dot (ITconid SLIT("()")) cs
267 lex_id2 module_dot [] ('(': ',': cs) = lex_tuple module_dot cs
268 lex_id2 module_dot [] (':' : cs) = lex_id3 module_dot [':'] cs
269 lex_id2 module_dot xs cs = lex_id3 module_dot xs cs
271 -- Dealt with [], (), : special cases
272 lex_id3 module_dot xs (f:cs) | is_id_char f = lex_id3 module_dot (f : xs) cs
274 lex_id3 Nothing xs rest = case lookupFM haskellKeywordsFM rxs of
275 Just kwd_token -> kwd_token : lexIface rest
276 other -> (mk_var_token rxs) : lexIface rest
280 lex_id3 (Just m) xs rest = end_lex_id (Just m) (mk_var_token (reverse xs)) rest
282 mk_var_token xs@(f:_) | isUpper f || isUpperISO f = ITconid n
283 | f == ':' = ITconsym n
284 | isAlpha f = ITvarid n
285 | otherwise = ITvarsym n
289 end_lex_id (Just m) (ITconid n) cs = ITqconid (m,n) : lexIface cs
290 end_lex_id (Just m) (ITvarid n) cs = ITqvarid (m,n) : lexIface cs
291 end_lex_id (Just m) (ITconsym n) cs = ITqconsym (m,n): lexIface cs
292 end_lex_id (Just m) (ITvarsym n) cs = ITqvarsym (m,n): lexIface cs
293 end_lex_id (Just m) ITbang cs = ITqvarsym (m,SLIT("!")) : lexIface cs
294 end_lex_id (Just m) token cs = panic ("end_lex_id:" ++ show token)
295 end_lex_id Nothing token cs = token : lexIface cs
298 ifaceKeywordsFM :: FiniteMap String IfaceToken
299 ifaceKeywordsFM = listToFM [
300 ("interface_", ITinterface)
301 ,("usages_", ITusages)
302 ,("versions_", ITversions)
303 ,("exports_", ITexports)
304 ,("instance_modules_", ITinstance_modules)
305 ,("instances_", ITinstances)
306 ,("fixities_", ITfixities)
307 ,("declarations_", ITdeclarations)
308 ,("pragmas_", ITpragmas)
309 ,("forall_", ITforall)
312 ,("coerce_in_", ITcoerce_in)
313 ,("coerce_out_", ITcoerce_out)
320 haskellKeywordsFM = listToFM [
323 ,("newtype", ITnewtype)
326 ,("instance", ITinstance)
327 ,("infixl", ITinfixl)
328 ,("infixr", ITinfixr)
334 ,("letrec", ITletrec)
335 ,("deriving", ITderiving)
348 %************************************************************************
350 \subsection{Other utility functions
352 %************************************************************************
355 type IfM a = MaybeErr a Error
357 returnIf :: a -> IfM a
358 thenIf :: IfM a -> (a -> IfM b) -> IfM b
359 happyError :: Int -> [IfaceToken] -> IfM a
361 returnIf a = Succeeded a
363 thenIf (Succeeded a) k = k a
364 thenIf (Failed err) _ = Failed err
366 happyError ln toks = Failed (ifaceParseErr ln toks)
368 -----------------------------------------------------------------
370 ifaceParseErr ln toks sty
371 = ppCat [ppPStr SLIT("Interface-file parse error: line"), ppInt ln, ppStr "toks=", ppStr (show (take 10 toks))]