2 % (c) The GRASP/AQUA Project, Glasgow University, 1992-1996
4 \section[Lexical analysis]{Lexical analysis}
7 #include "HsVersions.h"
11 isLexCon, isLexVar, isLexId, isLexSym,
12 isLexConId, isLexConSym, isLexVarId, isLexVarSym,
16 IfaceToken(..), lexIface, SYN_IE(IfM), thenIf, returnIf, happyError
21 IMPORT_1_3(Char(isDigit, isAlpha, isAlphanum, isUpper))
23 import Demand ( Demand {- instance Read -} )
24 import FiniteMap ( FiniteMap, listToFM, lookupFM )
25 import Maybes ( Maybe(..), MaybeErr(..) )
27 import CharSeq ( CSeq )
28 import ErrUtils ( Error(..) )
29 import Outputable ( Outputable(..) )
30 import PprStyle ( PprStyle(..) )
31 import Util ( nOfThem, panic )
35 %************************************************************************
37 \subsection{Lexical categories}
39 %************************************************************************
41 These functions test strings to see if they fit the lexical categories
42 defined in the Haskell report. Normally applied as in e.g. @isCon
46 isLexCon, isLexVar, isLexId, isLexSym, isLexConId, isLexConSym,
47 isLexVarId, isLexVarSym :: FAST_STRING -> Bool
49 isLexCon cs = isLexConId cs || isLexConSym cs
50 isLexVar cs = isLexVarId cs || isLexVarSym cs
52 isLexId cs = isLexConId cs || isLexVarId cs
53 isLexSym cs = isLexConSym cs || isLexVarSym cs
59 | cs == SLIT("[]") = True
60 | c == '(' = True -- (), (,), (,,), ...
61 | otherwise = isUpper c || isUpperISO c
67 | otherwise = isLower c || isLowerISO c
73 | otherwise = c == ':'
80 | otherwise = isSymbolASCII c
86 isSymbolASCII c = c `elem` "!#$%&*+./<=>?@\\^|~-"
87 isSymbolISO c = ord c `elem` (0xd7 : 0xf7 : [0xa1 .. 0xbf])
88 isUpperISO c = 0xc0 <= oc && oc <= 0xde && oc /= 0xd7 where oc = ord c
89 isLowerISO c = 0xdf <= oc && oc <= 0xff && oc /= 0xf7 where oc = ord c
93 %************************************************************************
95 \subsection{Tuple strings -- ugh!}
97 %************************************************************************
100 mkTupNameStr 0 = SLIT("()")
101 mkTupNameStr 1 = panic "Name.mkTupNameStr: 1 ???"
102 mkTupNameStr 2 = _PK_ "(,)" -- not strictly necessary
103 mkTupNameStr 3 = _PK_ "(,,)" -- ditto
104 mkTupNameStr 4 = _PK_ "(,,,)" -- ditto
105 mkTupNameStr n = _PK_ ("(" ++ nOfThem (n-1) ',' ++ ")")
110 %************************************************************************
112 \subsection{Data types}
114 %************************************************************************
118 = ITinterface -- keywords
138 | ITbang -- magic symbols
155 | ITvarid FAST_STRING
156 | ITconid FAST_STRING
157 | ITvarsym FAST_STRING
158 | ITconsym FAST_STRING
159 | ITqvarid (FAST_STRING,FAST_STRING)
160 | ITqconid (FAST_STRING,FAST_STRING)
161 | ITqvarsym (FAST_STRING,FAST_STRING)
162 | ITqconsym (FAST_STRING,FAST_STRING)
164 -- Stuff for reading unfoldings
165 | ITarity | ITstrict | ITunfold
166 | ITdemand [Demand] | ITbottom
167 | ITlam | ITbiglam | ITcase | ITprim_case | ITlet | ITletrec | ITin | ITof
168 | ITcoerce_in | ITcoerce_out | ITatsign
169 | ITccall (Bool,Bool) -- (is_casm, may_gc)
171 | ITchar Char | ITstring FAST_STRING
172 | ITinteger Integer | ITdouble Double
173 | ITinteger_lit | ITfloat_lit | ITrational_lit | ITaddr_lit | ITlit_lit | ITstring_lit
174 deriving Text -- debugging
177 %************************************************************************
179 \subsection{The lexical analyser}
181 %************************************************************************
184 lexIface :: String -> [IfaceToken]
191 -- whitespace and comments
192 ' ' : cs -> lexIface cs
193 '\t' : cs -> lexIface cs
194 '\n' : cs -> lexIface cs
195 '-' : '-' : cs -> lex_comment cs
197 -- Leave out nested comments for now; danger of finding { and - juxtaposed by mistake?
198 -- '{' : '-' : cs -> lex_nested_comment 1{-one seen-} cs
200 '(' : '.' : '.' : ')' : cs -> ITdotdot : lexIface cs
201 '{' : cs -> ITocurly : lexIface cs
202 '}' : cs -> ITccurly : lexIface cs
203 '(' : ',' : cs -> lex_tuple Nothing cs
204 '(' : ')' : cs -> ITconid SLIT("()") : lexIface cs
205 '(' : cs -> IToparen : lexIface cs
206 ')' : cs -> ITcparen : lexIface cs
207 '[' : ']' : cs -> ITconid SLIT("[]") : lexIface cs
208 '[' : cs -> ITobrack : lexIface cs
209 ']' : cs -> ITcbrack : lexIface cs
210 ',' : cs -> ITcomma : lexIface cs
211 ':' : ':' : cs -> ITdcolon : lexIface cs
212 ';' : cs -> ITsemi : lexIface cs
213 '@' : cs -> ITatsign : lexIface cs
214 '\"' : cs -> case reads input of
215 [(str, rest)] -> ITstring (_PK_ (str::String)) : lexIface rest
216 '\'' : cs -> case reads input of
217 [(ch, rest)] -> ITchar ch : lexIface rest
219 -- ``thingy'' form for casm
220 '`' : '`' : cs -> lex_cstring "" cs
223 '_' : 'S' : '_' : cs -> ITstrict : lex_demand cs
224 '_' : cs -> lex_keyword cs
227 '-' : c : cs | isDigit c -> lex_num "-" (c:cs)
228 c : cs | isDigit c -> lex_num "" (c:cs)
230 other -> lex_id input
233 = case (span ((/=) '\n') str) of { (junk, rest) ->
237 lex_demand (c:cs) | isSpace c = lex_demand cs
238 | otherwise = case readList (c:cs) of
239 ((demand,rest) : _) -> ITdemand demand : lexIface rest
243 = case (span isDigit str) of { (num, rest) ->
245 '.' : str2 -> case (span isDigit str2) of { (num2,rest2) ->
246 ITdouble (read (minus ++ num ++ ('.':num2))) : lexIface rest2
249 other -> ITinteger (read (minus ++ num)) : lexIface rest
254 = case (span is_kwd_mod_char str) of { (kw, rest) ->
255 case (lookupFM ifaceKeywordsFM kw) of
256 Nothing -> panic ("lex_keyword:"++str)
257 Just xx -> xx : lexIface rest
260 is_kwd_mod_char '_' = True
261 is_kwd_mod_char c = isAlphanum c
264 lex_cstring so_far ('\'' : '\'' : cs) = ITstring (_PK_ (reverse (so_far::String))) : lexIface cs
265 lex_cstring so_far (c : cs) = lex_cstring (c:so_far) cs
269 lex_tuple module_dot orig_cs = go 2 orig_cs
271 go n (',':cs) = go (n+1) cs
272 go n (')':cs) = end_lex_id module_dot (ITconid (mkTupNameStr n)) cs
273 go n other = panic ("lex_tuple" ++ orig_cs)
275 -- NB: ':' isn't valid inside an identifier, only at the start.
276 -- otherwise we get confused by a::t!
277 -- Similarly ' itself is ok inside an identifier, but not at the start
278 is_id_char c = isAlphanum c || c `elem` "_'!#$%&*+./<=>?@\\^|-~" -- ToDo: add ISOgraphic
282 go xs (f :cs) | is_kwd_mod_char f = go (f : xs) cs
283 go xs ('.':cs) | not (null xs) = lex_id2 (Just (_PK_ (reverse xs))) [] cs
284 go xs cs = lex_id2 Nothing xs cs
286 -- Dealt with the Module.part
287 lex_id2 module_dot [] ('[': ']': cs) = end_lex_id module_dot (ITconid SLIT("[]")) cs
288 lex_id2 module_dot [] ('(': ')': cs) = end_lex_id module_dot (ITconid SLIT("()")) cs
289 lex_id2 module_dot [] ('(': ',': cs) = lex_tuple module_dot cs
290 lex_id2 module_dot [] (':' : cs) = lex_id3 module_dot [':'] cs
291 lex_id2 module_dot xs cs = lex_id3 module_dot xs cs
293 -- Dealt with [], (), : special cases
294 lex_id3 module_dot xs (f:cs) | is_id_char f = lex_id3 module_dot (f : xs) cs
296 lex_id3 Nothing xs rest = case lookupFM haskellKeywordsFM rxs of
297 Just kwd_token -> kwd_token : lexIface rest
298 other -> (mk_var_token rxs) : lexIface rest
302 lex_id3 (Just m) xs rest = end_lex_id (Just m) (mk_var_token (reverse xs)) rest
304 mk_var_token xs@(f:_) | isUpper f || isUpperISO f = ITconid n
305 | f == ':' = ITconsym n
306 | isAlpha f = ITvarid n
307 | otherwise = ITvarsym n
311 end_lex_id (Just m) (ITconid n) cs = ITqconid (m,n) : lexIface cs
312 end_lex_id (Just m) (ITvarid n) cs = ITqvarid (m,n) : lexIface cs
313 end_lex_id (Just m) (ITconsym n) cs = ITqconsym (m,n): lexIface cs
314 end_lex_id (Just m) (ITvarsym n) cs = ITqvarsym (m,n): lexIface cs
315 end_lex_id (Just m) ITbang cs = ITqvarsym (m,SLIT("!")) : lexIface cs
316 end_lex_id (Just m) token cs = panic ("end_lex_id:" ++ show token)
317 end_lex_id Nothing token cs = token : lexIface cs
320 ifaceKeywordsFM :: FiniteMap String IfaceToken
321 ifaceKeywordsFM = listToFM [
322 ("interface_", ITinterface)
323 ,("usages_", ITusages)
324 ,("versions_", ITversions)
325 ,("exports_", ITexports)
326 ,("instance_modules_", ITinstance_modules)
327 ,("instances_", ITinstances)
328 ,("fixities_", ITfixities)
329 ,("declarations_", ITdeclarations)
330 ,("pragmas_", ITpragmas)
331 ,("forall_", ITforall)
334 ,("coerce_in_", ITcoerce_in)
335 ,("coerce_out_", ITcoerce_out)
339 ,("integer_", ITinteger_lit)
340 ,("rational_", ITrational_lit)
341 ,("addr_", ITaddr_lit)
342 ,("float_", ITfloat_lit)
343 ,("string_", ITstring_lit)
344 ,("litlit_", ITlit_lit)
345 ,("ccall_", ITccall (False, False))
346 ,("ccall_GC_", ITccall (False, True))
347 ,("casm_", ITccall (True, False))
348 ,("casm_GC_", ITccall (True, True))
351 haskellKeywordsFM = listToFM [
354 ,("newtype", ITnewtype)
357 ,("instance", ITinstance)
358 ,("infixl", ITinfixl)
359 ,("infixr", ITinfixr)
362 ,("case#", ITprim_case)
366 ,("letrec", ITletrec)
367 ,("deriving", ITderiving)
380 %************************************************************************
382 \subsection{Other utility functions
384 %************************************************************************
387 type IfM a = MaybeErr a Error
389 returnIf :: a -> IfM a
390 thenIf :: IfM a -> (a -> IfM b) -> IfM b
391 happyError :: Int -> [IfaceToken] -> IfM a
393 returnIf a = Succeeded a
395 thenIf (Succeeded a) k = k a
396 thenIf (Failed err) _ = Failed err
398 happyError ln toks = Failed (ifaceParseErr ln toks)
400 -----------------------------------------------------------------
402 ifaceParseErr ln toks sty
403 = ppCat [ppPStr SLIT("Interface-file parse error: line"), ppInt ln, ppStr "toks=", ppStr (show (take 10 toks))]