2 % (c) The GRASP/AQUA Project, Glasgow University, 1992-1998
5 \section[OccName]{@OccName@}
10 Module, -- Abstract, instance of Outputable
11 mkModule, mkModuleFS, moduleString, moduleCString, pprModule,
14 OccName, -- Abstract, instance of Outputable
15 varOcc, tcOcc, tvOcc, -- Occ constructors
16 srcVarOcc, srcTCOcc, srcTvOcc, -- For Occs arising from source code
18 mkSuperDictSelOcc, mkDFunOcc,
19 mkDictOcc, mkWorkerOcc, mkDefaultMethodOcc,
20 mkClassTyConOcc, mkClassDataConOcc,
22 isTvOcc, isTCOcc, isVarOcc, isConSymOcc, isConOcc, isSymOcc,
23 isWildCardOcc, isAnonOcc,
24 pprOccName, occNameString, occNameFlavour,
26 -- The basic form of names
27 isLexCon, isLexVar, isLexId, isLexSym,
28 isLexConId, isLexConSym, isLexVarId, isLexVarSym,
29 isLowerISO, isUpperISO,
32 TidyOccEnv, emptyTidyOccEnv, tidyOccName, initTidyOccEnv,
39 #include "HsVersions.h"
41 import Char ( isAlpha, isUpper, isLower, ISALPHANUM, ord )
42 import Util ( thenCmp )
43 import FiniteMap ( FiniteMap, emptyFM, lookupFM, addToFM, elemFM )
49 %************************************************************************
51 \subsection[Module]{The name of a module}
53 %************************************************************************
56 data Module = Module FAST_STRING -- User and interface files
57 FAST_STRING -- Print this in C files
59 -- The C version has quote chars Z-encoded
61 instance Outputable Module where
64 instance Eq Module where
65 (Module m1 _) == (Module m2 _) = m1 == m2
67 instance Ord Module where
68 (Module m1 _) `compare` (Module m2 _) = m1 `compare` m2
70 pprModule :: Module -> SDoc
71 pprModule (Module real code)
72 = getPprStyle $ \ sty ->
78 mkModule :: String -> Module
79 mkModule s = Module (_PK_ s) (identToC s)
81 mkModuleFS :: FAST_STRING -> Module
82 mkModuleFS s = Module s (identFsToC s)
84 moduleString :: Module -> String
85 moduleString (Module mod _) = _UNPK_ mod
87 moduleCString :: Module -> String
88 moduleCString (Module _ code) = _UNPK_ code
92 %************************************************************************
94 \subsection[Name-pieces-datatypes]{The @OccName@ datatypes}
96 %************************************************************************
99 data OccName = OccName
101 FAST_STRING -- The 'real name'
102 FAST_STRING -- Print this in interface files
103 FAST_STRING -- Print this in C/asm code
105 -- The OccSpace/real-name pair define the OccName
106 -- The iface and c/asm versions are simply derived from the
107 -- other two. They are cached here simply to avoid recomputing
108 -- them repeatedly when printing
110 -- The latter two are irrelevant in RdrNames; on the other hand,
111 -- the OccSpace field is irrelevant after RdrNames.
112 -- So the OccName type might be refined a bit.
113 -- It is now abstract so that's easier than before
116 -- Why three print-names?
118 -- ---------------------
121 -- + + Zp Operators OK in interface files;
122 -- 'Z' is the escape char for C names
124 -- x# x# xZh Trailing # lexed ok by GHC -fglasgow-exts
126 -- _foo _ufoo _ufoo Leading '_' is the escape char in interface files
128 -- _vfoo _vfoo _vfoo Worker for foo
130 -- _wp _wp _wp Worker for +
133 data OccSpace = VarOcc -- Variables and data constructors
134 | TvOcc -- Type variables
135 | TCOcc -- Type constructors and classes
140 %************************************************************************
142 \subsection{Printing}
144 %************************************************************************
147 instance Outputable OccName where
150 pprOccName :: OccName -> SDoc
151 pprOccName (OccName space real iface code)
152 = getPprStyle $ \ sty ->
153 if codeStyle sty then
155 else if ifaceStyle sty then
162 %************************************************************************
164 \subsection{Construction}
166 %************************************************************************
168 *Source-code* things beginning with '_' are zapped to begin with '_u'
171 mkSrcOcc :: OccSpace -> FAST_STRING -> OccName
173 = case _UNPK_ real of
175 '_' : rest -> OccName occ_sp real (_PK_ zapped_str) (identToC zapped_str)
177 zapped_str = '_' : 'u' : rest
179 other -> OccName occ_sp real real (identFsToC real)
181 srcVarOcc, srcTCOcc, srcTvOcc :: FAST_STRING -> OccName
182 srcVarOcc = mkSrcOcc VarOcc
183 srcTCOcc = mkSrcOcc TCOcc
184 srcTvOcc = mkSrcOcc TvOcc
187 However, things that don't come from Haskell source code aren't
191 mkOcc :: OccSpace -> String -> OccName
192 mkOcc occ_sp str = OccName occ_sp fs fs (identToC str)
196 mkFsOcc :: OccSpace -> FAST_STRING -> OccName
197 mkFsOcc occ_sp real = OccName occ_sp real real (identFsToC real)
199 varOcc, tcOcc, tvOcc :: FAST_STRING -> OccName
200 varOcc = mkFsOcc VarOcc
201 tcOcc = mkFsOcc TCOcc
202 tvOcc = mkFsOcc TvOcc
206 %************************************************************************
208 \subsection{Making system names}
210 %************************************************************************
212 Here's our convention for splitting up the interface file name space:
214 _d... dictionary identifiers
216 _f... dict-fun identifiers (from inst decls)
217 _g... ditto, when the tycon has symbols
219 _t... externally visible (non-user visible) names
221 _m... default methods
222 _n... default methods (encoded symbols, eg. <= becomes _nle)
224 _p... superclass selectors
227 _w... workers (encoded symbols)
229 _x... local variables
231 _u... user-defined names that previously began with '_'
233 _T... compiler-generated tycons for dictionaries
234 _D.. ...ditto data cons
236 __.... keywords (__export, __letrec etc.)
238 This knowledge is encoded in the following functions.
243 @mkDerivedOcc@ generates an @OccName@ from an existing @OccName@;
244 eg: workers, derived methods
246 We pass a character to use as the prefix. So, for example,
247 "f" gets derived to "_vf", if the prefix char is 'v'
250 mk_deriv :: OccSpace -> Char -> String -> OccName
251 mk_deriv occ_sp sys_ch str = mkOcc occ_sp ('_' : sys_ch : str)
254 Things are a bit more complicated if the thing is an operator; then
255 we must encode it into a normal identifier first. We do this in
256 a simple way, and use a different character prefix (one after the one
257 suggested). For example
258 "<" gets derived to "_wl", if the prefix char is 'v'
261 mk_enc_deriv :: OccSpace
262 -> Char -- The system-name-space character (see list above)
263 -> OccName -- The OccName from which we are deriving
266 mk_enc_deriv occ_sp sys_ch occ
267 | needs_encoding real_str = mk_deriv occ_sp sys_op_ch (encode_operator real_str)
268 | otherwise = mk_deriv occ_sp sys_ch real_str
270 real_str = occNameString occ
271 sys_op_ch = succ sys_ch
274 mkDictOcc, mkWorkerOcc, mkDefaultMethodOcc,
275 mkClassTyConOcc, mkClassDataConOcc
276 :: OccName -> OccName
278 mkWorkerOcc = mk_enc_deriv VarOcc 'v' -- v,w
279 mkDefaultMethodOcc = mk_enc_deriv VarOcc 'm' -- m,n
280 mkClassTyConOcc = mk_enc_deriv TCOcc 'T' -- not U
281 mkClassDataConOcc = mk_enc_deriv VarOcc 'D' -- not E
282 mkDictOcc = mk_enc_deriv VarOcc 'd' -- not e
286 mkSuperDictSelOcc :: Int -- Index of superclass, eg 3
287 -> OccName -- Class, eg "Ord"
288 -> OccName -- eg "p3Ord"
289 mkSuperDictSelOcc index cls_occ
290 = mk_deriv VarOcc 'p' (show index ++ occNameString cls_occ)
295 mkDFunOcc :: OccName -- class, eg "Ord"
296 -> OccName -- tycon (or something convenient from the instance type)
298 -> Int -- Unique to distinguish dfuns which share the previous two
300 -> OccName -- "dOrdMaybe3"
302 mkDFunOcc cls_occ tycon_occ index
303 | needs_encoding tycon_str -- Drat! Have to encode the tycon
304 = mk_deriv VarOcc 'g' (show_index ++ cls_str ++ encode_operator tycon_str)
305 | otherwise -- Normal case
306 = mk_deriv VarOcc 'f' (show_index ++ cls_str ++ tycon_str)
308 cls_str = occNameString cls_occ
309 tycon_str = occNameString tycon_occ
310 -- NB: if a non-operator the tycon has a trailing # we don't encode.
311 show_index | index == 0 = ""
312 | otherwise = show index
316 %************************************************************************
318 \subsection{Lexical categories}
320 %************************************************************************
322 These functions test strings to see if they fit the lexical categories
323 defined in the Haskell report.
326 isLexCon, isLexVar, isLexId, isLexSym :: FAST_STRING -> Bool
327 isLexConId, isLexConSym, isLexVarId, isLexVarSym :: FAST_STRING -> Bool
329 isLexCon cs = isLexConId cs || isLexConSym cs
330 isLexVar cs = isLexVarId cs || isLexVarSym cs
332 isLexId cs = isLexConId cs || isLexVarId cs
333 isLexSym cs = isLexConSym cs || isLexVarSym cs
337 isLexConId cs -- Prefix type or data constructors
338 | _NULL_ cs = False -- e.g. "Foo", "[]", "(,)"
339 | cs == SLIT("[]") = True
340 | c == '(' = True -- (), (,), (,,), ...
341 | otherwise = isUpper c || isUpperISO c
345 isLexVarId cs -- Ordinary prefix identifiers
346 | _NULL_ cs = False -- e.g. "x", "_x"
347 | otherwise = isLower c || isLowerISO c || c == '_'
351 isLexConSym cs -- Infix type or data constructors
352 | _NULL_ cs = False -- e.g. ":-:", ":", "->"
353 | otherwise = c == ':'
358 isLexVarSym cs -- Infix identifiers
359 | _NULL_ cs = False -- e.g. "+"
360 | otherwise = isSymbolASCII c
366 isSymbolASCII c = c `elem` "!#$%&*+./<=>?@\\^|~-"
367 isSymbolISO c = ord c `elem` (0xd7 : 0xf7 : [0xa1 .. 0xbf])
368 isUpperISO (C# c#) = c# `geChar#` '\xc0'# && c# `leChar#` '\xde'# && c# `neChar#` '\xd7'#
369 --0xc0 <= oc && oc <= 0xde && oc /= 0xd7 where oc = ord c
370 isLowerISO (C# c#) = c# `geChar#` '\xdf'# && c# `leChar#` '\xff'# && c# `neChar#` '\xf7'#
371 --0xdf <= oc && oc <= 0xff && oc /= 0xf7 where oc = ord c
374 %************************************************************************
376 \subsection{Predicates and taking them apart}
378 %************************************************************************
381 occNameString :: OccName -> String
382 occNameString (OccName _ s _ _) = _UNPK_ s
384 -- occNameFlavour is used only to generate good error messages, so it doesn't matter
385 -- that the VarOcc case isn't mega-efficient. We could have different Occ constructors for
386 -- data constructors and values, but that makes everything else a bit more complicated.
387 occNameFlavour :: OccName -> String
388 occNameFlavour (OccName VarOcc s _ _) | isLexConId s = "Data constructor"
389 | otherwise = "Value"
390 occNameFlavour (OccName TvOcc _ _ _) = "Type variable"
391 occNameFlavour (OccName TCOcc s _ _) = "Type constructor or class"
393 isVarOcc, isTCOcc, isTvOcc,
394 isConSymOcc, isSymOcc, isWildCardOcc :: OccName -> Bool
396 isVarOcc (OccName VarOcc _ _ _) = True
397 isVarOcc other = False
399 isTvOcc (OccName TvOcc _ _ _) = True
400 isTvOcc other = False
402 isTCOcc (OccName TCOcc _ _ _) = True
403 isTCOcc other = False
405 isConSymOcc (OccName _ s _ _) = isLexConSym s
407 isSymOcc (OccName _ s _ _) = isLexSym s
409 isConOcc (OccName _ s _ _) = isLexCon s
411 isWildCardOcc (OccName _ s _ _) = (_HEAD_ s) == '_' && _LENGTH_ s == 1
413 isAnonOcc (OccName _ s _ _) = (_HEAD_ s) == '_'
417 %************************************************************************
419 \subsection{Comparison}
421 %************************************************************************
423 Comparison is done by space and 'real' name
426 instance Eq OccName where
427 a == b = case (a `compare` b) of { EQ -> True; _ -> False }
428 a /= b = case (a `compare` b) of { EQ -> False; _ -> True }
430 instance Ord OccName where
431 a <= b = case (a `compare` b) of { LT -> True; EQ -> True; GT -> False }
432 a < b = case (a `compare` b) of { LT -> True; EQ -> False; GT -> False }
433 a >= b = case (a `compare` b) of { LT -> False; EQ -> True; GT -> True }
434 a > b = case (a `compare` b) of { LT -> False; EQ -> False; GT -> True }
436 compare (OccName sp1 r1 _ _) (OccName sp2 r2 _ _)
437 = (sp1 `compare` sp2) `thenCmp` (r1 `compare` r2)
441 %************************************************************************
443 \subsection{Tidying them up}
445 %************************************************************************
447 Before we print chunks of code we like to rename it so that
448 we don't have to print lots of silly uniques in it. But we mustn't
449 accidentally introduce name clashes! So the idea is that we leave the
450 OccName alone unless it accidentally clashes with one that is already
451 in scope; if so, we tack on '1' at the end and try again, then '2', and
452 so on till we find a unique one.
454 There's a wrinkle for operators. Consider '>>='. We can't use '>>=1'
455 because that isn't a single lexeme. So we encode it to 'lle' and *then*
456 tack on the '1', if necessary.
459 type TidyOccEnv = FiniteMap FAST_STRING Int -- The in-scope OccNames
460 emptyTidyOccEnv = emptyFM
462 initTidyOccEnv :: [OccName] -> TidyOccEnv -- Initialise with names to avoid!
463 initTidyOccEnv = foldl (\env (OccName _ fs _ _) -> addToFM env fs 1) emptyTidyOccEnv
465 tidyOccName :: TidyOccEnv -> OccName -> (TidyOccEnv, OccName)
467 tidyOccName in_scope occ@(OccName occ_sp real _ _)
468 | not (real `elemFM` in_scope) &&
469 not (isLexCon real) -- Hack alert! Specialised versions of overloaded
470 -- constructors end up as ordinary Ids, but we don't
471 -- want them as ConIds in interface files.
473 = (addToFM in_scope real 1, occ) -- First occurrence
475 | otherwise -- Already occurs
476 = -- First encode, to deal with
478 -- b) trailing # signs
479 -- so that we can then append '1', '2', etc
480 go in_scope (encode_operator (_UNPK_ real))
483 go in_scope str = case lookupFM in_scope pk_str of
484 Just n -> go (addToFM in_scope pk_str (n+1)) (str ++ show n)
485 -- Need to go round again, just in case "t3" (say)
486 -- clashes with a "t3" that's already in scope
488 Nothing -> (addToFM in_scope pk_str 1, mkFsOcc occ_sp pk_str)
495 %************************************************************************
497 \subsection{Encoding for operators in derived names}
499 %************************************************************************
501 See comments with mk_enc_deriv
504 needs_encoding :: String -> Bool -- Needs encoding when embedded in a derived name
505 -- Just look at the first character
506 needs_encoding (c:cs) = not (isAlpha c || c == '_')
508 encode_operator :: String -> String
509 encode_operator nm = foldr tran "" nm
511 tran c cs = case trChar c of
512 '\0' -> '_' : show (ord c) ++ cs -- No translation
537 trChar _ = '\0' -- No translation
541 %************************************************************************
543 \subsection{The 'Z' encoding}
545 %************************************************************************
547 We provide two interfaces for efficiency.
550 identToC :: String -> FAST_STRING
552 | all ISALPHANUM str && not std = _PK_ str
553 | std = _PK_ ("Zs" ++ encode str)
554 | otherwise = _PK_ (encode str)
556 std = has_std_prefix str
558 identFsToC :: FAST_STRING -> FAST_STRING
560 | all ISALPHANUM str && not std = fast_str
561 | std = _PK_ ("Zs" ++ encode str)
562 | otherwise = _PK_ (encode str)
564 std = has_std_prefix str
565 str = _UNPK_ fast_str
567 -- avoid "stdin", "stdout", and "stderr"...
568 has_std_prefix ('s':'t':'d':_) = True
569 has_std_prefix _ = False
571 encode :: String -> String
573 encode (c:cs) = encode_ch c ++ encode cs
575 encode_ch :: Char -> String
576 encode_ch c | ISALPHANUM c = [c]
590 encode_ch '\'' = "Zq"
594 encode_ch c = 'Z':show (ord c)
597 For \tr{modnameToC}, we really only have to worry about \tr{'}s
598 (quote chars) in the name. Rare.
601 modnameToC :: FAST_STRING -> FAST_STRING
602 modnameToC fast_str = identFsToC fast_str