2 % (c) The GRASP/AQUA Project, Glasgow University, 1992-1998
5 \section[OccName]{@OccName@}
10 Module, -- Abstract, instance of Outputable
11 mkModule, mkModuleFS, moduleString, moduleCString, pprModule,
14 OccName, -- Abstract, instance of Outputable
15 varOcc, tcOcc, tvOcc, -- Occ constructors
16 srcVarOcc, srcTCOcc, srcTvOcc, -- For Occs arising from source code
18 mkSuperDictSelOcc, mkDFunOcc,
19 mkDictOcc, mkWorkerOcc, mkDefaultMethodOcc,
20 mkClassTyConOcc, mkClassDataConOcc,
22 isTvOcc, isTCOcc, isVarOcc, isConSymOcc, isConOcc, isSymOcc,
23 isWildCardOcc, isAnonOcc,
24 pprOccName, occNameString, occNameFlavour,
26 -- The basic form of names
27 isLexCon, isLexVar, isLexId, isLexSym,
28 isLexConId, isLexConSym, isLexVarId, isLexVarSym,
29 isLowerISO, isUpperISO,
32 TidyOccEnv, emptyTidyOccEnv, tidyOccName, initTidyOccEnv,
39 #include "HsVersions.h"
42 import Char ( isAlpha, isUpper, isLower, isAlphaNum{-sigh-}, ord )
44 import Char ( isAlpha, isUpper, isLower, isAlphanum, ord )
46 import Util ( thenCmp )
47 import FiniteMap ( FiniteMap, emptyFM, lookupFM, addToFM, elemFM )
53 %************************************************************************
55 \subsection[Module]{The name of a module}
57 %************************************************************************
60 data Module = Module FAST_STRING -- User and interface files
61 FAST_STRING -- Print this in C files
63 -- The C version has quote chars Z-encoded
65 instance Outputable Module where
68 instance Eq Module where
69 (Module m1 _) == (Module m2 _) = m1 == m2
71 instance Ord Module where
72 (Module m1 _) `compare` (Module m2 _) = m1 `compare` m2
74 pprModule :: Module -> SDoc
75 pprModule (Module real code)
76 = getPprStyle $ \ sty ->
82 mkModule :: String -> Module
83 mkModule s = Module (_PK_ s) (identToC s)
85 mkModuleFS :: FAST_STRING -> Module
86 mkModuleFS s = Module s (identFsToC s)
88 moduleString :: Module -> String
89 moduleString (Module mod _) = _UNPK_ mod
91 moduleCString :: Module -> String
92 moduleCString (Module _ code) = _UNPK_ code
96 %************************************************************************
98 \subsection[Name-pieces-datatypes]{The @OccName@ datatypes}
100 %************************************************************************
103 data OccName = OccName
105 FAST_STRING -- The 'real name'
106 FAST_STRING -- Print this in interface files
107 FAST_STRING -- Print this in C/asm code
109 -- The OccSpace/real-name pair define the OccName
110 -- The iface and c/asm versions are simply derived from the
111 -- other two. They are cached here simply to avoid recomputing
112 -- them repeatedly when printing
114 -- The latter two are irrelevant in RdrNames; on the other hand,
115 -- the OccSpace field is irrelevant after RdrNames.
116 -- So the OccName type might be refined a bit.
117 -- It is now abstract so that's easier than before
120 -- Why three print-names?
122 -- ---------------------
125 -- + + Zp Operators OK in interface files;
126 -- 'Z' is the escape char for C names
128 -- x# x# xZh Trailing # lexed ok by GHC -fglasgow-exts
130 -- _foo _ufoo _ufoo Leading '_' is the escape char in interface files
132 -- _vfoo _vfoo _vfoo Worker for foo
134 -- _wp _wp _wp Worker for +
137 data OccSpace = VarOcc -- Variables and data constructors
138 | TvOcc -- Type variables
139 | TCOcc -- Type constructors and classes
144 %************************************************************************
146 \subsection{Printing}
148 %************************************************************************
151 instance Outputable OccName where
154 pprOccName :: OccName -> SDoc
155 pprOccName (OccName space real iface code)
156 = getPprStyle $ \ sty ->
157 if codeStyle sty then
159 else if ifaceStyle sty then
166 %************************************************************************
168 \subsection{Construction}
170 %************************************************************************
172 *Source-code* things beginning with '_' are zapped to begin with '_u'
175 mkSrcOcc :: OccSpace -> FAST_STRING -> OccName
177 = case _UNPK_ real of
179 '_' : rest -> OccName occ_sp real (_PK_ zapped_str) (identToC zapped_str)
181 zapped_str = '_' : 'u' : rest
183 other -> OccName occ_sp real real (identFsToC real)
185 srcVarOcc, srcTCOcc, srcTvOcc :: FAST_STRING -> OccName
186 srcVarOcc = mkSrcOcc VarOcc
187 srcTCOcc = mkSrcOcc TCOcc
188 srcTvOcc = mkSrcOcc TvOcc
191 However, things that don't come from Haskell source code aren't
195 mkOcc :: OccSpace -> String -> OccName
196 mkOcc occ_sp str = OccName occ_sp fs fs (identToC str)
200 mkFsOcc :: OccSpace -> FAST_STRING -> OccName
201 mkFsOcc occ_sp real = OccName occ_sp real real (identFsToC real)
203 varOcc, tcOcc, tvOcc :: FAST_STRING -> OccName
204 varOcc = mkFsOcc VarOcc
205 tcOcc = mkFsOcc TCOcc
206 tvOcc = mkFsOcc TvOcc
210 %************************************************************************
212 \subsection{Making system names}
214 %************************************************************************
216 Here's our convention for splitting up the interface file name space:
218 _d... dictionary identifiers
220 _f... dict-fun identifiers (from inst decls)
221 _g... ditto, when the tycon has symbols
223 _t... externally visible (non-user visible) names
225 _m... default methods
226 _n... default methods (encoded symbols, eg. <= becomes _nle)
228 _p... superclass selectors
231 _w... workers (encoded symbols)
233 _x... local variables
235 _u... user-defined names that previously began with '_'
237 _T... compiler-generated tycons for dictionaries
238 _D.. ...ditto data cons
240 __.... keywords (__export, __letrec etc.)
242 This knowledge is encoded in the following functions.
247 @mkDerivedOcc@ generates an @OccName@ from an existing @OccName@;
248 eg: workers, derived methods
250 We pass a character to use as the prefix. So, for example,
251 "f" gets derived to "_vf", if the prefix char is 'v'
254 mk_deriv :: OccSpace -> Char -> String -> OccName
255 mk_deriv occ_sp sys_ch str = mkOcc occ_sp ('_' : sys_ch : str)
258 Things are a bit more complicated if the thing is an operator; then
259 we must encode it into a normal identifier first. We do this in
260 a simple way, and use a different character prefix (one after the one
261 suggested). For example
262 "<" gets derived to "_wl", if the prefix char is 'v'
265 mk_enc_deriv :: OccSpace
266 -> Char -- The system-name-space character (see list above)
267 -> OccName -- The OccName from which we are deriving
270 mk_enc_deriv occ_sp sys_ch occ
271 | needs_encoding real_str = mk_deriv occ_sp sys_op_ch (encode_operator real_str)
272 | otherwise = mk_deriv occ_sp sys_ch real_str
274 real_str = occNameString occ
275 sys_op_ch = succ sys_ch
278 mkDictOcc, mkWorkerOcc, mkDefaultMethodOcc,
279 mkClassTyConOcc, mkClassDataConOcc
280 :: OccName -> OccName
282 mkWorkerOcc = mk_enc_deriv VarOcc 'v' -- v,w
283 mkDefaultMethodOcc = mk_enc_deriv VarOcc 'm' -- m,n
284 mkClassTyConOcc = mk_enc_deriv TCOcc 'T' -- not U
285 mkClassDataConOcc = mk_enc_deriv VarOcc 'D' -- not E
286 mkDictOcc = mk_enc_deriv VarOcc 'd' -- not e
290 mkSuperDictSelOcc :: Int -- Index of superclass, eg 3
291 -> OccName -- Class, eg "Ord"
292 -> OccName -- eg "p3Ord"
293 mkSuperDictSelOcc index cls_occ
294 = mk_deriv VarOcc 'p' (show index ++ occNameString cls_occ)
299 mkDFunOcc :: OccName -- class, eg "Ord"
300 -> OccName -- tycon (or something convenient from the instance type)
302 -> Int -- Unique to distinguish dfuns which share the previous two
304 -> OccName -- "dOrdMaybe3"
306 mkDFunOcc cls_occ tycon_occ index
307 | needs_encoding tycon_str -- Drat! Have to encode the tycon
308 = mk_deriv VarOcc 'g' (show_index ++ cls_str ++ encode_operator tycon_str)
309 | otherwise -- Normal case
310 = mk_deriv VarOcc 'f' (show_index ++ cls_str ++ tycon_str)
312 cls_str = occNameString cls_occ
313 tycon_str = occNameString tycon_occ
314 -- NB: if a non-operator the tycon has a trailing # we don't encode.
315 show_index | index == 0 = ""
316 | otherwise = show index
320 %************************************************************************
322 \subsection{Lexical categories}
324 %************************************************************************
326 These functions test strings to see if they fit the lexical categories
327 defined in the Haskell report.
330 isLexCon, isLexVar, isLexId, isLexSym :: FAST_STRING -> Bool
331 isLexConId, isLexConSym, isLexVarId, isLexVarSym :: FAST_STRING -> Bool
333 isLexCon cs = isLexConId cs || isLexConSym cs
334 isLexVar cs = isLexVarId cs || isLexVarSym cs
336 isLexId cs = isLexConId cs || isLexVarId cs
337 isLexSym cs = isLexConSym cs || isLexVarSym cs
341 isLexConId cs -- Prefix type or data constructors
342 | _NULL_ cs = False -- e.g. "Foo", "[]", "(,)"
343 | cs == SLIT("[]") = True
344 | c == '(' = True -- (), (,), (,,), ...
345 | otherwise = isUpper c || isUpperISO c
349 isLexVarId cs -- Ordinary prefix identifiers
350 | _NULL_ cs = False -- e.g. "x", "_x"
351 | otherwise = isLower c || isLowerISO c || c == '_'
355 isLexConSym cs -- Infix type or data constructors
356 | _NULL_ cs = False -- e.g. ":-:", ":", "->"
357 | otherwise = c == ':'
362 isLexVarSym cs -- Infix identifiers
363 | _NULL_ cs = False -- e.g. "+"
364 | otherwise = isSymbolASCII c
370 isSymbolASCII c = c `elem` "!#$%&*+./<=>?@\\^|~-"
371 isSymbolISO c = ord c `elem` (0xd7 : 0xf7 : [0xa1 .. 0xbf])
372 isUpperISO (C# c#) = c# `geChar#` '\xc0'# && c# `leChar#` '\xde'# && c# `neChar#` '\xd7'#
373 --0xc0 <= oc && oc <= 0xde && oc /= 0xd7 where oc = ord c
374 isLowerISO (C# c#) = c# `geChar#` '\xdf'# && c# `leChar#` '\xff'# && c# `neChar#` '\xf7'#
375 --0xdf <= oc && oc <= 0xff && oc /= 0xf7 where oc = ord c
378 %************************************************************************
380 \subsection{Predicates and taking them apart}
382 %************************************************************************
385 occNameString :: OccName -> String
386 occNameString (OccName _ s _ _) = _UNPK_ s
388 -- occNameFlavour is used only to generate good error messages, so it doesn't matter
389 -- that the VarOcc case isn't mega-efficient. We could have different Occ constructors for
390 -- data constructors and values, but that makes everything else a bit more complicated.
391 occNameFlavour :: OccName -> String
392 occNameFlavour (OccName VarOcc s _ _) | isLexConId s = "Data constructor"
393 | otherwise = "Value"
394 occNameFlavour (OccName TvOcc _ _ _) = "Type variable"
395 occNameFlavour (OccName TCOcc s _ _) = "Type constructor or class"
397 isVarOcc, isTCOcc, isTvOcc,
398 isConSymOcc, isSymOcc, isWildCardOcc :: OccName -> Bool
400 isVarOcc (OccName VarOcc _ _ _) = True
401 isVarOcc other = False
403 isTvOcc (OccName TvOcc _ _ _) = True
404 isTvOcc other = False
406 isTCOcc (OccName TCOcc _ _ _) = True
407 isTCOcc other = False
409 isConSymOcc (OccName _ s _ _) = isLexConSym s
411 isSymOcc (OccName _ s _ _) = isLexSym s
413 isConOcc (OccName _ s _ _) = isLexCon s
415 isWildCardOcc (OccName _ s _ _) = (_HEAD_ s) == '_' && _LENGTH_ s == 1
417 isAnonOcc (OccName _ s _ _) = (_HEAD_ s) == '_'
421 %************************************************************************
423 \subsection{Comparison}
425 %************************************************************************
427 Comparison is done by space and 'real' name
430 instance Eq OccName where
431 a == b = case (a `compare` b) of { EQ -> True; _ -> False }
432 a /= b = case (a `compare` b) of { EQ -> False; _ -> True }
434 instance Ord OccName where
435 a <= b = case (a `compare` b) of { LT -> True; EQ -> True; GT -> False }
436 a < b = case (a `compare` b) of { LT -> True; EQ -> False; GT -> False }
437 a >= b = case (a `compare` b) of { LT -> False; EQ -> True; GT -> True }
438 a > b = case (a `compare` b) of { LT -> False; EQ -> False; GT -> True }
440 compare (OccName sp1 r1 _ _) (OccName sp2 r2 _ _)
441 = (sp1 `compare` sp2) `thenCmp` (r1 `compare` r2)
445 %************************************************************************
447 \subsection{Tidying them up}
449 %************************************************************************
451 Before we print chunks of code we like to rename it so that
452 we don't have to print lots of silly uniques in it. But we mustn't
453 accidentally introduce name clashes! So the idea is that we leave the
454 OccName alone unless it accidentally clashes with one that is already
455 in scope; if so, we tack on '1' at the end and try again, then '2', and
456 so on till we find a unique one.
458 There's a wrinkle for operators. Consider '>>='. We can't use '>>=1'
459 because that isn't a single lexeme. So we encode it to 'lle' and *then*
460 tack on the '1', if necessary.
463 type TidyOccEnv = FiniteMap FAST_STRING Int -- The in-scope OccNames
464 emptyTidyOccEnv = emptyFM
466 initTidyOccEnv :: [OccName] -> TidyOccEnv -- Initialise with names to avoid!
467 initTidyOccEnv = foldl (\env (OccName _ fs _ _) -> addToFM env fs 1) emptyTidyOccEnv
469 tidyOccName :: TidyOccEnv -> OccName -> (TidyOccEnv, OccName)
471 tidyOccName in_scope occ@(OccName occ_sp real _ _)
472 | not (real `elemFM` in_scope) &&
473 not (isLexCon real) -- Hack alert! Specialised versions of overloaded
474 -- constructors end up as ordinary Ids, but we don't
475 -- want them as ConIds in interface files.
477 = (addToFM in_scope real 1, occ) -- First occurrence
479 | otherwise -- Already occurs
480 = -- First encode, to deal with
482 -- b) trailing # signs
483 -- so that we can then append '1', '2', etc
484 go in_scope (encode_operator (_UNPK_ real))
487 go in_scope str = case lookupFM in_scope pk_str of
488 Just n -> go (addToFM in_scope pk_str (n+1)) (str ++ show n)
489 -- Need to go round again, just in case "t3" (say)
490 -- clashes with a "t3" that's already in scope
492 Nothing -> (addToFM in_scope pk_str 1, mkFsOcc occ_sp pk_str)
499 %************************************************************************
501 \subsection{Encoding for operators in derived names}
503 %************************************************************************
505 See comments with mk_enc_deriv
508 needs_encoding :: String -> Bool -- Needs encoding when embedded in a derived name
509 -- Just look at the first character
510 needs_encoding (c:cs) = not (isAlpha c || c == '_')
512 encode_operator :: String -> String
513 encode_operator nm = foldr tran "" nm
515 tran c cs = case trChar c of
516 '\0' -> '_' : show (ord c) ++ cs -- No translation
541 trChar _ = '\0' -- No translation
545 %************************************************************************
547 \subsection{The 'Z' encoding}
549 %************************************************************************
551 We provide two interfaces for efficiency.
554 identToC :: String -> FAST_STRING
556 | all isAlphanum str && not std = _PK_ str
557 | std = _PK_ ("Zs" ++ encode str)
558 | otherwise = _PK_ (encode str)
560 std = has_std_prefix str
562 identFsToC :: FAST_STRING -> FAST_STRING
564 | all isAlphanum str && not std = fast_str
565 | std = _PK_ ("Zs" ++ encode str)
566 | otherwise = _PK_ (encode str)
568 std = has_std_prefix str
569 str = _UNPK_ fast_str
571 -- avoid "stdin", "stdout", and "stderr"...
572 has_std_prefix ('s':'t':'d':_) = True
573 has_std_prefix _ = False
575 encode :: String -> String
577 encode (c:cs) = encode_ch c ++ encode cs
579 encode_ch :: Char -> String
580 encode_ch c | isAlphanum c = [c]
594 encode_ch '\'' = "Zq"
598 encode_ch c = 'Z':show (ord c)
601 For \tr{modnameToC}, we really only have to worry about \tr{'}s
602 (quote chars) in the name. Rare.
605 modnameToC :: FAST_STRING -> FAST_STRING
606 modnameToC fast_str = identFsToC fast_str