2 % (c) The GRASP/AQUA Project, Glasgow University, 1992-1998
5 \section[OccName]{@OccName@}
10 Module, -- Abstract, instance of Outputable
11 mkModule, mkModuleFS, moduleString, moduleCString, pprModule,
14 OccName, -- Abstract, instance of Outputable
15 varOcc, tcOcc, tvOcc, -- Occ constructors
16 srcVarOcc, srcTCOcc, srcTvOcc, -- For Occs arising from source code
18 mkSuperDictSelOcc, mkDFunOcc,
19 mkDictOcc, mkWorkerOcc, mkDefaultMethodOcc,
20 mkClassTyConOcc, mkClassDataConOcc,
22 isTvOcc, isTCOcc, isVarOcc, isConSymOcc, isConOcc, isSymOcc,
23 isWildCardOcc, isAnonOcc,
24 pprOccName, occNameString, occNameFlavour,
26 -- The basic form of names
27 isLexCon, isLexVar, isLexId, isLexSym,
28 isLexConId, isLexConSym, isLexVarId, isLexVarSym,
29 isLowerISO, isUpperISO,
32 TidyOccEnv, emptyTidyOccEnv, tidyOccName, initTidyOccEnv,
39 #include "HsVersions.h"
42 #define ISALPHANUM isAlphaNum
44 #define ISALPHANUM isAlphanum
47 import Char ( isAlpha, isUpper, isLower, ISALPHANUM, ord )
48 import Util ( thenCmp )
49 import FiniteMap ( FiniteMap, emptyFM, lookupFM, addToFM, elemFM )
55 %************************************************************************
57 \subsection[Module]{The name of a module}
59 %************************************************************************
62 data Module = Module FAST_STRING -- User and interface files
63 FAST_STRING -- Print this in C files
65 -- The C version has quote chars Z-encoded
67 instance Outputable Module where
70 instance Eq Module where
71 (Module m1 _) == (Module m2 _) = m1 == m2
73 instance Ord Module where
74 (Module m1 _) `compare` (Module m2 _) = m1 `compare` m2
76 pprModule :: Module -> SDoc
77 pprModule (Module real code)
78 = getPprStyle $ \ sty ->
84 mkModule :: String -> Module
85 mkModule s = Module (_PK_ s) (identToC s)
87 mkModuleFS :: FAST_STRING -> Module
88 mkModuleFS s = Module s (identFsToC s)
90 moduleString :: Module -> String
91 moduleString (Module mod _) = _UNPK_ mod
93 moduleCString :: Module -> String
94 moduleCString (Module _ code) = _UNPK_ code
98 %************************************************************************
100 \subsection[Name-pieces-datatypes]{The @OccName@ datatypes}
102 %************************************************************************
105 data OccName = OccName
107 FAST_STRING -- The 'real name'
108 FAST_STRING -- Print this in interface files
109 FAST_STRING -- Print this in C/asm code
111 -- The OccSpace/real-name pair define the OccName
112 -- The iface and c/asm versions are simply derived from the
113 -- other two. They are cached here simply to avoid recomputing
114 -- them repeatedly when printing
116 -- The latter two are irrelevant in RdrNames; on the other hand,
117 -- the OccSpace field is irrelevant after RdrNames.
118 -- So the OccName type might be refined a bit.
119 -- It is now abstract so that's easier than before
122 -- Why three print-names?
124 -- ---------------------
127 -- + + Zp Operators OK in interface files;
128 -- 'Z' is the escape char for C names
130 -- x# x# xZh Trailing # lexed ok by GHC -fglasgow-exts
132 -- _foo _ufoo _ufoo Leading '_' is the escape char in interface files
134 -- _vfoo _vfoo _vfoo Worker for foo
136 -- _wp _wp _wp Worker for +
139 data OccSpace = VarOcc -- Variables and data constructors
140 | TvOcc -- Type variables
141 | TCOcc -- Type constructors and classes
146 %************************************************************************
148 \subsection{Printing}
150 %************************************************************************
153 instance Outputable OccName where
156 pprOccName :: OccName -> SDoc
157 pprOccName (OccName space real iface code)
158 = getPprStyle $ \ sty ->
159 if codeStyle sty then
161 else if ifaceStyle sty then
168 %************************************************************************
170 \subsection{Construction}
172 %************************************************************************
174 *Source-code* things beginning with '_' are zapped to begin with '_u'
177 mkSrcOcc :: OccSpace -> FAST_STRING -> OccName
179 = case _UNPK_ real of
181 '_' : rest -> OccName occ_sp real (_PK_ zapped_str) (identToC zapped_str)
183 zapped_str = '_' : 'u' : rest
185 other -> OccName occ_sp real real (identFsToC real)
187 srcVarOcc, srcTCOcc, srcTvOcc :: FAST_STRING -> OccName
188 srcVarOcc = mkSrcOcc VarOcc
189 srcTCOcc = mkSrcOcc TCOcc
190 srcTvOcc = mkSrcOcc TvOcc
193 However, things that don't come from Haskell source code aren't
197 mkOcc :: OccSpace -> String -> OccName
198 mkOcc occ_sp str = OccName occ_sp fs fs (identToC str)
202 mkFsOcc :: OccSpace -> FAST_STRING -> OccName
203 mkFsOcc occ_sp real = OccName occ_sp real real (identFsToC real)
205 varOcc, tcOcc, tvOcc :: FAST_STRING -> OccName
206 varOcc = mkFsOcc VarOcc
207 tcOcc = mkFsOcc TCOcc
208 tvOcc = mkFsOcc TvOcc
212 %************************************************************************
214 \subsection{Making system names}
216 %************************************************************************
218 Here's our convention for splitting up the interface file name space:
220 _d... dictionary identifiers
222 _f... dict-fun identifiers (from inst decls)
223 _g... ditto, when the tycon has symbols
225 _t... externally visible (non-user visible) names
227 _m... default methods
228 _n... default methods (encoded symbols, eg. <= becomes _nle)
230 _p... superclass selectors
233 _w... workers (encoded symbols)
235 _x... local variables
237 _u... user-defined names that previously began with '_'
239 _T... compiler-generated tycons for dictionaries
240 _D.. ...ditto data cons
242 __.... keywords (__export, __letrec etc.)
244 This knowledge is encoded in the following functions.
249 @mkDerivedOcc@ generates an @OccName@ from an existing @OccName@;
250 eg: workers, derived methods
252 We pass a character to use as the prefix. So, for example,
253 "f" gets derived to "_vf", if the prefix char is 'v'
256 mk_deriv :: OccSpace -> Char -> String -> OccName
257 mk_deriv occ_sp sys_ch str = mkOcc occ_sp ('_' : sys_ch : str)
260 Things are a bit more complicated if the thing is an operator; then
261 we must encode it into a normal identifier first. We do this in
262 a simple way, and use a different character prefix (one after the one
263 suggested). For example
264 "<" gets derived to "_wl", if the prefix char is 'v'
267 mk_enc_deriv :: OccSpace
268 -> Char -- The system-name-space character (see list above)
269 -> OccName -- The OccName from which we are deriving
272 mk_enc_deriv occ_sp sys_ch occ
273 | needs_encoding real_str = mk_deriv occ_sp sys_op_ch (encode_operator real_str)
274 | otherwise = mk_deriv occ_sp sys_ch real_str
276 real_str = occNameString occ
277 sys_op_ch = succ sys_ch
280 mkDictOcc, mkWorkerOcc, mkDefaultMethodOcc,
281 mkClassTyConOcc, mkClassDataConOcc
282 :: OccName -> OccName
284 mkWorkerOcc = mk_enc_deriv VarOcc 'v' -- v,w
285 mkDefaultMethodOcc = mk_enc_deriv VarOcc 'm' -- m,n
286 mkClassTyConOcc = mk_enc_deriv TCOcc 'T' -- not U
287 mkClassDataConOcc = mk_enc_deriv VarOcc 'D' -- not E
288 mkDictOcc = mk_enc_deriv VarOcc 'd' -- not e
292 mkSuperDictSelOcc :: Int -- Index of superclass, eg 3
293 -> OccName -- Class, eg "Ord"
294 -> OccName -- eg "p3Ord"
295 mkSuperDictSelOcc index cls_occ
296 = mk_deriv VarOcc 'p' (show index ++ occNameString cls_occ)
301 mkDFunOcc :: OccName -- class, eg "Ord"
302 -> OccName -- tycon (or something convenient from the instance type)
304 -> Int -- Unique to distinguish dfuns which share the previous two
306 -> OccName -- "dOrdMaybe3"
308 mkDFunOcc cls_occ tycon_occ index
309 | needs_encoding tycon_str -- Drat! Have to encode the tycon
310 = mk_deriv VarOcc 'g' (show_index ++ cls_str ++ encode_operator tycon_str)
311 | otherwise -- Normal case
312 = mk_deriv VarOcc 'f' (show_index ++ cls_str ++ tycon_str)
314 cls_str = occNameString cls_occ
315 tycon_str = occNameString tycon_occ
316 -- NB: if a non-operator the tycon has a trailing # we don't encode.
317 show_index | index == 0 = ""
318 | otherwise = show index
322 %************************************************************************
324 \subsection{Lexical categories}
326 %************************************************************************
328 These functions test strings to see if they fit the lexical categories
329 defined in the Haskell report.
332 isLexCon, isLexVar, isLexId, isLexSym :: FAST_STRING -> Bool
333 isLexConId, isLexConSym, isLexVarId, isLexVarSym :: FAST_STRING -> Bool
335 isLexCon cs = isLexConId cs || isLexConSym cs
336 isLexVar cs = isLexVarId cs || isLexVarSym cs
338 isLexId cs = isLexConId cs || isLexVarId cs
339 isLexSym cs = isLexConSym cs || isLexVarSym cs
343 isLexConId cs -- Prefix type or data constructors
344 | _NULL_ cs = False -- e.g. "Foo", "[]", "(,)"
345 | cs == SLIT("[]") = True
346 | c == '(' = True -- (), (,), (,,), ...
347 | otherwise = isUpper c || isUpperISO c
351 isLexVarId cs -- Ordinary prefix identifiers
352 | _NULL_ cs = False -- e.g. "x", "_x"
353 | otherwise = isLower c || isLowerISO c || c == '_'
357 isLexConSym cs -- Infix type or data constructors
358 | _NULL_ cs = False -- e.g. ":-:", ":", "->"
359 | otherwise = c == ':'
364 isLexVarSym cs -- Infix identifiers
365 | _NULL_ cs = False -- e.g. "+"
366 | otherwise = isSymbolASCII c
372 isSymbolASCII c = c `elem` "!#$%&*+./<=>?@\\^|~-"
373 isSymbolISO c = ord c `elem` (0xd7 : 0xf7 : [0xa1 .. 0xbf])
374 isUpperISO (C# c#) = c# `geChar#` '\xc0'# && c# `leChar#` '\xde'# && c# `neChar#` '\xd7'#
375 --0xc0 <= oc && oc <= 0xde && oc /= 0xd7 where oc = ord c
376 isLowerISO (C# c#) = c# `geChar#` '\xdf'# && c# `leChar#` '\xff'# && c# `neChar#` '\xf7'#
377 --0xdf <= oc && oc <= 0xff && oc /= 0xf7 where oc = ord c
380 %************************************************************************
382 \subsection{Predicates and taking them apart}
384 %************************************************************************
387 occNameString :: OccName -> String
388 occNameString (OccName _ s _ _) = _UNPK_ s
390 -- occNameFlavour is used only to generate good error messages, so it doesn't matter
391 -- that the VarOcc case isn't mega-efficient. We could have different Occ constructors for
392 -- data constructors and values, but that makes everything else a bit more complicated.
393 occNameFlavour :: OccName -> String
394 occNameFlavour (OccName VarOcc s _ _) | isLexConId s = "Data constructor"
395 | otherwise = "Value"
396 occNameFlavour (OccName TvOcc _ _ _) = "Type variable"
397 occNameFlavour (OccName TCOcc s _ _) = "Type constructor or class"
399 isVarOcc, isTCOcc, isTvOcc,
400 isConSymOcc, isSymOcc, isWildCardOcc :: OccName -> Bool
402 isVarOcc (OccName VarOcc _ _ _) = True
403 isVarOcc other = False
405 isTvOcc (OccName TvOcc _ _ _) = True
406 isTvOcc other = False
408 isTCOcc (OccName TCOcc _ _ _) = True
409 isTCOcc other = False
411 isConSymOcc (OccName _ s _ _) = isLexConSym s
413 isSymOcc (OccName _ s _ _) = isLexSym s
415 isConOcc (OccName _ s _ _) = isLexCon s
417 isWildCardOcc (OccName _ s _ _) = (_HEAD_ s) == '_' && _LENGTH_ s == 1
419 isAnonOcc (OccName _ s _ _) = (_HEAD_ s) == '_'
423 %************************************************************************
425 \subsection{Comparison}
427 %************************************************************************
429 Comparison is done by space and 'real' name
432 instance Eq OccName where
433 a == b = case (a `compare` b) of { EQ -> True; _ -> False }
434 a /= b = case (a `compare` b) of { EQ -> False; _ -> True }
436 instance Ord OccName where
437 a <= b = case (a `compare` b) of { LT -> True; EQ -> True; GT -> False }
438 a < b = case (a `compare` b) of { LT -> True; EQ -> False; GT -> False }
439 a >= b = case (a `compare` b) of { LT -> False; EQ -> True; GT -> True }
440 a > b = case (a `compare` b) of { LT -> False; EQ -> False; GT -> True }
442 compare (OccName sp1 r1 _ _) (OccName sp2 r2 _ _)
443 = (sp1 `compare` sp2) `thenCmp` (r1 `compare` r2)
447 %************************************************************************
449 \subsection{Tidying them up}
451 %************************************************************************
453 Before we print chunks of code we like to rename it so that
454 we don't have to print lots of silly uniques in it. But we mustn't
455 accidentally introduce name clashes! So the idea is that we leave the
456 OccName alone unless it accidentally clashes with one that is already
457 in scope; if so, we tack on '1' at the end and try again, then '2', and
458 so on till we find a unique one.
460 There's a wrinkle for operators. Consider '>>='. We can't use '>>=1'
461 because that isn't a single lexeme. So we encode it to 'lle' and *then*
462 tack on the '1', if necessary.
465 type TidyOccEnv = FiniteMap FAST_STRING Int -- The in-scope OccNames
466 emptyTidyOccEnv = emptyFM
468 initTidyOccEnv :: [OccName] -> TidyOccEnv -- Initialise with names to avoid!
469 initTidyOccEnv = foldl (\env (OccName _ fs _ _) -> addToFM env fs 1) emptyTidyOccEnv
471 tidyOccName :: TidyOccEnv -> OccName -> (TidyOccEnv, OccName)
473 tidyOccName in_scope occ@(OccName occ_sp real _ _)
474 | not (real `elemFM` in_scope) &&
475 not (isLexCon real) -- Hack alert! Specialised versions of overloaded
476 -- constructors end up as ordinary Ids, but we don't
477 -- want them as ConIds in interface files.
479 = (addToFM in_scope real 1, occ) -- First occurrence
481 | otherwise -- Already occurs
482 = -- First encode, to deal with
484 -- b) trailing # signs
485 -- so that we can then append '1', '2', etc
486 go in_scope (encode_operator (_UNPK_ real))
489 go in_scope str = case lookupFM in_scope pk_str of
490 Just n -> go (addToFM in_scope pk_str (n+1)) (str ++ show n)
491 -- Need to go round again, just in case "t3" (say)
492 -- clashes with a "t3" that's already in scope
494 Nothing -> (addToFM in_scope pk_str 1, mkFsOcc occ_sp pk_str)
501 %************************************************************************
503 \subsection{Encoding for operators in derived names}
505 %************************************************************************
507 See comments with mk_enc_deriv
510 needs_encoding :: String -> Bool -- Needs encoding when embedded in a derived name
511 -- Just look at the first character
512 needs_encoding (c:cs) = not (isAlpha c || c == '_')
514 encode_operator :: String -> String
515 encode_operator nm = foldr tran "" nm
517 tran c cs = case trChar c of
518 '\0' -> '_' : show (ord c) ++ cs -- No translation
543 trChar _ = '\0' -- No translation
547 %************************************************************************
549 \subsection{The 'Z' encoding}
551 %************************************************************************
553 We provide two interfaces for efficiency.
556 identToC :: String -> FAST_STRING
558 | all ISALPHANUM str && not std = _PK_ str
559 | std = _PK_ ("Zs" ++ encode str)
560 | otherwise = _PK_ (encode str)
562 std = has_std_prefix str
564 identFsToC :: FAST_STRING -> FAST_STRING
566 | all ISALPHANUM str && not std = fast_str
567 | std = _PK_ ("Zs" ++ encode str)
568 | otherwise = _PK_ (encode str)
570 std = has_std_prefix str
571 str = _UNPK_ fast_str
573 -- avoid "stdin", "stdout", and "stderr"...
574 has_std_prefix ('s':'t':'d':_) = True
575 has_std_prefix _ = False
577 encode :: String -> String
579 encode (c:cs) = encode_ch c ++ encode cs
581 encode_ch :: Char -> String
582 encode_ch c | ISALPHANUM c = [c]
596 encode_ch '\'' = "Zq"
600 encode_ch c = 'Z':show (ord c)
603 For \tr{modnameToC}, we really only have to worry about \tr{'}s
604 (quote chars) in the name. Rare.
607 modnameToC :: FAST_STRING -> FAST_STRING
608 modnameToC fast_str = identFsToC fast_str