2 % (c) The GRASP/AQUA Project, Glasgow University, 1992-1998
5 \section[OccName]{@OccName@}
10 Module, -- Abstract, instance of Outputable
11 mkModule, mkModuleFS, moduleString, moduleCString, pprModule,
14 OccName, -- Abstract, instance of Outputable
15 varOcc, tcOcc, tvOcc, -- Occ constructors
16 srcVarOcc, srcTCOcc, srcTvOcc, -- For Occs arising from source code
18 mkSuperDictSelOcc, mkDFunOcc,
19 mkDictOcc, mkWorkerOcc, mkDefaultMethodOcc,
20 mkClassTyConOcc, mkClassDataConOcc,
22 isTvOcc, isTCOcc, isVarOcc, isConSymOcc, isConOcc, isSymOcc,
23 pprOccName, occNameString, occNameFlavour,
25 -- The basic form of names
26 isLexCon, isLexVar, isLexId, isLexSym,
27 isLexConId, isLexConSym, isLexVarId, isLexVarSym,
28 isLowerISO, isUpperISO,
31 TidyOccEnv, emptyTidyOccEnv, tidyOccName, initTidyOccEnv,
38 #include "HsVersions.h"
40 import Char ( isAlpha, isUpper, isLower, isAlphanum, ord )
41 import Util ( thenCmp )
42 import FiniteMap ( FiniteMap, emptyFM, lookupFM, addToFM, elemFM )
48 %************************************************************************
50 \subsection[Module]{The name of a module}
52 %************************************************************************
55 data Module = Module FAST_STRING -- User and interface files
56 FAST_STRING -- Print this in C files
58 -- The C version has quote chars Z-encoded
60 instance Outputable Module where
63 instance Eq Module where
64 (Module m1 _) == (Module m2 _) = m1 == m2
66 instance Ord Module where
67 (Module m1 _) `compare` (Module m2 _) = m1 `compare` m2
69 pprModule :: Module -> SDoc
70 pprModule (Module real code)
71 = getPprStyle $ \ sty ->
77 mkModule :: String -> Module
78 mkModule s = Module (_PK_ s) (identToC s)
80 mkModuleFS :: FAST_STRING -> Module
81 mkModuleFS s = Module s (identFsToC s)
83 moduleString :: Module -> String
84 moduleString (Module mod _) = _UNPK_ mod
86 moduleCString :: Module -> String
87 moduleCString (Module _ code) = _UNPK_ code
91 %************************************************************************
93 \subsection[Name-pieces-datatypes]{The @OccName@ datatypes}
95 %************************************************************************
98 data OccName = OccName
100 FAST_STRING -- The 'real name'
101 FAST_STRING -- Print this in interface files
102 FAST_STRING -- Print this in C/asm code
104 -- The OccSpace/real-name pair define the OccName
105 -- The iface and c/asm versions are simply derived from the
106 -- other two. They are cached here simply to avoid recomputing
107 -- them repeatedly when printing
109 -- The latter two are irrelevant in RdrNames; on the other hand,
110 -- the OccSpace field is irrelevant after RdrNames.
111 -- So the OccName type might be refined a bit.
112 -- It is now abstract so that's easier than before
115 -- Why three print-names?
117 -- ---------------------
120 -- + + Zp Operators OK in interface files;
121 -- 'Z' is the escape char for C names
123 -- x# x# xZh Trailing # lexed ok by GHC -fglasgow-exts
125 -- _foo _ufoo _ufoo Leading '_' is the escape char in interface files
127 -- _vfoo _vfoo _vfoo Worker for foo
129 -- _wp _wp _wp Worker for +
132 data OccSpace = VarOcc -- Variables and data constructors
133 | TvOcc -- Type variables
134 | TCOcc -- Type constructors and classes
139 %************************************************************************
141 \subsection{Printing}
143 %************************************************************************
146 instance Outputable OccName where
149 pprOccName :: OccName -> SDoc
150 pprOccName (OccName space real iface code)
151 = getPprStyle $ \ sty ->
152 if codeStyle sty then
154 else if ifaceStyle sty then
161 %************************************************************************
163 \subsection{Construction}
165 %************************************************************************
167 *Source-code* things beginning with '_' are zapped to begin with '_u'
170 mkSrcOcc :: OccSpace -> FAST_STRING -> OccName
172 = case _UNPK_ real of
174 '_' : rest -> OccName occ_sp real (_PK_ zapped_str) (identToC zapped_str)
176 zapped_str = '_' : 'u' : rest
178 other -> OccName occ_sp real real (identFsToC real)
180 srcVarOcc, srcTCOcc, srcTvOcc :: FAST_STRING -> OccName
181 srcVarOcc = mkSrcOcc VarOcc
182 srcTCOcc = mkSrcOcc TCOcc
183 srcTvOcc = mkSrcOcc TvOcc
186 However, things that don't come from Haskell source code aren't
190 mkOcc :: OccSpace -> String -> OccName
191 mkOcc occ_sp str = OccName occ_sp fs fs (identToC str)
195 mkFsOcc :: OccSpace -> FAST_STRING -> OccName
196 mkFsOcc occ_sp real = OccName occ_sp real real (identFsToC real)
198 varOcc, tcOcc, tvOcc :: FAST_STRING -> OccName
199 varOcc = mkFsOcc VarOcc
200 tcOcc = mkFsOcc TCOcc
201 tvOcc = mkFsOcc TvOcc
205 %************************************************************************
207 \subsection{Making system names}
209 %************************************************************************
211 Here's our convention for splitting up the interface file name space:
213 _d... dictionary identifiers
215 _f... dict-fun identifiers (from inst decls)
216 _g... ditto, when the tycon has symbols
218 _t... externally visible (non-user visible) names
220 _m... default methods
221 _n... default methods (encoded symbols, eg. <= becomes _nle)
223 _p... superclass selectors
226 _w... workers (encoded symbols)
228 _x... local variables
230 _u... user-defined names that previously began with '_'
232 _T... compiler-generated tycons for dictionaries
233 _D.. ...ditto data cons
235 __.... keywords (__export, __letrec etc.)
237 This knowledge is encoded in the following functions.
242 @mkDerivedOcc@ generates an @OccName@ from an existing @OccName@;
243 eg: workers, derived methods
245 We pass a character to use as the prefix. So, for example,
246 "f" gets derived to "_vf", if the prefix char is 'v'
249 mk_deriv :: OccSpace -> Char -> String -> OccName
250 mk_deriv occ_sp sys_ch str = mkOcc occ_sp ('_' : sys_ch : str)
253 Things are a bit more complicated if the thing is an operator; then
254 we must encode it into a normal identifier first. We do this in
255 a simple way, and use a different character prefix (one after the one
256 suggested). For example
257 "<" gets derived to "_wl", if the prefix char is 'v'
260 mk_enc_deriv :: OccSpace
261 -> Char -- The system-name-space character (see list above)
262 -> OccName -- The OccName from which we are deriving
265 mk_enc_deriv occ_sp sys_ch occ
266 | needs_encoding real_str = mk_deriv occ_sp sys_op_ch (encode_operator real_str)
267 | otherwise = mk_deriv occ_sp sys_ch real_str
269 real_str = occNameString occ
270 sys_op_ch = succ sys_ch
273 mkDictOcc, mkWorkerOcc, mkDefaultMethodOcc,
274 mkClassTyConOcc, mkClassDataConOcc
275 :: OccName -> OccName
277 mkWorkerOcc = mk_enc_deriv VarOcc 'v' -- v,w
278 mkDefaultMethodOcc = mk_enc_deriv VarOcc 'm' -- m,n
279 mkClassTyConOcc = mk_enc_deriv TCOcc 'T' -- not U
280 mkClassDataConOcc = mk_enc_deriv VarOcc 'D' -- not E
281 mkDictOcc = mk_enc_deriv VarOcc 'd' -- not e
285 mkSuperDictSelOcc :: Int -- Index of superclass, eg 3
286 -> OccName -- Class, eg "Ord"
287 -> OccName -- eg "p3Ord"
288 mkSuperDictSelOcc index cls_occ
289 = mk_deriv VarOcc 'p' (show index ++ occNameString cls_occ)
294 mkDFunOcc :: OccName -- class, eg "Ord"
295 -> OccName -- tycon (or something convenient from the instance type)
297 -> Int -- Unique to distinguish dfuns which share the previous two
299 -> OccName -- "dOrdMaybe3"
301 mkDFunOcc cls_occ tycon_occ index
302 | needs_encoding tycon_str -- Drat! Have to encode the tycon
303 = mk_deriv VarOcc 'g' (show_index ++ cls_str ++ encode_operator tycon_str)
304 | otherwise -- Normal case
305 = mk_deriv VarOcc 'f' (show_index ++ cls_str ++ tycon_str)
307 cls_str = occNameString cls_occ
308 tycon_str = occNameString tycon_occ
309 -- NB: if a non-operator the tycon has a trailing # we don't encode.
310 show_index | index == 0 = ""
311 | otherwise = show index
315 %************************************************************************
317 \subsection{Lexical categories}
319 %************************************************************************
321 These functions test strings to see if they fit the lexical categories
322 defined in the Haskell report.
325 isLexCon, isLexVar, isLexId, isLexSym :: FAST_STRING -> Bool
326 isLexConId, isLexConSym, isLexVarId, isLexVarSym :: FAST_STRING -> Bool
328 isLexCon cs = isLexConId cs || isLexConSym cs
329 isLexVar cs = isLexVarId cs || isLexVarSym cs
331 isLexId cs = isLexConId cs || isLexVarId cs
332 isLexSym cs = isLexConSym cs || isLexVarSym cs
336 isLexConId cs -- Prefix type or data constructors
337 | _NULL_ cs = False -- e.g. "Foo", "[]", "(,)"
338 | cs == SLIT("[]") = True
339 | c == '(' = True -- (), (,), (,,), ...
340 | otherwise = isUpper c || isUpperISO c
344 isLexVarId cs -- Ordinary prefix identifiers
345 | _NULL_ cs = False -- e.g. "x", "_x"
346 | otherwise = isLower c || isLowerISO c || c == '_'
350 isLexConSym cs -- Infix type or data constructors
351 | _NULL_ cs = False -- e.g. ":-:", ":", "->"
352 | otherwise = c == ':'
357 isLexVarSym cs -- Infix identifiers
358 | _NULL_ cs = False -- e.g. "+"
359 | otherwise = isSymbolASCII c
365 isSymbolASCII c = c `elem` "!#$%&*+./<=>?@\\^|~-"
366 isSymbolISO c = ord c `elem` (0xd7 : 0xf7 : [0xa1 .. 0xbf])
367 isUpperISO (C# c#) = c# `geChar#` '\xc0'# && c# `leChar#` '\xde'# && c# `neChar#` '\xd7'#
368 --0xc0 <= oc && oc <= 0xde && oc /= 0xd7 where oc = ord c
369 isLowerISO (C# c#) = c# `geChar#` '\xdf'# && c# `leChar#` '\xff'# && c# `neChar#` '\xf7'#
370 --0xdf <= oc && oc <= 0xff && oc /= 0xf7 where oc = ord c
373 %************************************************************************
375 \subsection{Predicates and taking them apart}
377 %************************************************************************
380 occNameString :: OccName -> String
381 occNameString (OccName _ s _ _) = _UNPK_ s
383 -- occNameFlavour is used only to generate good error messages, so it doesn't matter
384 -- that the VarOcc case isn't mega-efficient. We could have different Occ constructors for
385 -- data constructors and values, but that makes everything else a bit more complicated.
386 occNameFlavour :: OccName -> String
387 occNameFlavour (OccName VarOcc s _ _) | isLexConId s = "Data constructor"
388 | otherwise = "Value"
389 occNameFlavour (OccName TvOcc _ _ _) = "Type variable"
390 occNameFlavour (OccName TCOcc s _ _) = "Type constructor or class"
392 isVarOcc, isTCOcc, isTvOcc,
393 isConSymOcc, isSymOcc :: OccName -> Bool
395 isVarOcc (OccName VarOcc _ _ _) = True
396 isVarOcc other = False
398 isTvOcc (OccName TvOcc _ _ _) = True
399 isTvOcc other = False
401 isTCOcc (OccName TCOcc _ _ _) = True
402 isTCOcc other = False
404 isConSymOcc (OccName _ s _ _) = isLexConSym s
406 isSymOcc (OccName _ s _ _) = isLexSym s
408 isConOcc (OccName _ s _ _) = isLexCon s
412 %************************************************************************
414 \subsection{Comparison}
416 %************************************************************************
418 Comparison is done by space and 'real' name
421 instance Eq OccName where
422 a == b = case (a `compare` b) of { EQ -> True; _ -> False }
423 a /= b = case (a `compare` b) of { EQ -> False; _ -> True }
425 instance Ord OccName where
426 a <= b = case (a `compare` b) of { LT -> True; EQ -> True; GT -> False }
427 a < b = case (a `compare` b) of { LT -> True; EQ -> False; GT -> False }
428 a >= b = case (a `compare` b) of { LT -> False; EQ -> True; GT -> True }
429 a > b = case (a `compare` b) of { LT -> False; EQ -> False; GT -> True }
431 compare (OccName sp1 r1 _ _) (OccName sp2 r2 _ _)
432 = (sp1 `compare` sp2) `thenCmp` (r1 `compare` r2)
436 %************************************************************************
438 \subsection{Tidying them up}
440 %************************************************************************
442 Before we print chunks of code we like to rename it so that
443 we don't have to print lots of silly uniques in it. But we mustn't
444 accidentally introduce name clashes! So the idea is that we leave the
445 OccName alone unless it accidentally clashes with one that is already
446 in scope; if so, we tack on '1' at the end and try again, then '2', and
447 so on till we find a unique one.
449 There's a wrinkle for operators. Consider '>>='. We can't use '>>=1'
450 because that isn't a single lexeme. So we encode it to 'lle' and *then*
451 tack on the '1', if necessary.
454 type TidyOccEnv = FiniteMap FAST_STRING Int -- The in-scope OccNames
455 emptyTidyOccEnv = emptyFM
457 initTidyOccEnv :: [OccName] -> TidyOccEnv -- Initialise with names to avoid!
458 initTidyOccEnv = foldl (\env (OccName _ fs _ _) -> addToFM env fs 1) emptyTidyOccEnv
460 tidyOccName :: TidyOccEnv -> OccName -> (TidyOccEnv, OccName)
462 tidyOccName in_scope occ@(OccName occ_sp real _ _)
463 | not (real `elemFM` in_scope)
464 = (addToFM in_scope real 1, occ) -- First occurrence
466 | otherwise -- Already occurs
467 = -- First encode, to deal with
469 -- b) trailing # signs
470 -- so that we can then append '1', '2', etc
471 go in_scope (encode_operator (_UNPK_ real))
474 go in_scope str = case lookupFM in_scope pk_str of
475 Just n -> go (addToFM in_scope pk_str (n+1)) (str ++ show n)
476 -- Need to go round again, just in case "t3" (say)
477 -- clashes with a "t3" that's already in scope
479 Nothing -> (addToFM in_scope pk_str 1, mkFsOcc occ_sp pk_str)
486 %************************************************************************
488 \subsection{Encoding for operators in derived names}
490 %************************************************************************
492 See comments with mk_enc_deriv
495 needs_encoding :: String -> Bool -- Needs encoding when embedded in a derived name
496 -- Just look at the first character
497 needs_encoding (c:cs) = not (isAlpha c || c == '_')
499 encode_operator :: String -> String
500 encode_operator nm = foldr tran "" nm
502 tran c cs = case trChar c of
503 '\0' -> '_' : show (ord c) ++ cs -- No translation
528 trChar _ = '\0' -- No translation
532 %************************************************************************
534 \subsection{The 'Z' encoding}
536 %************************************************************************
538 We provide two interfaces for efficiency.
541 identToC :: String -> FAST_STRING
543 | all isAlphanum str && not std = _PK_ str
544 | std = _PK_ ("Zs" ++ encode str)
545 | otherwise = _PK_ (encode str)
547 std = has_std_prefix str
549 identFsToC :: FAST_STRING -> FAST_STRING
551 | all isAlphanum str && not std = fast_str
552 | std = _PK_ ("Zs" ++ encode str)
553 | otherwise = _PK_ (encode str)
555 std = has_std_prefix str
556 str = _UNPK_ fast_str
558 -- avoid "stdin", "stdout", and "stderr"...
559 has_std_prefix ('s':'t':'d':_) = True
560 has_std_prefix _ = False
562 encode :: String -> String
564 encode (c:cs) = encode_ch c ++ encode cs
566 encode_ch :: Char -> String
567 encode_ch c | isAlphanum c = [c]
581 encode_ch '\'' = "Zq"
585 encode_ch c = 'Z':show (ord c)
588 For \tr{modnameToC}, we really only have to worry about \tr{'}s
589 (quote chars) in the name. Rare.
592 modnameToC :: FAST_STRING -> FAST_STRING
593 modnameToC fast_str = identFsToC fast_str