1 {-% DrIFT (Automatic class derivations for Haskell) v1.1 %-}
3 % (c) The GRASP/AQUA Project, Glasgow University, 1992-1998
6 \section[OccName]{@OccName@}
10 -- The NameSpace type; abstact
11 NameSpace, tcName, clsName, tcClsName, dataName, varName,
12 tvName, srcDataName, nameSpaceString,
15 OccName, -- Abstract, instance of Outputable
19 OccEnv, emptyOccEnv, unitOccEnv, extendOccEnv,
20 lookupOccEnv, mkOccEnv, extendOccEnvList, elemOccEnv,
21 occEnvElts, foldOccEnv, plusOccEnv, plusOccEnv_C, extendOccEnv_C,
25 OccSet, emptyOccSet, unitOccSet, mkOccSet, extendOccSet, extendOccSetList,
26 unionOccSets, unionManyOccSets, minusOccSet, elemOccSet, occSetElts,
27 foldOccSet, isEmptyOccSet, intersectOccSet, intersectsOccSet,
29 mkOccName, mkOccFS, mkSysOcc, mkSysOccFS, mkFCallOcc, mkKindOccFS,
30 mkVarOcc, mkVarOccEncoded, mkTyVarOcc,
31 mkSuperDictSelOcc, mkDFunOcc, mkForeignExportOcc,
32 mkDictOcc, mkIPOcc, mkWorkerOcc, mkMethodOcc, mkDefaultMethodOcc,
33 mkDerivedTyConOcc, mkClassTyConOcc, mkClassDataConOcc, mkSpecOcc,
34 mkGenOcc1, mkGenOcc2, mkLocalOcc, mkDataTOcc, mkDataCOcc,
35 mkDataConWrapperOcc, mkDataConWorkerOcc,
37 isVarOcc, isTvOcc, isTcOcc, isDataOcc, isDataSymOcc, isSymOcc, isValOcc,
38 parenSymOcc, reportIfUnused,
40 occNameFS, occNameString, occNameUserString, occNameSpace,
41 occNameFlavour, briefOccNameFlavour,
44 mkTupleOcc, isTupleOcc_maybe,
47 TidyOccEnv, emptyTidyOccEnv, tidyOccName, initTidyOccEnv,
50 EncodedString, EncodedFS, UserString, UserFS, encode, encodeFS, decode, pprEncodedFS,
52 -- The basic form of names
53 isLexCon, isLexVar, isLexId, isLexSym,
54 isLexConId, isLexConSym, isLexVarId, isLexVarSym,
55 isLowerISO, isUpperISO
59 #include "HsVersions.h"
61 import Char ( isDigit, isUpper, isLower, isAlphaNum, ord, chr, digitToInt )
62 import Util ( thenCmp )
63 import Unique ( Unique, mkUnique, Uniquable(..) )
64 import BasicTypes ( Boxity(..), Arity )
65 import StaticFlags ( opt_PprStyle_Debug )
75 We hold both module names and identifier names in a 'Z-encoded' form
76 that makes them acceptable both as a C identifier and as a Haskell
79 They can always be decoded again when printing error messages
80 or anything else for the user, but it does make sense for it
81 to be represented here in encoded form, so that when generating
82 code the encoding operation is not performed on each occurrence.
84 These type synonyms help documentation.
87 type UserFS = FastString -- As the user typed it
88 type EncodedFS = FastString -- Encoded form
90 type UserString = String -- As the user typed it
91 type EncodedString = String -- Encoded form
94 pprEncodedFS :: EncodedFS -> SDoc
96 = getPprStyle $ \ sty ->
97 if userStyle sty || dumpStyle sty
98 -- ftext (decodeFS fs) would needlessly pack the string again
99 then text (decode (unpackFS fs))
103 %************************************************************************
105 \subsection{Name space}
107 %************************************************************************
110 data NameSpace = VarName -- Variables, including "source" data constructors
111 | DataName -- "Real" data constructors
112 | TvName -- Type variables
113 | TcClsName -- Type constructors and classes; Haskell has them
114 -- in the same name space for now.
116 {-! derive: Binary !-}
118 -- Note [Data Constructors]
119 -- see also: Note [Data Constructor Naming] in DataCon.lhs
121 -- "Source" data constructors are the data constructors mentioned
122 -- in Haskell source code
124 -- "Real" data constructors are the data constructors of the
125 -- representation type, which may not be the same as the source
129 -- data T = T !(Int,Int)
131 -- The source datacon has type (Int,Int) -> T
132 -- The real datacon has type Int -> Int -> T
133 -- GHC chooses a representation based on the strictness etc.
136 -- Though type constructors and classes are in the same name space now,
137 -- the NameSpace type is abstract, so we can easily separate them later
138 tcName = TcClsName -- Type constructors
139 clsName = TcClsName -- Classes
140 tcClsName = TcClsName -- Not sure which!
143 srcDataName = DataName -- Haskell-source data constructors should be
144 -- in the Data name space
149 nameSpaceString :: NameSpace -> String
150 nameSpaceString DataName = "Data constructor"
151 nameSpaceString VarName = "Variable"
152 nameSpaceString TvName = "Type variable"
153 nameSpaceString TcClsName = "Type constructor or class"
157 %************************************************************************
159 \subsection[Name-pieces-datatypes]{The @OccName@ datatypes}
161 %************************************************************************
164 data OccName = OccName
167 {-! derive : Binary !-}
172 instance Eq OccName where
173 (OccName sp1 s1) == (OccName sp2 s2) = s1 == s2 && sp1 == sp2
175 instance Ord OccName where
176 compare (OccName sp1 s1) (OccName sp2 s2) = (s1 `compare` s2) `thenCmp`
181 %************************************************************************
183 \subsection{Printing}
185 %************************************************************************
188 instance Outputable OccName where
191 pprOccName :: OccName -> SDoc
192 pprOccName (OccName sp occ)
193 = getPprStyle $ \ sty ->
194 pprEncodedFS occ <> if debugStyle sty then
195 braces (text (briefNameSpaceFlavour sp))
200 %************************************************************************
202 \subsection{Construction}
204 %*****p*******************************************************************
206 *Sys* things do no encoding; the caller should ensure that the thing is
210 mkSysOcc :: NameSpace -> EncodedString -> OccName
211 mkSysOcc occ_sp str = ASSERT2( alreadyEncoded str, text str )
212 OccName occ_sp (mkFastString str)
214 mkSysOccFS :: NameSpace -> EncodedFS -> OccName
215 mkSysOccFS occ_sp fs = ASSERT2( alreadyEncodedFS fs, ppr fs )
218 mkFCallOcc :: EncodedString -> OccName
219 -- This version of mkSysOcc doesn't check that the string is already encoded,
220 -- because it will be something like "{__ccall f dyn Int# -> Int#}"
221 -- This encodes a lot into something that then parses like an Id.
222 -- But then alreadyEncoded complains about the braces!
223 mkFCallOcc str = OccName varName (mkFastString str)
225 -- Kind constructors get a special function. Uniquely, they are not encoded,
226 -- so that they have names like '*'. This means that *even in interface files*
227 -- we'll get kinds like (* -> (* -> *)). We can't use mkSysOcc because it
228 -- has an ASSERT that doesn't hold.
229 mkKindOccFS :: NameSpace -> EncodedFS -> OccName
230 mkKindOccFS occ_sp fs = OccName occ_sp fs
233 *Source-code* things are encoded.
236 mkOccFS :: NameSpace -> UserFS -> OccName
237 mkOccFS occ_sp fs = mkSysOccFS occ_sp (encodeFS fs)
239 mkOccName :: NameSpace -> String -> OccName
240 mkOccName ns s = mkSysOcc ns (encode s)
242 mkVarOcc :: UserFS -> OccName
243 mkVarOcc fs = mkSysOccFS varName (encodeFS fs)
245 mkTyVarOcc :: UserFS -> OccName
246 mkTyVarOcc fs = mkSysOccFS tvName (encodeFS fs)
248 mkVarOccEncoded :: EncodedFS -> OccName
249 mkVarOccEncoded fs = mkSysOccFS varName fs
254 %************************************************************************
258 %************************************************************************
260 OccEnvs are used mainly for the envts in ModIfaces.
262 They are efficient, because FastStrings have unique Int# keys. We assume
263 this key is less than 2^24, so we can make a Unique using
264 mkUnique ns key :: Unique
265 where 'ns' is a Char reprsenting the name space. This in turn makes it
266 easy to build an OccEnv.
269 instance Uniquable OccName where
270 getUnique (OccName ns fs)
271 = mkUnique char (I# (uniqueOfFS fs))
272 where -- See notes above about this getUnique function
279 type OccEnv a = UniqFM a
281 emptyOccEnv :: OccEnv a
282 unitOccEnv :: OccName -> a -> OccEnv a
283 extendOccEnv :: OccEnv a -> OccName -> a -> OccEnv a
284 extendOccEnvList :: OccEnv a -> [(OccName, a)] -> OccEnv a
285 lookupOccEnv :: OccEnv a -> OccName -> Maybe a
286 mkOccEnv :: [(OccName,a)] -> OccEnv a
287 elemOccEnv :: OccName -> OccEnv a -> Bool
288 foldOccEnv :: (a -> b -> b) -> b -> OccEnv a -> b
289 occEnvElts :: OccEnv a -> [a]
290 extendOccEnv_C :: (a->a->a) -> OccEnv a -> OccName -> a -> OccEnv a
291 plusOccEnv :: OccEnv a -> OccEnv a -> OccEnv a
292 plusOccEnv_C :: (a->a->a) -> OccEnv a -> OccEnv a -> OccEnv a
294 emptyOccEnv = emptyUFM
296 extendOccEnv = addToUFM
297 extendOccEnvList = addListToUFM
298 lookupOccEnv = lookupUFM
304 plusOccEnv_C = plusUFM_C
305 extendOccEnv_C = addToUFM_C
308 type OccSet = UniqFM OccName
310 emptyOccSet :: OccSet
311 unitOccSet :: OccName -> OccSet
312 mkOccSet :: [OccName] -> OccSet
313 extendOccSet :: OccSet -> OccName -> OccSet
314 extendOccSetList :: OccSet -> [OccName] -> OccSet
315 unionOccSets :: OccSet -> OccSet -> OccSet
316 unionManyOccSets :: [OccSet] -> OccSet
317 minusOccSet :: OccSet -> OccSet -> OccSet
318 elemOccSet :: OccName -> OccSet -> Bool
319 occSetElts :: OccSet -> [OccName]
320 foldOccSet :: (OccName -> b -> b) -> b -> OccSet -> b
321 isEmptyOccSet :: OccSet -> Bool
322 intersectOccSet :: OccSet -> OccSet -> OccSet
323 intersectsOccSet :: OccSet -> OccSet -> Bool
325 emptyOccSet = emptyUniqSet
326 unitOccSet = unitUniqSet
328 extendOccSet = addOneToUniqSet
329 extendOccSetList = addListToUniqSet
330 unionOccSets = unionUniqSets
331 unionManyOccSets = unionManyUniqSets
332 minusOccSet = minusUniqSet
333 elemOccSet = elementOfUniqSet
334 occSetElts = uniqSetToList
335 foldOccSet = foldUniqSet
336 isEmptyOccSet = isEmptyUniqSet
337 intersectOccSet = intersectUniqSets
338 intersectsOccSet s1 s2 = not (isEmptyOccSet (s1 `intersectOccSet` s2))
342 %************************************************************************
344 \subsection{Predicates and taking them apart}
346 %************************************************************************
349 occNameFS :: OccName -> EncodedFS
350 occNameFS (OccName _ s) = s
352 occNameString :: OccName -> EncodedString
353 occNameString (OccName _ s) = unpackFS s
355 occNameUserString :: OccName -> UserString
356 occNameUserString occ = decode (occNameString occ)
358 occNameSpace :: OccName -> NameSpace
359 occNameSpace (OccName sp _) = sp
361 setOccNameSpace :: NameSpace -> OccName -> OccName
362 setOccNameSpace sp (OccName _ occ) = OccName sp occ
364 -- occNameFlavour is used only to generate good error messages
365 occNameFlavour :: OccName -> SDoc
366 occNameFlavour (OccName DataName _) = ptext SLIT("data constructor")
367 occNameFlavour (OccName TvName _) = ptext SLIT("type variable")
368 occNameFlavour (OccName TcClsName _) = ptext SLIT("type constructor or class")
369 occNameFlavour (OccName VarName s) = empty
371 -- briefOccNameFlavour is used in debug-printing of names
372 briefOccNameFlavour :: OccName -> String
373 briefOccNameFlavour (OccName sp _) = briefNameSpaceFlavour sp
375 briefNameSpaceFlavour DataName = "d"
376 briefNameSpaceFlavour VarName = "v"
377 briefNameSpaceFlavour TvName = "tv"
378 briefNameSpaceFlavour TcClsName = "tc"
382 isVarOcc, isTvOcc, isDataSymOcc, isSymOcc, isTcOcc :: OccName -> Bool
384 isVarOcc (OccName VarName _) = True
385 isVarOcc other = False
387 isTvOcc (OccName TvName _) = True
388 isTvOcc other = False
390 isTcOcc (OccName TcClsName _) = True
391 isTcOcc other = False
393 isValOcc (OccName VarName _) = True
394 isValOcc (OccName DataName _) = True
395 isValOcc other = False
397 -- Data constructor operator (starts with ':', or '[]')
398 -- Pretty inefficient!
399 isDataSymOcc (OccName DataName s) = isLexConSym (decodeFS s)
400 isDataSymOcc (OccName VarName s) = isLexConSym (decodeFS s)
401 isDataSymOcc other = False
403 isDataOcc (OccName DataName _) = True
404 isDataOcc (OccName VarName s) = isLexCon (decodeFS s)
405 isDataOcc other = False
407 -- Any operator (data constructor or variable)
408 -- Pretty inefficient!
409 isSymOcc (OccName DataName s) = isLexConSym (decodeFS s)
410 isSymOcc (OccName TcClsName s) = isLexConSym (decodeFS s)
411 isSymOcc (OccName VarName s) = isLexSym (decodeFS s)
412 isSymOcc other = False
414 parenSymOcc :: OccName -> SDoc -> SDoc
415 -- Wrap parens around an operator
416 parenSymOcc occ doc | isSymOcc occ = parens doc
422 reportIfUnused :: OccName -> Bool
423 -- Haskell 98 encourages compilers to suppress warnings about
424 -- unused names in a pattern if they start with "_".
425 reportIfUnused occ = case occNameUserString occ of
432 %************************************************************************
434 \subsection{Making system names}
436 %************************************************************************
438 Here's our convention for splitting up the interface file name space:
440 d... dictionary identifiers
441 (local variables, so no name-clash worries)
443 $f... dict-fun identifiers (from inst decls)
444 $dm... default methods
445 $p... superclass selectors
447 :T... compiler-generated tycons for dictionaries
448 :D... ...ditto data cons
449 $sf.. specialised version of f
451 in encoded form these appear as Zdfxxx etc
453 :... keywords (export:, letrec: etc.)
454 --- I THINK THIS IS WRONG!
456 This knowledge is encoded in the following functions.
459 @mk_deriv@ generates an @OccName@ from the prefix and a string.
460 NB: The string must already be encoded!
463 mk_deriv :: NameSpace
464 -> String -- Distinguishes one sort of derived name from another
465 -> EncodedString -- Must be already encoded!! We don't want to encode it a
466 -- second time because encoding isn't idempotent
469 mk_deriv occ_sp sys_prefix str = mkSysOcc occ_sp (encode sys_prefix ++ str)
473 mkDictOcc, mkIPOcc, mkWorkerOcc, mkDefaultMethodOcc,
474 mkClassTyConOcc, mkClassDataConOcc, mkSpecOcc
475 :: OccName -> OccName
477 -- These derived variables have a prefix that no Haskell value could have
478 mkDataConWrapperOcc = mk_simple_deriv varName "$W"
479 mkWorkerOcc = mk_simple_deriv varName "$w"
480 mkDefaultMethodOcc = mk_simple_deriv varName "$dm"
481 mkDerivedTyConOcc = mk_simple_deriv tcName ":" -- The : prefix makes sure it classifies
482 mkClassTyConOcc = mk_simple_deriv tcName ":T" -- as a tycon/datacon
483 mkClassDataConOcc = mk_simple_deriv dataName ":D" -- We go straight to the "real" data con
484 -- for datacons from classes
485 mkDictOcc = mk_simple_deriv varName "$d"
486 mkIPOcc = mk_simple_deriv varName "$i"
487 mkSpecOcc = mk_simple_deriv varName "$s"
488 mkForeignExportOcc = mk_simple_deriv varName "$f"
490 -- Generic derivable classes
491 mkGenOcc1 = mk_simple_deriv varName "$gfrom"
492 mkGenOcc2 = mk_simple_deriv varName "$gto"
494 -- data T = MkT ... deriving( Data ) needs defintions for
495 -- $tT :: Data.Generics.Basics.DataType
496 -- $cMkT :: Data.Generics.Basics.Constr
497 mkDataTOcc = mk_simple_deriv varName "$t"
498 mkDataCOcc = mk_simple_deriv varName "$c"
500 mk_simple_deriv sp px occ = mk_deriv sp px (occNameString occ)
503 -- Data constructor workers are made by setting the name space
504 -- of the data constructor OccName (which should be a DataName)
506 mkDataConWorkerOcc datacon_occ = setOccNameSpace varName datacon_occ
510 mkSuperDictSelOcc :: Int -- Index of superclass, eg 3
511 -> OccName -- Class, eg "Ord"
512 -> OccName -- eg "$p3Ord"
513 mkSuperDictSelOcc index cls_occ
514 = mk_deriv varName "$p" (show index ++ occNameString cls_occ)
516 mkLocalOcc :: Unique -- Unique
517 -> OccName -- Local name (e.g. "sat")
518 -> OccName -- Nice unique version ("$L23sat")
520 = mk_deriv varName ("$L" ++ show uniq) (occNameString occ)
521 -- The Unique might print with characters
522 -- that need encoding (e.g. 'z'!)
527 mkDFunOcc :: EncodedString -- Typically the class and type glommed together e.g. "OrdMaybe"
528 -- Only used in debug mode, for extra clarity
529 -> Bool -- True <=> hs-boot instance dfun
530 -> Int -- Unique index
531 -> OccName -- "$f3OrdMaybe"
533 -- In hs-boot files we make dict funs like $fx7ClsTy, which get bound to the real
534 -- thing when we compile the mother module. Reason: we don't know exactly
535 -- what the mother module will call it.
537 mkDFunOcc info_str is_boot index
538 = mk_deriv VarName prefix string
540 prefix | is_boot = "$fx"
542 string | opt_PprStyle_Debug = show index ++ info_str
543 | otherwise = show index
546 We used to add a '$m' to indicate a method, but that gives rise to bad
547 error messages from the type checker when we print the function name or pattern
548 of an instance-decl binding. Why? Because the binding is zapped
549 to use the method name in place of the selector name.
550 (See TcClassDcl.tcMethodBind)
552 The way it is now, -ddump-xx output may look confusing, but
553 you can always say -dppr-debug to get the uniques.
555 However, we *do* have to zap the first character to be lower case,
556 because overloaded constructors (blarg) generate methods too.
557 And convert to VarName space
559 e.g. a call to constructor MkFoo where
560 data (Ord a) => Foo a = MkFoo a
562 If this is necessary, we do it by prefixing '$m'. These
563 guys never show up in error messages. What a hack.
566 mkMethodOcc :: OccName -> OccName
567 mkMethodOcc occ@(OccName VarName fs) = occ
568 mkMethodOcc occ = mk_simple_deriv varName "$m" occ
572 %************************************************************************
574 \subsection{Tidying them up}
576 %************************************************************************
578 Before we print chunks of code we like to rename it so that
579 we don't have to print lots of silly uniques in it. But we mustn't
580 accidentally introduce name clashes! So the idea is that we leave the
581 OccName alone unless it accidentally clashes with one that is already
582 in scope; if so, we tack on '1' at the end and try again, then '2', and
583 so on till we find a unique one.
585 There's a wrinkle for operators. Consider '>>='. We can't use '>>=1'
586 because that isn't a single lexeme. So we encode it to 'lle' and *then*
587 tack on the '1', if necessary.
590 type TidyOccEnv = OccEnv Int -- The in-scope OccNames
591 -- Range gives a plausible starting point for new guesses
593 emptyTidyOccEnv = emptyOccEnv
595 initTidyOccEnv :: [OccName] -> TidyOccEnv -- Initialise with names to avoid!
596 initTidyOccEnv = foldl (\env occ -> extendOccEnv env occ 1) emptyTidyOccEnv
598 tidyOccName :: TidyOccEnv -> OccName -> (TidyOccEnv, OccName)
600 tidyOccName in_scope occ@(OccName occ_sp fs)
601 = case lookupOccEnv in_scope occ of
602 Nothing -> -- Not already used: make it used
603 (extendOccEnv in_scope occ 1, occ)
605 Just n -> -- Already used: make a new guess,
606 -- change the guess base, and try again
607 tidyOccName (extendOccEnv in_scope occ (n+1))
608 (mkSysOcc occ_sp (unpackFS fs ++ show n))
612 %************************************************************************
614 \subsection{The 'Z' encoding}
616 %************************************************************************
618 This is the main name-encoding and decoding function. It encodes any
619 string into a string that is acceptable as a C name. This is the name
620 by which things are known right through the compiler.
622 The basic encoding scheme is this.
624 * Tuples (,,,) are coded as Z3T
626 * Alphabetic characters (upper and lower) and digits
627 all translate to themselves;
628 except 'Z', which translates to 'ZZ'
629 and 'z', which translates to 'zz'
630 We need both so that we can preserve the variable/tycon distinction
632 * Most other printable characters translate to 'zx' or 'Zx' for some
633 alphabetic character x
635 * The others translate as 'znnnU' where 'nnn' is the decimal number
639 --------------------------
651 (# #) Z1H unboxed 1-tuple (note the space)
652 (#,,,,#) Z5H unboxed 5-tuple
653 (NB: There is no Z1T nor Z0H.)
656 -- alreadyEncoded is used in ASSERTs to check for encoded
657 -- strings. It isn't fail-safe, of course, because, say 'zh' might
658 -- be encoded or not.
659 alreadyEncoded :: String -> Bool
660 alreadyEncoded s = all ok s
663 -- This is a bit of a lie; if we really wanted spaces
664 -- in names we'd have to encode them. But we do put
665 -- spaces in ccall "occurrences", and we don't want to
667 ok ch = isAlphaNum ch
669 alreadyEncodedFS :: FastString -> Bool
670 alreadyEncodedFS fs = alreadyEncoded (unpackFS fs)
672 encode :: UserString -> EncodedString
673 encode cs = case maybe_tuple cs of
674 Just n -> n -- Tuples go to Z2T etc
678 go (c:cs) = encode_ch c ++ go cs
680 encodeFS :: UserFS -> EncodedFS
681 encodeFS fast_str | all unencodedChar str = fast_str
682 | otherwise = mkFastString (encode str)
684 str = unpackFS fast_str
686 unencodedChar :: Char -> Bool -- True for chars that don't need encoding
687 unencodedChar 'Z' = False
688 unencodedChar 'z' = False
689 unencodedChar c = c >= 'a' && c <= 'z'
690 || c >= 'A' && c <= 'Z'
691 || c >= '0' && c <= '9'
693 encode_ch :: Char -> EncodedString
694 encode_ch c | unencodedChar c = [c] -- Common case first
697 encode_ch '(' = "ZL" -- Needed for things like (,), and (->)
698 encode_ch ')' = "ZR" -- For symmetry with (
718 encode_ch '\'' = "zq"
719 encode_ch '\\' = "zr"
724 encode_ch c = 'z' : shows (ord c) "U"
727 Decode is used for user printing.
730 decodeFS :: FastString -> FastString
731 decodeFS fs = mkFastString (decode (unpackFS fs))
733 decode :: EncodedString -> UserString
735 decode ('Z' : d : rest) | isDigit d = decode_tuple d rest
736 | otherwise = decode_upper d : decode rest
737 decode ('z' : d : rest) | isDigit d = decode_num_esc d rest
738 | otherwise = decode_lower d : decode rest
739 decode (c : rest) = c : decode rest
741 decode_upper, decode_lower :: Char -> Char
743 decode_upper 'L' = '('
744 decode_upper 'R' = ')'
745 decode_upper 'M' = '['
746 decode_upper 'N' = ']'
747 decode_upper 'C' = ':'
748 decode_upper 'Z' = 'Z'
749 decode_upper ch = pprTrace "decode_upper" (char ch) ch
751 decode_lower 'z' = 'z'
752 decode_lower 'a' = '&'
753 decode_lower 'b' = '|'
754 decode_lower 'c' = '^'
755 decode_lower 'd' = '$'
756 decode_lower 'e' = '='
757 decode_lower 'g' = '>'
758 decode_lower 'h' = '#'
759 decode_lower 'i' = '.'
760 decode_lower 'l' = '<'
761 decode_lower 'm' = '-'
762 decode_lower 'n' = '!'
763 decode_lower 'p' = '+'
764 decode_lower 'q' = '\''
765 decode_lower 'r' = '\\'
766 decode_lower 's' = '/'
767 decode_lower 't' = '*'
768 decode_lower 'u' = '_'
769 decode_lower 'v' = '%'
770 decode_lower ch = pprTrace "decode_lower" (char ch) ch
772 -- Characters not having a specific code are coded as z224U
773 decode_num_esc d rest
774 = go (digitToInt d) rest
776 go n (c : rest) | isDigit c = go (10*n + digitToInt c) rest
777 go n ('U' : rest) = chr n : decode rest
778 go n other = pprPanic "decode_num_esc" (ppr n <+> text other)
780 decode_tuple :: Char -> EncodedString -> UserString
782 = go (digitToInt d) rest
784 -- NB. recurse back to decode after decoding the tuple, because
785 -- the tuple might be embedded in a longer name.
786 go n (c : rest) | isDigit c = go (10*n + digitToInt c) rest
787 go 0 ('T':rest) = "()" ++ decode rest
788 go n ('T':rest) = '(' : replicate (n-1) ',' ++ ")" ++ decode rest
789 go 1 ('H':rest) = "(# #)" ++ decode rest
790 go n ('H':rest) = '(' : '#' : replicate (n-1) ',' ++ "#)" ++ decode rest
791 go n other = pprPanic "decode_tuple" (ppr n <+> text other)
795 %************************************************************************
797 Stuff for dealing with tuples
799 %************************************************************************
801 Tuples are encoded as
803 for 3-tuples or unboxed 3-tuples respectively. No other encoding starts
806 * "(# #)" is the tycon for an unboxed 1-tuple (not 0-tuple)
807 There are no unboxed 0-tuples.
809 * "()" is the tycon for a boxed 0-tuple.
810 There are no boxed 1-tuples.
814 maybe_tuple :: UserString -> Maybe EncodedString
816 maybe_tuple "(# #)" = Just("Z1H")
817 maybe_tuple ('(' : '#' : cs) = case count_commas (0::Int) cs of
818 (n, '#' : ')' : cs) -> Just ('Z' : shows (n+1) "H")
820 maybe_tuple "()" = Just("Z0T")
821 maybe_tuple ('(' : cs) = case count_commas (0::Int) cs of
822 (n, ')' : cs) -> Just ('Z' : shows (n+1) "T")
824 maybe_tuple other = Nothing
826 count_commas :: Int -> String -> (Int, String)
827 count_commas n (',' : cs) = count_commas (n+1) cs
828 count_commas n cs = (n,cs)
832 mkTupleOcc :: NameSpace -> Boxity -> Arity -> OccName
834 = OccName ns (mkFastString ('Z' : (show ar ++ bx_char)))
840 isTupleOcc_maybe :: OccName -> Maybe (NameSpace, Boxity, Arity)
841 -- Tuples are special, because there are so many of them!
842 isTupleOcc_maybe (OccName ns fs)
843 = case unpackFS fs of
844 ('Z':d:rest) | isDigit d -> Just (decode_tup (digitToInt d) rest)
847 decode_tup n "H" = (ns, Unboxed, n)
848 decode_tup n "T" = (ns, Boxed, n)
849 decode_tup n (d:rest) = decode_tup (n*10 + digitToInt d) rest
852 %************************************************************************
854 \subsection{Lexical categories}
856 %************************************************************************
858 These functions test strings to see if they fit the lexical categories
859 defined in the Haskell report.
862 isLexCon, isLexVar, isLexId, isLexSym :: FastString -> Bool
863 isLexConId, isLexConSym, isLexVarId, isLexVarSym :: FastString -> Bool
865 isLexCon cs = isLexConId cs || isLexConSym cs
866 isLexVar cs = isLexVarId cs || isLexVarSym cs
868 isLexId cs = isLexConId cs || isLexVarId cs
869 isLexSym cs = isLexConSym cs || isLexVarSym cs
873 isLexConId cs -- Prefix type or data constructors
874 | nullFastString cs = False -- e.g. "Foo", "[]", "(,)"
875 | cs == FSLIT("[]") = True
876 | otherwise = startsConId (headFS cs)
878 isLexVarId cs -- Ordinary prefix identifiers
879 | nullFastString cs = False -- e.g. "x", "_x"
880 | otherwise = startsVarId (headFS cs)
882 isLexConSym cs -- Infix type or data constructors
883 | nullFastString cs = False -- e.g. ":-:", ":", "->"
884 | cs == FSLIT("->") = True
885 | otherwise = startsConSym (headFS cs)
887 isLexVarSym cs -- Infix identifiers
888 | nullFastString cs = False -- e.g. "+"
889 | otherwise = startsVarSym (headFS cs)
892 startsVarSym, startsVarId, startsConSym, startsConId :: Char -> Bool
893 startsVarSym c = isSymbolASCII c || isSymbolISO c -- Infix Ids
894 startsConSym c = c == ':' -- Infix data constructors
895 startsVarId c = isLower c || isLowerISO c || c == '_' -- Ordinary Ids
896 startsConId c = isUpper c || isUpperISO c || c == '(' -- Ordinary type constructors and data constructors
899 isSymbolASCII c = c `elem` "!#$%&*+./<=>?@\\^|~-"
900 isSymbolISO c = ord c `elem` (0xd7 : 0xf7 : [0xa1 .. 0xbf])
901 isUpperISO (C# c#) = c# `geChar#` '\xc0'# && c# `leChar#` '\xde'# && c# `neChar#` '\xd7'#
902 --0xc0 <= oc && oc <= 0xde && oc /= 0xd7 where oc = ord c
903 isLowerISO (C# c#) = c# `geChar#` '\xdf'# && c# `leChar#` '\xff'# && c# `neChar#` '\xf7'#
904 --0xdf <= oc && oc <= 0xff && oc /= 0xf7 where oc = ord c
907 %************************************************************************
910 Here rather than BinIface because OccName is abstract
912 %************************************************************************
915 instance Binary NameSpace where
918 put_ bh DataName = do
922 put_ bh TcClsName = do
927 0 -> do return VarName
928 1 -> do return DataName
929 2 -> do return TvName
930 _ -> do return TcClsName
932 instance Binary OccName where
933 put_ bh (OccName aa ab) = do
939 return (OccName aa ab)