1 {-% DrIFT (Automatic class derivations for Haskell) v1.1 %-}
3 % (c) The GRASP/AQUA Project, Glasgow University, 1992-1998
6 \section[OccName]{@OccName@}
10 -- The NameSpace type; abstract
11 NameSpace, tcName, clsName, tcClsName, dataName, varName,
12 tvName, srcDataName, nameSpaceString,
15 OccName, -- Abstract, instance of Outputable
19 OccEnv, emptyOccEnv, unitOccEnv, extendOccEnv,
20 lookupOccEnv, mkOccEnv, extendOccEnvList, elemOccEnv,
21 occEnvElts, foldOccEnv, plusOccEnv, plusOccEnv_C, extendOccEnv_C,
25 OccSet, emptyOccSet, unitOccSet, mkOccSet, extendOccSet, extendOccSetList,
26 unionOccSets, unionManyOccSets, minusOccSet, elemOccSet, occSetElts,
27 foldOccSet, isEmptyOccSet, intersectOccSet, intersectsOccSet,
29 mkOccName, mkOccFS, mkSysOcc, mkSysOccFS, mkFCallOcc, mkKindOccFS,
30 mkVarOcc, mkVarOccEncoded, mkTyVarOcc,
31 mkSuperDictSelOcc, mkDFunOcc, mkForeignExportOcc,
32 mkDictOcc, mkIPOcc, mkWorkerOcc, mkMethodOcc, mkDefaultMethodOcc,
33 mkDerivedTyConOcc, mkClassTyConOcc, mkClassDataConOcc, mkSpecOcc,
34 mkGenOcc1, mkGenOcc2, mkLocalOcc, mkDataTOcc, mkDataCOcc,
35 mkDataConWrapperOcc, mkDataConWorkerOcc,
37 isVarOcc, isTvOcc, isTcOcc, isDataOcc, isDataSymOcc, isSymOcc, isValOcc,
38 parenSymOcc, reportIfUnused, isTcClsName, isVarName,
40 occNameFS, occNameString, occNameUserString, occNameSpace,
41 occNameFlavour, briefOccNameFlavour,
44 mkTupleOcc, isTupleOcc_maybe,
47 TidyOccEnv, emptyTidyOccEnv, tidyOccName, initTidyOccEnv,
50 EncodedString, EncodedFS, UserString, UserFS, encode, encodeFS, decode, pprEncodedFS,
52 -- The basic form of names
53 isLexCon, isLexVar, isLexId, isLexSym,
54 isLexConId, isLexConSym, isLexVarId, isLexVarSym,
55 isLowerISO, isUpperISO,
56 startsVarSym, startsVarId, startsConSym, startsConId
59 #include "HsVersions.h"
61 import Char ( isDigit, isUpper, isLower, isAlphaNum, ord, chr, digitToInt )
62 import Util ( thenCmp )
63 import Unique ( Unique, mkUnique, Uniquable(..) )
64 import BasicTypes ( Boxity(..), Arity )
65 import StaticFlags ( opt_PprStyle_Debug )
75 We hold both module names and identifier names in a 'Z-encoded' form
76 that makes them acceptable both as a C identifier and as a Haskell
79 They can always be decoded again when printing error messages
80 or anything else for the user, but it does make sense for it
81 to be represented here in encoded form, so that when generating
82 code the encoding operation is not performed on each occurrence.
84 These type synonyms help documentation.
-- Documentation-only synonyms.  They record whether a string is in the
-- form the user typed or in encoded form; being plain synonyms, the
-- type checker does not tell the two forms apart.
type UserFS        = FastString  -- FastString, as the user typed it
type EncodedFS     = FastString  -- FastString, in encoded form

type UserString    = String      -- String, as the user typed it
type EncodedString = String      -- String, in encoded form
94 pprEncodedFS :: EncodedFS -> SDoc
96 = getPprStyle $ \ sty ->
97 if userStyle sty || dumpStyle sty
98 -- ftext (decodeFS fs) would needlessly pack the string again
99 then text (decode (unpackFS fs))
103 %************************************************************************
105 \subsection{Name space}
107 %************************************************************************
110 data NameSpace = VarName -- Variables, including "source" data constructors
111 | DataName -- "Real" data constructors
112 | TvName -- Type variables
113 | TcClsName -- Type constructors and classes; Haskell has them
114 -- in the same name space for now.
116 {-! derive: Binary !-}
118 -- Note [Data Constructors]
119 -- see also: Note [Data Constructor Naming] in DataCon.lhs
121 -- "Source" data constructors are the data constructors mentioned
122 -- in Haskell source code
124 -- "Real" data constructors are the data constructors of the
125 -- representation type, which may not be the same as the source
129 -- data T = T !(Int,Int)
131 -- The source datacon has type (Int,Int) -> T
132 -- The real datacon has type Int -> Int -> T
133 -- GHC chooses a representation based on the strictness etc.
-- Type constructors and classes currently live in one name space.
-- Because the NameSpace type is abstract, clients pick a name space via
-- these three names, so the two could be separated later.
tcName    = TcClsName  -- Use for type constructors
clsName   = TcClsName  -- Use for classes
tcClsName = TcClsName  -- Use when it is not known which of the two it is
143 srcDataName = DataName -- Haskell-source data constructors should be
144 -- in the Data name space
-- True exactly for the shared type-constructor/class name space.
isTcClsName :: NameSpace -> Bool
isTcClsName ns = case ns of
    TcClsName -> True
    _         -> False
-- True for variables and type variables; False for constructors.
isVarName :: NameSpace -> Bool
isVarName ns = case ns of
    TvName  -> True
    VarName -> True
    _       -> False
-- Human-readable description of a name space.
nameSpaceString :: NameSpace -> String
nameSpaceString ns = case ns of
    DataName  -> "data constructor"
    VarName   -> "variable"
    TvName    -> "type variable"
    TcClsName -> "type constructor or class"
167 %************************************************************************
169 \subsection[Name-pieces-datatypes]{The @OccName@ datatypes}
171 %************************************************************************
174 data OccName = OccName
175 { occNameSpace :: !NameSpace
176 , occNameFS :: !EncodedFS
-- Two OccNames are equal when their encoded strings and their name
-- spaces both agree.  The strings are compared first.
instance Eq OccName where
    lhs == rhs = occNameFS lhs == occNameFS rhs
              && occNameSpace lhs == occNameSpace rhs
185 instance Ord OccName where
186 compare (OccName sp1 s1) (OccName sp2 s2) = (s1 `compare` s2) `thenCmp`
191 %************************************************************************
193 \subsection{Printing}
195 %************************************************************************
198 instance Outputable OccName where
201 pprOccName :: OccName -> SDoc
202 pprOccName (OccName sp occ)
203 = getPprStyle $ \ sty ->
204 pprEncodedFS occ <> if debugStyle sty then
205 braces (text (briefNameSpaceFlavour sp))
210 %************************************************************************
212 \subsection{Construction}
214 %************************************************************************
216 *Sys* things do no encoding; the caller should ensure that the thing is
-- Build an OccName from a string the caller has already encoded; no
-- encoding happens here.  The ASSERT2 checks the string with
-- alreadyEncoded and reports the string itself on failure.
mkSysOcc :: NameSpace -> EncodedString -> OccName
mkSysOcc ns encoded
  = ASSERT2( alreadyEncoded encoded, text encoded )
    OccName ns (mkFastString encoded)
224 mkSysOccFS :: NameSpace -> EncodedFS -> OccName
225 mkSysOccFS occ_sp fs = ASSERT2( alreadyEncodedFS fs, ppr fs )
-- OccName for a foreign call, always in the variable name space.
-- Unlike mkSysOcc, this does not check that the string is already
-- encoded, because the string will be something like
--      "{__ccall f dyn Int# -> Int#}"
-- That packs a lot into something that then parses like an Id, and
-- alreadyEncoded would complain about the braces.
mkFCallOcc :: EncodedString -> OccName
mkFCallOcc = OccName varName . mkFastString
-- Kind constructors are built by their own function because, uniquely,
-- their names are not encoded: they look like '*'.  As a result kinds
-- such as (* -> (* -> *)) appear as-is *even in interface files*.
-- mkSysOcc cannot be used here, since its ASSERT does not hold for
-- such names.
mkKindOccFS :: NameSpace -> EncodedFS -> OccName
mkKindOccFS = OccName
243 *Source-code* things are encoded.
-- Encode a user-written FastString and wrap it in the given name space.
mkOccFS :: NameSpace -> UserFS -> OccName
mkOccFS occ_sp = mkSysOccFS occ_sp . encodeFS
-- String counterpart of mkOccFS: encode the string, then build the
-- OccName in the given name space.
mkOccName :: NameSpace -> String -> OccName
mkOccName ns = mkSysOcc ns . encode
-- Encode a user-written name and place it in the variable name space.
mkVarOcc :: UserFS -> OccName
mkVarOcc = mkOccFS varName
-- Encode a user-written name and place it in the type-variable name
-- space.
mkTyVarOcc :: UserFS -> OccName
mkTyVarOcc = mkOccFS tvName
-- Like mkVarOcc, but for a name that is already encoded, so no
-- encoding is applied.
mkVarOccEncoded :: EncodedFS -> OccName
mkVarOccEncoded = mkSysOccFS varName
264 %************************************************************************
268 %************************************************************************
270 OccEnvs are used mainly for the envts in ModIfaces.
272 They are efficient, because FastStrings have unique Int# keys. We assume
273 this key is less than 2^24, so we can make a Unique using
274 mkUnique ns key :: Unique
275 where 'ns' is a Char representing the name space. This in turn makes it
276 easy to build an OccEnv.
279 instance Uniquable OccName where
280 getUnique (OccName ns fs)
281 = mkUnique char (I# (uniqueOfFS fs))
282 where -- See notes above about this getUnique function
289 type OccEnv a = UniqFM a
291 emptyOccEnv :: OccEnv a
292 unitOccEnv :: OccName -> a -> OccEnv a
293 extendOccEnv :: OccEnv a -> OccName -> a -> OccEnv a
294 extendOccEnvList :: OccEnv a -> [(OccName, a)] -> OccEnv a
295 lookupOccEnv :: OccEnv a -> OccName -> Maybe a
296 mkOccEnv :: [(OccName,a)] -> OccEnv a
297 elemOccEnv :: OccName -> OccEnv a -> Bool
298 foldOccEnv :: (a -> b -> b) -> b -> OccEnv a -> b
299 occEnvElts :: OccEnv a -> [a]
300 extendOccEnv_C :: (a->a->a) -> OccEnv a -> OccName -> a -> OccEnv a
301 plusOccEnv :: OccEnv a -> OccEnv a -> OccEnv a
302 plusOccEnv_C :: (a->a->a) -> OccEnv a -> OccEnv a -> OccEnv a
304 emptyOccEnv = emptyUFM
306 extendOccEnv = addToUFM
307 extendOccEnvList = addListToUFM
308 lookupOccEnv = lookupUFM
314 plusOccEnv_C = plusUFM_C
315 extendOccEnv_C = addToUFM_C
318 type OccSet = UniqFM OccName
320 emptyOccSet :: OccSet
321 unitOccSet :: OccName -> OccSet
322 mkOccSet :: [OccName] -> OccSet
323 extendOccSet :: OccSet -> OccName -> OccSet
324 extendOccSetList :: OccSet -> [OccName] -> OccSet
325 unionOccSets :: OccSet -> OccSet -> OccSet
326 unionManyOccSets :: [OccSet] -> OccSet
327 minusOccSet :: OccSet -> OccSet -> OccSet
328 elemOccSet :: OccName -> OccSet -> Bool
329 occSetElts :: OccSet -> [OccName]
330 foldOccSet :: (OccName -> b -> b) -> b -> OccSet -> b
331 isEmptyOccSet :: OccSet -> Bool
332 intersectOccSet :: OccSet -> OccSet -> OccSet
333 intersectsOccSet :: OccSet -> OccSet -> Bool
335 emptyOccSet = emptyUniqSet
336 unitOccSet = unitUniqSet
338 extendOccSet = addOneToUniqSet
339 extendOccSetList = addListToUniqSet
340 unionOccSets = unionUniqSets
341 unionManyOccSets = unionManyUniqSets
342 minusOccSet = minusUniqSet
343 elemOccSet = elementOfUniqSet
344 occSetElts = uniqSetToList
345 foldOccSet = foldUniqSet
346 isEmptyOccSet = isEmptyUniqSet
347 intersectOccSet = intersectUniqSets
-- Do the two sets share at least one element?  Computed by building the
-- intersection and testing it for emptiness.
intersectsOccSet xs ys = not . isEmptyOccSet $ intersectOccSet xs ys
352 %************************************************************************
354 \subsection{Predicates and taking them apart}
356 %************************************************************************
-- The name's string, still in encoded form.
occNameString :: OccName -> EncodedString
occNameString = unpackFS . occNameFS
-- The name's string decoded back to the form the user typed.
occNameUserString :: OccName -> UserString
occNameUserString = decode . occNameString
-- Replace the name space of an OccName, leaving its string untouched.
setOccNameSpace :: NameSpace -> OccName -> OccName
setOccNameSpace sp occ = occ { occNameSpace = sp }
-- occNameFlavour is used only to generate good error messages.
-- Names in the variable name space get no description at all.
occNameFlavour :: OccName -> SDoc
occNameFlavour occ = case occNameSpace occ of
    DataName  -> ptext SLIT("data constructor")
    TvName    -> ptext SLIT("type variable")
    TcClsName -> ptext SLIT("type constructor or class")
    VarName   -> empty
-- briefOccNameFlavour is used in debug-printing of names: it gives the
-- short tag of the name's name space.
briefOccNameFlavour :: OccName -> String
briefOccNameFlavour = briefNameSpaceFlavour . occNameSpace
-- Short tag for a name space, as shown when names are debug-printed.
briefNameSpaceFlavour :: NameSpace -> String
briefNameSpaceFlavour DataName  = "d"
briefNameSpaceFlavour VarName   = "v"
briefNameSpaceFlavour TvName    = "tv"
briefNameSpaceFlavour TcClsName = "tc"
isVarOcc, isTvOcc, isDataSymOcc, isSymOcc, isTcOcc :: OccName -> Bool

-- Is the name in the variable name space?
isVarOcc occ = case occNameSpace occ of
    VarName -> True
    _       -> False

-- Is the name in the type-variable name space?
isTvOcc occ = case occNameSpace occ of
    TvName -> True
    _      -> False

-- Is the name in the type-constructor/class name space?
isTcOcc occ = case occNameSpace occ of
    TcClsName -> True
    _         -> False
-- True for value-level names: those in the variable or the data
-- constructor name space.
isValOcc :: OccName -> Bool
isValOcc (OccName VarName  _) = True
isValOcc (OccName DataName _) = True
isValOcc _                    = False
-- Is this a data constructor operator?  Names in the DataName and
-- VarName spaces are decoded and then classified by isLexConSym; names
-- in any other space are rejected.
-- Pretty inefficient: the string is decoded on every call.
isDataSymOcc occ = case occNameSpace occ of
    DataName -> is_con_sym
    VarName  -> is_con_sym
    _        -> False
  where
    is_con_sym = isLexConSym (decodeFS (occNameFS occ))
-- Is this the name of a data constructor?  Everything in the DataName
-- space qualifies.  A name in the VarName space qualifies when its
-- decoded string is lexically a constructor (isLexCon).
isDataOcc :: OccName -> Bool
isDataOcc (OccName DataName _) = True
isDataOcc (OccName VarName s)  = isLexCon (decodeFS s)
isDataOcc _                    = False
-- Any operator (data constructor or variable).  Type variables are
-- never operators.
-- Pretty inefficient: the string is decoded on every call.
isSymOcc occ = case occNameSpace occ of
    DataName  -> isLexConSym user_fs
    TcClsName -> isLexConSym user_fs
    VarName   -> isLexSym user_fs
    _         -> False
  where
    user_fs = decodeFS (occNameFS occ)
418 parenSymOcc :: OccName -> SDoc -> SDoc
419 -- Wrap parens around an operator
420 parenSymOcc occ doc | isSymOcc occ = parens doc
426 reportIfUnused :: OccName -> Bool
427 -- Haskell 98 encourages compilers to suppress warnings about
428 -- unused names in a pattern if they start with "_".
429 reportIfUnused occ = case occNameUserString occ of
436 %************************************************************************
438 \subsection{Making system names}
440 %************************************************************************
442 Here's our convention for splitting up the interface file name space:
444 d... dictionary identifiers
445 (local variables, so no name-clash worries)
447 $f... dict-fun identifiers (from inst decls)
448 $dm... default methods
449 $p... superclass selectors
451 :T... compiler-generated tycons for dictionaries
452 :D... ...ditto data cons
453 $sf.. specialised version of f
455 in encoded form these appear as Zdfxxx etc
457 :... keywords (export:, letrec: etc.)
458 --- I THINK THIS IS WRONG!
460 This knowledge is encoded in the following functions.
463 @mk_deriv@ generates an @OccName@ from the prefix and a string.
464 NB: The string must already be encoded!
467 mk_deriv :: NameSpace
468 -> String -- Distinguishes one sort of derived name from another
469 -> EncodedString -- Must be already encoded!! We don't want to encode it a
470 -- second time because encoding isn't idempotent
473 mk_deriv occ_sp sys_prefix str = mkSysOcc occ_sp (encode sys_prefix ++ str)
-- Derived names.  Each function puts a fixed prefix in front of the
-- (already encoded) string of the given OccName, via mk_simple_deriv,
-- and places the result in the name space shown.
mkDictOcc, mkIPOcc, mkWorkerOcc, mkDefaultMethodOcc,
  mkClassTyConOcc, mkClassDataConOcc, mkSpecOcc,
  mkDataConWrapperOcc, mkDerivedTyConOcc, mkForeignExportOcc,
  mkGenOcc1, mkGenOcc2, mkDataTOcc, mkDataCOcc
  :: OccName -> OccName

-- These derived variables have a prefix that no Haskell value could have
mkDataConWrapperOcc = mk_simple_deriv varName  "$W"
mkWorkerOcc         = mk_simple_deriv varName  "$w"
mkDefaultMethodOcc  = mk_simple_deriv varName  "$dm"
mkDerivedTyConOcc   = mk_simple_deriv tcName   ":"   -- The : prefix makes sure it classifies
mkClassTyConOcc     = mk_simple_deriv tcName   ":T"  -- as a tycon/datacon
mkClassDataConOcc   = mk_simple_deriv dataName ":D"  -- We go straight to the "real" data con
                                                     -- for datacons from classes
mkDictOcc           = mk_simple_deriv varName  "$d"
mkIPOcc             = mk_simple_deriv varName  "$i"
mkSpecOcc           = mk_simple_deriv varName  "$s"
mkForeignExportOcc  = mk_simple_deriv varName  "$f"

-- Generic derivable classes
mkGenOcc1           = mk_simple_deriv varName  "$gfrom"
mkGenOcc2           = mk_simple_deriv varName  "$gto"

-- data T = MkT ... deriving( Data ) needs definitions for
--      $tT   :: Data.Generics.Basics.DataType
--      $cMkT :: Data.Generics.Basics.Constr
mkDataTOcc          = mk_simple_deriv varName  "$t"
mkDataCOcc          = mk_simple_deriv varName  "$c"
-- Derive a new OccName from an existing one: take the existing name's
-- encoded string and hand it, with the prefix 'px' and the target name
-- space 'sp', to mk_deriv.
mk_simple_deriv :: NameSpace -> String -> OccName -> OccName
mk_simple_deriv sp px occ = mk_deriv sp px (occNameString occ)
-- Data constructor workers are made by setting the name space
-- of the data constructor OccName (which should be a DataName)
-- to the variable name space; the string itself is left unchanged.
mkDataConWorkerOcc :: OccName -> OccName
mkDataConWorkerOcc datacon_occ = setOccNameSpace varName datacon_occ
-- Name of a superclass selector: the "$p" prefix, then the superclass
-- index, then the class name.
mkSuperDictSelOcc :: Int      -- Index of superclass, eg 3
                  -> OccName  -- Class, eg "Ord"
                  -> OccName  -- eg "$p3Ord"
mkSuperDictSelOcc index cls_occ = mk_deriv varName "$p" suffix
  where
    suffix = shows index (occNameString cls_occ)
520 mkLocalOcc :: Unique -- Unique
521 -> OccName -- Local name (e.g. "sat")
522 -> OccName -- Nice unique version ("$L23sat")
524 = mk_deriv varName ("$L" ++ show uniq) (occNameString occ)
525 -- The Unique might print with characters
526 -- that need encoding (e.g. 'z'!)
531 mkDFunOcc :: EncodedString -- Typically the class and type glommed together e.g. "OrdMaybe"
532 -- Only used in debug mode, for extra clarity
533 -> Bool -- True <=> hs-boot instance dfun
534 -> Int -- Unique index
535 -> OccName -- "$f3OrdMaybe"
537 -- In hs-boot files we make dict funs like $fx7ClsTy, which get bound to the real
538 -- thing when we compile the mother module. Reason: we don't know exactly
539 -- what the mother module will call it.
541 mkDFunOcc info_str is_boot index
542 = mk_deriv VarName prefix string
544 prefix | is_boot = "$fx"
546 string | opt_PprStyle_Debug = show index ++ info_str
547 | otherwise = show index
550 We used to add a '$m' to indicate a method, but that gives rise to bad
551 error messages from the type checker when we print the function name or pattern
552 of an instance-decl binding. Why? Because the binding is zapped
553 to use the method name in place of the selector name.
554 (See TcClassDcl.tcMethodBind)
556 The way it is now, -ddump-xx output may look confusing, but
557 you can always say -dppr-debug to get the uniques.
559 However, we *do* have to zap the first character to be lower case,
560 because overloaded constructors (blarg) generate methods too.
561 And convert to VarName space
563 e.g. a call to constructor MkFoo where
564 data (Ord a) => Foo a = MkFoo a
566 If this is necessary, we do it by prefixing '$m'. These
567 guys never show up in error messages. What a hack.
-- A name already in the variable name space is used unchanged as the
-- method name.  Any other name is moved into the variable name space
-- with a "$m" prefix.
mkMethodOcc :: OccName -> OccName
mkMethodOcc occ
  | isVarOcc occ = occ
  | otherwise    = mk_simple_deriv varName "$m" occ
576 %************************************************************************
578 \subsection{Tidying them up}
580 %************************************************************************
582 Before we print chunks of code we like to rename it so that
583 we don't have to print lots of silly uniques in it. But we mustn't
584 accidentally introduce name clashes! So the idea is that we leave the
585 OccName alone unless it accidentally clashes with one that is already
586 in scope; if so, we tack on '1' at the end and try again, then '2', and
587 so on till we find a unique one.
589 There's a wrinkle for operators. Consider '>>='. We can't use '>>=1'
590 because that isn't a single lexeme. So we encode it to 'zgzgze' and *then*
591 tack on the '1', if necessary.
-- The in-scope OccNames.  The Int stored against each name gives a
-- plausible starting point for new guesses.
type TidyOccEnv = OccEnv Int

emptyTidyOccEnv = emptyOccEnv

-- Initialise with names to avoid!  Every listed name is entered with
-- the starting guess 1.
initTidyOccEnv :: [OccName] -> TidyOccEnv
initTidyOccEnv occs = foldl mark_used emptyTidyOccEnv occs
  where
    mark_used env occ = extendOccEnv env occ 1
-- Pick a name that does not clash with anything in the environment.
-- Returns the extended environment together with the chosen name.
tidyOccName :: TidyOccEnv -> OccName -> (TidyOccEnv, OccName)
tidyOccName in_scope occ
  = maybe unused clash (lookupOccEnv in_scope occ)
  where
    -- Not already used: make it used, and keep the name as it is.
    unused = (extendOccEnv in_scope occ 1, occ)

    -- Already used: make a new guess by appending the stored number,
    -- change the guess base, and try again.
    clash n = tidyOccName (extendOccEnv in_scope occ (n+1))
                          (mkSysOcc (occNameSpace occ)
                                    (unpackFS (occNameFS occ) ++ show n))
616 %************************************************************************
618 \subsection{The 'Z' encoding}
620 %************************************************************************
622 This is the main name-encoding and decoding function. It encodes any
623 string into a string that is acceptable as a C name. This is the name
624 by which things are known right through the compiler.
626 The basic encoding scheme is this.
628 * Tuples (,,,) are coded as Z4T
630 * Alphabetic characters (upper and lower) and digits
631 all translate to themselves;
632 except 'Z', which translates to 'ZZ'
633 and 'z', which translates to 'zz'
634 We need both so that we can preserve the variable/tycon distinction
636 * Most other printable characters translate to 'zx' or 'Zx' for some
637 alphabetic character x
639 * The others translate as 'znnnU' where 'nnn' is the decimal number
643 --------------------------
655 (# #) Z1H unboxed 1-tuple (note the space)
656 (#,,,,#) Z5H unboxed 5-tuple
657 (NB: There is no Z1T nor Z0H.)
660 -- alreadyEncoded is used in ASSERTs to check for encoded
661 -- strings. It isn't fail-safe, of course, because, say 'zh' might
662 -- be encoded or not.
663 alreadyEncoded :: String -> Bool
664 alreadyEncoded s = all ok s
667 -- This is a bit of a lie; if we really wanted spaces
668 -- in names we'd have to encode them. But we do put
669 -- spaces in ccall "occurrences", and we don't want to
671 ok ch = isAlphaNum ch
-- FastString version of alreadyEncoded: unpack, then run the same check.
alreadyEncodedFS :: FastString -> Bool
alreadyEncodedFS = alreadyEncoded . unpackFS
676 encode :: UserString -> EncodedString
677 encode cs = case maybe_tuple cs of
678 Just n -> n -- Tuples go to Z2T etc
682 go (c:cs) = encode_ch c ++ go cs
684 encodeFS :: UserFS -> EncodedFS
685 encodeFS fast_str | all unencodedChar str = fast_str
686 | otherwise = mkFastString (encode str)
688 str = unpackFS fast_str
-- True for chars that don't need encoding: ASCII letters and digits,
-- except 'Z' and 'z', which are excluded up front.
unencodedChar :: Char -> Bool
unencodedChar c
  | c == 'Z' || c == 'z' = False
  | otherwise            = between 'a' 'z' || between 'A' 'Z' || between '0' '9'
  where
    between lo hi = lo <= c && c <= hi
697 encode_ch :: Char -> EncodedString
698 encode_ch c | unencodedChar c = [c] -- Common case first
701 encode_ch '(' = "ZL" -- Needed for things like (,), and (->)
702 encode_ch ')' = "ZR" -- For symmetry with (
722 encode_ch '\'' = "zq"
723 encode_ch '\\' = "zr"
728 encode_ch c = 'z' : shows (ord c) "U"
731 Decode is used for user printing.
-- FastString version of decode: unpack, decode, and pack again.
decodeFS :: FastString -> FastString
decodeFS = mkFastString . decode . unpackFS
737 decode :: EncodedString -> UserString
739 decode ('Z' : d : rest) | isDigit d = decode_tuple d rest
740 | otherwise = decode_upper d : decode rest
741 decode ('z' : d : rest) | isDigit d = decode_num_esc d rest
742 | otherwise = decode_lower d : decode rest
743 decode (c : rest) = c : decode rest
-- Decode the character that follows a 'Z' (decode_upper) or a 'z'
-- (decode_lower) escape.  A character with no table entry is handed to
-- pprTrace along with the function's name and is used as the result
-- (presumably after a trace message is printed; pprTrace is defined
-- elsewhere).
decode_upper, decode_lower :: Char -> Char

decode_upper ch = case lookup ch upper_codes of
    Just user_ch -> user_ch
    Nothing      -> pprTrace "decode_upper" (char ch) ch
  where
    upper_codes =
      [ ('L', '(')
      , ('R', ')')
      , ('M', '[')
      , ('N', ']')
      , ('C', ':')
      , ('Z', 'Z')
      ]

decode_lower ch = case lookup ch lower_codes of
    Just user_ch -> user_ch
    Nothing      -> pprTrace "decode_lower" (char ch) ch
  where
    lower_codes =
      [ ('z', 'z')
      , ('a', '&')
      , ('b', '|')
      , ('c', '^')
      , ('d', '$')
      , ('e', '=')
      , ('g', '>')
      , ('h', '#')
      , ('i', '.')
      , ('l', '<')
      , ('m', '-')
      , ('n', '!')
      , ('p', '+')
      , ('q', '\'')
      , ('r', '\\')
      , ('s', '/')
      , ('t', '*')
      , ('u', '_')
      , ('v', '%')
      ]
776 -- Characters not having a specific code are coded as z224U
777 decode_num_esc d rest
778 = go (digitToInt d) rest
780 go n (c : rest) | isDigit c = go (10*n + digitToInt c) rest
781 go n ('U' : rest) = chr n : decode rest
782 go n other = pprPanic "decode_num_esc" (ppr n <+> text other)
784 decode_tuple :: Char -> EncodedString -> UserString
786 = go (digitToInt d) rest
788 -- NB. recurse back to decode after decoding the tuple, because
789 -- the tuple might be embedded in a longer name.
790 go n (c : rest) | isDigit c = go (10*n + digitToInt c) rest
791 go 0 ('T':rest) = "()" ++ decode rest
792 go n ('T':rest) = '(' : replicate (n-1) ',' ++ ")" ++ decode rest
793 go 1 ('H':rest) = "(# #)" ++ decode rest
794 go n ('H':rest) = '(' : '#' : replicate (n-1) ',' ++ "#)" ++ decode rest
795 go n other = pprPanic "decode_tuple" (ppr n <+> text other)
799 %************************************************************************
801 Stuff for dealing with tuples
803 %************************************************************************
805 Tuples are encoded as
807 for 3-tuples or unboxed 3-tuples respectively. No other encoding starts
810 * "(# #)" is the tycon for an unboxed 1-tuple (not 0-tuple)
811 There are no unboxed 0-tuples.
813 * "()" is the tycon for a boxed 0-tuple.
814 There are no boxed 1-tuples.
818 maybe_tuple :: UserString -> Maybe EncodedString
820 maybe_tuple "(# #)" = Just("Z1H")
821 maybe_tuple ('(' : '#' : cs) = case count_commas (0::Int) cs of
822 (n, '#' : ')' : cs) -> Just ('Z' : shows (n+1) "H")
824 maybe_tuple "()" = Just("Z0T")
825 maybe_tuple ('(' : cs) = case count_commas (0::Int) cs of
826 (n, ')' : cs) -> Just ('Z' : shows (n+1) "T")
828 maybe_tuple other = Nothing
-- Strip the leading run of commas from the string.  Returns the
-- starting count plus the number of commas removed, and the remainder.
count_commas :: Int -> String -> (Int, String)
count_commas n cs = (n + length commas, rest)
  where
    (commas, rest) = span (== ',') cs
836 mkTupleOcc :: NameSpace -> Boxity -> Arity -> OccName
838 = OccName ns (mkFastString ('Z' : (show ar ++ bx_char)))
844 isTupleOcc_maybe :: OccName -> Maybe (NameSpace, Boxity, Arity)
845 -- Tuples are special, because there are so many of them!
846 isTupleOcc_maybe (OccName ns fs)
847 = case unpackFS fs of
848 ('Z':d:rest) | isDigit d -> Just (decode_tup (digitToInt d) rest)
851 decode_tup n "H" = (ns, Unboxed, n)
852 decode_tup n "T" = (ns, Boxed, n)
853 decode_tup n (d:rest) = decode_tup (n*10 + digitToInt d) rest
856 %************************************************************************
858 \subsection{Lexical categories}
860 %************************************************************************
862 These functions test strings to see if they fit the lexical categories
863 defined in the Haskell report.
isLexCon,   isLexVar,    isLexId,    isLexSym    :: FastString -> Bool
isLexConId, isLexConSym, isLexVarId, isLexVarSym :: FastString -> Bool

-- Each combined category holds when either of its two component tests
-- holds; the components are tried in the order listed.
isLexCon cs = any ($ cs) [isLexConId,  isLexConSym]
isLexVar cs = any ($ cs) [isLexVarId,  isLexVarSym]

isLexId  cs = any ($ cs) [isLexConId,  isLexVarId]
isLexSym cs = any ($ cs) [isLexConSym, isLexVarSym]
-- All four tests reject the empty string and otherwise classify by the
-- first character, with one special-cased literal each for the two
-- constructor categories.

-- Prefix type or data constructors, e.g. "Foo", "[]", "(,)"
isLexConId cs
  = not (nullFastString cs)
    && (cs == FSLIT("[]") || startsConId (headFS cs))

-- Ordinary prefix identifiers, e.g. "x", "_x"
isLexVarId cs
  = not (nullFastString cs) && startsVarId (headFS cs)

-- Infix type or data constructors, e.g. ":-:", ":", "->"
isLexConSym cs
  = not (nullFastString cs)
    && (cs == FSLIT("->") || startsConSym (headFS cs))

-- Infix identifiers, e.g. "+"
isLexVarSym cs
  = not (nullFastString cs) && startsVarSym (headFS cs)
-- Classify a name by its first character.
startsVarSym, startsVarId, startsConSym, startsConId :: Char -> Bool

-- Infix Ids
startsVarSym c = any ($ c) [isSymbolASCII, isSymbolISO]

-- Infix data constructors
startsConSym c = ':' == c

-- Ordinary Ids
startsVarId c = any ($ c) [isLower, isLowerISO, (== '_')]

-- Ordinary type constructors and data constructors
startsConId c = any ($ c) [isUpper, isUpperISO, (== '(')]
-- ASCII characters that count as symbol characters here.
isSymbolASCII c = c `elem` "!#$%&*+./<=>?@\\^|~-"

-- Symbol characters among the codes above ASCII: 0xa1 to 0xbf, plus
-- 0xd7 and 0xf7.
isSymbolISO c = oc == 0xd7 || oc == 0xf7 || (0xa1 <= oc && oc <= 0xbf)
  where
    oc = ord c

-- Upper-case range 0xc0 to 0xde, excluding 0xd7.
isUpperISO c = 0xc0 <= oc && oc <= 0xde && oc /= 0xd7
  where
    oc = ord c

-- Lower-case range 0xdf to 0xff, excluding 0xf7.
isLowerISO c = 0xdf <= oc && oc <= 0xff && oc /= 0xf7
  where
    oc = ord c
911 %************************************************************************
914 Here rather than BinIface because OccName is abstract
916 %************************************************************************
919 instance Binary NameSpace where
922 put_ bh DataName = do
926 put_ bh TcClsName = do
931 0 -> do return VarName
932 1 -> do return DataName
933 2 -> do return TvName
934 _ -> do return TcClsName
936 instance Binary OccName where
937 put_ bh (OccName aa ab) = do
943 return (OccName aa ab)