1 {-% DrIFT (Automatic class derivations for Haskell) v1.1 %-}
3 % (c) The GRASP/AQUA Project, Glasgow University, 1992-1998
6 \section[OccName]{@OccName@}
10 -- The NameSpace type; abstact
11 NameSpace, tcName, clsName, tcClsName, dataName, varName,
12 tvName, srcDataName, nameSpaceString,
15 OccName, -- Abstract, instance of Outputable
19 OccEnv, emptyOccEnv, unitOccEnv, extendOccEnv,
20 lookupOccEnv, mkOccEnv, extendOccEnvList, elemOccEnv,
21 occEnvElts, foldOccEnv, plusOccEnv, plusOccEnv_C, extendOccEnv_C,
25 OccSet, emptyOccSet, unitOccSet, mkOccSet, extendOccSet, extendOccSetList,
26 unionOccSets, unionManyOccSets, minusOccSet, elemOccSet, occSetElts,
27 foldOccSet, isEmptyOccSet, intersectOccSet, intersectsOccSet,
29 mkOccName, mkOccFS, mkSysOcc, mkSysOccFS, mkFCallOcc, mkKindOccFS,
30 mkVarOcc, mkVarOccEncoded, mkTyVarOcc,
31 mkSuperDictSelOcc, mkDFunOcc, mkForeignExportOcc,
32 mkDictOcc, mkIPOcc, mkWorkerOcc, mkMethodOcc, mkDefaultMethodOcc,
33 mkDerivedTyConOcc, mkClassTyConOcc, mkClassDataConOcc, mkSpecOcc,
34 mkGenOcc1, mkGenOcc2, mkLocalOcc, mkDataTOcc, mkDataCOcc,
35 mkDataConWrapperOcc, mkDataConWorkerOcc,
37 isVarOcc, isTvOcc, isTcOcc, isDataOcc, isDataSymOcc, isSymOcc, isValOcc,
38 parenSymOcc, reportIfUnused,
40 occNameFS, occNameString, occNameUserString, occNameSpace,
41 occNameFlavour, briefOccNameFlavour,
44 mkTupleOcc, isTupleOcc_maybe,
47 TidyOccEnv, emptyTidyOccEnv, tidyOccName, initTidyOccEnv,
50 EncodedString, EncodedFS, UserString, UserFS, encode, encodeFS, decode, pprEncodedFS,
52 -- The basic form of names
53 isLexCon, isLexVar, isLexId, isLexSym,
54 isLexConId, isLexConSym, isLexVarId, isLexVarSym,
55 isLowerISO, isUpperISO
59 #include "HsVersions.h"
61 import Char ( isDigit, isUpper, isLower, isAlphaNum, ord, chr, digitToInt )
62 import Util ( thenCmp )
63 import Unique ( Unique, mkUnique, Uniquable(..) )
64 import BasicTypes ( Boxity(..), Arity )
74 We hold both module names and identifier names in a 'Z-encoded' form
75 that makes them acceptable both as a C identifier and as a Haskell
78 They can always be decoded again when printing error messages
79 or anything else for the user, but it does make sense for it
80 to be represented here in encoded form, so that when generating
81 code the encoding operation is not performed on each occurrence.
83 These type synonyms help documentation.
86 type UserFS = FastString -- As the user typed it
87 type EncodedFS = FastString -- Encoded form
89 type UserString = String -- As the user typed it
90 type EncodedString = String -- Encoded form
93 pprEncodedFS :: EncodedFS -> SDoc
95 = getPprStyle $ \ sty ->
96 if userStyle sty || dumpStyle sty
97 -- ftext (decodeFS fs) would needlessly pack the string again
98 then text (decode (unpackFS fs))
102 %************************************************************************
104 \subsection{Name space}
106 %************************************************************************
109 data NameSpace = VarName -- Variables, including "source" data constructors
110 | DataName -- "Real" data constructors
111 | TvName -- Type variables
112 | TcClsName -- Type constructors and classes; Haskell has them
113 -- in the same name space for now.
115 {-! derive: Binary !-}
117 -- Note [Data Constructors]
118 -- see also: Note [Data Constructor Naming] in DataCon.lhs
120 -- "Source" data constructors are the data constructors mentioned
121 -- in Haskell source code
123 -- "Real" data constructors are the data constructors of the
124 -- representation type, which may not be the same as the source
128 -- data T = T !(Int,Int)
130 -- The source datacon has type (Int,Int) -> T
131 -- The real datacon has type Int -> Int -> T
132 -- GHC chooses a representation based on the strictness etc.
135 -- Though type constructors and classes are in the same name space now,
136 -- the NameSpace type is abstract, so we can easily separate them later
137 tcName = TcClsName -- Type constructors
138 clsName = TcClsName -- Classes
139 tcClsName = TcClsName -- Not sure which!
142 srcDataName = DataName -- Haskell-source data constructors should be
143 -- in the Data name space
148 nameSpaceString :: NameSpace -> String
149 nameSpaceString DataName = "Data constructor"
150 nameSpaceString VarName = "Variable"
151 nameSpaceString TvName = "Type variable"
152 nameSpaceString TcClsName = "Type constructor or class"
156 %************************************************************************
158 \subsection[Name-pieces-datatypes]{The @OccName@ datatypes}
160 %************************************************************************
163 data OccName = OccName
166 {-! derive : Binary !-}
171 instance Eq OccName where
172 (OccName sp1 s1) == (OccName sp2 s2) = s1 == s2 && sp1 == sp2
174 instance Ord OccName where
175 compare (OccName sp1 s1) (OccName sp2 s2) = (s1 `compare` s2) `thenCmp`
180 %************************************************************************
182 \subsection{Printing}
184 %************************************************************************
187 instance Outputable OccName where
190 pprOccName :: OccName -> SDoc
191 pprOccName (OccName sp occ)
192 = getPprStyle $ \ sty ->
193 pprEncodedFS occ <> if debugStyle sty then
194 braces (text (briefNameSpaceFlavour sp))
199 %************************************************************************
201 \subsection{Construction}
203 %*****p*******************************************************************
205 *Sys* things do no encoding; the caller should ensure that the thing is
209 mkSysOcc :: NameSpace -> EncodedString -> OccName
210 mkSysOcc occ_sp str = ASSERT2( alreadyEncoded str, text str )
211 OccName occ_sp (mkFastString str)
213 mkSysOccFS :: NameSpace -> EncodedFS -> OccName
214 mkSysOccFS occ_sp fs = ASSERT2( alreadyEncodedFS fs, ppr fs )
217 mkFCallOcc :: EncodedString -> OccName
218 -- This version of mkSysOcc doesn't check that the string is already encoded,
219 -- because it will be something like "{__ccall f dyn Int# -> Int#}"
220 -- This encodes a lot into something that then parses like an Id.
221 -- But then alreadyEncoded complains about the braces!
222 mkFCallOcc str = OccName varName (mkFastString str)
224 -- Kind constructors get a special function. Uniquely, they are not encoded,
225 -- so that they have names like '*'. This means that *even in interface files*
226 -- we'll get kinds like (* -> (* -> *)). We can't use mkSysOcc because it
227 -- has an ASSERT that doesn't hold.
228 mkKindOccFS :: NameSpace -> EncodedFS -> OccName
229 mkKindOccFS occ_sp fs = OccName occ_sp fs
232 *Source-code* things are encoded.
235 mkOccFS :: NameSpace -> UserFS -> OccName
236 mkOccFS occ_sp fs = mkSysOccFS occ_sp (encodeFS fs)
238 mkOccName :: NameSpace -> String -> OccName
239 mkOccName ns s = mkSysOcc ns (encode s)
241 mkVarOcc :: UserFS -> OccName
242 mkVarOcc fs = mkSysOccFS varName (encodeFS fs)
244 mkTyVarOcc :: UserFS -> OccName
245 mkTyVarOcc fs = mkSysOccFS tvName (encodeFS fs)
247 mkVarOccEncoded :: EncodedFS -> OccName
248 mkVarOccEncoded fs = mkSysOccFS varName fs
253 %************************************************************************
257 %************************************************************************
259 OccEnvs are used mainly for the envts in ModIfaces.
261 They are efficient, because FastStrings have unique Int# keys. We assume
262 this key is less than 2^24, so we can make a Unique using
263 mkUnique ns key :: Unique
264 where 'ns' is a Char reprsenting the name space. This in turn makes it
265 easy to build an OccEnv.
268 instance Uniquable OccName where
269 getUnique (OccName ns fs)
270 = mkUnique char (I# (uniqueOfFS fs))
271 where -- See notes above about this getUnique function
278 type OccEnv a = UniqFM a
280 emptyOccEnv :: OccEnv a
281 unitOccEnv :: OccName -> a -> OccEnv a
282 extendOccEnv :: OccEnv a -> OccName -> a -> OccEnv a
283 extendOccEnvList :: OccEnv a -> [(OccName, a)] -> OccEnv a
284 lookupOccEnv :: OccEnv a -> OccName -> Maybe a
285 mkOccEnv :: [(OccName,a)] -> OccEnv a
286 elemOccEnv :: OccName -> OccEnv a -> Bool
287 foldOccEnv :: (a -> b -> b) -> b -> OccEnv a -> b
288 occEnvElts :: OccEnv a -> [a]
289 extendOccEnv_C :: (a->a->a) -> OccEnv a -> OccName -> a -> OccEnv a
290 plusOccEnv :: OccEnv a -> OccEnv a -> OccEnv a
291 plusOccEnv_C :: (a->a->a) -> OccEnv a -> OccEnv a -> OccEnv a
293 emptyOccEnv = emptyUFM
295 extendOccEnv = addToUFM
296 extendOccEnvList = addListToUFM
297 lookupOccEnv = lookupUFM
303 plusOccEnv_C = plusUFM_C
304 extendOccEnv_C = addToUFM_C
307 type OccSet = UniqFM OccName
309 emptyOccSet :: OccSet
310 unitOccSet :: OccName -> OccSet
311 mkOccSet :: [OccName] -> OccSet
312 extendOccSet :: OccSet -> OccName -> OccSet
313 extendOccSetList :: OccSet -> [OccName] -> OccSet
314 unionOccSets :: OccSet -> OccSet -> OccSet
315 unionManyOccSets :: [OccSet] -> OccSet
316 minusOccSet :: OccSet -> OccSet -> OccSet
317 elemOccSet :: OccName -> OccSet -> Bool
318 occSetElts :: OccSet -> [OccName]
319 foldOccSet :: (OccName -> b -> b) -> b -> OccSet -> b
320 isEmptyOccSet :: OccSet -> Bool
321 intersectOccSet :: OccSet -> OccSet -> OccSet
322 intersectsOccSet :: OccSet -> OccSet -> Bool
324 emptyOccSet = emptyUniqSet
325 unitOccSet = unitUniqSet
327 extendOccSet = addOneToUniqSet
328 extendOccSetList = addListToUniqSet
329 unionOccSets = unionUniqSets
330 unionManyOccSets = unionManyUniqSets
331 minusOccSet = minusUniqSet
332 elemOccSet = elementOfUniqSet
333 occSetElts = uniqSetToList
334 foldOccSet = foldUniqSet
335 isEmptyOccSet = isEmptyUniqSet
336 intersectOccSet = intersectUniqSets
337 intersectsOccSet s1 s2 = not (isEmptyOccSet (s1 `intersectOccSet` s2))
341 %************************************************************************
343 \subsection{Predicates and taking them apart}
345 %************************************************************************
348 occNameFS :: OccName -> EncodedFS
349 occNameFS (OccName _ s) = s
351 occNameString :: OccName -> EncodedString
352 occNameString (OccName _ s) = unpackFS s
354 occNameUserString :: OccName -> UserString
355 occNameUserString occ = decode (occNameString occ)
357 occNameSpace :: OccName -> NameSpace
358 occNameSpace (OccName sp _) = sp
360 setOccNameSpace :: NameSpace -> OccName -> OccName
361 setOccNameSpace sp (OccName _ occ) = OccName sp occ
363 -- occNameFlavour is used only to generate good error messages
364 occNameFlavour :: OccName -> SDoc
365 occNameFlavour (OccName DataName _) = ptext SLIT("data constructor")
366 occNameFlavour (OccName TvName _) = ptext SLIT("type variable")
367 occNameFlavour (OccName TcClsName _) = ptext SLIT("type constructor or class")
368 occNameFlavour (OccName VarName s) = empty
370 -- briefOccNameFlavour is used in debug-printing of names
371 briefOccNameFlavour :: OccName -> String
372 briefOccNameFlavour (OccName sp _) = briefNameSpaceFlavour sp
374 briefNameSpaceFlavour DataName = "d"
375 briefNameSpaceFlavour VarName = "v"
376 briefNameSpaceFlavour TvName = "tv"
377 briefNameSpaceFlavour TcClsName = "tc"
381 isVarOcc, isTvOcc, isDataSymOcc, isSymOcc, isTcOcc :: OccName -> Bool
383 isVarOcc (OccName VarName _) = True
384 isVarOcc other = False
386 isTvOcc (OccName TvName _) = True
387 isTvOcc other = False
389 isTcOcc (OccName TcClsName _) = True
390 isTcOcc other = False
392 isValOcc (OccName VarName _) = True
393 isValOcc (OccName DataName _) = True
394 isValOcc other = False
396 -- Data constructor operator (starts with ':', or '[]')
397 -- Pretty inefficient!
398 isDataSymOcc (OccName DataName s) = isLexConSym (decodeFS s)
399 isDataSymOcc (OccName VarName s) = isLexConSym (decodeFS s)
400 isDataSymOcc other = False
402 isDataOcc (OccName DataName _) = True
403 isDataOcc (OccName VarName s) = isLexCon (decodeFS s)
404 isDataOcc other = False
406 -- Any operator (data constructor or variable)
407 -- Pretty inefficient!
408 isSymOcc (OccName DataName s) = isLexConSym (decodeFS s)
409 isSymOcc (OccName TcClsName s) = isLexConSym (decodeFS s)
410 isSymOcc (OccName VarName s) = isLexSym (decodeFS s)
411 isSymOcc other = False
413 parenSymOcc :: OccName -> SDoc -> SDoc
414 -- Wrap parens around an operator
415 parenSymOcc occ doc | isSymOcc occ = parens doc
421 reportIfUnused :: OccName -> Bool
422 -- Haskell 98 encourages compilers to suppress warnings about
423 -- unused names in a pattern if they start with "_".
424 reportIfUnused occ = case occNameUserString occ of
431 %************************************************************************
433 \subsection{Making system names}
435 %************************************************************************
437 Here's our convention for splitting up the interface file name space:
439 d... dictionary identifiers
440 (local variables, so no name-clash worries)
442 $f... dict-fun identifiers (from inst decls)
443 $dm... default methods
444 $p... superclass selectors
446 :T... compiler-generated tycons for dictionaries
447 :D... ...ditto data cons
448 $sf.. specialised version of f
450 in encoded form these appear as Zdfxxx etc
452 :... keywords (export:, letrec: etc.)
453 --- I THINK THIS IS WRONG!
455 This knowledge is encoded in the following functions.
458 @mk_deriv@ generates an @OccName@ from the prefix and a string.
459 NB: The string must already be encoded!
462 mk_deriv :: NameSpace
463 -> String -- Distinguishes one sort of derived name from another
464 -> EncodedString -- Must be already encoded!! We don't want to encode it a
465 -- second time because encoding isn't idempotent
468 mk_deriv occ_sp sys_prefix str = mkSysOcc occ_sp (encode sys_prefix ++ str)
472 mkDictOcc, mkIPOcc, mkWorkerOcc, mkDefaultMethodOcc,
473 mkClassTyConOcc, mkClassDataConOcc, mkSpecOcc
474 :: OccName -> OccName
476 -- These derived variables have a prefix that no Haskell value could have
477 mkDataConWrapperOcc = mk_simple_deriv varName "$W"
478 mkWorkerOcc = mk_simple_deriv varName "$w"
479 mkDefaultMethodOcc = mk_simple_deriv varName "$dm"
480 mkDerivedTyConOcc = mk_simple_deriv tcName ":" -- The : prefix makes sure it classifies
481 mkClassTyConOcc = mk_simple_deriv tcName ":T" -- as a tycon/datacon
482 mkClassDataConOcc = mk_simple_deriv dataName ":D" -- We go straight to the "real" data con
483 -- for datacons from classes
484 mkDictOcc = mk_simple_deriv varName "$d"
485 mkIPOcc = mk_simple_deriv varName "$i"
486 mkSpecOcc = mk_simple_deriv varName "$s"
487 mkForeignExportOcc = mk_simple_deriv varName "$f"
489 -- Generic derivable classes
490 mkGenOcc1 = mk_simple_deriv varName "$gfrom"
491 mkGenOcc2 = mk_simple_deriv varName "$gto"
493 -- data T = MkT ... deriving( Data ) needs defintions for
494 -- $tT :: Data.Generics.Basics.DataType
495 -- $cMkT :: Data.Generics.Basics.Constr
496 mkDataTOcc = mk_simple_deriv varName "$t"
497 mkDataCOcc = mk_simple_deriv varName "$c"
499 mk_simple_deriv sp px occ = mk_deriv sp px (occNameString occ)
502 -- Data constructor workers are made by setting the name space
503 -- of the data constructor OccName (which should be a DataName)
505 mkDataConWorkerOcc datacon_occ = setOccNameSpace varName datacon_occ
509 mkSuperDictSelOcc :: Int -- Index of superclass, eg 3
510 -> OccName -- Class, eg "Ord"
511 -> OccName -- eg "$p3Ord"
512 mkSuperDictSelOcc index cls_occ
513 = mk_deriv varName "$p" (show index ++ occNameString cls_occ)
515 mkLocalOcc :: Unique -- Unique
516 -> OccName -- Local name (e.g. "sat")
517 -> OccName -- Nice unique version ("$L23sat")
519 = mk_deriv varName ("$L" ++ show uniq) (occNameString occ)
520 -- The Unique might print with characters
521 -- that need encoding (e.g. 'z'!)
526 mkDFunOcc :: EncodedString -- Typically the class and type glommed together e.g. "OrdMaybe"
527 -> OccName -- "$fOrdMaybe"
529 mkDFunOcc string = mk_deriv VarName "$f" string
532 We used to add a '$m' to indicate a method, but that gives rise to bad
533 error messages from the type checker when we print the function name or pattern
534 of an instance-decl binding. Why? Because the binding is zapped
535 to use the method name in place of the selector name.
536 (See TcClassDcl.tcMethodBind)
538 The way it is now, -ddump-xx output may look confusing, but
539 you can always say -dppr-debug to get the uniques.
541 However, we *do* have to zap the first character to be lower case,
542 because overloaded constructors (blarg) generate methods too.
543 And convert to VarName space
545 e.g. a call to constructor MkFoo where
546 data (Ord a) => Foo a = MkFoo a
548 If this is necessary, we do it by prefixing '$m'. These
549 guys never show up in error messages. What a hack.
552 mkMethodOcc :: OccName -> OccName
553 mkMethodOcc occ@(OccName VarName fs) = occ
554 mkMethodOcc occ = mk_simple_deriv varName "$m" occ
558 %************************************************************************
560 \subsection{Tidying them up}
562 %************************************************************************
564 Before we print chunks of code we like to rename it so that
565 we don't have to print lots of silly uniques in it. But we mustn't
566 accidentally introduce name clashes! So the idea is that we leave the
567 OccName alone unless it accidentally clashes with one that is already
568 in scope; if so, we tack on '1' at the end and try again, then '2', and
569 so on till we find a unique one.
571 There's a wrinkle for operators. Consider '>>='. We can't use '>>=1'
572 because that isn't a single lexeme. So we encode it to 'lle' and *then*
573 tack on the '1', if necessary.
576 type TidyOccEnv = OccEnv Int -- The in-scope OccNames
577 -- Range gives a plausible starting point for new guesses
579 emptyTidyOccEnv = emptyOccEnv
581 initTidyOccEnv :: [OccName] -> TidyOccEnv -- Initialise with names to avoid!
582 initTidyOccEnv = foldl (\env occ -> extendOccEnv env occ 1) emptyTidyOccEnv
584 tidyOccName :: TidyOccEnv -> OccName -> (TidyOccEnv, OccName)
586 tidyOccName in_scope occ@(OccName occ_sp fs)
587 = case lookupOccEnv in_scope occ of
588 Nothing -> -- Not already used: make it used
589 (extendOccEnv in_scope occ 1, occ)
591 Just n -> -- Already used: make a new guess,
592 -- change the guess base, and try again
593 tidyOccName (extendOccEnv in_scope occ (n+1))
594 (mkSysOcc occ_sp (unpackFS fs ++ show n))
598 %************************************************************************
600 \subsection{The 'Z' encoding}
602 %************************************************************************
604 This is the main name-encoding and decoding function. It encodes any
605 string into a string that is acceptable as a C name. This is the name
606 by which things are known right through the compiler.
608 The basic encoding scheme is this.
610 * Tuples (,,,) are coded as Z3T
612 * Alphabetic characters (upper and lower) and digits
613 all translate to themselves;
614 except 'Z', which translates to 'ZZ'
615 and 'z', which translates to 'zz'
616 We need both so that we can preserve the variable/tycon distinction
618 * Most other printable characters translate to 'zx' or 'Zx' for some
619 alphabetic character x
621 * The others translate as 'znnnU' where 'nnn' is the decimal number
625 --------------------------
637 (# #) Z1H unboxed 1-tuple (note the space)
638 (#,,,,#) Z5H unboxed 5-tuple
639 (NB: There is no Z1T nor Z0H.)
642 -- alreadyEncoded is used in ASSERTs to check for encoded
643 -- strings. It isn't fail-safe, of course, because, say 'zh' might
644 -- be encoded or not.
645 alreadyEncoded :: String -> Bool
646 alreadyEncoded s = all ok s
649 -- This is a bit of a lie; if we really wanted spaces
650 -- in names we'd have to encode them. But we do put
651 -- spaces in ccall "occurrences", and we don't want to
653 ok ch = isAlphaNum ch
655 alreadyEncodedFS :: FastString -> Bool
656 alreadyEncodedFS fs = alreadyEncoded (unpackFS fs)
658 encode :: UserString -> EncodedString
659 encode cs = case maybe_tuple cs of
660 Just n -> n -- Tuples go to Z2T etc
664 go (c:cs) = encode_ch c ++ go cs
666 encodeFS :: UserFS -> EncodedFS
667 encodeFS fast_str | all unencodedChar str = fast_str
668 | otherwise = mkFastString (encode str)
670 str = unpackFS fast_str
672 unencodedChar :: Char -> Bool -- True for chars that don't need encoding
673 unencodedChar 'Z' = False
674 unencodedChar 'z' = False
675 unencodedChar c = c >= 'a' && c <= 'z'
676 || c >= 'A' && c <= 'Z'
677 || c >= '0' && c <= '9'
679 encode_ch :: Char -> EncodedString
680 encode_ch c | unencodedChar c = [c] -- Common case first
683 encode_ch '(' = "ZL" -- Needed for things like (,), and (->)
684 encode_ch ')' = "ZR" -- For symmetry with (
704 encode_ch '\'' = "zq"
705 encode_ch '\\' = "zr"
710 encode_ch c = 'z' : shows (ord c) "U"
713 Decode is used for user printing.
716 decodeFS :: FastString -> FastString
717 decodeFS fs = mkFastString (decode (unpackFS fs))
719 decode :: EncodedString -> UserString
721 decode ('Z' : d : rest) | isDigit d = decode_tuple d rest
722 | otherwise = decode_upper d : decode rest
723 decode ('z' : d : rest) | isDigit d = decode_num_esc d rest
724 | otherwise = decode_lower d : decode rest
725 decode (c : rest) = c : decode rest
727 decode_upper, decode_lower :: Char -> Char
729 decode_upper 'L' = '('
730 decode_upper 'R' = ')'
731 decode_upper 'M' = '['
732 decode_upper 'N' = ']'
733 decode_upper 'C' = ':'
734 decode_upper 'Z' = 'Z'
735 decode_upper ch = pprTrace "decode_upper" (char ch) ch
737 decode_lower 'z' = 'z'
738 decode_lower 'a' = '&'
739 decode_lower 'b' = '|'
740 decode_lower 'c' = '^'
741 decode_lower 'd' = '$'
742 decode_lower 'e' = '='
743 decode_lower 'g' = '>'
744 decode_lower 'h' = '#'
745 decode_lower 'i' = '.'
746 decode_lower 'l' = '<'
747 decode_lower 'm' = '-'
748 decode_lower 'n' = '!'
749 decode_lower 'p' = '+'
750 decode_lower 'q' = '\''
751 decode_lower 'r' = '\\'
752 decode_lower 's' = '/'
753 decode_lower 't' = '*'
754 decode_lower 'u' = '_'
755 decode_lower 'v' = '%'
756 decode_lower ch = pprTrace "decode_lower" (char ch) ch
758 -- Characters not having a specific code are coded as z224U
759 decode_num_esc d rest
760 = go (digitToInt d) rest
762 go n (c : rest) | isDigit c = go (10*n + digitToInt c) rest
763 go n ('U' : rest) = chr n : decode rest
764 go n other = pprPanic "decode_num_esc" (ppr n <+> text other)
766 decode_tuple :: Char -> EncodedString -> UserString
768 = go (digitToInt d) rest
770 -- NB. recurse back to decode after decoding the tuple, because
771 -- the tuple might be embedded in a longer name.
772 go n (c : rest) | isDigit c = go (10*n + digitToInt c) rest
773 go 0 ('T':rest) = "()" ++ decode rest
774 go n ('T':rest) = '(' : replicate (n-1) ',' ++ ")" ++ decode rest
775 go 1 ('H':rest) = "(# #)" ++ decode rest
776 go n ('H':rest) = '(' : '#' : replicate (n-1) ',' ++ "#)" ++ decode rest
777 go n other = pprPanic "decode_tuple" (ppr n <+> text other)
781 %************************************************************************
783 Stuff for dealing with tuples
785 %************************************************************************
787 Tuples are encoded as
789 for 3-tuples or unboxed 3-tuples respectively. No other encoding starts
792 * "(# #)" is the tycon for an unboxed 1-tuple (not 0-tuple)
793 There are no unboxed 0-tuples.
795 * "()" is the tycon for a boxed 0-tuple.
796 There are no boxed 1-tuples.
800 maybe_tuple :: UserString -> Maybe EncodedString
802 maybe_tuple "(# #)" = Just("Z1H")
803 maybe_tuple ('(' : '#' : cs) = case count_commas (0::Int) cs of
804 (n, '#' : ')' : cs) -> Just ('Z' : shows (n+1) "H")
806 maybe_tuple "()" = Just("Z0T")
807 maybe_tuple ('(' : cs) = case count_commas (0::Int) cs of
808 (n, ')' : cs) -> Just ('Z' : shows (n+1) "T")
810 maybe_tuple other = Nothing
812 count_commas :: Int -> String -> (Int, String)
813 count_commas n (',' : cs) = count_commas (n+1) cs
814 count_commas n cs = (n,cs)
818 mkTupleOcc :: NameSpace -> Boxity -> Arity -> OccName
820 = OccName ns (mkFastString ('Z' : (show ar ++ bx_char)))
826 isTupleOcc_maybe :: OccName -> Maybe (NameSpace, Boxity, Arity)
827 -- Tuples are special, because there are so many of them!
828 isTupleOcc_maybe (OccName ns fs)
829 = case unpackFS fs of
830 ('Z':d:rest) | isDigit d -> Just (decode_tup (digitToInt d) rest)
833 decode_tup n "H" = (ns, Unboxed, n)
834 decode_tup n "T" = (ns, Boxed, n)
835 decode_tup n (d:rest) = decode_tup (n*10 + digitToInt d) rest
838 %************************************************************************
840 \subsection{Lexical categories}
842 %************************************************************************
844 These functions test strings to see if they fit the lexical categories
845 defined in the Haskell report.
848 isLexCon, isLexVar, isLexId, isLexSym :: FastString -> Bool
849 isLexConId, isLexConSym, isLexVarId, isLexVarSym :: FastString -> Bool
851 isLexCon cs = isLexConId cs || isLexConSym cs
852 isLexVar cs = isLexVarId cs || isLexVarSym cs
854 isLexId cs = isLexConId cs || isLexVarId cs
855 isLexSym cs = isLexConSym cs || isLexVarSym cs
859 isLexConId cs -- Prefix type or data constructors
860 | nullFastString cs = False -- e.g. "Foo", "[]", "(,)"
861 | cs == FSLIT("[]") = True
862 | otherwise = startsConId (headFS cs)
864 isLexVarId cs -- Ordinary prefix identifiers
865 | nullFastString cs = False -- e.g. "x", "_x"
866 | otherwise = startsVarId (headFS cs)
868 isLexConSym cs -- Infix type or data constructors
869 | nullFastString cs = False -- e.g. ":-:", ":", "->"
870 | cs == FSLIT("->") = True
871 | otherwise = startsConSym (headFS cs)
873 isLexVarSym cs -- Infix identifiers
874 | nullFastString cs = False -- e.g. "+"
875 | otherwise = startsVarSym (headFS cs)
878 startsVarSym, startsVarId, startsConSym, startsConId :: Char -> Bool
879 startsVarSym c = isSymbolASCII c || isSymbolISO c -- Infix Ids
880 startsConSym c = c == ':' -- Infix data constructors
881 startsVarId c = isLower c || isLowerISO c || c == '_' -- Ordinary Ids
882 startsConId c = isUpper c || isUpperISO c || c == '(' -- Ordinary type constructors and data constructors
885 isSymbolASCII c = c `elem` "!#$%&*+./<=>?@\\^|~-"
886 isSymbolISO c = ord c `elem` (0xd7 : 0xf7 : [0xa1 .. 0xbf])
887 isUpperISO (C# c#) = c# `geChar#` '\xc0'# && c# `leChar#` '\xde'# && c# `neChar#` '\xd7'#
888 --0xc0 <= oc && oc <= 0xde && oc /= 0xd7 where oc = ord c
889 isLowerISO (C# c#) = c# `geChar#` '\xdf'# && c# `leChar#` '\xff'# && c# `neChar#` '\xf7'#
890 --0xdf <= oc && oc <= 0xff && oc /= 0xf7 where oc = ord c
893 %************************************************************************
896 Here rather than BinIface because OccName is abstract
898 %************************************************************************
901 instance Binary NameSpace where
904 put_ bh DataName = do
908 put_ bh TcClsName = do
913 0 -> do return VarName
914 1 -> do return DataName
915 2 -> do return TvName
916 _ -> do return TcClsName
918 instance Binary OccName where
919 put_ bh (OccName aa ab) = do
925 return (OccName aa ab)