1 {-% DrIFT (Automatic class derivations for Haskell) v1.1 %-}
3 % (c) The GRASP/AQUA Project, Glasgow University, 1992-1998
6 \section[OccName]{@OccName@}
10 -- The NameSpace type; abstact
11 NameSpace, tcName, clsName, tcClsName, dataName, varName,
12 tvName, srcDataName, nameSpaceString,
15 OccName, -- Abstract, instance of Outputable
19 OccEnv, emptyOccEnv, unitOccEnv, extendOccEnv,
20 lookupOccEnv, mkOccEnv, extendOccEnvList, elemOccEnv,
21 occEnvElts, foldOccEnv, plusOccEnv, plusOccEnv_C, extendOccEnv_C,
25 OccSet, emptyOccSet, unitOccSet, mkOccSet, extendOccSet, extendOccSetList,
26 unionOccSets, unionManyOccSets, minusOccSet, elemOccSet, occSetElts,
27 foldOccSet, isEmptyOccSet, intersectOccSet, intersectsOccSet,
29 mkOccFS, mkSysOcc, mkSysOccFS, mkFCallOcc, mkKindOccFS,
30 mkVarOcc, mkVarOccEncoded,
31 mkSuperDictSelOcc, mkDFunOcc, mkForeignExportOcc,
32 mkDictOcc, mkIPOcc, mkWorkerOcc, mkMethodOcc, mkDefaultMethodOcc,
33 mkDerivedTyConOcc, mkClassTyConOcc, mkClassDataConOcc, mkSpecOcc,
34 mkGenOcc1, mkGenOcc2, mkLocalOcc, mkDataTOcc, mkDataCOcc,
35 mkDataConWrapperOcc, mkDataConWorkerOcc,
37 isTvOcc, isTcOcc, isDataOcc, isDataSymOcc, isSymOcc, isValOcc,
40 occNameFS, occNameString, occNameUserString, occNameSpace,
41 occNameFlavour, briefOccNameFlavour,
44 mkTupleOcc, isTupleOcc_maybe,
47 TidyOccEnv, emptyTidyOccEnv, tidyOccName, initTidyOccEnv,
50 EncodedString, EncodedFS, UserString, UserFS, encode, encodeFS, decode, pprEncodedFS,
52 -- The basic form of names
53 isLexCon, isLexVar, isLexId, isLexSym,
54 isLexConId, isLexConSym, isLexVarId, isLexVarSym,
55 isLowerISO, isUpperISO
59 #include "HsVersions.h"
61 import Char ( isDigit, isUpper, isLower, isAlphaNum, ord, chr, digitToInt )
62 import Util ( thenCmp )
63 import Unique ( Unique, mkUnique, Uniquable(..) )
64 import BasicTypes ( Boxity(..), Arity )
74 We hold both module names and identifier names in a 'Z-encoded' form
75 that makes them acceptable both as a C identifier and as a Haskell
78 They can always be decoded again when printing error messages
79 or anything else for the user, but it does make sense for it
80 to be represented here in encoded form, so that when generating
81 code the encoding operation is not performed on each occurrence.
83 These type synonyms help documentation.
86 type UserFS = FastString -- As the user typed it
87 type EncodedFS = FastString -- Encoded form
89 type UserString = String -- As the user typed it
90 type EncodedString = String -- Encoded form
93 pprEncodedFS :: EncodedFS -> SDoc
95 = getPprStyle $ \ sty ->
97 -- ftext (decodeFS fs) would needlessly pack the string again
98 then text (decode (unpackFS fs))
102 %************************************************************************
104 \subsection{Name space}
106 %************************************************************************
109 data NameSpace = VarName -- Variables, including "source" data constructors
110 | DataName -- "Real" data constructors
111 | TvName -- Type variables
112 | TcClsName -- Type constructors and classes; Haskell has them
113 -- in the same name space for now.
115 {-! derive: Binary !-}
117 -- Note [Data Constructors]
118 -- see also: Note [Data Constructor Naming] in DataCon.lhs
120 -- "Source" data constructors are the data constructors mentioned
121 -- in Haskell source code
123 -- "Real" data constructors are the data constructors of the
124 -- representation type, which may not be the same as the source
128 -- data T = T !(Int,Int)
130 -- The source datacon has type (Int,Int) -> T
131 -- The real datacon has type Int -> Int -> T
132 -- GHC chooses a representation based on the strictness etc.
135 -- Though type constructors and classes are in the same name space now,
136 -- the NameSpace type is abstract, so we can easily separate them later
137 tcName = TcClsName -- Type constructors
138 clsName = TcClsName -- Classes
139 tcClsName = TcClsName -- Not sure which!
142 srcDataName = DataName -- Haskell-source data constructors should be
143 -- in the Data name space
148 nameSpaceString :: NameSpace -> String
149 nameSpaceString DataName = "Data constructor"
150 nameSpaceString VarName = "Variable"
151 nameSpaceString TvName = "Type variable"
152 nameSpaceString TcClsName = "Type constructor or class"
156 %************************************************************************
158 \subsection[Name-pieces-datatypes]{The @OccName@ datatypes}
160 %************************************************************************
163 data OccName = OccName
166 {-! derive : Binary !-}
171 instance Eq OccName where
172 (OccName sp1 s1) == (OccName sp2 s2) = s1 == s2 && sp1 == sp2
174 instance Ord OccName where
175 compare (OccName sp1 s1) (OccName sp2 s2) = (s1 `compare` s2) `thenCmp`
180 %************************************************************************
182 \subsection{Printing}
184 %************************************************************************
187 instance Outputable OccName where
190 pprOccName :: OccName -> SDoc
191 pprOccName (OccName sp occ)
192 = getPprStyle $ \ sty ->
193 pprEncodedFS occ <> if debugStyle sty then
194 braces (text (briefNameSpaceFlavour sp))
199 %************************************************************************
201 \subsection{Construction}
203 %************************************************************************
205 *Sys* things do no encoding; the caller should ensure that the thing is
209 mkSysOcc :: NameSpace -> EncodedString -> OccName
210 mkSysOcc occ_sp str = ASSERT2( alreadyEncoded str, text str )
211 OccName occ_sp (mkFastString str)
213 mkSysOccFS :: NameSpace -> EncodedFS -> OccName
214 mkSysOccFS occ_sp fs = ASSERT2( alreadyEncodedFS fs, ppr fs )
217 mkFCallOcc :: EncodedString -> OccName
218 -- This version of mkSysOcc doesn't check that the string is already encoded,
219 -- because it will be something like "{__ccall f dyn Int# -> Int#}"
220 -- This encodes a lot into something that then parses like an Id.
221 -- But then alreadyEncoded complains about the braces!
222 mkFCallOcc str = OccName varName (mkFastString str)
224 -- Kind constructors get a special function. Uniquely, they are not encoded,
225 -- so that they have names like '*'. This means that *even in interface files*
226 -- we'll get kinds like (* -> (* -> *)). We can't use mkSysOcc because it
227 -- has an ASSERT that doesn't hold.
228 mkKindOccFS :: NameSpace -> EncodedFS -> OccName
229 mkKindOccFS occ_sp fs = OccName occ_sp fs
232 *Source-code* things are encoded.
235 mkOccFS :: NameSpace -> UserFS -> OccName
236 mkOccFS occ_sp fs = mkSysOccFS occ_sp (encodeFS fs)
238 mkVarOcc :: UserFS -> OccName
239 mkVarOcc fs = mkSysOccFS varName (encodeFS fs)
241 mkVarOccEncoded :: EncodedFS -> OccName
242 mkVarOccEncoded fs = mkSysOccFS varName fs
247 %************************************************************************
251 %************************************************************************
253 OccEnvs are used mainly for the envts in ModIfaces.
255 They are efficient, because FastStrings have unique Int# keys. We assume
256 this key is less than 2^24, so we can make a Unique using
257 mkUnique ns key :: Unique
258 where 'ns' is a Char reprsenting the name space. This in turn makes it
259 easy to build an OccEnv.
262 instance Uniquable OccName where
263 getUnique (OccName ns fs)
264 = mkUnique char (I# (uniqueOfFS fs))
265 where -- See notes above about this getUnique function
272 type OccEnv a = UniqFM a
274 emptyOccEnv :: OccEnv a
275 unitOccEnv :: OccName -> a -> OccEnv a
276 extendOccEnv :: OccEnv a -> OccName -> a -> OccEnv a
277 extendOccEnvList :: OccEnv a -> [(OccName, a)] -> OccEnv a
278 lookupOccEnv :: OccEnv a -> OccName -> Maybe a
279 mkOccEnv :: [(OccName,a)] -> OccEnv a
280 elemOccEnv :: OccName -> OccEnv a -> Bool
281 foldOccEnv :: (a -> b -> b) -> b -> OccEnv a -> b
282 occEnvElts :: OccEnv a -> [a]
283 extendOccEnv_C :: (a->a->a) -> OccEnv a -> OccName -> a -> OccEnv a
284 plusOccEnv :: OccEnv a -> OccEnv a -> OccEnv a
285 plusOccEnv_C :: (a->a->a) -> OccEnv a -> OccEnv a -> OccEnv a
287 emptyOccEnv = emptyUFM
289 extendOccEnv = addToUFM
290 extendOccEnvList = addListToUFM
291 lookupOccEnv = lookupUFM
297 plusOccEnv_C = plusUFM_C
298 extendOccEnv_C = addToUFM_C
301 type OccSet = UniqFM OccName
303 emptyOccSet :: OccSet
304 unitOccSet :: OccName -> OccSet
305 mkOccSet :: [OccName] -> OccSet
306 extendOccSet :: OccSet -> OccName -> OccSet
307 extendOccSetList :: OccSet -> [OccName] -> OccSet
308 unionOccSets :: OccSet -> OccSet -> OccSet
309 unionManyOccSets :: [OccSet] -> OccSet
310 minusOccSet :: OccSet -> OccSet -> OccSet
311 elemOccSet :: OccName -> OccSet -> Bool
312 occSetElts :: OccSet -> [OccName]
313 foldOccSet :: (OccName -> b -> b) -> b -> OccSet -> b
314 isEmptyOccSet :: OccSet -> Bool
315 intersectOccSet :: OccSet -> OccSet -> OccSet
316 intersectsOccSet :: OccSet -> OccSet -> Bool
318 emptyOccSet = emptyUniqSet
319 unitOccSet = unitUniqSet
321 extendOccSet = addOneToUniqSet
322 extendOccSetList = addListToUniqSet
323 unionOccSets = unionUniqSets
324 unionManyOccSets = unionManyUniqSets
325 minusOccSet = minusUniqSet
326 elemOccSet = elementOfUniqSet
327 occSetElts = uniqSetToList
328 foldOccSet = foldUniqSet
329 isEmptyOccSet = isEmptyUniqSet
330 intersectOccSet = intersectUniqSets
331 intersectsOccSet s1 s2 = not (isEmptyOccSet (s1 `intersectOccSet` s2))
335 %************************************************************************
337 \subsection{Predicates and taking them apart}
339 %************************************************************************
342 occNameFS :: OccName -> EncodedFS
343 occNameFS (OccName _ s) = s
345 occNameString :: OccName -> EncodedString
346 occNameString (OccName _ s) = unpackFS s
348 occNameUserString :: OccName -> UserString
349 occNameUserString occ = decode (occNameString occ)
351 occNameSpace :: OccName -> NameSpace
352 occNameSpace (OccName sp _) = sp
354 setOccNameSpace :: NameSpace -> OccName -> OccName
355 setOccNameSpace sp (OccName _ occ) = OccName sp occ
357 -- occNameFlavour is used only to generate good error messages
358 occNameFlavour :: OccName -> String
359 occNameFlavour (OccName DataName _) = "Data constructor"
360 occNameFlavour (OccName TvName _) = "Type variable"
361 occNameFlavour (OccName TcClsName _) = "Type constructor or class"
362 occNameFlavour (OccName VarName s) = "Variable"
364 -- briefOccNameFlavour is used in debug-printing of names
365 briefOccNameFlavour :: OccName -> String
366 briefOccNameFlavour (OccName sp _) = briefNameSpaceFlavour sp
368 briefNameSpaceFlavour DataName = "d"
369 briefNameSpaceFlavour VarName = "v"
370 briefNameSpaceFlavour TvName = "tv"
371 briefNameSpaceFlavour TcClsName = "tc"
375 isTvOcc, isDataSymOcc, isSymOcc, isTcOcc :: OccName -> Bool
377 isTvOcc (OccName TvName _) = True
378 isTvOcc other = False
380 isTcOcc (OccName TcClsName _) = True
381 isTcOcc other = False
383 isValOcc (OccName VarName _) = True
384 isValOcc (OccName DataName _) = True
385 isValOcc other = False
387 -- Data constructor operator (starts with ':', or '[]')
388 -- Pretty inefficient!
389 isDataSymOcc (OccName DataName s) = isLexConSym (decodeFS s)
390 isDataSymOcc (OccName VarName s) = isLexConSym (decodeFS s)
391 isDataSymOcc other = False
393 isDataOcc (OccName DataName _) = True
394 isDataOcc (OccName VarName s) = isLexCon (decodeFS s)
395 isDataOcc other = False
397 -- Any operator (data constructor or variable)
398 -- Pretty inefficient!
399 isSymOcc (OccName DataName s) = isLexConSym (decodeFS s)
400 isSymOcc (OccName VarName s) = isLexSym (decodeFS s)
401 isSymOcc other = False
406 reportIfUnused :: OccName -> Bool
407 -- Haskell 98 encourages compilers to suppress warnings about
408 -- unused names in a pattern if they start with "_".
409 reportIfUnused occ = case occNameUserString occ of
416 %************************************************************************
418 \subsection{Making system names}
420 %************************************************************************
422 Here's our convention for splitting up the interface file name space:
424 d... dictionary identifiers
425 (local variables, so no name-clash worries)
427 $f... dict-fun identifiers (from inst decls)
428 $dm... default methods
429 $p... superclass selectors
431 :T... compiler-generated tycons for dictionaries
432 :D... ...ditto data cons
433 $sf.. specialised version of f
435 in encoded form these appear as Zdfxxx etc
437 :... keywords (export:, letrec: etc.)
438 --- I THINK THIS IS WRONG!
440 This knowledge is encoded in the following functions.
443 @mk_deriv@ generates an @OccName@ from the prefix and a string.
444 NB: The string must already be encoded!
447 mk_deriv :: NameSpace
448 -> String -- Distinguishes one sort of derived name from another
449 -> EncodedString -- Must be already encoded!! We don't want to encode it a
450 -- second time because encoding isn't idempotent
453 mk_deriv occ_sp sys_prefix str = mkSysOcc occ_sp (encode sys_prefix ++ str)
457 mkDictOcc, mkIPOcc, mkWorkerOcc, mkDefaultMethodOcc,
458 mkClassTyConOcc, mkClassDataConOcc, mkSpecOcc
459 :: OccName -> OccName
461 -- These derived variables have a prefix that no Haskell value could have
462 mkDataConWrapperOcc = mk_simple_deriv varName "$W"
463 mkWorkerOcc = mk_simple_deriv varName "$w"
464 mkDefaultMethodOcc = mk_simple_deriv varName "$dm"
465 mkDerivedTyConOcc = mk_simple_deriv tcName ":" -- The : prefix makes sure it classifies
466 mkClassTyConOcc = mk_simple_deriv tcName ":T" -- as a tycon/datacon
467 mkClassDataConOcc = mk_simple_deriv dataName ":D" -- We go straight to the "real" data con
468 -- for datacons from classes
469 mkDictOcc = mk_simple_deriv varName "$d"
470 mkIPOcc = mk_simple_deriv varName "$i"
471 mkSpecOcc = mk_simple_deriv varName "$s"
472 mkForeignExportOcc = mk_simple_deriv varName "$f"
474 -- Generic derivable classes
475 mkGenOcc1 = mk_simple_deriv varName "$gfrom"
476 mkGenOcc2 = mk_simple_deriv varName "$gto"
478 -- data T = MkT ... deriving( Data ) needs defintions for
479 -- $tT :: Data.Generics.Basics.DataType
480 -- $cMkT :: Data.Generics.Basics.Constr
481 mkDataTOcc = mk_simple_deriv varName "$t"
482 mkDataCOcc = mk_simple_deriv varName "$c"
484 mk_simple_deriv sp px occ = mk_deriv sp px (occNameString occ)
487 -- Data constructor workers are made by setting the name space
488 -- of the data constructor OccName (which should be a DataName)
490 mkDataConWorkerOcc datacon_occ = setOccNameSpace varName datacon_occ
494 mkSuperDictSelOcc :: Int -- Index of superclass, eg 3
495 -> OccName -- Class, eg "Ord"
496 -> OccName -- eg "$p3Ord"
497 mkSuperDictSelOcc index cls_occ
498 = mk_deriv varName "$p" (show index ++ occNameString cls_occ)
500 mkLocalOcc :: Unique -- Unique
501 -> OccName -- Local name (e.g. "sat")
502 -> OccName -- Nice unique version ("$L23sat")
504 = mk_deriv varName ("$L" ++ show uniq) (occNameString occ)
505 -- The Unique might print with characters
506 -- that need encoding (e.g. 'z'!)
511 mkDFunOcc :: EncodedString -- Typically the class and type glommed together e.g. "OrdMaybe"
512 -> OccName -- "$fOrdMaybe"
514 mkDFunOcc string = mk_deriv VarName "$f" string
517 We used to add a '$m' to indicate a method, but that gives rise to bad
518 error messages from the type checker when we print the function name or pattern
519 of an instance-decl binding. Why? Because the binding is zapped
520 to use the method name in place of the selector name.
521 (See TcClassDcl.tcMethodBind)
523 The way it is now, -ddump-xx output may look confusing, but
524 you can always say -dppr-debug to get the uniques.
526 However, we *do* have to zap the first character to be lower case,
527 because overloaded constructors (blarg) generate methods too.
528 And convert to VarName space
530 e.g. a call to constructor MkFoo where
531 data (Ord a) => Foo a = MkFoo a
533 If this is necessary, we do it by prefixing '$m'. These
534 guys never show up in error messages. What a hack.
537 mkMethodOcc :: OccName -> OccName
538 mkMethodOcc occ@(OccName VarName fs) = occ
539 mkMethodOcc occ = mk_simple_deriv varName "$m" occ
543 %************************************************************************
545 \subsection{Tidying them up}
547 %************************************************************************
549 Before we print chunks of code we like to rename it so that
550 we don't have to print lots of silly uniques in it. But we mustn't
551 accidentally introduce name clashes! So the idea is that we leave the
552 OccName alone unless it accidentally clashes with one that is already
553 in scope; if so, we tack on '1' at the end and try again, then '2', and
554 so on till we find a unique one.
556 There's a wrinkle for operators. Consider '>>='. We can't use '>>=1'
557 because that isn't a single lexeme. So we encode it to 'lle' and *then*
558 tack on the '1', if necessary.
561 type TidyOccEnv = OccEnv Int -- The in-scope OccNames
562 -- Range gives a plausible starting point for new guesses
564 emptyTidyOccEnv = emptyOccEnv
566 initTidyOccEnv :: [OccName] -> TidyOccEnv -- Initialise with names to avoid!
567 initTidyOccEnv = foldl (\env occ -> extendOccEnv env occ 1) emptyTidyOccEnv
569 tidyOccName :: TidyOccEnv -> OccName -> (TidyOccEnv, OccName)
571 tidyOccName in_scope occ@(OccName occ_sp fs)
572 = case lookupOccEnv in_scope occ of
573 Nothing -> -- Not already used: make it used
574 (extendOccEnv in_scope occ 1, occ)
576 Just n -> -- Already used: make a new guess,
577 -- change the guess base, and try again
578 tidyOccName (extendOccEnv in_scope occ (n+1))
579 (mkSysOcc occ_sp (unpackFS fs ++ show n))
583 %************************************************************************
585 \subsection{The 'Z' encoding}
587 %************************************************************************
589 This is the main name-encoding and decoding function. It encodes any
590 string into a string that is acceptable as a C name. This is the name
591 by which things are known right through the compiler.
593 The basic encoding scheme is this.
595 * Tuples (,,,) are coded as Z3T
597 * Alphabetic characters (upper and lower) and digits
598 all translate to themselves;
599 except 'Z', which translates to 'ZZ'
600 and 'z', which translates to 'zz'
601 We need both so that we can preserve the variable/tycon distinction
603 * Most other printable characters translate to 'zx' or 'Zx' for some
604 alphabetic character x
606 * The others translate as 'znnnU' where 'nnn' is the decimal number
610 --------------------------
622 (# #) Z1H unboxed 1-tuple (note the space)
623 (#,,,,#) Z5H unboxed 5-tuple
624 (NB: There is no Z1T nor Z0H.)
627 -- alreadyEncoded is used in ASSERTs to check for encoded
628 -- strings. It isn't fail-safe, of course, because, say 'zh' might
629 -- be encoded or not.
630 alreadyEncoded :: String -> Bool
631 alreadyEncoded s = all ok s
634 -- This is a bit of a lie; if we really wanted spaces
635 -- in names we'd have to encode them. But we do put
636 -- spaces in ccall "occurrences", and we don't want to
638 ok ch = isAlphaNum ch
640 alreadyEncodedFS :: FastString -> Bool
641 alreadyEncodedFS fs = alreadyEncoded (unpackFS fs)
643 encode :: UserString -> EncodedString
644 encode cs = case maybe_tuple cs of
645 Just n -> n -- Tuples go to Z2T etc
649 go (c:cs) = encode_ch c ++ go cs
651 encodeFS :: UserFS -> EncodedFS
652 encodeFS fast_str | all unencodedChar str = fast_str
653 | otherwise = mkFastString (encode str)
655 str = unpackFS fast_str
657 unencodedChar :: Char -> Bool -- True for chars that don't need encoding
658 unencodedChar 'Z' = False
659 unencodedChar 'z' = False
660 unencodedChar c = c >= 'a' && c <= 'z'
661 || c >= 'A' && c <= 'Z'
662 || c >= '0' && c <= '9'
664 encode_ch :: Char -> EncodedString
665 encode_ch c | unencodedChar c = [c] -- Common case first
668 encode_ch '(' = "ZL" -- Needed for things like (,), and (->)
669 encode_ch ')' = "ZR" -- For symmetry with (
689 encode_ch '\'' = "zq"
690 encode_ch '\\' = "zr"
695 encode_ch c = 'z' : shows (ord c) "U"
698 Decode is used for user printing.
701 decodeFS :: FastString -> FastString
702 decodeFS fs = mkFastString (decode (unpackFS fs))
704 decode :: EncodedString -> UserString
706 decode ('Z' : d : rest) | isDigit d = decode_tuple d rest
707 | otherwise = decode_upper d : decode rest
708 decode ('z' : d : rest) | isDigit d = decode_num_esc d rest
709 | otherwise = decode_lower d : decode rest
710 decode (c : rest) = c : decode rest
712 decode_upper, decode_lower :: Char -> Char
714 decode_upper 'L' = '('
715 decode_upper 'R' = ')'
716 decode_upper 'M' = '['
717 decode_upper 'N' = ']'
718 decode_upper 'C' = ':'
719 decode_upper 'Z' = 'Z'
720 decode_upper ch = pprTrace "decode_upper" (char ch) ch
722 decode_lower 'z' = 'z'
723 decode_lower 'a' = '&'
724 decode_lower 'b' = '|'
725 decode_lower 'c' = '^'
726 decode_lower 'd' = '$'
727 decode_lower 'e' = '='
728 decode_lower 'g' = '>'
729 decode_lower 'h' = '#'
730 decode_lower 'i' = '.'
731 decode_lower 'l' = '<'
732 decode_lower 'm' = '-'
733 decode_lower 'n' = '!'
734 decode_lower 'p' = '+'
735 decode_lower 'q' = '\''
736 decode_lower 'r' = '\\'
737 decode_lower 's' = '/'
738 decode_lower 't' = '*'
739 decode_lower 'u' = '_'
740 decode_lower 'v' = '%'
741 decode_lower ch = pprTrace "decode_lower" (char ch) ch
743 -- Characters not having a specific code are coded as z224U
744 decode_num_esc d rest
745 = go (digitToInt d) rest
747 go n (c : rest) | isDigit c = go (10*n + digitToInt c) rest
748 go n ('U' : rest) = chr n : decode rest
749 go n other = pprPanic "decode_num_esc" (ppr n <+> text other)
753 %************************************************************************
755 Stuff for dealing with tuples
757 %************************************************************************
759 Tuples are encoded as
761 for 3-tuples or unboxed 3-tuples respectively. No other encoding starts
764 * "(# #)" is the tycon for an unboxed 1-tuple (not 0-tuple)
765 There are no unboxed 0-tuples.
767 * "()" is the tycon for a boxed 0-tuple.
768 There are no boxed 1-tuples.
772 maybe_tuple :: UserString -> Maybe EncodedString
774 maybe_tuple "(# #)" = Just("Z1H")
775 maybe_tuple ('(' : '#' : cs) = case count_commas (0::Int) cs of
776 (n, '#' : ')' : cs) -> Just ('Z' : shows (n+1) "H")
778 maybe_tuple "()" = Just("Z0T")
779 maybe_tuple ('(' : cs) = case count_commas (0::Int) cs of
780 (n, ')' : cs) -> Just ('Z' : shows (n+1) "T")
782 maybe_tuple other = Nothing
784 count_commas :: Int -> String -> (Int, String)
785 count_commas n (',' : cs) = count_commas (n+1) cs
786 count_commas n cs = (n,cs)
790 decode_tuple :: Char -> EncodedString -> UserString
792 = go (digitToInt d) rest
794 go n (c : rest) | isDigit c = go (10*n + digitToInt c) rest
796 go n ['T'] = '(' : replicate (n-1) ',' ++ ")"
798 go n ['H'] = '(' : '#' : replicate (n-1) ',' ++ "#)"
799 go n other = pprPanic "decode_tuple" (ppr n <+> text other)
801 mkTupleOcc :: NameSpace -> Boxity -> Arity -> OccName
803 = OccName ns (mkFastString ('Z' : (show ar ++ bx_char)))
809 isTupleOcc_maybe :: OccName -> Maybe (NameSpace, Boxity, Arity)
810 -- Tuples are special, because there are so many of them!
811 isTupleOcc_maybe (OccName ns fs)
812 = case unpackFS fs of
813 ('Z':d:rest) | isDigit d -> Just (decode_tup (digitToInt d) rest)
816 decode_tup n "H" = (ns, Unboxed, n)
817 decode_tup n "T" = (ns, Boxed, n)
818 decode_tup n (d:rest) = decode_tup (n*10 + digitToInt d) rest
821 %************************************************************************
823 \subsection{Lexical categories}
825 %************************************************************************
827 These functions test strings to see if they fit the lexical categories
828 defined in the Haskell report.
831 isLexCon, isLexVar, isLexId, isLexSym :: FastString -> Bool
832 isLexConId, isLexConSym, isLexVarId, isLexVarSym :: FastString -> Bool
834 isLexCon cs = isLexConId cs || isLexConSym cs
835 isLexVar cs = isLexVarId cs || isLexVarSym cs
837 isLexId cs = isLexConId cs || isLexVarId cs
838 isLexSym cs = isLexConSym cs || isLexVarSym cs
842 isLexConId cs -- Prefix type or data constructors
843 | nullFastString cs = False -- e.g. "Foo", "[]", "(,)"
844 | cs == FSLIT("[]") = True
845 | otherwise = startsConId (headFS cs)
847 isLexVarId cs -- Ordinary prefix identifiers
848 | nullFastString cs = False -- e.g. "x", "_x"
849 | otherwise = startsVarId (headFS cs)
851 isLexConSym cs -- Infix type or data constructors
852 | nullFastString cs = False -- e.g. ":-:", ":", "->"
853 | cs == FSLIT("->") = True
854 | otherwise = startsConSym (headFS cs)
856 isLexVarSym cs -- Infix identifiers
857 | nullFastString cs = False -- e.g. "+"
858 | otherwise = startsVarSym (headFS cs)
861 startsVarSym, startsVarId, startsConSym, startsConId :: Char -> Bool
862 startsVarSym c = isSymbolASCII c || isSymbolISO c -- Infix Ids
863 startsConSym c = c == ':' -- Infix data constructors
864 startsVarId c = isLower c || isLowerISO c || c == '_' -- Ordinary Ids
865 startsConId c = isUpper c || isUpperISO c || c == '(' -- Ordinary type constructors and data constructors
868 isSymbolASCII c = c `elem` "!#$%&*+./<=>?@\\^|~-"
869 isSymbolISO c = ord c `elem` (0xd7 : 0xf7 : [0xa1 .. 0xbf])
870 isUpperISO (C# c#) = c# `geChar#` '\xc0'# && c# `leChar#` '\xde'# && c# `neChar#` '\xd7'#
871 --0xc0 <= oc && oc <= 0xde && oc /= 0xd7 where oc = ord c
872 isLowerISO (C# c#) = c# `geChar#` '\xdf'# && c# `leChar#` '\xff'# && c# `neChar#` '\xf7'#
873 --0xdf <= oc && oc <= 0xff && oc /= 0xf7 where oc = ord c
876 %************************************************************************
879 Here rather than BinIface because OccName is abstract
881 %************************************************************************
884 instance Binary NameSpace where
887 put_ bh DataName = do
891 put_ bh TcClsName = do
896 0 -> do return VarName
897 1 -> do return DataName
898 2 -> do return TvName
899 _ -> do return TcClsName
901 instance Binary OccName where
902 put_ bh (OccName aa ab) = do
908 return (OccName aa ab)