ghc/compiler/basicTypes/OccName.lhs

   1 {-% DrIFT (Automatic class derivations for Haskell) v1.1 %-}
   2 %
   3 % (c) The GRASP/AQUA Project, Glasgow University, 1992-1998
   4 %
   5
   6 \section[OccName]{@OccName@}
   7
   8 \begin{code}
   9 module OccName (
  10         -- The NameSpace type; abstact
  11         NameSpace, tcName, clsName, tcClsName, dataName, varName,
  12         tvName, srcDataName, nameSpaceString,
  13
  14         -- The OccName type
  15         OccName,        -- Abstract, instance of Outputable
  16         pprOccName,
  17
  18         -- The OccEnv type
  19         OccEnv, emptyOccEnv, unitOccEnv, extendOccEnv,
  20         lookupOccEnv, mkOccEnv, extendOccEnvList, elemOccEnv,
  21         occEnvElts, foldOccEnv, plusOccEnv, plusOccEnv_C, extendOccEnv_C,
  22
  23
  24         -- The OccSet type
  25         OccSet, emptyOccSet, unitOccSet, mkOccSet, extendOccSet, extendOccSetList,
  26         unionOccSets, unionManyOccSets, minusOccSet, elemOccSet, occSetElts,
  27         foldOccSet, isEmptyOccSet, intersectOccSet, intersectsOccSet,
  28
  29         mkOccName, mkOccFS, mkSysOcc, mkSysOccFS, mkFCallOcc, mkKindOccFS,
  30         mkVarOcc, mkVarOccEncoded, mkTyVarOcc,
  31         mkSuperDictSelOcc, mkDFunOcc, mkForeignExportOcc,
  32         mkDictOcc, mkIPOcc, mkWorkerOcc, mkMethodOcc, mkDefaultMethodOcc,
  33         mkDerivedTyConOcc, mkClassTyConOcc, mkClassDataConOcc, mkSpecOcc,
  34         mkGenOcc1, mkGenOcc2, mkLocalOcc, mkDataTOcc, mkDataCOcc,
  35         mkDataConWrapperOcc, mkDataConWorkerOcc,
  36
  37         isVarOcc, isTvOcc, isTcOcc, isDataOcc, isDataSymOcc, isSymOcc, isValOcc,
  38         parenSymOcc, reportIfUnused,
  39
  40         occNameFS, occNameString, occNameUserString, occNameSpace,
  41         occNameFlavour, briefOccNameFlavour,
  42         setOccNameSpace,
  43
  44         mkTupleOcc, isTupleOcc_maybe,
  45
  46         -- Tidying up
  47         TidyOccEnv, emptyTidyOccEnv, tidyOccName, initTidyOccEnv,
  48
  49         -- Encoding
  50         EncodedString, EncodedFS, UserString, UserFS, encode, encodeFS, decode, pprEncodedFS,
  51
  52         -- The basic form of names
  53         isLexCon, isLexVar, isLexId, isLexSym,
  54         isLexConId, isLexConSym, isLexVarId, isLexVarSym,
  55         isLowerISO, isUpperISO
  56
  57     ) where
  58
  59 #include "HsVersions.h"
  60
  61 import Char     ( isDigit, isUpper, isLower, isAlphaNum, ord, chr, digitToInt )
  62 import Util     ( thenCmp )
  63 import Unique   ( Unique, mkUnique, Uniquable(..) )
  64 import BasicTypes ( Boxity(..), Arity )
  65 import StaticFlags ( opt_PprStyle_Debug )
  66 import UniqFM
  67 import UniqSet
  68 import FastString
  69 import Outputable
  70 import Binary
  71
  72 import GLAEXTS
  73 \end{code}
  74
  75 We hold both module names and identifier names in a 'Z-encoded' form
  76 that makes them acceptable both as a C identifier and as a Haskell
  77 (prefix) identifier.
  78
  79 They can always be decoded again when printing error messages
  80 or anything else for the user, but it does make sense for it
  81 to be represented here in encoded form, so that when generating
  82 code the encoding operation is not performed on each occurrence.
  83
  84 These type synonyms help documentation.
  85
  86 \begin{code}
  87 type UserFS    = FastString     -- As the user typed it
  88 type EncodedFS = FastString     -- Encoded form
  89
  90 type UserString = String        -- As the user typed it
  91 type EncodedString = String     -- Encoded form
  92
  93
  94 pprEncodedFS :: EncodedFS -> SDoc
  95 pprEncodedFS fs
  96   = getPprStyle         $ \ sty ->
  97     if userStyle sty || dumpStyle sty
  98         -- ftext (decodeFS fs) would needlessly pack the string again
  99         then text (decode (unpackFS fs))
 100         else ftext fs
 101 \end{code}
 102
 103 %************************************************************************
 104 %*                                                                      *
 105 \subsection{Name space}
 106 %*                                                                      *
 107 %************************************************************************
 108
 109 \begin{code}
 110 data NameSpace = VarName        -- Variables, including "source" data constructors
 111                | DataName       -- "Real" data constructors
 112                | TvName         -- Type variables
 113                | TcClsName      -- Type constructors and classes; Haskell has them
 114                                 -- in the same name space for now.
 115                deriving( Eq, Ord )
 116    {-! derive: Binary !-}
 117
 118 -- Note [Data Constructors]
 119 -- see also: Note [Data Constructor Naming] in DataCon.lhs
 120 --
 121 --      "Source" data constructors are the data constructors mentioned
 122 --      in Haskell source code
 123 --
 124 --      "Real" data constructors are the data constructors of the
 125 --      representation type, which may not be the same as the source
 126 --      type
 127
 128 -- Example:
 129 --      data T = T !(Int,Int)
 130 --
 131 -- The source datacon has type (Int,Int) -> T
 132 -- The real   datacon has type Int -> Int -> T
 133 -- GHC chooses a representation based on the strictness etc.
 134
 135
 136 -- Though type constructors and classes are in the same name space now,
 137 -- the NameSpace type is abstract, so we can easily separate them later
 138 tcName    = TcClsName           -- Type constructors
 139 clsName   = TcClsName           -- Classes
 140 tcClsName = TcClsName           -- Not sure which!
 141
 142 dataName    = DataName
 143 srcDataName = DataName  -- Haskell-source data constructors should be
 144                         -- in the Data name space
 145
 146 tvName      = TvName
 147 varName     = VarName
 148
 149 nameSpaceString :: NameSpace -> String
 150 nameSpaceString DataName  = "Data constructor"
 151 nameSpaceString VarName   = "Variable"
 152 nameSpaceString TvName    = "Type variable"
 153 nameSpaceString TcClsName = "Type constructor or class"
 154 \end{code}
 155
 156
 157 %************************************************************************
 158 %*                                                                      *
 159 \subsection[Name-pieces-datatypes]{The @OccName@ datatypes}
 160 %*                                                                      *
 161 %************************************************************************
 162
 163 \begin{code}
 164 data OccName = OccName
 165                         NameSpace
 166                         EncodedFS
 167    {-! derive : Binary !-}
 168 \end{code}
 169
 170
 171 \begin{code}
 172 instance Eq OccName where
 173     (OccName sp1 s1) == (OccName sp2 s2) = s1 == s2 && sp1 == sp2
 174
 175 instance Ord OccName where
 176     compare (OccName sp1 s1) (OccName sp2 s2) = (s1  `compare` s2) `thenCmp`
 177                                                 (sp1 `compare` sp2)
 178 \end{code}
 179
 180
 181 %************************************************************************
 182 %*                                                                      *
 183 \subsection{Printing}
 184 %*                                                                      *
 185 %************************************************************************
 186
 187 \begin{code}
 188 instance Outputable OccName where
 189     ppr = pprOccName
 190
 191 pprOccName :: OccName -> SDoc
 192 pprOccName (OccName sp occ)
 193   = getPprStyle $ \ sty ->
 194     pprEncodedFS occ <> if debugStyle sty then
 195                            braces (text (briefNameSpaceFlavour sp))
 196                         else empty
 197 \end{code}
 198
 199
 200 %************************************************************************
 201 %*                                                                      *
 202 \subsection{Construction}
 203 %*                                                                      *
 204 %*****p*******************************************************************
 205
 206 *Sys* things do no encoding; the caller should ensure that the thing is
 207 already encoded
 208
 209 \begin{code}
 210 mkSysOcc :: NameSpace -> EncodedString -> OccName
 211 mkSysOcc occ_sp str = ASSERT2( alreadyEncoded str, text str )
 212                       OccName occ_sp (mkFastString str)
 213
 214 mkSysOccFS :: NameSpace -> EncodedFS -> OccName
 215 mkSysOccFS occ_sp fs = ASSERT2( alreadyEncodedFS fs, ppr fs )
 216                        OccName occ_sp fs
 217
 218 mkFCallOcc :: EncodedString -> OccName
 219 -- This version of mkSysOcc doesn't check that the string is already encoded,
 220 -- because it will be something like "{__ccall f dyn Int# -> Int#}"
 221 -- This encodes a lot into something that then parses like an Id.
 222 -- But then alreadyEncoded complains about the braces!
 223 mkFCallOcc str = OccName varName (mkFastString str)
 224
 225 -- Kind constructors get a special function.  Uniquely, they are not encoded,
 226 -- so that they have names like '*'.  This means that *even in interface files*
 227 -- we'll get kinds like (* -> (* -> *)).  We can't use mkSysOcc because it
 228 -- has an ASSERT that doesn't hold.
 229 mkKindOccFS :: NameSpace -> EncodedFS -> OccName
 230 mkKindOccFS occ_sp fs = OccName occ_sp fs
 231 \end{code}
 232
 233 *Source-code* things are encoded.
 234
 235 \begin{code}
 236 mkOccFS :: NameSpace -> UserFS -> OccName
 237 mkOccFS occ_sp fs = mkSysOccFS occ_sp (encodeFS fs)
 238
 239 mkOccName :: NameSpace -> String -> OccName
 240 mkOccName ns s = mkSysOcc ns (encode s)
 241
 242 mkVarOcc :: UserFS -> OccName
 243 mkVarOcc fs = mkSysOccFS varName (encodeFS fs)
 244
 245 mkTyVarOcc :: UserFS -> OccName
 246 mkTyVarOcc fs = mkSysOccFS tvName (encodeFS fs)
 247
 248 mkVarOccEncoded :: EncodedFS -> OccName
 249 mkVarOccEncoded fs = mkSysOccFS varName fs
 250 \end{code}
 251
 252
 253
 254 %************************************************************************
 255 %*                                                                      *
 256                 Environments
 257 %*                                                                      *
 258 %************************************************************************
 259
 260 OccEnvs are used mainly for the envts in ModIfaces.
 261
 262 They are efficient, because FastStrings have unique Int# keys.  We assume
 263 this key is less than 2^24, so we can make a Unique using
 264         mkUnique ns key  :: Unique
 265 where 'ns' is a Char reprsenting the name space.  This in turn makes it
 266 easy to build an OccEnv.
 267
 268 \begin{code}
 269 instance Uniquable OccName where
 270   getUnique (OccName ns fs)
 271       = mkUnique char (I# (uniqueOfFS fs))
 272       where     -- See notes above about this getUnique function
 273         char = case ns of
 274                 VarName   -> 'i'
 275                 DataName  -> 'd'
 276                 TvName    -> 'v'
 277                 TcClsName -> 't'
 278
 279 type OccEnv a = UniqFM a
 280
 281 emptyOccEnv :: OccEnv a
 282 unitOccEnv  :: OccName -> a -> OccEnv a
 283 extendOccEnv :: OccEnv a -> OccName -> a -> OccEnv a
 284 extendOccEnvList :: OccEnv a -> [(OccName, a)] -> OccEnv a
 285 lookupOccEnv :: OccEnv a -> OccName -> Maybe a
 286 mkOccEnv     :: [(OccName,a)] -> OccEnv a
 287 elemOccEnv   :: OccName -> OccEnv a -> Bool
 288 foldOccEnv   :: (a -> b -> b) -> b -> OccEnv a -> b
 289 occEnvElts   :: OccEnv a -> [a]
 290 extendOccEnv_C :: (a->a->a) -> OccEnv a -> OccName -> a -> OccEnv a
 291 plusOccEnv     :: OccEnv a -> OccEnv a -> OccEnv a
 292 plusOccEnv_C   :: (a->a->a) -> OccEnv a -> OccEnv a -> OccEnv a
 293
 294 emptyOccEnv      = emptyUFM
 295 unitOccEnv       = unitUFM
 296 extendOccEnv     = addToUFM
 297 extendOccEnvList = addListToUFM
 298 lookupOccEnv     = lookupUFM
 299 mkOccEnv         = listToUFM
 300 elemOccEnv       = elemUFM
 301 foldOccEnv       = foldUFM
 302 occEnvElts       = eltsUFM
 303 plusOccEnv       = plusUFM
 304 plusOccEnv_C     = plusUFM_C
 305 extendOccEnv_C   = addToUFM_C
 306
 307
 308 type OccSet = UniqFM OccName
 309
 310 emptyOccSet       :: OccSet
 311 unitOccSet        :: OccName -> OccSet
 312 mkOccSet          :: [OccName] -> OccSet
 313 extendOccSet      :: OccSet -> OccName -> OccSet
 314 extendOccSetList  :: OccSet -> [OccName] -> OccSet
 315 unionOccSets      :: OccSet -> OccSet -> OccSet
 316 unionManyOccSets  :: [OccSet] -> OccSet
 317 minusOccSet       :: OccSet -> OccSet -> OccSet
 318 elemOccSet        :: OccName -> OccSet -> Bool
 319 occSetElts        :: OccSet -> [OccName]
 320 foldOccSet        :: (OccName -> b -> b) -> b -> OccSet -> b
 321 isEmptyOccSet     :: OccSet -> Bool
 322 intersectOccSet   :: OccSet -> OccSet -> OccSet
 323 intersectsOccSet  :: OccSet -> OccSet -> Bool
 324
 325 emptyOccSet       = emptyUniqSet
 326 unitOccSet        = unitUniqSet
 327 mkOccSet          = mkUniqSet
 328 extendOccSet      = addOneToUniqSet
 329 extendOccSetList  = addListToUniqSet
 330 unionOccSets      = unionUniqSets
 331 unionManyOccSets  = unionManyUniqSets
 332 minusOccSet       = minusUniqSet
 333 elemOccSet        = elementOfUniqSet
 334 occSetElts        = uniqSetToList
 335 foldOccSet        = foldUniqSet
 336 isEmptyOccSet     = isEmptyUniqSet
 337 intersectOccSet   = intersectUniqSets
 338 intersectsOccSet s1 s2 = not (isEmptyOccSet (s1 `intersectOccSet` s2))
 339 \end{code}
 340
 341
 342 %************************************************************************
 343 %*                                                                      *
 344 \subsection{Predicates and taking them apart}
 345 %*                                                                      *
 346 %************************************************************************
 347
 348 \begin{code}
 349 occNameFS :: OccName -> EncodedFS
 350 occNameFS (OccName _ s) = s
 351
 352 occNameString :: OccName -> EncodedString
 353 occNameString (OccName _ s) = unpackFS s
 354
 355 occNameUserString :: OccName -> UserString
 356 occNameUserString occ = decode (occNameString occ)
 357
 358 occNameSpace :: OccName -> NameSpace
 359 occNameSpace (OccName sp _) = sp
 360
 361 setOccNameSpace :: NameSpace -> OccName -> OccName
 362 setOccNameSpace sp (OccName _ occ) = OccName sp occ
 363
 364 -- occNameFlavour is used only to generate good error messages
 365 occNameFlavour :: OccName -> SDoc
 366 occNameFlavour (OccName DataName _)  = ptext SLIT("data constructor")
 367 occNameFlavour (OccName TvName _)    = ptext SLIT("type variable")
 368 occNameFlavour (OccName TcClsName _) = ptext SLIT("type constructor or class")
 369 occNameFlavour (OccName VarName s)   = empty
 370
 371 -- briefOccNameFlavour is used in debug-printing of names
 372 briefOccNameFlavour :: OccName -> String
 373 briefOccNameFlavour (OccName sp _) = briefNameSpaceFlavour sp
 374
 375 briefNameSpaceFlavour DataName  = "d"
 376 briefNameSpaceFlavour VarName   = "v"
 377 briefNameSpaceFlavour TvName    = "tv"
 378 briefNameSpaceFlavour TcClsName = "tc"
 379 \end{code}
 380
 381 \begin{code}
 382 isVarOcc, isTvOcc, isDataSymOcc, isSymOcc, isTcOcc :: OccName -> Bool
 383
 384 isVarOcc (OccName VarName _) = True
 385 isVarOcc other               = False
 386
 387 isTvOcc (OccName TvName _) = True
 388 isTvOcc other              = False
 389
 390 isTcOcc (OccName TcClsName _) = True
 391 isTcOcc other                 = False
 392
 393 isValOcc (OccName VarName  _) = True
 394 isValOcc (OccName DataName _) = True
 395 isValOcc other                = False
 396
 397 -- Data constructor operator (starts with ':', or '[]')
 398 -- Pretty inefficient!
 399 isDataSymOcc (OccName DataName s) = isLexConSym (decodeFS s)
 400 isDataSymOcc (OccName VarName s)  = isLexConSym (decodeFS s)
 401 isDataSymOcc other                = False
 402
 403 isDataOcc (OccName DataName _) = True
 404 isDataOcc (OccName VarName s)  = isLexCon (decodeFS s)
 405 isDataOcc other                = False
 406
 407 -- Any operator (data constructor or variable)
 408 -- Pretty inefficient!
 409 isSymOcc (OccName DataName s)  = isLexConSym (decodeFS s)
 410 isSymOcc (OccName TcClsName s) = isLexConSym (decodeFS s)
 411 isSymOcc (OccName VarName s)   = isLexSym (decodeFS s)
 412 isSymOcc other                 = False
 413
 414 parenSymOcc :: OccName -> SDoc -> SDoc
 415 -- Wrap parens around an operator
 416 parenSymOcc occ doc | isSymOcc occ = parens doc
 417                     | otherwise    = doc
 418 \end{code}
 419
 420
 421 \begin{code}
 422 reportIfUnused :: OccName -> Bool
 423   -- Haskell 98 encourages compilers to suppress warnings about
 424   -- unused names in a pattern if they start with "_".
 425 reportIfUnused occ = case occNameUserString occ of
 426                         ('_' : _) -> False
 427                         zz_other  -> True
 428 \end{code}
 429
 430
 431
 432 %************************************************************************
 433 %*                                                                      *
 434 \subsection{Making system names}
 435 %*                                                                      *
 436 %************************************************************************
 437
 438 Here's our convention for splitting up the interface file name space:
 439
 440         d...            dictionary identifiers
 441                         (local variables, so no name-clash worries)
 442
 443         $f...           dict-fun identifiers (from inst decls)
 444         $dm...          default methods
 445         $p...           superclass selectors
 446         $w...           workers
 447         :T...           compiler-generated tycons for dictionaries
 448         :D...           ...ditto data cons
 449         $sf..           specialised version of f
 450
 451         in encoded form these appear as Zdfxxx etc
 452
 453         :...            keywords (export:, letrec: etc.)
 454 --- I THINK THIS IS WRONG!
 455
 456 This knowledge is encoded in the following functions.
 457
 458
 459 @mk_deriv@ generates an @OccName@ from the prefix and a string.
 460 NB: The string must already be encoded!
 461
 462 \begin{code}
 463 mk_deriv :: NameSpace
 464          -> String              -- Distinguishes one sort of derived name from another
 465          -> EncodedString       -- Must be already encoded!!  We don't want to encode it a
 466                                 -- second time because encoding isn't idempotent
 467          -> OccName
 468
 469 mk_deriv occ_sp sys_prefix str = mkSysOcc occ_sp (encode sys_prefix ++ str)
 470 \end{code}
 471
 472 \begin{code}
 473 mkDictOcc, mkIPOcc, mkWorkerOcc, mkDefaultMethodOcc,
 474            mkClassTyConOcc, mkClassDataConOcc, mkSpecOcc
 475    :: OccName -> OccName
 476
 477 -- These derived variables have a prefix that no Haskell value could have
 478 mkDataConWrapperOcc = mk_simple_deriv varName  "$W"
 479 mkWorkerOcc         = mk_simple_deriv varName  "$w"
 480 mkDefaultMethodOcc  = mk_simple_deriv varName  "$dm"
 481 mkDerivedTyConOcc   = mk_simple_deriv tcName   ":"      -- The : prefix makes sure it classifies
 482 mkClassTyConOcc     = mk_simple_deriv tcName   ":T"     -- as a tycon/datacon
 483 mkClassDataConOcc   = mk_simple_deriv dataName ":D"     -- We go straight to the "real" data con
 484                                                         -- for datacons from classes
 485 mkDictOcc           = mk_simple_deriv varName  "$d"
 486 mkIPOcc             = mk_simple_deriv varName  "$i"
 487 mkSpecOcc           = mk_simple_deriv varName  "$s"
 488 mkForeignExportOcc  = mk_simple_deriv varName  "$f"
 489
 490 -- Generic derivable classes
 491 mkGenOcc1           = mk_simple_deriv varName  "$gfrom"
 492 mkGenOcc2           = mk_simple_deriv varName  "$gto"
 493
 494 -- data T = MkT ... deriving( Data ) needs defintions for
 495 --      $tT   :: Data.Generics.Basics.DataType
 496 --      $cMkT :: Data.Generics.Basics.Constr
 497 mkDataTOcc = mk_simple_deriv varName  "$t"
 498 mkDataCOcc = mk_simple_deriv varName  "$c"
 499
 500 mk_simple_deriv sp px occ = mk_deriv sp px (occNameString occ)
 501
 502
 503 -- Data constructor workers are made by setting the name space
 504 -- of the data constructor OccName (which should be a DataName)
 505 -- to VarName
 506 mkDataConWorkerOcc datacon_occ = setOccNameSpace varName datacon_occ
 507 \end{code}
 508
 509 \begin{code}
 510 mkSuperDictSelOcc :: Int        -- Index of superclass, eg 3
 511                   -> OccName    -- Class, eg "Ord"
 512                   -> OccName    -- eg "$p3Ord"
 513 mkSuperDictSelOcc index cls_occ
 514   = mk_deriv varName "$p" (show index ++ occNameString cls_occ)
 515
 516 mkLocalOcc :: Unique            -- Unique
 517            -> OccName           -- Local name (e.g. "sat")
 518            -> OccName           -- Nice unique version ("$L23sat")
 519 mkLocalOcc uniq occ
 520    = mk_deriv varName ("$L" ++ show uniq) (occNameString occ)
 521         -- The Unique might print with characters
 522         -- that need encoding (e.g. 'z'!)
 523 \end{code}
 524
 525
 526 \begin{code}
 527 mkDFunOcc :: EncodedString      -- Typically the class and type glommed together e.g. "OrdMaybe"
 528                                 -- Only used in debug mode, for extra clarity
 529           -> Bool               -- True <=> hs-boot instance dfun
 530           -> Int                -- Unique index
 531           -> OccName            -- "$f3OrdMaybe"
 532
 533 -- In hs-boot files we make dict funs like $fx7ClsTy, which get bound to the real
 534 -- thing when we compile the mother module. Reason: we don't know exactly
 535 -- what the  mother module will call it.
 536
 537 mkDFunOcc info_str is_boot index
 538   = mk_deriv VarName prefix string
 539   where
 540     prefix | is_boot   = "$fx"
 541            | otherwise = "$f"
 542     string | opt_PprStyle_Debug = show index ++ info_str
 543            | otherwise          = show index
 544 \end{code}
 545
 546 We used to add a '$m' to indicate a method, but that gives rise to bad
 547 error messages from the type checker when we print the function name or pattern
 548 of an instance-decl binding.  Why? Because the binding is zapped
 549 to use the method name in place of the selector name.
 550 (See TcClassDcl.tcMethodBind)
 551
 552 The way it is now, -ddump-xx output may look confusing, but
 553 you can always say -dppr-debug to get the uniques.
 554
 555 However, we *do* have to zap the first character to be lower case,
 556 because overloaded constructors (blarg) generate methods too.
 557 And convert to VarName space
 558
 559 e.g. a call to constructor MkFoo where
 560         data (Ord a) => Foo a = MkFoo a
 561
 562 If this is necessary, we do it by prefixing '$m'.  These
 563 guys never show up in error messages.  What a hack.
 564
 565 \begin{code}
 566 mkMethodOcc :: OccName -> OccName
 567 mkMethodOcc occ@(OccName VarName fs) = occ
 568 mkMethodOcc occ                      = mk_simple_deriv varName "$m" occ
 569 \end{code}
 570
 571
 572 %************************************************************************
 573 %*                                                                      *
 574 \subsection{Tidying them up}
 575 %*                                                                      *
 576 %************************************************************************
 577
 578 Before we print chunks of code we like to rename it so that
 579 we don't have to print lots of silly uniques in it.  But we mustn't
 580 accidentally introduce name clashes!  So the idea is that we leave the
 581 OccName alone unless it accidentally clashes with one that is already
 582 in scope; if so, we tack on '1' at the end and try again, then '2', and
 583 so on till we find a unique one.
 584
 585 There's a wrinkle for operators.  Consider '>>='.  We can't use '>>=1'
 586 because that isn't a single lexeme.  So we encode it to 'lle' and *then*
 587 tack on the '1', if necessary.
 588
 589 \begin{code}
 590 type TidyOccEnv = OccEnv Int    -- The in-scope OccNames
 591         -- Range gives a plausible starting point for new guesses
 592
 593 emptyTidyOccEnv = emptyOccEnv
 594
 595 initTidyOccEnv :: [OccName] -> TidyOccEnv       -- Initialise with names to avoid!
 596 initTidyOccEnv = foldl (\env occ -> extendOccEnv env occ 1) emptyTidyOccEnv
 597
 598 tidyOccName :: TidyOccEnv -> OccName -> (TidyOccEnv, OccName)
 599
 600 tidyOccName in_scope occ@(OccName occ_sp fs)
 601   = case lookupOccEnv in_scope occ of
 602         Nothing ->      -- Not already used: make it used
 603                    (extendOccEnv in_scope occ 1, occ)
 604
 605         Just n  ->      -- Already used: make a new guess,
 606                         -- change the guess base, and try again
 607                    tidyOccName  (extendOccEnv in_scope occ (n+1))
 608                                 (mkSysOcc occ_sp (unpackFS fs ++ show n))
 609 \end{code}
 610
 611
 612 %************************************************************************
 613 %*                                                                      *
 614 \subsection{The 'Z' encoding}
 615 %*                                                                      *
 616 %************************************************************************
 617
 618 This is the main name-encoding and decoding function.  It encodes any
 619 string into a string that is acceptable as a C name.  This is the name
 620 by which things are known right through the compiler.
 621
 622 The basic encoding scheme is this.
 623
 624 * Tuples (,,,) are coded as Z3T
 625
 626 * Alphabetic characters (upper and lower) and digits
 627         all translate to themselves;
 628         except 'Z', which translates to 'ZZ'
 629         and    'z', which translates to 'zz'
 630   We need both so that we can preserve the variable/tycon distinction
 631
 632 * Most other printable characters translate to 'zx' or 'Zx' for some
 633         alphabetic character x
 634
 635 * The others translate as 'znnnU' where 'nnn' is the decimal number
 636         of the character
 637
 638         Before          After
 639         --------------------------
 640         Trak            Trak
 641         foo_wib         foozuwib
 642         >               zg
 643         >1              zg1
 644         foo#            foozh
 645         foo##           foozhzh
 646         foo##1          foozhzh1
 647         fooZ            fooZZ
 648         :+              ZCzp
 649         ()              Z0T     0-tuple
 650         (,,,,)          Z5T     5-tuple
 651         (# #)           Z1H     unboxed 1-tuple (note the space)
 652         (#,,,,#)        Z5H     unboxed 5-tuple
 653                 (NB: There is no Z1T nor Z0H.)
 654
 655 \begin{code}
 656 -- alreadyEncoded is used in ASSERTs to check for encoded
 657 -- strings.  It isn't fail-safe, of course, because, say 'zh' might
 658 -- be encoded or not.
 659 alreadyEncoded :: String -> Bool
 660 alreadyEncoded s = all ok s
 661                  where
 662                    ok ' ' = True
 663                         -- This is a bit of a lie; if we really wanted spaces
 664                         -- in names we'd have to encode them.  But we do put
 665                         -- spaces in ccall "occurrences", and we don't want to
 666                         -- reject them here
 667                    ok ch  = isAlphaNum ch
 668
 669 alreadyEncodedFS :: FastString -> Bool
 670 alreadyEncodedFS fs = alreadyEncoded (unpackFS fs)
 671
 672 encode :: UserString -> EncodedString
 673 encode cs = case maybe_tuple cs of
 674                 Just n  -> n            -- Tuples go to Z2T etc
 675                 Nothing -> go cs
 676           where
 677                 go []     = []
 678                 go (c:cs) = encode_ch c ++ go cs
 679
 680 encodeFS :: UserFS -> EncodedFS
 681 encodeFS fast_str  | all unencodedChar str = fast_str
 682                    | otherwise             = mkFastString (encode str)
 683                    where
 684                      str = unpackFS fast_str
 685
 686 unencodedChar :: Char -> Bool   -- True for chars that don't need encoding
 687 unencodedChar 'Z' = False
 688 unencodedChar 'z' = False
 689 unencodedChar c   =  c >= 'a' && c <= 'z'
 690                   || c >= 'A' && c <= 'Z'
 691                   || c >= '0' && c <= '9'
 692
 693 encode_ch :: Char -> EncodedString
 694 encode_ch c | unencodedChar c = [c]     -- Common case first
 695
 696 -- Constructors
 697 encode_ch '('  = "ZL"   -- Needed for things like (,), and (->)
 698 encode_ch ')'  = "ZR"   -- For symmetry with (
 699 encode_ch '['  = "ZM"
 700 encode_ch ']'  = "ZN"
 701 encode_ch ':'  = "ZC"
 702 encode_ch 'Z'  = "ZZ"
 703
 704 -- Variables
 705 encode_ch 'z'  = "zz"
 706 encode_ch '&'  = "za"
 707 encode_ch '|'  = "zb"
 708 encode_ch '^'  = "zc"
 709 encode_ch '$'  = "zd"
 710 encode_ch '='  = "ze"
 711 encode_ch '>'  = "zg"
 712 encode_ch '#'  = "zh"
 713 encode_ch '.'  = "zi"
 714 encode_ch '<'  = "zl"
 715 encode_ch '-'  = "zm"
 716 encode_ch '!'  = "zn"
 717 encode_ch '+'  = "zp"
 718 encode_ch '\'' = "zq"
 719 encode_ch '\\' = "zr"
 720 encode_ch '/'  = "zs"
 721 encode_ch '*'  = "zt"
 722 encode_ch '_'  = "zu"
 723 encode_ch '%'  = "zv"
 724 encode_ch c    = 'z' : shows (ord c) "U"
 725 \end{code}
 726
 727 Decode is used for user printing.
 728
 729 \begin{code}
 730 decodeFS :: FastString -> FastString
 731 decodeFS fs = mkFastString (decode (unpackFS fs))
 732
 733 decode :: EncodedString -> UserString
 734 decode [] = []
 735 decode ('Z' : d : rest) | isDigit d = decode_tuple   d rest
 736                         | otherwise = decode_upper   d : decode rest
 737 decode ('z' : d : rest) | isDigit d = decode_num_esc d rest
 738                         | otherwise = decode_lower   d : decode rest
 739 decode (c   : rest) = c : decode rest
 740
 741 decode_upper, decode_lower :: Char -> Char
 742
 743 decode_upper 'L' = '('
 744 decode_upper 'R' = ')'
 745 decode_upper 'M' = '['
 746 decode_upper 'N' = ']'
 747 decode_upper 'C' = ':'
 748 decode_upper 'Z' = 'Z'
 749 decode_upper ch  = pprTrace "decode_upper" (char ch) ch
 750
 751 decode_lower 'z' = 'z'
 752 decode_lower 'a' = '&'
 753 decode_lower 'b' = '|'
 754 decode_lower 'c' = '^'
 755 decode_lower 'd' = '$'
 756 decode_lower 'e' = '='
 757 decode_lower 'g' = '>'
 758 decode_lower 'h' = '#'
 759 decode_lower 'i' = '.'
 760 decode_lower 'l' = '<'
 761 decode_lower 'm' = '-'
 762 decode_lower 'n' = '!'
 763 decode_lower 'p' = '+'
 764 decode_lower 'q' = '\''
 765 decode_lower 'r' = '\\'
 766 decode_lower 's' = '/'
 767 decode_lower 't' = '*'
 768 decode_lower 'u' = '_'
 769 decode_lower 'v' = '%'
 770 decode_lower ch  = pprTrace "decode_lower" (char ch) ch
 771
 772 -- Characters not having a specific code are coded as z224U
 773 decode_num_esc d rest
 774   = go (digitToInt d) rest
 775   where
 776     go n (c : rest) | isDigit c = go (10*n + digitToInt c) rest
 777     go n ('U' : rest)           = chr n : decode rest
 778     go n other = pprPanic "decode_num_esc" (ppr n <+> text other)
 779
 780 decode_tuple :: Char -> EncodedString -> UserString
 781 decode_tuple d rest
 782   = go (digitToInt d) rest
 783   where
 784         -- NB. recurse back to decode after decoding the tuple, because
 785         -- the tuple might be embedded in a longer name.
 786     go n (c : rest) | isDigit c = go (10*n + digitToInt c) rest
 787     go 0 ('T':rest)     = "()" ++ decode rest
 788     go n ('T':rest)     = '(' : replicate (n-1) ',' ++ ")" ++ decode rest
 789     go 1 ('H':rest)     = "(# #)" ++ decode rest
 790     go n ('H':rest)     = '(' : '#' : replicate (n-1) ',' ++ "#)" ++ decode rest
 791     go n other = pprPanic "decode_tuple" (ppr n <+> text other)
 792 \end{code}
 793
 794
 795 %************************************************************************
 796 %*                                                                      *
 797                 Stuff for dealing with tuples
 798 %*                                                                      *
 799 %************************************************************************
 800
 801 Tuples are encoded as
 802         Z3T or Z3H
 803 for 3-tuples or unboxed 3-tuples respectively.  No other encoding starts
 804         Z<digit>
 805
 806 * "(# #)" is the tycon for an unboxed 1-tuple (not 0-tuple)
 807   There are no unboxed 0-tuples.
 808
 809 * "()" is the tycon for a boxed 0-tuple.
 810   There are no boxed 1-tuples.
 811
 812
 813 \begin{code}
 814 maybe_tuple :: UserString -> Maybe EncodedString
 815
 816 maybe_tuple "(# #)" = Just("Z1H")
 817 maybe_tuple ('(' : '#' : cs) = case count_commas (0::Int) cs of
 818                                  (n, '#' : ')' : cs) -> Just ('Z' : shows (n+1) "H")
 819                                  other               -> Nothing
 820 maybe_tuple "()" = Just("Z0T")
 821 maybe_tuple ('(' : cs)       = case count_commas (0::Int) cs of
 822                                  (n, ')' : cs) -> Just ('Z' : shows (n+1) "T")
 823                                  other         -> Nothing
 824 maybe_tuple other            = Nothing
 825
 826 count_commas :: Int -> String -> (Int, String)
 827 count_commas n (',' : cs) = count_commas (n+1) cs
 828 count_commas n cs         = (n,cs)
 829 \end{code}
 830
 831 \begin{code}
 832 mkTupleOcc :: NameSpace -> Boxity -> Arity -> OccName
 833 mkTupleOcc ns bx ar
 834   = OccName ns (mkFastString ('Z' : (show ar ++ bx_char)))
 835   where
 836     bx_char = case bx of
 837                 Boxed   -> "T"
 838                 Unboxed -> "H"
 839
 840 isTupleOcc_maybe :: OccName -> Maybe (NameSpace, Boxity, Arity)
 841 -- Tuples are special, because there are so many of them!
 842 isTupleOcc_maybe (OccName ns fs)
 843   = case unpackFS fs of
 844         ('Z':d:rest) | isDigit d -> Just (decode_tup (digitToInt d) rest)
 845         other                    -> Nothing
 846   where
 847     decode_tup n "H"      = (ns, Unboxed, n)
 848     decode_tup n "T"      = (ns, Boxed, n)
 849     decode_tup n (d:rest) = decode_tup (n*10 + digitToInt d) rest
 850 \end{code}
 851
 852 %************************************************************************
 853 %*                                                                      *
 854 \subsection{Lexical categories}
 855 %*                                                                      *
 856 %************************************************************************
 857
 858 These functions test strings to see if they fit the lexical categories
 859 defined in the Haskell report.
 860
 861 \begin{code}
 862 isLexCon,   isLexVar,    isLexId,    isLexSym    :: FastString -> Bool
 863 isLexConId, isLexConSym, isLexVarId, isLexVarSym :: FastString -> Bool
 864
 865 isLexCon cs = isLexConId  cs || isLexConSym cs
 866 isLexVar cs = isLexVarId  cs || isLexVarSym cs
 867
 868 isLexId  cs = isLexConId  cs || isLexVarId  cs
 869 isLexSym cs = isLexConSym cs || isLexVarSym cs
 870
 871 -------------
 872
 873 isLexConId cs                           -- Prefix type or data constructors
 874   | nullFastString cs = False           --      e.g. "Foo", "[]", "(,)"
 875   | cs == FSLIT("[]") = True
 876   | otherwise         = startsConId (headFS cs)
 877
 878 isLexVarId cs                           -- Ordinary prefix identifiers
 879   | nullFastString cs = False           --      e.g. "x", "_x"
 880   | otherwise         = startsVarId (headFS cs)
 881
 882 isLexConSym cs                          -- Infix type or data constructors
 883   | nullFastString cs = False           --      e.g. ":-:", ":", "->"
 884   | cs == FSLIT("->") = True
 885   | otherwise         = startsConSym (headFS cs)
 886
 887 isLexVarSym cs                          -- Infix identifiers
 888   | nullFastString cs = False           --      e.g. "+"
 889   | otherwise         = startsVarSym (headFS cs)
 890
 891 -------------
 892 startsVarSym, startsVarId, startsConSym, startsConId :: Char -> Bool
 893 startsVarSym c = isSymbolASCII c || isSymbolISO c       -- Infix Ids
 894 startsConSym c = c == ':'                               -- Infix data constructors
 895 startsVarId c  = isLower c || isLowerISO c || c == '_'  -- Ordinary Ids
 896 startsConId c  = isUpper c || isUpperISO c || c == '('  -- Ordinary type constructors and data constructors
 897
 898
 899 isSymbolASCII c = c `elem` "!#$%&*+./<=>?@\\^|~-"
 900 isSymbolISO   c = ord c `elem` (0xd7 : 0xf7 : [0xa1 .. 0xbf])
 901 isUpperISO    (C# c#) = c# `geChar#` '\xc0'# && c# `leChar#` '\xde'# && c# `neChar#` '\xd7'#
 902         --0xc0 <= oc && oc <= 0xde && oc /= 0xd7 where oc = ord c
 903 isLowerISO    (C# c#) = c# `geChar#` '\xdf'# && c# `leChar#` '\xff'# && c# `neChar#` '\xf7'#
 904         --0xdf <= oc && oc <= 0xff && oc /= 0xf7 where oc = ord c
 905 \end{code}
 906
 907 %************************************************************************
 908 %*                                                                      *
 909                 Binary instance
 910     Here rather than BinIface because OccName is abstract
 911 %*                                                                      *
 912 %************************************************************************
 913
 914 \begin{code}
 915 instance Binary NameSpace where
 916     put_ bh VarName = do
 917             putByte bh 0
 918     put_ bh DataName = do
 919             putByte bh 1
 920     put_ bh TvName = do
 921             putByte bh 2
 922     put_ bh TcClsName = do
 923             putByte bh 3
 924     get bh = do
 925             h <- getByte bh
 926             case h of
 927               0 -> do return VarName
 928               1 -> do return DataName
 929               2 -> do return TvName
 930               _ -> do return TcClsName
 931
 932 instance Binary OccName where
 933     put_ bh (OccName aa ab) = do
 934             put_ bh aa
 935             put_ bh ab
 936     get bh = do
 937           aa <- get bh
 938           ab <- get bh
 939           return (OccName aa ab)
 940 \end{code}