ghc/compiler/basicTypes/OccName.lhs

   1 {-% DrIFT (Automatic class derivations for Haskell) v1.1 %-}
   2 %
   3 % (c) The GRASP/AQUA Project, Glasgow University, 1992-1998
   4 %
   5
   6 \section[OccName]{@OccName@}
   7
   8 \begin{code}
   9 module OccName (
  10         -- The NameSpace type; abstact
  11         NameSpace, tcName, clsName, tcClsName, dataName, varName,
  12         tvName, srcDataName, nameSpaceString,
  13
  14         -- The OccName type
  15         OccName,        -- Abstract, instance of Outputable
  16         pprOccName,
  17
  18         -- The OccEnv type
  19         OccEnv, emptyOccEnv, unitOccEnv, extendOccEnv,
  20         lookupOccEnv, mkOccEnv, extendOccEnvList, elemOccEnv,
  21         occEnvElts, foldOccEnv, plusOccEnv, plusOccEnv_C, extendOccEnv_C,
  22
  23
  24         -- The OccSet type
  25         OccSet, emptyOccSet, unitOccSet, mkOccSet, extendOccSet, extendOccSetList,
  26         unionOccSets, unionManyOccSets, minusOccSet, elemOccSet, occSetElts,
  27         foldOccSet, isEmptyOccSet, intersectOccSet, intersectsOccSet,
  28
  29         mkOccName, mkOccFS, mkSysOcc, mkSysOccFS, mkFCallOcc, mkKindOccFS,
  30         mkVarOcc, mkVarOccEncoded, mkTyVarOcc,
  31         mkSuperDictSelOcc, mkDFunOcc, mkForeignExportOcc,
  32         mkDictOcc, mkIPOcc, mkWorkerOcc, mkMethodOcc, mkDefaultMethodOcc,
  33         mkDerivedTyConOcc, mkClassTyConOcc, mkClassDataConOcc, mkSpecOcc,
  34         mkGenOcc1, mkGenOcc2, mkLocalOcc, mkDataTOcc, mkDataCOcc,
  35         mkDataConWrapperOcc, mkDataConWorkerOcc,
  36
  37         isVarOcc, isTvOcc, isTcOcc, isDataOcc, isDataSymOcc, isSymOcc, isValOcc,
  38         parenSymOcc, reportIfUnused,
  39
  40         occNameFS, occNameString, occNameUserString, occNameSpace,
  41         occNameFlavour, briefOccNameFlavour,
  42         setOccNameSpace,
  43
  44         mkTupleOcc, isTupleOcc_maybe,
  45
  46         -- Tidying up
  47         TidyOccEnv, emptyTidyOccEnv, tidyOccName, initTidyOccEnv,
  48
  49         -- Encoding
  50         EncodedString, EncodedFS, UserString, UserFS, encode, encodeFS, decode, pprEncodedFS,
  51
  52         -- The basic form of names
  53         isLexCon, isLexVar, isLexId, isLexSym,
  54         isLexConId, isLexConSym, isLexVarId, isLexVarSym,
  55         isLowerISO, isUpperISO
  56
  57     ) where
  58
  59 #include "HsVersions.h"
  60
  61 import Char     ( isDigit, isUpper, isLower, isAlphaNum, ord, chr, digitToInt )
  62 import Util     ( thenCmp )
  63 import Unique   ( Unique, mkUnique, Uniquable(..) )
  64 import BasicTypes ( Boxity(..), Arity )
  65 import UniqFM
  66 import UniqSet
  67 import FastString
  68 import Outputable
  69 import Binary
  70
  71 import GLAEXTS
  72 \end{code}
  73
  74 We hold both module names and identifier names in a 'Z-encoded' form
  75 that makes them acceptable both as a C identifier and as a Haskell
  76 (prefix) identifier.
  77
  78 They can always be decoded again when printing error messages
  79 or anything else for the user, but it does make sense for it
  80 to be represented here in encoded form, so that when generating
  81 code the encoding operation is not performed on each occurrence.
  82
  83 These type synonyms help documentation.
  84
  85 \begin{code}
  86 type UserFS    = FastString     -- As the user typed it
  87 type EncodedFS = FastString     -- Encoded form
  88
  89 type UserString = String        -- As the user typed it
  90 type EncodedString = String     -- Encoded form
  91
  92
  93 pprEncodedFS :: EncodedFS -> SDoc
  94 pprEncodedFS fs
  95   = getPprStyle         $ \ sty ->
  96     if userStyle sty
  97         -- ftext (decodeFS fs) would needlessly pack the string again
  98         then text (decode (unpackFS fs))
  99         else ftext fs
 100 \end{code}
 101
 102 %************************************************************************
 103 %*                                                                      *
 104 \subsection{Name space}
 105 %*                                                                      *
 106 %************************************************************************
 107
 108 \begin{code}
 109 data NameSpace = VarName        -- Variables, including "source" data constructors
 110                | DataName       -- "Real" data constructors
 111                | TvName         -- Type variables
 112                | TcClsName      -- Type constructors and classes; Haskell has them
 113                                 -- in the same name space for now.
 114                deriving( Eq, Ord )
 115    {-! derive: Binary !-}
 116
 117 -- Note [Data Constructors]
 118 -- see also: Note [Data Constructor Naming] in DataCon.lhs
 119 --
 120 --      "Source" data constructors are the data constructors mentioned
 121 --      in Haskell source code
 122 --
 123 --      "Real" data constructors are the data constructors of the
 124 --      representation type, which may not be the same as the source
 125 --      type
 126
 127 -- Example:
 128 --      data T = T !(Int,Int)
 129 --
 130 -- The source datacon has type (Int,Int) -> T
 131 -- The real   datacon has type Int -> Int -> T
 132 -- GHC chooses a representation based on the strictness etc.
 133
 134
 135 -- Though type constructors and classes are in the same name space now,
 136 -- the NameSpace type is abstract, so we can easily separate them later
 137 tcName    = TcClsName           -- Type constructors
 138 clsName   = TcClsName           -- Classes
 139 tcClsName = TcClsName           -- Not sure which!
 140
 141 dataName    = DataName
 142 srcDataName = DataName  -- Haskell-source data constructors should be
 143                         -- in the Data name space
 144
 145 tvName      = TvName
 146 varName     = VarName
 147
 148 nameSpaceString :: NameSpace -> String
 149 nameSpaceString DataName  = "Data constructor"
 150 nameSpaceString VarName   = "Variable"
 151 nameSpaceString TvName    = "Type variable"
 152 nameSpaceString TcClsName = "Type constructor or class"
 153 \end{code}
 154
 155
 156 %************************************************************************
 157 %*                                                                      *
 158 \subsection[Name-pieces-datatypes]{The @OccName@ datatypes}
 159 %*                                                                      *
 160 %************************************************************************
 161
 162 \begin{code}
 163 data OccName = OccName
 164                         NameSpace
 165                         EncodedFS
 166    {-! derive : Binary !-}
 167 \end{code}
 168
 169
 170 \begin{code}
 171 instance Eq OccName where
 172     (OccName sp1 s1) == (OccName sp2 s2) = s1 == s2 && sp1 == sp2
 173
 174 instance Ord OccName where
 175     compare (OccName sp1 s1) (OccName sp2 s2) = (s1  `compare` s2) `thenCmp`
 176                                                 (sp1 `compare` sp2)
 177 \end{code}
 178
 179
 180 %************************************************************************
 181 %*                                                                      *
 182 \subsection{Printing}
 183 %*                                                                      *
 184 %************************************************************************
 185
 186 \begin{code}
 187 instance Outputable OccName where
 188     ppr = pprOccName
 189
 190 pprOccName :: OccName -> SDoc
 191 pprOccName (OccName sp occ)
 192   = getPprStyle $ \ sty ->
 193     pprEncodedFS occ <> if debugStyle sty then
 194                            braces (text (briefNameSpaceFlavour sp))
 195                         else empty
 196 \end{code}
 197
 198
 199 %************************************************************************
 200 %*                                                                      *
 201 \subsection{Construction}
 202 %*                                                                      *
 203 %*****p*******************************************************************
 204
 205 *Sys* things do no encoding; the caller should ensure that the thing is
 206 already encoded
 207
 208 \begin{code}
 209 mkSysOcc :: NameSpace -> EncodedString -> OccName
 210 mkSysOcc occ_sp str = ASSERT2( alreadyEncoded str, text str )
 211                       OccName occ_sp (mkFastString str)
 212
 213 mkSysOccFS :: NameSpace -> EncodedFS -> OccName
 214 mkSysOccFS occ_sp fs = ASSERT2( alreadyEncodedFS fs, ppr fs )
 215                        OccName occ_sp fs
 216
 217 mkFCallOcc :: EncodedString -> OccName
 218 -- This version of mkSysOcc doesn't check that the string is already encoded,
 219 -- because it will be something like "{__ccall f dyn Int# -> Int#}"
 220 -- This encodes a lot into something that then parses like an Id.
 221 -- But then alreadyEncoded complains about the braces!
 222 mkFCallOcc str = OccName varName (mkFastString str)
 223
 224 -- Kind constructors get a special function.  Uniquely, they are not encoded,
 225 -- so that they have names like '*'.  This means that *even in interface files*
 226 -- we'll get kinds like (* -> (* -> *)).  We can't use mkSysOcc because it
 227 -- has an ASSERT that doesn't hold.
 228 mkKindOccFS :: NameSpace -> EncodedFS -> OccName
 229 mkKindOccFS occ_sp fs = OccName occ_sp fs
 230 \end{code}
 231
 232 *Source-code* things are encoded.
 233
 234 \begin{code}
 235 mkOccFS :: NameSpace -> UserFS -> OccName
 236 mkOccFS occ_sp fs = mkSysOccFS occ_sp (encodeFS fs)
 237
 238 mkOccName :: NameSpace -> String -> OccName
 239 mkOccName ns s = mkSysOcc ns (encode s)
 240
 241 mkVarOcc :: UserFS -> OccName
 242 mkVarOcc fs = mkSysOccFS varName (encodeFS fs)
 243
 244 mkTyVarOcc :: UserFS -> OccName
 245 mkTyVarOcc fs = mkSysOccFS tvName (encodeFS fs)
 246
 247 mkVarOccEncoded :: EncodedFS -> OccName
 248 mkVarOccEncoded fs = mkSysOccFS varName fs
 249 \end{code}
 250
 251
 252
 253 %************************************************************************
 254 %*                                                                      *
 255                 Environments
 256 %*                                                                      *
 257 %************************************************************************
 258
 259 OccEnvs are used mainly for the envts in ModIfaces.
 260
 261 They are efficient, because FastStrings have unique Int# keys.  We assume
 262 this key is less than 2^24, so we can make a Unique using
 263         mkUnique ns key  :: Unique
 264 where 'ns' is a Char reprsenting the name space.  This in turn makes it
 265 easy to build an OccEnv.
 266
 267 \begin{code}
 268 instance Uniquable OccName where
 269   getUnique (OccName ns fs)
 270       = mkUnique char (I# (uniqueOfFS fs))
 271       where     -- See notes above about this getUnique function
 272         char = case ns of
 273                 VarName   -> 'i'
 274                 DataName  -> 'd'
 275                 TvName    -> 'v'
 276                 TcClsName -> 't'
 277
 278 type OccEnv a = UniqFM a
 279
 280 emptyOccEnv :: OccEnv a
 281 unitOccEnv  :: OccName -> a -> OccEnv a
 282 extendOccEnv :: OccEnv a -> OccName -> a -> OccEnv a
 283 extendOccEnvList :: OccEnv a -> [(OccName, a)] -> OccEnv a
 284 lookupOccEnv :: OccEnv a -> OccName -> Maybe a
 285 mkOccEnv     :: [(OccName,a)] -> OccEnv a
 286 elemOccEnv   :: OccName -> OccEnv a -> Bool
 287 foldOccEnv   :: (a -> b -> b) -> b -> OccEnv a -> b
 288 occEnvElts   :: OccEnv a -> [a]
 289 extendOccEnv_C :: (a->a->a) -> OccEnv a -> OccName -> a -> OccEnv a
 290 plusOccEnv     :: OccEnv a -> OccEnv a -> OccEnv a
 291 plusOccEnv_C   :: (a->a->a) -> OccEnv a -> OccEnv a -> OccEnv a
 292
 293 emptyOccEnv      = emptyUFM
 294 unitOccEnv       = unitUFM
 295 extendOccEnv     = addToUFM
 296 extendOccEnvList = addListToUFM
 297 lookupOccEnv     = lookupUFM
 298 mkOccEnv         = listToUFM
 299 elemOccEnv       = elemUFM
 300 foldOccEnv       = foldUFM
 301 occEnvElts       = eltsUFM
 302 plusOccEnv       = plusUFM
 303 plusOccEnv_C     = plusUFM_C
 304 extendOccEnv_C   = addToUFM_C
 305
 306
 307 type OccSet = UniqFM OccName
 308
 309 emptyOccSet       :: OccSet
 310 unitOccSet        :: OccName -> OccSet
 311 mkOccSet          :: [OccName] -> OccSet
 312 extendOccSet      :: OccSet -> OccName -> OccSet
 313 extendOccSetList  :: OccSet -> [OccName] -> OccSet
 314 unionOccSets      :: OccSet -> OccSet -> OccSet
 315 unionManyOccSets  :: [OccSet] -> OccSet
 316 minusOccSet       :: OccSet -> OccSet -> OccSet
 317 elemOccSet        :: OccName -> OccSet -> Bool
 318 occSetElts        :: OccSet -> [OccName]
 319 foldOccSet        :: (OccName -> b -> b) -> b -> OccSet -> b
 320 isEmptyOccSet     :: OccSet -> Bool
 321 intersectOccSet   :: OccSet -> OccSet -> OccSet
 322 intersectsOccSet  :: OccSet -> OccSet -> Bool
 323
 324 emptyOccSet       = emptyUniqSet
 325 unitOccSet        = unitUniqSet
 326 mkOccSet          = mkUniqSet
 327 extendOccSet      = addOneToUniqSet
 328 extendOccSetList  = addListToUniqSet
 329 unionOccSets      = unionUniqSets
 330 unionManyOccSets  = unionManyUniqSets
 331 minusOccSet       = minusUniqSet
 332 elemOccSet        = elementOfUniqSet
 333 occSetElts        = uniqSetToList
 334 foldOccSet        = foldUniqSet
 335 isEmptyOccSet     = isEmptyUniqSet
 336 intersectOccSet   = intersectUniqSets
 337 intersectsOccSet s1 s2 = not (isEmptyOccSet (s1 `intersectOccSet` s2))
 338 \end{code}
 339
 340
 341 %************************************************************************
 342 %*                                                                      *
 343 \subsection{Predicates and taking them apart}
 344 %*                                                                      *
 345 %************************************************************************
 346
 347 \begin{code}
 348 occNameFS :: OccName -> EncodedFS
 349 occNameFS (OccName _ s) = s
 350
 351 occNameString :: OccName -> EncodedString
 352 occNameString (OccName _ s) = unpackFS s
 353
 354 occNameUserString :: OccName -> UserString
 355 occNameUserString occ = decode (occNameString occ)
 356
 357 occNameSpace :: OccName -> NameSpace
 358 occNameSpace (OccName sp _) = sp
 359
 360 setOccNameSpace :: NameSpace -> OccName -> OccName
 361 setOccNameSpace sp (OccName _ occ) = OccName sp occ
 362
 363 -- occNameFlavour is used only to generate good error messages
 364 occNameFlavour :: OccName -> SDoc
 365 occNameFlavour (OccName DataName _)  = ptext SLIT("data constructor")
 366 occNameFlavour (OccName TvName _)    = ptext SLIT("type variable")
 367 occNameFlavour (OccName TcClsName _) = ptext SLIT("type constructor or class")
 368 occNameFlavour (OccName VarName s)   = empty
 369
 370 -- briefOccNameFlavour is used in debug-printing of names
 371 briefOccNameFlavour :: OccName -> String
 372 briefOccNameFlavour (OccName sp _) = briefNameSpaceFlavour sp
 373
 374 briefNameSpaceFlavour DataName  = "d"
 375 briefNameSpaceFlavour VarName   = "v"
 376 briefNameSpaceFlavour TvName    = "tv"
 377 briefNameSpaceFlavour TcClsName = "tc"
 378 \end{code}
 379
 380 \begin{code}
 381 isVarOcc, isTvOcc, isDataSymOcc, isSymOcc, isTcOcc :: OccName -> Bool
 382
 383 isVarOcc (OccName VarName _) = True
 384 isVarOcc other               = False
 385
 386 isTvOcc (OccName TvName _) = True
 387 isTvOcc other              = False
 388
 389 isTcOcc (OccName TcClsName _) = True
 390 isTcOcc other                 = False
 391
 392 isValOcc (OccName VarName  _) = True
 393 isValOcc (OccName DataName _) = True
 394 isValOcc other                = False
 395
 396 -- Data constructor operator (starts with ':', or '[]')
 397 -- Pretty inefficient!
 398 isDataSymOcc (OccName DataName s) = isLexConSym (decodeFS s)
 399 isDataSymOcc (OccName VarName s)  = isLexConSym (decodeFS s)
 400 isDataSymOcc other                = False
 401
 402 isDataOcc (OccName DataName _) = True
 403 isDataOcc (OccName VarName s)  = isLexCon (decodeFS s)
 404 isDataOcc other                = False
 405
 406 -- Any operator (data constructor or variable)
 407 -- Pretty inefficient!
 408 isSymOcc (OccName DataName s)  = isLexConSym (decodeFS s)
 409 isSymOcc (OccName TcClsName s) = isLexConSym (decodeFS s)
 410 isSymOcc (OccName VarName s)   = isLexSym (decodeFS s)
 411 isSymOcc other                 = False
 412
 413 parenSymOcc :: OccName -> SDoc -> SDoc
 414 -- Wrap parens around an operator
 415 parenSymOcc occ doc | isSymOcc occ = parens doc
 416                     | otherwise    = doc
 417 \end{code}
 418
 419
 420 \begin{code}
 421 reportIfUnused :: OccName -> Bool
 422   -- Haskell 98 encourages compilers to suppress warnings about
 423   -- unused names in a pattern if they start with "_".
 424 reportIfUnused occ = case occNameUserString occ of
 425                         ('_' : _) -> False
 426                         zz_other  -> True
 427 \end{code}
 428
 429
 430
 431 %************************************************************************
 432 %*                                                                      *
 433 \subsection{Making system names}
 434 %*                                                                      *
 435 %************************************************************************
 436
 437 Here's our convention for splitting up the interface file name space:
 438
 439         d...            dictionary identifiers
 440                         (local variables, so no name-clash worries)
 441
 442         $f...           dict-fun identifiers (from inst decls)
 443         $dm...          default methods
 444         $p...           superclass selectors
 445         $w...           workers
 446         :T...           compiler-generated tycons for dictionaries
 447         :D...           ...ditto data cons
 448         $sf..           specialised version of f
 449
 450         in encoded form these appear as Zdfxxx etc
 451
 452         :...            keywords (export:, letrec: etc.)
 453 --- I THINK THIS IS WRONG!
 454
 455 This knowledge is encoded in the following functions.
 456
 457
 458 @mk_deriv@ generates an @OccName@ from the prefix and a string.
 459 NB: The string must already be encoded!
 460
 461 \begin{code}
 462 mk_deriv :: NameSpace
 463          -> String              -- Distinguishes one sort of derived name from another
 464          -> EncodedString       -- Must be already encoded!!  We don't want to encode it a
 465                                 -- second time because encoding isn't idempotent
 466          -> OccName
 467
 468 mk_deriv occ_sp sys_prefix str = mkSysOcc occ_sp (encode sys_prefix ++ str)
 469 \end{code}
 470
 471 \begin{code}
 472 mkDictOcc, mkIPOcc, mkWorkerOcc, mkDefaultMethodOcc,
 473            mkClassTyConOcc, mkClassDataConOcc, mkSpecOcc
 474    :: OccName -> OccName
 475
 476 -- These derived variables have a prefix that no Haskell value could have
 477 mkDataConWrapperOcc = mk_simple_deriv varName  "$W"
 478 mkWorkerOcc         = mk_simple_deriv varName  "$w"
 479 mkDefaultMethodOcc  = mk_simple_deriv varName  "$dm"
 480 mkDerivedTyConOcc   = mk_simple_deriv tcName   ":"      -- The : prefix makes sure it classifies
 481 mkClassTyConOcc     = mk_simple_deriv tcName   ":T"     -- as a tycon/datacon
 482 mkClassDataConOcc   = mk_simple_deriv dataName ":D"     -- We go straight to the "real" data con
 483                                                         -- for datacons from classes
 484 mkDictOcc           = mk_simple_deriv varName  "$d"
 485 mkIPOcc             = mk_simple_deriv varName  "$i"
 486 mkSpecOcc           = mk_simple_deriv varName  "$s"
 487 mkForeignExportOcc  = mk_simple_deriv varName  "$f"
 488
 489 -- Generic derivable classes
 490 mkGenOcc1           = mk_simple_deriv varName  "$gfrom"
 491 mkGenOcc2           = mk_simple_deriv varName  "$gto"
 492
 493 -- data T = MkT ... deriving( Data ) needs defintions for
 494 --      $tT   :: Data.Generics.Basics.DataType
 495 --      $cMkT :: Data.Generics.Basics.Constr
 496 mkDataTOcc = mk_simple_deriv varName  "$t"
 497 mkDataCOcc = mk_simple_deriv varName  "$c"
 498
 499 mk_simple_deriv sp px occ = mk_deriv sp px (occNameString occ)
 500
 501
 502 -- Data constructor workers are made by setting the name space
 503 -- of the data constructor OccName (which should be a DataName)
 504 -- to DataName
 505 mkDataConWorkerOcc datacon_occ = setOccNameSpace varName datacon_occ
 506 \end{code}
 507
 508 \begin{code}
 509 mkSuperDictSelOcc :: Int        -- Index of superclass, eg 3
 510                   -> OccName    -- Class, eg "Ord"
 511                   -> OccName    -- eg "$p3Ord"
 512 mkSuperDictSelOcc index cls_occ
 513   = mk_deriv varName "$p" (show index ++ occNameString cls_occ)
 514
 515 mkLocalOcc :: Unique            -- Unique
 516            -> OccName           -- Local name (e.g. "sat")
 517            -> OccName           -- Nice unique version ("$L23sat")
 518 mkLocalOcc uniq occ
 519    = mk_deriv varName ("$L" ++ show uniq) (occNameString occ)
 520         -- The Unique might print with characters
 521         -- that need encoding (e.g. 'z'!)
 522 \end{code}
 523
 524
 525 \begin{code}
 526 mkDFunOcc :: EncodedString      -- Typically the class and type glommed together e.g. "OrdMaybe"
 527           -> OccName            -- "$fOrdMaybe"
 528
 529 mkDFunOcc string = mk_deriv VarName "$f" string
 530 \end{code}
 531
 532 We used to add a '$m' to indicate a method, but that gives rise to bad
 533 error messages from the type checker when we print the function name or pattern
 534 of an instance-decl binding.  Why? Because the binding is zapped
 535 to use the method name in place of the selector name.
 536 (See TcClassDcl.tcMethodBind)
 537
 538 The way it is now, -ddump-xx output may look confusing, but
 539 you can always say -dppr-debug to get the uniques.
 540
 541 However, we *do* have to zap the first character to be lower case,
 542 because overloaded constructors (blarg) generate methods too.
 543 And convert to VarName space
 544
 545 e.g. a call to constructor MkFoo where
 546         data (Ord a) => Foo a = MkFoo a
 547
 548 If this is necessary, we do it by prefixing '$m'.  These
 549 guys never show up in error messages.  What a hack.
 550
 551 \begin{code}
 552 mkMethodOcc :: OccName -> OccName
 553 mkMethodOcc occ@(OccName VarName fs) = occ
 554 mkMethodOcc occ                      = mk_simple_deriv varName "$m" occ
 555 \end{code}
 556
 557
 558 %************************************************************************
 559 %*                                                                      *
 560 \subsection{Tidying them up}
 561 %*                                                                      *
 562 %************************************************************************
 563
 564 Before we print chunks of code we like to rename it so that
 565 we don't have to print lots of silly uniques in it.  But we mustn't
 566 accidentally introduce name clashes!  So the idea is that we leave the
 567 OccName alone unless it accidentally clashes with one that is already
 568 in scope; if so, we tack on '1' at the end and try again, then '2', and
 569 so on till we find a unique one.
 570
 571 There's a wrinkle for operators.  Consider '>>='.  We can't use '>>=1'
 572 because that isn't a single lexeme.  So we encode it to 'lle' and *then*
 573 tack on the '1', if necessary.
 574
 575 \begin{code}
 576 type TidyOccEnv = OccEnv Int    -- The in-scope OccNames
 577         -- Range gives a plausible starting point for new guesses
 578
 579 emptyTidyOccEnv = emptyOccEnv
 580
 581 initTidyOccEnv :: [OccName] -> TidyOccEnv       -- Initialise with names to avoid!
 582 initTidyOccEnv = foldl (\env occ -> extendOccEnv env occ 1) emptyTidyOccEnv
 583
 584 tidyOccName :: TidyOccEnv -> OccName -> (TidyOccEnv, OccName)
 585
 586 tidyOccName in_scope occ@(OccName occ_sp fs)
 587   = case lookupOccEnv in_scope occ of
 588         Nothing ->      -- Not already used: make it used
 589                    (extendOccEnv in_scope occ 1, occ)
 590
 591         Just n  ->      -- Already used: make a new guess,
 592                         -- change the guess base, and try again
 593                    tidyOccName  (extendOccEnv in_scope occ (n+1))
 594                                 (mkSysOcc occ_sp (unpackFS fs ++ show n))
 595 \end{code}
 596
 597
 598 %************************************************************************
 599 %*                                                                      *
 600 \subsection{The 'Z' encoding}
 601 %*                                                                      *
 602 %************************************************************************
 603
 604 This is the main name-encoding and decoding function.  It encodes any
 605 string into a string that is acceptable as a C name.  This is the name
 606 by which things are known right through the compiler.
 607
 608 The basic encoding scheme is this.
 609
 610 * Tuples (,,,) are coded as Z3T
 611
 612 * Alphabetic characters (upper and lower) and digits
 613         all translate to themselves;
 614         except 'Z', which translates to 'ZZ'
 615         and    'z', which translates to 'zz'
 616   We need both so that we can preserve the variable/tycon distinction
 617
 618 * Most other printable characters translate to 'zx' or 'Zx' for some
 619         alphabetic character x
 620
 621 * The others translate as 'znnnU' where 'nnn' is the decimal number
 622         of the character
 623
 624         Before          After
 625         --------------------------
 626         Trak            Trak
 627         foo_wib         foozuwib
 628         >               zg
 629         >1              zg1
 630         foo#            foozh
 631         foo##           foozhzh
 632         foo##1          foozhzh1
 633         fooZ            fooZZ
 634         :+              ZCzp
 635         ()              Z0T     0-tuple
 636         (,,,,)          Z5T     5-tuple
 637         (# #)           Z1H     unboxed 1-tuple (note the space)
 638         (#,,,,#)        Z5H     unboxed 5-tuple
 639                 (NB: There is no Z1T nor Z0H.)
 640
 641 \begin{code}
 642 -- alreadyEncoded is used in ASSERTs to check for encoded
 643 -- strings.  It isn't fail-safe, of course, because, say 'zh' might
 644 -- be encoded or not.
 645 alreadyEncoded :: String -> Bool
 646 alreadyEncoded s = all ok s
 647                  where
 648                    ok ' ' = True
 649                         -- This is a bit of a lie; if we really wanted spaces
 650                         -- in names we'd have to encode them.  But we do put
 651                         -- spaces in ccall "occurrences", and we don't want to
 652                         -- reject them here
 653                    ok ch  = isAlphaNum ch
 654
 655 alreadyEncodedFS :: FastString -> Bool
 656 alreadyEncodedFS fs = alreadyEncoded (unpackFS fs)
 657
 658 encode :: UserString -> EncodedString
 659 encode cs = case maybe_tuple cs of
 660                 Just n  -> n            -- Tuples go to Z2T etc
 661                 Nothing -> go cs
 662           where
 663                 go []     = []
 664                 go (c:cs) = encode_ch c ++ go cs
 665
 666 encodeFS :: UserFS -> EncodedFS
 667 encodeFS fast_str  | all unencodedChar str = fast_str
 668                    | otherwise             = mkFastString (encode str)
 669                    where
 670                      str = unpackFS fast_str
 671
 672 unencodedChar :: Char -> Bool   -- True for chars that don't need encoding
 673 unencodedChar 'Z' = False
 674 unencodedChar 'z' = False
 675 unencodedChar c   =  c >= 'a' && c <= 'z'
 676                   || c >= 'A' && c <= 'Z'
 677                   || c >= '0' && c <= '9'
 678
 679 encode_ch :: Char -> EncodedString
 680 encode_ch c | unencodedChar c = [c]     -- Common case first
 681
 682 -- Constructors
 683 encode_ch '('  = "ZL"   -- Needed for things like (,), and (->)
 684 encode_ch ')'  = "ZR"   -- For symmetry with (
 685 encode_ch '['  = "ZM"
 686 encode_ch ']'  = "ZN"
 687 encode_ch ':'  = "ZC"
 688 encode_ch 'Z'  = "ZZ"
 689
 690 -- Variables
 691 encode_ch 'z'  = "zz"
 692 encode_ch '&'  = "za"
 693 encode_ch '|'  = "zb"
 694 encode_ch '^'  = "zc"
 695 encode_ch '$'  = "zd"
 696 encode_ch '='  = "ze"
 697 encode_ch '>'  = "zg"
 698 encode_ch '#'  = "zh"
 699 encode_ch '.'  = "zi"
 700 encode_ch '<'  = "zl"
 701 encode_ch '-'  = "zm"
 702 encode_ch '!'  = "zn"
 703 encode_ch '+'  = "zp"
 704 encode_ch '\'' = "zq"
 705 encode_ch '\\' = "zr"
 706 encode_ch '/'  = "zs"
 707 encode_ch '*'  = "zt"
 708 encode_ch '_'  = "zu"
 709 encode_ch '%'  = "zv"
 710 encode_ch c    = 'z' : shows (ord c) "U"
 711 \end{code}
 712
 713 Decode is used for user printing.
 714
 715 \begin{code}
 716 decodeFS :: FastString -> FastString
 717 decodeFS fs = mkFastString (decode (unpackFS fs))
 718
 719 decode :: EncodedString -> UserString
 720 decode [] = []
 721 decode ('Z' : d : rest) | isDigit d = decode_tuple   d rest
 722                         | otherwise = decode_upper   d : decode rest
 723 decode ('z' : d : rest) | isDigit d = decode_num_esc d rest
 724                         | otherwise = decode_lower   d : decode rest
 725 decode (c   : rest) = c : decode rest
 726
 727 decode_upper, decode_lower :: Char -> Char
 728
 729 decode_upper 'L' = '('
 730 decode_upper 'R' = ')'
 731 decode_upper 'M' = '['
 732 decode_upper 'N' = ']'
 733 decode_upper 'C' = ':'
 734 decode_upper 'Z' = 'Z'
 735 decode_upper ch  = pprTrace "decode_upper" (char ch) ch
 736
 737 decode_lower 'z' = 'z'
 738 decode_lower 'a' = '&'
 739 decode_lower 'b' = '|'
 740 decode_lower 'c' = '^'
 741 decode_lower 'd' = '$'
 742 decode_lower 'e' = '='
 743 decode_lower 'g' = '>'
 744 decode_lower 'h' = '#'
 745 decode_lower 'i' = '.'
 746 decode_lower 'l' = '<'
 747 decode_lower 'm' = '-'
 748 decode_lower 'n' = '!'
 749 decode_lower 'p' = '+'
 750 decode_lower 'q' = '\''
 751 decode_lower 'r' = '\\'
 752 decode_lower 's' = '/'
 753 decode_lower 't' = '*'
 754 decode_lower 'u' = '_'
 755 decode_lower 'v' = '%'
 756 decode_lower ch  = pprTrace "decode_lower" (char ch) ch
 757
 758 -- Characters not having a specific code are coded as z224U
 759 decode_num_esc d rest
 760   = go (digitToInt d) rest
 761   where
 762     go n (c : rest) | isDigit c = go (10*n + digitToInt c) rest
 763     go n ('U' : rest)           = chr n : decode rest
 764     go n other = pprPanic "decode_num_esc" (ppr n <+> text other)
 765
 766 decode_tuple :: Char -> EncodedString -> UserString
 767 decode_tuple d rest
 768   = go (digitToInt d) rest
 769   where
 770         -- NB. recurse back to decode after decoding the tuple, because
 771         -- the tuple might be embedded in a longer name.
 772     go n (c : rest) | isDigit c = go (10*n + digitToInt c) rest
 773     go 0 ('T':rest)     = "()" ++ decode rest
 774     go n ('T':rest)     = '(' : replicate (n-1) ',' ++ ")" ++ decode rest
 775     go 1 ('H':rest)     = "(# #)" ++ decode rest
 776     go n ('H':rest)     = '(' : '#' : replicate (n-1) ',' ++ "#)" ++ decode rest
 777     go n other = pprPanic "decode_tuple" (ppr n <+> text other)
 778 \end{code}
 779
 780
 781 %************************************************************************
 782 %*                                                                      *
 783                 Stuff for dealing with tuples
 784 %*                                                                      *
 785 %************************************************************************
 786
 787 Tuples are encoded as
 788         Z3T or Z3H
 789 for 3-tuples or unboxed 3-tuples respectively.  No other encoding starts
 790         Z<digit>
 791
 792 * "(# #)" is the tycon for an unboxed 1-tuple (not 0-tuple)
 793   There are no unboxed 0-tuples.
 794
 795 * "()" is the tycon for a boxed 0-tuple.
 796   There are no boxed 1-tuples.
 797
 798
 799 \begin{code}
 800 maybe_tuple :: UserString -> Maybe EncodedString
 801
 802 maybe_tuple "(# #)" = Just("Z1H")
 803 maybe_tuple ('(' : '#' : cs) = case count_commas (0::Int) cs of
 804                                  (n, '#' : ')' : cs) -> Just ('Z' : shows (n+1) "H")
 805                                  other               -> Nothing
 806 maybe_tuple "()" = Just("Z0T")
 807 maybe_tuple ('(' : cs)       = case count_commas (0::Int) cs of
 808                                  (n, ')' : cs) -> Just ('Z' : shows (n+1) "T")
 809                                  other         -> Nothing
 810 maybe_tuple other            = Nothing
 811
 812 count_commas :: Int -> String -> (Int, String)
 813 count_commas n (',' : cs) = count_commas (n+1) cs
 814 count_commas n cs         = (n,cs)
 815 \end{code}
 816
 817 \begin{code}
 818 mkTupleOcc :: NameSpace -> Boxity -> Arity -> OccName
 819 mkTupleOcc ns bx ar
 820   = OccName ns (mkFastString ('Z' : (show ar ++ bx_char)))
 821   where
 822     bx_char = case bx of
 823                 Boxed   -> "T"
 824                 Unboxed -> "H"
 825
 826 isTupleOcc_maybe :: OccName -> Maybe (NameSpace, Boxity, Arity)
 827 -- Tuples are special, because there are so many of them!
 828 isTupleOcc_maybe (OccName ns fs)
 829   = case unpackFS fs of
 830         ('Z':d:rest) | isDigit d -> Just (decode_tup (digitToInt d) rest)
 831         other                    -> Nothing
 832   where
 833     decode_tup n "H"      = (ns, Unboxed, n)
 834     decode_tup n "T"      = (ns, Boxed, n)
 835     decode_tup n (d:rest) = decode_tup (n*10 + digitToInt d) rest
 836 \end{code}
 837
 838 %************************************************************************
 839 %*                                                                      *
 840 \subsection{Lexical categories}
 841 %*                                                                      *
 842 %************************************************************************
 843
 844 These functions test strings to see if they fit the lexical categories
 845 defined in the Haskell report.
 846
 847 \begin{code}
 848 isLexCon,   isLexVar,    isLexId,    isLexSym    :: FastString -> Bool
 849 isLexConId, isLexConSym, isLexVarId, isLexVarSym :: FastString -> Bool
 850
 851 isLexCon cs = isLexConId  cs || isLexConSym cs
 852 isLexVar cs = isLexVarId  cs || isLexVarSym cs
 853
 854 isLexId  cs = isLexConId  cs || isLexVarId  cs
 855 isLexSym cs = isLexConSym cs || isLexVarSym cs
 856
 857 -------------
 858
 859 isLexConId cs                           -- Prefix type or data constructors
 860   | nullFastString cs = False           --      e.g. "Foo", "[]", "(,)"
 861   | cs == FSLIT("[]") = True
 862   | otherwise         = startsConId (headFS cs)
 863
 864 isLexVarId cs                           -- Ordinary prefix identifiers
 865   | nullFastString cs = False           --      e.g. "x", "_x"
 866   | otherwise         = startsVarId (headFS cs)
 867
 868 isLexConSym cs                          -- Infix type or data constructors
 869   | nullFastString cs = False           --      e.g. ":-:", ":", "->"
 870   | cs == FSLIT("->") = True
 871   | otherwise         = startsConSym (headFS cs)
 872
 873 isLexVarSym cs                          -- Infix identifiers
 874   | nullFastString cs = False           --      e.g. "+"
 875   | otherwise         = startsVarSym (headFS cs)
 876
 877 -------------
 878 startsVarSym, startsVarId, startsConSym, startsConId :: Char -> Bool
 879 startsVarSym c = isSymbolASCII c || isSymbolISO c       -- Infix Ids
 880 startsConSym c = c == ':'                               -- Infix data constructors
 881 startsVarId c  = isLower c || isLowerISO c || c == '_'  -- Ordinary Ids
 882 startsConId c  = isUpper c || isUpperISO c || c == '('  -- Ordinary type constructors and data constructors
 883
 884
 885 isSymbolASCII c = c `elem` "!#$%&*+./<=>?@\\^|~-"
 886 isSymbolISO   c = ord c `elem` (0xd7 : 0xf7 : [0xa1 .. 0xbf])
 887 isUpperISO    (C# c#) = c# `geChar#` '\xc0'# && c# `leChar#` '\xde'# && c# `neChar#` '\xd7'#
 888         --0xc0 <= oc && oc <= 0xde && oc /= 0xd7 where oc = ord c
 889 isLowerISO    (C# c#) = c# `geChar#` '\xdf'# && c# `leChar#` '\xff'# && c# `neChar#` '\xf7'#
 890         --0xdf <= oc && oc <= 0xff && oc /= 0xf7 where oc = ord c
 891 \end{code}
 892
 893 %************************************************************************
 894 %*                                                                      *
 895                 Binary instance
 896     Here rather than BinIface because OccName is abstract
 897 %*                                                                      *
 898 %************************************************************************
 899
 900 \begin{code}
 901 instance Binary NameSpace where
 902     put_ bh VarName = do
 903             putByte bh 0
 904     put_ bh DataName = do
 905             putByte bh 1
 906     put_ bh TvName = do
 907             putByte bh 2
 908     put_ bh TcClsName = do
 909             putByte bh 3
 910     get bh = do
 911             h <- getByte bh
 912             case h of
 913               0 -> do return VarName
 914               1 -> do return DataName
 915               2 -> do return TvName
 916               _ -> do return TcClsName
 917
 918 instance Binary OccName where
 919     put_ bh (OccName aa ab) = do
 920             put_ bh aa
 921             put_ bh ab
 922     get bh = do
 923           aa <- get bh
 924           ab <- get bh
 925           return (OccName aa ab)
 926 \end{code}