ghc/compiler/basicTypes/OccName.lhs

   1 {-% DrIFT (Automatic class derivations for Haskell) v1.1 %-}
   2 %
   3 % (c) The GRASP/AQUA Project, Glasgow University, 1992-1998
   4 %
   5
   6 \section[OccName]{@OccName@}
   7
   8 \begin{code}
   9 module OccName (
  10         -- The NameSpace type; abstact
  11         NameSpace, tcName, clsName, tcClsName, dataName, varName,
  12         tvName, srcDataName, nameSpaceString,
  13
  14         -- The OccName type
  15         OccName,        -- Abstract, instance of Outputable
  16         pprOccName,
  17
  18         -- The OccEnv type
  19         OccEnv, emptyOccEnv, unitOccEnv, extendOccEnv,
  20         lookupOccEnv, mkOccEnv, extendOccEnvList, elemOccEnv,
  21         occEnvElts, foldOccEnv, plusOccEnv_C, extendOccEnv_C,
  22
  23
  24         -- The OccSet type
  25         OccSet, emptyOccSet, unitOccSet, mkOccSet, extendOccSet, extendOccSetList,
  26         unionOccSets, unionManyOccSets, minusOccSet, elemOccSet, occSetElts,
  27         foldOccSet, isEmptyOccSet, intersectOccSet, intersectsOccSet,
  28
  29         mkOccFS, mkSysOcc, mkSysOccFS, mkFCallOcc, mkKindOccFS,
  30         mkVarOcc, mkVarOccEncoded,
  31         mkSuperDictSelOcc, mkDFunOcc, mkForeignExportOcc,
  32         mkDictOcc, mkIPOcc, mkWorkerOcc, mkMethodOcc, mkDefaultMethodOcc,
  33         mkDerivedTyConOcc, mkClassTyConOcc, mkClassDataConOcc, mkSpecOcc,
  34         mkGenOcc1, mkGenOcc2, mkLocalOcc, mkDataTOcc, mkDataCOcc,
  35         mkDataConWrapperOcc, mkDataConWorkerOcc,
  36
  37         isTvOcc, isTcOcc, isDataOcc, isDataSymOcc, isSymOcc, isValOcc,
  38         reportIfUnused,
  39
  40         occNameFS, occNameString, occNameUserString, occNameSpace,
  41         occNameFlavour, briefOccNameFlavour,
  42         setOccNameSpace,
  43
  44         mkTupleOcc, isTupleOcc_maybe,
  45
  46         -- Tidying up
  47         TidyOccEnv, emptyTidyOccEnv, tidyOccName, initTidyOccEnv,
  48
  49         -- Encoding
  50         EncodedString, EncodedFS, UserString, UserFS, encode, encodeFS, decode, pprEncodedFS,
  51
  52         -- The basic form of names
  53         isLexCon, isLexVar, isLexId, isLexSym,
  54         isLexConId, isLexConSym, isLexVarId, isLexVarSym,
  55         isLowerISO, isUpperISO
  56
  57     ) where
  58
  59 #include "HsVersions.h"
  60
  61 import Char     ( isDigit, isUpper, isLower, isAlphaNum, ord, chr, digitToInt )
  62 import Util     ( thenCmp )
  63 import Unique   ( Unique, mkUnique, Uniquable(..) )
  64 import BasicTypes ( Boxity(..), Arity )
  65 import UniqFM
  66 import UniqSet
  67 import FastString
  68 import Outputable
  69 import Binary
  70
  71 import GLAEXTS
  72 \end{code}
  73
  74 We hold both module names and identifier names in a 'Z-encoded' form
  75 that makes them acceptable both as a C identifier and as a Haskell
  76 (prefix) identifier.
  77
  78 They can always be decoded again when printing error messages
  79 or anything else for the user, but it does make sense for it
  80 to be represented here in encoded form, so that when generating
  81 code the encoding operation is not performed on each occurrence.
  82
  83 These type synonyms help documentation.
  84
  85 \begin{code}
  86 type UserFS    = FastString     -- As the user typed it
  87 type EncodedFS = FastString     -- Encoded form
  88
  89 type UserString = String        -- As the user typed it
  90 type EncodedString = String     -- Encoded form
  91
  92
  93 pprEncodedFS :: EncodedFS -> SDoc
  94 pprEncodedFS fs
  95   = getPprStyle         $ \ sty ->
  96     if userStyle sty
  97         -- ftext (decodeFS fs) would needlessly pack the string again
  98         then text (decode (unpackFS fs))
  99         else ftext fs
 100 \end{code}
 101
 102 %************************************************************************
 103 %*                                                                      *
 104 \subsection{Name space}
 105 %*                                                                      *
 106 %************************************************************************
 107
 108 \begin{code}
 109 data NameSpace = VarName        -- Variables, including "source" data constructors
 110                | DataName       -- "Real" data constructors
 111                | TvName         -- Type variables
 112                | TcClsName      -- Type constructors and classes; Haskell has them
 113                                 -- in the same name space for now.
 114                deriving( Eq, Ord )
 115    {-! derive: Binary !-}
 116
 117 -- Note [Data Constructors]
 118 -- see also: Note [Data Constructor Naming] in DataCon.lhs
 119 --
 120 --      "Source" data constructors are the data constructors mentioned
 121 --      in Haskell source code
 122 --
 123 --      "Real" data constructors are the data constructors of the
 124 --      representation type, which may not be the same as the source
 125 --      type
 126
 127 -- Example:
 128 --      data T = T !(Int,Int)
 129 --
 130 -- The source datacon has type (Int,Int) -> T
 131 -- The real   datacon has type Int -> Int -> T
 132 -- GHC chooses a representation based on the strictness etc.
 133
 134
 135 -- Though type constructors and classes are in the same name space now,
 136 -- the NameSpace type is abstract, so we can easily separate them later
 137 tcName    = TcClsName           -- Type constructors
 138 clsName   = TcClsName           -- Classes
 139 tcClsName = TcClsName           -- Not sure which!
 140
 141 dataName    = DataName
 142 srcDataName = DataName  -- Haskell-source data constructors should be
 143                         -- in the Data name space
 144
 145 tvName      = TvName
 146 varName     = VarName
 147
 148 nameSpaceString :: NameSpace -> String
 149 nameSpaceString DataName  = "Data constructor"
 150 nameSpaceString VarName   = "Variable"
 151 nameSpaceString TvName    = "Type variable"
 152 nameSpaceString TcClsName = "Type constructor or class"
 153 \end{code}
 154
 155
 156 %************************************************************************
 157 %*                                                                      *
 158 \subsection[Name-pieces-datatypes]{The @OccName@ datatypes}
 159 %*                                                                      *
 160 %************************************************************************
 161
 162 \begin{code}
 163 data OccName = OccName
 164                         NameSpace
 165                         EncodedFS
 166    {-! derive : Binary !-}
 167 \end{code}
 168
 169
 170 \begin{code}
 171 instance Eq OccName where
 172     (OccName sp1 s1) == (OccName sp2 s2) = s1 == s2 && sp1 == sp2
 173
 174 instance Ord OccName where
 175     compare (OccName sp1 s1) (OccName sp2 s2) = (s1  `compare` s2) `thenCmp`
 176                                                 (sp1 `compare` sp2)
 177 \end{code}
 178
 179
 180 %************************************************************************
 181 %*                                                                      *
 182 \subsection{Printing}
 183 %*                                                                      *
 184 %************************************************************************
 185
 186 \begin{code}
 187 instance Outputable OccName where
 188     ppr = pprOccName
 189
 190 pprOccName :: OccName -> SDoc
 191 pprOccName (OccName sp occ)
 192   = getPprStyle $ \ sty ->
 193     pprEncodedFS occ <> if debugStyle sty then
 194                            braces (text (briefNameSpaceFlavour sp))
 195                         else empty
 196 \end{code}
 197
 198
 199 %************************************************************************
 200 %*                                                                      *
 201 \subsection{Construction}
 202 %*                                                                      *
 203 %************************************************************************
 204
 205 *Sys* things do no encoding; the caller should ensure that the thing is
 206 already encoded
 207
 208 \begin{code}
 209 mkSysOcc :: NameSpace -> EncodedString -> OccName
 210 mkSysOcc occ_sp str = ASSERT2( alreadyEncoded str, text str )
 211                       OccName occ_sp (mkFastString str)
 212
 213 mkSysOccFS :: NameSpace -> EncodedFS -> OccName
 214 mkSysOccFS occ_sp fs = ASSERT2( alreadyEncodedFS fs, ppr fs )
 215                        OccName occ_sp fs
 216
 217 mkFCallOcc :: EncodedString -> OccName
 218 -- This version of mkSysOcc doesn't check that the string is already encoded,
 219 -- because it will be something like "{__ccall f dyn Int# -> Int#}"
 220 -- This encodes a lot into something that then parses like an Id.
 221 -- But then alreadyEncoded complains about the braces!
 222 mkFCallOcc str = OccName varName (mkFastString str)
 223
 224 -- Kind constructors get a special function.  Uniquely, they are not encoded,
 225 -- so that they have names like '*'.  This means that *even in interface files*
 226 -- we'll get kinds like (* -> (* -> *)).  We can't use mkSysOcc because it
 227 -- has an ASSERT that doesn't hold.
 228 mkKindOccFS :: NameSpace -> EncodedFS -> OccName
 229 mkKindOccFS occ_sp fs = OccName occ_sp fs
 230 \end{code}
 231
 232 *Source-code* things are encoded.
 233
 234 \begin{code}
 235 mkOccFS :: NameSpace -> UserFS -> OccName
 236 mkOccFS occ_sp fs = mkSysOccFS occ_sp (encodeFS fs)
 237
 238 mkVarOcc :: UserFS -> OccName
 239 mkVarOcc fs = mkSysOccFS varName (encodeFS fs)
 240
 241 mkVarOccEncoded :: EncodedFS -> OccName
 242 mkVarOccEncoded fs = mkSysOccFS varName fs
 243 \end{code}
 244
 245
 246
 247 %************************************************************************
 248 %*                                                                      *
 249                 Environments
 250 %*                                                                      *
 251 %************************************************************************
 252
 253 OccEnvs are used mainly for the envts in ModIfaces.
 254
 255 They are efficient, because FastStrings have unique Int# keys.  We assume
 256 this key is less than 2^24, so we can make a Unique using
 257         mkUnique ns key  :: Unique
 258 where 'ns' is a Char reprsenting the name space.  This in turn makes it
 259 easy to build an OccEnv.
 260
 261 \begin{code}
 262 instance Uniquable OccName where
 263   getUnique (OccName ns fs)
 264       = mkUnique char (I# (uniqueOfFS fs))
 265       where     -- See notes above about this getUnique function
 266         char = case ns of
 267                 VarName   -> 'i'
 268                 DataName  -> 'd'
 269                 TvName    -> 'v'
 270                 TcClsName -> 't'
 271
 272 type OccEnv a = UniqFM a
 273
 274 emptyOccEnv :: OccEnv a
 275 unitOccEnv  :: OccName -> a -> OccEnv a
 276 extendOccEnv :: OccEnv a -> OccName -> a -> OccEnv a
 277 extendOccEnvList :: OccEnv a -> [(OccName, a)] -> OccEnv a
 278 lookupOccEnv :: OccEnv a -> OccName -> Maybe a
 279 mkOccEnv     :: [(OccName,a)] -> OccEnv a
 280 elemOccEnv   :: OccName -> OccEnv a -> Bool
 281 foldOccEnv   :: (a -> b -> b) -> b -> OccEnv a -> b
 282 occEnvElts   :: OccEnv a -> [a]
 283 extendOccEnv_C :: (a->a->a) -> OccEnv a -> OccName -> a -> OccEnv a
 284 plusOccEnv_C   :: (a->a->a) -> OccEnv a -> OccEnv a -> OccEnv a
 285
 286 emptyOccEnv      = emptyUFM
 287 unitOccEnv       = unitUFM
 288 extendOccEnv     = addToUFM
 289 extendOccEnvList = addListToUFM
 290 lookupOccEnv     = lookupUFM
 291 mkOccEnv         = listToUFM
 292 elemOccEnv       = elemUFM
 293 foldOccEnv       = foldUFM
 294 occEnvElts       = eltsUFM
 295 plusOccEnv_C     = plusUFM_C
 296 extendOccEnv_C   = addToUFM_C
 297
 298
 299 type OccSet = UniqFM OccName
 300
 301 emptyOccSet       :: OccSet
 302 unitOccSet        :: OccName -> OccSet
 303 mkOccSet          :: [OccName] -> OccSet
 304 extendOccSet      :: OccSet -> OccName -> OccSet
 305 extendOccSetList  :: OccSet -> [OccName] -> OccSet
 306 unionOccSets      :: OccSet -> OccSet -> OccSet
 307 unionManyOccSets  :: [OccSet] -> OccSet
 308 minusOccSet       :: OccSet -> OccSet -> OccSet
 309 elemOccSet        :: OccName -> OccSet -> Bool
 310 occSetElts        :: OccSet -> [OccName]
 311 foldOccSet        :: (OccName -> b -> b) -> b -> OccSet -> b
 312 isEmptyOccSet     :: OccSet -> Bool
 313 intersectOccSet   :: OccSet -> OccSet -> OccSet
 314 intersectsOccSet  :: OccSet -> OccSet -> Bool
 315
 316 emptyOccSet       = emptyUniqSet
 317 unitOccSet        = unitUniqSet
 318 mkOccSet          = mkUniqSet
 319 extendOccSet      = addOneToUniqSet
 320 extendOccSetList  = addListToUniqSet
 321 unionOccSets      = unionUniqSets
 322 unionManyOccSets  = unionManyUniqSets
 323 minusOccSet       = minusUniqSet
 324 elemOccSet        = elementOfUniqSet
 325 occSetElts        = uniqSetToList
 326 foldOccSet        = foldUniqSet
 327 isEmptyOccSet     = isEmptyUniqSet
 328 intersectOccSet   = intersectUniqSets
 329 intersectsOccSet s1 s2 = not (isEmptyOccSet (s1 `intersectOccSet` s2))
 330 \end{code}
 331
 332
 333 %************************************************************************
 334 %*                                                                      *
 335 \subsection{Predicates and taking them apart}
 336 %*                                                                      *
 337 %************************************************************************
 338
 339 \begin{code}
 340 occNameFS :: OccName -> EncodedFS
 341 occNameFS (OccName _ s) = s
 342
 343 occNameString :: OccName -> EncodedString
 344 occNameString (OccName _ s) = unpackFS s
 345
 346 occNameUserString :: OccName -> UserString
 347 occNameUserString occ = decode (occNameString occ)
 348
 349 occNameSpace :: OccName -> NameSpace
 350 occNameSpace (OccName sp _) = sp
 351
 352 setOccNameSpace :: NameSpace -> OccName -> OccName
 353 setOccNameSpace sp (OccName _ occ) = OccName sp occ
 354
 355 -- occNameFlavour is used only to generate good error messages
 356 occNameFlavour :: OccName -> String
 357 occNameFlavour (OccName DataName _)  = "Data constructor"
 358 occNameFlavour (OccName TvName _)    = "Type variable"
 359 occNameFlavour (OccName TcClsName _) = "Type constructor or class"
 360 occNameFlavour (OccName VarName s)   = "Variable"
 361
 362 -- briefOccNameFlavour is used in debug-printing of names
 363 briefOccNameFlavour :: OccName -> String
 364 briefOccNameFlavour (OccName sp _) = briefNameSpaceFlavour sp
 365
 366 briefNameSpaceFlavour DataName  = "d"
 367 briefNameSpaceFlavour VarName   = "v"
 368 briefNameSpaceFlavour TvName    = "tv"
 369 briefNameSpaceFlavour TcClsName = "tc"
 370 \end{code}
 371
 372 \begin{code}
 373 isTvOcc, isDataSymOcc, isSymOcc, isTcOcc :: OccName -> Bool
 374
 375 isTvOcc (OccName TvName _) = True
 376 isTvOcc other              = False
 377
 378 isTcOcc (OccName TcClsName _) = True
 379 isTcOcc other                 = False
 380
 381 isValOcc (OccName VarName  _) = True
 382 isValOcc (OccName DataName _) = True
 383 isValOcc other                = False
 384
 385 -- Data constructor operator (starts with ':', or '[]')
 386 -- Pretty inefficient!
 387 isDataSymOcc (OccName DataName s) = isLexConSym (decodeFS s)
 388 isDataSymOcc (OccName VarName s)  = isLexConSym (decodeFS s)
 389 isDataSymOcc other                = False
 390
 391 isDataOcc (OccName DataName _) = True
 392 isDataOcc (OccName VarName s)  = isLexCon (decodeFS s)
 393 isDataOcc other                = False
 394
 395 -- Any operator (data constructor or variable)
 396 -- Pretty inefficient!
 397 isSymOcc (OccName DataName s) = isLexConSym (decodeFS s)
 398 isSymOcc (OccName VarName s)  = isLexSym (decodeFS s)
 399 isSymOcc other                = False
 400 \end{code}
 401
 402
 403 \begin{code}
 404 reportIfUnused :: OccName -> Bool
 405   -- Haskell 98 encourages compilers to suppress warnings about
 406   -- unused names in a pattern if they start with "_".
 407 reportIfUnused occ = case occNameUserString occ of
 408                         ('_' : _) -> False
 409                         zz_other  -> True
 410 \end{code}
 411
 412
 413
 414 %************************************************************************
 415 %*                                                                      *
 416 \subsection{Making system names}
 417 %*                                                                      *
 418 %************************************************************************
 419
 420 Here's our convention for splitting up the interface file name space:
 421
 422         d...            dictionary identifiers
 423                         (local variables, so no name-clash worries)
 424
 425         $f...           dict-fun identifiers (from inst decls)
 426         $dm...          default methods
 427         $p...           superclass selectors
 428         $w...           workers
 429         :T...           compiler-generated tycons for dictionaries
 430         :D...           ...ditto data cons
 431         $sf..           specialised version of f
 432
 433         in encoded form these appear as Zdfxxx etc
 434
 435         :...            keywords (export:, letrec: etc.)
 436 --- I THINK THIS IS WRONG!
 437
 438 This knowledge is encoded in the following functions.
 439
 440
 441 @mk_deriv@ generates an @OccName@ from the prefix and a string.
 442 NB: The string must already be encoded!
 443
 444 \begin{code}
 445 mk_deriv :: NameSpace
 446          -> String              -- Distinguishes one sort of derived name from another
 447          -> EncodedString       -- Must be already encoded!!  We don't want to encode it a
 448                                 -- second time because encoding isn't idempotent
 449          -> OccName
 450
 451 mk_deriv occ_sp sys_prefix str = mkSysOcc occ_sp (encode sys_prefix ++ str)
 452 \end{code}
 453
 454 \begin{code}
 455 mkDictOcc, mkIPOcc, mkWorkerOcc, mkDefaultMethodOcc,
 456            mkClassTyConOcc, mkClassDataConOcc, mkSpecOcc
 457    :: OccName -> OccName
 458
 459 -- These derived variables have a prefix that no Haskell value could have
 460 mkDataConWrapperOcc = mk_simple_deriv varName  "$W"
 461 mkWorkerOcc         = mk_simple_deriv varName  "$w"
 462 mkDefaultMethodOcc  = mk_simple_deriv varName  "$dm"
 463 mkDerivedTyConOcc   = mk_simple_deriv tcName   ":"      -- The : prefix makes sure it classifies
 464 mkClassTyConOcc     = mk_simple_deriv tcName   ":T"     -- as a tycon/datacon
 465 mkClassDataConOcc   = mk_simple_deriv dataName ":D"     -- We go straight to the "real" data con
 466                                                         -- for datacons from classes
 467 mkDictOcc           = mk_simple_deriv varName  "$d"
 468 mkIPOcc             = mk_simple_deriv varName  "$i"
 469 mkSpecOcc           = mk_simple_deriv varName  "$s"
 470 mkForeignExportOcc  = mk_simple_deriv varName  "$f"
 471
 472 -- Generic derivable classes
 473 mkGenOcc1           = mk_simple_deriv varName  "$gfrom"
 474 mkGenOcc2           = mk_simple_deriv varName  "$gto"
 475
 476 -- data T = MkT ... deriving( Data ) needs defintions for
 477 --      $tT   :: Data.Generics.Basics.DataType
 478 --      $cMkT :: Data.Generics.Basics.Constr
 479 mkDataTOcc = mk_simple_deriv varName  "$t"
 480 mkDataCOcc = mk_simple_deriv varName  "$c"
 481
 482 mk_simple_deriv sp px occ = mk_deriv sp px (occNameString occ)
 483
 484
 485 -- Data constructor workers are made by setting the name space
 486 -- of the data constructor OccName (which should be a DataName)
 487 -- to DataName
 488 mkDataConWorkerOcc datacon_occ = setOccNameSpace varName datacon_occ
 489 \end{code}
 490
 491 \begin{code}
 492 mkSuperDictSelOcc :: Int        -- Index of superclass, eg 3
 493                   -> OccName    -- Class, eg "Ord"
 494                   -> OccName    -- eg "$p3Ord"
 495 mkSuperDictSelOcc index cls_occ
 496   = mk_deriv varName "$p" (show index ++ occNameString cls_occ)
 497
 498 mkLocalOcc :: Unique            -- Unique
 499            -> OccName           -- Local name (e.g. "sat")
 500            -> OccName           -- Nice unique version ("$L23sat")
 501 mkLocalOcc uniq occ
 502    = mk_deriv varName ("$L" ++ show uniq) (occNameString occ)
 503         -- The Unique might print with characters
 504         -- that need encoding (e.g. 'z'!)
 505 \end{code}
 506
 507
 508 \begin{code}
 509 mkDFunOcc :: EncodedString      -- Typically the class and type glommed together e.g. "OrdMaybe"
 510           -> OccName            -- "$fOrdMaybe"
 511
 512 mkDFunOcc string = mk_deriv VarName "$f" string
 513 \end{code}
 514
 515 We used to add a '$m' to indicate a method, but that gives rise to bad
 516 error messages from the type checker when we print the function name or pattern
 517 of an instance-decl binding.  Why? Because the binding is zapped
 518 to use the method name in place of the selector name.
 519 (See TcClassDcl.tcMethodBind)
 520
 521 The way it is now, -ddump-xx output may look confusing, but
 522 you can always say -dppr-debug to get the uniques.
 523
 524 However, we *do* have to zap the first character to be lower case,
 525 because overloaded constructors (blarg) generate methods too.
 526 And convert to VarName space
 527
 528 e.g. a call to constructor MkFoo where
 529         data (Ord a) => Foo a = MkFoo a
 530
 531 If this is necessary, we do it by prefixing '$m'.  These
 532 guys never show up in error messages.  What a hack.
 533
 534 \begin{code}
 535 mkMethodOcc :: OccName -> OccName
 536 mkMethodOcc occ@(OccName VarName fs) = occ
 537 mkMethodOcc occ                      = mk_simple_deriv varName "$m" occ
 538 \end{code}
 539
 540
 541 %************************************************************************
 542 %*                                                                      *
 543 \subsection{Tidying them up}
 544 %*                                                                      *
 545 %************************************************************************
 546
 547 Before we print chunks of code we like to rename it so that
 548 we don't have to print lots of silly uniques in it.  But we mustn't
 549 accidentally introduce name clashes!  So the idea is that we leave the
 550 OccName alone unless it accidentally clashes with one that is already
 551 in scope; if so, we tack on '1' at the end and try again, then '2', and
 552 so on till we find a unique one.
 553
 554 There's a wrinkle for operators.  Consider '>>='.  We can't use '>>=1'
 555 because that isn't a single lexeme.  So we encode it to 'lle' and *then*
 556 tack on the '1', if necessary.
 557
 558 \begin{code}
 559 type TidyOccEnv = OccEnv Int    -- The in-scope OccNames
 560         -- Range gives a plausible starting point for new guesses
 561
 562 emptyTidyOccEnv = emptyOccEnv
 563
 564 initTidyOccEnv :: [OccName] -> TidyOccEnv       -- Initialise with names to avoid!
 565 initTidyOccEnv = foldl (\env occ -> extendOccEnv env occ 1) emptyTidyOccEnv
 566
 567 tidyOccName :: TidyOccEnv -> OccName -> (TidyOccEnv, OccName)
 568
 569 tidyOccName in_scope occ@(OccName occ_sp fs)
 570   = case lookupOccEnv in_scope occ of
 571         Nothing ->      -- Not already used: make it used
 572                    (extendOccEnv in_scope occ 1, occ)
 573
 574         Just n  ->      -- Already used: make a new guess,
 575                         -- change the guess base, and try again
 576                    tidyOccName  (extendOccEnv in_scope occ (n+1))
 577                                 (mkSysOcc occ_sp (unpackFS fs ++ show n))
 578 \end{code}
 579
 580
 581 %************************************************************************
 582 %*                                                                      *
 583 \subsection{The 'Z' encoding}
 584 %*                                                                      *
 585 %************************************************************************
 586
 587 This is the main name-encoding and decoding function.  It encodes any
 588 string into a string that is acceptable as a C name.  This is the name
 589 by which things are known right through the compiler.
 590
 591 The basic encoding scheme is this.
 592
 593 * Tuples (,,,) are coded as Z3T
 594
 595 * Alphabetic characters (upper and lower) and digits
 596         all translate to themselves;
 597         except 'Z', which translates to 'ZZ'
 598         and    'z', which translates to 'zz'
 599   We need both so that we can preserve the variable/tycon distinction
 600
 601 * Most other printable characters translate to 'zx' or 'Zx' for some
 602         alphabetic character x
 603
 604 * The others translate as 'znnnU' where 'nnn' is the decimal number
 605         of the character
 606
 607         Before          After
 608         --------------------------
 609         Trak            Trak
 610         foo_wib         foozuwib
 611         >               zg
 612         >1              zg1
 613         foo#            foozh
 614         foo##           foozhzh
 615         foo##1          foozhzh1
 616         fooZ            fooZZ
 617         :+              ZCzp
 618         ()              Z0T     0-tuple
 619         (,,,,)          Z5T     5-tuple
 620         (# #)           Z1H     unboxed 1-tuple (note the space)
 621         (#,,,,#)        Z5H     unboxed 5-tuple
 622                 (NB: There is no Z1T nor Z0H.)
 623
 624 \begin{code}
 625 -- alreadyEncoded is used in ASSERTs to check for encoded
 626 -- strings.  It isn't fail-safe, of course, because, say 'zh' might
 627 -- be encoded or not.
 628 alreadyEncoded :: String -> Bool
 629 alreadyEncoded s = all ok s
 630                  where
 631                    ok ' ' = True
 632                         -- This is a bit of a lie; if we really wanted spaces
 633                         -- in names we'd have to encode them.  But we do put
 634                         -- spaces in ccall "occurrences", and we don't want to
 635                         -- reject them here
 636                    ok ch  = isAlphaNum ch
 637
 638 alreadyEncodedFS :: FastString -> Bool
 639 alreadyEncodedFS fs = alreadyEncoded (unpackFS fs)
 640
 641 encode :: UserString -> EncodedString
 642 encode cs = case maybe_tuple cs of
 643                 Just n  -> n            -- Tuples go to Z2T etc
 644                 Nothing -> go cs
 645           where
 646                 go []     = []
 647                 go (c:cs) = encode_ch c ++ go cs
 648
 649 encodeFS :: UserFS -> EncodedFS
 650 encodeFS fast_str  | all unencodedChar str = fast_str
 651                    | otherwise             = mkFastString (encode str)
 652                    where
 653                      str = unpackFS fast_str
 654
 655 unencodedChar :: Char -> Bool   -- True for chars that don't need encoding
 656 unencodedChar 'Z' = False
 657 unencodedChar 'z' = False
 658 unencodedChar c   =  c >= 'a' && c <= 'z'
 659                   || c >= 'A' && c <= 'Z'
 660                   || c >= '0' && c <= '9'
 661
 662 encode_ch :: Char -> EncodedString
 663 encode_ch c | unencodedChar c = [c]     -- Common case first
 664
 665 -- Constructors
 666 encode_ch '('  = "ZL"   -- Needed for things like (,), and (->)
 667 encode_ch ')'  = "ZR"   -- For symmetry with (
 668 encode_ch '['  = "ZM"
 669 encode_ch ']'  = "ZN"
 670 encode_ch ':'  = "ZC"
 671 encode_ch 'Z'  = "ZZ"
 672
 673 -- Variables
 674 encode_ch 'z'  = "zz"
 675 encode_ch '&'  = "za"
 676 encode_ch '|'  = "zb"
 677 encode_ch '^'  = "zc"
 678 encode_ch '$'  = "zd"
 679 encode_ch '='  = "ze"
 680 encode_ch '>'  = "zg"
 681 encode_ch '#'  = "zh"
 682 encode_ch '.'  = "zi"
 683 encode_ch '<'  = "zl"
 684 encode_ch '-'  = "zm"
 685 encode_ch '!'  = "zn"
 686 encode_ch '+'  = "zp"
 687 encode_ch '\'' = "zq"
 688 encode_ch '\\' = "zr"
 689 encode_ch '/'  = "zs"
 690 encode_ch '*'  = "zt"
 691 encode_ch '_'  = "zu"
 692 encode_ch '%'  = "zv"
 693 encode_ch c    = 'z' : shows (ord c) "U"
 694 \end{code}
 695
 696 Decode is used for user printing.
 697
 698 \begin{code}
 699 decodeFS :: FastString -> FastString
 700 decodeFS fs = mkFastString (decode (unpackFS fs))
 701
 702 decode :: EncodedString -> UserString
 703 decode [] = []
 704 decode ('Z' : d : rest) | isDigit d = decode_tuple   d rest
 705                         | otherwise = decode_upper   d : decode rest
 706 decode ('z' : d : rest) | isDigit d = decode_num_esc d rest
 707                         | otherwise = decode_lower   d : decode rest
 708 decode (c   : rest) = c : decode rest
 709
 710 decode_upper, decode_lower :: Char -> Char
 711
 712 decode_upper 'L' = '('
 713 decode_upper 'R' = ')'
 714 decode_upper 'M' = '['
 715 decode_upper 'N' = ']'
 716 decode_upper 'C' = ':'
 717 decode_upper 'Z' = 'Z'
 718 decode_upper ch  = pprTrace "decode_upper" (char ch) ch
 719
 720 decode_lower 'z' = 'z'
 721 decode_lower 'a' = '&'
 722 decode_lower 'b' = '|'
 723 decode_lower 'c' = '^'
 724 decode_lower 'd' = '$'
 725 decode_lower 'e' = '='
 726 decode_lower 'g' = '>'
 727 decode_lower 'h' = '#'
 728 decode_lower 'i' = '.'
 729 decode_lower 'l' = '<'
 730 decode_lower 'm' = '-'
 731 decode_lower 'n' = '!'
 732 decode_lower 'p' = '+'
 733 decode_lower 'q' = '\''
 734 decode_lower 'r' = '\\'
 735 decode_lower 's' = '/'
 736 decode_lower 't' = '*'
 737 decode_lower 'u' = '_'
 738 decode_lower 'v' = '%'
 739 decode_lower ch  = pprTrace "decode_lower" (char ch) ch
 740
 741 -- Characters not having a specific code are coded as z224U
 742 decode_num_esc d rest
 743   = go (digitToInt d) rest
 744   where
 745     go n (c : rest) | isDigit c = go (10*n + digitToInt c) rest
 746     go n ('U' : rest)           = chr n : decode rest
 747     go n other = pprPanic "decode_num_esc" (ppr n <+> text other)
 748 \end{code}
 749
 750
 751 %************************************************************************
 752 %*                                                                      *
 753                 Stuff for dealing with tuples
 754 %*                                                                      *
 755 %************************************************************************
 756
 757 Tuples are encoded as
 758         Z3T or Z3H
 759 for 3-tuples or unboxed 3-tuples respectively.  No other encoding starts
 760         Z<digit>
 761
 762 * "(# #)" is the tycon for an unboxed 1-tuple (not 0-tuple)
 763   There are no unboxed 0-tuples.
 764
 765 * "()" is the tycon for a boxed 0-tuple.
 766   There are no boxed 1-tuples.
 767
 768
 769 \begin{code}
 770 maybe_tuple :: UserString -> Maybe EncodedString
 771
 772 maybe_tuple "(# #)" = Just("Z1H")
 773 maybe_tuple ('(' : '#' : cs) = case count_commas (0::Int) cs of
 774                                  (n, '#' : ')' : cs) -> Just ('Z' : shows (n+1) "H")
 775                                  other               -> Nothing
 776 maybe_tuple "()" = Just("Z0T")
 777 maybe_tuple ('(' : cs)       = case count_commas (0::Int) cs of
 778                                  (n, ')' : cs) -> Just ('Z' : shows (n+1) "T")
 779                                  other         -> Nothing
 780 maybe_tuple other            = Nothing
 781
 782 count_commas :: Int -> String -> (Int, String)
 783 count_commas n (',' : cs) = count_commas (n+1) cs
 784 count_commas n cs         = (n,cs)
 785 \end{code}
 786
 787 \begin{code}
 788 decode_tuple :: Char -> EncodedString -> UserString
 789 decode_tuple d rest
 790   = go (digitToInt d) rest
 791   where
 792     go n (c : rest) | isDigit c = go (10*n + digitToInt c) rest
 793     go 0 ['T']                  = "()"
 794     go n ['T']                  = '(' : replicate (n-1) ',' ++ ")"
 795     go 1 ['H']                  = "(# #)"
 796     go n ['H']                  = '(' : '#' : replicate (n-1) ',' ++ "#)"
 797     go n other = pprPanic "decode_tuple" (ppr n <+> text other)
 798
 799 mkTupleOcc :: NameSpace -> Boxity -> Arity -> OccName
 800 mkTupleOcc ns bx ar
 801   = OccName ns (mkFastString ('Z' : (show ar ++ bx_char)))
 802   where
 803     bx_char = case bx of
 804                 Boxed   -> "T"
 805                 Unboxed -> "H"
 806
 807 isTupleOcc_maybe :: OccName -> Maybe (NameSpace, Boxity, Arity)
 808 -- Tuples are special, because there are so many of them!
 809 isTupleOcc_maybe (OccName ns fs)
 810   = case unpackFS fs of
 811         ('Z':d:rest) | isDigit d -> Just (decode_tup (digitToInt d) rest)
 812         other                    -> Nothing
 813   where
 814     decode_tup n "H"      = (ns, Unboxed, n)
 815     decode_tup n "T"      = (ns, Boxed, n)
 816     decode_tup n (d:rest) = decode_tup (n*10 + digitToInt d) rest
 817 \end{code}
 818
 819 %************************************************************************
 820 %*                                                                      *
 821 \subsection{Lexical categories}
 822 %*                                                                      *
 823 %************************************************************************
 824
 825 These functions test strings to see if they fit the lexical categories
 826 defined in the Haskell report.
 827
 828 \begin{code}
 829 isLexCon,   isLexVar,    isLexId,    isLexSym    :: FastString -> Bool
 830 isLexConId, isLexConSym, isLexVarId, isLexVarSym :: FastString -> Bool
 831
 832 isLexCon cs = isLexConId  cs || isLexConSym cs
 833 isLexVar cs = isLexVarId  cs || isLexVarSym cs
 834
 835 isLexId  cs = isLexConId  cs || isLexVarId  cs
 836 isLexSym cs = isLexConSym cs || isLexVarSym cs
 837
 838 -------------
 839
 840 isLexConId cs                           -- Prefix type or data constructors
 841   | nullFastString cs = False           --      e.g. "Foo", "[]", "(,)"
 842   | cs == FSLIT("[]") = True
 843   | otherwise         = startsConId (headFS cs)
 844
 845 isLexVarId cs                           -- Ordinary prefix identifiers
 846   | nullFastString cs = False           --      e.g. "x", "_x"
 847   | otherwise         = startsVarId (headFS cs)
 848
 849 isLexConSym cs                          -- Infix type or data constructors
 850   | nullFastString cs = False           --      e.g. ":-:", ":", "->"
 851   | cs == FSLIT("->") = True
 852   | otherwise         = startsConSym (headFS cs)
 853
 854 isLexVarSym cs                          -- Infix identifiers
 855   | nullFastString cs = False           --      e.g. "+"
 856   | otherwise         = startsVarSym (headFS cs)
 857
 858 -------------
 859 startsVarSym, startsVarId, startsConSym, startsConId :: Char -> Bool
 860 startsVarSym c = isSymbolASCII c || isSymbolISO c       -- Infix Ids
 861 startsConSym c = c == ':'                               -- Infix data constructors
 862 startsVarId c  = isLower c || isLowerISO c || c == '_'  -- Ordinary Ids
 863 startsConId c  = isUpper c || isUpperISO c || c == '('  -- Ordinary type constructors and data constructors
 864
 865
 866 isSymbolASCII c = c `elem` "!#$%&*+./<=>?@\\^|~-"
 867 isSymbolISO   c = ord c `elem` (0xd7 : 0xf7 : [0xa1 .. 0xbf])
 868 isUpperISO    (C# c#) = c# `geChar#` '\xc0'# && c# `leChar#` '\xde'# && c# `neChar#` '\xd7'#
 869         --0xc0 <= oc && oc <= 0xde && oc /= 0xd7 where oc = ord c
 870 isLowerISO    (C# c#) = c# `geChar#` '\xdf'# && c# `leChar#` '\xff'# && c# `neChar#` '\xf7'#
 871         --0xdf <= oc && oc <= 0xff && oc /= 0xf7 where oc = ord c
 872 \end{code}
 873
 874 %************************************************************************
 875 %*                                                                      *
 876                 Binary instance
 877     Here rather than BinIface because OccName is abstract
 878 %*                                                                      *
 879 %************************************************************************
 880
 881 \begin{code}
 882 instance Binary NameSpace where
 883     put_ bh VarName = do
 884             putByte bh 0
 885     put_ bh DataName = do
 886             putByte bh 1
 887     put_ bh TvName = do
 888             putByte bh 2
 889     put_ bh TcClsName = do
 890             putByte bh 3
 891     get bh = do
 892             h <- getByte bh
 893             case h of
 894               0 -> do return VarName
 895               1 -> do return DataName
 896               2 -> do return TvName
 897               _ -> do return TcClsName
 898
 899 instance Binary OccName where
 900     put_ bh (OccName aa ab) = do
 901             put_ bh aa
 902             put_ bh ab
 903     get bh = do
 904           aa <- get bh
 905           ab <- get bh
 906           return (OccName aa ab)
 907 \end{code}