ghc/compiler/basicTypes/OccName.lhs

   1 {-% DrIFT (Automatic class derivations for Haskell) v1.1 %-}
   2 %
   3 % (c) The GRASP/AQUA Project, Glasgow University, 1992-1998
   4 %
   5
   6 \section[OccName]{@OccName@}
   7
   8 \begin{code}
   9 module OccName (
  10         -- The NameSpace type; abstact
  11         NameSpace, tcName, clsName, tcClsName, dataName, varName,
  12         tvName, srcDataName, nameSpaceString,
  13
  14         -- The OccName type
  15         OccName,        -- Abstract, instance of Outputable
  16         pprOccName,
  17
  18         -- The OccEnv type
  19         OccEnv, emptyOccEnv, unitOccEnv, extendOccEnv,
  20         lookupOccEnv, mkOccEnv, extendOccEnvList, elemOccEnv,
  21         occEnvElts, foldOccEnv, plusOccEnv, plusOccEnv_C, extendOccEnv_C,
  22
  23
  24         -- The OccSet type
  25         OccSet, emptyOccSet, unitOccSet, mkOccSet, extendOccSet, extendOccSetList,
  26         unionOccSets, unionManyOccSets, minusOccSet, elemOccSet, occSetElts,
  27         foldOccSet, isEmptyOccSet, intersectOccSet, intersectsOccSet,
  28
  29         mkOccFS, mkSysOcc, mkSysOccFS, mkFCallOcc, mkKindOccFS,
  30         mkVarOcc, mkVarOccEncoded,
  31         mkSuperDictSelOcc, mkDFunOcc, mkForeignExportOcc,
  32         mkDictOcc, mkIPOcc, mkWorkerOcc, mkMethodOcc, mkDefaultMethodOcc,
  33         mkDerivedTyConOcc, mkClassTyConOcc, mkClassDataConOcc, mkSpecOcc,
  34         mkGenOcc1, mkGenOcc2, mkLocalOcc, mkDataTOcc, mkDataCOcc,
  35         mkDataConWrapperOcc, mkDataConWorkerOcc,
  36
  37         isTvOcc, isTcOcc, isDataOcc, isDataSymOcc, isSymOcc, isValOcc,
  38         reportIfUnused,
  39
  40         occNameFS, occNameString, occNameUserString, occNameSpace,
  41         occNameFlavour, briefOccNameFlavour,
  42         setOccNameSpace,
  43
  44         mkTupleOcc, isTupleOcc_maybe,
  45
  46         -- Tidying up
  47         TidyOccEnv, emptyTidyOccEnv, tidyOccName, initTidyOccEnv,
  48
  49         -- Encoding
  50         EncodedString, EncodedFS, UserString, UserFS, encode, encodeFS, decode, pprEncodedFS,
  51
  52         -- The basic form of names
  53         isLexCon, isLexVar, isLexId, isLexSym,
  54         isLexConId, isLexConSym, isLexVarId, isLexVarSym,
  55         isLowerISO, isUpperISO
  56
  57     ) where
  58
  59 #include "HsVersions.h"
  60
  61 import Char     ( isDigit, isUpper, isLower, isAlphaNum, ord, chr, digitToInt )
  62 import Util     ( thenCmp )
  63 import Unique   ( Unique, mkUnique, Uniquable(..) )
  64 import BasicTypes ( Boxity(..), Arity )
  65 import UniqFM
  66 import UniqSet
  67 import FastString
  68 import Outputable
  69 import Binary
  70
  71 import GLAEXTS
  72 \end{code}
  73
  74 We hold both module names and identifier names in a 'Z-encoded' form
  75 that makes them acceptable both as a C identifier and as a Haskell
  76 (prefix) identifier.
  77
  78 They can always be decoded again when printing error messages
  79 or anything else for the user, but it does make sense for it
  80 to be represented here in encoded form, so that when generating
  81 code the encoding operation is not performed on each occurrence.
  82
  83 These type synonyms help documentation.
  84
  85 \begin{code}
  86 type UserFS    = FastString     -- As the user typed it
  87 type EncodedFS = FastString     -- Encoded form
  88
  89 type UserString = String        -- As the user typed it
  90 type EncodedString = String     -- Encoded form
  91
  92
  93 pprEncodedFS :: EncodedFS -> SDoc
  94 pprEncodedFS fs
  95   = getPprStyle         $ \ sty ->
  96     if userStyle sty
  97         -- ftext (decodeFS fs) would needlessly pack the string again
  98         then text (decode (unpackFS fs))
  99         else ftext fs
 100 \end{code}
 101
 102 %************************************************************************
 103 %*                                                                      *
 104 \subsection{Name space}
 105 %*                                                                      *
 106 %************************************************************************
 107
 108 \begin{code}
 109 data NameSpace = VarName        -- Variables, including "source" data constructors
 110                | DataName       -- "Real" data constructors
 111                | TvName         -- Type variables
 112                | TcClsName      -- Type constructors and classes; Haskell has them
 113                                 -- in the same name space for now.
 114                deriving( Eq, Ord )
 115    {-! derive: Binary !-}
 116
 117 -- Note [Data Constructors]
 118 -- see also: Note [Data Constructor Naming] in DataCon.lhs
 119 --
 120 --      "Source" data constructors are the data constructors mentioned
 121 --      in Haskell source code
 122 --
 123 --      "Real" data constructors are the data constructors of the
 124 --      representation type, which may not be the same as the source
 125 --      type
 126
 127 -- Example:
 128 --      data T = T !(Int,Int)
 129 --
 130 -- The source datacon has type (Int,Int) -> T
 131 -- The real   datacon has type Int -> Int -> T
 132 -- GHC chooses a representation based on the strictness etc.
 133
 134
 135 -- Though type constructors and classes are in the same name space now,
 136 -- the NameSpace type is abstract, so we can easily separate them later
 137 tcName    = TcClsName           -- Type constructors
 138 clsName   = TcClsName           -- Classes
 139 tcClsName = TcClsName           -- Not sure which!
 140
 141 dataName    = DataName
 142 srcDataName = DataName  -- Haskell-source data constructors should be
 143                         -- in the Data name space
 144
 145 tvName      = TvName
 146 varName     = VarName
 147
 148 nameSpaceString :: NameSpace -> String
 149 nameSpaceString DataName  = "Data constructor"
 150 nameSpaceString VarName   = "Variable"
 151 nameSpaceString TvName    = "Type variable"
 152 nameSpaceString TcClsName = "Type constructor or class"
 153 \end{code}
 154
 155
 156 %************************************************************************
 157 %*                                                                      *
 158 \subsection[Name-pieces-datatypes]{The @OccName@ datatypes}
 159 %*                                                                      *
 160 %************************************************************************
 161
 162 \begin{code}
 163 data OccName = OccName
 164                         NameSpace
 165                         EncodedFS
 166    {-! derive : Binary !-}
 167 \end{code}
 168
 169
 170 \begin{code}
 171 instance Eq OccName where
 172     (OccName sp1 s1) == (OccName sp2 s2) = s1 == s2 && sp1 == sp2
 173
 174 instance Ord OccName where
 175     compare (OccName sp1 s1) (OccName sp2 s2) = (s1  `compare` s2) `thenCmp`
 176                                                 (sp1 `compare` sp2)
 177 \end{code}
 178
 179
 180 %************************************************************************
 181 %*                                                                      *
 182 \subsection{Printing}
 183 %*                                                                      *
 184 %************************************************************************
 185
 186 \begin{code}
 187 instance Outputable OccName where
 188     ppr = pprOccName
 189
 190 pprOccName :: OccName -> SDoc
 191 pprOccName (OccName sp occ)
 192   = getPprStyle $ \ sty ->
 193     pprEncodedFS occ <> if debugStyle sty then
 194                            braces (text (briefNameSpaceFlavour sp))
 195                         else empty
 196 \end{code}
 197
 198
 199 %************************************************************************
 200 %*                                                                      *
 201 \subsection{Construction}
 202 %*                                                                      *
 203 %************************************************************************
 204
 205 *Sys* things do no encoding; the caller should ensure that the thing is
 206 already encoded
 207
 208 \begin{code}
 209 mkSysOcc :: NameSpace -> EncodedString -> OccName
 210 mkSysOcc occ_sp str = ASSERT2( alreadyEncoded str, text str )
 211                       OccName occ_sp (mkFastString str)
 212
 213 mkSysOccFS :: NameSpace -> EncodedFS -> OccName
 214 mkSysOccFS occ_sp fs = ASSERT2( alreadyEncodedFS fs, ppr fs )
 215                        OccName occ_sp fs
 216
 217 mkFCallOcc :: EncodedString -> OccName
 218 -- This version of mkSysOcc doesn't check that the string is already encoded,
 219 -- because it will be something like "{__ccall f dyn Int# -> Int#}"
 220 -- This encodes a lot into something that then parses like an Id.
 221 -- But then alreadyEncoded complains about the braces!
 222 mkFCallOcc str = OccName varName (mkFastString str)
 223
 224 -- Kind constructors get a special function.  Uniquely, they are not encoded,
 225 -- so that they have names like '*'.  This means that *even in interface files*
 226 -- we'll get kinds like (* -> (* -> *)).  We can't use mkSysOcc because it
 227 -- has an ASSERT that doesn't hold.
 228 mkKindOccFS :: NameSpace -> EncodedFS -> OccName
 229 mkKindOccFS occ_sp fs = OccName occ_sp fs
 230 \end{code}
 231
 232 *Source-code* things are encoded.
 233
 234 \begin{code}
 235 mkOccFS :: NameSpace -> UserFS -> OccName
 236 mkOccFS occ_sp fs = mkSysOccFS occ_sp (encodeFS fs)
 237
 238 mkVarOcc :: UserFS -> OccName
 239 mkVarOcc fs = mkSysOccFS varName (encodeFS fs)
 240
 241 mkVarOccEncoded :: EncodedFS -> OccName
 242 mkVarOccEncoded fs = mkSysOccFS varName fs
 243 \end{code}
 244
 245
 246
 247 %************************************************************************
 248 %*                                                                      *
 249                 Environments
 250 %*                                                                      *
 251 %************************************************************************
 252
 253 OccEnvs are used mainly for the envts in ModIfaces.
 254
 255 They are efficient, because FastStrings have unique Int# keys.  We assume
 256 this key is less than 2^24, so we can make a Unique using
 257         mkUnique ns key  :: Unique
 258 where 'ns' is a Char reprsenting the name space.  This in turn makes it
 259 easy to build an OccEnv.
 260
 261 \begin{code}
 262 instance Uniquable OccName where
 263   getUnique (OccName ns fs)
 264       = mkUnique char (I# (uniqueOfFS fs))
 265       where     -- See notes above about this getUnique function
 266         char = case ns of
 267                 VarName   -> 'i'
 268                 DataName  -> 'd'
 269                 TvName    -> 'v'
 270                 TcClsName -> 't'
 271
 272 type OccEnv a = UniqFM a
 273
 274 emptyOccEnv :: OccEnv a
 275 unitOccEnv  :: OccName -> a -> OccEnv a
 276 extendOccEnv :: OccEnv a -> OccName -> a -> OccEnv a
 277 extendOccEnvList :: OccEnv a -> [(OccName, a)] -> OccEnv a
 278 lookupOccEnv :: OccEnv a -> OccName -> Maybe a
 279 mkOccEnv     :: [(OccName,a)] -> OccEnv a
 280 elemOccEnv   :: OccName -> OccEnv a -> Bool
 281 foldOccEnv   :: (a -> b -> b) -> b -> OccEnv a -> b
 282 occEnvElts   :: OccEnv a -> [a]
 283 extendOccEnv_C :: (a->a->a) -> OccEnv a -> OccName -> a -> OccEnv a
 284 plusOccEnv     :: OccEnv a -> OccEnv a -> OccEnv a
 285 plusOccEnv_C   :: (a->a->a) -> OccEnv a -> OccEnv a -> OccEnv a
 286
 287 emptyOccEnv      = emptyUFM
 288 unitOccEnv       = unitUFM
 289 extendOccEnv     = addToUFM
 290 extendOccEnvList = addListToUFM
 291 lookupOccEnv     = lookupUFM
 292 mkOccEnv         = listToUFM
 293 elemOccEnv       = elemUFM
 294 foldOccEnv       = foldUFM
 295 occEnvElts       = eltsUFM
 296 plusOccEnv       = plusUFM
 297 plusOccEnv_C     = plusUFM_C
 298 extendOccEnv_C   = addToUFM_C
 299
 300
 301 type OccSet = UniqFM OccName
 302
 303 emptyOccSet       :: OccSet
 304 unitOccSet        :: OccName -> OccSet
 305 mkOccSet          :: [OccName] -> OccSet
 306 extendOccSet      :: OccSet -> OccName -> OccSet
 307 extendOccSetList  :: OccSet -> [OccName] -> OccSet
 308 unionOccSets      :: OccSet -> OccSet -> OccSet
 309 unionManyOccSets  :: [OccSet] -> OccSet
 310 minusOccSet       :: OccSet -> OccSet -> OccSet
 311 elemOccSet        :: OccName -> OccSet -> Bool
 312 occSetElts        :: OccSet -> [OccName]
 313 foldOccSet        :: (OccName -> b -> b) -> b -> OccSet -> b
 314 isEmptyOccSet     :: OccSet -> Bool
 315 intersectOccSet   :: OccSet -> OccSet -> OccSet
 316 intersectsOccSet  :: OccSet -> OccSet -> Bool
 317
 318 emptyOccSet       = emptyUniqSet
 319 unitOccSet        = unitUniqSet
 320 mkOccSet          = mkUniqSet
 321 extendOccSet      = addOneToUniqSet
 322 extendOccSetList  = addListToUniqSet
 323 unionOccSets      = unionUniqSets
 324 unionManyOccSets  = unionManyUniqSets
 325 minusOccSet       = minusUniqSet
 326 elemOccSet        = elementOfUniqSet
 327 occSetElts        = uniqSetToList
 328 foldOccSet        = foldUniqSet
 329 isEmptyOccSet     = isEmptyUniqSet
 330 intersectOccSet   = intersectUniqSets
 331 intersectsOccSet s1 s2 = not (isEmptyOccSet (s1 `intersectOccSet` s2))
 332 \end{code}
 333
 334
 335 %************************************************************************
 336 %*                                                                      *
 337 \subsection{Predicates and taking them apart}
 338 %*                                                                      *
 339 %************************************************************************
 340
 341 \begin{code}
 342 occNameFS :: OccName -> EncodedFS
 343 occNameFS (OccName _ s) = s
 344
 345 occNameString :: OccName -> EncodedString
 346 occNameString (OccName _ s) = unpackFS s
 347
 348 occNameUserString :: OccName -> UserString
 349 occNameUserString occ = decode (occNameString occ)
 350
 351 occNameSpace :: OccName -> NameSpace
 352 occNameSpace (OccName sp _) = sp
 353
 354 setOccNameSpace :: NameSpace -> OccName -> OccName
 355 setOccNameSpace sp (OccName _ occ) = OccName sp occ
 356
 357 -- occNameFlavour is used only to generate good error messages
 358 occNameFlavour :: OccName -> String
 359 occNameFlavour (OccName DataName _)  = "Data constructor"
 360 occNameFlavour (OccName TvName _)    = "Type variable"
 361 occNameFlavour (OccName TcClsName _) = "Type constructor or class"
 362 occNameFlavour (OccName VarName s)   = "Variable"
 363
 364 -- briefOccNameFlavour is used in debug-printing of names
 365 briefOccNameFlavour :: OccName -> String
 366 briefOccNameFlavour (OccName sp _) = briefNameSpaceFlavour sp
 367
 368 briefNameSpaceFlavour DataName  = "d"
 369 briefNameSpaceFlavour VarName   = "v"
 370 briefNameSpaceFlavour TvName    = "tv"
 371 briefNameSpaceFlavour TcClsName = "tc"
 372 \end{code}
 373
 374 \begin{code}
 375 isTvOcc, isDataSymOcc, isSymOcc, isTcOcc :: OccName -> Bool
 376
 377 isTvOcc (OccName TvName _) = True
 378 isTvOcc other              = False
 379
 380 isTcOcc (OccName TcClsName _) = True
 381 isTcOcc other                 = False
 382
 383 isValOcc (OccName VarName  _) = True
 384 isValOcc (OccName DataName _) = True
 385 isValOcc other                = False
 386
 387 -- Data constructor operator (starts with ':', or '[]')
 388 -- Pretty inefficient!
 389 isDataSymOcc (OccName DataName s) = isLexConSym (decodeFS s)
 390 isDataSymOcc (OccName VarName s)  = isLexConSym (decodeFS s)
 391 isDataSymOcc other                = False
 392
 393 isDataOcc (OccName DataName _) = True
 394 isDataOcc (OccName VarName s)  = isLexCon (decodeFS s)
 395 isDataOcc other                = False
 396
 397 -- Any operator (data constructor or variable)
 398 -- Pretty inefficient!
 399 isSymOcc (OccName DataName s) = isLexConSym (decodeFS s)
 400 isSymOcc (OccName VarName s)  = isLexSym (decodeFS s)
 401 isSymOcc other                = False
 402 \end{code}
 403
 404
 405 \begin{code}
 406 reportIfUnused :: OccName -> Bool
 407   -- Haskell 98 encourages compilers to suppress warnings about
 408   -- unused names in a pattern if they start with "_".
 409 reportIfUnused occ = case occNameUserString occ of
 410                         ('_' : _) -> False
 411                         zz_other  -> True
 412 \end{code}
 413
 414
 415
 416 %************************************************************************
 417 %*                                                                      *
 418 \subsection{Making system names}
 419 %*                                                                      *
 420 %************************************************************************
 421
 422 Here's our convention for splitting up the interface file name space:
 423
 424         d...            dictionary identifiers
 425                         (local variables, so no name-clash worries)
 426
 427         $f...           dict-fun identifiers (from inst decls)
 428         $dm...          default methods
 429         $p...           superclass selectors
 430         $w...           workers
 431         :T...           compiler-generated tycons for dictionaries
 432         :D...           ...ditto data cons
 433         $sf..           specialised version of f
 434
 435         in encoded form these appear as Zdfxxx etc
 436
 437         :...            keywords (export:, letrec: etc.)
 438 --- I THINK THIS IS WRONG!
 439
 440 This knowledge is encoded in the following functions.
 441
 442
 443 @mk_deriv@ generates an @OccName@ from the prefix and a string.
 444 NB: The string must already be encoded!
 445
 446 \begin{code}
 447 mk_deriv :: NameSpace
 448          -> String              -- Distinguishes one sort of derived name from another
 449          -> EncodedString       -- Must be already encoded!!  We don't want to encode it a
 450                                 -- second time because encoding isn't idempotent
 451          -> OccName
 452
 453 mk_deriv occ_sp sys_prefix str = mkSysOcc occ_sp (encode sys_prefix ++ str)
 454 \end{code}
 455
 456 \begin{code}
 457 mkDictOcc, mkIPOcc, mkWorkerOcc, mkDefaultMethodOcc,
 458            mkClassTyConOcc, mkClassDataConOcc, mkSpecOcc
 459    :: OccName -> OccName
 460
 461 -- These derived variables have a prefix that no Haskell value could have
 462 mkDataConWrapperOcc = mk_simple_deriv varName  "$W"
 463 mkWorkerOcc         = mk_simple_deriv varName  "$w"
 464 mkDefaultMethodOcc  = mk_simple_deriv varName  "$dm"
 465 mkDerivedTyConOcc   = mk_simple_deriv tcName   ":"      -- The : prefix makes sure it classifies
 466 mkClassTyConOcc     = mk_simple_deriv tcName   ":T"     -- as a tycon/datacon
 467 mkClassDataConOcc   = mk_simple_deriv dataName ":D"     -- We go straight to the "real" data con
 468                                                         -- for datacons from classes
 469 mkDictOcc           = mk_simple_deriv varName  "$d"
 470 mkIPOcc             = mk_simple_deriv varName  "$i"
 471 mkSpecOcc           = mk_simple_deriv varName  "$s"
 472 mkForeignExportOcc  = mk_simple_deriv varName  "$f"
 473
 474 -- Generic derivable classes
 475 mkGenOcc1           = mk_simple_deriv varName  "$gfrom"
 476 mkGenOcc2           = mk_simple_deriv varName  "$gto"
 477
 478 -- data T = MkT ... deriving( Data ) needs defintions for
 479 --      $tT   :: Data.Generics.Basics.DataType
 480 --      $cMkT :: Data.Generics.Basics.Constr
 481 mkDataTOcc = mk_simple_deriv varName  "$t"
 482 mkDataCOcc = mk_simple_deriv varName  "$c"
 483
 484 mk_simple_deriv sp px occ = mk_deriv sp px (occNameString occ)
 485
 486
 487 -- Data constructor workers are made by setting the name space
 488 -- of the data constructor OccName (which should be a DataName)
 489 -- to DataName
 490 mkDataConWorkerOcc datacon_occ = setOccNameSpace varName datacon_occ
 491 \end{code}
 492
 493 \begin{code}
 494 mkSuperDictSelOcc :: Int        -- Index of superclass, eg 3
 495                   -> OccName    -- Class, eg "Ord"
 496                   -> OccName    -- eg "$p3Ord"
 497 mkSuperDictSelOcc index cls_occ
 498   = mk_deriv varName "$p" (show index ++ occNameString cls_occ)
 499
 500 mkLocalOcc :: Unique            -- Unique
 501            -> OccName           -- Local name (e.g. "sat")
 502            -> OccName           -- Nice unique version ("$L23sat")
 503 mkLocalOcc uniq occ
 504    = mk_deriv varName ("$L" ++ show uniq) (occNameString occ)
 505         -- The Unique might print with characters
 506         -- that need encoding (e.g. 'z'!)
 507 \end{code}
 508
 509
 510 \begin{code}
 511 mkDFunOcc :: EncodedString      -- Typically the class and type glommed together e.g. "OrdMaybe"
 512           -> OccName            -- "$fOrdMaybe"
 513
 514 mkDFunOcc string = mk_deriv VarName "$f" string
 515 \end{code}
 516
 517 We used to add a '$m' to indicate a method, but that gives rise to bad
 518 error messages from the type checker when we print the function name or pattern
 519 of an instance-decl binding.  Why? Because the binding is zapped
 520 to use the method name in place of the selector name.
 521 (See TcClassDcl.tcMethodBind)
 522
 523 The way it is now, -ddump-xx output may look confusing, but
 524 you can always say -dppr-debug to get the uniques.
 525
 526 However, we *do* have to zap the first character to be lower case,
 527 because overloaded constructors (blarg) generate methods too.
 528 And convert to VarName space
 529
 530 e.g. a call to constructor MkFoo where
 531         data (Ord a) => Foo a = MkFoo a
 532
 533 If this is necessary, we do it by prefixing '$m'.  These
 534 guys never show up in error messages.  What a hack.
 535
 536 \begin{code}
 537 mkMethodOcc :: OccName -> OccName
 538 mkMethodOcc occ@(OccName VarName fs) = occ
 539 mkMethodOcc occ                      = mk_simple_deriv varName "$m" occ
 540 \end{code}
 541
 542
 543 %************************************************************************
 544 %*                                                                      *
 545 \subsection{Tidying them up}
 546 %*                                                                      *
 547 %************************************************************************
 548
 549 Before we print chunks of code we like to rename it so that
 550 we don't have to print lots of silly uniques in it.  But we mustn't
 551 accidentally introduce name clashes!  So the idea is that we leave the
 552 OccName alone unless it accidentally clashes with one that is already
 553 in scope; if so, we tack on '1' at the end and try again, then '2', and
 554 so on till we find a unique one.
 555
 556 There's a wrinkle for operators.  Consider '>>='.  We can't use '>>=1'
 557 because that isn't a single lexeme.  So we encode it to 'lle' and *then*
 558 tack on the '1', if necessary.
 559
 560 \begin{code}
 561 type TidyOccEnv = OccEnv Int    -- The in-scope OccNames
 562         -- Range gives a plausible starting point for new guesses
 563
 564 emptyTidyOccEnv = emptyOccEnv
 565
 566 initTidyOccEnv :: [OccName] -> TidyOccEnv       -- Initialise with names to avoid!
 567 initTidyOccEnv = foldl (\env occ -> extendOccEnv env occ 1) emptyTidyOccEnv
 568
 569 tidyOccName :: TidyOccEnv -> OccName -> (TidyOccEnv, OccName)
 570
 571 tidyOccName in_scope occ@(OccName occ_sp fs)
 572   = case lookupOccEnv in_scope occ of
 573         Nothing ->      -- Not already used: make it used
 574                    (extendOccEnv in_scope occ 1, occ)
 575
 576         Just n  ->      -- Already used: make a new guess,
 577                         -- change the guess base, and try again
 578                    tidyOccName  (extendOccEnv in_scope occ (n+1))
 579                                 (mkSysOcc occ_sp (unpackFS fs ++ show n))
 580 \end{code}
 581
 582
 583 %************************************************************************
 584 %*                                                                      *
 585 \subsection{The 'Z' encoding}
 586 %*                                                                      *
 587 %************************************************************************
 588
 589 This is the main name-encoding and decoding function.  It encodes any
 590 string into a string that is acceptable as a C name.  This is the name
 591 by which things are known right through the compiler.
 592
 593 The basic encoding scheme is this.
 594
 595 * Tuples (,,,) are coded as Z3T
 596
 597 * Alphabetic characters (upper and lower) and digits
 598         all translate to themselves;
 599         except 'Z', which translates to 'ZZ'
 600         and    'z', which translates to 'zz'
 601   We need both so that we can preserve the variable/tycon distinction
 602
 603 * Most other printable characters translate to 'zx' or 'Zx' for some
 604         alphabetic character x
 605
 606 * The others translate as 'znnnU' where 'nnn' is the decimal number
 607         of the character
 608
 609         Before          After
 610         --------------------------
 611         Trak            Trak
 612         foo_wib         foozuwib
 613         >               zg
 614         >1              zg1
 615         foo#            foozh
 616         foo##           foozhzh
 617         foo##1          foozhzh1
 618         fooZ            fooZZ
 619         :+              ZCzp
 620         ()              Z0T     0-tuple
 621         (,,,,)          Z5T     5-tuple
 622         (# #)           Z1H     unboxed 1-tuple (note the space)
 623         (#,,,,#)        Z5H     unboxed 5-tuple
 624                 (NB: There is no Z1T nor Z0H.)
 625
 626 \begin{code}
 627 -- alreadyEncoded is used in ASSERTs to check for encoded
 628 -- strings.  It isn't fail-safe, of course, because, say 'zh' might
 629 -- be encoded or not.
 630 alreadyEncoded :: String -> Bool
 631 alreadyEncoded s = all ok s
 632                  where
 633                    ok ' ' = True
 634                         -- This is a bit of a lie; if we really wanted spaces
 635                         -- in names we'd have to encode them.  But we do put
 636                         -- spaces in ccall "occurrences", and we don't want to
 637                         -- reject them here
 638                    ok ch  = isAlphaNum ch
 639
 640 alreadyEncodedFS :: FastString -> Bool
 641 alreadyEncodedFS fs = alreadyEncoded (unpackFS fs)
 642
 643 encode :: UserString -> EncodedString
 644 encode cs = case maybe_tuple cs of
 645                 Just n  -> n            -- Tuples go to Z2T etc
 646                 Nothing -> go cs
 647           where
 648                 go []     = []
 649                 go (c:cs) = encode_ch c ++ go cs
 650
 651 encodeFS :: UserFS -> EncodedFS
 652 encodeFS fast_str  | all unencodedChar str = fast_str
 653                    | otherwise             = mkFastString (encode str)
 654                    where
 655                      str = unpackFS fast_str
 656
 657 unencodedChar :: Char -> Bool   -- True for chars that don't need encoding
 658 unencodedChar 'Z' = False
 659 unencodedChar 'z' = False
 660 unencodedChar c   =  c >= 'a' && c <= 'z'
 661                   || c >= 'A' && c <= 'Z'
 662                   || c >= '0' && c <= '9'
 663
 664 encode_ch :: Char -> EncodedString
 665 encode_ch c | unencodedChar c = [c]     -- Common case first
 666
 667 -- Constructors
 668 encode_ch '('  = "ZL"   -- Needed for things like (,), and (->)
 669 encode_ch ')'  = "ZR"   -- For symmetry with (
 670 encode_ch '['  = "ZM"
 671 encode_ch ']'  = "ZN"
 672 encode_ch ':'  = "ZC"
 673 encode_ch 'Z'  = "ZZ"
 674
 675 -- Variables
 676 encode_ch 'z'  = "zz"
 677 encode_ch '&'  = "za"
 678 encode_ch '|'  = "zb"
 679 encode_ch '^'  = "zc"
 680 encode_ch '$'  = "zd"
 681 encode_ch '='  = "ze"
 682 encode_ch '>'  = "zg"
 683 encode_ch '#'  = "zh"
 684 encode_ch '.'  = "zi"
 685 encode_ch '<'  = "zl"
 686 encode_ch '-'  = "zm"
 687 encode_ch '!'  = "zn"
 688 encode_ch '+'  = "zp"
 689 encode_ch '\'' = "zq"
 690 encode_ch '\\' = "zr"
 691 encode_ch '/'  = "zs"
 692 encode_ch '*'  = "zt"
 693 encode_ch '_'  = "zu"
 694 encode_ch '%'  = "zv"
 695 encode_ch c    = 'z' : shows (ord c) "U"
 696 \end{code}
 697
 698 Decode is used for user printing.
 699
 700 \begin{code}
 701 decodeFS :: FastString -> FastString
 702 decodeFS fs = mkFastString (decode (unpackFS fs))
 703
 704 decode :: EncodedString -> UserString
 705 decode [] = []
 706 decode ('Z' : d : rest) | isDigit d = decode_tuple   d rest
 707                         | otherwise = decode_upper   d : decode rest
 708 decode ('z' : d : rest) | isDigit d = decode_num_esc d rest
 709                         | otherwise = decode_lower   d : decode rest
 710 decode (c   : rest) = c : decode rest
 711
 712 decode_upper, decode_lower :: Char -> Char
 713
 714 decode_upper 'L' = '('
 715 decode_upper 'R' = ')'
 716 decode_upper 'M' = '['
 717 decode_upper 'N' = ']'
 718 decode_upper 'C' = ':'
 719 decode_upper 'Z' = 'Z'
 720 decode_upper ch  = pprTrace "decode_upper" (char ch) ch
 721
 722 decode_lower 'z' = 'z'
 723 decode_lower 'a' = '&'
 724 decode_lower 'b' = '|'
 725 decode_lower 'c' = '^'
 726 decode_lower 'd' = '$'
 727 decode_lower 'e' = '='
 728 decode_lower 'g' = '>'
 729 decode_lower 'h' = '#'
 730 decode_lower 'i' = '.'
 731 decode_lower 'l' = '<'
 732 decode_lower 'm' = '-'
 733 decode_lower 'n' = '!'
 734 decode_lower 'p' = '+'
 735 decode_lower 'q' = '\''
 736 decode_lower 'r' = '\\'
 737 decode_lower 's' = '/'
 738 decode_lower 't' = '*'
 739 decode_lower 'u' = '_'
 740 decode_lower 'v' = '%'
 741 decode_lower ch  = pprTrace "decode_lower" (char ch) ch
 742
 743 -- Characters not having a specific code are coded as z224U
 744 decode_num_esc d rest
 745   = go (digitToInt d) rest
 746   where
 747     go n (c : rest) | isDigit c = go (10*n + digitToInt c) rest
 748     go n ('U' : rest)           = chr n : decode rest
 749     go n other = pprPanic "decode_num_esc" (ppr n <+> text other)
 750 \end{code}
 751
 752
 753 %************************************************************************
 754 %*                                                                      *
 755                 Stuff for dealing with tuples
 756 %*                                                                      *
 757 %************************************************************************
 758
 759 Tuples are encoded as
 760         Z3T or Z3H
 761 for 3-tuples or unboxed 3-tuples respectively.  No other encoding starts
 762         Z<digit>
 763
 764 * "(# #)" is the tycon for an unboxed 1-tuple (not 0-tuple)
 765   There are no unboxed 0-tuples.
 766
 767 * "()" is the tycon for a boxed 0-tuple.
 768   There are no boxed 1-tuples.
 769
 770
 771 \begin{code}
 772 maybe_tuple :: UserString -> Maybe EncodedString
 773
 774 maybe_tuple "(# #)" = Just("Z1H")
 775 maybe_tuple ('(' : '#' : cs) = case count_commas (0::Int) cs of
 776                                  (n, '#' : ')' : cs) -> Just ('Z' : shows (n+1) "H")
 777                                  other               -> Nothing
 778 maybe_tuple "()" = Just("Z0T")
 779 maybe_tuple ('(' : cs)       = case count_commas (0::Int) cs of
 780                                  (n, ')' : cs) -> Just ('Z' : shows (n+1) "T")
 781                                  other         -> Nothing
 782 maybe_tuple other            = Nothing
 783
 784 count_commas :: Int -> String -> (Int, String)
 785 count_commas n (',' : cs) = count_commas (n+1) cs
 786 count_commas n cs         = (n,cs)
 787 \end{code}
 788
 789 \begin{code}
 790 decode_tuple :: Char -> EncodedString -> UserString
 791 decode_tuple d rest
 792   = go (digitToInt d) rest
 793   where
 794     go n (c : rest) | isDigit c = go (10*n + digitToInt c) rest
 795     go 0 ['T']                  = "()"
 796     go n ['T']                  = '(' : replicate (n-1) ',' ++ ")"
 797     go 1 ['H']                  = "(# #)"
 798     go n ['H']                  = '(' : '#' : replicate (n-1) ',' ++ "#)"
 799     go n other = pprPanic "decode_tuple" (ppr n <+> text other)
 800
 801 mkTupleOcc :: NameSpace -> Boxity -> Arity -> OccName
 802 mkTupleOcc ns bx ar
 803   = OccName ns (mkFastString ('Z' : (show ar ++ bx_char)))
 804   where
 805     bx_char = case bx of
 806                 Boxed   -> "T"
 807                 Unboxed -> "H"
 808
 809 isTupleOcc_maybe :: OccName -> Maybe (NameSpace, Boxity, Arity)
 810 -- Tuples are special, because there are so many of them!
 811 isTupleOcc_maybe (OccName ns fs)
 812   = case unpackFS fs of
 813         ('Z':d:rest) | isDigit d -> Just (decode_tup (digitToInt d) rest)
 814         other                    -> Nothing
 815   where
 816     decode_tup n "H"      = (ns, Unboxed, n)
 817     decode_tup n "T"      = (ns, Boxed, n)
 818     decode_tup n (d:rest) = decode_tup (n*10 + digitToInt d) rest
 819 \end{code}
 820
 821 %************************************************************************
 822 %*                                                                      *
 823 \subsection{Lexical categories}
 824 %*                                                                      *
 825 %************************************************************************
 826
 827 These functions test strings to see if they fit the lexical categories
 828 defined in the Haskell report.
 829
 830 \begin{code}
 831 isLexCon,   isLexVar,    isLexId,    isLexSym    :: FastString -> Bool
 832 isLexConId, isLexConSym, isLexVarId, isLexVarSym :: FastString -> Bool
 833
 834 isLexCon cs = isLexConId  cs || isLexConSym cs
 835 isLexVar cs = isLexVarId  cs || isLexVarSym cs
 836
 837 isLexId  cs = isLexConId  cs || isLexVarId  cs
 838 isLexSym cs = isLexConSym cs || isLexVarSym cs
 839
 840 -------------
 841
 842 isLexConId cs                           -- Prefix type or data constructors
 843   | nullFastString cs = False           --      e.g. "Foo", "[]", "(,)"
 844   | cs == FSLIT("[]") = True
 845   | otherwise         = startsConId (headFS cs)
 846
 847 isLexVarId cs                           -- Ordinary prefix identifiers
 848   | nullFastString cs = False           --      e.g. "x", "_x"
 849   | otherwise         = startsVarId (headFS cs)
 850
 851 isLexConSym cs                          -- Infix type or data constructors
 852   | nullFastString cs = False           --      e.g. ":-:", ":", "->"
 853   | cs == FSLIT("->") = True
 854   | otherwise         = startsConSym (headFS cs)
 855
 856 isLexVarSym cs                          -- Infix identifiers
 857   | nullFastString cs = False           --      e.g. "+"
 858   | otherwise         = startsVarSym (headFS cs)
 859
 860 -------------
 861 startsVarSym, startsVarId, startsConSym, startsConId :: Char -> Bool
 862 startsVarSym c = isSymbolASCII c || isSymbolISO c       -- Infix Ids
 863 startsConSym c = c == ':'                               -- Infix data constructors
 864 startsVarId c  = isLower c || isLowerISO c || c == '_'  -- Ordinary Ids
 865 startsConId c  = isUpper c || isUpperISO c || c == '('  -- Ordinary type constructors and data constructors
 866
 867
 868 isSymbolASCII c = c `elem` "!#$%&*+./<=>?@\\^|~-"
 869 isSymbolISO   c = ord c `elem` (0xd7 : 0xf7 : [0xa1 .. 0xbf])
 870 isUpperISO    (C# c#) = c# `geChar#` '\xc0'# && c# `leChar#` '\xde'# && c# `neChar#` '\xd7'#
 871         --0xc0 <= oc && oc <= 0xde && oc /= 0xd7 where oc = ord c
 872 isLowerISO    (C# c#) = c# `geChar#` '\xdf'# && c# `leChar#` '\xff'# && c# `neChar#` '\xf7'#
 873         --0xdf <= oc && oc <= 0xff && oc /= 0xf7 where oc = ord c
 874 \end{code}
 875
 876 %************************************************************************
 877 %*                                                                      *
 878                 Binary instance
 879     Here rather than BinIface because OccName is abstract
 880 %*                                                                      *
 881 %************************************************************************
 882
 883 \begin{code}
 884 instance Binary NameSpace where
 885     put_ bh VarName = do
 886             putByte bh 0
 887     put_ bh DataName = do
 888             putByte bh 1
 889     put_ bh TvName = do
 890             putByte bh 2
 891     put_ bh TcClsName = do
 892             putByte bh 3
 893     get bh = do
 894             h <- getByte bh
 895             case h of
 896               0 -> do return VarName
 897               1 -> do return DataName
 898               2 -> do return TvName
 899               _ -> do return TcClsName
 900
 901 instance Binary OccName where
 902     put_ bh (OccName aa ab) = do
 903             put_ bh aa
 904             put_ bh ab
 905     get bh = do
 906           aa <- get bh
 907           ab <- get bh
 908           return (OccName aa ab)
 909 \end{code}