ghc/compiler/basicTypes/OccName.lhs

   1 {-% DrIFT (Automatic class derivations for Haskell) v1.1 %-}
   2 %
   3 % (c) The GRASP/AQUA Project, Glasgow University, 1992-1998
   4 %
   5
   6 \section[OccName]{@OccName@}
   7
   8 \begin{code}
   9 module OccName (
  10         -- The NameSpace type; abstact
  11         NameSpace, tcName, clsName, tcClsName, dataName, varName,
  12         tvName, srcDataName, nameSpaceString,
  13
  14         -- The OccName type
  15         OccName,        -- Abstract, instance of Outputable
  16         pprOccName,
  17
  18         -- The OccEnv type
  19         OccEnv, emptyOccEnv, unitOccEnv, extendOccEnv,
  20         lookupOccEnv, mkOccEnv, extendOccEnvList, elemOccEnv,
  21         occEnvElts, foldOccEnv, plusOccEnv, plusOccEnv_C, extendOccEnv_C,
  22
  23
  24         -- The OccSet type
  25         OccSet, emptyOccSet, unitOccSet, mkOccSet, extendOccSet, extendOccSetList,
  26         unionOccSets, unionManyOccSets, minusOccSet, elemOccSet, occSetElts,
  27         foldOccSet, isEmptyOccSet, intersectOccSet, intersectsOccSet,
  28
  29         mkOccName, mkOccFS, mkSysOcc, mkSysOccFS, mkFCallOcc, mkKindOccFS,
  30         mkVarOcc, mkVarOccEncoded, mkTyVarOcc,
  31         mkSuperDictSelOcc, mkDFunOcc, mkForeignExportOcc,
  32         mkDictOcc, mkIPOcc, mkWorkerOcc, mkMethodOcc, mkDefaultMethodOcc,
  33         mkDerivedTyConOcc, mkClassTyConOcc, mkClassDataConOcc, mkSpecOcc,
  34         mkGenOcc1, mkGenOcc2, mkLocalOcc, mkDataTOcc, mkDataCOcc,
  35         mkDataConWrapperOcc, mkDataConWorkerOcc,
  36
  37         isVarOcc, isTvOcc, isTcOcc, isDataOcc, isDataSymOcc, isSymOcc, isValOcc,
  38         parenSymOcc, reportIfUnused, isTcClsName, isVarName,
  39
  40         occNameFS, occNameString, occNameUserString, occNameSpace,
  41         occNameFlavour, briefOccNameFlavour,
  42         setOccNameSpace,
  43
  44         mkTupleOcc, isTupleOcc_maybe,
  45
  46         -- Tidying up
  47         TidyOccEnv, emptyTidyOccEnv, tidyOccName, initTidyOccEnv,
  48
  49         -- Encoding
  50         EncodedString, EncodedFS, UserString, UserFS, encode, encodeFS, decode, pprEncodedFS,
  51
  52         -- The basic form of names
  53         isLexCon, isLexVar, isLexId, isLexSym,
  54         isLexConId, isLexConSym, isLexVarId, isLexVarSym,
  55         isLowerISO, isUpperISO,
  56         startsVarSym, startsVarId, startsConSym, startsConId
  57     ) where
  58
  59 #include "HsVersions.h"
  60
  61 import Char     ( isDigit, isUpper, isLower, isAlphaNum, ord, chr, digitToInt )
  62 import Util     ( thenCmp )
  63 import Unique   ( Unique, mkUnique, Uniquable(..) )
  64 import BasicTypes ( Boxity(..), Arity )
  65 import StaticFlags ( opt_PprStyle_Debug )
  66 import UniqFM
  67 import UniqSet
  68 import FastString
  69 import Outputable
  70 import Binary
  71
  72 import GLAEXTS
  73 \end{code}
  74
  75 We hold both module names and identifier names in a 'Z-encoded' form
  76 that makes them acceptable both as a C identifier and as a Haskell
  77 (prefix) identifier.
  78
  79 They can always be decoded again when printing error messages
  80 or anything else for the user, but it does make sense for it
  81 to be represented here in encoded form, so that when generating
  82 code the encoding operation is not performed on each occurrence.
  83
  84 These type synonyms help documentation.
  85
  86 \begin{code}
  87 type UserFS    = FastString     -- As the user typed it
  88 type EncodedFS = FastString     -- Encoded form
  89
  90 type UserString = String        -- As the user typed it
  91 type EncodedString = String     -- Encoded form
  92
  93
  94 pprEncodedFS :: EncodedFS -> SDoc
  95 pprEncodedFS fs
  96   = getPprStyle         $ \ sty ->
  97     if userStyle sty || dumpStyle sty
  98         -- ftext (decodeFS fs) would needlessly pack the string again
  99         then text (decode (unpackFS fs))
 100         else ftext fs
 101 \end{code}
 102
 103 %************************************************************************
 104 %*                                                                      *
 105 \subsection{Name space}
 106 %*                                                                      *
 107 %************************************************************************
 108
 109 \begin{code}
 110 data NameSpace = VarName        -- Variables, including "source" data constructors
 111                | DataName       -- "Real" data constructors
 112                | TvName         -- Type variables
 113                | TcClsName      -- Type constructors and classes; Haskell has them
 114                                 -- in the same name space for now.
 115                deriving( Eq, Ord )
 116    {-! derive: Binary !-}
 117
 118 -- Note [Data Constructors]
 119 -- see also: Note [Data Constructor Naming] in DataCon.lhs
 120 --
 121 --      "Source" data constructors are the data constructors mentioned
 122 --      in Haskell source code
 123 --
 124 --      "Real" data constructors are the data constructors of the
 125 --      representation type, which may not be the same as the source
 126 --      type
 127
 128 -- Example:
 129 --      data T = T !(Int,Int)
 130 --
 131 -- The source datacon has type (Int,Int) -> T
 132 -- The real   datacon has type Int -> Int -> T
 133 -- GHC chooses a representation based on the strictness etc.
 134
 135
 136 -- Though type constructors and classes are in the same name space now,
 137 -- the NameSpace type is abstract, so we can easily separate them later
 138 tcName    = TcClsName           -- Type constructors
 139 clsName   = TcClsName           -- Classes
 140 tcClsName = TcClsName           -- Not sure which!
 141
 142 dataName    = DataName
 143 srcDataName = DataName  -- Haskell-source data constructors should be
 144                         -- in the Data name space
 145
 146 tvName      = TvName
 147 varName     = VarName
 148
 149 isTcClsName :: NameSpace -> Bool
 150 isTcClsName TcClsName = True
 151 isTcClsName _         = False
 152
 153 isVarName :: NameSpace -> Bool  -- Variables or type variables, but not constructors
 154 isVarName TvName  = True
 155 isVarName VarName = True
 156 isVarName other   = False
 157
 158
 159 nameSpaceString :: NameSpace -> String
 160 nameSpaceString DataName  = "data constructor"
 161 nameSpaceString VarName   = "variable"
 162 nameSpaceString TvName    = "type variable"
 163 nameSpaceString TcClsName = "type constructor or class"
 164 \end{code}
 165
 166
 167 %************************************************************************
 168 %*                                                                      *
 169 \subsection[Name-pieces-datatypes]{The @OccName@ datatypes}
 170 %*                                                                      *
 171 %************************************************************************
 172
 173 \begin{code}
 174 data OccName = OccName
 175     { occNameSpace  :: !NameSpace
 176     , occNameFS     :: !EncodedFS
 177     }
 178 \end{code}
 179
 180
 181 \begin{code}
 182 instance Eq OccName where
 183     (OccName sp1 s1) == (OccName sp2 s2) = s1 == s2 && sp1 == sp2
 184
 185 instance Ord OccName where
 186     compare (OccName sp1 s1) (OccName sp2 s2) = (s1  `compare` s2) `thenCmp`
 187                                                 (sp1 `compare` sp2)
 188 \end{code}
 189
 190
 191 %************************************************************************
 192 %*                                                                      *
 193 \subsection{Printing}
 194 %*                                                                      *
 195 %************************************************************************
 196
 197 \begin{code}
 198 instance Outputable OccName where
 199     ppr = pprOccName
 200
 201 pprOccName :: OccName -> SDoc
 202 pprOccName (OccName sp occ)
 203   = getPprStyle $ \ sty ->
 204     pprEncodedFS occ <> if debugStyle sty then
 205                            braces (text (briefNameSpaceFlavour sp))
 206                         else empty
 207 \end{code}
 208
 209
 210 %************************************************************************
 211 %*                                                                      *
 212 \subsection{Construction}
 213 %*                                                                      *
 214 %*****p*******************************************************************
 215
 216 *Sys* things do no encoding; the caller should ensure that the thing is
 217 already encoded
 218
 219 \begin{code}
 220 mkSysOcc :: NameSpace -> EncodedString -> OccName
 221 mkSysOcc occ_sp str = ASSERT2( alreadyEncoded str, text str )
 222                       OccName occ_sp (mkFastString str)
 223
 224 mkSysOccFS :: NameSpace -> EncodedFS -> OccName
 225 mkSysOccFS occ_sp fs = ASSERT2( alreadyEncodedFS fs, ppr fs )
 226                        OccName occ_sp fs
 227
 228 mkFCallOcc :: EncodedString -> OccName
 229 -- This version of mkSysOcc doesn't check that the string is already encoded,
 230 -- because it will be something like "{__ccall f dyn Int# -> Int#}"
 231 -- This encodes a lot into something that then parses like an Id.
 232 -- But then alreadyEncoded complains about the braces!
 233 mkFCallOcc str = OccName varName (mkFastString str)
 234
 235 -- Kind constructors get a special function.  Uniquely, they are not encoded,
 236 -- so that they have names like '*'.  This means that *even in interface files*
 237 -- we'll get kinds like (* -> (* -> *)).  We can't use mkSysOcc because it
 238 -- has an ASSERT that doesn't hold.
 239 mkKindOccFS :: NameSpace -> EncodedFS -> OccName
 240 mkKindOccFS occ_sp fs = OccName occ_sp fs
 241 \end{code}
 242
 243 *Source-code* things are encoded.
 244
 245 \begin{code}
 246 mkOccFS :: NameSpace -> UserFS -> OccName
 247 mkOccFS occ_sp fs = mkSysOccFS occ_sp (encodeFS fs)
 248
 249 mkOccName :: NameSpace -> String -> OccName
 250 mkOccName ns s = mkSysOcc ns (encode s)
 251
 252 mkVarOcc :: UserFS -> OccName
 253 mkVarOcc fs = mkSysOccFS varName (encodeFS fs)
 254
 255 mkTyVarOcc :: UserFS -> OccName
 256 mkTyVarOcc fs = mkSysOccFS tvName (encodeFS fs)
 257
 258 mkVarOccEncoded :: EncodedFS -> OccName
 259 mkVarOccEncoded fs = mkSysOccFS varName fs
 260 \end{code}
 261
 262
 263
 264 %************************************************************************
 265 %*                                                                      *
 266                 Environments
 267 %*                                                                      *
 268 %************************************************************************
 269
 270 OccEnvs are used mainly for the envts in ModIfaces.
 271
 272 They are efficient, because FastStrings have unique Int# keys.  We assume
 273 this key is less than 2^24, so we can make a Unique using
 274         mkUnique ns key  :: Unique
 275 where 'ns' is a Char reprsenting the name space.  This in turn makes it
 276 easy to build an OccEnv.
 277
 278 \begin{code}
 279 instance Uniquable OccName where
 280   getUnique (OccName ns fs)
 281       = mkUnique char (I# (uniqueOfFS fs))
 282       where     -- See notes above about this getUnique function
 283         char = case ns of
 284                 VarName   -> 'i'
 285                 DataName  -> 'd'
 286                 TvName    -> 'v'
 287                 TcClsName -> 't'
 288
 289 type OccEnv a = UniqFM a
 290
 291 emptyOccEnv :: OccEnv a
 292 unitOccEnv  :: OccName -> a -> OccEnv a
 293 extendOccEnv :: OccEnv a -> OccName -> a -> OccEnv a
 294 extendOccEnvList :: OccEnv a -> [(OccName, a)] -> OccEnv a
 295 lookupOccEnv :: OccEnv a -> OccName -> Maybe a
 296 mkOccEnv     :: [(OccName,a)] -> OccEnv a
 297 elemOccEnv   :: OccName -> OccEnv a -> Bool
 298 foldOccEnv   :: (a -> b -> b) -> b -> OccEnv a -> b
 299 occEnvElts   :: OccEnv a -> [a]
 300 extendOccEnv_C :: (a->a->a) -> OccEnv a -> OccName -> a -> OccEnv a
 301 plusOccEnv     :: OccEnv a -> OccEnv a -> OccEnv a
 302 plusOccEnv_C   :: (a->a->a) -> OccEnv a -> OccEnv a -> OccEnv a
 303
 304 emptyOccEnv      = emptyUFM
 305 unitOccEnv       = unitUFM
 306 extendOccEnv     = addToUFM
 307 extendOccEnvList = addListToUFM
 308 lookupOccEnv     = lookupUFM
 309 mkOccEnv         = listToUFM
 310 elemOccEnv       = elemUFM
 311 foldOccEnv       = foldUFM
 312 occEnvElts       = eltsUFM
 313 plusOccEnv       = plusUFM
 314 plusOccEnv_C     = plusUFM_C
 315 extendOccEnv_C   = addToUFM_C
 316
 317
 318 type OccSet = UniqFM OccName
 319
 320 emptyOccSet       :: OccSet
 321 unitOccSet        :: OccName -> OccSet
 322 mkOccSet          :: [OccName] -> OccSet
 323 extendOccSet      :: OccSet -> OccName -> OccSet
 324 extendOccSetList  :: OccSet -> [OccName] -> OccSet
 325 unionOccSets      :: OccSet -> OccSet -> OccSet
 326 unionManyOccSets  :: [OccSet] -> OccSet
 327 minusOccSet       :: OccSet -> OccSet -> OccSet
 328 elemOccSet        :: OccName -> OccSet -> Bool
 329 occSetElts        :: OccSet -> [OccName]
 330 foldOccSet        :: (OccName -> b -> b) -> b -> OccSet -> b
 331 isEmptyOccSet     :: OccSet -> Bool
 332 intersectOccSet   :: OccSet -> OccSet -> OccSet
 333 intersectsOccSet  :: OccSet -> OccSet -> Bool
 334
 335 emptyOccSet       = emptyUniqSet
 336 unitOccSet        = unitUniqSet
 337 mkOccSet          = mkUniqSet
 338 extendOccSet      = addOneToUniqSet
 339 extendOccSetList  = addListToUniqSet
 340 unionOccSets      = unionUniqSets
 341 unionManyOccSets  = unionManyUniqSets
 342 minusOccSet       = minusUniqSet
 343 elemOccSet        = elementOfUniqSet
 344 occSetElts        = uniqSetToList
 345 foldOccSet        = foldUniqSet
 346 isEmptyOccSet     = isEmptyUniqSet
 347 intersectOccSet   = intersectUniqSets
 348 intersectsOccSet s1 s2 = not (isEmptyOccSet (s1 `intersectOccSet` s2))
 349 \end{code}
 350
 351
 352 %************************************************************************
 353 %*                                                                      *
 354 \subsection{Predicates and taking them apart}
 355 %*                                                                      *
 356 %************************************************************************
 357
 358 \begin{code}
 359 occNameString :: OccName -> EncodedString
 360 occNameString (OccName _ s) = unpackFS s
 361
 362 occNameUserString :: OccName -> UserString
 363 occNameUserString occ = decode (occNameString occ)
 364
 365 setOccNameSpace :: NameSpace -> OccName -> OccName
 366 setOccNameSpace sp (OccName _ occ) = OccName sp occ
 367
 368 -- occNameFlavour is used only to generate good error messages
 369 occNameFlavour :: OccName -> SDoc
 370 occNameFlavour (OccName DataName _)  = ptext SLIT("data constructor")
 371 occNameFlavour (OccName TvName _)    = ptext SLIT("type variable")
 372 occNameFlavour (OccName TcClsName _) = ptext SLIT("type constructor or class")
 373 occNameFlavour (OccName VarName s)   = empty
 374
 375 -- briefOccNameFlavour is used in debug-printing of names
 376 briefOccNameFlavour :: OccName -> String
 377 briefOccNameFlavour (OccName sp _) = briefNameSpaceFlavour sp
 378
 379 briefNameSpaceFlavour DataName  = "d"
 380 briefNameSpaceFlavour VarName   = "v"
 381 briefNameSpaceFlavour TvName    = "tv"
 382 briefNameSpaceFlavour TcClsName = "tc"
 383 \end{code}
 384
 385 \begin{code}
 386 isVarOcc, isTvOcc, isDataSymOcc, isSymOcc, isTcOcc :: OccName -> Bool
 387
 388 isVarOcc (OccName VarName _) = True
 389 isVarOcc other               = False
 390
 391 isTvOcc (OccName TvName _) = True
 392 isTvOcc other              = False
 393
 394 isTcOcc (OccName TcClsName _) = True
 395 isTcOcc other                 = False
 396
 397 isValOcc (OccName VarName  _) = True
 398 isValOcc (OccName DataName _) = True
 399 isValOcc other                = False
 400
 401 -- Data constructor operator (starts with ':', or '[]')
 402 -- Pretty inefficient!
 403 isDataSymOcc (OccName DataName s) = isLexConSym (decodeFS s)
 404 isDataSymOcc (OccName VarName s)  = isLexConSym (decodeFS s)
 405 isDataSymOcc other                = False
 406
 407 isDataOcc (OccName DataName _) = True
 408 isDataOcc (OccName VarName s)  = isLexCon (decodeFS s)
 409 isDataOcc other                = False
 410
 411 -- Any operator (data constructor or variable)
 412 -- Pretty inefficient!
 413 isSymOcc (OccName DataName s)  = isLexConSym (decodeFS s)
 414 isSymOcc (OccName TcClsName s) = isLexConSym (decodeFS s)
 415 isSymOcc (OccName VarName s)   = isLexSym (decodeFS s)
 416 isSymOcc other                 = False
 417
 418 parenSymOcc :: OccName -> SDoc -> SDoc
 419 -- Wrap parens around an operator
 420 parenSymOcc occ doc | isSymOcc occ = parens doc
 421                     | otherwise    = doc
 422 \end{code}
 423
 424
 425 \begin{code}
 426 reportIfUnused :: OccName -> Bool
 427   -- Haskell 98 encourages compilers to suppress warnings about
 428   -- unused names in a pattern if they start with "_".
 429 reportIfUnused occ = case occNameUserString occ of
 430                         ('_' : _) -> False
 431                         zz_other  -> True
 432 \end{code}
 433
 434
 435
 436 %************************************************************************
 437 %*                                                                      *
 438 \subsection{Making system names}
 439 %*                                                                      *
 440 %************************************************************************
 441
 442 Here's our convention for splitting up the interface file name space:
 443
 444         d...            dictionary identifiers
 445                         (local variables, so no name-clash worries)
 446
 447         $f...           dict-fun identifiers (from inst decls)
 448         $dm...          default methods
 449         $p...           superclass selectors
 450         $w...           workers
 451         :T...           compiler-generated tycons for dictionaries
 452         :D...           ...ditto data cons
 453         $sf..           specialised version of f
 454
 455         in encoded form these appear as Zdfxxx etc
 456
 457         :...            keywords (export:, letrec: etc.)
 458 --- I THINK THIS IS WRONG!
 459
 460 This knowledge is encoded in the following functions.
 461
 462
 463 @mk_deriv@ generates an @OccName@ from the prefix and a string.
 464 NB: The string must already be encoded!
 465
 466 \begin{code}
 467 mk_deriv :: NameSpace
 468          -> String              -- Distinguishes one sort of derived name from another
 469          -> EncodedString       -- Must be already encoded!!  We don't want to encode it a
 470                                 -- second time because encoding isn't idempotent
 471          -> OccName
 472
 473 mk_deriv occ_sp sys_prefix str = mkSysOcc occ_sp (encode sys_prefix ++ str)
 474 \end{code}
 475
 476 \begin{code}
 477 mkDictOcc, mkIPOcc, mkWorkerOcc, mkDefaultMethodOcc,
 478            mkClassTyConOcc, mkClassDataConOcc, mkSpecOcc
 479    :: OccName -> OccName
 480
 481 -- These derived variables have a prefix that no Haskell value could have
 482 mkDataConWrapperOcc = mk_simple_deriv varName  "$W"
 483 mkWorkerOcc         = mk_simple_deriv varName  "$w"
 484 mkDefaultMethodOcc  = mk_simple_deriv varName  "$dm"
 485 mkDerivedTyConOcc   = mk_simple_deriv tcName   ":"      -- The : prefix makes sure it classifies
 486 mkClassTyConOcc     = mk_simple_deriv tcName   ":T"     -- as a tycon/datacon
 487 mkClassDataConOcc   = mk_simple_deriv dataName ":D"     -- We go straight to the "real" data con
 488                                                         -- for datacons from classes
 489 mkDictOcc           = mk_simple_deriv varName  "$d"
 490 mkIPOcc             = mk_simple_deriv varName  "$i"
 491 mkSpecOcc           = mk_simple_deriv varName  "$s"
 492 mkForeignExportOcc  = mk_simple_deriv varName  "$f"
 493
 494 -- Generic derivable classes
 495 mkGenOcc1           = mk_simple_deriv varName  "$gfrom"
 496 mkGenOcc2           = mk_simple_deriv varName  "$gto"
 497
 498 -- data T = MkT ... deriving( Data ) needs defintions for
 499 --      $tT   :: Data.Generics.Basics.DataType
 500 --      $cMkT :: Data.Generics.Basics.Constr
 501 mkDataTOcc = mk_simple_deriv varName  "$t"
 502 mkDataCOcc = mk_simple_deriv varName  "$c"
 503
 504 mk_simple_deriv sp px occ = mk_deriv sp px (occNameString occ)
 505
 506
 507 -- Data constructor workers are made by setting the name space
 508 -- of the data constructor OccName (which should be a DataName)
 509 -- to VarName
 510 mkDataConWorkerOcc datacon_occ = setOccNameSpace varName datacon_occ
 511 \end{code}
 512
 513 \begin{code}
 514 mkSuperDictSelOcc :: Int        -- Index of superclass, eg 3
 515                   -> OccName    -- Class, eg "Ord"
 516                   -> OccName    -- eg "$p3Ord"
 517 mkSuperDictSelOcc index cls_occ
 518   = mk_deriv varName "$p" (show index ++ occNameString cls_occ)
 519
 520 mkLocalOcc :: Unique            -- Unique
 521            -> OccName           -- Local name (e.g. "sat")
 522            -> OccName           -- Nice unique version ("$L23sat")
 523 mkLocalOcc uniq occ
 524    = mk_deriv varName ("$L" ++ show uniq) (occNameString occ)
 525         -- The Unique might print with characters
 526         -- that need encoding (e.g. 'z'!)
 527 \end{code}
 528
 529
 530 \begin{code}
 531 mkDFunOcc :: EncodedString      -- Typically the class and type glommed together e.g. "OrdMaybe"
 532                                 -- Only used in debug mode, for extra clarity
 533           -> Bool               -- True <=> hs-boot instance dfun
 534           -> Int                -- Unique index
 535           -> OccName            -- "$f3OrdMaybe"
 536
 537 -- In hs-boot files we make dict funs like $fx7ClsTy, which get bound to the real
 538 -- thing when we compile the mother module. Reason: we don't know exactly
 539 -- what the  mother module will call it.
 540
 541 mkDFunOcc info_str is_boot index
 542   = mk_deriv VarName prefix string
 543   where
 544     prefix | is_boot   = "$fx"
 545            | otherwise = "$f"
 546     string | opt_PprStyle_Debug = show index ++ info_str
 547            | otherwise          = show index
 548 \end{code}
 549
 550 We used to add a '$m' to indicate a method, but that gives rise to bad
 551 error messages from the type checker when we print the function name or pattern
 552 of an instance-decl binding.  Why? Because the binding is zapped
 553 to use the method name in place of the selector name.
 554 (See TcClassDcl.tcMethodBind)
 555
 556 The way it is now, -ddump-xx output may look confusing, but
 557 you can always say -dppr-debug to get the uniques.
 558
 559 However, we *do* have to zap the first character to be lower case,
 560 because overloaded constructors (blarg) generate methods too.
 561 And convert to VarName space
 562
 563 e.g. a call to constructor MkFoo where
 564         data (Ord a) => Foo a = MkFoo a
 565
 566 If this is necessary, we do it by prefixing '$m'.  These
 567 guys never show up in error messages.  What a hack.
 568
 569 \begin{code}
 570 mkMethodOcc :: OccName -> OccName
 571 mkMethodOcc occ@(OccName VarName fs) = occ
 572 mkMethodOcc occ                      = mk_simple_deriv varName "$m" occ
 573 \end{code}
 574
 575
 576 %************************************************************************
 577 %*                                                                      *
 578 \subsection{Tidying them up}
 579 %*                                                                      *
 580 %************************************************************************
 581
 582 Before we print chunks of code we like to rename it so that
 583 we don't have to print lots of silly uniques in it.  But we mustn't
 584 accidentally introduce name clashes!  So the idea is that we leave the
 585 OccName alone unless it accidentally clashes with one that is already
 586 in scope; if so, we tack on '1' at the end and try again, then '2', and
 587 so on till we find a unique one.
 588
 589 There's a wrinkle for operators.  Consider '>>='.  We can't use '>>=1'
 590 because that isn't a single lexeme.  So we encode it to 'lle' and *then*
 591 tack on the '1', if necessary.
 592
 593 \begin{code}
 594 type TidyOccEnv = OccEnv Int    -- The in-scope OccNames
 595         -- Range gives a plausible starting point for new guesses
 596
 597 emptyTidyOccEnv = emptyOccEnv
 598
 599 initTidyOccEnv :: [OccName] -> TidyOccEnv       -- Initialise with names to avoid!
 600 initTidyOccEnv = foldl (\env occ -> extendOccEnv env occ 1) emptyTidyOccEnv
 601
 602 tidyOccName :: TidyOccEnv -> OccName -> (TidyOccEnv, OccName)
 603
 604 tidyOccName in_scope occ@(OccName occ_sp fs)
 605   = case lookupOccEnv in_scope occ of
 606         Nothing ->      -- Not already used: make it used
 607                    (extendOccEnv in_scope occ 1, occ)
 608
 609         Just n  ->      -- Already used: make a new guess,
 610                         -- change the guess base, and try again
 611                    tidyOccName  (extendOccEnv in_scope occ (n+1))
 612                                 (mkSysOcc occ_sp (unpackFS fs ++ show n))
 613 \end{code}
 614
 615
 616 %************************************************************************
 617 %*                                                                      *
 618 \subsection{The 'Z' encoding}
 619 %*                                                                      *
 620 %************************************************************************
 621
 622 This is the main name-encoding and decoding function.  It encodes any
 623 string into a string that is acceptable as a C name.  This is the name
 624 by which things are known right through the compiler.
 625
 626 The basic encoding scheme is this.
 627
 628 * Tuples (,,,) are coded as Z3T
 629
 630 * Alphabetic characters (upper and lower) and digits
 631         all translate to themselves;
 632         except 'Z', which translates to 'ZZ'
 633         and    'z', which translates to 'zz'
 634   We need both so that we can preserve the variable/tycon distinction
 635
 636 * Most other printable characters translate to 'zx' or 'Zx' for some
 637         alphabetic character x
 638
 639 * The others translate as 'znnnU' where 'nnn' is the decimal number
 640         of the character
 641
 642         Before          After
 643         --------------------------
 644         Trak            Trak
 645         foo_wib         foozuwib
 646         >               zg
 647         >1              zg1
 648         foo#            foozh
 649         foo##           foozhzh
 650         foo##1          foozhzh1
 651         fooZ            fooZZ
 652         :+              ZCzp
 653         ()              Z0T     0-tuple
 654         (,,,,)          Z5T     5-tuple
 655         (# #)           Z1H     unboxed 1-tuple (note the space)
 656         (#,,,,#)        Z5H     unboxed 5-tuple
 657                 (NB: There is no Z1T nor Z0H.)
 658
 659 \begin{code}
 660 -- alreadyEncoded is used in ASSERTs to check for encoded
 661 -- strings.  It isn't fail-safe, of course, because, say 'zh' might
 662 -- be encoded or not.
 663 alreadyEncoded :: String -> Bool
 664 alreadyEncoded s = all ok s
 665                  where
 666                    ok ' ' = True
 667                         -- This is a bit of a lie; if we really wanted spaces
 668                         -- in names we'd have to encode them.  But we do put
 669                         -- spaces in ccall "occurrences", and we don't want to
 670                         -- reject them here
 671                    ok ch  = isAlphaNum ch
 672
 673 alreadyEncodedFS :: FastString -> Bool
 674 alreadyEncodedFS fs = alreadyEncoded (unpackFS fs)
 675
 676 encode :: UserString -> EncodedString
 677 encode cs = case maybe_tuple cs of
 678                 Just n  -> n            -- Tuples go to Z2T etc
 679                 Nothing -> go cs
 680           where
 681                 go []     = []
 682                 go (c:cs) = encode_ch c ++ go cs
 683
 684 encodeFS :: UserFS -> EncodedFS
 685 encodeFS fast_str  | all unencodedChar str = fast_str
 686                    | otherwise             = mkFastString (encode str)
 687                    where
 688                      str = unpackFS fast_str
 689
 690 unencodedChar :: Char -> Bool   -- True for chars that don't need encoding
 691 unencodedChar 'Z' = False
 692 unencodedChar 'z' = False
 693 unencodedChar c   =  c >= 'a' && c <= 'z'
 694                   || c >= 'A' && c <= 'Z'
 695                   || c >= '0' && c <= '9'
 696
 697 encode_ch :: Char -> EncodedString
 698 encode_ch c | unencodedChar c = [c]     -- Common case first
 699
 700 -- Constructors
 701 encode_ch '('  = "ZL"   -- Needed for things like (,), and (->)
 702 encode_ch ')'  = "ZR"   -- For symmetry with (
 703 encode_ch '['  = "ZM"
 704 encode_ch ']'  = "ZN"
 705 encode_ch ':'  = "ZC"
 706 encode_ch 'Z'  = "ZZ"
 707
 708 -- Variables
 709 encode_ch 'z'  = "zz"
 710 encode_ch '&'  = "za"
 711 encode_ch '|'  = "zb"
 712 encode_ch '^'  = "zc"
 713 encode_ch '$'  = "zd"
 714 encode_ch '='  = "ze"
 715 encode_ch '>'  = "zg"
 716 encode_ch '#'  = "zh"
 717 encode_ch '.'  = "zi"
 718 encode_ch '<'  = "zl"
 719 encode_ch '-'  = "zm"
 720 encode_ch '!'  = "zn"
 721 encode_ch '+'  = "zp"
 722 encode_ch '\'' = "zq"
 723 encode_ch '\\' = "zr"
 724 encode_ch '/'  = "zs"
 725 encode_ch '*'  = "zt"
 726 encode_ch '_'  = "zu"
 727 encode_ch '%'  = "zv"
 728 encode_ch c    = 'z' : shows (ord c) "U"
 729 \end{code}
 730
 731 Decode is used for user printing.
 732
 733 \begin{code}
 734 decodeFS :: FastString -> FastString
 735 decodeFS fs = mkFastString (decode (unpackFS fs))
 736
 737 decode :: EncodedString -> UserString
 738 decode [] = []
 739 decode ('Z' : d : rest) | isDigit d = decode_tuple   d rest
 740                         | otherwise = decode_upper   d : decode rest
 741 decode ('z' : d : rest) | isDigit d = decode_num_esc d rest
 742                         | otherwise = decode_lower   d : decode rest
 743 decode (c   : rest) = c : decode rest
 744
 745 decode_upper, decode_lower :: Char -> Char
 746
 747 decode_upper 'L' = '('
 748 decode_upper 'R' = ')'
 749 decode_upper 'M' = '['
 750 decode_upper 'N' = ']'
 751 decode_upper 'C' = ':'
 752 decode_upper 'Z' = 'Z'
 753 decode_upper ch  = pprTrace "decode_upper" (char ch) ch
 754
 755 decode_lower 'z' = 'z'
 756 decode_lower 'a' = '&'
 757 decode_lower 'b' = '|'
 758 decode_lower 'c' = '^'
 759 decode_lower 'd' = '$'
 760 decode_lower 'e' = '='
 761 decode_lower 'g' = '>'
 762 decode_lower 'h' = '#'
 763 decode_lower 'i' = '.'
 764 decode_lower 'l' = '<'
 765 decode_lower 'm' = '-'
 766 decode_lower 'n' = '!'
 767 decode_lower 'p' = '+'
 768 decode_lower 'q' = '\''
 769 decode_lower 'r' = '\\'
 770 decode_lower 's' = '/'
 771 decode_lower 't' = '*'
 772 decode_lower 'u' = '_'
 773 decode_lower 'v' = '%'
 774 decode_lower ch  = pprTrace "decode_lower" (char ch) ch
 775
 776 -- Characters not having a specific code are coded as z224U
 777 decode_num_esc d rest
 778   = go (digitToInt d) rest
 779   where
 780     go n (c : rest) | isDigit c = go (10*n + digitToInt c) rest
 781     go n ('U' : rest)           = chr n : decode rest
 782     go n other = pprPanic "decode_num_esc" (ppr n <+> text other)
 783
 784 decode_tuple :: Char -> EncodedString -> UserString
 785 decode_tuple d rest
 786   = go (digitToInt d) rest
 787   where
 788         -- NB. recurse back to decode after decoding the tuple, because
 789         -- the tuple might be embedded in a longer name.
 790     go n (c : rest) | isDigit c = go (10*n + digitToInt c) rest
 791     go 0 ('T':rest)     = "()" ++ decode rest
 792     go n ('T':rest)     = '(' : replicate (n-1) ',' ++ ")" ++ decode rest
 793     go 1 ('H':rest)     = "(# #)" ++ decode rest
 794     go n ('H':rest)     = '(' : '#' : replicate (n-1) ',' ++ "#)" ++ decode rest
 795     go n other = pprPanic "decode_tuple" (ppr n <+> text other)
 796 \end{code}
 797
 798
 799 %************************************************************************
 800 %*                                                                      *
 801                 Stuff for dealing with tuples
 802 %*                                                                      *
 803 %************************************************************************
 804
 805 Tuples are encoded as
 806         Z3T or Z3H
 807 for 3-tuples or unboxed 3-tuples respectively.  No other encoding starts
 808         Z<digit>
 809
 810 * "(# #)" is the tycon for an unboxed 1-tuple (not 0-tuple)
 811   There are no unboxed 0-tuples.
 812
 813 * "()" is the tycon for a boxed 0-tuple.
 814   There are no boxed 1-tuples.
 815
 816
 817 \begin{code}
 818 maybe_tuple :: UserString -> Maybe EncodedString
 819
 820 maybe_tuple "(# #)" = Just("Z1H")
 821 maybe_tuple ('(' : '#' : cs) = case count_commas (0::Int) cs of
 822                                  (n, '#' : ')' : cs) -> Just ('Z' : shows (n+1) "H")
 823                                  other               -> Nothing
 824 maybe_tuple "()" = Just("Z0T")
 825 maybe_tuple ('(' : cs)       = case count_commas (0::Int) cs of
 826                                  (n, ')' : cs) -> Just ('Z' : shows (n+1) "T")
 827                                  other         -> Nothing
 828 maybe_tuple other            = Nothing
 829
 830 count_commas :: Int -> String -> (Int, String)
 831 count_commas n (',' : cs) = count_commas (n+1) cs
 832 count_commas n cs         = (n,cs)
 833 \end{code}
 834
 835 \begin{code}
 836 mkTupleOcc :: NameSpace -> Boxity -> Arity -> OccName
 837 mkTupleOcc ns bx ar
 838   = OccName ns (mkFastString ('Z' : (show ar ++ bx_char)))
 839   where
 840     bx_char = case bx of
 841                 Boxed   -> "T"
 842                 Unboxed -> "H"
 843
 844 isTupleOcc_maybe :: OccName -> Maybe (NameSpace, Boxity, Arity)
 845 -- Tuples are special, because there are so many of them!
 846 isTupleOcc_maybe (OccName ns fs)
 847   = case unpackFS fs of
 848         ('Z':d:rest) | isDigit d -> Just (decode_tup (digitToInt d) rest)
 849         other                    -> Nothing
 850   where
 851     decode_tup n "H"      = (ns, Unboxed, n)
 852     decode_tup n "T"      = (ns, Boxed, n)
 853     decode_tup n (d:rest) = decode_tup (n*10 + digitToInt d) rest
 854 \end{code}
 855
 856 %************************************************************************
 857 %*                                                                      *
 858 \subsection{Lexical categories}
 859 %*                                                                      *
 860 %************************************************************************
 861
 862 These functions test strings to see if they fit the lexical categories
 863 defined in the Haskell report.
 864
 865 \begin{code}
 866 isLexCon,   isLexVar,    isLexId,    isLexSym    :: FastString -> Bool
 867 isLexConId, isLexConSym, isLexVarId, isLexVarSym :: FastString -> Bool
 868
 869 isLexCon cs = isLexConId  cs || isLexConSym cs
 870 isLexVar cs = isLexVarId  cs || isLexVarSym cs
 871
 872 isLexId  cs = isLexConId  cs || isLexVarId  cs
 873 isLexSym cs = isLexConSym cs || isLexVarSym cs
 874
 875 -------------
 876
 877 isLexConId cs                           -- Prefix type or data constructors
 878   | nullFastString cs = False           --      e.g. "Foo", "[]", "(,)"
 879   | cs == FSLIT("[]") = True
 880   | otherwise         = startsConId (headFS cs)
 881
 882 isLexVarId cs                           -- Ordinary prefix identifiers
 883   | nullFastString cs = False           --      e.g. "x", "_x"
 884   | otherwise         = startsVarId (headFS cs)
 885
 886 isLexConSym cs                          -- Infix type or data constructors
 887   | nullFastString cs = False           --      e.g. ":-:", ":", "->"
 888   | cs == FSLIT("->") = True
 889   | otherwise         = startsConSym (headFS cs)
 890
 891 isLexVarSym cs                          -- Infix identifiers
 892   | nullFastString cs = False           --      e.g. "+"
 893   | otherwise         = startsVarSym (headFS cs)
 894
 895 -------------
 896 startsVarSym, startsVarId, startsConSym, startsConId :: Char -> Bool
 897 startsVarSym c = isSymbolASCII c || isSymbolISO c       -- Infix Ids
 898 startsConSym c = c == ':'                               -- Infix data constructors
 899 startsVarId c  = isLower c || isLowerISO c || c == '_'  -- Ordinary Ids
 900 startsConId c  = isUpper c || isUpperISO c || c == '('  -- Ordinary type constructors and data constructors
 901
 902
 903 isSymbolASCII c = c `elem` "!#$%&*+./<=>?@\\^|~-"
 904 isSymbolISO   c = ord c `elem` (0xd7 : 0xf7 : [0xa1 .. 0xbf])
 905 isUpperISO    (C# c#) = c# `geChar#` '\xc0'# && c# `leChar#` '\xde'# && c# `neChar#` '\xd7'#
 906         --0xc0 <= oc && oc <= 0xde && oc /= 0xd7 where oc = ord c
 907 isLowerISO    (C# c#) = c# `geChar#` '\xdf'# && c# `leChar#` '\xff'# && c# `neChar#` '\xf7'#
 908         --0xdf <= oc && oc <= 0xff && oc /= 0xf7 where oc = ord c
 909 \end{code}
 910
 911 %************************************************************************
 912 %*                                                                      *
 913                 Binary instance
 914     Here rather than BinIface because OccName is abstract
 915 %*                                                                      *
 916 %************************************************************************
 917
 918 \begin{code}
 919 instance Binary NameSpace where
 920     put_ bh VarName = do
 921             putByte bh 0
 922     put_ bh DataName = do
 923             putByte bh 1
 924     put_ bh TvName = do
 925             putByte bh 2
 926     put_ bh TcClsName = do
 927             putByte bh 3
 928     get bh = do
 929             h <- getByte bh
 930             case h of
 931               0 -> do return VarName
 932               1 -> do return DataName
 933               2 -> do return TvName
 934               _ -> do return TcClsName
 935
 936 instance Binary OccName where
 937     put_ bh (OccName aa ab) = do
 938             put_ bh aa
 939             put_ bh ab
 940     get bh = do
 941           aa <- get bh
 942           ab <- get bh
 943           return (OccName aa ab)
 944 \end{code}