ghc/compiler/basicTypes/OccName.lhs

   1 {-% DrIFT (Automatic class derivations for Haskell) v1.1 %-}
   2 %
   3 % (c) The GRASP/AQUA Project, Glasgow University, 1992-1998
   4 %
   5
   6 \section[OccName]{@OccName@}
   7
   8 \begin{code}
   9 module OccName (
  10         -- The NameSpace type; abstact
  11         NameSpace, tcName, clsName, tcClsName, dataName, varName,
  12         tvName, srcDataName, nameSpaceString,
  13
  14         -- The OccName type
  15         OccName,        -- Abstract, instance of Outputable
  16         pprOccName,
  17
  18         mkOccFS, mkSysOcc, mkSysOccFS, mkFCallOcc, mkKindOccFS,
  19         mkVarOcc, mkVarOccEncoded,
  20         mkSuperDictSelOcc, mkDFunOcc, mkForeignExportOcc,
  21         mkDictOcc, mkIPOcc, mkWorkerOcc, mkMethodOcc, mkDefaultMethodOcc,
  22         mkDerivedTyConOcc, mkClassTyConOcc, mkClassDataConOcc, mkSpecOcc,
  23         mkGenOcc1, mkGenOcc2, mkLocalOcc,
  24         mkDataConWrapperOcc, mkDataConWorkerOcc,
  25
  26         isTvOcc, isTcOcc, isDataOcc, isDataSymOcc, isSymOcc, isValOcc,
  27         reportIfUnused,
  28
  29         occNameFS, occNameString, occNameUserString, occNameSpace,
  30         occNameFlavour, briefOccNameFlavour,
  31         setOccNameSpace,
  32
  33         -- Tidying up
  34         TidyOccEnv, emptyTidyOccEnv, tidyOccName, initTidyOccEnv,
  35
  36         -- Encoding
  37         EncodedString, EncodedFS, UserString, UserFS, encode, encodeFS, decode, pprEncodedFS,
  38
  39         -- The basic form of names
  40         isLexCon, isLexVar, isLexId, isLexSym,
  41         isLexConId, isLexConSym, isLexVarId, isLexVarSym,
  42         isLowerISO, isUpperISO
  43
  44     ) where
  45
  46 #include "HsVersions.h"
  47
  48 import Char     ( isDigit, isUpper, isLower, isAlphaNum, ord, chr, digitToInt )
  49 import Util     ( thenCmp )
  50 import Unique   ( Unique )
  51 import FiniteMap ( FiniteMap, emptyFM, lookupFM, addToFM, elemFM )
  52 import FastString
  53 import Outputable
  54 import Binary
  55
  56 import GLAEXTS
  57 \end{code}
  58
  59 We hold both module names and identifier names in a 'Z-encoded' form
  60 that makes them acceptable both as a C identifier and as a Haskell
  61 (prefix) identifier.
  62
  63 They can always be decoded again when printing error messages
  64 or anything else for the user, but it does make sense for it
  65 to be represented here in encoded form, so that when generating
  66 code the encoding operation is not performed on each occurrence.
  67
  68 These type synonyms help documentation.
  69
  70 \begin{code}
  71 type UserFS    = FastString     -- As the user typed it
  72 type EncodedFS = FastString     -- Encoded form
  73
  74 type UserString = String        -- As the user typed it
  75 type EncodedString = String     -- Encoded form
  76
  77
  78 pprEncodedFS :: EncodedFS -> SDoc
  79 pprEncodedFS fs
  80   = getPprStyle         $ \ sty ->
  81     if userStyle sty
  82         -- ftext (decodeFS fs) would needlessly pack the string again
  83         then text (decode (unpackFS fs))
  84         else ftext fs
  85 \end{code}
  86
  87 %************************************************************************
  88 %*                                                                      *
  89 \subsection{Name space}
  90 %*                                                                      *
  91 %************************************************************************
  92
  93 \begin{code}
  94 data NameSpace = VarName        -- Variables, including "source" data constructors
  95                | DataName       -- "Real" data constructors
  96                | TvName         -- Type variables
  97                | TcClsName      -- Type constructors and classes; Haskell has them
  98                                 -- in the same name space for now.
  99                deriving( Eq, Ord )
 100    {-! derive: Binary !-}
 101
 102 -- Note [Data Constructors]
 103 -- see also: Note [Data Constructor Naming] in DataCon.lhs
 104 --
 105 --      "Source" data constructors are the data constructors mentioned
 106 --      in Haskell source code
 107 --
 108 --      "Real" data constructors are the data constructors of the
 109 --      representation type, which may not be the same as the source
 110 --      type
 111
 112 -- Example:
 113 --      data T = T !(Int,Int)
 114 --
 115 -- The source datacon has type (Int,Int) -> T
 116 -- The real   datacon has type Int -> Int -> T
 117 -- GHC chooses a representation based on the strictness etc.
 118
 119
 120 -- Though type constructors and classes are in the same name space now,
 121 -- the NameSpace type is abstract, so we can easily separate them later
 122 tcName    = TcClsName           -- Type constructors
 123 clsName   = TcClsName           -- Classes
 124 tcClsName = TcClsName           -- Not sure which!
 125
 126 dataName    = DataName
 127 srcDataName = DataName  -- Haskell-source data constructors should be
 128                         -- in the Data name space
 129
 130 tvName      = TvName
 131 varName     = VarName
 132
 133 nameSpaceString :: NameSpace -> String
 134 nameSpaceString DataName  = "Data constructor"
 135 nameSpaceString VarName   = "Variable"
 136 nameSpaceString TvName    = "Type variable"
 137 nameSpaceString TcClsName = "Type constructor or class"
 138 \end{code}
 139
 140
 141 %************************************************************************
 142 %*                                                                      *
 143 \subsection[Name-pieces-datatypes]{The @OccName@ datatypes}
 144 %*                                                                      *
 145 %************************************************************************
 146
 147 \begin{code}
 148 data OccName = OccName
 149                         NameSpace
 150                         EncodedFS
 151    {-! derive : Binary !-}
 152 \end{code}
 153
 154
 155 \begin{code}
 156 instance Eq OccName where
 157     (OccName sp1 s1) == (OccName sp2 s2) = s1 == s2 && sp1 == sp2
 158
 159 instance Ord OccName where
 160     compare (OccName sp1 s1) (OccName sp2 s2) = (s1  `compare` s2) `thenCmp`
 161                                                 (sp1 `compare` sp2)
 162 \end{code}
 163
 164
 165 %************************************************************************
 166 %*                                                                      *
 167 \subsection{Printing}
 168 %*                                                                      *
 169 %************************************************************************
 170
 171 \begin{code}
 172 instance Outputable OccName where
 173     ppr = pprOccName
 174
 175 pprOccName :: OccName -> SDoc
 176 pprOccName (OccName sp occ) = pprEncodedFS occ
 177 \end{code}
 178
 179
 180 %************************************************************************
 181 %*                                                                      *
 182 \subsection{Construction}
 183 %*                                                                      *
 184 %************************************************************************
 185
 186 *Sys* things do no encoding; the caller should ensure that the thing is
 187 already encoded
 188
 189 \begin{code}
 190 mkSysOcc :: NameSpace -> EncodedString -> OccName
 191 mkSysOcc occ_sp str = ASSERT2( alreadyEncoded str, text str )
 192                       OccName occ_sp (mkFastString str)
 193
 194 mkSysOccFS :: NameSpace -> EncodedFS -> OccName
 195 mkSysOccFS occ_sp fs = ASSERT2( alreadyEncodedFS fs, ppr fs )
 196                        OccName occ_sp fs
 197
 198 mkFCallOcc :: EncodedString -> OccName
 199 -- This version of mkSysOcc doesn't check that the string is already encoded,
 200 -- because it will be something like "{__ccall f dyn Int# -> Int#}"
 201 -- This encodes a lot into something that then parses like an Id.
 202 -- But then alreadyEncoded complains about the braces!
 203 mkFCallOcc str = OccName varName (mkFastString str)
 204
 205 -- Kind constructors get a special function.  Uniquely, they are not encoded,
 206 -- so that they have names like '*'.  This means that *even in interface files*
 207 -- we'll get kinds like (* -> (* -> *)).  We can't use mkSysOcc because it
 208 -- has an ASSERT that doesn't hold.
 209 mkKindOccFS :: NameSpace -> EncodedFS -> OccName
 210 mkKindOccFS occ_sp fs = OccName occ_sp fs
 211 \end{code}
 212
 213 *Source-code* things are encoded.
 214
 215 \begin{code}
 216 mkOccFS :: NameSpace -> UserFS -> OccName
 217 mkOccFS occ_sp fs = mkSysOccFS occ_sp (encodeFS fs)
 218
 219 mkVarOcc :: UserFS -> OccName
 220 mkVarOcc fs = mkSysOccFS varName (encodeFS fs)
 221
 222 mkVarOccEncoded :: EncodedFS -> OccName
 223 mkVarOccEncoded fs = mkSysOccFS varName fs
 224 \end{code}
 225
 226
 227
 228 %************************************************************************
 229 %*                                                                      *
 230 \subsection{Predicates and taking them apart}
 231 %*                                                                      *
 232 %************************************************************************
 233
 234 \begin{code}
 235 occNameFS :: OccName -> EncodedFS
 236 occNameFS (OccName _ s) = s
 237
 238 occNameString :: OccName -> EncodedString
 239 occNameString (OccName _ s) = unpackFS s
 240
 241 occNameUserString :: OccName -> UserString
 242 occNameUserString occ = decode (occNameString occ)
 243
 244 occNameSpace :: OccName -> NameSpace
 245 occNameSpace (OccName sp _) = sp
 246
 247 setOccNameSpace :: NameSpace -> OccName -> OccName
 248 setOccNameSpace sp (OccName _ occ) = OccName sp occ
 249
 250 -- occNameFlavour is used only to generate good error messages
 251 occNameFlavour :: OccName -> String
 252 occNameFlavour (OccName DataName _)  = "Data constructor"
 253 occNameFlavour (OccName TvName _)    = "Type variable"
 254 occNameFlavour (OccName TcClsName _) = "Type constructor or class"
 255 occNameFlavour (OccName VarName s)   = "Variable"
 256
 257 -- briefOccNameFlavour is used in debug-printing of names
 258 briefOccNameFlavour :: OccName -> String
 259 briefOccNameFlavour (OccName DataName _)    = "d"
 260 briefOccNameFlavour (OccName VarName _)     = "v"
 261 briefOccNameFlavour (OccName TvName _)      = "tv"
 262 briefOccNameFlavour (OccName TcClsName _)   = "tc"
 263 \end{code}
 264
 265 \begin{code}
 266 isTvOcc, isDataSymOcc, isSymOcc, isTcOcc :: OccName -> Bool
 267
 268 isTvOcc (OccName TvName _) = True
 269 isTvOcc other              = False
 270
 271 isTcOcc (OccName TcClsName _) = True
 272 isTcOcc other                 = False
 273
 274 isValOcc (OccName VarName  _) = True
 275 isValOcc (OccName DataName _) = True
 276 isValOcc other                = False
 277
 278 -- Data constructor operator (starts with ':', or '[]')
 279 -- Pretty inefficient!
 280 isDataSymOcc (OccName DataName s) = isLexConSym (decodeFS s)
 281 isDataSymOcc (OccName VarName s)  = isLexConSym (decodeFS s)
 282 isDataSymOcc other                = False
 283
 284 isDataOcc (OccName DataName _) = True
 285 isDataOcc (OccName VarName s)  = isLexCon (decodeFS s)
 286 isDataOcc other                = False
 287
 288 -- Any operator (data constructor or variable)
 289 -- Pretty inefficient!
 290 isSymOcc (OccName DataName s) = isLexConSym (decodeFS s)
 291 isSymOcc (OccName VarName s)  = isLexSym (decodeFS s)
 292 \end{code}
 293
 294
 295 \begin{code}
 296 reportIfUnused :: OccName -> Bool
 297   -- Haskell 98 encourages compilers to suppress warnings about
 298   -- unused names in a pattern if they start with "_".
 299 reportIfUnused occ = case occNameUserString occ of
 300                         ('_' : _) -> False
 301                         zz_other  -> True
 302 \end{code}
 303
 304
 305
 306 %************************************************************************
 307 %*                                                                      *
 308 \subsection{Making system names}
 309 %*                                                                      *
 310 %************************************************************************
 311
 312 Here's our convention for splitting up the interface file name space:
 313
 314         d...            dictionary identifiers
 315                         (local variables, so no name-clash worries)
 316
 317         $f...           dict-fun identifiers (from inst decls)
 318         $dm...          default methods
 319         $p...           superclass selectors
 320         $w...           workers
 321         :T...           compiler-generated tycons for dictionaries
 322         :D...           ...ditto data cons
 323         $sf..           specialised version of f
 324
 325         in encoded form these appear as Zdfxxx etc
 326
 327         :...            keywords (export:, letrec: etc.)
 328 --- I THINK THIS IS WRONG!
 329
 330 This knowledge is encoded in the following functions.
 331
 332
 333 @mk_deriv@ generates an @OccName@ from the prefix and a string.
 334 NB: The string must already be encoded!
 335
 336 \begin{code}
 337 mk_deriv :: NameSpace
 338          -> String              -- Distinguishes one sort of derived name from another
 339          -> EncodedString       -- Must be already encoded!!  We don't want to encode it a
 340                                 -- second time because encoding isn't idempotent
 341          -> OccName
 342
 343 mk_deriv occ_sp sys_prefix str = mkSysOcc occ_sp (encode sys_prefix ++ str)
 344 \end{code}
 345
 346 \begin{code}
 347 mkDictOcc, mkIPOcc, mkWorkerOcc, mkDefaultMethodOcc,
 348            mkClassTyConOcc, mkClassDataConOcc, mkSpecOcc
 349    :: OccName -> OccName
 350
 351 -- These derived variables have a prefix that no Haskell value could have
 352 mkDataConWrapperOcc = mk_simple_deriv varName  "$W"
 353 mkWorkerOcc         = mk_simple_deriv varName  "$w"
 354 mkDefaultMethodOcc  = mk_simple_deriv varName  "$dm"
 355 mkDerivedTyConOcc   = mk_simple_deriv tcName   ":"      -- The : prefix makes sure it classifies
 356 mkClassTyConOcc     = mk_simple_deriv tcName   ":T"     -- as a tycon/datacon
 357 mkClassDataConOcc   = mk_simple_deriv dataName ":D"     -- We go straight to the "real" data con
 358                                                         -- for datacons from classes
 359 mkDictOcc           = mk_simple_deriv varName  "$d"
 360 mkIPOcc             = mk_simple_deriv varName  "$i"
 361 mkSpecOcc           = mk_simple_deriv varName  "$s"
 362 mkForeignExportOcc  = mk_simple_deriv varName  "$f"
 363 mkGenOcc1           = mk_simple_deriv varName  "$gfrom"      -- Generics
 364 mkGenOcc2           = mk_simple_deriv varName  "$gto"        -- Generics
 365 mk_simple_deriv sp px occ = mk_deriv sp px (occNameString occ)
 366
 367
 368 -- Data constructor workers are made by setting the name space
 369 -- of the data constructor OccName (which should be a DataName)
 370 -- to DataName
 371 mkDataConWorkerOcc datacon_occ = setOccNameSpace varName datacon_occ
 372 \end{code}
 373
 374 \begin{code}
 375 mkSuperDictSelOcc :: Int        -- Index of superclass, eg 3
 376                   -> OccName    -- Class, eg "Ord"
 377                   -> OccName    -- eg "$p3Ord"
 378 mkSuperDictSelOcc index cls_occ
 379   = mk_deriv varName "$p" (show index ++ occNameString cls_occ)
 380
 381 mkLocalOcc :: Unique            -- Unique
 382            -> OccName           -- Local name (e.g. "sat")
 383            -> OccName           -- Nice unique version ("$L23sat")
 384 mkLocalOcc uniq occ
 385    = mk_deriv varName ("$L" ++ show uniq) (occNameString occ)
 386         -- The Unique might print with characters
 387         -- that need encoding (e.g. 'z'!)
 388 \end{code}
 389
 390
 391 \begin{code}
 392 mkDFunOcc :: EncodedString      -- Typically the class and type glommed together e.g. "OrdMaybe"
 393           -> OccName            -- "$fOrdMaybe"
 394
 395 mkDFunOcc string = mk_deriv VarName "$f" string
 396 \end{code}
 397
 398 We used to add a '$m' to indicate a method, but that gives rise to bad
 399 error messages from the type checker when we print the function name or pattern
 400 of an instance-decl binding.  Why? Because the binding is zapped
 401 to use the method name in place of the selector name.
 402 (See TcClassDcl.tcMethodBind)
 403
 404 The way it is now, -ddump-xx output may look confusing, but
 405 you can always say -dppr-debug to get the uniques.
 406
 407 However, we *do* have to zap the first character to be lower case,
 408 because overloaded constructors (blarg) generate methods too.
 409 And convert to VarName space
 410
 411 e.g. a call to constructor MkFoo where
 412         data (Ord a) => Foo a = MkFoo a
 413
 414 If this is necessary, we do it by prefixing '$m'.  These
 415 guys never show up in error messages.  What a hack.
 416
 417 \begin{code}
 418 mkMethodOcc :: OccName -> OccName
 419 mkMethodOcc occ@(OccName VarName fs) = occ
 420 mkMethodOcc occ                      = mk_simple_deriv varName "$m" occ
 421 \end{code}
 422
 423
 424 %************************************************************************
 425 %*                                                                      *
 426 \subsection{Tidying them up}
 427 %*                                                                      *
 428 %************************************************************************
 429
 430 Before we print chunks of code we like to rename it so that
 431 we don't have to print lots of silly uniques in it.  But we mustn't
 432 accidentally introduce name clashes!  So the idea is that we leave the
 433 OccName alone unless it accidentally clashes with one that is already
 434 in scope; if so, we tack on '1' at the end and try again, then '2', and
 435 so on till we find a unique one.
 436
 437 There's a wrinkle for operators.  Consider '>>='.  We can't use '>>=1'
 438 because that isn't a single lexeme.  So we encode it to 'lle' and *then*
 439 tack on the '1', if necessary.
 440
 441 \begin{code}
 442 type TidyOccEnv = FiniteMap FastString Int      -- The in-scope OccNames
 443 emptyTidyOccEnv = emptyFM
 444
 445 initTidyOccEnv :: [OccName] -> TidyOccEnv       -- Initialise with names to avoid!
 446 initTidyOccEnv = foldl (\env (OccName _ fs) -> addToFM env fs 1) emptyTidyOccEnv
 447
 448 tidyOccName :: TidyOccEnv -> OccName -> (TidyOccEnv, OccName)
 449
 450 tidyOccName in_scope occ@(OccName occ_sp fs)
 451   | not (fs `elemFM` in_scope)
 452   = (addToFM in_scope fs 1, occ)        -- First occurrence
 453
 454   | otherwise                           -- Already occurs
 455   = go in_scope (unpackFS fs)
 456   where
 457
 458     go in_scope str = case lookupFM in_scope pk_str of
 459                         Just n  -> go (addToFM in_scope pk_str (n+1)) (str ++ show n)
 460                                 -- Need to go round again, just in case "t3" (say)
 461                                 -- clashes with a "t3" that's already in scope
 462
 463                         Nothing -> (addToFM in_scope pk_str 1, mkSysOccFS occ_sp pk_str)
 464                                 -- str is now unique
 465                     where
 466                       pk_str = mkFastString str
 467 \end{code}
 468
 469
 470 %************************************************************************
 471 %*                                                                      *
 472 \subsection{The 'Z' encoding}
 473 %*                                                                      *
 474 %************************************************************************
 475
 476 This is the main name-encoding and decoding function.  It encodes any
 477 string into a string that is acceptable as a C name.  This is the name
 478 by which things are known right through the compiler.
 479
 480 The basic encoding scheme is this.
 481
 482 * Tuples (,,,) are coded as Z3T
 483
 484 * Alphabetic characters (upper and lower) and digits
 485         all translate to themselves;
 486         except 'Z', which translates to 'ZZ'
 487         and    'z', which translates to 'zz'
 488   We need both so that we can preserve the variable/tycon distinction
 489
 490 * Most other printable characters translate to 'zx' or 'Zx' for some
 491         alphabetic character x
 492
 493 * The others translate as 'znnnU' where 'nnn' is the decimal number
 494         of the character
 495
 496         Before          After
 497         --------------------------
 498         Trak            Trak
 499         foo_wib         foozuwib
 500         >               zg
 501         >1              zg1
 502         foo#            foozh
 503         foo##           foozhzh
 504         foo##1          foozhzh1
 505         fooZ            fooZZ
 506         :+              ZCzp
 507         ()              Z0T     0-tuple
 508         (,,,,)          Z5T     5-tuple
 509         (# #)           Z1H     unboxed 1-tuple (note the space)
 510         (#,,,,#)        Z5H     unboxed 5-tuple
 511                 (NB: There is no Z1T nor Z0H.)
 512
 513 \begin{code}
 514 -- alreadyEncoded is used in ASSERTs to check for encoded
 515 -- strings.  It isn't fail-safe, of course, because, say 'zh' might
 516 -- be encoded or not.
 517 alreadyEncoded :: String -> Bool
 518 alreadyEncoded s = all ok s
 519                  where
 520                    ok ' ' = True
 521                         -- This is a bit of a lie; if we really wanted spaces
 522                         -- in names we'd have to encode them.  But we do put
 523                         -- spaces in ccall "occurrences", and we don't want to
 524                         -- reject them here
 525                    ok ch  = isAlphaNum ch
 526
 527 alreadyEncodedFS :: FastString -> Bool
 528 alreadyEncodedFS fs = alreadyEncoded (unpackFS fs)
 529
 530 encode :: UserString -> EncodedString
 531 encode cs = case maybe_tuple cs of
 532                 Just n  -> n            -- Tuples go to Z2T etc
 533                 Nothing -> go cs
 534           where
 535                 go []     = []
 536                 go (c:cs) = encode_ch c ++ go cs
 537
 538 maybe_tuple "(# #)" = Just("Z1H")
 539 maybe_tuple ('(' : '#' : cs) = case count_commas (0::Int) cs of
 540                                  (n, '#' : ')' : cs) -> Just ('Z' : shows (n+1) "H")
 541                                  other               -> Nothing
 542 maybe_tuple "()" = Just("Z0T")
 543 maybe_tuple ('(' : cs)       = case count_commas (0::Int) cs of
 544                                  (n, ')' : cs) -> Just ('Z' : shows (n+1) "T")
 545                                  other         -> Nothing
 546 maybe_tuple other            = Nothing
 547
 548 count_commas :: Int -> String -> (Int, String)
 549 count_commas n (',' : cs) = count_commas (n+1) cs
 550 count_commas n cs         = (n,cs)
 551
 552 encodeFS :: UserFS -> EncodedFS
 553 encodeFS fast_str  | all unencodedChar str = fast_str
 554                    | otherwise             = mkFastString (encode str)
 555                    where
 556                      str = unpackFS fast_str
 557
 558 unencodedChar :: Char -> Bool   -- True for chars that don't need encoding
 559 unencodedChar 'Z' = False
 560 unencodedChar 'z' = False
 561 unencodedChar c   =  c >= 'a' && c <= 'z'
 562                   || c >= 'A' && c <= 'Z'
 563                   || c >= '0' && c <= '9'
 564
 565 encode_ch :: Char -> EncodedString
 566 encode_ch c | unencodedChar c = [c]     -- Common case first
 567
 568 -- Constructors
 569 encode_ch '('  = "ZL"   -- Needed for things like (,), and (->)
 570 encode_ch ')'  = "ZR"   -- For symmetry with (
 571 encode_ch '['  = "ZM"
 572 encode_ch ']'  = "ZN"
 573 encode_ch ':'  = "ZC"
 574 encode_ch 'Z'  = "ZZ"
 575
 576 -- Variables
 577 encode_ch 'z'  = "zz"
 578 encode_ch '&'  = "za"
 579 encode_ch '|'  = "zb"
 580 encode_ch '^'  = "zc"
 581 encode_ch '$'  = "zd"
 582 encode_ch '='  = "ze"
 583 encode_ch '>'  = "zg"
 584 encode_ch '#'  = "zh"
 585 encode_ch '.'  = "zi"
 586 encode_ch '<'  = "zl"
 587 encode_ch '-'  = "zm"
 588 encode_ch '!'  = "zn"
 589 encode_ch '+'  = "zp"
 590 encode_ch '\'' = "zq"
 591 encode_ch '\\' = "zr"
 592 encode_ch '/'  = "zs"
 593 encode_ch '*'  = "zt"
 594 encode_ch '_'  = "zu"
 595 encode_ch '%'  = "zv"
 596 encode_ch c    = 'z' : shows (ord c) "U"
 597 \end{code}
 598
 599 Decode is used for user printing.
 600
 601 \begin{code}
 602 decodeFS :: FastString -> FastString
 603 decodeFS fs = mkFastString (decode (unpackFS fs))
 604
 605 decode :: EncodedString -> UserString
 606 decode [] = []
 607 decode ('Z' : rest) = decode_escape rest
 608 decode ('z' : rest) = decode_escape rest
 609 decode (c   : rest) = c : decode rest
 610
 611 decode_escape :: EncodedString -> UserString
 612
 613 decode_escape ('L' : rest) = '(' : decode rest
 614 decode_escape ('R' : rest) = ')' : decode rest
 615 decode_escape ('M' : rest) = '[' : decode rest
 616 decode_escape ('N' : rest) = ']' : decode rest
 617 decode_escape ('C' : rest) = ':' : decode rest
 618 decode_escape ('Z' : rest) = 'Z' : decode rest
 619
 620 decode_escape ('z' : rest) = 'z' : decode rest
 621 decode_escape ('a' : rest) = '&' : decode rest
 622 decode_escape ('b' : rest) = '|' : decode rest
 623 decode_escape ('c' : rest) = '^' : decode rest
 624 decode_escape ('d' : rest) = '$' : decode rest
 625 decode_escape ('e' : rest) = '=' : decode rest
 626 decode_escape ('g' : rest) = '>' : decode rest
 627 decode_escape ('h' : rest) = '#' : decode rest
 628 decode_escape ('i' : rest) = '.' : decode rest
 629 decode_escape ('l' : rest) = '<' : decode rest
 630 decode_escape ('m' : rest) = '-' : decode rest
 631 decode_escape ('n' : rest) = '!' : decode rest
 632 decode_escape ('p' : rest) = '+' : decode rest
 633 decode_escape ('q' : rest) = '\'' : decode rest
 634 decode_escape ('r' : rest) = '\\' : decode rest
 635 decode_escape ('s' : rest) = '/' : decode rest
 636 decode_escape ('t' : rest) = '*' : decode rest
 637 decode_escape ('u' : rest) = '_' : decode rest
 638 decode_escape ('v' : rest) = '%' : decode rest
 639
 640 -- Tuples are coded as Z23T
 641 -- Characters not having a specific code are coded as z224U
 642 decode_escape (c : rest)
 643   | isDigit c = go (digitToInt c) rest
 644   where
 645     go n (c : rest) | isDigit c = go (10*n + digitToInt c) rest
 646     go 0 ('T' : rest)           = "()" ++ (decode rest)
 647     go n ('T' : rest)           = '(' : replicate (n-1) ',' ++ ')' : decode rest
 648     go 1 ('H' : rest)           = "(# #)" ++ (decode rest)
 649     go n ('H' : rest)           = '(' : '#' : replicate (n-1) ',' ++ '#' : ')' : decode rest
 650     go n ('U' : rest)           = chr n : decode rest
 651     go n other = pprPanic "decode_escape" (ppr n <+> text (c:rest))
 652
 653 decode_escape (c : rest) = pprTrace "decode_escape" (char c) (decode rest)
 654 decode_escape []         = pprTrace "decode_escape" (text "empty") ""
 655 \end{code}
 656
 657
 658 %************************************************************************
 659 %*                                                                      *
 660 \subsection{Lexical categories}
 661 %*                                                                      *
 662 %************************************************************************
 663
 664 These functions test strings to see if they fit the lexical categories
 665 defined in the Haskell report.
 666
 667 \begin{code}
 668 isLexCon,   isLexVar,    isLexId,    isLexSym    :: FastString -> Bool
 669 isLexConId, isLexConSym, isLexVarId, isLexVarSym :: FastString -> Bool
 670
 671 isLexCon cs = isLexConId  cs || isLexConSym cs
 672 isLexVar cs = isLexVarId  cs || isLexVarSym cs
 673
 674 isLexId  cs = isLexConId  cs || isLexVarId  cs
 675 isLexSym cs = isLexConSym cs || isLexVarSym cs
 676
 677 -------------
 678
 679 isLexConId cs                           -- Prefix type or data constructors
 680   | nullFastString cs = False           --      e.g. "Foo", "[]", "(,)"
 681   | cs == FSLIT("[]") = True
 682   | otherwise         = startsConId (headFS cs)
 683
 684 isLexVarId cs                           -- Ordinary prefix identifiers
 685   | nullFastString cs = False           --      e.g. "x", "_x"
 686   | otherwise         = startsVarId (headFS cs)
 687
 688 isLexConSym cs                          -- Infix type or data constructors
 689   | nullFastString cs = False           --      e.g. ":-:", ":", "->"
 690   | cs == FSLIT("->") = True
 691   | otherwise         = startsConSym (headFS cs)
 692
 693 isLexVarSym cs                          -- Infix identifiers
 694   | nullFastString cs = False           --      e.g. "+"
 695   | otherwise         = startsVarSym (headFS cs)
 696
 697 -------------
 698 startsVarSym, startsVarId, startsConSym, startsConId :: Char -> Bool
 699 startsVarSym c = isSymbolASCII c || isSymbolISO c       -- Infix Ids
 700 startsConSym c = c == ':'                               -- Infix data constructors
 701 startsVarId c  = isLower c || isLowerISO c || c == '_'  -- Ordinary Ids
 702 startsConId c  = isUpper c || isUpperISO c || c == '('  -- Ordinary type constructors and data constructors
 703
 704
 705 isSymbolASCII c = c `elem` "!#$%&*+./<=>?@\\^|~-"
 706 isSymbolISO   c = ord c `elem` (0xd7 : 0xf7 : [0xa1 .. 0xbf])
 707 isUpperISO    (C# c#) = c# `geChar#` '\xc0'# && c# `leChar#` '\xde'# && c# `neChar#` '\xd7'#
 708         --0xc0 <= oc && oc <= 0xde && oc /= 0xd7 where oc = ord c
 709 isLowerISO    (C# c#) = c# `geChar#` '\xdf'# && c# `leChar#` '\xff'# && c# `neChar#` '\xf7'#
 710         --0xdf <= oc && oc <= 0xff && oc /= 0xf7 where oc = ord c
 711 \end{code}
 712 \begin{code}
 713 {-* Generated by DrIFT-v1.0 : Look, but Don't Touch. *-}
 714 instance Binary NameSpace where
 715     put_ bh VarName = do
 716             putByte bh 0
 717     put_ bh DataName = do
 718             putByte bh 1
 719     put_ bh TvName = do
 720             putByte bh 2
 721     put_ bh TcClsName = do
 722             putByte bh 3
 723     get bh = do
 724             h <- getByte bh
 725             case h of
 726               0 -> do return VarName
 727               1 -> do return DataName
 728               2 -> do return TvName
 729               _ -> do return TcClsName
 730
 731 instance Binary OccName where
 732     put_ bh (OccName aa ab) = do
 733             put_ bh aa
 734             put_ bh ab
 735     get bh = do
 736           aa <- get bh
 737           ab <- get bh
 738           return (OccName aa ab)
 739
 740 --  Imported from other files :-
 741
 742 \end{code}