ghc/compiler/basicTypes/OccName.lhs

   1 {-% DrIFT (Automatic class derivations for Haskell) v1.1 %-}
   2 %
   3 % (c) The GRASP/AQUA Project, Glasgow University, 1992-1998
   4 %
   5
   6 \section[OccName]{@OccName@}
   7
   8 \begin{code}
   9 module OccName (
  10         -- The NameSpace type; abstact
  11         NameSpace, tcName, clsName, tcClsName, dataName, varName,
  12         tvName, srcDataName, nameSpaceString,
  13
  14         -- The OccName type
  15         OccName,        -- Abstract, instance of Outputable
  16         pprOccName,
  17
  18         mkOccFS, mkSysOcc, mkSysOccFS, mkFCallOcc, mkKindOccFS,
  19         mkVarOcc, mkVarOccEncoded,
  20         mkSuperDictSelOcc, mkDFunOcc, mkForeignExportOcc,
  21         mkDictOcc, mkIPOcc, mkWorkerOcc, mkMethodOcc, mkDefaultMethodOcc,
  22         mkDerivedTyConOcc, mkClassTyConOcc, mkClassDataConOcc, mkSpecOcc,
  23         mkGenOcc1, mkGenOcc2, mkLocalOcc, mkDataTOcc, mkDataCOcc,
  24         mkDataConWrapperOcc, mkDataConWorkerOcc,
  25
  26         isTvOcc, isTcOcc, isDataOcc, isDataSymOcc, isSymOcc, isValOcc,
  27         reportIfUnused,
  28
  29         occNameFS, occNameString, occNameUserString, occNameSpace,
  30         occNameFlavour, briefOccNameFlavour,
  31         setOccNameSpace,
  32
  33         -- Tidying up
  34         TidyOccEnv, emptyTidyOccEnv, tidyOccName, initTidyOccEnv,
  35
  36         -- Encoding
  37         EncodedString, EncodedFS, UserString, UserFS, encode, encodeFS, decode, pprEncodedFS,
  38
  39         -- The basic form of names
  40         isLexCon, isLexVar, isLexId, isLexSym,
  41         isLexConId, isLexConSym, isLexVarId, isLexVarSym,
  42         isLowerISO, isUpperISO
  43
  44     ) where
  45
  46 #include "HsVersions.h"
  47
  48 import Char     ( isDigit, isUpper, isLower, isAlphaNum, ord, chr, digitToInt )
  49 import Util     ( thenCmp )
  50 import Unique   ( Unique )
  51 import FiniteMap ( FiniteMap, emptyFM, lookupFM, addToFM, elemFM )
  52 import FastString
  53 import Outputable
  54 import Binary
  55
  56 import GLAEXTS
  57 \end{code}
  58
  59 We hold both module names and identifier names in a 'Z-encoded' form
  60 that makes them acceptable both as a C identifier and as a Haskell
  61 (prefix) identifier.
  62
  63 They can always be decoded again when printing error messages
  64 or anything else for the user, but it does make sense for it
  65 to be represented here in encoded form, so that when generating
  66 code the encoding operation is not performed on each occurrence.
  67
  68 These type synonyms help documentation.
  69
  70 \begin{code}
  71 type UserFS    = FastString     -- As the user typed it
  72 type EncodedFS = FastString     -- Encoded form
  73
  74 type UserString = String        -- As the user typed it
  75 type EncodedString = String     -- Encoded form
  76
  77
  78 pprEncodedFS :: EncodedFS -> SDoc
  79 pprEncodedFS fs
  80   = getPprStyle         $ \ sty ->
  81     if userStyle sty
  82         -- ftext (decodeFS fs) would needlessly pack the string again
  83         then text (decode (unpackFS fs))
  84         else ftext fs
  85 \end{code}
  86
  87 %************************************************************************
  88 %*                                                                      *
  89 \subsection{Name space}
  90 %*                                                                      *
  91 %************************************************************************
  92
  93 \begin{code}
  94 data NameSpace = VarName        -- Variables, including "source" data constructors
  95                | DataName       -- "Real" data constructors
  96                | TvName         -- Type variables
  97                | TcClsName      -- Type constructors and classes; Haskell has them
  98                                 -- in the same name space for now.
  99                deriving( Eq, Ord )
 100    {-! derive: Binary !-}
 101
 102 -- Note [Data Constructors]
 103 -- see also: Note [Data Constructor Naming] in DataCon.lhs
 104 --
 105 --      "Source" data constructors are the data constructors mentioned
 106 --      in Haskell source code
 107 --
 108 --      "Real" data constructors are the data constructors of the
 109 --      representation type, which may not be the same as the source
 110 --      type
 111
 112 -- Example:
 113 --      data T = T !(Int,Int)
 114 --
 115 -- The source datacon has type (Int,Int) -> T
 116 -- The real   datacon has type Int -> Int -> T
 117 -- GHC chooses a representation based on the strictness etc.
 118
 119
 120 -- Though type constructors and classes are in the same name space now,
 121 -- the NameSpace type is abstract, so we can easily separate them later
 122 tcName    = TcClsName           -- Type constructors
 123 clsName   = TcClsName           -- Classes
 124 tcClsName = TcClsName           -- Not sure which!
 125
 126 dataName    = DataName
 127 srcDataName = DataName  -- Haskell-source data constructors should be
 128                         -- in the Data name space
 129
 130 tvName      = TvName
 131 varName     = VarName
 132
 133 nameSpaceString :: NameSpace -> String
 134 nameSpaceString DataName  = "Data constructor"
 135 nameSpaceString VarName   = "Variable"
 136 nameSpaceString TvName    = "Type variable"
 137 nameSpaceString TcClsName = "Type constructor or class"
 138 \end{code}
 139
 140
 141 %************************************************************************
 142 %*                                                                      *
 143 \subsection[Name-pieces-datatypes]{The @OccName@ datatypes}
 144 %*                                                                      *
 145 %************************************************************************
 146
 147 \begin{code}
 148 data OccName = OccName
 149                         NameSpace
 150                         EncodedFS
 151    {-! derive : Binary !-}
 152 \end{code}
 153
 154
 155 \begin{code}
 156 instance Eq OccName where
 157     (OccName sp1 s1) == (OccName sp2 s2) = s1 == s2 && sp1 == sp2
 158
 159 instance Ord OccName where
 160     compare (OccName sp1 s1) (OccName sp2 s2) = (s1  `compare` s2) `thenCmp`
 161                                                 (sp1 `compare` sp2)
 162 \end{code}
 163
 164
 165 %************************************************************************
 166 %*                                                                      *
 167 \subsection{Printing}
 168 %*                                                                      *
 169 %************************************************************************
 170
 171 \begin{code}
 172 instance Outputable OccName where
 173     ppr = pprOccName
 174
 175 pprOccName :: OccName -> SDoc
 176 pprOccName (OccName sp occ) = pprEncodedFS occ
 177 \end{code}
 178
 179
 180 %************************************************************************
 181 %*                                                                      *
 182 \subsection{Construction}
 183 %*                                                                      *
 184 %************************************************************************
 185
 186 *Sys* things do no encoding; the caller should ensure that the thing is
 187 already encoded
 188
 189 \begin{code}
 190 mkSysOcc :: NameSpace -> EncodedString -> OccName
 191 mkSysOcc occ_sp str = ASSERT2( alreadyEncoded str, text str )
 192                       OccName occ_sp (mkFastString str)
 193
 194 mkSysOccFS :: NameSpace -> EncodedFS -> OccName
 195 mkSysOccFS occ_sp fs = ASSERT2( alreadyEncodedFS fs, ppr fs )
 196                        OccName occ_sp fs
 197
 198 mkFCallOcc :: EncodedString -> OccName
 199 -- This version of mkSysOcc doesn't check that the string is already encoded,
 200 -- because it will be something like "{__ccall f dyn Int# -> Int#}"
 201 -- This encodes a lot into something that then parses like an Id.
 202 -- But then alreadyEncoded complains about the braces!
 203 mkFCallOcc str = OccName varName (mkFastString str)
 204
 205 -- Kind constructors get a special function.  Uniquely, they are not encoded,
 206 -- so that they have names like '*'.  This means that *even in interface files*
 207 -- we'll get kinds like (* -> (* -> *)).  We can't use mkSysOcc because it
 208 -- has an ASSERT that doesn't hold.
 209 mkKindOccFS :: NameSpace -> EncodedFS -> OccName
 210 mkKindOccFS occ_sp fs = OccName occ_sp fs
 211 \end{code}
 212
 213 *Source-code* things are encoded.
 214
 215 \begin{code}
 216 mkOccFS :: NameSpace -> UserFS -> OccName
 217 mkOccFS occ_sp fs = mkSysOccFS occ_sp (encodeFS fs)
 218
 219 mkVarOcc :: UserFS -> OccName
 220 mkVarOcc fs = mkSysOccFS varName (encodeFS fs)
 221
 222 mkVarOccEncoded :: EncodedFS -> OccName
 223 mkVarOccEncoded fs = mkSysOccFS varName fs
 224 \end{code}
 225
 226
 227
 228 %************************************************************************
 229 %*                                                                      *
 230 \subsection{Predicates and taking them apart}
 231 %*                                                                      *
 232 %************************************************************************
 233
 234 \begin{code}
 235 occNameFS :: OccName -> EncodedFS
 236 occNameFS (OccName _ s) = s
 237
 238 occNameString :: OccName -> EncodedString
 239 occNameString (OccName _ s) = unpackFS s
 240
 241 occNameUserString :: OccName -> UserString
 242 occNameUserString occ = decode (occNameString occ)
 243
 244 occNameSpace :: OccName -> NameSpace
 245 occNameSpace (OccName sp _) = sp
 246
 247 setOccNameSpace :: NameSpace -> OccName -> OccName
 248 setOccNameSpace sp (OccName _ occ) = OccName sp occ
 249
 250 -- occNameFlavour is used only to generate good error messages
 251 occNameFlavour :: OccName -> String
 252 occNameFlavour (OccName DataName _)  = "Data constructor"
 253 occNameFlavour (OccName TvName _)    = "Type variable"
 254 occNameFlavour (OccName TcClsName _) = "Type constructor or class"
 255 occNameFlavour (OccName VarName s)   = "Variable"
 256
 257 -- briefOccNameFlavour is used in debug-printing of names
 258 briefOccNameFlavour :: OccName -> String
 259 briefOccNameFlavour (OccName DataName _)    = "d"
 260 briefOccNameFlavour (OccName VarName _)     = "v"
 261 briefOccNameFlavour (OccName TvName _)      = "tv"
 262 briefOccNameFlavour (OccName TcClsName _)   = "tc"
 263 \end{code}
 264
 265 \begin{code}
 266 isTvOcc, isDataSymOcc, isSymOcc, isTcOcc :: OccName -> Bool
 267
 268 isTvOcc (OccName TvName _) = True
 269 isTvOcc other              = False
 270
 271 isTcOcc (OccName TcClsName _) = True
 272 isTcOcc other                 = False
 273
 274 isValOcc (OccName VarName  _) = True
 275 isValOcc (OccName DataName _) = True
 276 isValOcc other                = False
 277
 278 -- Data constructor operator (starts with ':', or '[]')
 279 -- Pretty inefficient!
 280 isDataSymOcc (OccName DataName s) = isLexConSym (decodeFS s)
 281 isDataSymOcc (OccName VarName s)  = isLexConSym (decodeFS s)
 282 isDataSymOcc other                = False
 283
 284 isDataOcc (OccName DataName _) = True
 285 isDataOcc (OccName VarName s)  = isLexCon (decodeFS s)
 286 isDataOcc other                = False
 287
 288 -- Any operator (data constructor or variable)
 289 -- Pretty inefficient!
 290 isSymOcc (OccName DataName s) = isLexConSym (decodeFS s)
 291 isSymOcc (OccName VarName s)  = isLexSym (decodeFS s)
 292 \end{code}
 293
 294
 295 \begin{code}
 296 reportIfUnused :: OccName -> Bool
 297   -- Haskell 98 encourages compilers to suppress warnings about
 298   -- unused names in a pattern if they start with "_".
 299 reportIfUnused occ = case occNameUserString occ of
 300                         ('_' : _) -> False
 301                         zz_other  -> True
 302 \end{code}
 303
 304
 305
 306 %************************************************************************
 307 %*                                                                      *
 308 \subsection{Making system names}
 309 %*                                                                      *
 310 %************************************************************************
 311
 312 Here's our convention for splitting up the interface file name space:
 313
 314         d...            dictionary identifiers
 315                         (local variables, so no name-clash worries)
 316
 317         $f...           dict-fun identifiers (from inst decls)
 318         $dm...          default methods
 319         $p...           superclass selectors
 320         $w...           workers
 321         :T...           compiler-generated tycons for dictionaries
 322         :D...           ...ditto data cons
 323         $sf..           specialised version of f
 324
 325         in encoded form these appear as Zdfxxx etc
 326
 327         :...            keywords (export:, letrec: etc.)
 328 --- I THINK THIS IS WRONG!
 329
 330 This knowledge is encoded in the following functions.
 331
 332
 333 @mk_deriv@ generates an @OccName@ from the prefix and a string.
 334 NB: The string must already be encoded!
 335
 336 \begin{code}
 337 mk_deriv :: NameSpace
 338          -> String              -- Distinguishes one sort of derived name from another
 339          -> EncodedString       -- Must be already encoded!!  We don't want to encode it a
 340                                 -- second time because encoding isn't idempotent
 341          -> OccName
 342
 343 mk_deriv occ_sp sys_prefix str = mkSysOcc occ_sp (encode sys_prefix ++ str)
 344 \end{code}
 345
 346 \begin{code}
 347 mkDictOcc, mkIPOcc, mkWorkerOcc, mkDefaultMethodOcc,
 348            mkClassTyConOcc, mkClassDataConOcc, mkSpecOcc
 349    :: OccName -> OccName
 350
 351 -- These derived variables have a prefix that no Haskell value could have
 352 mkDataConWrapperOcc = mk_simple_deriv varName  "$W"
 353 mkWorkerOcc         = mk_simple_deriv varName  "$w"
 354 mkDefaultMethodOcc  = mk_simple_deriv varName  "$dm"
 355 mkDerivedTyConOcc   = mk_simple_deriv tcName   ":"      -- The : prefix makes sure it classifies
 356 mkClassTyConOcc     = mk_simple_deriv tcName   ":T"     -- as a tycon/datacon
 357 mkClassDataConOcc   = mk_simple_deriv dataName ":D"     -- We go straight to the "real" data con
 358                                                         -- for datacons from classes
 359 mkDictOcc           = mk_simple_deriv varName  "$d"
 360 mkIPOcc             = mk_simple_deriv varName  "$i"
 361 mkSpecOcc           = mk_simple_deriv varName  "$s"
 362 mkForeignExportOcc  = mk_simple_deriv varName  "$f"
 363
 364 -- Generic derivable classes
 365 mkGenOcc1           = mk_simple_deriv varName  "$gfrom"
 366 mkGenOcc2           = mk_simple_deriv varName  "$gto"
 367
 368 -- data T = MkT ... deriving( Data ) needs defintions for
 369 --      $tT   :: Data.Generics.Basics.DataType
 370 --      $cMkT :: Data.Generics.Basics.Constr
 371 mkDataTOcc = mk_simple_deriv varName  "$t"
 372 mkDataCOcc = mk_simple_deriv varName  "$c"
 373
 374 mk_simple_deriv sp px occ = mk_deriv sp px (occNameString occ)
 375
 376
 377 -- Data constructor workers are made by setting the name space
 378 -- of the data constructor OccName (which should be a DataName)
 379 -- to DataName
 380 mkDataConWorkerOcc datacon_occ = setOccNameSpace varName datacon_occ
 381 \end{code}
 382
 383 \begin{code}
 384 mkSuperDictSelOcc :: Int        -- Index of superclass, eg 3
 385                   -> OccName    -- Class, eg "Ord"
 386                   -> OccName    -- eg "$p3Ord"
 387 mkSuperDictSelOcc index cls_occ
 388   = mk_deriv varName "$p" (show index ++ occNameString cls_occ)
 389
 390 mkLocalOcc :: Unique            -- Unique
 391            -> OccName           -- Local name (e.g. "sat")
 392            -> OccName           -- Nice unique version ("$L23sat")
 393 mkLocalOcc uniq occ
 394    = mk_deriv varName ("$L" ++ show uniq) (occNameString occ)
 395         -- The Unique might print with characters
 396         -- that need encoding (e.g. 'z'!)
 397 \end{code}
 398
 399
 400 \begin{code}
 401 mkDFunOcc :: EncodedString      -- Typically the class and type glommed together e.g. "OrdMaybe"
 402           -> OccName            -- "$fOrdMaybe"
 403
 404 mkDFunOcc string = mk_deriv VarName "$f" string
 405 \end{code}
 406
 407 We used to add a '$m' to indicate a method, but that gives rise to bad
 408 error messages from the type checker when we print the function name or pattern
 409 of an instance-decl binding.  Why? Because the binding is zapped
 410 to use the method name in place of the selector name.
 411 (See TcClassDcl.tcMethodBind)
 412
 413 The way it is now, -ddump-xx output may look confusing, but
 414 you can always say -dppr-debug to get the uniques.
 415
 416 However, we *do* have to zap the first character to be lower case,
 417 because overloaded constructors (blarg) generate methods too.
 418 And convert to VarName space
 419
 420 e.g. a call to constructor MkFoo where
 421         data (Ord a) => Foo a = MkFoo a
 422
 423 If this is necessary, we do it by prefixing '$m'.  These
 424 guys never show up in error messages.  What a hack.
 425
 426 \begin{code}
 427 mkMethodOcc :: OccName -> OccName
 428 mkMethodOcc occ@(OccName VarName fs) = occ
 429 mkMethodOcc occ                      = mk_simple_deriv varName "$m" occ
 430 \end{code}
 431
 432
 433 %************************************************************************
 434 %*                                                                      *
 435 \subsection{Tidying them up}
 436 %*                                                                      *
 437 %************************************************************************
 438
 439 Before we print chunks of code we like to rename it so that
 440 we don't have to print lots of silly uniques in it.  But we mustn't
 441 accidentally introduce name clashes!  So the idea is that we leave the
 442 OccName alone unless it accidentally clashes with one that is already
 443 in scope; if so, we tack on '1' at the end and try again, then '2', and
 444 so on till we find a unique one.
 445
 446 There's a wrinkle for operators.  Consider '>>='.  We can't use '>>=1'
 447 because that isn't a single lexeme.  So we encode it to 'lle' and *then*
 448 tack on the '1', if necessary.
 449
 450 \begin{code}
 451 type TidyOccEnv = FiniteMap FastString Int      -- The in-scope OccNames
 452 emptyTidyOccEnv = emptyFM
 453
 454 initTidyOccEnv :: [OccName] -> TidyOccEnv       -- Initialise with names to avoid!
 455 initTidyOccEnv = foldl (\env (OccName _ fs) -> addToFM env fs 1) emptyTidyOccEnv
 456
 457 tidyOccName :: TidyOccEnv -> OccName -> (TidyOccEnv, OccName)
 458
 459 tidyOccName in_scope occ@(OccName occ_sp fs)
 460   | not (fs `elemFM` in_scope)
 461   = (addToFM in_scope fs 1, occ)        -- First occurrence
 462
 463   | otherwise                           -- Already occurs
 464   = go in_scope (unpackFS fs)
 465   where
 466
 467     go in_scope str = case lookupFM in_scope pk_str of
 468                         Just n  -> go (addToFM in_scope pk_str (n+1)) (str ++ show n)
 469                                 -- Need to go round again, just in case "t3" (say)
 470                                 -- clashes with a "t3" that's already in scope
 471
 472                         Nothing -> (addToFM in_scope pk_str 1, mkSysOccFS occ_sp pk_str)
 473                                 -- str is now unique
 474                     where
 475                       pk_str = mkFastString str
 476 \end{code}
 477
 478
 479 %************************************************************************
 480 %*                                                                      *
 481 \subsection{The 'Z' encoding}
 482 %*                                                                      *
 483 %************************************************************************
 484
 485 This is the main name-encoding and decoding function.  It encodes any
 486 string into a string that is acceptable as a C name.  This is the name
 487 by which things are known right through the compiler.
 488
 489 The basic encoding scheme is this.
 490
 491 * Tuples (,,,) are coded as Z3T
 492
 493 * Alphabetic characters (upper and lower) and digits
 494         all translate to themselves;
 495         except 'Z', which translates to 'ZZ'
 496         and    'z', which translates to 'zz'
 497   We need both so that we can preserve the variable/tycon distinction
 498
 499 * Most other printable characters translate to 'zx' or 'Zx' for some
 500         alphabetic character x
 501
 502 * The others translate as 'znnnU' where 'nnn' is the decimal number
 503         of the character
 504
 505         Before          After
 506         --------------------------
 507         Trak            Trak
 508         foo_wib         foozuwib
 509         >               zg
 510         >1              zg1
 511         foo#            foozh
 512         foo##           foozhzh
 513         foo##1          foozhzh1
 514         fooZ            fooZZ
 515         :+              ZCzp
 516         ()              Z0T     0-tuple
 517         (,,,,)          Z5T     5-tuple
 518         (# #)           Z1H     unboxed 1-tuple (note the space)
 519         (#,,,,#)        Z5H     unboxed 5-tuple
 520                 (NB: There is no Z1T nor Z0H.)
 521
 522 \begin{code}
 523 -- alreadyEncoded is used in ASSERTs to check for encoded
 524 -- strings.  It isn't fail-safe, of course, because, say 'zh' might
 525 -- be encoded or not.
 526 alreadyEncoded :: String -> Bool
 527 alreadyEncoded s = all ok s
 528                  where
 529                    ok ' ' = True
 530                         -- This is a bit of a lie; if we really wanted spaces
 531                         -- in names we'd have to encode them.  But we do put
 532                         -- spaces in ccall "occurrences", and we don't want to
 533                         -- reject them here
 534                    ok ch  = isAlphaNum ch
 535
 536 alreadyEncodedFS :: FastString -> Bool
 537 alreadyEncodedFS fs = alreadyEncoded (unpackFS fs)
 538
 539 encode :: UserString -> EncodedString
 540 encode cs = case maybe_tuple cs of
 541                 Just n  -> n            -- Tuples go to Z2T etc
 542                 Nothing -> go cs
 543           where
 544                 go []     = []
 545                 go (c:cs) = encode_ch c ++ go cs
 546
 547 maybe_tuple "(# #)" = Just("Z1H")
 548 maybe_tuple ('(' : '#' : cs) = case count_commas (0::Int) cs of
 549                                  (n, '#' : ')' : cs) -> Just ('Z' : shows (n+1) "H")
 550                                  other               -> Nothing
 551 maybe_tuple "()" = Just("Z0T")
 552 maybe_tuple ('(' : cs)       = case count_commas (0::Int) cs of
 553                                  (n, ')' : cs) -> Just ('Z' : shows (n+1) "T")
 554                                  other         -> Nothing
 555 maybe_tuple other            = Nothing
 556
 557 count_commas :: Int -> String -> (Int, String)
 558 count_commas n (',' : cs) = count_commas (n+1) cs
 559 count_commas n cs         = (n,cs)
 560
 561 encodeFS :: UserFS -> EncodedFS
 562 encodeFS fast_str  | all unencodedChar str = fast_str
 563                    | otherwise             = mkFastString (encode str)
 564                    where
 565                      str = unpackFS fast_str
 566
 567 unencodedChar :: Char -> Bool   -- True for chars that don't need encoding
 568 unencodedChar 'Z' = False
 569 unencodedChar 'z' = False
 570 unencodedChar c   =  c >= 'a' && c <= 'z'
 571                   || c >= 'A' && c <= 'Z'
 572                   || c >= '0' && c <= '9'
 573
 574 encode_ch :: Char -> EncodedString
 575 encode_ch c | unencodedChar c = [c]     -- Common case first
 576
 577 -- Constructors
 578 encode_ch '('  = "ZL"   -- Needed for things like (,), and (->)
 579 encode_ch ')'  = "ZR"   -- For symmetry with (
 580 encode_ch '['  = "ZM"
 581 encode_ch ']'  = "ZN"
 582 encode_ch ':'  = "ZC"
 583 encode_ch 'Z'  = "ZZ"
 584
 585 -- Variables
 586 encode_ch 'z'  = "zz"
 587 encode_ch '&'  = "za"
 588 encode_ch '|'  = "zb"
 589 encode_ch '^'  = "zc"
 590 encode_ch '$'  = "zd"
 591 encode_ch '='  = "ze"
 592 encode_ch '>'  = "zg"
 593 encode_ch '#'  = "zh"
 594 encode_ch '.'  = "zi"
 595 encode_ch '<'  = "zl"
 596 encode_ch '-'  = "zm"
 597 encode_ch '!'  = "zn"
 598 encode_ch '+'  = "zp"
 599 encode_ch '\'' = "zq"
 600 encode_ch '\\' = "zr"
 601 encode_ch '/'  = "zs"
 602 encode_ch '*'  = "zt"
 603 encode_ch '_'  = "zu"
 604 encode_ch '%'  = "zv"
 605 encode_ch c    = 'z' : shows (ord c) "U"
 606 \end{code}
 607
 608 Decode is used for user printing.
 609
 610 \begin{code}
 611 decodeFS :: FastString -> FastString
 612 decodeFS fs = mkFastString (decode (unpackFS fs))
 613
 614 decode :: EncodedString -> UserString
 615 decode [] = []
 616 decode ('Z' : rest) = decode_escape rest
 617 decode ('z' : rest) = decode_escape rest
 618 decode (c   : rest) = c : decode rest
 619
 620 decode_escape :: EncodedString -> UserString
 621
 622 decode_escape ('L' : rest) = '(' : decode rest
 623 decode_escape ('R' : rest) = ')' : decode rest
 624 decode_escape ('M' : rest) = '[' : decode rest
 625 decode_escape ('N' : rest) = ']' : decode rest
 626 decode_escape ('C' : rest) = ':' : decode rest
 627 decode_escape ('Z' : rest) = 'Z' : decode rest
 628
 629 decode_escape ('z' : rest) = 'z' : decode rest
 630 decode_escape ('a' : rest) = '&' : decode rest
 631 decode_escape ('b' : rest) = '|' : decode rest
 632 decode_escape ('c' : rest) = '^' : decode rest
 633 decode_escape ('d' : rest) = '$' : decode rest
 634 decode_escape ('e' : rest) = '=' : decode rest
 635 decode_escape ('g' : rest) = '>' : decode rest
 636 decode_escape ('h' : rest) = '#' : decode rest
 637 decode_escape ('i' : rest) = '.' : decode rest
 638 decode_escape ('l' : rest) = '<' : decode rest
 639 decode_escape ('m' : rest) = '-' : decode rest
 640 decode_escape ('n' : rest) = '!' : decode rest
 641 decode_escape ('p' : rest) = '+' : decode rest
 642 decode_escape ('q' : rest) = '\'' : decode rest
 643 decode_escape ('r' : rest) = '\\' : decode rest
 644 decode_escape ('s' : rest) = '/' : decode rest
 645 decode_escape ('t' : rest) = '*' : decode rest
 646 decode_escape ('u' : rest) = '_' : decode rest
 647 decode_escape ('v' : rest) = '%' : decode rest
 648
 649 -- Tuples are coded as Z23T
 650 -- Characters not having a specific code are coded as z224U
 651 decode_escape (c : rest)
 652   | isDigit c = go (digitToInt c) rest
 653   where
 654     go n (c : rest) | isDigit c = go (10*n + digitToInt c) rest
 655     go 0 ('T' : rest)           = "()" ++ (decode rest)
 656     go n ('T' : rest)           = '(' : replicate (n-1) ',' ++ ')' : decode rest
 657     go 1 ('H' : rest)           = "(# #)" ++ (decode rest)
 658     go n ('H' : rest)           = '(' : '#' : replicate (n-1) ',' ++ '#' : ')' : decode rest
 659     go n ('U' : rest)           = chr n : decode rest
 660     go n other = pprPanic "decode_escape" (ppr n <+> text (c:rest))
 661
 662 decode_escape (c : rest) = pprTrace "decode_escape" (char c) (decode rest)
 663 decode_escape []         = pprTrace "decode_escape" (text "empty") ""
 664 \end{code}
 665
 666
 667 %************************************************************************
 668 %*                                                                      *
 669 \subsection{Lexical categories}
 670 %*                                                                      *
 671 %************************************************************************
 672
 673 These functions test strings to see if they fit the lexical categories
 674 defined in the Haskell report.
 675
 676 \begin{code}
 677 isLexCon,   isLexVar,    isLexId,    isLexSym    :: FastString -> Bool
 678 isLexConId, isLexConSym, isLexVarId, isLexVarSym :: FastString -> Bool
 679
 680 isLexCon cs = isLexConId  cs || isLexConSym cs
 681 isLexVar cs = isLexVarId  cs || isLexVarSym cs
 682
 683 isLexId  cs = isLexConId  cs || isLexVarId  cs
 684 isLexSym cs = isLexConSym cs || isLexVarSym cs
 685
 686 -------------
 687
 688 isLexConId cs                           -- Prefix type or data constructors
 689   | nullFastString cs = False           --      e.g. "Foo", "[]", "(,)"
 690   | cs == FSLIT("[]") = True
 691   | otherwise         = startsConId (headFS cs)
 692
 693 isLexVarId cs                           -- Ordinary prefix identifiers
 694   | nullFastString cs = False           --      e.g. "x", "_x"
 695   | otherwise         = startsVarId (headFS cs)
 696
 697 isLexConSym cs                          -- Infix type or data constructors
 698   | nullFastString cs = False           --      e.g. ":-:", ":", "->"
 699   | cs == FSLIT("->") = True
 700   | otherwise         = startsConSym (headFS cs)
 701
 702 isLexVarSym cs                          -- Infix identifiers
 703   | nullFastString cs = False           --      e.g. "+"
 704   | otherwise         = startsVarSym (headFS cs)
 705
 706 -------------
 707 startsVarSym, startsVarId, startsConSym, startsConId :: Char -> Bool
 708 startsVarSym c = isSymbolASCII c || isSymbolISO c       -- Infix Ids
 709 startsConSym c = c == ':'                               -- Infix data constructors
 710 startsVarId c  = isLower c || isLowerISO c || c == '_'  -- Ordinary Ids
 711 startsConId c  = isUpper c || isUpperISO c || c == '('  -- Ordinary type constructors and data constructors
 712
 713
 714 isSymbolASCII c = c `elem` "!#$%&*+./<=>?@\\^|~-"
 715 isSymbolISO   c = ord c `elem` (0xd7 : 0xf7 : [0xa1 .. 0xbf])
 716 isUpperISO    (C# c#) = c# `geChar#` '\xc0'# && c# `leChar#` '\xde'# && c# `neChar#` '\xd7'#
 717         --0xc0 <= oc && oc <= 0xde && oc /= 0xd7 where oc = ord c
 718 isLowerISO    (C# c#) = c# `geChar#` '\xdf'# && c# `leChar#` '\xff'# && c# `neChar#` '\xf7'#
 719         --0xdf <= oc && oc <= 0xff && oc /= 0xf7 where oc = ord c
 720 \end{code}
 721 \begin{code}
 722 {-* Generated by DrIFT-v1.0 : Look, but Don't Touch. *-}
 723 instance Binary NameSpace where
 724     put_ bh VarName = do
 725             putByte bh 0
 726     put_ bh DataName = do
 727             putByte bh 1
 728     put_ bh TvName = do
 729             putByte bh 2
 730     put_ bh TcClsName = do
 731             putByte bh 3
 732     get bh = do
 733             h <- getByte bh
 734             case h of
 735               0 -> do return VarName
 736               1 -> do return DataName
 737               2 -> do return TvName
 738               _ -> do return TcClsName
 739
 740 instance Binary OccName where
 741     put_ bh (OccName aa ab) = do
 742             put_ bh aa
 743             put_ bh ab
 744     get bh = do
 745           aa <- get bh
 746           ab <- get bh
 747           return (OccName aa ab)
 748
 749 --  Imported from other files :-
 750
 751 \end{code}