ghc/compiler/reader/Lex.lhs

   1 %
   2 % (c) The GRASP/AQUA Project, Glasgow University, 1992-1996
   3 %
   4 \section[Lexical analysis]{Lexical analysis}
   5
   6 \begin{code}
   7 #include "HsVersions.h"
   8
   9 module Lex (
  10
  11         isLexCon, isLexVar, isLexId, isLexSym,
  12         isLexConId, isLexConSym, isLexVarId, isLexVarSym,
  13         mkTupNameStr, ifaceParseErr,
  14
  15         -- Monad for parser
  16         IfaceToken(..), lexIface, SYN_IE(IfM), thenIf, returnIf, happyError,
  17         StringBuffer
  18
  19     ) where
  20
  21
  22 IMPORT_1_3(Char(isDigit, isAlpha, isAlphanum, isUpper))
  23 IMPORT_DELOOPER(Ubiq)
  24 IMPORT_DELOOPER(IdLoop)    -- get the CostCentre type&constructors from here
  25
  26 import CmdLineOpts      ( opt_IgnoreIfacePragmas )
  27 import Demand           ( Demand(..) {- instance Read -} )
  28 import UniqFM           ( UniqFM, listToUFM, lookupUFM)
  29 --import FiniteMap      ( FiniteMap, listToFM, lookupFM )
  30 import Maybes           ( Maybe(..), MaybeErr(..) )
  31 import Pretty
  32 import CharSeq          ( CSeq )
  33
  34
  35
  36 import ErrUtils         ( Error(..) )
  37 import Outputable       ( Outputable(..) )
  38 import PprStyle         ( PprStyle(..) )
  39 import Util             ( nOfThem, panic )
  40
  41 import FastString
  42 import StringBuffer
  43
  44 import PreludeGlaST
  45
  46 \end{code}
  47
  48 %************************************************************************
  49 %*                                                                      *
  50 \subsection{Lexical categories}
  51 %*                                                                      *
  52 %************************************************************************
  53
  54 These functions test strings to see if they fit the lexical categories
  55 defined in the Haskell report.  Normally applied as in e.g. @isCon
  56 (getLocalName foo)@.
  57
  58 \begin{code}
  59 isLexCon, isLexVar, isLexId, isLexSym, isLexConId, isLexConSym,
  60  isLexVarId, isLexVarSym  :: FAST_STRING -> Bool
  61
  62 isLexCon cs = isLexConId  cs || isLexConSym cs
  63 isLexVar cs = isLexVarId  cs || isLexVarSym cs
  64
  65 isLexId  cs = isLexConId  cs || isLexVarId  cs
  66 isLexSym cs = isLexConSym cs || isLexVarSym cs
  67
  68 -------------
  69
  70 isLexConId cs
  71   | _NULL_ cs        = False
  72   | cs == SLIT("[]") = True
  73   | c  == '('        = True     -- (), (,), (,,), ...
  74   | otherwise        = isUpper c || isUpperISO c
  75   where
  76     c = _HEAD_ cs
  77
  78 isLexVarId cs
  79   | _NULL_ cs    = False
  80   | otherwise    = isLower c || isLowerISO c
  81   where
  82     c = _HEAD_ cs
  83
  84 isLexConSym cs
  85   | _NULL_ cs   = False
  86   | otherwise   = c  == ':'
  87                || cs == SLIT("->")
  88   where
  89     c = _HEAD_ cs
  90
  91 isLexVarSym cs
  92   | _NULL_ cs = False
  93   | otherwise = isSymbolASCII c
  94              || isSymbolISO c
  95   where
  96     c = _HEAD_ cs
  97
  98 -------------
  99 isSymbolASCII c = c `elem` "!#$%&*+./<=>?@\\^|~-"
 100 isSymbolISO   c = ord c `elem` (0xd7 : 0xf7 : [0xa1 .. 0xbf])
 101 isUpperISO    (C# c#) = c# `geChar#` '\xc0'# && c# `leChar#` '\xde'# && c# `neChar#` '\xd7'#
 102 --0xc0 <= oc && oc <= 0xde && oc /= 0xd7 where oc = ord c
 103 isLowerISO    (C# c#) = c# `geChar#` '\xdf'# && c# `leChar#` '\xff'# && c# `neChar#` '\xf7'#
 104 --0xdf <= oc && oc <= 0xff && oc /= 0xf7 where oc = ord c
 105 \end{code}
 106
 107
 108 %************************************************************************
 109 %*                                                                      *
 110 \subsection{Tuple strings -- ugh!}
 111 %*                                                                      *
 112 %************************************************************************
 113
 114 \begin{code}
 115 mkTupNameStr 0 = SLIT("()")
 116 mkTupNameStr 1 = panic "Name.mkTupNameStr: 1 ???"
 117 mkTupNameStr 2 = _PK_ "(,)"   -- not strictly necessary
 118 mkTupNameStr 3 = _PK_ "(,,)"  -- ditto
 119 mkTupNameStr 4 = _PK_ "(,,,)" -- ditto
 120 mkTupNameStr n = _PK_ ("(" ++ nOfThem (n-1) ',' ++ ")")
 121 \end{code}
 122
 123
 124
 125 %************************************************************************
 126 %*                                                                      *
 127 \subsection{Data types}
 128 %*                                                                      *
 129 %************************************************************************
 130
 131 The token data type, fairly un-interesting except from two constructors,
 132 @ITidinfo@ and @ITtype@, which are used to lazily lex id info (arity,
 133 strictness, unfolding etc) and types for id decls.
 134
 135 The Idea/Observation here is that the renamer needs to scan through
 136 all of an interface file before it can continue. But only a fraction
 137 of the information contained in the file turns out to be useful, so
 138 delaying as much as possible of the scanning and parsing of an
 139 interface file Makes Sense (Heap profiles of the compiler
 140 show at a reduction in heap usage by at least a factor of two,
 141 post-renamer).
 142
 143 Hence, the interface file lexer spots when value declarations are
 144 being scanned and return the @ITidinfo@ and @ITtype@ constructors
 145 for the type and any other id info for that binding (unfolding, strictness
 146 etc). These constructors are applied to the result of lexing these sub-chunks.
 147
 148 The lexing of the type and id info is all done lazily, of course, so
 149 the scanning (and subsequent parsing) will be done *only* on the ids the
 150 renamer finds out that it is interested in. The rest will just be junked.
 151 Laziness, you know it makes sense :-)
 152
 153 \begin{code}
 154 data IfaceToken
 155   = ITinterface         -- keywords
 156   | ITusages
 157   | ITversions
 158   | ITexports
 159   | ITinstance_modules
 160   | ITinstances
 161   | ITfixities
 162   | ITdeclarations
 163   | ITpragmas
 164   | ITdata
 165   | ITtype
 166   | ITnewtype
 167   | ITderiving
 168   | ITclass
 169   | ITwhere
 170   | ITinstance
 171   | ITinfixl
 172   | ITinfixr
 173   | ITinfix
 174   | ITforall
 175   | ITbang              -- magic symbols
 176   | ITvbar
 177   | ITdcolon
 178   | ITcomma
 179   | ITdarrow
 180   | ITdotdot
 181   | ITequal
 182   | ITocurly
 183   | ITobrack
 184   | IToparen
 185   | ITrarrow
 186   | ITccurly
 187   | ITcbrack
 188   | ITcparen
 189   | ITsemi
 190   | ITvarid   FAST_STRING
 191   | ITconid   FAST_STRING
 192   | ITvarsym  FAST_STRING
 193   | ITconsym  FAST_STRING
 194   | ITqvarid  (FAST_STRING,FAST_STRING)
 195   | ITqconid  (FAST_STRING,FAST_STRING)
 196   | ITqvarsym (FAST_STRING,FAST_STRING)
 197   | ITqconsym (FAST_STRING,FAST_STRING)
 198
 199   | ITidinfo [IfaceToken]  -- lazily return the stream of tokens for
 200                            -- the info attached to an id.
 201   | ITtysig [IfaceToken]   -- lazily return the stream of tokens for
 202                            -- the info attached to an id.
 203         -- Stuff for reading unfoldings
 204   | ITarity | ITstrict | ITunfold
 205   | ITdemand [Demand] | ITbottom
 206   | ITlam | ITbiglam | ITcase | ITprim_case | ITlet | ITletrec | ITin | ITof
 207   | ITcoerce_in | ITcoerce_out | ITatsign
 208   | ITccall (Bool,Bool)         -- (is_casm, may_gc)
 209   | ITscc CostCentre
 210   | ITchar Char | ITstring FAST_STRING
 211   | ITinteger Integer | ITdouble Double
 212   | ITinteger_lit | ITfloat_lit | ITrational_lit | ITaddr_lit | ITlit_lit | ITstring_lit
 213   | ITunknown String            -- Used when the lexer can't make sense of it
 214   deriving Text -- debugging
 215
 216 instance Text CostCentre -- cheat!
 217
 218 \end{code}
 219
 220 %************************************************************************
 221 %*                                                                      *
 222 \subsection{The lexical analyser}
 223 %*                                                                      *
 224 %************************************************************************
 225
 226 \begin{code}
 227 lexIface :: StringBuffer -> [IfaceToken]
 228 lexIface buf =
 229  _scc_ "Lexer"
 230 -- if bufferExhausted buf then
 231 --  []
 232 -- else
 233 --  _trace ("Lexer: "++[C# (currentChar# buf)]) $
 234   case currentChar# buf of
 235       -- whitespace and comments, ignore.
 236     ' '#  -> lexIface (stepOn buf)
 237     '\t'# -> lexIface (stepOn buf)
 238     '\n'# -> lexIface (stepOn buf)
 239
 240 -- Numbers and comments
 241     '-'#  ->
 242       case lookAhead# buf 1# of
 243         '-'# -> lex_comment (stepOnBy# buf 2#)
 244         c    ->
 245           if isDigit (C# c)
 246           then lex_num (negate) (ord# c -# ord# '0'#) (incLexeme (incLexeme buf))
 247           else lex_id buf
 248
 249 -- Leave out nested comments for now; danger of finding { and - juxtaposed by mistake?
 250 --    '{' : '-' : cs -> lex_nested_comment 1{-one seen-} cs
 251
 252     '('# ->
 253          case prefixMatch (stepOn buf) "..)" of
 254            Just buf' ->  ITdotdot : lexIface (stepOverLexeme buf')
 255            Nothing ->
 256             case lookAhead# buf 1# of
 257               ','# -> lex_tuple Nothing  (stepOnBy# buf 2#)
 258               ')'# -> ITconid SLIT("()") : lexIface (stepOnBy# buf 2#)
 259               _    -> IToparen : lexIface (stepOn buf)
 260
 261     '{'# -> ITocurly : lexIface (stepOn buf)
 262     '}'# -> ITccurly : lexIface (stepOn buf)
 263     ')'# -> ITcparen : lexIface (stepOn buf)
 264     '['# ->
 265       case lookAhead# buf 1# of
 266         ']'# -> ITconid SLIT("[]") : lexIface (stepOnBy# buf 2#)
 267         _    -> ITobrack : lexIface (stepOn buf)
 268     ']'# -> ITcbrack     : lexIface (stepOn buf)
 269     ','# -> ITcomma      : lexIface (stepOn buf)
 270     ':'# -> case lookAhead# buf 1# of
 271               ':'# -> ITdcolon  : lexIface (stepOnBy# buf 2#)
 272               _    -> lex_id (incLexeme buf)
 273     ';'#  -> ITsemi     : lexIface (stepOn buf)
 274     '\"'# -> case untilEndOfString# (stepOn buf) of
 275               buf' ->
 276                   -- the string literal does *not* include the dquotes
 277                 case lexemeToFastString buf' of
 278                  v -> ITstring v : lexIface (stepOn (stepOverLexeme buf'))
 279
 280     '\''# -> --
 281              -- untilEndOfChar# extends the current lexeme until
 282              -- it hits a non-escaped single quote. The lexeme of the
 283              -- StringBuffer returned does *not* include the closing quote,
 284              -- hence we augment the lexeme and make sure to add the
 285              -- starting quote, before `read'ing the string.
 286              --
 287              case untilEndOfChar# (stepOn buf) of
 288                buf' -> case reads ('\'':lexemeToString (incLexeme buf')) of
 289                         [  (ch, rest)] -> ITchar ch : lexIface (stepOverLexeme (incLexeme buf'))
 290
 291 -- ``thingy'' form for casm
 292     '`'# ->
 293             case lookAhead# buf 1# of
 294               '`'# -> lex_cstring (stepOnBy# buf 2#) -- remove the `` and go.
 295               _    -> lex_id (incLexeme buf)         -- add ` to lexeme and assume
 296                                                      -- scanning an id of some sort.
 297 -- Keywords
 298     '_'# ->
 299          case lookAhead# buf 1# of
 300            'S'# -> case lookAhead# buf 2# of
 301                     '_'# -> ITstrict :
 302                             lex_demand (stepOnUntil (not . isSpace)
 303                                                     (stepOnBy# buf 3#)) -- past _S_
 304            's'# -> case prefixMatch (stepOnBy# buf 2#) "cc_" of
 305                      Just buf' -> lex_scc (stepOnUntil (not . isSpace)
 306                                                        (stepOverLexeme buf'))
 307                      Nothing   -> lex_keyword (stepOnBy# buf 1#) -- drop the '_' and assume
 308                                                                  -- it is a keyword.
 309            _    -> lex_keyword (stepOn buf)
 310
 311     '\NUL'# ->
 312             if bufferExhausted (stepOn buf) then
 313                []
 314             else
 315                lex_id buf
 316     c ->
 317         if isDigit (C# c) then
 318            lex_num (id) (ord# c -# ord# '0'#) (incLexeme buf)
 319         else
 320            lex_id buf
 321 --  where
 322 lex_comment buf =
 323 --   _trace ("comment: "++[C# (currentChar# buf)]) $
 324    case untilChar# buf '\n'# of {buf' -> lexIface (stepOverLexeme buf')}
 325
 326 ------------------
 327 lex_demand buf =
 328 -- _trace ("demand: "++[C# (currentChar# buf)]) $
 329  case read_em [] buf of { (ls,buf') -> ITdemand ls : lexIface (stepOverLexeme buf')}
 330  where
 331    -- code snatched from Demand.lhs
 332   read_em acc buf =
 333 --   _trace ("read_em: "++[C# (currentChar# buf)]) $
 334    case currentChar# buf of
 335     'L'# -> read_em (WwLazy False : acc) (stepOn buf)
 336     'A'# -> read_em (WwLazy True  : acc) (stepOn buf)
 337     'S'# -> read_em (WwStrict     : acc) (stepOn buf)
 338     'P'# -> read_em (WwPrim       : acc) (stepOn buf)
 339     'E'# -> read_em (WwEnum       : acc) (stepOn buf)
 340     ')'# -> (reverse acc, stepOn buf)
 341     'U'# -> do_unpack True  acc (stepOnBy# buf 2#)
 342     'u'# -> do_unpack False acc (stepOnBy# buf 2#)
 343     _    -> (reverse acc, buf)
 344
 345   do_unpack wrapper_unpacks acc buf
 346    = case read_em [] buf of
 347       (stuff, rest) -> read_em (WwUnpack wrapper_unpacks stuff : acc) rest
 348
 349 ------------------
 350 lex_scc buf =
 351 -- _trace ("scc: "++[C# (currentChar# buf)]) $
 352  case currentChar# buf of
 353   '"'# ->
 354       -- YUCK^2
 355      case prefixMatch (stepOn buf) "NO_CC\"" of
 356       Just buf' -> ITscc noCostCentre : lexIface (stepOverLexeme buf')
 357       Nothing ->
 358        case prefixMatch (stepOn buf) "CURRENT_CC\"" of
 359         Just buf' -> ITscc useCurrentCostCentre : lexIface (stepOverLexeme buf')
 360         Nothing   ->
 361          case prefixMatch (stepOn buf) "OVERHEAD\"" of
 362          Just buf' -> ITscc overheadCostCentre : lexIface (stepOverLexeme buf')
 363          Nothing   ->
 364           case prefixMatch (stepOn buf) "DONT_CARE\"" of
 365            Just buf' -> ITscc dontCareCostCentre : lexIface (stepOverLexeme buf')
 366            Nothing   ->
 367             case prefixMatch (stepOn buf) "SUBSUMED\"" of
 368              Just buf' -> ITscc subsumedCosts : lexIface (stepOverLexeme buf')
 369              Nothing ->
 370               case prefixMatch (stepOn buf) "CAFs_in_...\"" of
 371                Just buf' -> ITscc preludeCafsCostCentre : lexIface (stepOverLexeme buf')
 372                Nothing ->
 373                 case prefixMatch (stepOn buf) "CC_CAFs_in_..." of
 374                  Just buf' ->
 375                   case untilChar# (stepOverLexeme buf') '\"'# of
 376                    buf'' -> ITscc (mkAllCafsCC ({-module-}lexemeToFastString buf'') _NIL_):
 377                             lexIface (stepOverLexeme buf'')
 378                  Nothing ->
 379                   case prefixMatch (stepOn buf) "DICTs_in_...\"" of
 380                    Just buf' -> ITscc (preludeDictsCostCentre True) : lexIface (stepOverLexeme buf')
 381                    Nothing ->
 382                     case prefixMatch (stepOn buf) "CC_DICTs_in_..." of
 383                      Just buf' ->
 384                       case untilChar# (stepOverLexeme buf') '\"'# of
 385                        buf'' -> ITscc (mkAllDictsCC (lexemeToFastString buf'') _NIL_ True):
 386                                 lexIface (stepOverLexeme buf'')
 387                      Nothing ->
 388                       case prefixMatch (stepOn buf) "CAF:" of
 389                        Just buf' ->
 390                         case untilChar# (stepOverLexeme buf') '\"'# of
 391                          buf'' -> ITscc (cafifyCC (mkUserCC (lexemeToFastString buf'') _NIL_ _NIL_)):
 392                                   lexIface (stepOverLexeme buf'')
 393                        Nothing ->
 394                         case untilChar# (stepOn buf) '\"'# of
 395                            buf' -> ITscc (mkUserCC (lexemeToFastString buf') _NIL_ _NIL_):
 396                                    lexIface (stepOverLexeme buf')
 397   c -> ITunknown [C# c] : lexIface (stepOn buf)
 398
 399
 400 -----------
 401 lex_num :: (Int -> Int) -> Int# -> StringBuffer -> [IfaceToken]
 402 lex_num minus acc# buf =
 403 -- _trace ("lex_num: "++[C# (currentChar# buf)]) $
 404  case scanNumLit (I# acc#) buf of
 405      (acc',buf') ->
 406        case currentChar# buf' of
 407          '.'# ->
 408              -- this case is not optimised at all, as the
 409              -- presence of floating point numbers in interface
 410              -- files is not that common. (ToDo)
 411             case expandWhile (isDigit) (incLexeme buf') of
 412               buf'' -> -- points to first non digit char
 413                 case reads (lexemeToString buf'') of
 414                   [(v,_)] -> ITdouble v : lexIface (stepOverLexeme buf'')
 415          _ -> ITinteger (fromInt (minus acc')) : lexIface (stepOverLexeme buf')
 416
 417 --         case reads (lexemeToString buf') of
 418 --           [(i,_)] -> ITinteger i : lexIface (stepOverLexeme buf')
 419
 420 ------------
 421 lex_keyword buf =
 422 -- _trace ("lex_keyword: "++[C# (currentChar# buf)]) $
 423  case currentChar# buf of
 424   ':'# -> case lookAhead# buf 1# of
 425             '_'# -> -- a binding, type (and other id-info) follows,
 426                     -- to make the parser ever so slightly, we push
 427                     --
 428                 lex_decl (stepOnBy# buf 2#)
 429             v# -> ITunknown (['_',':',C# v#]) : lexIface (stepOnBy# buf 2#)
 430   _ ->
 431     case expandWhile (is_kwd_char) buf of
 432      buf' ->
 433       let kw = lexemeToFastString buf' in
 434 --    _trace ("kw: "++lexemeToString buf') $
 435       case lookupUFM ifaceKeywordsFM kw of
 436        Nothing -> ITunknown (_UNPK_ kw) : -- (minor) sigh
 437                   lexIface (stepOverLexeme buf')
 438        Just xx -> xx : lexIface (stepOverLexeme buf')
 439
 440 lex_decl buf =
 441  case expandUntilMatch buf ";;" of
 442    buf' ->
 443 --      _trace (show (lexemeToString buf')) $
 444       case currentChar# buf' of
 445        '\n'# -> -- newline, no id info.
 446            ITtysig (lexIface (lexemeToBuffer (decLexeme buf'))) :
 447            lexIface (stepOverLexeme buf')
 448        '\r'# -> -- just to be sure for those Win* boxes..
 449            ITtysig (lexIface (lexemeToBuffer (decLexeme buf'))) :
 450            lexIface (stepOverLexeme buf')
 451        '\NUL'# ->
 452            ITtysig (lexIface (lexemeToBuffer (decLexeme buf'))) :
 453            lexIface (stepOverLexeme buf')
 454        c     -> -- run all over the id info
 455          case expandUntilMatch (stepOverLexeme buf') ";;" of
 456            buf'' ->
 457                     --_trace ((C# c):show (lexemeToString (decLexeme buf')))  $
 458                     --_trace (show (lexemeToString (decLexeme buf''))) $
 459                     ITtysig (lexIface (lexemeToBuffer (decLexeme buf'))):
 460                     let ls = lexIface (stepOverLexeme buf'') in
 461                     if opt_IgnoreIfacePragmas then
 462                         ls
 463                     else
 464                         let is = lexIface (lexemeToBuffer (decLexeme buf'')) in
 465                         --_trace (show is) $
 466                         ITidinfo is : ls
 467
 468 -- ToDo: hammer!
 469 is_kwd_char c@(C# c#) =
 470  isAlphanum c || -- OLD: c `elem` "_@/\\"
 471  (case c# of
 472    '_'#  -> True
 473    '@'#  -> True
 474    '/'#  -> True
 475    '\\'# -> True
 476    _     -> False)
 477
 478
 479
 480 -----------
 481 lex_cstring buf =
 482 -- _trace ("lex_cstring: "++[C# (currentChar# buf)]) $
 483  case expandUntilMatch buf "\'\'" of
 484    buf' -> ITstring (lexemeToFastString (setCurrentPos# buf' (negateInt# 2#))) :
 485            lexIface (stepOverLexeme buf')
 486
 487 -----------
 488 lex_tuple module_dot buf =
 489 -- _trace ("lex_tuple: "++[C# (currentChar# buf)]) $
 490   go 2 buf
 491   where
 492    go n buf =
 493     case currentChar# buf of
 494       ','# -> go (n+1) (stepOn buf)
 495       ')'# -> end_lex_id module_dot (ITconid (mkTupNameStr n)) (stepOn buf)
 496       _    -> ITunknown ("tuple " ++ show n) : lexIface buf
 497
 498 -- Similarly ' itself is ok inside an identifier, but not at the start
 499
 500 id_arr :: _ByteArray Int
 501 id_arr =
 502  unsafePerformPrimIO (
 503   newCharArray (0,255) `thenPrimIO` \ barr ->
 504   let
 505    loop 256# = returnPrimIO ()
 506    loop i# =
 507     if isAlphanum (C# (chr# i#)) || is_sym (chr# i#) then
 508        writeCharArray barr (I# i#) '\1' `seqPrimIO`
 509        loop (i# +# 1#)
 510     else
 511        writeCharArray barr (I# i#) '\0' `seqPrimIO`
 512        loop (i# +# 1#)
 513   in
 514   loop 0#                    `seqPrimIO`
 515   unsafeFreezeByteArray barr)
 516
 517 is_id_char (C# c#) =
 518  let
 519   _ByteArray _ arr# = id_arr
 520  in
 521  case ord# (indexCharArray# arr# (ord# c#)) of
 522   0# -> False
 523   1# -> True
 524
 525 --is_id_char c@(C# c#)  = isAlphanum c || is_sym c#
 526
 527 is_sym c#=
 528  case c# of {
 529    ':'# -> True; '_'# -> True; '\''# -> True; '!'# -> True;
 530    '#'# -> True; '$'# -> True; ':'# -> True;  '%'# -> True;
 531    '&'# -> True; '*'# -> True; '+'# -> True; '.'# -> True;
 532    '/'# -> True; '<'# -> True; '='# -> True; '>'# -> True;
 533    '?'# -> True; '\\'# -> True; '^'# -> True; '|'# -> True;
 534    '-'# -> True; '~'# -> True; _ -> False }
 535
 536 --isAlphanum c || c `elem` ":_'!#$%&*+./<=>?@\\^|-~" -- ToDo: add ISOgraphic
 537
 538
 539 mod_arr :: _ByteArray Int
 540 mod_arr =
 541  unsafePerformPrimIO (
 542   newCharArray (0,255) `thenPrimIO` \ barr ->
 543   let
 544    loop 256# = returnPrimIO ()
 545    loop i# =
 546     if isAlphanum (C# (chr# i#)) || i# ==# (ord# '_'#) || i# ==# (ord# '\''#) then
 547        writeCharArray barr (I# i#) '\1' `seqPrimIO`
 548        loop (i# +# 1#)
 549     else
 550        writeCharArray barr (I# i#) '\0' `seqPrimIO`
 551        loop (i# +# 1#)
 552   in
 553   loop 0#                    `seqPrimIO`
 554   unsafeFreezeByteArray barr)
 555
 556
 557 is_mod_char (C# c#) =
 558  let
 559   _ByteArray _ arr# = mod_arr
 560  in
 561  case ord# (indexCharArray# arr# (ord# c#)) of
 562   0# -> False
 563   1# -> True
 564
 565 --isAlphanum c || c == '_' || c== '\'' --`elem` "_'"
 566
 567 {-
 568 lex_id cs =
 569  case _scc_ "lex_id.span" my_span' (is_mod_char) cs of
 570    (xs, len, cs') ->
 571     case cs' of
 572      [] -> case xs of
 573             [] -> lex_id2 Nothing cs
 574             _  -> lex_id3 Nothing len xs cs
 575
 576      '.':cs'' ->
 577         case xs of
 578           [] -> lex_id2 Nothing cs
 579           _  ->
 580            let
 581             pk_str = _PK_ (xs::String)
 582             len = lengthPS pk_str
 583            in
 584            if len==len+1 then
 585               error "Well, I never!"
 586            else
 587               lex_id2 (Just pk_str) cs''
 588      _ -> case xs of
 589             [] -> lex_id2 Nothing cs
 590             _  -> lex_id3 Nothing len xs cs'
 591
 592 -}
 593
 594 lex_id buf =
 595 -- _trace ("lex_id: "++[C# (currentChar# buf)]) $
 596  case expandWhile (is_mod_char) buf of
 597    buf' ->
 598     case currentChar# buf' of
 599      '.'# ->
 600         if not (emptyLexeme buf') then
 601 --         _trace ("lex_id: "++(C# (currentChar# (stepOverLexeme buf'))):show (lexemeToFastString buf')) $
 602            case lexemeToFastString buf' of
 603              l@(FastString u# l# ba#) -> lex_id2 (Just (FastString u# l# ba#))
 604                                                  (stepOn (stepOverLexeme buf'))
 605         else
 606            lex_id2 Nothing buf'
 607      _  -> lex_id2 Nothing buf'
 608
 609 -- Dealt with the Module.part
 610 lex_id2 module_dot buf =
 611 -- _trace ("lex_id2: "++[C# (currentChar# buf)]) $
 612  case currentChar# buf of
 613   '['# ->
 614     case lookAhead# buf 1# of
 615      ']'# -> end_lex_id module_dot (ITconid SLIT("[]")) (stepOnBy# buf 2#)
 616      _    -> lex_id3 module_dot buf
 617   '('# ->
 618     case lookAhead# buf 1# of
 619      ')'# -> end_lex_id module_dot (ITconid SLIT("()")) (stepOnBy# buf 2#)
 620      ','# -> lex_tuple module_dot (stepOnBy# buf 2#)
 621      _    -> lex_id3 module_dot buf
 622   ':'# -> lex_id3 module_dot (incLexeme buf)
 623   _    -> lex_id3 module_dot buf
 624
 625
 626
 627 -- Dealt with [], (), : special cases
 628
 629 lex_id3 module_dot buf =
 630 -- _trace ("lex_id3: "++[C# (currentChar# buf)]) $
 631  case expandWhile (is_id_char) buf of
 632   buf' ->
 633     case module_dot of
 634      Just _ ->
 635        end_lex_id module_dot (mk_var_token lexeme) (stepOverLexeme buf')
 636      Nothing ->
 637        case _scc_ "Lex.haskellKeyword" lookupUFM haskellKeywordsFM lexeme of
 638          Just kwd_token -> kwd_token           : lexIface new_buf
 639          Nothing        -> mk_var_token lexeme : lexIface new_buf
 640     where
 641      lexeme  = lexemeToFastString buf'
 642      new_buf = stepOverLexeme buf'
 643
 644
 645 {- OLD:
 646 lex_id2 module_dot [] ('[': ']': cs) = end_lex_id module_dot (ITconid SLIT("[]")) cs
 647 lex_id2 module_dot [] ('(': ')': cs) = end_lex_id module_dot (ITconid SLIT("()")) cs
 648 lex_id2 module_dot [] ('(': ',': cs) = lex_tuple module_dot cs
 649 lex_id2 module_dot [] (':' : cs)     = lex_id3 module_dot [':'] cs
 650 lex_id2 module_dot xs cs             = lex_id3 module_dot xs cs
 651 -}
 652
 653
 654 -- Dealt with [], (), : special cases
 655
 656 {-
 657 lex_id3 module_dot len_xs xs cs =
 658  case my_span' (is_id_char) cs of
 659    (xs1,len_xs1,rest) ->
 660     case module_dot of
 661      Just m  -> end_lex_id (Just m) (mk_var_token rxs) rest --OLD:_PK_ (reverse xs))) rest
 662      Nothing ->
 663       case _scc_ "Lex.haskellKeyword" lookupUFM haskellKeywordsFM rxs of
 664        Just kwd_token -> kwd_token          : lexIface rest
 665        other          -> token : lexIface cs end_lex_id Nothing (mk_var_token rxs) rest
 666     where
 667      rxs = packNChars (len_xs+len_xs1) (xs++xs1) -- OLD: _PK_ (reverse xs)
 668 -}
 669 mk_var_token pk_str =
 670      let
 671       f = _HEAD_ pk_str
 672      in
 673      --
 674      -- These tests assume a non- ISO-8859-1 impl of isUpper&isLower,
 675      -- remove the second half of disjunction when using a 1.3 prelude.
 676      --
 677      if      isUpper f    then ITconid pk_str
 678      else if isLower f    then ITvarid pk_str
 679      else if f == ':'     then ITconsym pk_str
 680      else if isLowerISO f then ITvarid pk_str
 681      else if isUpperISO f then ITconid pk_str
 682      else ITvarsym pk_str
 683
 684 {-
 685     mk_var_token xs@(f:_) | isUpper f || isUpperISO f = ITconid n
 686                           | f == ':'              = ITconsym n
 687                           | isAlpha f             = ITvarid n
 688                           | otherwise             = ITvarsym n
 689                 where
 690                       n = _PK_ xs
 691 -}
 692
 693 end_lex_id Nothing token buf  = token : lexIface buf
 694 end_lex_id (Just m) token buf =
 695  case token of
 696    ITconid n  -> ITqconid  (m,n)         : lexIface buf
 697    ITvarid n  -> ITqvarid  (m,n)         : lexIface buf
 698    ITconsym n -> ITqconsym (m,n)         : lexIface buf
 699    ITvarsym n -> ITqvarsym (m,n)         : lexIface buf
 700    ITbang     -> ITqvarsym (m,SLIT("!")) : lexIface buf
 701    _          -> ITunknown (show token)  : lexIface buf
 702
 703 ------------
 704 ifaceKeywordsFM :: UniqFM IfaceToken
 705 ifaceKeywordsFM = listToUFM $
 706       map (\ (x,y) -> (_PK_ x,y))
 707        [("/\\_",                ITbiglam)
 708        ,("@_",                  ITatsign)
 709        ,("interface_",          ITinterface)
 710        ,("usages_",             ITusages)
 711        ,("versions_",           ITversions)
 712        ,("exports_",            ITexports)
 713        ,("instance_modules_",   ITinstance_modules)
 714        ,("instances_",          ITinstances)
 715        ,("fixities_",           ITfixities)
 716        ,("declarations_",       ITdeclarations)
 717        ,("pragmas_",            ITpragmas)
 718        ,("forall_",             ITforall)
 719        ,("U_",                  ITunfold)
 720        ,("A_",                  ITarity)
 721        ,("coerce_in_",          ITcoerce_in)
 722        ,("coerce_out_",         ITcoerce_out)
 723        ,("bot_",                ITbottom)
 724        ,("integer_",            ITinteger_lit)
 725        ,("rational_",           ITrational_lit)
 726        ,("addr_",               ITaddr_lit)
 727        ,("float_",              ITfloat_lit)
 728        ,("string_",             ITstring_lit)
 729        ,("litlit_",             ITlit_lit)
 730        ,("ccall_",              ITccall (False, False))
 731        ,("ccall_GC_",           ITccall (False, True))
 732        ,("casm_",               ITccall (True,  False))
 733        ,("casm_GC_",            ITccall (True,  True))
 734        ]
 735
 736 haskellKeywordsFM = listToUFM $
 737       map (\ (x,y) -> (_PK_ x,y))
 738       [ ("data",                ITdata)
 739        ,("type",                ITtype)
 740        ,("newtype",             ITnewtype)
 741        ,("class",               ITclass)
 742        ,("where",               ITwhere)
 743        ,("instance",            ITinstance)
 744        ,("infixl",              ITinfixl)
 745        ,("infixr",              ITinfixr)
 746        ,("infix",               ITinfix)
 747        ,("case",                ITcase)
 748        ,("case#",               ITprim_case)
 749        ,("of",                  ITof)
 750        ,("in",                  ITin)
 751        ,("let",                 ITlet)
 752        ,("letrec",              ITletrec)
 753        ,("deriving",            ITderiving)
 754
 755        ,("->",                  ITrarrow)
 756        ,("\\",                  ITlam)
 757        ,("|",                   ITvbar)
 758        ,("!",                   ITbang)
 759        ,("=>",                  ITdarrow)
 760        ,("=",                   ITequal)
 761        ]
 762
 763
 764 -- doDiscard rips along really fast looking for a double semicolon,
 765 -- indicating the end of the pragma we're skipping
 766 doDiscard buf =
 767  case currentChar# buf of
 768    ';'# ->
 769     case lookAhead# buf 1# of
 770       ';'# -> stepOnBy# buf 2#
 771       _    -> doDiscard (stepOn buf)
 772    _ -> doDiscard (stepOn buf)
 773
 774 \end{code}
 775
 776 begin{code}
 777 my_span :: (a -> Bool) -> [a] -> ([a],[a])
 778 my_span p xs = go [] xs
 779   where
 780     go so_far (x:xs') | p x = go (x:so_far) xs'
 781     go so_far xs            = (reverse so_far, xs)
 782
 783 my_span' :: (a -> Bool) -> [a] -> ([a],Int,[a])
 784 my_span' p xs = go [] 0 xs
 785   where
 786     go so_far n (x:xs') | p x = go (x:so_far) (n+1) xs'
 787     go so_far n xs            = (reverse so_far,n, xs)
 788 end{code}
 789
 790
 791 %************************************************************************
 792 %*                                                                      *
 793 \subsection{Other utility functions
 794 %*                                                                      *
 795 %************************************************************************
 796
 797 \begin{code}
 798 type IfM a = MaybeErr a Error
 799
 800 returnIf   :: a -> IfM a
 801 thenIf     :: IfM a -> (a -> IfM b) -> IfM b
 802 happyError :: Int -> [IfaceToken] -> IfM a
 803
 804 returnIf a = Succeeded a
 805
 806 thenIf (Succeeded a) k = k a
 807 thenIf (Failed  err) _ = Failed err
 808
 809 happyError ln toks = Failed (ifaceParseErr ln toks)
 810
 811 -----------------------------------------------------------------
 812
 813 ifaceParseErr ln toks sty
 814   = ppCat [ppPStr SLIT("Interface-file parse error: line"), ppInt ln, ppPStr SLIT("toks="), ppStr (show (take 10 toks))]
 815 \end{code}