ghc/compiler/reader/Lex.lhs

   1 %
   2 % (c) The GRASP/AQUA Project, Glasgow University, 1992-1998
   3 %
   4 \section[Lexical analysis]{Lexical analysis}
   5
   6 --------------------------------------------------------
   7 [Jan 98]
   8 There's a known bug in here:
   9
  10         If an interface file ends prematurely, Lex tries to
  11         do headFS of an empty FastString.
  12
  13 An example that provokes the error is
  14
  15         f _:_ _forall_ [a] <<<END OF FILE>>>
  16 --------------------------------------------------------
  17
  18 \begin{code}
  19 {-# OPTIONS -#include "ctypes.h" #-}
  20
  21 module Lex (
  22
  23         ifaceParseErr,
  24
  25         -- Monad for parser
  26         IfaceToken(..), lexIface, IfM, thenIf, returnIf, getSrcLocIf,
  27         checkVersion,
  28         happyError,
  29         StringBuffer
  30
  31     ) where
  32
  33 #include "HsVersions.h"
  34
  35 import Char             ( ord, isSpace )
  36 import List             ( isSuffixOf )
  37
  38 import CostCentre       -- Pretty much all of it
  39 import IdInfo           ( InlinePragInfo(..) )
  40 import Name             ( isLowerISO, isUpperISO, mkModule )
  41
  42 import PrelMods         ( mkTupNameStr, mkUbxTupNameStr )
  43 import CmdLineOpts      ( opt_IgnoreIfacePragmas, opt_HiVersion, opt_NoHiCheck )
  44 import Demand           ( Demand(..) {- instance Read -} )
  45 import UniqFM           ( UniqFM, listToUFM, lookupUFM)
  46 import BasicTypes       ( NewOrData(..), IfaceFlavour(..) )
  47 import SrcLoc           ( SrcLoc, incSrcLine, srcLocFile )
  48
  49 import Maybes           ( MaybeErr(..) )
  50 import ErrUtils         ( ErrMsg )
  51 import Outputable
  52
  53 import FastString
  54 import StringBuffer
  55 import GlaExts
  56 import ST               ( runST )
  57
  58 #if __GLASGOW_HASKELL__ >= 303
  59 import Bits
  60 import Word
  61 #endif
  62
  63 import Addr
  64
  65 import PrelRead                 ( readRational__ ) -- Glasgow non-std
  66 \end{code}
  67
  68 %************************************************************************
  69 %*                                                                      *
  70 \subsection{Data types}
  71 %*                                                                      *
  72 %************************************************************************
  73
  74 The token data type, fairly un-interesting except from one
  75 constructor, @ITidinfo@, which is used to lazily lex id info (arity,
  76 strictness, unfolding etc).
  77
  78 The Idea/Observation here is that the renamer needs to scan through
  79 all of an interface file before it can continue. But only a fraction
  80 of the information contained in the file turns out to be useful, so
  81 delaying as much as possible of the scanning and parsing of an
  82 interface file Makes Sense (Heap profiles of the compiler
  83 show a reduction in heap usage by at least a factor of two,
  84 post-renamer).
  85
  86 Hence, the interface file lexer spots when value declarations are
  87 being scanned and return the @ITidinfo@ and @ITtype@ constructors
  88 for the type and any other id info for that binding (unfolding, strictness
  89 etc). These constructors are applied to the result of lexing these sub-chunks.
  90
  91 The lexing of the type and id info is all done lazily, of course, so
  92 the scanning (and subsequent parsing) will be done *only* on the ids the
  93 renamer finds out that it is interested in. The rest will just be junked.
  94 Laziness, you know it makes sense :-)
  95
  96 \begin{code}
  97 data IfaceToken
  98   = ITcase                      -- Haskell keywords
  99   | ITclass
 100   | ITdata
 101   | ITdefault
 102   | ITderiving
 103   | ITdo
 104   | ITelse
 105   | ITif
 106   | ITimport
 107   | ITin
 108   | ITinfix
 109   | ITinfixl
 110   | ITinfixr
 111   | ITinstance
 112   | ITlet
 113   | ITmodule
 114   | ITnewtype
 115   | ITof
 116   | ITthen
 117   | ITtype
 118   | ITwhere
 119   | ITas
 120   | ITqualified
 121   | IThiding
 122
 123   | ITinterface                 -- GHC-extension keywords
 124   | ITexport
 125   | ITinstimport
 126   | ITforall
 127   | ITletrec
 128   | ITcoerce
 129   | ITinline
 130   | ITccall (Bool,Bool) -- (is_casm, may_gc)
 131   | ITdefaultbranch
 132   | ITbottom
 133   | ITinteger_lit
 134   | ITfloat_lit
 135   | ITrational_lit
 136   | ITaddr_lit
 137   | ITlit_lit
 138   | ITstring_lit
 139   | ITtypeapp
 140   | ITarity
 141   | ITspecialise
 142   | ITnocaf
 143   | ITunfold InlinePragInfo
 144   | ITstrict ([Demand], Bool)
 145   | ITscc CostCentre
 146
 147   | ITdotdot                    -- reserved symbols
 148   | ITdcolon
 149   | ITequal
 150   | ITlam
 151   | ITvbar
 152   | ITlarrow
 153   | ITrarrow
 154   | ITat
 155   | ITtilde
 156   | ITdarrow
 157   | ITminus
 158   | ITbang
 159
 160   | ITbiglam                    -- GHC-extension symbols
 161
 162   | ITocurly                    -- special symbols
 163   | ITccurly
 164   | ITobrack
 165   | ITcbrack
 166   | IToparen
 167   | ITcparen
 168   | IToubxparen
 169   | ITcubxparen
 170   | ITsemi
 171   | ITcomma
 172
 173   | ITvarid   FAST_STRING       -- identifiers
 174   | ITconid   FAST_STRING
 175   | ITvarsym  FAST_STRING
 176   | ITconsym  FAST_STRING
 177   | ITqvarid  (FAST_STRING,FAST_STRING,IfaceFlavour)
 178   | ITqconid  (FAST_STRING,FAST_STRING,IfaceFlavour)
 179   | ITqvarsym (FAST_STRING,FAST_STRING,IfaceFlavour)
 180   | ITqconsym (FAST_STRING,FAST_STRING,IfaceFlavour)
 181
 182   | ITpragma StringBuffer
 183
 184   | ITchar Char
 185   | ITstring FAST_STRING
 186   | ITinteger Integer
 187   | ITrational Rational
 188
 189   | ITunknown String            -- Used when the lexer can't make sense of it
 190   | ITeof                       -- end of file token
 191   deriving Text -- debugging
 192
 193 instance Text CostCentre -- cheat!
 194
 195 \end{code}
 196
 197 %************************************************************************
 198 %*                                                                      *
 199 \subsection{The lexical analyser}
 200 %*                                                                      *
 201 %************************************************************************
 202
 203 \begin{code}
 204 lexIface :: (IfaceToken -> IfM a) -> IfM a
 205 lexIface cont buf =
 206  _scc_ "Lexer"
 207 -- if bufferExhausted buf then
 208 --  []
 209 -- else
 210 --  trace ("Lexer: '"++[C# (currentChar# buf)]++"'") $
 211   case currentChar# buf of
 212       -- whitespace and comments, ignore.
 213     ' '#  -> lexIface cont (stepOn buf)
 214     '\t'# -> lexIface cont (stepOn buf)
 215     '\n'# -> \ loc -> lexIface cont (stepOn buf) (incSrcLine loc)
 216
 217 -- Numbers and comments
 218     '-'#  ->
 219       case lookAhead# buf 1# of
 220 --        '-'# -> lex_comment cont (stepOnBy# buf 2#)
 221         c    ->
 222           if is_digit c
 223           then lex_num cont (negate) (ord# c -# ord# '0'#) (incLexeme (incLexeme buf))
 224           else lex_sym cont buf
 225
 226     '{'# ->                             -- look for "{-##" special iface pragma
 227         case lookAhead# buf 1# of
 228            '-'# -> case lookAhead# buf 2# of
 229                     '#'# -> case lookAhead# buf 3# of
 230                                 '#'# ->
 231                                    let (lexeme, buf')
 232                                           = doDiscard False (stepOnBy# buf 4#) in
 233                                    cont (ITpragma lexeme) buf'
 234                                 _ ->  lex_nested_comment (lexIface cont) buf
 235                     _    -> cont ITocurly (stepOn buf)
 236                             -- lex_nested_comment (lexIface cont) buf
 237            _ -> cont ITocurly (stepOn buf)
 238
 239     -- special symbols ----------------------------------------------------
 240     '('# ->
 241          case prefixMatch (stepOn buf) "..)" of
 242            Just buf' ->  cont ITdotdot (stepOverLexeme buf')
 243            Nothing ->
 244             case lookAhead# buf 1# of
 245               '#'# -> cont IToubxparen (stepOnBy# buf 2#)
 246               _    -> cont IToparen (stepOn buf)
 247     ')'# -> cont ITcparen (stepOn buf)
 248     '}'# -> cont ITccurly (stepOn buf)
 249     '#'# -> case lookAhead# buf 1# of
 250                 ')'# -> cont ITcubxparen (stepOnBy# buf 2#)
 251                 _    -> lex_sym cont (incLexeme buf)
 252     '['# -> cont ITobrack (stepOn buf)
 253     ']'# -> cont ITcbrack (stepOn buf)
 254     ','# -> cont ITcomma  (stepOn buf)
 255     ';'# -> cont ITsemi   (stepOn buf)
 256
 257     -- strings/characters -------------------------------------------------
 258     '\"'#{-"-} -> case untilEndOfString# (stepOn buf) of
 259               buf' ->
 260                   -- the string literal does *not* include the dquotes
 261                 case lexemeToFastString buf' of
 262                  v -> cont (ITstring v) (stepOn (stepOverLexeme buf'))
 263
 264     '\''# -> --
 265              -- untilEndOfChar# extends the current lexeme until
 266              -- it hits a non-escaped single quote. The lexeme of the
 267              -- StringBuffer returned does *not* include the closing quote,
 268              -- hence we augment the lexeme and make sure to add the
 269              -- starting quote, before `read'ing the string.
 270              --
 271              case untilEndOfChar# (stepOn buf) of
 272                buf' -> case reads ('\'':lexemeToString (incLexeme buf')) of
 273                         [  (ch, rest)] -> cont (ITchar ch) (stepOverLexeme (incLexeme buf'))
 274
 275     -- strictness pragma and __scc treated specially.
 276     '_'# ->
 277          case lookAhead# buf 1# of
 278            '_'# -> case lookAhead# buf 2# of
 279                     'S'# ->
 280                         lex_demand cont (stepOnUntil (not . isSpace)
 281                                         (stepOnBy# buf 3#)) -- past __S
 282                     's'# ->
 283                         case prefixMatch (stepOnBy# buf 3#) "cc" of
 284                                Just buf' -> lex_scc cont
 285                                                 (stepOnUntil (not . isSpace)
 286                                                 (stepOverLexeme buf'))
 287                                Nothing   -> lex_id cont buf
 288                     _ -> lex_id cont buf
 289            _    -> lex_id cont buf
 290
 291 -- ``thingy'' form for casm
 292     '`'# ->
 293             case lookAhead# buf 1# of
 294               '`'# -> lex_cstring cont (stepOnBy# buf 2#) -- remove the `s and go.
 295               _    -> lex_sym cont (incLexeme buf)         -- add ` to lexeme and assume
 296                                                      -- scanning an id of some sort.
 297
 298     '\NUL'# ->
 299             if bufferExhausted (stepOn buf) then
 300                cont ITeof buf
 301             else
 302                trace "lexIface: misplaced NUL?" $
 303                cont (ITunknown "\NUL") (stepOn buf)
 304
 305     c | is_digit  c -> lex_num cont (id) (ord# c -# ord# '0'#) (incLexeme buf)
 306       | is_symbol c -> lex_sym cont buf
 307       | is_upper  c -> lex_con cont buf
 308       | is_ident  c -> lex_id  cont buf
 309
 310 --  where
 311 lex_comment cont buf =
 312 --   _trace ("comment: "++[C# (currentChar# buf)]) $
 313    case untilChar# buf '\n'# of {buf' -> lexIface cont (stepOverLexeme buf')}
 314
 315 -------------------------------------------------------------------------------
 316
 317 lex_nested_comment cont buf =
 318   case currentChar# buf of
 319         '-'# -> case lookAhead# buf 1# of
 320                  '}'# -> cont (stepOnBy# buf 2#)
 321                  _    -> lex_nested_comment cont (stepOn buf)
 322
 323         '{'# -> case lookAhead# buf 1# of
 324                  '-'# -> lex_nested_comment
 325                                 (lex_nested_comment cont)
 326                                 (stepOnBy# buf 2#)
 327                  _    -> lex_nested_comment cont (stepOn buf)
 328
 329         _   -> lex_nested_comment cont (stepOn buf)
 330
 331 -------------------------------------------------------------------------------
 332
 333 lex_demand cont buf =
 334  case read_em [] buf of { (ls,buf') ->
 335  case currentChar# buf' of
 336    'B'# -> cont (ITstrict (ls, True )) (stepOverLexeme (stepOn buf'))
 337    _    -> cont (ITstrict (ls, False)) (stepOverLexeme buf')
 338  }
 339  where
 340    -- code snatched from Demand.lhs
 341   read_em acc buf =
 342    case currentChar# buf of
 343     'L'# -> read_em (WwLazy False : acc) (stepOn buf)
 344     'A'# -> read_em (WwLazy True  : acc) (stepOn buf)
 345     'S'# -> read_em (WwStrict     : acc) (stepOn buf)
 346     'P'# -> read_em (WwPrim       : acc) (stepOn buf)
 347     'E'# -> read_em (WwEnum       : acc) (stepOn buf)
 348     ')'# -> (reverse acc, stepOn buf)
 349     'U'# -> do_unpack DataType True  acc (stepOnBy# buf 2#)
 350     'u'# -> do_unpack DataType False acc (stepOnBy# buf 2#)
 351     'N'# -> do_unpack NewType True  acc (stepOnBy# buf 2#)
 352     'n'# -> do_unpack NewType False acc (stepOnBy# buf 2#)
 353     _    -> (reverse acc, buf)
 354
 355   do_unpack new_or_data wrapper_unpacks acc buf
 356    = case read_em [] buf of
 357       (stuff, rest) -> read_em (WwUnpack new_or_data wrapper_unpacks stuff : acc) rest
 358
 359 ------------------
 360 lex_scc cont buf =
 361  case currentChar# buf of
 362   '"'# ->
 363         case prefixMatch (stepOn buf) "CAFs." of
 364          Just buf' ->
 365           case untilChar# (stepOverLexeme buf') '\"'# of
 366            buf'' -> cont (ITscc (mkAllCafsCC (mkModule (lexemeToString buf'')) _NIL_))
 367                          (stepOn (stepOverLexeme buf''))
 368          Nothing ->
 369             case prefixMatch (stepOn buf) "DICTs." of
 370              Just buf' ->
 371               case untilChar# (stepOverLexeme buf') '\"'# of
 372                buf'' -> cont (ITscc (mkAllDictsCC (mkModule (lexemeToString buf'')) _NIL_ True))
 373                         (stepOn (stepOverLexeme buf''))
 374              Nothing ->
 375               let
 376                match_user_cc buf =
 377                 case untilChar# buf '/'# of
 378                  buf' ->
 379                   let mod_name = mkModule (lexemeToString buf') in
 380 --                        case untilChar# (stepOn (stepOverLexeme buf')) '/'# of
 381 --                         buf'' ->
 382 --                            let grp_name = lexemeToFastString buf'' in
 383                     case untilChar# (stepOn (stepOverLexeme buf')) '\"'# of
 384                      buf'' ->
 385                        -- The label may contain arbitrary characters, so it
 386                        -- may have been escaped etc., hence we `read' it in to get
 387                        -- rid of these meta-chars in the string and then pack it (again.)
 388                        -- ToDo: do the same for module name (single quotes allowed in m-names).
 389                        -- BTW, the code in this module is totally gruesome..
 390                        let upk_label = _UNPK_ (lexemeToFastString buf'') in
 391                        case reads ('"':upk_label++"\"") of
 392                         ((cc_label,_):_) ->
 393                             let cc_name = _PK_ cc_label in
 394                             (mkUserCC cc_name mod_name _NIL_{-grp_name-},
 395                              stepOn (stepOverLexeme buf''))
 396                         _ ->
 397                           trace ("trouble lexing scc label: " ++ upk_label ++ " , ignoring")
 398                           (mkUserCC _NIL_ mod_name _NIL_{-grp_name-},
 399                            stepOn (stepOverLexeme buf''))
 400               in
 401               case prefixMatch (stepOn buf) "CAF:" of
 402                Just buf' ->
 403                  case match_user_cc (stepOverLexeme buf') of
 404                   (cc, buf'') -> cont (ITscc (cafifyCC cc)) buf''
 405                Nothing ->
 406                  case match_user_cc (stepOn buf) of
 407                   (cc, buf'') -> cont (ITscc cc) buf''
 408   c -> cont (ITunknown [C# c]) (stepOn buf)
 409
 410
 411 -----------
 412 lex_num :: (IfaceToken -> IfM a) -> (Int -> Int) -> Int# -> IfM a
 413 lex_num cont minus acc# buf =
 414  --trace ("lex_num: "++[C# (currentChar# buf)]) $
 415  case scanNumLit (I# acc#) buf of
 416      (acc',buf') ->
 417        case currentChar# buf' of
 418          '.'# ->
 419              -- this case is not optimised at all, as the
 420              -- presence of floating point numbers in interface
 421              -- files is not that common. (ToDo)
 422             case expandWhile# is_digit (incLexeme buf') of
 423               buf2 -> -- points to first non digit char
 424                 let l = case currentChar# buf2 of
 425                           'e'# -> let buf3 = incLexeme buf2 in
 426                               case currentChar# buf3 of
 427                                 '-'# -> expandWhile# is_digit (incLexeme buf3)
 428                                 _    -> expandWhile# is_digit buf3
 429                           _ -> buf2
 430                 in let v = readRational__ (lexemeToString l) in
 431                    cont (ITrational v) (stepOverLexeme l)
 432
 433          _ -> cont (ITinteger (fromInt (minus acc'))) (stepOverLexeme buf')
 434
 435 -----------
 436 lex_cstring cont buf =
 437  case expandUntilMatch buf "\'\'" of
 438    buf' -> cont (ITstring (lexemeToFastString (setCurrentPos# buf' (negateInt# 2#))))
 439            (stepOverLexeme buf')
 440
 441 ------------------------------------------------------------------------------
 442 -- Character Classes
 443
 444 is_ident, is_symbol, is_any, is_upper, is_digit :: Char# -> Bool
 445
 446 {-# INLINE is_ctype #-}
 447 #if __GLASGOW_HASKELL__ >= 303
 448 is_ctype :: Word8 -> Char# -> Bool
 449 is_ctype mask = \c ->
 450    (indexWord8OffAddr (``char_types'' :: Addr) (ord (C# c)) .&. mask) /= 0
 451 #else
 452 is_ctype :: Int -> Char# -> Bool
 453 is_ctype (I# mask) = \c ->
 454     let (A# ctype) = ``char_types'' :: Addr
 455         flag_word  = int2Word# (ord# (indexCharOffAddr# ctype (ord# c)))
 456     in
 457         (flag_word `and#` (int2Word# mask)) `neWord#` (int2Word# 0#)
 458 #endif
 459
 460 is_ident  = is_ctype 1
 461 is_symbol = is_ctype 2
 462 is_any    = is_ctype 4
 463 is_space  = is_ctype 8
 464 is_upper  = is_ctype 16
 465 is_digit  = is_ctype 32
 466
 467 -----------------------------------------------------------------------------
 468 -- identifiers, symbols etc.
 469
 470 lex_id cont buf =
 471  case expandWhile# is_ident buf of { buf1 ->
 472  case expandWhile# (eqChar# '#'#) buf1 of { buf' -> -- only if GHC extns on
 473  let new_buf = stepOverLexeme buf'
 474      lexeme  = lexemeToFastString buf'
 475  in
 476  case _scc_ "Lex.haskellKeyword" lookupUFM haskellKeywordsFM lexeme of {
 477         Just kwd_token -> --trace ("hkeywd: "++_UNPK_(lexeme)) $
 478                           cont kwd_token new_buf;
 479         Nothing        ->
 480  case lookupUFM ifaceKeywordsFM lexeme of {
 481         Just kwd_token -> --trace ("ifacekeywd: "++_UNPK_(lexeme)) $
 482                           cont kwd_token new_buf;
 483         Nothing        -> --trace ("id: "++_UNPK_(lexeme)) $
 484                           cont (mk_var_token lexeme) new_buf
 485  }}}}
 486
 487 lex_sym cont buf =
 488  case expandWhile# is_symbol buf of
 489    buf'
 490      | is_comment lexeme -> lex_comment cont new_buf
 491      | otherwise         ->
 492            case lookupUFM haskellKeySymsFM lexeme of {
 493                 Just kwd_token -> --trace ("keysym: "++unpackFS lexeme) $
 494                                   cont kwd_token new_buf ;
 495                 Nothing        -> --trace ("sym: "++unpackFS lexeme) $
 496                                   cont (mk_var_token lexeme) new_buf
 497            }
 498         where lexeme = lexemeToFastString buf'
 499               new_buf = stepOverLexeme buf'
 500
 501               is_comment fs
 502                 | len < 2   = False
 503                 | otherwise = trundle 0
 504                   where
 505                    len = lengthFS fs
 506
 507                    trundle n | n == len  = True
 508                              | otherwise = indexFS fs n == '-' && trundle (n+1)
 509
 510 lex_con cont buf =
 511  case expandWhile# is_ident buf of        { buf1 ->
 512  case expandWhile# (eqChar# '#'#) buf1 of { buf' ->
 513  case currentChar# buf' of
 514      '.'# -> munch HiFile
 515      '!'# -> munch HiBootFile
 516      _    -> just_a_conid
 517
 518    where
 519     just_a_conid = --trace ("con: "++unpackFS lexeme) $
 520                    cont (ITconid lexeme) new_buf
 521     lexeme = lexemeToFastString buf'
 522     new_buf = stepOverLexeme buf'
 523     munch hif = lex_qid cont lexeme hif (stepOn new_buf) just_a_conid
 524  }}
 525
 526 lex_qid cont mod hif buf just_a_conid =
 527  case currentChar# buf of
 528   '['# ->       -- Special case for []
 529     case lookAhead# buf 1# of
 530      ']'# -> cont (ITqconid  (mod,SLIT("[]"),hif)) (stepOnBy# buf 2#)
 531      _    -> just_a_conid
 532
 533   '('# ->  -- Special case for (,,,)
 534            -- This *is* necessary to deal with e.g. "instance C PrelBase.(,,)"
 535     case lookAhead# buf 1# of
 536      '#'# -> case lookAhead# buf 2# of
 537                 ','# -> lex_ubx_tuple cont mod hif (stepOnBy# buf 3#)
 538                                 just_a_conid
 539                 _    -> just_a_conid
 540      ')'# -> cont (ITqconid (mod,SLIT("()"),hif)) (stepOnBy# buf 2#)
 541      ','# -> lex_tuple cont mod hif (stepOnBy# buf 2#) just_a_conid
 542      _    -> just_a_conid
 543
 544   '-'# -> case lookAhead# buf 1# of
 545             '>'# -> cont (ITqconid (mod,SLIT("->"),hif)) (stepOnBy# buf 2#)
 546             _    -> lex_id3 cont mod hif buf just_a_conid
 547   _    -> lex_id3 cont mod hif buf just_a_conid
 548
 549 lex_id3 cont mod hif buf just_a_conid
 550   | is_symbol c =
 551      case expandWhile# is_symbol buf of { buf' ->
 552      let
 553       lexeme  = lexemeToFastString buf'
 554       new_buf = stepOverLexeme buf'
 555      in
 556      case lookupUFM haskellKeySymsFM lexeme of {
 557         Just kwd_token -> just_a_conid; -- avoid M.:: etc.
 558         Nothing        -> cont (mk_qvar_token mod hif lexeme) new_buf
 559      }}
 560
 561   | otherwise   =
 562      case expandWhile# is_ident buf of { buf1 ->
 563      if emptyLexeme buf1
 564             then just_a_conid
 565             else
 566      case expandWhile# (eqChar# '#'#) buf1 of { buf' -> -- only if GHC extns on
 567      let
 568       lexeme  = lexemeToFastString buf'
 569       new_buf = stepOverLexeme buf'
 570      in
 571      case _scc_ "Lex.haskellKeyword" lookupUFM haskellKeywordsFM lexeme of {
 572             Just kwd_token -> just_a_conid; -- avoid M.where etc.
 573             Nothing        ->
 574      case lookupUFM ifaceKeywordsFM lexeme of { -- only for iface files
 575             Just kwd_token -> just_a_conid;
 576             Nothing        -> cont (mk_qvar_token mod hif lexeme) new_buf
 577      }}}}
 578   where c = currentChar# buf
 579
 580 mk_var_token pk_str
 581   | is_upper f          = ITconid pk_str
 582         -- _[A-Z] is treated as a constructor in interface files.
 583   | f `eqChar#` '_'# && not (_NULL_ tl)
 584         && (case _HEAD_ tl of { C# g -> is_upper g }) = ITconid pk_str
 585   | is_ident f          = ITvarid pk_str
 586   | f `eqChar#` ':'#    = ITconsym pk_str
 587   | otherwise           = ITvarsym pk_str
 588   where
 589       (C# f) = _HEAD_ pk_str
 590       tl     = _TAIL_ pk_str
 591
 592 mk_qvar_token m hif token =
 593  case mk_var_token token of
 594    ITconid n  -> ITqconid  (m,n,hif)
 595    ITvarid n  -> ITqvarid  (m,n,hif)
 596    ITconsym n -> ITqconsym (m,n,hif)
 597    ITvarsym n -> ITqvarsym (m,n,hif)
 598    _          -> ITunknown (show token)
 599 \end{code}
 600
 601 ----------------------------------------------------------------------------
 602 Horrible stuff for dealing with M.(,,,)
 603
 604 \begin{code}
 605 lex_tuple cont mod hif buf back_off =
 606   go 2 buf
 607   where
 608    go n buf =
 609     case currentChar# buf of
 610       ','# -> go (n+1) (stepOn buf)
 611       ')'# -> cont (ITqconid (mod, snd (mkTupNameStr n),hif)) (stepOn buf)
 612       _    -> back_off
 613
 614 lex_ubx_tuple cont mod hif buf back_off =
 615   go 2 buf
 616   where
 617    go n buf =
 618     case currentChar# buf of
 619       ','# -> go (n+1) (stepOn buf)
 620       '#'# -> case lookAhead# buf 1# of
 621                 ')'# -> cont (ITqconid (mod, snd (mkUbxTupNameStr n), hif))
 622                                  (stepOnBy# buf 2#)
 623                 _    -> back_off
 624       _    -> back_off
 625 \end{code}
 626
 627 -----------------------------------------------------------------------------
 628 Keyword Lists
 629
 630 \begin{code}
 631 ifaceKeywordsFM :: UniqFM IfaceToken
 632 ifaceKeywordsFM = listToUFM $
 633       map (\ (x,y) -> (_PK_ x,y))
 634      [  ("__interface",         ITinterface),
 635         ("__export",            ITexport),
 636         ("__instimport",        ITinstimport),
 637         ("__forall",            ITforall),
 638         ("__letrec",            ITletrec),
 639         ("__coerce",            ITcoerce),
 640         ("__inline",            ITinline),
 641         ("__DEFAULT",           ITdefaultbranch),
 642         ("__bot",               ITbottom),
 643         ("__integer",           ITinteger_lit),
 644         ("__float",             ITfloat_lit),
 645         ("__rational",          ITrational_lit),
 646         ("__addr",              ITaddr_lit),
 647         ("__litlit",            ITlit_lit),
 648         ("__string",            ITstring_lit),
 649         ("__a",                 ITtypeapp),
 650         ("__A",                 ITarity),
 651         ("__P",                 ITspecialise),
 652         ("__C",                 ITnocaf),
 653         ("__u",                 ITunfold NoInlinePragInfo),
 654         ("__U",                 ITunfold IWantToBeINLINEd),
 655         ("__UU",                ITunfold IMustBeINLINEd),
 656         ("__Unot",              ITunfold IMustNotBeINLINEd),
 657         ("__Ux",                ITunfold IAmALoopBreaker),
 658
 659         ("__ccall",             ITccall (False, False)),
 660         ("__ccall_GC",          ITccall (False, True)),
 661         ("__casm",              ITccall (True,  False)),
 662         ("__casm_GC",           ITccall (True,  True)),
 663
 664         ("/\\",                 ITbiglam)
 665        ]
 666
 667 haskellKeywordsFM = listToUFM $
 668       map (\ (x,y) -> (_PK_ x,y))
 669        [( "case",       ITcase ),
 670         ( "class",      ITclass ),
 671         ( "data",       ITdata ),
 672         ( "default",    ITdefault ),
 673         ( "deriving",   ITderiving ),
 674         ( "do",         ITdo ),
 675         ( "else",       ITelse ),
 676         ( "if",         ITif ),
 677         ( "import",     ITimport ),
 678         ( "in",         ITin ),
 679         ( "infix",      ITinfix ),
 680         ( "infixl",     ITinfixl ),
 681         ( "infixr",     ITinfixr ),
 682         ( "instance",   ITinstance ),
 683         ( "let",        ITlet ),
 684         ( "module",     ITmodule ),
 685         ( "newtype",    ITnewtype ),
 686         ( "of",         ITof ),
 687         ( "then",       ITthen ),
 688         ( "type",       ITtype ),
 689         ( "where",      ITwhere )
 690
 691 --      These three aren't Haskell keywords at all
 692 --      and 'as' is often used as a variable name
 693 --      ( "as",         ITas ),
 694 --      ( "qualified",  ITqualified ),
 695 --      ( "hiding",     IThiding )
 696
 697      ]
 698
 699 haskellKeySymsFM = listToUFM $
 700         map (\ (x,y) -> (_PK_ x,y))
 701       [ ("..",                  ITdotdot)
 702        ,("::",                  ITdcolon)
 703        ,("=",                   ITequal)
 704        ,("\\",                  ITlam)
 705        ,("|",                   ITvbar)
 706        ,("<-",                  ITlarrow)
 707        ,("->",                  ITrarrow)
 708        ,("@",                   ITat)
 709        ,("~",                   ITtilde)
 710        ,("=>",                  ITdarrow)
 711        ,("-",                   ITminus)
 712        ,("!",                   ITbang)
 713        ]
 714 \end{code}
 715
 716 -----------------------------------------------------------------------------
 717 doDiscard rips along really fast, looking for a '#-}',
 718 indicating the end of the pragma we're skipping
 719
 720 \begin{code}
 721 doDiscard inStr buf =
 722  case currentChar# buf of
 723    '#'# | not inStr ->
 724        case lookAhead# buf 1# of { '#'# ->
 725        case lookAhead# buf 2# of { '-'# ->
 726        case lookAhead# buf 3# of { '}'# ->
 727            (lexemeToBuffer buf, stepOverLexeme (setCurrentPos# buf 4#));
 728         _    -> doDiscard inStr (incLexeme buf) };
 729         _    -> doDiscard inStr (incLexeme buf) };
 730         _    -> doDiscard inStr (incLexeme buf) }
 731    '"'# ->
 732        let
 733         odd_slashes buf flg i# =
 734           case lookAhead# buf i# of
 735            '\\'# -> odd_slashes buf (not flg) (i# -# 1#)
 736            _     -> flg
 737        in
 738        case lookAhead# buf (negateInt# 1#) of --backwards, actually
 739          '\\'# -> -- escaping something..
 740            if odd_slashes buf True (negateInt# 2#) then
 741                -- odd number of slashes, " is escaped.
 742               doDiscard inStr (incLexeme buf)
 743            else
 744                -- even number of slashes, \ is escaped.
 745               doDiscard (not inStr) (incLexeme buf)
 746          _ -> case inStr of -- forced to avoid build-up
 747                True  -> doDiscard False (incLexeme buf)
 748                False -> doDiscard True  (incLexeme buf)
 749    _ -> doDiscard inStr (incLexeme buf)
 750
 751 \end{code}
 752
 753 -----------------------------------------------------------------------------
 754
 755 \begin{code}
 756 type IfM a = StringBuffer       -- Input string
 757           -> SrcLoc
 758           -> MaybeErr a ErrMsg
 759
 760 returnIf   :: a -> IfM a
 761 returnIf a s l = Succeeded a
 762
 763 thenIf     :: IfM a -> (a -> IfM b) -> IfM b
 764 m `thenIf` k = \s l ->
 765         case m s l of
 766                 Succeeded a -> k a s l
 767                 Failed err  -> Failed err
 768
 769 getSrcLocIf :: IfM SrcLoc
 770 getSrcLocIf s l = Succeeded l
 771
 772 happyError :: IfM a
 773 happyError s l = Failed (ifaceParseErr s l)
 774
 775
 776 {-
 777  Note that if the name of the file we're processing ends
 778  with `hi-boot', we accept it on faith as having the right
 779  version. This is done so that .hi-boot files that comes
 780  with hsc don't have to be updated before every release,
 781  *and* it allows us to share .hi-boot files with versions
 782  of hsc that don't have .hi version checking (e.g., ghc-2.10's)
 783
 784  If the version number is 0, the checking is also turned off.
 785  (needed to deal with GHC.hi only!)
 786
 787  Once we can assume we're compiling with a version of ghc that
 788  supports interface file checking, we can drop the special
 789  pleading
 790 -}
 791 checkVersion :: Maybe Integer -> IfM ()
 792 checkVersion mb@(Just v) s l
 793  | (v==0) || (v == fromInt opt_HiVersion) || opt_NoHiCheck = Succeeded ()
 794  | otherwise = Failed (ifaceVersionErr mb l ([]::[IfaceToken]){-Todo-})
 795 checkVersion mb@Nothing  s l
 796  | "hi-boot" `isSuffixOf` (_UNPK_ (srcLocFile l)) = Succeeded ()
 797  | otherwise = Failed (ifaceVersionErr mb l ([]::[IfaceToken]){-Todo-})
 798
 799 -----------------------------------------------------------------
 800
 801 ifaceParseErr :: StringBuffer -> SrcLoc -> ErrMsg
 802 ifaceParseErr s l
 803   = hsep [ppr l, ptext SLIT("Interface-file parse error;"),
 804           ptext SLIT("current input ="), text first_bit]
 805   where
 806     first_bit = lexemeToString (stepOnBy# s 100#)
 807
 808 ifaceVersionErr hi_vers l toks
 809   = hsep [ppr l, ptext SLIT("Interface file version error;"),
 810           ptext SLIT("Expected"), int opt_HiVersion,
 811           ptext SLIT("found "), pp_version]
 812     where
 813      pp_version =
 814       case hi_vers of
 815         Nothing -> ptext SLIT("pre ghc-3.02 version")
 816         Just v  -> ptext SLIT("version") <+> integer v
 817
 818 \end{code}