ghc/compiler/reader/Lex.lhs

   1 %
   2 % (c) The GRASP/AQUA Project, Glasgow University, 1992-1998
   3 %
   4 \section[Lexical analysis]{Lexical analysis}
   5
   6 --------------------------------------------------------
   7 [Jan 98]
   8 There's a known bug in here:
   9
  10         If an interface file ends prematurely, Lex tries to
  11         do headFS of an empty FastString.
  12
  13 An example that provokes the error is
  14
  15         f _:_ _forall_ [a] <<<END OF FILE>>>
  16 --------------------------------------------------------
  17
  18 \begin{code}
  19 {-# OPTIONS -#include "ctypes.h" #-}
  20
  21 module Lex (
  22
  23         ifaceParseErr,
  24
  25         -- Monad for parser
  26         IfaceToken(..), lexIface, IfM, thenIf, returnIf, getSrcLocIf,
  27         checkVersion,
  28         happyError,
  29         StringBuffer
  30
  31     ) where
  32
  33 #include "HsVersions.h"
  34
  35 import Char             ( ord, isSpace )
  36 import List             ( isSuffixOf )
  37
  38 import IdInfo           ( InlinePragInfo(..), CprInfo(..) )
  39 import Name             ( isLowerISO, isUpperISO )
  40 import Module           ( IfaceFlavour, hiFile, hiBootFile )
  41 import PrelMods         ( mkTupNameStr, mkUbxTupNameStr )
  42 import CmdLineOpts      ( opt_IgnoreIfacePragmas, opt_HiVersion, opt_NoHiCheck )
  43 import Demand           ( Demand(..) {- instance Read -} )
  44 import UniqFM           ( UniqFM, listToUFM, lookupUFM)
  45 import BasicTypes       ( NewOrData(..) )
  46 import SrcLoc           ( SrcLoc, incSrcLine, srcLocFile )
  47
  48 import Maybes           ( MaybeErr(..) )
  49 import ErrUtils         ( Message )
  50 import Outputable
  51
  52 import FastString
  53 import StringBuffer
  54 import GlaExts
  55 import ST               ( runST )
  56
  57 #if __GLASGOW_HASKELL__ >= 303
  58 import Bits
  59 import Word
  60 #endif
  61
  62 import Addr
  63 import PrelRead                 ( readRational__ ) -- Glasgow non-std
  64 \end{code}
  65
  66 %************************************************************************
  67 %*                                                                      *
  68 \subsection{Data types}
  69 %*                                                                      *
  70 %************************************************************************
  71
  72 The token data type, fairly un-interesting except from one
  73 constructor, @ITidinfo@, which is used to lazily lex id info (arity,
  74 strictness, unfolding etc).
  75
  76 The Idea/Observation here is that the renamer needs to scan through
  77 all of an interface file before it can continue. But only a fraction
  78 of the information contained in the file turns out to be useful, so
  79 delaying as much as possible of the scanning and parsing of an
  80 interface file Makes Sense (Heap profiles of the compiler
  81 show a reduction in heap usage by at least a factor of two,
  82 post-renamer).
  83
  84 Hence, the interface file lexer spots when value declarations are
  85 being scanned and return the @ITidinfo@ and @ITtype@ constructors
  86 for the type and any other id info for that binding (unfolding, strictness
  87 etc). These constructors are applied to the result of lexing these sub-chunks.
  88
  89 The lexing of the type and id info is all done lazily, of course, so
  90 the scanning (and subsequent parsing) will be done *only* on the ids the
  91 renamer finds out that it is interested in. The rest will just be junked.
  92 Laziness, you know it makes sense :-)
  93
  94 \begin{code}
  95 data IfaceToken
  96   = ITcase                      -- Haskell keywords
  97   | ITclass
  98   | ITdata
  99   | ITdefault
 100   | ITderiving
 101   | ITdo
 102   | ITelse
 103   | ITif
 104   | ITimport
 105   | ITin
 106   | ITinfix
 107   | ITinfixl
 108   | ITinfixr
 109   | ITinstance
 110   | ITlet
 111   | ITmodule
 112   | ITnewtype
 113   | ITof
 114   | ITthen
 115   | ITtype
 116   | ITwhere
 117   | ITas
 118   | ITqualified
 119   | IThiding
 120
 121   | ITinterface                 -- GHC-extension keywords
 122   | ITexport
 123   | ITinstimport
 124   | ITforall
 125   | ITletrec
 126   | ITcoerce
 127   | ITinline
 128   | ITccall (Bool,Bool,Bool)    -- (is_dyn, is_casm, may_gc)
 129   | ITdefaultbranch
 130   | ITbottom
 131   | ITinteger_lit
 132   | ITfloat_lit
 133   | ITrational_lit
 134   | ITaddr_lit
 135   | ITlit_lit
 136   | ITstring_lit
 137   | ITtypeapp
 138   | ITonce                      -- usage annotations
 139   | ITmany
 140   | ITarity
 141   | ITspecialise
 142   | ITnocaf
 143   | ITunfold InlinePragInfo
 144   | ITstrict ([Demand], Bool)
 145   | ITcprinfo (CprInfo)
 146   | ITscc
 147   | ITsccAllCafs
 148
 149   | ITdotdot                    -- reserved symbols
 150   | ITdcolon
 151   | ITequal
 152   | ITlam
 153   | ITvbar
 154   | ITlarrow
 155   | ITrarrow
 156   | ITat
 157   | ITtilde
 158   | ITdarrow
 159   | ITminus
 160   | ITbang
 161
 162   | ITbiglam                    -- GHC-extension symbols
 163
 164   | ITocurly                    -- special symbols
 165   | ITccurly
 166   | ITobrack
 167   | ITcbrack
 168   | IToparen
 169   | ITcparen
 170   | IToubxparen
 171   | ITcubxparen
 172   | ITsemi
 173   | ITcomma
 174
 175   | ITvarid   FAST_STRING       -- identifiers
 176   | ITconid   FAST_STRING
 177   | ITvarsym  FAST_STRING
 178   | ITconsym  FAST_STRING
 179   | ITqvarid  (FAST_STRING,FAST_STRING,IfaceFlavour)
 180   | ITqconid  (FAST_STRING,FAST_STRING,IfaceFlavour)
 181   | ITqvarsym (FAST_STRING,FAST_STRING,IfaceFlavour)
 182   | ITqconsym (FAST_STRING,FAST_STRING,IfaceFlavour)
 183
 184   | ITpragma StringBuffer
 185
 186   | ITchar Char
 187   | ITstring FAST_STRING
 188   | ITinteger Integer
 189   | ITrational Rational
 190
 191   | ITunknown String            -- Used when the lexer can't make sense of it
 192   | ITeof                       -- end of file token
 193   deriving Text -- debugging
 194 \end{code}
 195
 196 %************************************************************************
 197 %*                                                                      *
 198 \subsection{The lexical analyser}
 199 %*                                                                      *
 200 %************************************************************************
 201
 202 \begin{code}
 203 lexIface :: (IfaceToken -> IfM a) -> IfM a
 204 lexIface cont buf =
 205  _scc_ "Lexer"
 206 -- if bufferExhausted buf then
 207 --  []
 208 -- else
 209 --  trace ("Lexer: '"++[C# (currentChar# buf)]++"'") $
 210   case currentChar# buf of
 211       -- whitespace and comments, ignore.
 212     ' '#  -> lexIface cont (stepOn buf)
 213     '\t'# -> lexIface cont (stepOn buf)
 214     '\n'# -> \ loc -> lexIface cont (stepOn buf) (incSrcLine loc)
 215
 216 -- Numbers and comments
 217     '-'#  ->
 218       case lookAhead# buf 1# of
 219 --        '-'# -> lex_comment cont (stepOnBy# buf 2#)
 220         c    ->
 221           if is_digit c
 222           then lex_num cont (negate) (ord# c -# ord# '0'#) (incLexeme (incLexeme buf))
 223           else lex_sym cont buf
 224
 225     '{'# ->                             -- look for "{-##" special iface pragma
 226         case lookAhead# buf 1# of
 227            '-'# -> case lookAhead# buf 2# of
 228                     '#'# -> case lookAhead# buf 3# of
 229                                 '#'# ->
 230                                    let (lexeme, buf')
 231                                           = doDiscard False (stepOnBy# buf 4#) in
 232                                    cont (ITpragma lexeme) buf'
 233                                 _ ->  lex_nested_comment (lexIface cont) buf
 234                     _    -> cont ITocurly (stepOn buf)
 235                             -- lex_nested_comment (lexIface cont) buf
 236            _ -> cont ITocurly (stepOn buf)
 237
 238     -- special symbols ----------------------------------------------------
 239     '('# ->
 240          case prefixMatch (stepOn buf) "..)" of
 241            Just buf' ->  cont ITdotdot (stepOverLexeme buf')
 242            Nothing ->
 243             case lookAhead# buf 1# of
 244               '#'# -> cont IToubxparen (stepOnBy# buf 2#)
 245               _    -> cont IToparen (stepOn buf)
 246     ')'# -> cont ITcparen (stepOn buf)
 247     '}'# -> cont ITccurly (stepOn buf)
 248     '#'# -> case lookAhead# buf 1# of
 249                 ')'# -> cont ITcubxparen (stepOnBy# buf 2#)
 250                 _    -> lex_sym cont (incLexeme buf)
 251     '['# -> cont ITobrack (stepOn buf)
 252     ']'# -> cont ITcbrack (stepOn buf)
 253     ','# -> cont ITcomma  (stepOn buf)
 254     ';'# -> cont ITsemi   (stepOn buf)
 255
 256     -- strings/characters -------------------------------------------------
 257     '\"'#{-"-} -> case untilEndOfString# (stepOn buf) of
 258               buf' ->
 259                   -- the string literal does *not* include the dquotes
 260                 case lexemeToFastString buf' of
 261                  v -> cont (ITstring v) (stepOn (stepOverLexeme buf'))
 262
 263     '\''# -> --
 264              -- untilEndOfChar# extends the current lexeme until
 265              -- it hits a non-escaped single quote. The lexeme of the
 266              -- StringBuffer returned does *not* include the closing quote,
 267              -- hence we augment the lexeme and make sure to add the
 268              -- starting quote, before `read'ing the string.
 269              --
 270              case untilEndOfChar# (stepOn buf) of
 271                buf' -> case reads ('\'':lexemeToString (incLexeme buf')) of
 272                         [  (ch, rest)] -> cont (ITchar ch) (stepOverLexeme (incLexeme buf'))
 273
 274     -- strictness and cpr pragmas and __scc treated specially.
 275     '_'# ->
 276          case lookAhead# buf 1# of
 277            '_'# -> case lookAhead# buf 2# of
 278                     'S'# ->
 279                         lex_demand cont (stepOnUntil (not . isSpace)
 280                                         (stepOnBy# buf 3#)) -- past __S
 281                     'M'# ->
 282                         lex_cpr cont (stepOnUntil (not . isSpace)
 283                                      (stepOnBy# buf 3#)) -- past __M
 284                     's'# ->
 285                         case prefixMatch (stepOnBy# buf 3#) "cc" of
 286                                Just buf' -> lex_scc cont (stepOverLexeme buf')
 287                                Nothing   -> lex_id cont buf
 288                     _ -> lex_id cont buf
 289            _    -> lex_id cont buf
 290
 291 -- ``thingy'' form for casm
 292     '`'# ->
 293             case lookAhead# buf 1# of
 294               '`'# -> lex_cstring cont (stepOnBy# buf 2#) -- remove the `s and go.
 295               _    -> lex_sym cont (incLexeme buf)         -- add ` to lexeme and assume
 296                                                      -- scanning an id of some sort.
 297
 298     '\NUL'# ->
 299             if bufferExhausted (stepOn buf) then
 300                cont ITeof buf
 301             else
 302                trace "lexIface: misplaced NUL?" $
 303                cont (ITunknown "\NUL") (stepOn buf)
 304
 305     c | is_digit  c -> lex_num cont (id) (ord# c -# ord# '0'#) (incLexeme buf)
 306       | is_symbol c -> lex_sym cont buf
 307       | is_upper  c -> lex_con cont buf
 308       | is_ident  c -> lex_id  cont buf
 309
 310 --  where
 311 lex_comment cont buf =
 312 --   _trace ("comment: "++[C# (currentChar# buf)]) $
 313    case untilChar# buf '\n'# of {buf' -> lexIface cont (stepOverLexeme buf')}
 314
 315 -------------------------------------------------------------------------------
 316
 317 lex_nested_comment cont buf =
 318   case currentChar# buf of
 319         '-'# -> case lookAhead# buf 1# of
 320                  '}'# -> cont (stepOnBy# buf 2#)
 321                  _    -> lex_nested_comment cont (stepOn buf)
 322
 323         '{'# -> case lookAhead# buf 1# of
 324                  '-'# -> lex_nested_comment
 325                                 (lex_nested_comment cont)
 326                                 (stepOnBy# buf 2#)
 327                  _    -> lex_nested_comment cont (stepOn buf)
 328
 329         _   -> lex_nested_comment cont (stepOn buf)
 330
 331 -------------------------------------------------------------------------------
 332
 333 lex_demand cont buf =
 334  case read_em [] buf of { (ls,buf') ->
 335  case currentChar# buf' of
 336    'B'# -> cont (ITstrict (ls, True )) (stepOverLexeme (stepOn buf'))
 337    _    -> cont (ITstrict (ls, False)) (stepOverLexeme buf')
 338  }
 339  where
 340    -- code snatched from Demand.lhs
 341   read_em acc buf =
 342    case currentChar# buf of
 343     'L'# -> read_em (WwLazy False : acc) (stepOn buf)
 344     'A'# -> read_em (WwLazy True  : acc) (stepOn buf)
 345     'S'# -> read_em (WwStrict     : acc) (stepOn buf)
 346     'P'# -> read_em (WwPrim       : acc) (stepOn buf)
 347     'E'# -> read_em (WwEnum       : acc) (stepOn buf)
 348     ')'# -> (reverse acc, stepOn buf)
 349     'U'# -> do_unpack DataType True  acc (stepOnBy# buf 2#)
 350     'u'# -> do_unpack DataType False acc (stepOnBy# buf 2#)
 351     'N'# -> do_unpack NewType True  acc (stepOnBy# buf 2#)
 352     'n'# -> do_unpack NewType False acc (stepOnBy# buf 2#)
 353     _    -> (reverse acc, buf)
 354
 355   do_unpack new_or_data wrapper_unpacks acc buf
 356    = case read_em [] buf of
 357       (stuff, rest) -> read_em (WwUnpack new_or_data wrapper_unpacks stuff : acc) rest
 358
 359 lex_cpr cont buf =
 360  case read_em [] buf of { (cpr_inf,buf') ->
 361    ASSERT ( null (tail cpr_inf) )
 362    cont (ITcprinfo $ head cpr_inf) (stepOverLexeme buf')
 363  }
 364  where
 365    -- code snatched from lex_demand above
 366   read_em acc buf =
 367    case currentChar# buf of
 368     '-'# -> read_em (NoCPRInfo : acc) (stepOn buf)
 369     '('# -> do_unpack acc (stepOn buf)
 370     ')'# -> (reverse acc, stepOn buf)
 371     _    -> (reverse acc, buf)
 372
 373   do_unpack acc buf
 374    = case read_em [] buf of
 375       (stuff, rest) -> read_em ((CPRInfo stuff)  : acc) rest
 376
 377 ------------------
 378 lex_scc cont buf =
 379  case currentChar# buf of
 380   'C'# -> cont ITsccAllCafs  (stepOverLexeme (stepOn buf))
 381   other -> cont ITscc buf
 382
 383 -----------
 384 lex_num :: (IfaceToken -> IfM a) -> (Int -> Int) -> Int# -> IfM a
 385 lex_num cont minus acc# buf =
 386  --trace ("lex_num: "++[C# (currentChar# buf)]) $
 387  case scanNumLit (I# acc#) buf of
 388      (acc',buf') ->
 389        case currentChar# buf' of
 390          '.'# ->
 391              -- this case is not optimised at all, as the
 392              -- presence of floating point numbers in interface
 393              -- files is not that common. (ToDo)
 394             case expandWhile# is_digit (incLexeme buf') of
 395               buf2 -> -- points to first non digit char
 396                 let l = case currentChar# buf2 of
 397                           'e'# -> let buf3 = incLexeme buf2 in
 398                               case currentChar# buf3 of
 399                                 '-'# -> expandWhile# is_digit (incLexeme buf3)
 400                                 _    -> expandWhile# is_digit buf3
 401                           _ -> buf2
 402                 in let v = readRational__ (lexemeToString l) in
 403                    cont (ITrational v) (stepOverLexeme l)
 404
 405          _ -> cont (ITinteger (fromInt (minus acc'))) (stepOverLexeme buf')
 406
 407 -----------
 408 lex_cstring cont buf =
 409  case expandUntilMatch buf "\'\'" of
 410    buf' -> cont (ITstring (lexemeToFastString (setCurrentPos# buf' (negateInt# 2#))))
 411            (stepOverLexeme buf')
 412
 413 ------------------------------------------------------------------------------
 414 -- Character Classes
 415
 416 is_ident, is_symbol, is_any, is_upper, is_digit :: Char# -> Bool
 417
 418 {-# INLINE is_ctype #-}
 419 #if __GLASGOW_HASKELL__ >= 303
 420 is_ctype :: Word8 -> Char# -> Bool
 421 is_ctype mask = \c ->
 422    (indexWord8OffAddr (``char_types'' :: Addr) (ord (C# c)) .&. mask) /= 0
 423 #else
 424 is_ctype :: Int -> Char# -> Bool
 425 is_ctype (I# mask) = \c ->
 426     let (A# ctype) = ``char_types'' :: Addr
 427         flag_word  = int2Word# (ord# (indexCharOffAddr# ctype (ord# c)))
 428     in
 429         (flag_word `and#` (int2Word# mask)) `neWord#` (int2Word# 0#)
 430 #endif
 431
 432 is_ident  = is_ctype 1
 433 is_symbol = is_ctype 2
 434 is_any    = is_ctype 4
 435 is_space  = is_ctype 8
 436 is_upper  = is_ctype 16
 437 is_digit  = is_ctype 32
 438
 439 -----------------------------------------------------------------------------
 440 -- identifiers, symbols etc.
 441
 442 lex_id cont buf =
 443  case expandWhile# is_ident buf of { buf1 ->
 444  case expandWhile# (eqChar# '#'#) buf1 of { buf' -> -- only if GHC extns on
 445  let new_buf = stepOverLexeme buf'
 446      lexeme  = lexemeToFastString buf'
 447  in
 448  case _scc_ "Lex.haskellKeyword" lookupUFM haskellKeywordsFM lexeme of {
 449         Just kwd_token -> --trace ("hkeywd: "++_UNPK_(lexeme)) $
 450                           cont kwd_token new_buf;
 451         Nothing        ->
 452  case lookupUFM ifaceKeywordsFM lexeme of {
 453         Just kwd_token -> --trace ("ifacekeywd: "++_UNPK_(lexeme)) $
 454                           cont kwd_token new_buf;
 455         Nothing        -> --trace ("id: "++_UNPK_(lexeme)) $
 456                           cont (mk_var_token lexeme) new_buf
 457  }}}}
 458
 459 lex_sym cont buf =
 460  case expandWhile# is_symbol buf of
 461    buf'
 462      | is_comment lexeme -> lex_comment cont new_buf
 463      | otherwise         ->
 464            case lookupUFM haskellKeySymsFM lexeme of {
 465                 Just kwd_token -> --trace ("keysym: "++unpackFS lexeme) $
 466                                   cont kwd_token new_buf ;
 467                 Nothing        -> --trace ("sym: "++unpackFS lexeme) $
 468                                   cont (mk_var_token lexeme) new_buf
 469            }
 470         where lexeme = lexemeToFastString buf'
 471               new_buf = stepOverLexeme buf'
 472
 473               is_comment fs
 474                 | len < 2   = False
 475                 | otherwise = trundle 0
 476                   where
 477                    len = lengthFS fs
 478
 479                    trundle n | n == len  = True
 480                              | otherwise = indexFS fs n == '-' && trundle (n+1)
 481
 482 lex_con cont buf =
 483  case expandWhile# is_ident buf of        { buf1 ->
 484  case expandWhile# (eqChar# '#'#) buf1 of { buf' ->
 485  case currentChar# buf' of
 486      '.'# -> munch hiFile
 487      '!'# -> munch hiBootFile
 488      _    -> just_a_conid
 489
 490    where
 491     just_a_conid = --trace ("con: "++unpackFS lexeme) $
 492                    cont (ITconid lexeme) new_buf
 493     lexeme = lexemeToFastString buf'
 494     new_buf = stepOverLexeme buf'
 495     munch hif = lex_qid cont lexeme hif (stepOn new_buf) just_a_conid
 496  }}
 497
 498 lex_qid cont mod hif buf just_a_conid =
 499  case currentChar# buf of
 500   '['# ->       -- Special case for []
 501     case lookAhead# buf 1# of
 502      ']'# -> cont (ITqconid  (mod,SLIT("[]"),hif)) (stepOnBy# buf 2#)
 503      _    -> just_a_conid
 504
 505   '('# ->  -- Special case for (,,,)
 506            -- This *is* necessary to deal with e.g. "instance C PrelBase.(,,)"
 507     case lookAhead# buf 1# of
 508      '#'# -> case lookAhead# buf 2# of
 509                 ','# -> lex_ubx_tuple cont mod hif (stepOnBy# buf 3#)
 510                                 just_a_conid
 511                 _    -> just_a_conid
 512      ')'# -> cont (ITqconid (mod,SLIT("()"),hif)) (stepOnBy# buf 2#)
 513      ','# -> lex_tuple cont mod hif (stepOnBy# buf 2#) just_a_conid
 514      _    -> just_a_conid
 515
 516   '-'# -> case lookAhead# buf 1# of
 517             '>'# -> cont (ITqconid (mod,SLIT("->"),hif)) (stepOnBy# buf 2#)
 518             _    -> lex_id3 cont mod hif buf just_a_conid
 519   _    -> lex_id3 cont mod hif buf just_a_conid
 520
 521 lex_id3 cont mod hif buf just_a_conid
 522   | is_symbol c =
 523      case expandWhile# is_symbol buf of { buf' ->
 524      let
 525       lexeme  = lexemeToFastString buf'
 526       new_buf = stepOverLexeme buf'
 527      in
 528      case lookupUFM haskellKeySymsFM lexeme of {
 529         Just kwd_token -> just_a_conid; -- avoid M.:: etc.
 530         Nothing        -> cont (mk_qvar_token mod hif lexeme) new_buf
 531      }}
 532
 533   | otherwise   =
 534      case expandWhile# is_ident buf of { buf1 ->
 535      if emptyLexeme buf1
 536             then just_a_conid
 537             else
 538      case expandWhile# (eqChar# '#'#) buf1 of { buf' -> -- only if GHC extns on
 539      let
 540       lexeme  = lexemeToFastString buf'
 541       new_buf = stepOverLexeme buf'
 542      in
 543      case _scc_ "Lex.haskellKeyword" lookupUFM haskellKeywordsFM lexeme of {
 544             Just kwd_token -> just_a_conid; -- avoid M.where etc.
 545             Nothing        ->
 546      case lookupUFM ifaceKeywordsFM lexeme of { -- only for iface files
 547             Just kwd_token -> just_a_conid;
 548             Nothing        -> cont (mk_qvar_token mod hif lexeme) new_buf
 549      }}}}
 550   where c = currentChar# buf
 551
 552 mk_var_token pk_str
 553   | is_upper f          = ITconid pk_str
 554         -- _[A-Z] is treated as a constructor in interface files.
 555   | f `eqChar#` '_'# && not (_NULL_ tl)
 556         && (case _HEAD_ tl of { C# g -> is_upper g }) = ITconid pk_str
 557   | is_ident f          = ITvarid pk_str
 558   | f `eqChar#` ':'#    = ITconsym pk_str
 559   | otherwise           = ITvarsym pk_str
 560   where
 561       (C# f) = _HEAD_ pk_str
 562       tl     = _TAIL_ pk_str
 563
 564 mk_qvar_token m hif token =
 565  case mk_var_token token of
 566    ITconid n  -> ITqconid  (m,n,hif)
 567    ITvarid n  -> ITqvarid  (m,n,hif)
 568    ITconsym n -> ITqconsym (m,n,hif)
 569    ITvarsym n -> ITqvarsym (m,n,hif)
 570    _          -> ITunknown (show token)
 571 \end{code}
 572
 573 ----------------------------------------------------------------------------
 574 Horrible stuff for dealing with M.(,,,)
 575
 576 \begin{code}
 577 lex_tuple cont mod hif buf back_off =
 578   go 2 buf
 579   where
 580    go n buf =
 581     case currentChar# buf of
 582       ','# -> go (n+1) (stepOn buf)
 583       ')'# -> cont (ITqconid (mod, snd (mkTupNameStr n),hif)) (stepOn buf)
 584       _    -> back_off
 585
 586 lex_ubx_tuple cont mod hif buf back_off =
 587   go 2 buf
 588   where
 589    go n buf =
 590     case currentChar# buf of
 591       ','# -> go (n+1) (stepOn buf)
 592       '#'# -> case lookAhead# buf 1# of
 593                 ')'# -> cont (ITqconid (mod, snd (mkUbxTupNameStr n), hif))
 594                                  (stepOnBy# buf 2#)
 595                 _    -> back_off
 596       _    -> back_off
 597 \end{code}
 598
 599 -----------------------------------------------------------------------------
 600 Keyword Lists
 601
 602 \begin{code}
 603 ifaceKeywordsFM :: UniqFM IfaceToken
 604 ifaceKeywordsFM = listToUFM $
 605       map (\ (x,y) -> (_PK_ x,y))
 606      [  ("__interface",         ITinterface),
 607         ("__export",            ITexport),
 608         ("__instimport",        ITinstimport),
 609         ("__forall",            ITforall),
 610         ("__letrec",            ITletrec),
 611         ("__coerce",            ITcoerce),
 612         ("__inline",            ITinline),
 613         ("__DEFAULT",           ITdefaultbranch),
 614         ("__bot",               ITbottom),
 615         ("__integer",           ITinteger_lit),
 616         ("__float",             ITfloat_lit),
 617         ("__rational",          ITrational_lit),
 618         ("__addr",              ITaddr_lit),
 619         ("__litlit",            ITlit_lit),
 620         ("__string",            ITstring_lit),
 621         ("__a",                 ITtypeapp),
 622         ("__o",                 ITonce),
 623         ("__m",                 ITmany),
 624         ("__A",                 ITarity),
 625         ("__P",                 ITspecialise),
 626         ("__C",                 ITnocaf),
 627         ("__u",                 ITunfold NoInlinePragInfo),
 628         ("__U",                 ITunfold IWantToBeINLINEd),
 629         ("__UU",                ITunfold IMustBeINLINEd),
 630         ("__Unot",              ITunfold IMustNotBeINLINEd),
 631         ("__Ux",                ITunfold IAmALoopBreaker),
 632
 633         ("__ccall",             ITccall (False, False, False)),
 634         ("__ccall_GC",          ITccall (False, False, True)),
 635         ("__dyn_ccall",         ITccall (True,  False, False)),
 636         ("__dyn_ccall_GC",      ITccall (True,  False, True)),
 637         ("__casm",              ITccall (False, True,  False)),
 638         ("__dyn_casm",          ITccall (True,  True,  False)),
 639         ("__casm_GC",           ITccall (False, True,  True)),
 640         ("__dyn_casm_GC",       ITccall (True,  True,  True)),
 641
 642         ("/\\",                 ITbiglam)
 643        ]
 644
 645 haskellKeywordsFM = listToUFM $
 646       map (\ (x,y) -> (_PK_ x,y))
 647        [( "case",       ITcase ),
 648         ( "class",      ITclass ),
 649         ( "data",       ITdata ),
 650         ( "default",    ITdefault ),
 651         ( "deriving",   ITderiving ),
 652         ( "do",         ITdo ),
 653         ( "else",       ITelse ),
 654         ( "if",         ITif ),
 655         ( "import",     ITimport ),
 656         ( "in",         ITin ),
 657         ( "infix",      ITinfix ),
 658         ( "infixl",     ITinfixl ),
 659         ( "infixr",     ITinfixr ),
 660         ( "instance",   ITinstance ),
 661         ( "let",        ITlet ),
 662         ( "module",     ITmodule ),
 663         ( "newtype",    ITnewtype ),
 664         ( "of",         ITof ),
 665         ( "then",       ITthen ),
 666         ( "type",       ITtype ),
 667         ( "where",      ITwhere )
 668
 669 --      These three aren't Haskell keywords at all
 670 --      and 'as' is often used as a variable name
 671 --      ( "as",         ITas ),
 672 --      ( "qualified",  ITqualified ),
 673 --      ( "hiding",     IThiding )
 674
 675      ]
 676
 677 haskellKeySymsFM = listToUFM $
 678         map (\ (x,y) -> (_PK_ x,y))
 679       [ ("..",                  ITdotdot)
 680        ,("::",                  ITdcolon)
 681        ,("=",                   ITequal)
 682        ,("\\",                  ITlam)
 683        ,("|",                   ITvbar)
 684        ,("<-",                  ITlarrow)
 685        ,("->",                  ITrarrow)
 686        ,("@",                   ITat)
 687        ,("~",                   ITtilde)
 688        ,("=>",                  ITdarrow)
 689        ,("-",                   ITminus)
 690        ,("!",                   ITbang)
 691        ]
 692 \end{code}
 693
 694 -----------------------------------------------------------------------------
 695 doDiscard rips along really fast, looking for a '#-}',
 696 indicating the end of the pragma we're skipping
 697
 698 \begin{code}
 699 doDiscard inStr buf =
 700  case currentChar# buf of
 701    '#'# | not inStr ->
 702        case lookAhead# buf 1# of { '#'# ->
 703        case lookAhead# buf 2# of { '-'# ->
 704        case lookAhead# buf 3# of { '}'# ->
 705            (lexemeToBuffer buf, stepOverLexeme (setCurrentPos# buf 4#));
 706         _    -> doDiscard inStr (incLexeme buf) };
 707         _    -> doDiscard inStr (incLexeme buf) };
 708         _    -> doDiscard inStr (incLexeme buf) }
 709    '"'# ->
 710        let
 711         odd_slashes buf flg i# =
 712           case lookAhead# buf i# of
 713            '\\'# -> odd_slashes buf (not flg) (i# -# 1#)
 714            _     -> flg
 715        in
 716        case lookAhead# buf (negateInt# 1#) of --backwards, actually
 717          '\\'# -> -- escaping something..
 718            if odd_slashes buf True (negateInt# 2#) then
 719                -- odd number of slashes, " is escaped.
 720               doDiscard inStr (incLexeme buf)
 721            else
 722                -- even number of slashes, \ is escaped.
 723               doDiscard (not inStr) (incLexeme buf)
 724          _ -> case inStr of -- forced to avoid build-up
 725                True  -> doDiscard False (incLexeme buf)
 726                False -> doDiscard True  (incLexeme buf)
 727    _ -> doDiscard inStr (incLexeme buf)
 728
 729 \end{code}
 730
 731 -----------------------------------------------------------------------------
 732
 733 \begin{code}
 734 type IfM a = StringBuffer       -- Input string
 735           -> SrcLoc
 736           -> MaybeErr a {-error-}Message
 737
 738 returnIf   :: a -> IfM a
 739 returnIf a s l = Succeeded a
 740
 741 thenIf     :: IfM a -> (a -> IfM b) -> IfM b
 742 m `thenIf` k = \s l ->
 743         case m s l of
 744                 Succeeded a -> k a s l
 745                 Failed err  -> Failed err
 746
 747 getSrcLocIf :: IfM SrcLoc
 748 getSrcLocIf s l = Succeeded l
 749
 750 happyError :: IfM a
 751 happyError s l = Failed (ifaceParseErr s l)
 752
 753
 754 {-
 755  Note that if the name of the file we're processing ends
 756  with `hi-boot', we accept it on faith as having the right
 757  version. This is done so that .hi-boot files that comes
 758  with hsc don't have to be updated before every release,
 759  *and* it allows us to share .hi-boot files with versions
 760  of hsc that don't have .hi version checking (e.g., ghc-2.10's)
 761
 762  If the version number is 0, the checking is also turned off.
 763  (needed to deal with GHC.hi only!)
 764
 765  Once we can assume we're compiling with a version of ghc that
 766  supports interface file checking, we can drop the special
 767  pleading
 768 -}
 769 checkVersion :: Maybe Integer -> IfM ()
 770 checkVersion mb@(Just v) s l
 771  | (v==0) || (v == fromInt opt_HiVersion) || opt_NoHiCheck = Succeeded ()
 772  | otherwise = Failed (ifaceVersionErr mb l ([]::[IfaceToken]){-Todo-})
 773 checkVersion mb@Nothing  s l
 774  | "hi-boot" `isSuffixOf` (_UNPK_ (srcLocFile l)) = Succeeded ()
 775  | otherwise = Failed (ifaceVersionErr mb l ([]::[IfaceToken]){-Todo-})
 776
 777 -----------------------------------------------------------------
 778
 779 ifaceParseErr :: StringBuffer -> SrcLoc -> Message
 780 ifaceParseErr s l
 781   = hsep [ppr l, ptext SLIT("Interface-file parse error;"),
 782           ptext SLIT("current input ="), text first_bit]
 783   where
 784     first_bit = lexemeToString (stepOnBy# s 100#)
 785
 786 ifaceVersionErr hi_vers l toks
 787   = hsep [ppr l, ptext SLIT("Interface file version error;"),
 788           ptext SLIT("Expected"), int opt_HiVersion,
 789           ptext SLIT("found "), pp_version]
 790     where
 791      pp_version =
 792       case hi_vers of
 793         Nothing -> ptext SLIT("pre ghc-3.02 version")
 794         Just v  -> ptext SLIT("version") <+> integer v
 795
 796 \end{code}