ghc/compiler/reader/Lex.lhs

   1 %
   2 % (c) The GRASP/AQUA Project, Glasgow University, 1992-1998
   3 %
   4 \section[Lexical analysis]{Lexical analysis}
   5
   6 --------------------------------------------------------
   7 [Jan 98]
   8 There's a known bug in here:
   9
  10         If an interface file ends prematurely, Lex tries to
  11         do headFS of an empty FastString.
  12
  13 An example that provokes the error is
  14
  15         f _:_ _forall_ [a] <<<END OF FILE>>>
  16 --------------------------------------------------------
  17
  18 \begin{code}
  19 {-# OPTIONS -#include "ctypes.h" #-}
  20
  21 module Lex (
  22
  23         ifaceParseErr,
  24
  25         -- Monad for parser
  26         IfaceToken(..), lexIface, IfM, thenIf, returnIf, getSrcLocIf,
  27         checkVersion,
  28         happyError,
  29         StringBuffer
  30
  31     ) where
  32
  33 #include "HsVersions.h"
  34
  35 import Char             ( ord, isSpace )
  36 import List             ( isSuffixOf )
  37
  38 import IdInfo           ( InlinePragInfo(..), CprInfo(..) )
  39 import Name             ( isLowerISO, isUpperISO )
  40 import PrelMods         ( mkTupNameStr, mkUbxTupNameStr )
  41 import CmdLineOpts      ( opt_IgnoreIfacePragmas, opt_HiVersion, opt_NoHiCheck )
  42 import Demand           ( Demand(..) {- instance Read -} )
  43 import UniqFM           ( UniqFM, listToUFM, lookupUFM)
  44 import BasicTypes       ( NewOrData(..) )
  45 import SrcLoc           ( SrcLoc, incSrcLine, srcLocFile )
  46
  47 import Maybes           ( MaybeErr(..) )
  48 import ErrUtils         ( Message )
  49 import Outputable
  50
  51 import FastString
  52 import StringBuffer
  53 import GlaExts
  54 import ST               ( runST )
  55
  56 #if __GLASGOW_HASKELL__ >= 303
  57 import Bits
  58 import Word
  59 #endif
  60
  61 import Addr
  62 import PrelRead                 ( readRational__ ) -- Glasgow non-std
  63 \end{code}
  64
  65 %************************************************************************
  66 %*                                                                      *
  67 \subsection{Data types}
  68 %*                                                                      *
  69 %************************************************************************
  70
  71 The token data type, fairly un-interesting except from one
  72 constructor, @ITidinfo@, which is used to lazily lex id info (arity,
  73 strictness, unfolding etc).
  74
  75 The Idea/Observation here is that the renamer needs to scan through
  76 all of an interface file before it can continue. But only a fraction
  77 of the information contained in the file turns out to be useful, so
  78 delaying as much as possible of the scanning and parsing of an
  79 interface file Makes Sense (Heap profiles of the compiler
  80 show a reduction in heap usage by at least a factor of two,
  81 post-renamer).
  82
  83 Hence, the interface file lexer spots when value declarations are
  84 being scanned and return the @ITidinfo@ and @ITtype@ constructors
  85 for the type and any other id info for that binding (unfolding, strictness
  86 etc). These constructors are applied to the result of lexing these sub-chunks.
  87
  88 The lexing of the type and id info is all done lazily, of course, so
  89 the scanning (and subsequent parsing) will be done *only* on the ids the
  90 renamer finds out that it is interested in. The rest will just be junked.
  91 Laziness, you know it makes sense :-)
  92
  93 \begin{code}
  94 data IfaceToken
  95   = ITcase                      -- Haskell keywords
  96   | ITclass
  97   | ITdata
  98   | ITdefault
  99   | ITderiving
 100   | ITdo
 101   | ITelse
 102   | ITif
 103   | ITimport
 104   | ITin
 105   | ITinfix
 106   | ITinfixl
 107   | ITinfixr
 108   | ITinstance
 109   | ITlet
 110   | ITmodule
 111   | ITnewtype
 112   | ITof
 113   | ITthen
 114   | ITtype
 115   | ITwhere
 116   | ITas
 117   | ITqualified
 118   | IThiding
 119
 120   | ITinterface                 -- GHC-extension keywords
 121   | ITexport
 122   | ITdepends
 123   | ITforall
 124   | ITletrec
 125   | ITcoerce
 126   | ITinlineCall
 127   | ITinlineMe
 128   | ITccall (Bool,Bool,Bool)    -- (is_dyn, is_casm, may_gc)
 129   | ITdefaultbranch
 130   | ITbottom
 131   | ITinteger_lit
 132   | ITfloat_lit
 133   | ITrational_lit
 134   | ITaddr_lit
 135   | ITlit_lit
 136   | ITstring_lit
 137   | ITtypeapp
 138   | ITonce                      -- usage annotations
 139   | ITmany
 140   | ITarity
 141   | ITrules
 142   | ITspecialise
 143   | ITnocaf
 144   | ITunfold InlinePragInfo
 145   | ITstrict ([Demand], Bool)
 146   | ITcprinfo (CprInfo)
 147   | ITscc
 148   | ITsccAllCafs
 149
 150   | ITdotdot                    -- reserved symbols
 151   | ITdcolon
 152   | ITequal
 153   | ITlam
 154   | ITvbar
 155   | ITlarrow
 156   | ITrarrow
 157   | ITat
 158   | ITtilde
 159   | ITdarrow
 160   | ITminus
 161   | ITbang
 162
 163   | ITbiglam                    -- GHC-extension symbols
 164
 165   | ITocurly                    -- special symbols
 166   | ITccurly
 167   | ITobrack
 168   | ITcbrack
 169   | IToparen
 170   | ITcparen
 171   | IToubxparen
 172   | ITcubxparen
 173   | ITsemi
 174   | ITcomma
 175
 176   | ITvarid   FAST_STRING       -- identifiers
 177   | ITconid   FAST_STRING
 178   | ITvarsym  FAST_STRING
 179   | ITconsym  FAST_STRING
 180   | ITqvarid  (FAST_STRING,FAST_STRING)
 181   | ITqconid  (FAST_STRING,FAST_STRING)
 182   | ITqvarsym (FAST_STRING,FAST_STRING)
 183   | ITqconsym (FAST_STRING,FAST_STRING)
 184
 185   | ITpragma StringBuffer
 186
 187   | ITchar Char
 188   | ITstring FAST_STRING
 189   | ITinteger Integer
 190   | ITrational Rational
 191
 192   | ITunknown String            -- Used when the lexer can't make sense of it
 193   | ITeof                       -- end of file token
 194   deriving Text -- debugging
 195 \end{code}
 196
 197 %************************************************************************
 198 %*                                                                      *
 199 \subsection{The lexical analyser}
 200 %*                                                                      *
 201 %************************************************************************
 202
 203 \begin{code}
 204 lexIface :: (IfaceToken -> IfM a) -> IfM a
 205 lexIface cont buf =
 206  _scc_ "Lexer"
 207 -- if bufferExhausted buf then
 208 --  []
 209 -- else
 210 --  trace ("Lexer: '"++[C# (currentChar# buf)]++"'") $
 211   case currentChar# buf of
 212       -- whitespace and comments, ignore.
 213     ' '#  -> lexIface cont (stepOn buf)
 214     '\t'# -> lexIface cont (stepOn buf)
 215     '\n'# -> \ loc -> lexIface cont (stepOn buf) (incSrcLine loc)
 216
 217 -- Numbers and comments
 218     '-'#  ->
 219       case lookAhead# buf 1# of
 220 --        '-'# -> lex_comment cont (stepOnBy# buf 2#)
 221         c    ->
 222           if is_digit c
 223           then lex_num cont (negate) (ord# c -# ord# '0'#) (incLexeme (incLexeme buf))
 224           else lex_sym cont buf
 225
 226     '{'# ->                             -- look for "{-##" special iface pragma
 227         case lookAhead# buf 1# of
 228            '-'# -> case lookAhead# buf 2# of
 229                     '#'# -> case lookAhead# buf 3# of
 230                                 '#'# ->
 231                                    let (lexeme, buf')
 232                                           = doDiscard False (stepOnBy# buf 4#) in
 233                                    cont (ITpragma lexeme) buf'
 234                                 _ ->  lex_nested_comment (lexIface cont) buf
 235                     _    -> cont ITocurly (stepOn buf)
 236                             -- lex_nested_comment (lexIface cont) buf
 237            _ -> cont ITocurly (stepOn buf)
 238
 239     -- special symbols ----------------------------------------------------
 240     '('# ->
 241          case prefixMatch (stepOn buf) "..)" of
 242            Just buf' ->  cont ITdotdot (stepOverLexeme buf')
 243            Nothing ->
 244             case lookAhead# buf 1# of
 245               '#'# -> cont IToubxparen (stepOnBy# buf 2#)
 246               _    -> cont IToparen (stepOn buf)
 247     ')'# -> cont ITcparen (stepOn buf)
 248     '}'# -> cont ITccurly (stepOn buf)
 249     '#'# -> case lookAhead# buf 1# of
 250                 ')'# -> cont ITcubxparen (stepOnBy# buf 2#)
 251                 _    -> lex_sym cont (incLexeme buf)
 252     '['# -> cont ITobrack (stepOn buf)
 253     ']'# -> cont ITcbrack (stepOn buf)
 254     ','# -> cont ITcomma  (stepOn buf)
 255     ';'# -> cont ITsemi   (stepOn buf)
 256
 257     -- strings/characters -------------------------------------------------
 258     '\"'#{-"-} -> case untilEndOfString# (stepOn buf) of
 259               buf' ->
 260                   -- the string literal does *not* include the dquotes
 261                 case lexemeToFastString buf' of
 262                  v -> cont (ITstring v) (stepOn (stepOverLexeme buf'))
 263
 264     '\''# -> --
 265              -- untilEndOfChar# extends the current lexeme until
 266              -- it hits a non-escaped single quote. The lexeme of the
 267              -- StringBuffer returned does *not* include the closing quote,
 268              -- hence we augment the lexeme and make sure to add the
 269              -- starting quote, before `read'ing the string.
 270              --
 271              case untilEndOfChar# (stepOn buf) of
 272                buf' -> case reads ('\'':lexemeToString (incLexeme buf')) of
 273                         [  (ch, rest)] -> cont (ITchar ch) (stepOverLexeme (incLexeme buf'))
 274
 275     -- strictness and cpr pragmas and __scc treated specially.
 276     '_'# ->
 277          case lookAhead# buf 1# of
 278            '_'# -> case lookAhead# buf 2# of
 279                     'S'# ->
 280                         lex_demand cont (stepOnUntil (not . isSpace)
 281                                         (stepOnBy# buf 3#)) -- past __S
 282                     'M'# ->
 283                         lex_cpr cont (stepOnUntil (not . isSpace)
 284                                      (stepOnBy# buf 3#)) -- past __M
 285                     's'# ->
 286                         case prefixMatch (stepOnBy# buf 3#) "cc" of
 287                                Just buf' -> lex_scc cont (stepOverLexeme buf')
 288                                Nothing   -> lex_id cont buf
 289                     _ -> lex_id cont buf
 290            _    -> lex_id cont buf
 291
 292 -- ``thingy'' form for casm
 293     '`'# ->
 294             case lookAhead# buf 1# of
 295               '`'# -> lex_cstring cont (stepOnBy# buf 2#) -- remove the `s and go.
 296               _    -> lex_sym cont (incLexeme buf)         -- add ` to lexeme and assume
 297                                                      -- scanning an id of some sort.
 298
 299     '\NUL'# ->
 300             if bufferExhausted (stepOn buf) then
 301                cont ITeof buf
 302             else
 303                trace "lexIface: misplaced NUL?" $
 304                cont (ITunknown "\NUL") (stepOn buf)
 305
 306     c | is_digit  c -> lex_num cont (id) (ord# c -# ord# '0'#) (incLexeme buf)
 307       | is_symbol c -> lex_sym cont buf
 308       | is_upper  c -> lex_con cont buf
 309       | is_ident  c -> lex_id  cont buf
 310
 311 --  where
 312 lex_comment cont buf =
 313 --   _trace ("comment: "++[C# (currentChar# buf)]) $
 314    case untilChar# buf '\n'# of {buf' -> lexIface cont (stepOverLexeme buf')}
 315
 316 -------------------------------------------------------------------------------
 317
 318 lex_nested_comment cont buf =
 319   case currentChar# buf of
 320         '-'# -> case lookAhead# buf 1# of
 321                  '}'# -> cont (stepOnBy# buf 2#)
 322                  _    -> lex_nested_comment cont (stepOn buf)
 323
 324         '{'# -> case lookAhead# buf 1# of
 325                  '-'# -> lex_nested_comment
 326                                 (lex_nested_comment cont)
 327                                 (stepOnBy# buf 2#)
 328                  _    -> lex_nested_comment cont (stepOn buf)
 329
 330         _   -> lex_nested_comment cont (stepOn buf)
 331
 332 -------------------------------------------------------------------------------
 333
 334 lex_demand cont buf =
 335  case read_em [] buf of { (ls,buf') ->
 336  case currentChar# buf' of
 337    'B'# -> cont (ITstrict (ls, True )) (stepOverLexeme (stepOn buf'))
 338    _    -> cont (ITstrict (ls, False)) (stepOverLexeme buf')
 339  }
 340  where
 341    -- code snatched from Demand.lhs
 342   read_em acc buf =
 343    case currentChar# buf of
 344     'L'# -> read_em (WwLazy False : acc) (stepOn buf)
 345     'A'# -> read_em (WwLazy True  : acc) (stepOn buf)
 346     'S'# -> read_em (WwStrict     : acc) (stepOn buf)
 347     'P'# -> read_em (WwPrim       : acc) (stepOn buf)
 348     'E'# -> read_em (WwEnum       : acc) (stepOn buf)
 349     ')'# -> (reverse acc, stepOn buf)
 350     'U'# -> do_unpack DataType True  acc (stepOnBy# buf 2#)
 351     'u'# -> do_unpack DataType False acc (stepOnBy# buf 2#)
 352     'N'# -> do_unpack NewType True  acc (stepOnBy# buf 2#)
 353     'n'# -> do_unpack NewType False acc (stepOnBy# buf 2#)
 354     _    -> (reverse acc, buf)
 355
 356   do_unpack new_or_data wrapper_unpacks acc buf
 357    = case read_em [] buf of
 358       (stuff, rest) -> read_em (WwUnpack new_or_data wrapper_unpacks stuff : acc) rest
 359
 360 lex_cpr cont buf =
 361  case read_em [] buf of { (cpr_inf,buf') ->
 362    ASSERT ( null (tail cpr_inf) )
 363    cont (ITcprinfo $ head cpr_inf) (stepOverLexeme buf')
 364  }
 365  where
 366    -- code snatched from lex_demand above
 367   read_em acc buf =
 368    case currentChar# buf of
 369     '-'# -> read_em (NoCPRInfo : acc) (stepOn buf)
 370     '('# -> do_unpack acc (stepOn buf)
 371     ')'# -> (reverse acc, stepOn buf)
 372     _    -> (reverse acc, buf)
 373
 374   do_unpack acc buf
 375    = case read_em [] buf of
 376       (stuff, rest) -> read_em ((CPRInfo stuff)  : acc) rest
 377
 378 ------------------
 379 lex_scc cont buf =
 380  case currentChar# buf of
 381   'C'# -> cont ITsccAllCafs  (stepOverLexeme (stepOn buf))
 382   other -> cont ITscc buf
 383
 384 -----------
 385 lex_num :: (IfaceToken -> IfM a) -> (Int -> Int) -> Int# -> IfM a
 386 lex_num cont minus acc# buf =
 387  --trace ("lex_num: "++[C# (currentChar# buf)]) $
 388  case scanNumLit (I# acc#) buf of
 389      (acc',buf') ->
 390        case currentChar# buf' of
 391          '.'# ->
 392              -- this case is not optimised at all, as the
 393              -- presence of floating point numbers in interface
 394              -- files is not that common. (ToDo)
 395             case expandWhile# is_digit (incLexeme buf') of
 396               buf2 -> -- points to first non digit char
 397                 let l = case currentChar# buf2 of
 398                           'e'# -> let buf3 = incLexeme buf2 in
 399                               case currentChar# buf3 of
 400                                 '-'# -> expandWhile# is_digit (incLexeme buf3)
 401                                 _    -> expandWhile# is_digit buf3
 402                           _ -> buf2
 403                 in let v = readRational__ (lexemeToString l) in
 404                    cont (ITrational v) (stepOverLexeme l)
 405
 406          _ -> cont (ITinteger (fromInt (minus acc'))) (stepOverLexeme buf')
 407
 408 -----------
 409 lex_cstring cont buf =
 410  case expandUntilMatch buf "\'\'" of
 411    buf' -> cont (ITstring (lexemeToFastString (setCurrentPos# buf' (negateInt# 2#))))
 412            (stepOverLexeme buf')
 413
 414 ------------------------------------------------------------------------------
 415 -- Character Classes
 416
 417 is_ident, is_symbol, is_any, is_upper, is_digit :: Char# -> Bool
 418
 419 {-# INLINE is_ctype #-}
 420 #if __GLASGOW_HASKELL__ >= 303
 421 is_ctype :: Word8 -> Char# -> Bool
 422 is_ctype mask = \c ->
 423    (indexWord8OffAddr (``char_types'' :: Addr) (ord (C# c)) .&. mask) /= 0
 424 #else
 425 is_ctype :: Int -> Char# -> Bool
 426 is_ctype (I# mask) = \c ->
 427     let (A# ctype) = ``char_types'' :: Addr
 428         flag_word  = int2Word# (ord# (indexCharOffAddr# ctype (ord# c)))
 429     in
 430         (flag_word `and#` (int2Word# mask)) `neWord#` (int2Word# 0#)
 431 #endif
 432
 433 is_ident  = is_ctype 1
 434 is_symbol = is_ctype 2
 435 is_any    = is_ctype 4
 436 is_space  = is_ctype 8
 437 is_upper  = is_ctype 16
 438 is_digit  = is_ctype 32
 439
 440 -----------------------------------------------------------------------------
 441 -- identifiers, symbols etc.
 442
 443 lex_id cont buf =
 444  case expandWhile# is_ident buf of { buf1 ->
 445  case expandWhile# (eqChar# '#'#) buf1 of { buf' -> -- only if GHC extns on
 446  let new_buf = stepOverLexeme buf'
 447      lexeme  = lexemeToFastString buf'
 448  in
 449  case _scc_ "Lex.haskellKeyword" lookupUFM haskellKeywordsFM lexeme of {
 450         Just kwd_token -> --trace ("hkeywd: "++_UNPK_(lexeme)) $
 451                           cont kwd_token new_buf;
 452         Nothing        ->
 453  case lookupUFM ifaceKeywordsFM lexeme of {
 454         Just kwd_token -> --trace ("ifacekeywd: "++_UNPK_(lexeme)) $
 455                           cont kwd_token new_buf;
 456         Nothing        -> --trace ("id: "++_UNPK_(lexeme)) $
 457                           cont (mk_var_token lexeme) new_buf
 458  }}}}
 459
 460 lex_sym cont buf =
 461  case expandWhile# is_symbol buf of
 462    buf'
 463      | is_comment lexeme -> lex_comment cont new_buf
 464      | otherwise         ->
 465            case lookupUFM haskellKeySymsFM lexeme of {
 466                 Just kwd_token -> --trace ("keysym: "++unpackFS lexeme) $
 467                                   cont kwd_token new_buf ;
 468                 Nothing        -> --trace ("sym: "++unpackFS lexeme) $
 469                                   cont (mk_var_token lexeme) new_buf
 470            }
 471         where lexeme = lexemeToFastString buf'
 472               new_buf = stepOverLexeme buf'
 473
 474               is_comment fs
 475                 | len < 2   = False
 476                 | otherwise = trundle 0
 477                   where
 478                    len = lengthFS fs
 479
 480                    trundle n | n == len  = True
 481                              | otherwise = indexFS fs n == '-' && trundle (n+1)
 482
 483 lex_con cont buf =
 484  case expandWhile# is_ident buf of        { buf1 ->
 485  case expandWhile# (eqChar# '#'#) buf1 of { buf' ->
 486  case currentChar# buf' of
 487      '.'# -> lex_qid cont lexeme (stepOn new_buf) just_a_conid
 488      _    -> just_a_conid
 489
 490    where
 491     just_a_conid = --trace ("con: "++unpackFS lexeme) $
 492                    cont (ITconid lexeme) new_buf
 493     lexeme = lexemeToFastString buf'
 494     new_buf = stepOverLexeme buf'
 495  }}
 496
 497 lex_qid cont mod buf just_a_conid =
 498  case currentChar# buf of
 499   '['# ->       -- Special case for []
 500     case lookAhead# buf 1# of
 501      ']'# -> cont (ITqconid (mod,SLIT("[]"))) (stepOnBy# buf 2#)
 502      _    -> just_a_conid
 503
 504   '('# ->  -- Special case for (,,,)
 505            -- This *is* necessary to deal with e.g. "instance C PrelBase.(,,)"
 506     case lookAhead# buf 1# of
 507      '#'# -> case lookAhead# buf 2# of
 508                 ','# -> lex_ubx_tuple cont mod (stepOnBy# buf 3#)
 509                                 just_a_conid
 510                 _    -> just_a_conid
 511      ')'# -> cont (ITqconid (mod,SLIT("()"))) (stepOnBy# buf 2#)
 512      ','# -> lex_tuple cont mod (stepOnBy# buf 2#) just_a_conid
 513      _    -> just_a_conid
 514
 515   '-'# -> case lookAhead# buf 1# of
 516             '>'# -> cont (ITqconid (mod,SLIT("->"))) (stepOnBy# buf 2#)
 517             _    -> lex_id3 cont mod buf just_a_conid
 518   _    -> lex_id3 cont mod buf just_a_conid
 519
 520 lex_id3 cont mod buf just_a_conid
 521   | is_symbol c =
 522      case expandWhile# is_symbol buf of { buf' ->
 523      let
 524       lexeme  = lexemeToFastString buf'
 525       new_buf = stepOverLexeme buf'
 526      in
 527      case lookupUFM haskellKeySymsFM lexeme of {
 528         Just kwd_token -> just_a_conid; -- avoid M.:: etc.
 529         Nothing        -> cont (mk_qvar_token mod lexeme) new_buf
 530      }}
 531
 532   | otherwise   =
 533      case expandWhile# is_ident buf of { buf1 ->
 534      if emptyLexeme buf1
 535             then just_a_conid
 536             else
 537      case expandWhile# (eqChar# '#'#) buf1 of { buf' -> -- only if GHC extns on
 538      let
 539       lexeme  = lexemeToFastString buf'
 540       new_buf = stepOverLexeme buf'
 541      in
 542      case _scc_ "Lex.haskellKeyword" lookupUFM haskellKeywordsFM lexeme of {
 543             Just kwd_token -> just_a_conid; -- avoid M.where etc.
 544             Nothing        ->
 545      case lookupUFM ifaceKeywordsFM lexeme of { -- only for iface files
 546             Just kwd_token -> just_a_conid;
 547             Nothing        -> cont (mk_qvar_token mod lexeme) new_buf
 548      }}}}
 549   where c = currentChar# buf
 550
 551 mk_var_token pk_str
 552   | is_upper f          = ITconid pk_str
 553         -- _[A-Z] is treated as a constructor in interface files.
 554   | f `eqChar#` '_'# && not (_NULL_ tl)
 555         && (case _HEAD_ tl of { C# g -> is_upper g }) = ITconid pk_str
 556   | is_ident f          = ITvarid pk_str
 557   | f `eqChar#` ':'#    = ITconsym pk_str
 558   | otherwise           = ITvarsym pk_str
 559   where
 560       (C# f) = _HEAD_ pk_str
 561       tl     = _TAIL_ pk_str
 562
 563 mk_qvar_token m token =
 564  case mk_var_token token of
 565    ITconid n  -> ITqconid  (m,n)
 566    ITvarid n  -> ITqvarid  (m,n)
 567    ITconsym n -> ITqconsym (m,n)
 568    ITvarsym n -> ITqvarsym (m,n)
 569    _          -> ITunknown (show token)
 570 \end{code}
 571
 572 ----------------------------------------------------------------------------
 573 Horrible stuff for dealing with M.(,,,)
 574
 575 \begin{code}
 576 lex_tuple cont mod buf back_off =
 577   go 2 buf
 578   where
 579    go n buf =
 580     case currentChar# buf of
 581       ','# -> go (n+1) (stepOn buf)
 582       ')'# -> cont (ITqconid (mod, snd (mkTupNameStr n))) (stepOn buf)
 583       _    -> back_off
 584
 585 lex_ubx_tuple cont mod buf back_off =
 586   go 2 buf
 587   where
 588    go n buf =
 589     case currentChar# buf of
 590       ','# -> go (n+1) (stepOn buf)
 591       '#'# -> case lookAhead# buf 1# of
 592                 ')'# -> cont (ITqconid (mod, snd (mkUbxTupNameStr n)))
 593                                  (stepOnBy# buf 2#)
 594                 _    -> back_off
 595       _    -> back_off
 596 \end{code}
 597
 598 -----------------------------------------------------------------------------
 599 Keyword Lists
 600
 601 \begin{code}
 602 ifaceKeywordsFM :: UniqFM IfaceToken
 603 ifaceKeywordsFM = listToUFM $
 604       map (\ (x,y) -> (_PK_ x,y))
 605      [  ("__interface",         ITinterface),
 606         ("__export",            ITexport),
 607         ("__depends",           ITdepends),
 608         ("__forall",            ITforall),
 609         ("__letrec",            ITletrec),
 610         ("__coerce",            ITcoerce),
 611         ("__inline_me",         ITinlineMe),
 612         ("__inline_call",       ITinlineCall),
 613         ("__DEFAULT",           ITdefaultbranch),
 614         ("__bot",               ITbottom),
 615         ("__integer",           ITinteger_lit),
 616         ("__float",             ITfloat_lit),
 617         ("__rational",          ITrational_lit),
 618         ("__addr",              ITaddr_lit),
 619         ("__litlit",            ITlit_lit),
 620         ("__string",            ITstring_lit),
 621         ("__R",                 ITrules),
 622         ("__a",                 ITtypeapp),
 623         ("__o",                 ITonce),
 624         ("__m",                 ITmany),
 625         ("__A",                 ITarity),
 626         ("__P",                 ITspecialise),
 627         ("__C",                 ITnocaf),
 628         ("__u",                 ITunfold NoInlinePragInfo),
 629
 630         ("__ccall",             ITccall (False, False, False)),
 631         ("__ccall_GC",          ITccall (False, False, True)),
 632         ("__dyn_ccall",         ITccall (True,  False, False)),
 633         ("__dyn_ccall_GC",      ITccall (True,  False, True)),
 634         ("__casm",              ITccall (False, True,  False)),
 635         ("__dyn_casm",          ITccall (True,  True,  False)),
 636         ("__casm_GC",           ITccall (False, True,  True)),
 637         ("__dyn_casm_GC",       ITccall (True,  True,  True)),
 638
 639         ("/\\",                 ITbiglam)
 640        ]
 641
 642 haskellKeywordsFM = listToUFM $
 643       map (\ (x,y) -> (_PK_ x,y))
 644        [( "case",       ITcase ),
 645         ( "class",      ITclass ),
 646         ( "data",       ITdata ),
 647         ( "default",    ITdefault ),
 648         ( "deriving",   ITderiving ),
 649         ( "do",         ITdo ),
 650         ( "else",       ITelse ),
 651         ( "if",         ITif ),
 652         ( "import",     ITimport ),
 653         ( "in",         ITin ),
 654         ( "infix",      ITinfix ),
 655         ( "infixl",     ITinfixl ),
 656         ( "infixr",     ITinfixr ),
 657         ( "instance",   ITinstance ),
 658         ( "let",        ITlet ),
 659         ( "module",     ITmodule ),
 660         ( "newtype",    ITnewtype ),
 661         ( "of",         ITof ),
 662         ( "then",       ITthen ),
 663         ( "type",       ITtype ),
 664         ( "where",      ITwhere )
 665
 666 --      These three aren't Haskell keywords at all
 667 --      and 'as' is often used as a variable name
 668 --      ( "as",         ITas ),
 669 --      ( "qualified",  ITqualified ),
 670 --      ( "hiding",     IThiding )
 671
 672      ]
 673
 674 haskellKeySymsFM = listToUFM $
 675         map (\ (x,y) -> (_PK_ x,y))
 676       [ ("..",                  ITdotdot)
 677        ,("::",                  ITdcolon)
 678        ,("=",                   ITequal)
 679        ,("\\",                  ITlam)
 680        ,("|",                   ITvbar)
 681        ,("<-",                  ITlarrow)
 682        ,("->",                  ITrarrow)
 683        ,("@",                   ITat)
 684        ,("~",                   ITtilde)
 685        ,("=>",                  ITdarrow)
 686        ,("-",                   ITminus)
 687        ,("!",                   ITbang)
 688        ]
 689 \end{code}
 690
 691 -----------------------------------------------------------------------------
 692 doDiscard rips along really fast, looking for a '#-}',
 693 indicating the end of the pragma we're skipping
 694
 695 \begin{code}
 696 doDiscard inStr buf =
 697  case currentChar# buf of
 698    '#'# | not inStr ->
 699        case lookAhead# buf 1# of { '#'# ->
 700        case lookAhead# buf 2# of { '-'# ->
 701        case lookAhead# buf 3# of { '}'# ->
 702            (lexemeToBuffer buf, stepOverLexeme (setCurrentPos# buf 4#));
 703         _    -> doDiscard inStr (incLexeme buf) };
 704         _    -> doDiscard inStr (incLexeme buf) };
 705         _    -> doDiscard inStr (incLexeme buf) }
 706    '"'# ->
 707        let
 708         odd_slashes buf flg i# =
 709           case lookAhead# buf i# of
 710            '\\'# -> odd_slashes buf (not flg) (i# -# 1#)
 711            _     -> flg
 712        in
 713        case lookAhead# buf (negateInt# 1#) of --backwards, actually
 714          '\\'# -> -- escaping something..
 715            if odd_slashes buf True (negateInt# 2#) then
 716                -- odd number of slashes, " is escaped.
 717               doDiscard inStr (incLexeme buf)
 718            else
 719                -- even number of slashes, \ is escaped.
 720               doDiscard (not inStr) (incLexeme buf)
 721          _ -> case inStr of -- forced to avoid build-up
 722                True  -> doDiscard False (incLexeme buf)
 723                False -> doDiscard True  (incLexeme buf)
 724    _ -> doDiscard inStr (incLexeme buf)
 725
 726 \end{code}
 727
 728 -----------------------------------------------------------------------------
 729
 730 \begin{code}
 731 type IfM a = StringBuffer       -- Input string
 732           -> SrcLoc
 733           -> MaybeErr a {-error-}Message
 734
 735 returnIf   :: a -> IfM a
 736 returnIf a s l = Succeeded a
 737
 738 thenIf     :: IfM a -> (a -> IfM b) -> IfM b
 739 m `thenIf` k = \s l ->
 740         case m s l of
 741                 Succeeded a -> k a s l
 742                 Failed err  -> Failed err
 743
 744 getSrcLocIf :: IfM SrcLoc
 745 getSrcLocIf s l = Succeeded l
 746
 747 happyError :: IfM a
 748 happyError s l = Failed (ifaceParseErr s l)
 749
 750
 751 {-
 752  Note that if the name of the file we're processing ends
 753  with `hi-boot', we accept it on faith as having the right
 754  version. This is done so that .hi-boot files that comes
 755  with hsc don't have to be updated before every release,
 756  *and* it allows us to share .hi-boot files with versions
 757  of hsc that don't have .hi version checking (e.g., ghc-2.10's)
 758
 759  If the version number is 0, the checking is also turned off.
 760  (needed to deal with GHC.hi only!)
 761
 762  Once we can assume we're compiling with a version of ghc that
 763  supports interface file checking, we can drop the special
 764  pleading
 765 -}
 766 checkVersion :: Maybe Integer -> IfM ()
 767 checkVersion mb@(Just v) s l
 768  | (v==0) || (v == fromInt opt_HiVersion) || opt_NoHiCheck = Succeeded ()
 769  | otherwise = Failed (ifaceVersionErr mb l ([]::[IfaceToken]){-Todo-})
 770 checkVersion mb@Nothing  s l
 771  | "hi-boot" `isSuffixOf` (_UNPK_ (srcLocFile l)) = Succeeded ()
 772  | otherwise = Failed (ifaceVersionErr mb l ([]::[IfaceToken]){-Todo-})
 773
 774 -----------------------------------------------------------------
 775
 776 ifaceParseErr :: StringBuffer -> SrcLoc -> Message
 777 ifaceParseErr s l
 778   = hsep [ppr l, ptext SLIT("Interface-file parse error;"),
 779           ptext SLIT("current input ="), text first_bit]
 780   where
 781     first_bit = lexemeToString (stepOnBy# s 100#)
 782
 783 ifaceVersionErr hi_vers l toks
 784   = hsep [ppr l, ptext SLIT("Interface file version error;"),
 785           ptext SLIT("Expected"), int opt_HiVersion,
 786           ptext SLIT("found "), pp_version]
 787     where
 788      pp_version =
 789       case hi_vers of
 790         Nothing -> ptext SLIT("pre ghc-3.02 version")
 791         Just v  -> ptext SLIT("version") <+> integer v
 792
 793 \end{code}