ghc/compiler/reader/Lex.lhs

   1 %
   2 % (c) The GRASP/AQUA Project, Glasgow University, 1992-1998
   3 %
   4 \section[Lexical analysis]{Lexical analysis}
   5
   6 --------------------------------------------------------
   7 [Jan 98]
   8 There's a known bug in here:
   9
  10         If an interface file ends prematurely, Lex tries to
  11         do headFS of an empty FastString.
  12
  13 An example that provokes the error is
  14
  15         f _:_ _forall_ [a] <<<END OF FILE>>>
  16 --------------------------------------------------------
  17
  18 \begin{code}
  19 {-# OPTIONS -#include "ctypes.h" #-}
  20
  21 module Lex (
  22
  23         ifaceParseErr,
  24
  25         -- Monad for parser
  26         IfaceToken(..), lexIface, IfM, thenIf, returnIf, getSrcLocIf,
  27         checkVersion,
  28         happyError,
  29         StringBuffer
  30
  31     ) where
  32
  33 #include "HsVersions.h"
  34
  35 import Char             ( ord, isSpace )
  36 import List             ( isSuffixOf )
  37
  38 import IdInfo           ( InlinePragInfo(..) )
  39 import Name             ( isLowerISO, isUpperISO )
  40 import OccName          ( IfaceFlavour, hiFile, hiBootFile )
  41 import PrelMods         ( mkTupNameStr, mkUbxTupNameStr )
  42 import CmdLineOpts      ( opt_IgnoreIfacePragmas, opt_HiVersion, opt_NoHiCheck )
  43 import Demand           ( Demand(..) {- instance Read -} )
  44 import UniqFM           ( UniqFM, listToUFM, lookupUFM)
  45 import BasicTypes       ( NewOrData(..) )
  46 import SrcLoc           ( SrcLoc, incSrcLine, srcLocFile )
  47
  48 import Maybes           ( MaybeErr(..) )
  49 import ErrUtils         ( Message )
  50 import Outputable
  51
  52 import FastString
  53 import StringBuffer
  54 import GlaExts
  55 import ST               ( runST )
  56
  57 #if __GLASGOW_HASKELL__ >= 303
  58 import Bits
  59 import Word
  60 #endif
  61
  62 import Addr
  63
  64 import PrelRead                 ( readRational__ ) -- Glasgow non-std
  65 \end{code}
  66
  67 %************************************************************************
  68 %*                                                                      *
  69 \subsection{Data types}
  70 %*                                                                      *
  71 %************************************************************************
  72
  73 The token data type, fairly un-interesting except from one
  74 constructor, @ITidinfo@, which is used to lazily lex id info (arity,
  75 strictness, unfolding etc).
  76
  77 The Idea/Observation here is that the renamer needs to scan through
  78 all of an interface file before it can continue. But only a fraction
  79 of the information contained in the file turns out to be useful, so
  80 delaying as much as possible of the scanning and parsing of an
  81 interface file Makes Sense (Heap profiles of the compiler
  82 show a reduction in heap usage by at least a factor of two,
  83 post-renamer).
  84
  85 Hence, the interface file lexer spots when value declarations are
  86 being scanned and return the @ITidinfo@ and @ITtype@ constructors
  87 for the type and any other id info for that binding (unfolding, strictness
  88 etc). These constructors are applied to the result of lexing these sub-chunks.
  89
  90 The lexing of the type and id info is all done lazily, of course, so
  91 the scanning (and subsequent parsing) will be done *only* on the ids the
  92 renamer finds out that it is interested in. The rest will just be junked.
  93 Laziness, you know it makes sense :-)
  94
  95 \begin{code}
  96 data IfaceToken
  97   = ITcase                      -- Haskell keywords
  98   | ITclass
  99   | ITdata
 100   | ITdefault
 101   | ITderiving
 102   | ITdo
 103   | ITelse
 104   | ITif
 105   | ITimport
 106   | ITin
 107   | ITinfix
 108   | ITinfixl
 109   | ITinfixr
 110   | ITinstance
 111   | ITlet
 112   | ITmodule
 113   | ITnewtype
 114   | ITof
 115   | ITthen
 116   | ITtype
 117   | ITwhere
 118   | ITas
 119   | ITqualified
 120   | IThiding
 121
 122   | ITinterface                 -- GHC-extension keywords
 123   | ITexport
 124   | ITinstimport
 125   | ITforall
 126   | ITletrec
 127   | ITcoerce
 128   | ITinline
 129   | ITccall (Bool,Bool,Bool)    -- (is_dyn, is_casm, may_gc)
 130   | ITdefaultbranch
 131   | ITbottom
 132   | ITinteger_lit
 133   | ITfloat_lit
 134   | ITrational_lit
 135   | ITaddr_lit
 136   | ITlit_lit
 137   | ITstring_lit
 138   | ITtypeapp
 139   | ITarity
 140   | ITspecialise
 141   | ITnocaf
 142   | ITunfold InlinePragInfo
 143   | ITstrict ([Demand], Bool)
 144   | ITscc
 145   | ITsccAllCafs
 146   | ITsccAllDicts
 147
 148   | ITdotdot                    -- reserved symbols
 149   | ITdcolon
 150   | ITequal
 151   | ITlam
 152   | ITvbar
 153   | ITlarrow
 154   | ITrarrow
 155   | ITat
 156   | ITtilde
 157   | ITdarrow
 158   | ITminus
 159   | ITbang
 160
 161   | ITbiglam                    -- GHC-extension symbols
 162
 163   | ITocurly                    -- special symbols
 164   | ITccurly
 165   | ITobrack
 166   | ITcbrack
 167   | IToparen
 168   | ITcparen
 169   | IToubxparen
 170   | ITcubxparen
 171   | ITsemi
 172   | ITcomma
 173
 174   | ITvarid   FAST_STRING       -- identifiers
 175   | ITconid   FAST_STRING
 176   | ITvarsym  FAST_STRING
 177   | ITconsym  FAST_STRING
 178   | ITqvarid  (FAST_STRING,FAST_STRING,IfaceFlavour)
 179   | ITqconid  (FAST_STRING,FAST_STRING,IfaceFlavour)
 180   | ITqvarsym (FAST_STRING,FAST_STRING,IfaceFlavour)
 181   | ITqconsym (FAST_STRING,FAST_STRING,IfaceFlavour)
 182
 183   | ITpragma StringBuffer
 184
 185   | ITchar Char
 186   | ITstring FAST_STRING
 187   | ITinteger Integer
 188   | ITrational Rational
 189
 190   | ITunknown String            -- Used when the lexer can't make sense of it
 191   | ITeof                       -- end of file token
 192   deriving Text -- debugging
 193 \end{code}
 194
 195 %************************************************************************
 196 %*                                                                      *
 197 \subsection{The lexical analyser}
 198 %*                                                                      *
 199 %************************************************************************
 200
 201 \begin{code}
 202 lexIface :: (IfaceToken -> IfM a) -> IfM a
 203 lexIface cont buf =
 204  _scc_ "Lexer"
 205 -- if bufferExhausted buf then
 206 --  []
 207 -- else
 208 --  trace ("Lexer: '"++[C# (currentChar# buf)]++"'") $
 209   case currentChar# buf of
 210       -- whitespace and comments, ignore.
 211     ' '#  -> lexIface cont (stepOn buf)
 212     '\t'# -> lexIface cont (stepOn buf)
 213     '\n'# -> \ loc -> lexIface cont (stepOn buf) (incSrcLine loc)
 214
 215 -- Numbers and comments
 216     '-'#  ->
 217       case lookAhead# buf 1# of
 218 --        '-'# -> lex_comment cont (stepOnBy# buf 2#)
 219         c    ->
 220           if is_digit c
 221           then lex_num cont (negate) (ord# c -# ord# '0'#) (incLexeme (incLexeme buf))
 222           else lex_sym cont buf
 223
 224     '{'# ->                             -- look for "{-##" special iface pragma
 225         case lookAhead# buf 1# of
 226            '-'# -> case lookAhead# buf 2# of
 227                     '#'# -> case lookAhead# buf 3# of
 228                                 '#'# ->
 229                                    let (lexeme, buf')
 230                                           = doDiscard False (stepOnBy# buf 4#) in
 231                                    cont (ITpragma lexeme) buf'
 232                                 _ ->  lex_nested_comment (lexIface cont) buf
 233                     _    -> cont ITocurly (stepOn buf)
 234                             -- lex_nested_comment (lexIface cont) buf
 235            _ -> cont ITocurly (stepOn buf)
 236
 237     -- special symbols ----------------------------------------------------
 238     '('# ->
 239          case prefixMatch (stepOn buf) "..)" of
 240            Just buf' ->  cont ITdotdot (stepOverLexeme buf')
 241            Nothing ->
 242             case lookAhead# buf 1# of
 243               '#'# -> cont IToubxparen (stepOnBy# buf 2#)
 244               _    -> cont IToparen (stepOn buf)
 245     ')'# -> cont ITcparen (stepOn buf)
 246     '}'# -> cont ITccurly (stepOn buf)
 247     '#'# -> case lookAhead# buf 1# of
 248                 ')'# -> cont ITcubxparen (stepOnBy# buf 2#)
 249                 _    -> lex_sym cont (incLexeme buf)
 250     '['# -> cont ITobrack (stepOn buf)
 251     ']'# -> cont ITcbrack (stepOn buf)
 252     ','# -> cont ITcomma  (stepOn buf)
 253     ';'# -> cont ITsemi   (stepOn buf)
 254
 255     -- strings/characters -------------------------------------------------
 256     '\"'#{-"-} -> case untilEndOfString# (stepOn buf) of
 257               buf' ->
 258                   -- the string literal does *not* include the dquotes
 259                 case lexemeToFastString buf' of
 260                  v -> cont (ITstring v) (stepOn (stepOverLexeme buf'))
 261
 262     '\''# -> --
 263              -- untilEndOfChar# extends the current lexeme until
 264              -- it hits a non-escaped single quote. The lexeme of the
 265              -- StringBuffer returned does *not* include the closing quote,
 266              -- hence we augment the lexeme and make sure to add the
 267              -- starting quote, before `read'ing the string.
 268              --
 269              case untilEndOfChar# (stepOn buf) of
 270                buf' -> case reads ('\'':lexemeToString (incLexeme buf')) of
 271                         [  (ch, rest)] -> cont (ITchar ch) (stepOverLexeme (incLexeme buf'))
 272
 273     -- strictness pragma and __scc treated specially.
 274     '_'# ->
 275          case lookAhead# buf 1# of
 276            '_'# -> case lookAhead# buf 2# of
 277                     'S'# ->
 278                         lex_demand cont (stepOnUntil (not . isSpace)
 279                                         (stepOnBy# buf 3#)) -- past __S
 280                     's'# ->
 281                         case prefixMatch (stepOnBy# buf 3#) "cc" of
 282                                Just buf' -> lex_scc cont (stepOverLexeme buf')
 283                                Nothing   -> lex_id cont buf
 284                     _ -> lex_id cont buf
 285            _    -> lex_id cont buf
 286
 287 -- ``thingy'' form for casm
 288     '`'# ->
 289             case lookAhead# buf 1# of
 290               '`'# -> lex_cstring cont (stepOnBy# buf 2#) -- remove the `s and go.
 291               _    -> lex_sym cont (incLexeme buf)         -- add ` to lexeme and assume
 292                                                      -- scanning an id of some sort.
 293
 294     '\NUL'# ->
 295             if bufferExhausted (stepOn buf) then
 296                cont ITeof buf
 297             else
 298                trace "lexIface: misplaced NUL?" $
 299                cont (ITunknown "\NUL") (stepOn buf)
 300
 301     c | is_digit  c -> lex_num cont (id) (ord# c -# ord# '0'#) (incLexeme buf)
 302       | is_symbol c -> lex_sym cont buf
 303       | is_upper  c -> lex_con cont buf
 304       | is_ident  c -> lex_id  cont buf
 305
 306 --  where
 307 lex_comment cont buf =
 308 --   _trace ("comment: "++[C# (currentChar# buf)]) $
 309    case untilChar# buf '\n'# of {buf' -> lexIface cont (stepOverLexeme buf')}
 310
 311 -------------------------------------------------------------------------------
 312
 313 lex_nested_comment cont buf =
 314   case currentChar# buf of
 315         '-'# -> case lookAhead# buf 1# of
 316                  '}'# -> cont (stepOnBy# buf 2#)
 317                  _    -> lex_nested_comment cont (stepOn buf)
 318
 319         '{'# -> case lookAhead# buf 1# of
 320                  '-'# -> lex_nested_comment
 321                                 (lex_nested_comment cont)
 322                                 (stepOnBy# buf 2#)
 323                  _    -> lex_nested_comment cont (stepOn buf)
 324
 325         _   -> lex_nested_comment cont (stepOn buf)
 326
 327 -------------------------------------------------------------------------------
 328
 329 lex_demand cont buf =
 330  case read_em [] buf of { (ls,buf') ->
 331  case currentChar# buf' of
 332    'B'# -> cont (ITstrict (ls, True )) (stepOverLexeme (stepOn buf'))
 333    _    -> cont (ITstrict (ls, False)) (stepOverLexeme buf')
 334  }
 335  where
 336    -- code snatched from Demand.lhs
 337   read_em acc buf =
 338    case currentChar# buf of
 339     'L'# -> read_em (WwLazy False : acc) (stepOn buf)
 340     'A'# -> read_em (WwLazy True  : acc) (stepOn buf)
 341     'S'# -> read_em (WwStrict     : acc) (stepOn buf)
 342     'P'# -> read_em (WwPrim       : acc) (stepOn buf)
 343     'E'# -> read_em (WwEnum       : acc) (stepOn buf)
 344     ')'# -> (reverse acc, stepOn buf)
 345     'U'# -> do_unpack DataType True  acc (stepOnBy# buf 2#)
 346     'u'# -> do_unpack DataType False acc (stepOnBy# buf 2#)
 347     'N'# -> do_unpack NewType True  acc (stepOnBy# buf 2#)
 348     'n'# -> do_unpack NewType False acc (stepOnBy# buf 2#)
 349     _    -> (reverse acc, buf)
 350
 351   do_unpack new_or_data wrapper_unpacks acc buf
 352    = case read_em [] buf of
 353       (stuff, rest) -> read_em (WwUnpack new_or_data wrapper_unpacks stuff : acc) rest
 354
 355 ------------------
 356 lex_scc cont buf =
 357  case currentChar# buf of
 358   'C'# -> cont ITsccAllCafs  (stepOverLexeme (stepOn buf))
 359   'D'# -> cont ITsccAllDicts (stepOverLexeme (stepOn buf))
 360   other -> cont ITscc buf
 361
 362 -----------
 363 lex_num :: (IfaceToken -> IfM a) -> (Int -> Int) -> Int# -> IfM a
 364 lex_num cont minus acc# buf =
 365  --trace ("lex_num: "++[C# (currentChar# buf)]) $
 366  case scanNumLit (I# acc#) buf of
 367      (acc',buf') ->
 368        case currentChar# buf' of
 369          '.'# ->
 370              -- this case is not optimised at all, as the
 371              -- presence of floating point numbers in interface
 372              -- files is not that common. (ToDo)
 373             case expandWhile# is_digit (incLexeme buf') of
 374               buf2 -> -- points to first non digit char
 375                 let l = case currentChar# buf2 of
 376                           'e'# -> let buf3 = incLexeme buf2 in
 377                               case currentChar# buf3 of
 378                                 '-'# -> expandWhile# is_digit (incLexeme buf3)
 379                                 _    -> expandWhile# is_digit buf3
 380                           _ -> buf2
 381                 in let v = readRational__ (lexemeToString l) in
 382                    cont (ITrational v) (stepOverLexeme l)
 383
 384          _ -> cont (ITinteger (fromInt (minus acc'))) (stepOverLexeme buf')
 385
 386 -----------
 387 lex_cstring cont buf =
 388  case expandUntilMatch buf "\'\'" of
 389    buf' -> cont (ITstring (lexemeToFastString (setCurrentPos# buf' (negateInt# 2#))))
 390            (stepOverLexeme buf')
 391
 392 ------------------------------------------------------------------------------
 393 -- Character Classes
 394
 395 is_ident, is_symbol, is_any, is_upper, is_digit :: Char# -> Bool
 396
 397 {-# INLINE is_ctype #-}
 398 #if __GLASGOW_HASKELL__ >= 303
 399 is_ctype :: Word8 -> Char# -> Bool
 400 is_ctype mask = \c ->
 401    (indexWord8OffAddr (``char_types'' :: Addr) (ord (C# c)) .&. mask) /= 0
 402 #else
 403 is_ctype :: Int -> Char# -> Bool
 404 is_ctype (I# mask) = \c ->
 405     let (A# ctype) = ``char_types'' :: Addr
 406         flag_word  = int2Word# (ord# (indexCharOffAddr# ctype (ord# c)))
 407     in
 408         (flag_word `and#` (int2Word# mask)) `neWord#` (int2Word# 0#)
 409 #endif
 410
 411 is_ident  = is_ctype 1
 412 is_symbol = is_ctype 2
 413 is_any    = is_ctype 4
 414 is_space  = is_ctype 8
 415 is_upper  = is_ctype 16
 416 is_digit  = is_ctype 32
 417
 418 -----------------------------------------------------------------------------
 419 -- identifiers, symbols etc.
 420
 421 lex_id cont buf =
 422  case expandWhile# is_ident buf of { buf1 ->
 423  case expandWhile# (eqChar# '#'#) buf1 of { buf' -> -- only if GHC extns on
 424  let new_buf = stepOverLexeme buf'
 425      lexeme  = lexemeToFastString buf'
 426  in
 427  case _scc_ "Lex.haskellKeyword" lookupUFM haskellKeywordsFM lexeme of {
 428         Just kwd_token -> --trace ("hkeywd: "++_UNPK_(lexeme)) $
 429                           cont kwd_token new_buf;
 430         Nothing        ->
 431  case lookupUFM ifaceKeywordsFM lexeme of {
 432         Just kwd_token -> --trace ("ifacekeywd: "++_UNPK_(lexeme)) $
 433                           cont kwd_token new_buf;
 434         Nothing        -> --trace ("id: "++_UNPK_(lexeme)) $
 435                           cont (mk_var_token lexeme) new_buf
 436  }}}}
 437
 438 lex_sym cont buf =
 439  case expandWhile# is_symbol buf of
 440    buf'
 441      | is_comment lexeme -> lex_comment cont new_buf
 442      | otherwise         ->
 443            case lookupUFM haskellKeySymsFM lexeme of {
 444                 Just kwd_token -> --trace ("keysym: "++unpackFS lexeme) $
 445                                   cont kwd_token new_buf ;
 446                 Nothing        -> --trace ("sym: "++unpackFS lexeme) $
 447                                   cont (mk_var_token lexeme) new_buf
 448            }
 449         where lexeme = lexemeToFastString buf'
 450               new_buf = stepOverLexeme buf'
 451
 452               is_comment fs
 453                 | len < 2   = False
 454                 | otherwise = trundle 0
 455                   where
 456                    len = lengthFS fs
 457
 458                    trundle n | n == len  = True
 459                              | otherwise = indexFS fs n == '-' && trundle (n+1)
 460
 461 lex_con cont buf =
 462  case expandWhile# is_ident buf of        { buf1 ->
 463  case expandWhile# (eqChar# '#'#) buf1 of { buf' ->
 464  case currentChar# buf' of
 465      '.'# -> munch hiFile
 466      '!'# -> munch hiBootFile
 467      _    -> just_a_conid
 468
 469    where
 470     just_a_conid = --trace ("con: "++unpackFS lexeme) $
 471                    cont (ITconid lexeme) new_buf
 472     lexeme = lexemeToFastString buf'
 473     new_buf = stepOverLexeme buf'
 474     munch hif = lex_qid cont lexeme hif (stepOn new_buf) just_a_conid
 475  }}
 476
 477 lex_qid cont mod hif buf just_a_conid =
 478  case currentChar# buf of
 479   '['# ->       -- Special case for []
 480     case lookAhead# buf 1# of
 481      ']'# -> cont (ITqconid  (mod,SLIT("[]"),hif)) (stepOnBy# buf 2#)
 482      _    -> just_a_conid
 483
 484   '('# ->  -- Special case for (,,,)
 485            -- This *is* necessary to deal with e.g. "instance C PrelBase.(,,)"
 486     case lookAhead# buf 1# of
 487      '#'# -> case lookAhead# buf 2# of
 488                 ','# -> lex_ubx_tuple cont mod hif (stepOnBy# buf 3#)
 489                                 just_a_conid
 490                 _    -> just_a_conid
 491      ')'# -> cont (ITqconid (mod,SLIT("()"),hif)) (stepOnBy# buf 2#)
 492      ','# -> lex_tuple cont mod hif (stepOnBy# buf 2#) just_a_conid
 493      _    -> just_a_conid
 494
 495   '-'# -> case lookAhead# buf 1# of
 496             '>'# -> cont (ITqconid (mod,SLIT("->"),hif)) (stepOnBy# buf 2#)
 497             _    -> lex_id3 cont mod hif buf just_a_conid
 498   _    -> lex_id3 cont mod hif buf just_a_conid
 499
 500 lex_id3 cont mod hif buf just_a_conid
 501   | is_symbol c =
 502      case expandWhile# is_symbol buf of { buf' ->
 503      let
 504       lexeme  = lexemeToFastString buf'
 505       new_buf = stepOverLexeme buf'
 506      in
 507      case lookupUFM haskellKeySymsFM lexeme of {
 508         Just kwd_token -> just_a_conid; -- avoid M.:: etc.
 509         Nothing        -> cont (mk_qvar_token mod hif lexeme) new_buf
 510      }}
 511
 512   | otherwise   =
 513      case expandWhile# is_ident buf of { buf1 ->
 514      if emptyLexeme buf1
 515             then just_a_conid
 516             else
 517      case expandWhile# (eqChar# '#'#) buf1 of { buf' -> -- only if GHC extns on
 518      let
 519       lexeme  = lexemeToFastString buf'
 520       new_buf = stepOverLexeme buf'
 521      in
 522      case _scc_ "Lex.haskellKeyword" lookupUFM haskellKeywordsFM lexeme of {
 523             Just kwd_token -> just_a_conid; -- avoid M.where etc.
 524             Nothing        ->
 525      case lookupUFM ifaceKeywordsFM lexeme of { -- only for iface files
 526             Just kwd_token -> just_a_conid;
 527             Nothing        -> cont (mk_qvar_token mod hif lexeme) new_buf
 528      }}}}
 529   where c = currentChar# buf
 530
 531 mk_var_token pk_str
 532   | is_upper f          = ITconid pk_str
 533         -- _[A-Z] is treated as a constructor in interface files.
 534   | f `eqChar#` '_'# && not (_NULL_ tl)
 535         && (case _HEAD_ tl of { C# g -> is_upper g }) = ITconid pk_str
 536   | is_ident f          = ITvarid pk_str
 537   | f `eqChar#` ':'#    = ITconsym pk_str
 538   | otherwise           = ITvarsym pk_str
 539   where
 540       (C# f) = _HEAD_ pk_str
 541       tl     = _TAIL_ pk_str
 542
 543 mk_qvar_token m hif token =
 544  case mk_var_token token of
 545    ITconid n  -> ITqconid  (m,n,hif)
 546    ITvarid n  -> ITqvarid  (m,n,hif)
 547    ITconsym n -> ITqconsym (m,n,hif)
 548    ITvarsym n -> ITqvarsym (m,n,hif)
 549    _          -> ITunknown (show token)
 550 \end{code}
 551
 552 ----------------------------------------------------------------------------
 553 Horrible stuff for dealing with M.(,,,)
 554
 555 \begin{code}
 556 lex_tuple cont mod hif buf back_off =
 557   go 2 buf
 558   where
 559    go n buf =
 560     case currentChar# buf of
 561       ','# -> go (n+1) (stepOn buf)
 562       ')'# -> cont (ITqconid (mod, snd (mkTupNameStr n),hif)) (stepOn buf)
 563       _    -> back_off
 564
 565 lex_ubx_tuple cont mod hif buf back_off =
 566   go 2 buf
 567   where
 568    go n buf =
 569     case currentChar# buf of
 570       ','# -> go (n+1) (stepOn buf)
 571       '#'# -> case lookAhead# buf 1# of
 572                 ')'# -> cont (ITqconid (mod, snd (mkUbxTupNameStr n), hif))
 573                                  (stepOnBy# buf 2#)
 574                 _    -> back_off
 575       _    -> back_off
 576 \end{code}
 577
 578 -----------------------------------------------------------------------------
 579 Keyword Lists
 580
 581 \begin{code}
 582 ifaceKeywordsFM :: UniqFM IfaceToken
 583 ifaceKeywordsFM = listToUFM $
 584       map (\ (x,y) -> (_PK_ x,y))
 585      [  ("__interface",         ITinterface),
 586         ("__export",            ITexport),
 587         ("__instimport",        ITinstimport),
 588         ("__forall",            ITforall),
 589         ("__letrec",            ITletrec),
 590         ("__coerce",            ITcoerce),
 591         ("__inline",            ITinline),
 592         ("__DEFAULT",           ITdefaultbranch),
 593         ("__bot",               ITbottom),
 594         ("__integer",           ITinteger_lit),
 595         ("__float",             ITfloat_lit),
 596         ("__rational",          ITrational_lit),
 597         ("__addr",              ITaddr_lit),
 598         ("__litlit",            ITlit_lit),
 599         ("__string",            ITstring_lit),
 600         ("__a",                 ITtypeapp),
 601         ("__A",                 ITarity),
 602         ("__P",                 ITspecialise),
 603         ("__C",                 ITnocaf),
 604         ("__u",                 ITunfold NoInlinePragInfo),
 605         ("__U",                 ITunfold IWantToBeINLINEd),
 606         ("__UU",                ITunfold IMustBeINLINEd),
 607         ("__Unot",              ITunfold IMustNotBeINLINEd),
 608         ("__Ux",                ITunfold IAmALoopBreaker),
 609
 610         ("__ccall",             ITccall (False, False, False)),
 611         ("__ccall_GC",          ITccall (False, False, True)),
 612         ("__dyn_ccall",         ITccall (True,  False, False)),
 613         ("__dyn_ccall_GC",      ITccall (True,  False, True)),
 614         ("__casm",              ITccall (False, True,  False)),
 615         ("__dyn_casm",          ITccall (True,  True,  False)),
 616         ("__casm_GC",           ITccall (False, True,  True)),
 617         ("__dyn_casm_GC",       ITccall (True,  True,  True)),
 618
 619         ("/\\",                 ITbiglam)
 620        ]
 621
 622 haskellKeywordsFM = listToUFM $
 623       map (\ (x,y) -> (_PK_ x,y))
 624        [( "case",       ITcase ),
 625         ( "class",      ITclass ),
 626         ( "data",       ITdata ),
 627         ( "default",    ITdefault ),
 628         ( "deriving",   ITderiving ),
 629         ( "do",         ITdo ),
 630         ( "else",       ITelse ),
 631         ( "if",         ITif ),
 632         ( "import",     ITimport ),
 633         ( "in",         ITin ),
 634         ( "infix",      ITinfix ),
 635         ( "infixl",     ITinfixl ),
 636         ( "infixr",     ITinfixr ),
 637         ( "instance",   ITinstance ),
 638         ( "let",        ITlet ),
 639         ( "module",     ITmodule ),
 640         ( "newtype",    ITnewtype ),
 641         ( "of",         ITof ),
 642         ( "then",       ITthen ),
 643         ( "type",       ITtype ),
 644         ( "where",      ITwhere )
 645
 646 --      These three aren't Haskell keywords at all
 647 --      and 'as' is often used as a variable name
 648 --      ( "as",         ITas ),
 649 --      ( "qualified",  ITqualified ),
 650 --      ( "hiding",     IThiding )
 651
 652      ]
 653
 654 haskellKeySymsFM = listToUFM $
 655         map (\ (x,y) -> (_PK_ x,y))
 656       [ ("..",                  ITdotdot)
 657        ,("::",                  ITdcolon)
 658        ,("=",                   ITequal)
 659        ,("\\",                  ITlam)
 660        ,("|",                   ITvbar)
 661        ,("<-",                  ITlarrow)
 662        ,("->",                  ITrarrow)
 663        ,("@",                   ITat)
 664        ,("~",                   ITtilde)
 665        ,("=>",                  ITdarrow)
 666        ,("-",                   ITminus)
 667        ,("!",                   ITbang)
 668        ]
 669 \end{code}
 670
 671 -----------------------------------------------------------------------------
 672 doDiscard rips along really fast, looking for a '#-}',
 673 indicating the end of the pragma we're skipping
 674
 675 \begin{code}
 676 doDiscard inStr buf =
 677  case currentChar# buf of
 678    '#'# | not inStr ->
 679        case lookAhead# buf 1# of { '#'# ->
 680        case lookAhead# buf 2# of { '-'# ->
 681        case lookAhead# buf 3# of { '}'# ->
 682            (lexemeToBuffer buf, stepOverLexeme (setCurrentPos# buf 4#));
 683         _    -> doDiscard inStr (incLexeme buf) };
 684         _    -> doDiscard inStr (incLexeme buf) };
 685         _    -> doDiscard inStr (incLexeme buf) }
 686    '"'# ->
 687        let
 688         odd_slashes buf flg i# =
 689           case lookAhead# buf i# of
 690            '\\'# -> odd_slashes buf (not flg) (i# -# 1#)
 691            _     -> flg
 692        in
 693        case lookAhead# buf (negateInt# 1#) of --backwards, actually
 694          '\\'# -> -- escaping something..
 695            if odd_slashes buf True (negateInt# 2#) then
 696                -- odd number of slashes, " is escaped.
 697               doDiscard inStr (incLexeme buf)
 698            else
 699                -- even number of slashes, \ is escaped.
 700               doDiscard (not inStr) (incLexeme buf)
 701          _ -> case inStr of -- forced to avoid build-up
 702                True  -> doDiscard False (incLexeme buf)
 703                False -> doDiscard True  (incLexeme buf)
 704    _ -> doDiscard inStr (incLexeme buf)
 705
 706 \end{code}
 707
 708 -----------------------------------------------------------------------------
 709
 710 \begin{code}
 711 type IfM a = StringBuffer       -- Input string
 712           -> SrcLoc
 713           -> MaybeErr a {-error-}Message
 714
 715 returnIf   :: a -> IfM a
 716 returnIf a s l = Succeeded a
 717
 718 thenIf     :: IfM a -> (a -> IfM b) -> IfM b
 719 m `thenIf` k = \s l ->
 720         case m s l of
 721                 Succeeded a -> k a s l
 722                 Failed err  -> Failed err
 723
 724 getSrcLocIf :: IfM SrcLoc
 725 getSrcLocIf s l = Succeeded l
 726
 727 happyError :: IfM a
 728 happyError s l = Failed (ifaceParseErr s l)
 729
 730
 731 {-
 732  Note that if the name of the file we're processing ends
 733  with `hi-boot', we accept it on faith as having the right
 734  version. This is done so that .hi-boot files that comes
 735  with hsc don't have to be updated before every release,
 736  *and* it allows us to share .hi-boot files with versions
 737  of hsc that don't have .hi version checking (e.g., ghc-2.10's)
 738
 739  If the version number is 0, the checking is also turned off.
 740  (needed to deal with GHC.hi only!)
 741
 742  Once we can assume we're compiling with a version of ghc that
 743  supports interface file checking, we can drop the special
 744  pleading
 745 -}
 746 checkVersion :: Maybe Integer -> IfM ()
 747 checkVersion mb@(Just v) s l
 748  | (v==0) || (v == fromInt opt_HiVersion) || opt_NoHiCheck = Succeeded ()
 749  | otherwise = Failed (ifaceVersionErr mb l ([]::[IfaceToken]){-Todo-})
 750 checkVersion mb@Nothing  s l
 751  | "hi-boot" `isSuffixOf` (_UNPK_ (srcLocFile l)) = Succeeded ()
 752  | otherwise = Failed (ifaceVersionErr mb l ([]::[IfaceToken]){-Todo-})
 753
 754 -----------------------------------------------------------------
 755
 756 ifaceParseErr :: StringBuffer -> SrcLoc -> Message
 757 ifaceParseErr s l
 758   = hsep [ppr l, ptext SLIT("Interface-file parse error;"),
 759           ptext SLIT("current input ="), text first_bit]
 760   where
 761     first_bit = lexemeToString (stepOnBy# s 100#)
 762
 763 ifaceVersionErr hi_vers l toks
 764   = hsep [ppr l, ptext SLIT("Interface file version error;"),
 765           ptext SLIT("Expected"), int opt_HiVersion,
 766           ptext SLIT("found "), pp_version]
 767     where
 768      pp_version =
 769       case hi_vers of
 770         Nothing -> ptext SLIT("pre ghc-3.02 version")
 771         Just v  -> ptext SLIT("version") <+> integer v
 772
 773 \end{code}