ghc/compiler/reader/Lex.lhs

   1 %
   2 % (c) The GRASP/AQUA Project, Glasgow University, 1992-1998
   3 %
   4 \section[Lexical analysis]{Lexical analysis}
   5
   6 --------------------------------------------------------
   7 [Jan 98]
   8 There's a known bug in here:
   9
  10         If an interface file ends prematurely, Lex tries to
  11         do headFS of an empty FastString.
  12
  13 An example that provokes the error is
  14
  15         f _:_ _forall_ [a] <<<END OF FILE>>>
  16 --------------------------------------------------------
  17
  18 \begin{code}
  19 {-# OPTIONS -#include "ctypes.h" #-}
  20
  21 module Lex (
  22
  23         ifaceParseErr,
  24
  25         -- Monad for parser
  26         IfaceToken(..), lexIface, IfM, thenIf, returnIf, getSrcLocIf,
  27         checkVersion,
  28         happyError,
  29         StringBuffer
  30
  31     ) where
  32
  33 #include "HsVersions.h"
  34
  35 import Char             ( ord, isSpace )
  36 import List             ( isSuffixOf )
  37
  38 import IdInfo           ( InlinePragInfo(..) )
  39 import Name             ( isLowerISO, isUpperISO )
  40 import Module           ( IfaceFlavour, hiFile, hiBootFile )
  41 import PrelMods         ( mkTupNameStr, mkUbxTupNameStr )
  42 import CmdLineOpts      ( opt_IgnoreIfacePragmas, opt_HiVersion, opt_NoHiCheck )
  43 import Demand           ( Demand(..) {- instance Read -} )
  44 import UniqFM           ( UniqFM, listToUFM, lookupUFM)
  45 import BasicTypes       ( NewOrData(..) )
  46 import SrcLoc           ( SrcLoc, incSrcLine, srcLocFile )
  47
  48 import Maybes           ( MaybeErr(..) )
  49 import ErrUtils         ( Message )
  50 import Outputable
  51
  52 import FastString
  53 import StringBuffer
  54 import GlaExts
  55 import ST               ( runST )
  56
  57 #if __GLASGOW_HASKELL__ >= 303
  58 import Bits
  59 import Word
  60 #endif
  61
  62 import Addr
  63 import PrelRead                 ( readRational__ ) -- Glasgow non-std
  64 \end{code}
  65
  66 %************************************************************************
  67 %*                                                                      *
  68 \subsection{Data types}
  69 %*                                                                      *
  70 %************************************************************************
  71
  72 The token data type, fairly un-interesting except from one
  73 constructor, @ITidinfo@, which is used to lazily lex id info (arity,
  74 strictness, unfolding etc).
  75
  76 The Idea/Observation here is that the renamer needs to scan through
  77 all of an interface file before it can continue. But only a fraction
  78 of the information contained in the file turns out to be useful, so
  79 delaying as much as possible of the scanning and parsing of an
  80 interface file Makes Sense (Heap profiles of the compiler
  81 show a reduction in heap usage by at least a factor of two,
  82 post-renamer).
  83
  84 Hence, the interface file lexer spots when value declarations are
  85 being scanned and return the @ITidinfo@ and @ITtype@ constructors
  86 for the type and any other id info for that binding (unfolding, strictness
  87 etc). These constructors are applied to the result of lexing these sub-chunks.
  88
  89 The lexing of the type and id info is all done lazily, of course, so
  90 the scanning (and subsequent parsing) will be done *only* on the ids the
  91 renamer finds out that it is interested in. The rest will just be junked.
  92 Laziness, you know it makes sense :-)
  93
  94 \begin{code}
  95 data IfaceToken
  96   = ITcase                      -- Haskell keywords
  97   | ITclass
  98   | ITdata
  99   | ITdefault
 100   | ITderiving
 101   | ITdo
 102   | ITelse
 103   | ITif
 104   | ITimport
 105   | ITin
 106   | ITinfix
 107   | ITinfixl
 108   | ITinfixr
 109   | ITinstance
 110   | ITlet
 111   | ITmodule
 112   | ITnewtype
 113   | ITof
 114   | ITthen
 115   | ITtype
 116   | ITwhere
 117   | ITas
 118   | ITqualified
 119   | IThiding
 120
 121   | ITinterface                 -- GHC-extension keywords
 122   | ITexport
 123   | ITinstimport
 124   | ITforall
 125   | ITletrec
 126   | ITcoerce
 127   | ITinline
 128   | ITccall (Bool,Bool,Bool)    -- (is_dyn, is_casm, may_gc)
 129   | ITdefaultbranch
 130   | ITbottom
 131   | ITinteger_lit
 132   | ITfloat_lit
 133   | ITrational_lit
 134   | ITaddr_lit
 135   | ITlit_lit
 136   | ITstring_lit
 137   | ITtypeapp
 138   | ITarity
 139   | ITspecialise
 140   | ITnocaf
 141   | ITunfold InlinePragInfo
 142   | ITstrict ([Demand], Bool)
 143   | ITscc
 144   | ITsccAllCafs
 145   | ITsccAllDicts
 146
 147   | ITdotdot                    -- reserved symbols
 148   | ITdcolon
 149   | ITequal
 150   | ITlam
 151   | ITvbar
 152   | ITlarrow
 153   | ITrarrow
 154   | ITat
 155   | ITtilde
 156   | ITdarrow
 157   | ITminus
 158   | ITbang
 159
 160   | ITbiglam                    -- GHC-extension symbols
 161
 162   | ITocurly                    -- special symbols
 163   | ITccurly
 164   | ITobrack
 165   | ITcbrack
 166   | IToparen
 167   | ITcparen
 168   | IToubxparen
 169   | ITcubxparen
 170   | ITsemi
 171   | ITcomma
 172
 173   | ITvarid   FAST_STRING       -- identifiers
 174   | ITconid   FAST_STRING
 175   | ITvarsym  FAST_STRING
 176   | ITconsym  FAST_STRING
 177   | ITqvarid  (FAST_STRING,FAST_STRING,IfaceFlavour)
 178   | ITqconid  (FAST_STRING,FAST_STRING,IfaceFlavour)
 179   | ITqvarsym (FAST_STRING,FAST_STRING,IfaceFlavour)
 180   | ITqconsym (FAST_STRING,FAST_STRING,IfaceFlavour)
 181
 182   | ITpragma StringBuffer
 183
 184   | ITchar Char
 185   | ITstring FAST_STRING
 186   | ITinteger Integer
 187   | ITrational Rational
 188
 189   | ITunknown String            -- Used when the lexer can't make sense of it
 190   | ITeof                       -- end of file token
 191   deriving Text -- debugging
 192 \end{code}
 193
 194 %************************************************************************
 195 %*                                                                      *
 196 \subsection{The lexical analyser}
 197 %*                                                                      *
 198 %************************************************************************
 199
 200 \begin{code}
 201 lexIface :: (IfaceToken -> IfM a) -> IfM a
 202 lexIface cont buf =
 203  _scc_ "Lexer"
 204 -- if bufferExhausted buf then
 205 --  []
 206 -- else
 207 --  trace ("Lexer: '"++[C# (currentChar# buf)]++"'") $
 208   case currentChar# buf of
 209       -- whitespace and comments, ignore.
 210     ' '#  -> lexIface cont (stepOn buf)
 211     '\t'# -> lexIface cont (stepOn buf)
 212     '\n'# -> \ loc -> lexIface cont (stepOn buf) (incSrcLine loc)
 213
 214 -- Numbers and comments
 215     '-'#  ->
 216       case lookAhead# buf 1# of
 217 --        '-'# -> lex_comment cont (stepOnBy# buf 2#)
 218         c    ->
 219           if is_digit c
 220           then lex_num cont (negate) (ord# c -# ord# '0'#) (incLexeme (incLexeme buf))
 221           else lex_sym cont buf
 222
 223     '{'# ->                             -- look for "{-##" special iface pragma
 224         case lookAhead# buf 1# of
 225            '-'# -> case lookAhead# buf 2# of
 226                     '#'# -> case lookAhead# buf 3# of
 227                                 '#'# ->
 228                                    let (lexeme, buf')
 229                                           = doDiscard False (stepOnBy# buf 4#) in
 230                                    cont (ITpragma lexeme) buf'
 231                                 _ ->  lex_nested_comment (lexIface cont) buf
 232                     _    -> cont ITocurly (stepOn buf)
 233                             -- lex_nested_comment (lexIface cont) buf
 234            _ -> cont ITocurly (stepOn buf)
 235
 236     -- special symbols ----------------------------------------------------
 237     '('# ->
 238          case prefixMatch (stepOn buf) "..)" of
 239            Just buf' ->  cont ITdotdot (stepOverLexeme buf')
 240            Nothing ->
 241             case lookAhead# buf 1# of
 242               '#'# -> cont IToubxparen (stepOnBy# buf 2#)
 243               _    -> cont IToparen (stepOn buf)
 244     ')'# -> cont ITcparen (stepOn buf)
 245     '}'# -> cont ITccurly (stepOn buf)
 246     '#'# -> case lookAhead# buf 1# of
 247                 ')'# -> cont ITcubxparen (stepOnBy# buf 2#)
 248                 _    -> lex_sym cont (incLexeme buf)
 249     '['# -> cont ITobrack (stepOn buf)
 250     ']'# -> cont ITcbrack (stepOn buf)
 251     ','# -> cont ITcomma  (stepOn buf)
 252     ';'# -> cont ITsemi   (stepOn buf)
 253
 254     -- strings/characters -------------------------------------------------
 255     '\"'#{-"-} -> case untilEndOfString# (stepOn buf) of
 256               buf' ->
 257                   -- the string literal does *not* include the dquotes
 258                 case lexemeToFastString buf' of
 259                  v -> cont (ITstring v) (stepOn (stepOverLexeme buf'))
 260
 261     '\''# -> --
 262              -- untilEndOfChar# extends the current lexeme until
 263              -- it hits a non-escaped single quote. The lexeme of the
 264              -- StringBuffer returned does *not* include the closing quote,
 265              -- hence we augment the lexeme and make sure to add the
 266              -- starting quote, before `read'ing the string.
 267              --
 268              case untilEndOfChar# (stepOn buf) of
 269                buf' -> case reads ('\'':lexemeToString (incLexeme buf')) of
 270                         [  (ch, rest)] -> cont (ITchar ch) (stepOverLexeme (incLexeme buf'))
 271
 272     -- strictness pragma and __scc treated specially.
 273     '_'# ->
 274          case lookAhead# buf 1# of
 275            '_'# -> case lookAhead# buf 2# of
 276                     'S'# ->
 277                         lex_demand cont (stepOnUntil (not . isSpace)
 278                                         (stepOnBy# buf 3#)) -- past __S
 279                     's'# ->
 280                         case prefixMatch (stepOnBy# buf 3#) "cc" of
 281                                Just buf' -> lex_scc cont (stepOverLexeme buf')
 282                                Nothing   -> lex_id cont buf
 283                     _ -> lex_id cont buf
 284            _    -> lex_id cont buf
 285
 286 -- ``thingy'' form for casm
 287     '`'# ->
 288             case lookAhead# buf 1# of
 289               '`'# -> lex_cstring cont (stepOnBy# buf 2#) -- remove the `s and go.
 290               _    -> lex_sym cont (incLexeme buf)         -- add ` to lexeme and assume
 291                                                      -- scanning an id of some sort.
 292
 293     '\NUL'# ->
 294             if bufferExhausted (stepOn buf) then
 295                cont ITeof buf
 296             else
 297                trace "lexIface: misplaced NUL?" $
 298                cont (ITunknown "\NUL") (stepOn buf)
 299
 300     c | is_digit  c -> lex_num cont (id) (ord# c -# ord# '0'#) (incLexeme buf)
 301       | is_symbol c -> lex_sym cont buf
 302       | is_upper  c -> lex_con cont buf
 303       | is_ident  c -> lex_id  cont buf
 304
 305 --  where
 306 lex_comment cont buf =
 307 --   _trace ("comment: "++[C# (currentChar# buf)]) $
 308    case untilChar# buf '\n'# of {buf' -> lexIface cont (stepOverLexeme buf')}
 309
 310 -------------------------------------------------------------------------------
 311
 312 lex_nested_comment cont buf =
 313   case currentChar# buf of
 314         '-'# -> case lookAhead# buf 1# of
 315                  '}'# -> cont (stepOnBy# buf 2#)
 316                  _    -> lex_nested_comment cont (stepOn buf)
 317
 318         '{'# -> case lookAhead# buf 1# of
 319                  '-'# -> lex_nested_comment
 320                                 (lex_nested_comment cont)
 321                                 (stepOnBy# buf 2#)
 322                  _    -> lex_nested_comment cont (stepOn buf)
 323
 324         _   -> lex_nested_comment cont (stepOn buf)
 325
 326 -------------------------------------------------------------------------------
 327
 328 lex_demand cont buf =
 329  case read_em [] buf of { (ls,buf') ->
 330  case currentChar# buf' of
 331    'B'# -> cont (ITstrict (ls, True )) (stepOverLexeme (stepOn buf'))
 332    _    -> cont (ITstrict (ls, False)) (stepOverLexeme buf')
 333  }
 334  where
 335    -- code snatched from Demand.lhs
 336   read_em acc buf =
 337    case currentChar# buf of
 338     'L'# -> read_em (WwLazy False : acc) (stepOn buf)
 339     'A'# -> read_em (WwLazy True  : acc) (stepOn buf)
 340     'S'# -> read_em (WwStrict     : acc) (stepOn buf)
 341     'P'# -> read_em (WwPrim       : acc) (stepOn buf)
 342     'E'# -> read_em (WwEnum       : acc) (stepOn buf)
 343     ')'# -> (reverse acc, stepOn buf)
 344     'U'# -> do_unpack DataType True  acc (stepOnBy# buf 2#)
 345     'u'# -> do_unpack DataType False acc (stepOnBy# buf 2#)
 346     'N'# -> do_unpack NewType True  acc (stepOnBy# buf 2#)
 347     'n'# -> do_unpack NewType False acc (stepOnBy# buf 2#)
 348     _    -> (reverse acc, buf)
 349
 350   do_unpack new_or_data wrapper_unpacks acc buf
 351    = case read_em [] buf of
 352       (stuff, rest) -> read_em (WwUnpack new_or_data wrapper_unpacks stuff : acc) rest
 353
 354 ------------------
 355 lex_scc cont buf =
 356  case currentChar# buf of
 357   'C'# -> cont ITsccAllCafs  (stepOverLexeme (stepOn buf))
 358   'D'# -> cont ITsccAllDicts (stepOverLexeme (stepOn buf))
 359   other -> cont ITscc buf
 360
 361 -----------
 362 lex_num :: (IfaceToken -> IfM a) -> (Int -> Int) -> Int# -> IfM a
 363 lex_num cont minus acc# buf =
 364  --trace ("lex_num: "++[C# (currentChar# buf)]) $
 365  case scanNumLit (I# acc#) buf of
 366      (acc',buf') ->
 367        case currentChar# buf' of
 368          '.'# ->
 369              -- this case is not optimised at all, as the
 370              -- presence of floating point numbers in interface
 371              -- files is not that common. (ToDo)
 372             case expandWhile# is_digit (incLexeme buf') of
 373               buf2 -> -- points to first non digit char
 374                 let l = case currentChar# buf2 of
 375                           'e'# -> let buf3 = incLexeme buf2 in
 376                               case currentChar# buf3 of
 377                                 '-'# -> expandWhile# is_digit (incLexeme buf3)
 378                                 _    -> expandWhile# is_digit buf3
 379                           _ -> buf2
 380                 in let v = readRational__ (lexemeToString l) in
 381                    cont (ITrational v) (stepOverLexeme l)
 382
 383          _ -> cont (ITinteger (fromInt (minus acc'))) (stepOverLexeme buf')
 384
 385 -----------
 386 lex_cstring cont buf =
 387  case expandUntilMatch buf "\'\'" of
 388    buf' -> cont (ITstring (lexemeToFastString (setCurrentPos# buf' (negateInt# 2#))))
 389            (stepOverLexeme buf')
 390
 391 ------------------------------------------------------------------------------
 392 -- Character Classes
 393
 394 is_ident, is_symbol, is_any, is_upper, is_digit :: Char# -> Bool
 395
 396 {-# INLINE is_ctype #-}
 397 #if __GLASGOW_HASKELL__ >= 303
 398 is_ctype :: Word8 -> Char# -> Bool
 399 is_ctype mask = \c ->
 400    (indexWord8OffAddr (``char_types'' :: Addr) (ord (C# c)) .&. mask) /= 0
 401 #else
 402 is_ctype :: Int -> Char# -> Bool
 403 is_ctype (I# mask) = \c ->
 404     let (A# ctype) = ``char_types'' :: Addr
 405         flag_word  = int2Word# (ord# (indexCharOffAddr# ctype (ord# c)))
 406     in
 407         (flag_word `and#` (int2Word# mask)) `neWord#` (int2Word# 0#)
 408 #endif
 409
 410 is_ident  = is_ctype 1
 411 is_symbol = is_ctype 2
 412 is_any    = is_ctype 4
 413 is_space  = is_ctype 8
 414 is_upper  = is_ctype 16
 415 is_digit  = is_ctype 32
 416
 417 -----------------------------------------------------------------------------
 418 -- identifiers, symbols etc.
 419
 420 lex_id cont buf =
 421  case expandWhile# is_ident buf of { buf1 ->
 422  case expandWhile# (eqChar# '#'#) buf1 of { buf' -> -- only if GHC extns on
 423  let new_buf = stepOverLexeme buf'
 424      lexeme  = lexemeToFastString buf'
 425  in
 426  case _scc_ "Lex.haskellKeyword" lookupUFM haskellKeywordsFM lexeme of {
 427         Just kwd_token -> --trace ("hkeywd: "++_UNPK_(lexeme)) $
 428                           cont kwd_token new_buf;
 429         Nothing        ->
 430  case lookupUFM ifaceKeywordsFM lexeme of {
 431         Just kwd_token -> --trace ("ifacekeywd: "++_UNPK_(lexeme)) $
 432                           cont kwd_token new_buf;
 433         Nothing        -> --trace ("id: "++_UNPK_(lexeme)) $
 434                           cont (mk_var_token lexeme) new_buf
 435  }}}}
 436
 437 lex_sym cont buf =
 438  case expandWhile# is_symbol buf of
 439    buf'
 440      | is_comment lexeme -> lex_comment cont new_buf
 441      | otherwise         ->
 442            case lookupUFM haskellKeySymsFM lexeme of {
 443                 Just kwd_token -> --trace ("keysym: "++unpackFS lexeme) $
 444                                   cont kwd_token new_buf ;
 445                 Nothing        -> --trace ("sym: "++unpackFS lexeme) $
 446                                   cont (mk_var_token lexeme) new_buf
 447            }
 448         where lexeme = lexemeToFastString buf'
 449               new_buf = stepOverLexeme buf'
 450
 451               is_comment fs
 452                 | len < 2   = False
 453                 | otherwise = trundle 0
 454                   where
 455                    len = lengthFS fs
 456
 457                    trundle n | n == len  = True
 458                              | otherwise = indexFS fs n == '-' && trundle (n+1)
 459
 460 lex_con cont buf =
 461  case expandWhile# is_ident buf of        { buf1 ->
 462  case expandWhile# (eqChar# '#'#) buf1 of { buf' ->
 463  case currentChar# buf' of
 464      '.'# -> munch hiFile
 465      '!'# -> munch hiBootFile
 466      _    -> just_a_conid
 467
 468    where
 469     just_a_conid = --trace ("con: "++unpackFS lexeme) $
 470                    cont (ITconid lexeme) new_buf
 471     lexeme = lexemeToFastString buf'
 472     new_buf = stepOverLexeme buf'
 473     munch hif = lex_qid cont lexeme hif (stepOn new_buf) just_a_conid
 474  }}
 475
 476 lex_qid cont mod hif buf just_a_conid =
 477  case currentChar# buf of
 478   '['# ->       -- Special case for []
 479     case lookAhead# buf 1# of
 480      ']'# -> cont (ITqconid  (mod,SLIT("[]"),hif)) (stepOnBy# buf 2#)
 481      _    -> just_a_conid
 482
 483   '('# ->  -- Special case for (,,,)
 484            -- This *is* necessary to deal with e.g. "instance C PrelBase.(,,)"
 485     case lookAhead# buf 1# of
 486      '#'# -> case lookAhead# buf 2# of
 487                 ','# -> lex_ubx_tuple cont mod hif (stepOnBy# buf 3#)
 488                                 just_a_conid
 489                 _    -> just_a_conid
 490      ')'# -> cont (ITqconid (mod,SLIT("()"),hif)) (stepOnBy# buf 2#)
 491      ','# -> lex_tuple cont mod hif (stepOnBy# buf 2#) just_a_conid
 492      _    -> just_a_conid
 493
 494   '-'# -> case lookAhead# buf 1# of
 495             '>'# -> cont (ITqconid (mod,SLIT("->"),hif)) (stepOnBy# buf 2#)
 496             _    -> lex_id3 cont mod hif buf just_a_conid
 497   _    -> lex_id3 cont mod hif buf just_a_conid
 498
 499 lex_id3 cont mod hif buf just_a_conid
 500   | is_symbol c =
 501      case expandWhile# is_symbol buf of { buf' ->
 502      let
 503       lexeme  = lexemeToFastString buf'
 504       new_buf = stepOverLexeme buf'
 505      in
 506      case lookupUFM haskellKeySymsFM lexeme of {
 507         Just kwd_token -> just_a_conid; -- avoid M.:: etc.
 508         Nothing        -> cont (mk_qvar_token mod hif lexeme) new_buf
 509      }}
 510
 511   | otherwise   =
 512      case expandWhile# is_ident buf of { buf1 ->
 513      if emptyLexeme buf1
 514             then just_a_conid
 515             else
 516      case expandWhile# (eqChar# '#'#) buf1 of { buf' -> -- only if GHC extns on
 517      let
 518       lexeme  = lexemeToFastString buf'
 519       new_buf = stepOverLexeme buf'
 520      in
 521      case _scc_ "Lex.haskellKeyword" lookupUFM haskellKeywordsFM lexeme of {
 522             Just kwd_token -> just_a_conid; -- avoid M.where etc.
 523             Nothing        ->
 524      case lookupUFM ifaceKeywordsFM lexeme of { -- only for iface files
 525             Just kwd_token -> just_a_conid;
 526             Nothing        -> cont (mk_qvar_token mod hif lexeme) new_buf
 527      }}}}
 528   where c = currentChar# buf
 529
 530 mk_var_token pk_str
 531   | is_upper f          = ITconid pk_str
 532         -- _[A-Z] is treated as a constructor in interface files.
 533   | f `eqChar#` '_'# && not (_NULL_ tl)
 534         && (case _HEAD_ tl of { C# g -> is_upper g }) = ITconid pk_str
 535   | is_ident f          = ITvarid pk_str
 536   | f `eqChar#` ':'#    = ITconsym pk_str
 537   | otherwise           = ITvarsym pk_str
 538   where
 539       (C# f) = _HEAD_ pk_str
 540       tl     = _TAIL_ pk_str
 541
 542 mk_qvar_token m hif token =
 543  case mk_var_token token of
 544    ITconid n  -> ITqconid  (m,n,hif)
 545    ITvarid n  -> ITqvarid  (m,n,hif)
 546    ITconsym n -> ITqconsym (m,n,hif)
 547    ITvarsym n -> ITqvarsym (m,n,hif)
 548    _          -> ITunknown (show token)
 549 \end{code}
 550
 551 ----------------------------------------------------------------------------
 552 Horrible stuff for dealing with M.(,,,)
 553
 554 \begin{code}
 555 lex_tuple cont mod hif buf back_off =
 556   go 2 buf
 557   where
 558    go n buf =
 559     case currentChar# buf of
 560       ','# -> go (n+1) (stepOn buf)
 561       ')'# -> cont (ITqconid (mod, snd (mkTupNameStr n),hif)) (stepOn buf)
 562       _    -> back_off
 563
 564 lex_ubx_tuple cont mod hif buf back_off =
 565   go 2 buf
 566   where
 567    go n buf =
 568     case currentChar# buf of
 569       ','# -> go (n+1) (stepOn buf)
 570       '#'# -> case lookAhead# buf 1# of
 571                 ')'# -> cont (ITqconid (mod, snd (mkUbxTupNameStr n), hif))
 572                                  (stepOnBy# buf 2#)
 573                 _    -> back_off
 574       _    -> back_off
 575 \end{code}
 576
 577 -----------------------------------------------------------------------------
 578 Keyword Lists
 579
 580 \begin{code}
 581 ifaceKeywordsFM :: UniqFM IfaceToken
 582 ifaceKeywordsFM = listToUFM $
 583       map (\ (x,y) -> (_PK_ x,y))
 584      [  ("__interface",         ITinterface),
 585         ("__export",            ITexport),
 586         ("__instimport",        ITinstimport),
 587         ("__forall",            ITforall),
 588         ("__letrec",            ITletrec),
 589         ("__coerce",            ITcoerce),
 590         ("__inline",            ITinline),
 591         ("__DEFAULT",           ITdefaultbranch),
 592         ("__bot",               ITbottom),
 593         ("__integer",           ITinteger_lit),
 594         ("__float",             ITfloat_lit),
 595         ("__rational",          ITrational_lit),
 596         ("__addr",              ITaddr_lit),
 597         ("__litlit",            ITlit_lit),
 598         ("__string",            ITstring_lit),
 599         ("__a",                 ITtypeapp),
 600         ("__A",                 ITarity),
 601         ("__P",                 ITspecialise),
 602         ("__C",                 ITnocaf),
 603         ("__u",                 ITunfold NoInlinePragInfo),
 604         ("__U",                 ITunfold IWantToBeINLINEd),
 605         ("__UU",                ITunfold IMustBeINLINEd),
 606         ("__Unot",              ITunfold IMustNotBeINLINEd),
 607         ("__Ux",                ITunfold IAmALoopBreaker),
 608
 609         ("__ccall",             ITccall (False, False, False)),
 610         ("__ccall_GC",          ITccall (False, False, True)),
 611         ("__dyn_ccall",         ITccall (True,  False, False)),
 612         ("__dyn_ccall_GC",      ITccall (True,  False, True)),
 613         ("__casm",              ITccall (False, True,  False)),
 614         ("__dyn_casm",          ITccall (True,  True,  False)),
 615         ("__casm_GC",           ITccall (False, True,  True)),
 616         ("__dyn_casm_GC",       ITccall (True,  True,  True)),
 617
 618         ("/\\",                 ITbiglam)
 619        ]
 620
 621 haskellKeywordsFM = listToUFM $
 622       map (\ (x,y) -> (_PK_ x,y))
 623        [( "case",       ITcase ),
 624         ( "class",      ITclass ),
 625         ( "data",       ITdata ),
 626         ( "default",    ITdefault ),
 627         ( "deriving",   ITderiving ),
 628         ( "do",         ITdo ),
 629         ( "else",       ITelse ),
 630         ( "if",         ITif ),
 631         ( "import",     ITimport ),
 632         ( "in",         ITin ),
 633         ( "infix",      ITinfix ),
 634         ( "infixl",     ITinfixl ),
 635         ( "infixr",     ITinfixr ),
 636         ( "instance",   ITinstance ),
 637         ( "let",        ITlet ),
 638         ( "module",     ITmodule ),
 639         ( "newtype",    ITnewtype ),
 640         ( "of",         ITof ),
 641         ( "then",       ITthen ),
 642         ( "type",       ITtype ),
 643         ( "where",      ITwhere )
 644
 645 --      These three aren't Haskell keywords at all
 646 --      and 'as' is often used as a variable name
 647 --      ( "as",         ITas ),
 648 --      ( "qualified",  ITqualified ),
 649 --      ( "hiding",     IThiding )
 650
 651      ]
 652
 653 haskellKeySymsFM = listToUFM $
 654         map (\ (x,y) -> (_PK_ x,y))
 655       [ ("..",                  ITdotdot)
 656        ,("::",                  ITdcolon)
 657        ,("=",                   ITequal)
 658        ,("\\",                  ITlam)
 659        ,("|",                   ITvbar)
 660        ,("<-",                  ITlarrow)
 661        ,("->",                  ITrarrow)
 662        ,("@",                   ITat)
 663        ,("~",                   ITtilde)
 664        ,("=>",                  ITdarrow)
 665        ,("-",                   ITminus)
 666        ,("!",                   ITbang)
 667        ]
 668 \end{code}
 669
 670 -----------------------------------------------------------------------------
 671 doDiscard rips along really fast, looking for a '#-}',
 672 indicating the end of the pragma we're skipping
 673
 674 \begin{code}
 675 doDiscard inStr buf =
 676  case currentChar# buf of
 677    '#'# | not inStr ->
 678        case lookAhead# buf 1# of { '#'# ->
 679        case lookAhead# buf 2# of { '-'# ->
 680        case lookAhead# buf 3# of { '}'# ->
 681            (lexemeToBuffer buf, stepOverLexeme (setCurrentPos# buf 4#));
 682         _    -> doDiscard inStr (incLexeme buf) };
 683         _    -> doDiscard inStr (incLexeme buf) };
 684         _    -> doDiscard inStr (incLexeme buf) }
 685    '"'# ->
 686        let
 687         odd_slashes buf flg i# =
 688           case lookAhead# buf i# of
 689            '\\'# -> odd_slashes buf (not flg) (i# -# 1#)
 690            _     -> flg
 691        in
 692        case lookAhead# buf (negateInt# 1#) of --backwards, actually
 693          '\\'# -> -- escaping something..
 694            if odd_slashes buf True (negateInt# 2#) then
 695                -- odd number of slashes, " is escaped.
 696               doDiscard inStr (incLexeme buf)
 697            else
 698                -- even number of slashes, \ is escaped.
 699               doDiscard (not inStr) (incLexeme buf)
 700          _ -> case inStr of -- forced to avoid build-up
 701                True  -> doDiscard False (incLexeme buf)
 702                False -> doDiscard True  (incLexeme buf)
 703    _ -> doDiscard inStr (incLexeme buf)
 704
 705 \end{code}
 706
 707 -----------------------------------------------------------------------------
 708
 709 \begin{code}
 710 type IfM a = StringBuffer       -- Input string
 711           -> SrcLoc
 712           -> MaybeErr a {-error-}Message
 713
 714 returnIf   :: a -> IfM a
 715 returnIf a s l = Succeeded a
 716
 717 thenIf     :: IfM a -> (a -> IfM b) -> IfM b
 718 m `thenIf` k = \s l ->
 719         case m s l of
 720                 Succeeded a -> k a s l
 721                 Failed err  -> Failed err
 722
 723 getSrcLocIf :: IfM SrcLoc
 724 getSrcLocIf s l = Succeeded l
 725
 726 happyError :: IfM a
 727 happyError s l = Failed (ifaceParseErr s l)
 728
 729
 730 {-
 731  Note that if the name of the file we're processing ends
 732  with `hi-boot', we accept it on faith as having the right
 733  version. This is done so that .hi-boot files that comes
 734  with hsc don't have to be updated before every release,
 735  *and* it allows us to share .hi-boot files with versions
 736  of hsc that don't have .hi version checking (e.g., ghc-2.10's)
 737
 738  If the version number is 0, the checking is also turned off.
 739  (needed to deal with GHC.hi only!)
 740
 741  Once we can assume we're compiling with a version of ghc that
 742  supports interface file checking, we can drop the special
 743  pleading
 744 -}
 745 checkVersion :: Maybe Integer -> IfM ()
 746 checkVersion mb@(Just v) s l
 747  | (v==0) || (v == fromInt opt_HiVersion) || opt_NoHiCheck = Succeeded ()
 748  | otherwise = Failed (ifaceVersionErr mb l ([]::[IfaceToken]){-Todo-})
 749 checkVersion mb@Nothing  s l
 750  | "hi-boot" `isSuffixOf` (_UNPK_ (srcLocFile l)) = Succeeded ()
 751  | otherwise = Failed (ifaceVersionErr mb l ([]::[IfaceToken]){-Todo-})
 752
 753 -----------------------------------------------------------------
 754
 755 ifaceParseErr :: StringBuffer -> SrcLoc -> Message
 756 ifaceParseErr s l
 757   = hsep [ppr l, ptext SLIT("Interface-file parse error;"),
 758           ptext SLIT("current input ="), text first_bit]
 759   where
 760     first_bit = lexemeToString (stepOnBy# s 100#)
 761
 762 ifaceVersionErr hi_vers l toks
 763   = hsep [ppr l, ptext SLIT("Interface file version error;"),
 764           ptext SLIT("Expected"), int opt_HiVersion,
 765           ptext SLIT("found "), pp_version]
 766     where
 767      pp_version =
 768       case hi_vers of
 769         Nothing -> ptext SLIT("pre ghc-3.02 version")
 770         Just v  -> ptext SLIT("version") <+> integer v
 771
 772 \end{code}