ghc/compiler/reader/Lex.lhs

   1 %
   2 % (c) The GRASP/AQUA Project, Glasgow University, 1992-1998
   3 %
   4 \section[Lexical analysis]{Lexical analysis}
   5
   6 --------------------------------------------------------
   7 [Jan 98]
   8 There's a known bug in here:
   9
  10         If an interface file ends prematurely, Lex tries to
  11         do headFS of an empty FastString.
  12
  13 An example that provokes the error is
  14
  15         f _:_ _forall_ [a] <<<END OF FILE>>>
  16 --------------------------------------------------------
  17
  18 \begin{code}
  19 {-# OPTIONS -#include "ctypes.h" #-}
  20
  21 module Lex (
  22
  23         ifaceParseErr,
  24
  25         -- Monad for parser
  26         IfaceToken(..), lexIface, IfM, thenIf, returnIf, getSrcLocIf,
  27         checkVersion,
  28         happyError,
  29         StringBuffer
  30
  31     ) where
  32
  33 #include "HsVersions.h"
  34
  35 import Char             ( ord, isSpace )
  36 import List             ( isSuffixOf )
  37
  38 import IdInfo           ( InlinePragInfo(..), CprInfo(..) )
  39 import Name             ( isLowerISO, isUpperISO )
  40 import Module           ( IfaceFlavour, hiFile, hiBootFile )
  41 import PrelMods         ( mkTupNameStr, mkUbxTupNameStr )
  42 import CmdLineOpts      ( opt_IgnoreIfacePragmas, opt_HiVersion, opt_NoHiCheck )
  43 import Demand           ( Demand(..) {- instance Read -} )
  44 import UniqFM           ( UniqFM, listToUFM, lookupUFM)
  45 import BasicTypes       ( NewOrData(..) )
  46 import SrcLoc           ( SrcLoc, incSrcLine, srcLocFile )
  47
  48 import Maybes           ( MaybeErr(..) )
  49 import ErrUtils         ( Message )
  50 import Outputable
  51
  52 import FastString
  53 import StringBuffer
  54 import GlaExts
  55 import ST               ( runST )
  56
  57 #if __GLASGOW_HASKELL__ >= 303
  58 import Bits
  59 import Word
  60 #endif
  61
  62 import Addr
  63 import PrelRead                 ( readRational__ ) -- Glasgow non-std
  64 \end{code}
  65
  66 %************************************************************************
  67 %*                                                                      *
  68 \subsection{Data types}
  69 %*                                                                      *
  70 %************************************************************************
  71
  72 The token data type, fairly un-interesting except from one
  73 constructor, @ITidinfo@, which is used to lazily lex id info (arity,
  74 strictness, unfolding etc).
  75
  76 The Idea/Observation here is that the renamer needs to scan through
  77 all of an interface file before it can continue. But only a fraction
  78 of the information contained in the file turns out to be useful, so
  79 delaying as much as possible of the scanning and parsing of an
  80 interface file Makes Sense (Heap profiles of the compiler
  81 show a reduction in heap usage by at least a factor of two,
  82 post-renamer).
  83
  84 Hence, the interface file lexer spots when value declarations are
  85 being scanned and return the @ITidinfo@ and @ITtype@ constructors
  86 for the type and any other id info for that binding (unfolding, strictness
  87 etc). These constructors are applied to the result of lexing these sub-chunks.
  88
  89 The lexing of the type and id info is all done lazily, of course, so
  90 the scanning (and subsequent parsing) will be done *only* on the ids the
  91 renamer finds out that it is interested in. The rest will just be junked.
  92 Laziness, you know it makes sense :-)
  93
  94 \begin{code}
  95 data IfaceToken
  96   = ITcase                      -- Haskell keywords
  97   | ITclass
  98   | ITdata
  99   | ITdefault
 100   | ITderiving
 101   | ITdo
 102   | ITelse
 103   | ITif
 104   | ITimport
 105   | ITin
 106   | ITinfix
 107   | ITinfixl
 108   | ITinfixr
 109   | ITinstance
 110   | ITlet
 111   | ITmodule
 112   | ITnewtype
 113   | ITof
 114   | ITthen
 115   | ITtype
 116   | ITwhere
 117   | ITas
 118   | ITqualified
 119   | IThiding
 120
 121   | ITinterface                 -- GHC-extension keywords
 122   | ITexport
 123   | ITinstimport
 124   | ITforall
 125   | ITletrec
 126   | ITcoerce
 127   | ITinline
 128   | ITccall (Bool,Bool,Bool)    -- (is_dyn, is_casm, may_gc)
 129   | ITdefaultbranch
 130   | ITbottom
 131   | ITinteger_lit
 132   | ITfloat_lit
 133   | ITrational_lit
 134   | ITaddr_lit
 135   | ITlit_lit
 136   | ITstring_lit
 137   | ITtypeapp
 138   | ITarity
 139   | ITspecialise
 140   | ITnocaf
 141   | ITunfold InlinePragInfo
 142   | ITstrict ([Demand], Bool)
 143   | ITcprinfo (CprInfo)
 144   | ITscc
 145   | ITsccAllCafs
 146
 147   | ITdotdot                    -- reserved symbols
 148   | ITdcolon
 149   | ITequal
 150   | ITlam
 151   | ITvbar
 152   | ITlarrow
 153   | ITrarrow
 154   | ITat
 155   | ITtilde
 156   | ITdarrow
 157   | ITminus
 158   | ITbang
 159
 160   | ITbiglam                    -- GHC-extension symbols
 161
 162   | ITocurly                    -- special symbols
 163   | ITccurly
 164   | ITobrack
 165   | ITcbrack
 166   | IToparen
 167   | ITcparen
 168   | IToubxparen
 169   | ITcubxparen
 170   | ITsemi
 171   | ITcomma
 172
 173   | ITvarid   FAST_STRING       -- identifiers
 174   | ITconid   FAST_STRING
 175   | ITvarsym  FAST_STRING
 176   | ITconsym  FAST_STRING
 177   | ITqvarid  (FAST_STRING,FAST_STRING,IfaceFlavour)
 178   | ITqconid  (FAST_STRING,FAST_STRING,IfaceFlavour)
 179   | ITqvarsym (FAST_STRING,FAST_STRING,IfaceFlavour)
 180   | ITqconsym (FAST_STRING,FAST_STRING,IfaceFlavour)
 181
 182   | ITpragma StringBuffer
 183
 184   | ITchar Char
 185   | ITstring FAST_STRING
 186   | ITinteger Integer
 187   | ITrational Rational
 188
 189   | ITunknown String            -- Used when the lexer can't make sense of it
 190   | ITeof                       -- end of file token
 191   deriving Text -- debugging
 192 \end{code}
 193
 194 %************************************************************************
 195 %*                                                                      *
 196 \subsection{The lexical analyser}
 197 %*                                                                      *
 198 %************************************************************************
 199
 200 \begin{code}
 201 lexIface :: (IfaceToken -> IfM a) -> IfM a
 202 lexIface cont buf =
 203  _scc_ "Lexer"
 204 -- if bufferExhausted buf then
 205 --  []
 206 -- else
 207 --  trace ("Lexer: '"++[C# (currentChar# buf)]++"'") $
 208   case currentChar# buf of
 209       -- whitespace and comments, ignore.
 210     ' '#  -> lexIface cont (stepOn buf)
 211     '\t'# -> lexIface cont (stepOn buf)
 212     '\n'# -> \ loc -> lexIface cont (stepOn buf) (incSrcLine loc)
 213
 214 -- Numbers and comments
 215     '-'#  ->
 216       case lookAhead# buf 1# of
 217 --        '-'# -> lex_comment cont (stepOnBy# buf 2#)
 218         c    ->
 219           if is_digit c
 220           then lex_num cont (negate) (ord# c -# ord# '0'#) (incLexeme (incLexeme buf))
 221           else lex_sym cont buf
 222
 223     '{'# ->                             -- look for "{-##" special iface pragma
 224         case lookAhead# buf 1# of
 225            '-'# -> case lookAhead# buf 2# of
 226                     '#'# -> case lookAhead# buf 3# of
 227                                 '#'# ->
 228                                    let (lexeme, buf')
 229                                           = doDiscard False (stepOnBy# buf 4#) in
 230                                    cont (ITpragma lexeme) buf'
 231                                 _ ->  lex_nested_comment (lexIface cont) buf
 232                     _    -> cont ITocurly (stepOn buf)
 233                             -- lex_nested_comment (lexIface cont) buf
 234            _ -> cont ITocurly (stepOn buf)
 235
 236     -- special symbols ----------------------------------------------------
 237     '('# ->
 238          case prefixMatch (stepOn buf) "..)" of
 239            Just buf' ->  cont ITdotdot (stepOverLexeme buf')
 240            Nothing ->
 241             case lookAhead# buf 1# of
 242               '#'# -> cont IToubxparen (stepOnBy# buf 2#)
 243               _    -> cont IToparen (stepOn buf)
 244     ')'# -> cont ITcparen (stepOn buf)
 245     '}'# -> cont ITccurly (stepOn buf)
 246     '#'# -> case lookAhead# buf 1# of
 247                 ')'# -> cont ITcubxparen (stepOnBy# buf 2#)
 248                 _    -> lex_sym cont (incLexeme buf)
 249     '['# -> cont ITobrack (stepOn buf)
 250     ']'# -> cont ITcbrack (stepOn buf)
 251     ','# -> cont ITcomma  (stepOn buf)
 252     ';'# -> cont ITsemi   (stepOn buf)
 253
 254     -- strings/characters -------------------------------------------------
 255     '\"'#{-"-} -> case untilEndOfString# (stepOn buf) of
 256               buf' ->
 257                   -- the string literal does *not* include the dquotes
 258                 case lexemeToFastString buf' of
 259                  v -> cont (ITstring v) (stepOn (stepOverLexeme buf'))
 260
 261     '\''# -> --
 262              -- untilEndOfChar# extends the current lexeme until
 263              -- it hits a non-escaped single quote. The lexeme of the
 264              -- StringBuffer returned does *not* include the closing quote,
 265              -- hence we augment the lexeme and make sure to add the
 266              -- starting quote, before `read'ing the string.
 267              --
 268              case untilEndOfChar# (stepOn buf) of
 269                buf' -> case reads ('\'':lexemeToString (incLexeme buf')) of
 270                         [  (ch, rest)] -> cont (ITchar ch) (stepOverLexeme (incLexeme buf'))
 271
 272     -- strictness and cpr pragmas and __scc treated specially.
 273     '_'# ->
 274          case lookAhead# buf 1# of
 275            '_'# -> case lookAhead# buf 2# of
 276                     'S'# ->
 277                         lex_demand cont (stepOnUntil (not . isSpace)
 278                                         (stepOnBy# buf 3#)) -- past __S
 279                     'M'# ->
 280                         lex_cpr cont (stepOnUntil (not . isSpace)
 281                                      (stepOnBy# buf 3#)) -- past __M
 282                     's'# ->
 283                         case prefixMatch (stepOnBy# buf 3#) "cc" of
 284                                Just buf' -> lex_scc cont (stepOverLexeme buf')
 285                                Nothing   -> lex_id cont buf
 286                     _ -> lex_id cont buf
 287            _    -> lex_id cont buf
 288
 289 -- ``thingy'' form for casm
 290     '`'# ->
 291             case lookAhead# buf 1# of
 292               '`'# -> lex_cstring cont (stepOnBy# buf 2#) -- remove the `s and go.
 293               _    -> lex_sym cont (incLexeme buf)         -- add ` to lexeme and assume
 294                                                      -- scanning an id of some sort.
 295
 296     '\NUL'# ->
 297             if bufferExhausted (stepOn buf) then
 298                cont ITeof buf
 299             else
 300                trace "lexIface: misplaced NUL?" $
 301                cont (ITunknown "\NUL") (stepOn buf)
 302
 303     c | is_digit  c -> lex_num cont (id) (ord# c -# ord# '0'#) (incLexeme buf)
 304       | is_symbol c -> lex_sym cont buf
 305       | is_upper  c -> lex_con cont buf
 306       | is_ident  c -> lex_id  cont buf
 307
 308 --  where
 309 lex_comment cont buf =
 310 --   _trace ("comment: "++[C# (currentChar# buf)]) $
 311    case untilChar# buf '\n'# of {buf' -> lexIface cont (stepOverLexeme buf')}
 312
 313 -------------------------------------------------------------------------------
 314
 315 lex_nested_comment cont buf =
 316   case currentChar# buf of
 317         '-'# -> case lookAhead# buf 1# of
 318                  '}'# -> cont (stepOnBy# buf 2#)
 319                  _    -> lex_nested_comment cont (stepOn buf)
 320
 321         '{'# -> case lookAhead# buf 1# of
 322                  '-'# -> lex_nested_comment
 323                                 (lex_nested_comment cont)
 324                                 (stepOnBy# buf 2#)
 325                  _    -> lex_nested_comment cont (stepOn buf)
 326
 327         _   -> lex_nested_comment cont (stepOn buf)
 328
 329 -------------------------------------------------------------------------------
 330
 331 lex_demand cont buf =
 332  case read_em [] buf of { (ls,buf') ->
 333  case currentChar# buf' of
 334    'B'# -> cont (ITstrict (ls, True )) (stepOverLexeme (stepOn buf'))
 335    _    -> cont (ITstrict (ls, False)) (stepOverLexeme buf')
 336  }
 337  where
 338    -- code snatched from Demand.lhs
 339   read_em acc buf =
 340    case currentChar# buf of
 341     'L'# -> read_em (WwLazy False : acc) (stepOn buf)
 342     'A'# -> read_em (WwLazy True  : acc) (stepOn buf)
 343     'S'# -> read_em (WwStrict     : acc) (stepOn buf)
 344     'P'# -> read_em (WwPrim       : acc) (stepOn buf)
 345     'E'# -> read_em (WwEnum       : acc) (stepOn buf)
 346     ')'# -> (reverse acc, stepOn buf)
 347     'U'# -> do_unpack DataType True  acc (stepOnBy# buf 2#)
 348     'u'# -> do_unpack DataType False acc (stepOnBy# buf 2#)
 349     'N'# -> do_unpack NewType True  acc (stepOnBy# buf 2#)
 350     'n'# -> do_unpack NewType False acc (stepOnBy# buf 2#)
 351     _    -> (reverse acc, buf)
 352
 353   do_unpack new_or_data wrapper_unpacks acc buf
 354    = case read_em [] buf of
 355       (stuff, rest) -> read_em (WwUnpack new_or_data wrapper_unpacks stuff : acc) rest
 356
 357 lex_cpr cont buf =
 358  case read_em [] buf of { (cpr_inf,buf') ->
 359    ASSERT ( null (tail cpr_inf) )
 360    cont (ITcprinfo $ head cpr_inf) (stepOverLexeme buf')
 361  }
 362  where
 363    -- code snatched from lex_demand above
 364   read_em acc buf =
 365    case currentChar# buf of
 366     '-'# -> read_em (NoCPRInfo : acc) (stepOn buf)
 367     '('# -> do_unpack acc (stepOn buf)
 368     ')'# -> (reverse acc, stepOn buf)
 369     _    -> (reverse acc, buf)
 370
 371   do_unpack acc buf
 372    = case read_em [] buf of
 373       (stuff, rest) -> read_em ((CPRInfo stuff)  : acc) rest
 374
 375 ------------------
 376 lex_scc cont buf =
 377  case currentChar# buf of
 378   'C'# -> cont ITsccAllCafs  (stepOverLexeme (stepOn buf))
 379   other -> cont ITscc buf
 380
 381 -----------
 382 lex_num :: (IfaceToken -> IfM a) -> (Int -> Int) -> Int# -> IfM a
 383 lex_num cont minus acc# buf =
 384  --trace ("lex_num: "++[C# (currentChar# buf)]) $
 385  case scanNumLit (I# acc#) buf of
 386      (acc',buf') ->
 387        case currentChar# buf' of
 388          '.'# ->
 389              -- this case is not optimised at all, as the
 390              -- presence of floating point numbers in interface
 391              -- files is not that common. (ToDo)
 392             case expandWhile# is_digit (incLexeme buf') of
 393               buf2 -> -- points to first non digit char
 394                 let l = case currentChar# buf2 of
 395                           'e'# -> let buf3 = incLexeme buf2 in
 396                               case currentChar# buf3 of
 397                                 '-'# -> expandWhile# is_digit (incLexeme buf3)
 398                                 _    -> expandWhile# is_digit buf3
 399                           _ -> buf2
 400                 in let v = readRational__ (lexemeToString l) in
 401                    cont (ITrational v) (stepOverLexeme l)
 402
 403          _ -> cont (ITinteger (fromInt (minus acc'))) (stepOverLexeme buf')
 404
 405 -----------
 406 lex_cstring cont buf =
 407  case expandUntilMatch buf "\'\'" of
 408    buf' -> cont (ITstring (lexemeToFastString (setCurrentPos# buf' (negateInt# 2#))))
 409            (stepOverLexeme buf')
 410
 411 ------------------------------------------------------------------------------
 412 -- Character Classes
 413
 414 is_ident, is_symbol, is_any, is_upper, is_digit :: Char# -> Bool
 415
 416 {-# INLINE is_ctype #-}
 417 #if __GLASGOW_HASKELL__ >= 303
 418 is_ctype :: Word8 -> Char# -> Bool
 419 is_ctype mask = \c ->
 420    (indexWord8OffAddr (``char_types'' :: Addr) (ord (C# c)) .&. mask) /= 0
 421 #else
 422 is_ctype :: Int -> Char# -> Bool
 423 is_ctype (I# mask) = \c ->
 424     let (A# ctype) = ``char_types'' :: Addr
 425         flag_word  = int2Word# (ord# (indexCharOffAddr# ctype (ord# c)))
 426     in
 427         (flag_word `and#` (int2Word# mask)) `neWord#` (int2Word# 0#)
 428 #endif
 429
 430 is_ident  = is_ctype 1
 431 is_symbol = is_ctype 2
 432 is_any    = is_ctype 4
 433 is_space  = is_ctype 8
 434 is_upper  = is_ctype 16
 435 is_digit  = is_ctype 32
 436
 437 -----------------------------------------------------------------------------
 438 -- identifiers, symbols etc.
 439
 440 lex_id cont buf =
 441  case expandWhile# is_ident buf of { buf1 ->
 442  case expandWhile# (eqChar# '#'#) buf1 of { buf' -> -- only if GHC extns on
 443  let new_buf = stepOverLexeme buf'
 444      lexeme  = lexemeToFastString buf'
 445  in
 446  case _scc_ "Lex.haskellKeyword" lookupUFM haskellKeywordsFM lexeme of {
 447         Just kwd_token -> --trace ("hkeywd: "++_UNPK_(lexeme)) $
 448                           cont kwd_token new_buf;
 449         Nothing        ->
 450  case lookupUFM ifaceKeywordsFM lexeme of {
 451         Just kwd_token -> --trace ("ifacekeywd: "++_UNPK_(lexeme)) $
 452                           cont kwd_token new_buf;
 453         Nothing        -> --trace ("id: "++_UNPK_(lexeme)) $
 454                           cont (mk_var_token lexeme) new_buf
 455  }}}}
 456
 457 lex_sym cont buf =
 458  case expandWhile# is_symbol buf of
 459    buf'
 460      | is_comment lexeme -> lex_comment cont new_buf
 461      | otherwise         ->
 462            case lookupUFM haskellKeySymsFM lexeme of {
 463                 Just kwd_token -> --trace ("keysym: "++unpackFS lexeme) $
 464                                   cont kwd_token new_buf ;
 465                 Nothing        -> --trace ("sym: "++unpackFS lexeme) $
 466                                   cont (mk_var_token lexeme) new_buf
 467            }
 468         where lexeme = lexemeToFastString buf'
 469               new_buf = stepOverLexeme buf'
 470
 471               is_comment fs
 472                 | len < 2   = False
 473                 | otherwise = trundle 0
 474                   where
 475                    len = lengthFS fs
 476
 477                    trundle n | n == len  = True
 478                              | otherwise = indexFS fs n == '-' && trundle (n+1)
 479
 480 lex_con cont buf =
 481  case expandWhile# is_ident buf of        { buf1 ->
 482  case expandWhile# (eqChar# '#'#) buf1 of { buf' ->
 483  case currentChar# buf' of
 484      '.'# -> munch hiFile
 485      '!'# -> munch hiBootFile
 486      _    -> just_a_conid
 487
 488    where
 489     just_a_conid = --trace ("con: "++unpackFS lexeme) $
 490                    cont (ITconid lexeme) new_buf
 491     lexeme = lexemeToFastString buf'
 492     new_buf = stepOverLexeme buf'
 493     munch hif = lex_qid cont lexeme hif (stepOn new_buf) just_a_conid
 494  }}
 495
 496 lex_qid cont mod hif buf just_a_conid =
 497  case currentChar# buf of
 498   '['# ->       -- Special case for []
 499     case lookAhead# buf 1# of
 500      ']'# -> cont (ITqconid  (mod,SLIT("[]"),hif)) (stepOnBy# buf 2#)
 501      _    -> just_a_conid
 502
 503   '('# ->  -- Special case for (,,,)
 504            -- This *is* necessary to deal with e.g. "instance C PrelBase.(,,)"
 505     case lookAhead# buf 1# of
 506      '#'# -> case lookAhead# buf 2# of
 507                 ','# -> lex_ubx_tuple cont mod hif (stepOnBy# buf 3#)
 508                                 just_a_conid
 509                 _    -> just_a_conid
 510      ')'# -> cont (ITqconid (mod,SLIT("()"),hif)) (stepOnBy# buf 2#)
 511      ','# -> lex_tuple cont mod hif (stepOnBy# buf 2#) just_a_conid
 512      _    -> just_a_conid
 513
 514   '-'# -> case lookAhead# buf 1# of
 515             '>'# -> cont (ITqconid (mod,SLIT("->"),hif)) (stepOnBy# buf 2#)
 516             _    -> lex_id3 cont mod hif buf just_a_conid
 517   _    -> lex_id3 cont mod hif buf just_a_conid
 518
 519 lex_id3 cont mod hif buf just_a_conid
 520   | is_symbol c =
 521      case expandWhile# is_symbol buf of { buf' ->
 522      let
 523       lexeme  = lexemeToFastString buf'
 524       new_buf = stepOverLexeme buf'
 525      in
 526      case lookupUFM haskellKeySymsFM lexeme of {
 527         Just kwd_token -> just_a_conid; -- avoid M.:: etc.
 528         Nothing        -> cont (mk_qvar_token mod hif lexeme) new_buf
 529      }}
 530
 531   | otherwise   =
 532      case expandWhile# is_ident buf of { buf1 ->
 533      if emptyLexeme buf1
 534             then just_a_conid
 535             else
 536      case expandWhile# (eqChar# '#'#) buf1 of { buf' -> -- only if GHC extns on
 537      let
 538       lexeme  = lexemeToFastString buf'
 539       new_buf = stepOverLexeme buf'
 540      in
 541      case _scc_ "Lex.haskellKeyword" lookupUFM haskellKeywordsFM lexeme of {
 542             Just kwd_token -> just_a_conid; -- avoid M.where etc.
 543             Nothing        ->
 544      case lookupUFM ifaceKeywordsFM lexeme of { -- only for iface files
 545             Just kwd_token -> just_a_conid;
 546             Nothing        -> cont (mk_qvar_token mod hif lexeme) new_buf
 547      }}}}
 548   where c = currentChar# buf
 549
 550 mk_var_token pk_str
 551   | is_upper f          = ITconid pk_str
 552         -- _[A-Z] is treated as a constructor in interface files.
 553   | f `eqChar#` '_'# && not (_NULL_ tl)
 554         && (case _HEAD_ tl of { C# g -> is_upper g }) = ITconid pk_str
 555   | is_ident f          = ITvarid pk_str
 556   | f `eqChar#` ':'#    = ITconsym pk_str
 557   | otherwise           = ITvarsym pk_str
 558   where
 559       (C# f) = _HEAD_ pk_str
 560       tl     = _TAIL_ pk_str
 561
 562 mk_qvar_token m hif token =
 563  case mk_var_token token of
 564    ITconid n  -> ITqconid  (m,n,hif)
 565    ITvarid n  -> ITqvarid  (m,n,hif)
 566    ITconsym n -> ITqconsym (m,n,hif)
 567    ITvarsym n -> ITqvarsym (m,n,hif)
 568    _          -> ITunknown (show token)
 569 \end{code}
 570
 571 ----------------------------------------------------------------------------
 572 Horrible stuff for dealing with M.(,,,)
 573
 574 \begin{code}
 575 lex_tuple cont mod hif buf back_off =
 576   go 2 buf
 577   where
 578    go n buf =
 579     case currentChar# buf of
 580       ','# -> go (n+1) (stepOn buf)
 581       ')'# -> cont (ITqconid (mod, snd (mkTupNameStr n),hif)) (stepOn buf)
 582       _    -> back_off
 583
 584 lex_ubx_tuple cont mod hif buf back_off =
 585   go 2 buf
 586   where
 587    go n buf =
 588     case currentChar# buf of
 589       ','# -> go (n+1) (stepOn buf)
 590       '#'# -> case lookAhead# buf 1# of
 591                 ')'# -> cont (ITqconid (mod, snd (mkUbxTupNameStr n), hif))
 592                                  (stepOnBy# buf 2#)
 593                 _    -> back_off
 594       _    -> back_off
 595 \end{code}
 596
 597 -----------------------------------------------------------------------------
 598 Keyword Lists
 599
 600 \begin{code}
 601 ifaceKeywordsFM :: UniqFM IfaceToken
 602 ifaceKeywordsFM = listToUFM $
 603       map (\ (x,y) -> (_PK_ x,y))
 604      [  ("__interface",         ITinterface),
 605         ("__export",            ITexport),
 606         ("__instimport",        ITinstimport),
 607         ("__forall",            ITforall),
 608         ("__letrec",            ITletrec),
 609         ("__coerce",            ITcoerce),
 610         ("__inline",            ITinline),
 611         ("__DEFAULT",           ITdefaultbranch),
 612         ("__bot",               ITbottom),
 613         ("__integer",           ITinteger_lit),
 614         ("__float",             ITfloat_lit),
 615         ("__rational",          ITrational_lit),
 616         ("__addr",              ITaddr_lit),
 617         ("__litlit",            ITlit_lit),
 618         ("__string",            ITstring_lit),
 619         ("__a",                 ITtypeapp),
 620         ("__A",                 ITarity),
 621         ("__P",                 ITspecialise),
 622         ("__C",                 ITnocaf),
 623         ("__u",                 ITunfold NoInlinePragInfo),
 624         ("__U",                 ITunfold IWantToBeINLINEd),
 625         ("__UU",                ITunfold IMustBeINLINEd),
 626         ("__Unot",              ITunfold IMustNotBeINLINEd),
 627         ("__Ux",                ITunfold IAmALoopBreaker),
 628
 629         ("__ccall",             ITccall (False, False, False)),
 630         ("__ccall_GC",          ITccall (False, False, True)),
 631         ("__dyn_ccall",         ITccall (True,  False, False)),
 632         ("__dyn_ccall_GC",      ITccall (True,  False, True)),
 633         ("__casm",              ITccall (False, True,  False)),
 634         ("__dyn_casm",          ITccall (True,  True,  False)),
 635         ("__casm_GC",           ITccall (False, True,  True)),
 636         ("__dyn_casm_GC",       ITccall (True,  True,  True)),
 637
 638         ("/\\",                 ITbiglam)
 639        ]
 640
 641 haskellKeywordsFM = listToUFM $
 642       map (\ (x,y) -> (_PK_ x,y))
 643        [( "case",       ITcase ),
 644         ( "class",      ITclass ),
 645         ( "data",       ITdata ),
 646         ( "default",    ITdefault ),
 647         ( "deriving",   ITderiving ),
 648         ( "do",         ITdo ),
 649         ( "else",       ITelse ),
 650         ( "if",         ITif ),
 651         ( "import",     ITimport ),
 652         ( "in",         ITin ),
 653         ( "infix",      ITinfix ),
 654         ( "infixl",     ITinfixl ),
 655         ( "infixr",     ITinfixr ),
 656         ( "instance",   ITinstance ),
 657         ( "let",        ITlet ),
 658         ( "module",     ITmodule ),
 659         ( "newtype",    ITnewtype ),
 660         ( "of",         ITof ),
 661         ( "then",       ITthen ),
 662         ( "type",       ITtype ),
 663         ( "where",      ITwhere )
 664
 665 --      These three aren't Haskell keywords at all
 666 --      and 'as' is often used as a variable name
 667 --      ( "as",         ITas ),
 668 --      ( "qualified",  ITqualified ),
 669 --      ( "hiding",     IThiding )
 670
 671      ]
 672
 673 haskellKeySymsFM = listToUFM $
 674         map (\ (x,y) -> (_PK_ x,y))
 675       [ ("..",                  ITdotdot)
 676        ,("::",                  ITdcolon)
 677        ,("=",                   ITequal)
 678        ,("\\",                  ITlam)
 679        ,("|",                   ITvbar)
 680        ,("<-",                  ITlarrow)
 681        ,("->",                  ITrarrow)
 682        ,("@",                   ITat)
 683        ,("~",                   ITtilde)
 684        ,("=>",                  ITdarrow)
 685        ,("-",                   ITminus)
 686        ,("!",                   ITbang)
 687        ]
 688 \end{code}
 689
 690 -----------------------------------------------------------------------------
 691 doDiscard rips along really fast, looking for a '#-}',
 692 indicating the end of the pragma we're skipping
 693
 694 \begin{code}
 695 doDiscard inStr buf =
 696  case currentChar# buf of
 697    '#'# | not inStr ->
 698        case lookAhead# buf 1# of { '#'# ->
 699        case lookAhead# buf 2# of { '-'# ->
 700        case lookAhead# buf 3# of { '}'# ->
 701            (lexemeToBuffer buf, stepOverLexeme (setCurrentPos# buf 4#));
 702         _    -> doDiscard inStr (incLexeme buf) };
 703         _    -> doDiscard inStr (incLexeme buf) };
 704         _    -> doDiscard inStr (incLexeme buf) }
 705    '"'# ->
 706        let
 707         odd_slashes buf flg i# =
 708           case lookAhead# buf i# of
 709            '\\'# -> odd_slashes buf (not flg) (i# -# 1#)
 710            _     -> flg
 711        in
 712        case lookAhead# buf (negateInt# 1#) of --backwards, actually
 713          '\\'# -> -- escaping something..
 714            if odd_slashes buf True (negateInt# 2#) then
 715                -- odd number of slashes, " is escaped.
 716               doDiscard inStr (incLexeme buf)
 717            else
 718                -- even number of slashes, \ is escaped.
 719               doDiscard (not inStr) (incLexeme buf)
 720          _ -> case inStr of -- forced to avoid build-up
 721                True  -> doDiscard False (incLexeme buf)
 722                False -> doDiscard True  (incLexeme buf)
 723    _ -> doDiscard inStr (incLexeme buf)
 724
 725 \end{code}
 726
 727 -----------------------------------------------------------------------------
 728
 729 \begin{code}
 730 type IfM a = StringBuffer       -- Input string
 731           -> SrcLoc
 732           -> MaybeErr a {-error-}Message
 733
 734 returnIf   :: a -> IfM a
 735 returnIf a s l = Succeeded a
 736
 737 thenIf     :: IfM a -> (a -> IfM b) -> IfM b
 738 m `thenIf` k = \s l ->
 739         case m s l of
 740                 Succeeded a -> k a s l
 741                 Failed err  -> Failed err
 742
 743 getSrcLocIf :: IfM SrcLoc
 744 getSrcLocIf s l = Succeeded l
 745
 746 happyError :: IfM a
 747 happyError s l = Failed (ifaceParseErr s l)
 748
 749
 750 {-
 751  Note that if the name of the file we're processing ends
 752  with `hi-boot', we accept it on faith as having the right
 753  version. This is done so that .hi-boot files that comes
 754  with hsc don't have to be updated before every release,
 755  *and* it allows us to share .hi-boot files with versions
 756  of hsc that don't have .hi version checking (e.g., ghc-2.10's)
 757
 758  If the version number is 0, the checking is also turned off.
 759  (needed to deal with GHC.hi only!)
 760
 761  Once we can assume we're compiling with a version of ghc that
 762  supports interface file checking, we can drop the special
 763  pleading
 764 -}
 765 checkVersion :: Maybe Integer -> IfM ()
 766 checkVersion mb@(Just v) s l
 767  | (v==0) || (v == fromInt opt_HiVersion) || opt_NoHiCheck = Succeeded ()
 768  | otherwise = Failed (ifaceVersionErr mb l ([]::[IfaceToken]){-Todo-})
 769 checkVersion mb@Nothing  s l
 770  | "hi-boot" `isSuffixOf` (_UNPK_ (srcLocFile l)) = Succeeded ()
 771  | otherwise = Failed (ifaceVersionErr mb l ([]::[IfaceToken]){-Todo-})
 772
 773 -----------------------------------------------------------------
 774
 775 ifaceParseErr :: StringBuffer -> SrcLoc -> Message
 776 ifaceParseErr s l
 777   = hsep [ppr l, ptext SLIT("Interface-file parse error;"),
 778           ptext SLIT("current input ="), text first_bit]
 779   where
 780     first_bit = lexemeToString (stepOnBy# s 100#)
 781
 782 ifaceVersionErr hi_vers l toks
 783   = hsep [ppr l, ptext SLIT("Interface file version error;"),
 784           ptext SLIT("Expected"), int opt_HiVersion,
 785           ptext SLIT("found "), pp_version]
 786     where
 787      pp_version =
 788       case hi_vers of
 789         Nothing -> ptext SLIT("pre ghc-3.02 version")
 790         Just v  -> ptext SLIT("version") <+> integer v
 791
 792 \end{code}