ghc/compiler/reader/Lex.lhs

   1 %
   2 % (c) The GRASP/AQUA Project, Glasgow University, 1992-1998
   3 %
   4 \section[Lexical analysis]{Lexical analysis}
   5
   6 --------------------------------------------------------
   7 [Jan 98]
   8 There's a known bug in here:
   9
  10         If an interface file ends prematurely, Lex tries to
  11         do headFS of an empty FastString.
  12
  13 An example that provokes the error is
  14
  15         f _:_ _forall_ [a] <<<END OF FILE>>>
  16 --------------------------------------------------------
  17
  18 \begin{code}
  19 {-# OPTIONS -#include "ctypes.h" #-}
  20
  21 module Lex (
  22
  23         ifaceParseErr,
  24
  25         -- Monad for parser
  26         IfaceToken(..), lexIface, IfM, thenIf, returnIf, getSrcLocIf,
  27         checkVersion,
  28         happyError,
  29         StringBuffer
  30
  31     ) where
  32
  33 #include "HsVersions.h"
  34
  35 import Char             ( ord, isSpace )
  36 import List             ( isSuffixOf )
  37
  38 import CostCentre       -- Pretty much all of it
  39 import IdInfo           ( InlinePragInfo(..) )
  40 import Name             ( mkTupNameStr, mkUbxTupNameStr,
  41                           isLowerISO, isUpperISO )
  42
  43 import CmdLineOpts      ( opt_IgnoreIfacePragmas, opt_HiVersion, opt_NoHiCheck )
  44 import Demand           ( Demand(..) {- instance Read -} )
  45 import UniqFM           ( UniqFM, listToUFM, lookupUFM)
  46 import BasicTypes       ( NewOrData(..), IfaceFlavour(..) )
  47 import SrcLoc           ( SrcLoc, incSrcLine, srcLocFile )
  48
  49 import Maybes           ( MaybeErr(..) )
  50 import ErrUtils         ( ErrMsg )
  51 import Outputable
  52
  53 import FastString
  54 import StringBuffer
  55 import GlaExts
  56 import ST               ( runST )
  57
  58 #if __GLASGOW_HASKELL__ >= 303
  59 import Bits
  60 import Word
  61 #endif
  62
  63 import Addr
  64
  65 import PrelRead                 ( readRational__ ) -- Glasgow non-std
  66 \end{code}
  67
  68 %************************************************************************
  69 %*                                                                      *
  70 \subsection{Data types}
  71 %*                                                                      *
  72 %************************************************************************
  73
  74 The token data type, fairly un-interesting except from one
  75 constructor, @ITidinfo@, which is used to lazily lex id info (arity,
  76 strictness, unfolding etc).
  77
  78 The Idea/Observation here is that the renamer needs to scan through
  79 all of an interface file before it can continue. But only a fraction
  80 of the information contained in the file turns out to be useful, so
  81 delaying as much as possible of the scanning and parsing of an
  82 interface file Makes Sense (Heap profiles of the compiler
  83 show a reduction in heap usage by at least a factor of two,
  84 post-renamer).
  85
  86 Hence, the interface file lexer spots when value declarations are
  87 being scanned and return the @ITidinfo@ and @ITtype@ constructors
  88 for the type and any other id info for that binding (unfolding, strictness
  89 etc). These constructors are applied to the result of lexing these sub-chunks.
  90
  91 The lexing of the type and id info is all done lazily, of course, so
  92 the scanning (and subsequent parsing) will be done *only* on the ids the
  93 renamer finds out that it is interested in. The rest will just be junked.
  94 Laziness, you know it makes sense :-)
  95
  96 \begin{code}
  97 data IfaceToken
  98   = ITcase                      -- Haskell keywords
  99   | ITclass
 100   | ITdata
 101   | ITdefault
 102   | ITderiving
 103   | ITdo
 104   | ITelse
 105   | ITif
 106   | ITimport
 107   | ITin
 108   | ITinfix
 109   | ITinfixl
 110   | ITinfixr
 111   | ITinstance
 112   | ITlet
 113   | ITmodule
 114   | ITnewtype
 115   | ITof
 116   | ITthen
 117   | ITtype
 118   | ITwhere
 119   | ITas
 120   | ITqualified
 121   | IThiding
 122
 123   | ITinterface                 -- GHC-extension keywords
 124   | ITexport
 125   | ITinstimport
 126   | ITforall
 127   | ITletrec
 128   | ITcoerce
 129   | ITinline
 130   | ITccall (Bool,Bool) -- (is_casm, may_gc)
 131   | ITdefaultbranch
 132   | ITbottom
 133   | ITinteger_lit
 134   | ITfloat_lit
 135   | ITrational_lit
 136   | ITaddr_lit
 137   | ITlit_lit
 138   | ITstring_lit
 139   | ITtypeapp
 140   | ITarity
 141   | ITspecialise
 142   | ITnocaf
 143   | ITunfold InlinePragInfo
 144   | ITstrict [Demand]
 145   | ITscc CostCentre
 146
 147   | ITdotdot                    -- reserved symbols
 148   | ITdcolon
 149   | ITequal
 150   | ITlam
 151   | ITvbar
 152   | ITlarrow
 153   | ITrarrow
 154   | ITat
 155   | ITtilde
 156   | ITdarrow
 157   | ITminus
 158   | ITbang
 159
 160   | ITbiglam                    -- GHC-extension symbols
 161
 162   | ITocurly                    -- special symbols
 163   | ITccurly
 164   | ITobrack
 165   | ITcbrack
 166   | IToparen
 167   | ITcparen
 168   | IToubxparen
 169   | ITcubxparen
 170   | ITsemi
 171   | ITcomma
 172
 173   | ITvarid   FAST_STRING       -- identifiers
 174   | ITconid   FAST_STRING
 175   | ITvarsym  FAST_STRING
 176   | ITconsym  FAST_STRING
 177   | ITqvarid  (FAST_STRING,FAST_STRING,IfaceFlavour)
 178   | ITqconid  (FAST_STRING,FAST_STRING,IfaceFlavour)
 179   | ITqvarsym (FAST_STRING,FAST_STRING,IfaceFlavour)
 180   | ITqconsym (FAST_STRING,FAST_STRING,IfaceFlavour)
 181
 182   | ITpragma StringBuffer
 183
 184   | ITchar Char
 185   | ITstring FAST_STRING
 186   | ITinteger Integer
 187   | ITrational Rational
 188
 189   | ITunknown String            -- Used when the lexer can't make sense of it
 190   | ITeof                       -- end of file token
 191   deriving Text -- debugging
 192
 193 instance Text CostCentre -- cheat!
 194
 195 \end{code}
 196
 197 %************************************************************************
 198 %*                                                                      *
 199 \subsection{The lexical analyser}
 200 %*                                                                      *
 201 %************************************************************************
 202
 203 \begin{code}
 204 lexIface :: (IfaceToken -> IfM a) -> IfM a
 205 lexIface cont buf =
 206  _scc_ "Lexer"
 207 -- if bufferExhausted buf then
 208 --  []
 209 -- else
 210 --  trace ("Lexer: '"++[C# (currentChar# buf)]++"'") $
 211   case currentChar# buf of
 212       -- whitespace and comments, ignore.
 213     ' '#  -> lexIface cont (stepOn buf)
 214     '\t'# -> lexIface cont (stepOn buf)
 215     '\n'# -> \ loc -> lexIface cont (stepOn buf) (incSrcLine loc)
 216
 217 -- Numbers and comments
 218     '-'#  ->
 219       case lookAhead# buf 1# of
 220         '-'# -> lex_comment cont (stepOnBy# buf 2#)
 221         c    ->
 222           if is_digit c
 223           then lex_num cont (negate) (ord# c -# ord# '0'#) (incLexeme (incLexeme buf))
 224           else lex_sym cont buf
 225
 226     '{'# ->                             -- look for "{-##" special iface pragma
 227         case lookAhead# buf 1# of
 228            '-'# -> case lookAhead# buf 2# of
 229                     '#'# -> case lookAhead# buf 3# of
 230                                 '#'# ->
 231                                    let (lexeme, buf')
 232                                           = doDiscard False (stepOnBy# buf 4#) in
 233                                    cont (ITpragma lexeme) buf'
 234                                 _ ->  lex_nested_comment (lexIface cont) buf
 235                     _    -> cont ITocurly (stepOn buf)
 236                             -- lex_nested_comment (lexIface cont) buf
 237            _ -> cont ITocurly (stepOn buf)
 238
 239     -- special symbols ----------------------------------------------------
 240     '('# ->
 241          case prefixMatch (stepOn buf) "..)" of
 242            Just buf' ->  cont ITdotdot (stepOverLexeme buf')
 243            Nothing ->
 244             case lookAhead# buf 1# of
 245               '#'# -> cont IToubxparen (stepOnBy# buf 2#)
 246               _    -> cont IToparen (stepOn buf)
 247     ')'# -> cont ITcparen (stepOn buf)
 248     '}'# -> cont ITccurly (stepOn buf)
 249     '#'# -> case lookAhead# buf 1# of
 250                 ')'# -> cont ITcubxparen (stepOnBy# buf 2#)
 251                 _    -> lex_sym cont (incLexeme buf)
 252     '['# -> cont ITobrack (stepOn buf)
 253     ']'# -> cont ITcbrack (stepOn buf)
 254     ','# -> cont ITcomma  (stepOn buf)
 255     ';'# -> cont ITsemi   (stepOn buf)
 256
 257     -- strings/characters -------------------------------------------------
 258     '\"'#{-"-} -> case untilEndOfString# (stepOn buf) of
 259               buf' ->
 260                   -- the string literal does *not* include the dquotes
 261                 case lexemeToFastString buf' of
 262                  v -> cont (ITstring v) (stepOn (stepOverLexeme buf'))
 263
 264     '\''# -> --
 265              -- untilEndOfChar# extends the current lexeme until
 266              -- it hits a non-escaped single quote. The lexeme of the
 267              -- StringBuffer returned does *not* include the closing quote,
 268              -- hence we augment the lexeme and make sure to add the
 269              -- starting quote, before `read'ing the string.
 270              --
 271              case untilEndOfChar# (stepOn buf) of
 272                buf' -> case reads ('\'':lexemeToString (incLexeme buf')) of
 273                         [  (ch, rest)] -> cont (ITchar ch) (stepOverLexeme (incLexeme buf'))
 274
 275     -- strictness pragma and __scc treated specially.
 276     '_'# ->
 277          case lookAhead# buf 1# of
 278            '_'# -> case lookAhead# buf 2# of
 279                     'S'# ->
 280                         lex_demand cont (stepOnUntil (not . isSpace)
 281                                         (stepOnBy# buf 3#)) -- past __S
 282                     's'# ->
 283                         case prefixMatch (stepOnBy# buf 3#) "cc" of
 284                                Just buf' -> lex_scc cont
 285                                                 (stepOnUntil (not . isSpace)
 286                                                 (stepOverLexeme buf'))
 287                                Nothing   -> lex_id cont buf
 288                     _ -> lex_id cont buf
 289            _    -> lex_id cont buf
 290
 291 -- ``thingy'' form for casm
 292     '`'# ->
 293             case lookAhead# buf 1# of
 294               '`'# -> lex_cstring cont (stepOnBy# buf 2#) -- remove the `s and go.
 295               _    -> lex_sym cont (incLexeme buf)         -- add ` to lexeme and assume
 296                                                      -- scanning an id of some sort.
 297
 298     '\NUL'# ->
 299             if bufferExhausted (stepOn buf) then
 300                cont ITeof buf
 301             else
 302                trace "lexIface: misplaced NUL?" $
 303                cont (ITunknown "\NUL") (stepOn buf)
 304
 305     c | is_digit  c -> lex_num cont (id) (ord# c -# ord# '0'#) (incLexeme buf)
 306       | is_symbol c -> lex_sym cont buf
 307       | is_upper  c -> lex_con cont buf
 308       | is_ident  c -> lex_id  cont buf
 309
 310 --  where
 311 lex_comment cont buf =
 312 --   _trace ("comment: "++[C# (currentChar# buf)]) $
 313    case untilChar# buf '\n'# of {buf' -> lexIface cont (stepOverLexeme buf')}
 314
 315 -------------------------------------------------------------------------------
 316
 317 lex_nested_comment cont buf =
 318   case currentChar# buf of
 319         '-'# -> case lookAhead# buf 1# of
 320                  '}'# -> cont (stepOnBy# buf 2#)
 321                  _    -> lex_nested_comment cont (stepOn buf)
 322
 323         '{'# -> case lookAhead# buf 1# of
 324                  '-'# -> lex_nested_comment
 325                                 (lex_nested_comment cont)
 326                                 (stepOnBy# buf 2#)
 327                  _    -> lex_nested_comment cont (stepOn buf)
 328
 329         _   -> lex_nested_comment cont (stepOn buf)
 330
 331 -------------------------------------------------------------------------------
 332
 333 lex_demand cont buf =
 334  case read_em [] buf of { (ls,buf') -> cont (ITstrict ls) (stepOverLexeme buf')}
 335  where
 336    -- code snatched from Demand.lhs
 337   read_em acc buf =
 338    case currentChar# buf of
 339     'L'# -> read_em (WwLazy False : acc) (stepOn buf)
 340     'A'# -> read_em (WwLazy True  : acc) (stepOn buf)
 341     'S'# -> read_em (WwStrict     : acc) (stepOn buf)
 342     'P'# -> read_em (WwPrim       : acc) (stepOn buf)
 343     'E'# -> read_em (WwEnum       : acc) (stepOn buf)
 344     ')'# -> (reverse acc, stepOn buf)
 345     'U'# -> do_unpack DataType True  acc (stepOnBy# buf 2#)
 346     'u'# -> do_unpack DataType False acc (stepOnBy# buf 2#)
 347     'N'# -> do_unpack NewType True  acc (stepOnBy# buf 2#)
 348     'n'# -> do_unpack NewType False acc (stepOnBy# buf 2#)
 349     _    -> (reverse acc, buf)
 350
 351   do_unpack new_or_data wrapper_unpacks acc buf
 352    = case read_em [] buf of
 353       (stuff, rest) -> read_em (WwUnpack new_or_data wrapper_unpacks stuff : acc) rest
 354
 355 ------------------
 356 lex_scc cont buf =
 357  case currentChar# buf of
 358   '"'# ->
 359         case prefixMatch (stepOn buf) "CAFs." of
 360          Just buf' ->
 361           case untilChar# (stepOverLexeme buf') '\"'# of
 362            buf'' -> cont (ITscc (mkAllCafsCC ({-module-}lexemeToFastString buf'') _NIL_)) (stepOn (stepOverLexeme buf''))
 363          Nothing ->
 364             case prefixMatch (stepOn buf) "DICTs." of
 365              Just buf' ->
 366               case untilChar# (stepOverLexeme buf') '\"'# of
 367                buf'' -> cont (ITscc (mkAllDictsCC (lexemeToFastString buf'') _NIL_ True))
 368                         (stepOn (stepOverLexeme buf''))
 369              Nothing ->
 370               let
 371                match_user_cc buf =
 372                 case untilChar# buf '/'# of
 373                  buf' ->
 374                   let mod_name = lexemeToFastString buf' in
 375 --                        case untilChar# (stepOn (stepOverLexeme buf')) '/'# of
 376 --                         buf'' ->
 377 --                            let grp_name = lexemeToFastString buf'' in
 378                     case untilChar# (stepOn (stepOverLexeme buf')) '\"'# of
 379                      buf'' ->
 380                        -- The label may contain arbitrary characters, so it
 381                        -- may have been escaped etc., hence we `read' it in to get
 382                        -- rid of these meta-chars in the string and then pack it (again.)
 383                        -- ToDo: do the same for module name (single quotes allowed in m-names).
 384                        -- BTW, the code in this module is totally gruesome..
 385                        let upk_label = _UNPK_ (lexemeToFastString buf'') in
 386                        case reads ('"':upk_label++"\"") of
 387                         ((cc_label,_):_) ->
 388                             let cc_name = _PK_ cc_label in
 389                             (mkUserCC cc_name mod_name _NIL_{-grp_name-},
 390                              stepOn (stepOverLexeme buf''))
 391                         _ ->
 392                           trace ("trouble lexing scc label: " ++ upk_label ++ " , ignoring")
 393                           (mkUserCC _NIL_ mod_name _NIL_{-grp_name-},
 394                            stepOn (stepOverLexeme buf''))
 395               in
 396               case prefixMatch (stepOn buf) "CAF:" of
 397                Just buf' ->
 398                  case match_user_cc (stepOverLexeme buf') of
 399                   (cc, buf'') -> cont (ITscc (cafifyCC cc)) buf''
 400                Nothing ->
 401                  case match_user_cc (stepOn buf) of
 402                   (cc, buf'') -> cont (ITscc cc) buf''
 403   c -> cont (ITunknown [C# c]) (stepOn buf)
 404
 405
 406 -----------
 407 lex_num :: (IfaceToken -> IfM a) -> (Int -> Int) -> Int# -> IfM a
 408 lex_num cont minus acc# buf =
 409  --trace ("lex_num: "++[C# (currentChar# buf)]) $
 410  case scanNumLit (I# acc#) buf of
 411      (acc',buf') ->
 412        case currentChar# buf' of
 413          '.'# ->
 414              -- this case is not optimised at all, as the
 415              -- presence of floating point numbers in interface
 416              -- files is not that common. (ToDo)
 417             case expandWhile# is_digit (incLexeme buf') of
 418               buf2 -> -- points to first non digit char
 419                 let l = case currentChar# buf2 of
 420                           'e'# -> let buf3 = incLexeme buf2 in
 421                               case currentChar# buf3 of
 422                                 '-'# -> expandWhile# is_digit (incLexeme buf3)
 423                                 _    -> expandWhile# is_digit buf3
 424                           _ -> buf2
 425                 in let v = readRational__ (lexemeToString l) in
 426                    cont (ITrational v) (stepOverLexeme l)
 427
 428          _ -> cont (ITinteger (fromInt (minus acc'))) (stepOverLexeme buf')
 429
 430 -----------
 431 lex_cstring cont buf =
 432  case expandUntilMatch buf "\'\'" of
 433    buf' -> cont (ITstring (lexemeToFastString (setCurrentPos# buf' (negateInt# 2#))))
 434            (stepOverLexeme buf')
 435
 436 ------------------------------------------------------------------------------
 437 -- Character Classes
 438
 439 is_ident, is_symbol, is_any, is_upper, is_digit :: Char# -> Bool
 440
 441 {-# INLINE is_ctype #-}
 442 #if __GLASGOW_HASKELL__ >= 303
 443 is_ctype :: Word8 -> Char# -> Bool
 444 is_ctype mask = \c ->
 445    (indexWord8OffAddr (``char_types'' :: Addr) (ord (C# c)) .&. mask) /= 0
 446 #else
 447 is_ctype :: Int -> Char# -> Bool
 448 is_ctype (I# mask) = \c ->
 449     let (A# ctype) = ``char_types'' :: Addr
 450         flag_word  = int2Word# (ord# (indexCharOffAddr# ctype (ord# c)))
 451     in
 452         (flag_word `and#` (int2Word# mask)) `neWord#` (int2Word# 0#)
 453 #endif
 454
 455 is_ident  = is_ctype 1
 456 is_symbol = is_ctype 2
 457 is_any    = is_ctype 4
 458 is_space  = is_ctype 8
 459 is_upper  = is_ctype 16
 460 is_digit  = is_ctype 32
 461
 462 -----------------------------------------------------------------------------
 463 -- identifiers, symbols etc.
 464
 465 lex_id cont buf =
 466  case expandWhile# is_ident buf of { buf1 ->
 467  case expandWhile# (eqChar# '#'#) buf1 of { buf' -> -- only if GHC extns on
 468  let new_buf = stepOverLexeme buf'
 469      lexeme  = lexemeToFastString buf'
 470  in
 471  case _scc_ "Lex.haskellKeyword" lookupUFM haskellKeywordsFM lexeme of {
 472         Just kwd_token -> --trace ("hkeywd: "++_UNPK_(lexeme)) $
 473                           cont kwd_token new_buf;
 474         Nothing        ->
 475  case lookupUFM ifaceKeywordsFM lexeme of {
 476         Just kwd_token -> --trace ("ifacekeywd: "++_UNPK_(lexeme)) $
 477                           cont kwd_token new_buf;
 478         Nothing        -> --trace ("id: "++_UNPK_(lexeme)) $
 479                           cont (mk_var_token lexeme) new_buf
 480  }}}}
 481
 482 lex_sym cont buf =
 483  case expandWhile# is_symbol buf of
 484    buf' -> case lookupUFM haskellKeySymsFM lexeme of {
 485                 Just kwd_token -> --trace ("keysym: "++unpackFS lexeme) $
 486                                   cont kwd_token new_buf ;
 487                 Nothing        -> --trace ("sym: "++unpackFS lexeme) $
 488                                   cont (mk_var_token lexeme) new_buf
 489            }
 490         where lexeme = lexemeToFastString buf'
 491               new_buf = stepOverLexeme buf'
 492
 493 lex_con cont buf =
 494  case expandWhile# is_ident buf of        { buf1 ->
 495  case expandWhile# (eqChar# '#'#) buf1 of { buf' ->
 496  case currentChar# buf' of
 497      '.'# -> munch HiFile
 498      '!'# -> munch HiBootFile
 499      _    -> just_a_conid
 500
 501    where
 502     just_a_conid = --trace ("con: "++unpackFS lexeme) $
 503                    cont (ITconid lexeme) new_buf
 504     lexeme = lexemeToFastString buf'
 505     new_buf = stepOverLexeme buf'
 506     munch hif = lex_qid cont lexeme hif (stepOn new_buf) just_a_conid
 507  }}
 508
 509 lex_qid cont mod hif buf just_a_conid =
 510  case currentChar# buf of
 511   '['# ->       -- Special case for []
 512     case lookAhead# buf 1# of
 513      ']'# -> cont (ITqconid  (mod,SLIT("[]"),hif)) (stepOnBy# buf 2#)
 514      _    -> just_a_conid
 515
 516   '('# ->  -- Special case for (,,,)
 517            -- This *is* necessary to deal with e.g. "instance C PrelBase.(,,)"
 518     case lookAhead# buf 1# of
 519      '#'# -> case lookAhead# buf 2# of
 520                 ','# -> lex_ubx_tuple cont mod hif (stepOnBy# buf 3#)
 521                                 just_a_conid
 522                 _    -> just_a_conid
 523      ')'# -> cont (ITqconid (mod,SLIT("()"),hif)) (stepOnBy# buf 2#)
 524      ','# -> lex_tuple cont mod hif (stepOnBy# buf 2#) just_a_conid
 525      _    -> just_a_conid
 526
 527   '-'# -> case lookAhead# buf 1# of
 528             '>'# -> cont (ITqconid (mod,SLIT("->"),hif)) (stepOnBy# buf 2#)
 529             _    -> lex_id3 cont mod hif buf just_a_conid
 530   _    -> lex_id3 cont mod hif buf just_a_conid
 531
 532 lex_id3 cont mod hif buf just_a_conid
 533   | is_symbol c =
 534      case expandWhile# is_symbol buf of { buf' ->
 535      let
 536       lexeme  = lexemeToFastString buf'
 537       new_buf = stepOverLexeme buf'
 538      in
 539      case lookupUFM haskellKeySymsFM lexeme of {
 540         Just kwd_token -> just_a_conid; -- avoid M.:: etc.
 541         Nothing        -> cont (mk_qvar_token mod hif lexeme) new_buf
 542      }}
 543
 544   | otherwise   =
 545      case expandWhile# is_ident buf of { buf1 ->
 546      if emptyLexeme buf1
 547             then just_a_conid
 548             else
 549      case expandWhile# (eqChar# '#'#) buf1 of { buf' -> -- only if GHC extns on
 550      let
 551       lexeme  = lexemeToFastString buf'
 552       new_buf = stepOverLexeme buf'
 553      in
 554      case _scc_ "Lex.haskellKeyword" lookupUFM haskellKeywordsFM lexeme of {
 555             Just kwd_token -> just_a_conid; -- avoid M.where etc.
 556             Nothing        ->
 557      case lookupUFM ifaceKeywordsFM lexeme of { -- only for iface files
 558             Just kwd_token -> just_a_conid;
 559             Nothing        -> cont (mk_qvar_token mod hif lexeme) new_buf
 560      }}}}
 561   where c = currentChar# buf
 562
 563 mk_var_token pk_str
 564   | is_upper f          = ITconid pk_str
 565         -- _[A-Z] is treated as a constructor in interface files.
 566   | f `eqChar#` '_'# && not (_NULL_ tl)
 567         && (case _HEAD_ tl of { C# g -> is_upper g }) = ITconid pk_str
 568   | is_ident f          = ITvarid pk_str
 569   | f `eqChar#` ':'#    = ITconsym pk_str
 570   | otherwise           = ITvarsym pk_str
 571   where
 572       (C# f) = _HEAD_ pk_str
 573       tl     = _TAIL_ pk_str
 574
 575 mk_qvar_token m hif token =
 576  case mk_var_token token of
 577    ITconid n  -> ITqconid  (m,n,hif)
 578    ITvarid n  -> ITqvarid  (m,n,hif)
 579    ITconsym n -> ITqconsym (m,n,hif)
 580    ITvarsym n -> ITqvarsym (m,n,hif)
 581    _          -> ITunknown (show token)
 582 \end{code}
 583
 584 ----------------------------------------------------------------------------
 585 Horrible stuff for dealing with M.(,,,)
 586
 587 \begin{code}
 588 lex_tuple cont mod hif buf back_off =
 589   go 2 buf
 590   where
 591    go n buf =
 592     case currentChar# buf of
 593       ','# -> go (n+1) (stepOn buf)
 594       ')'# -> cont (ITqconid (mod, snd (mkTupNameStr n),hif)) (stepOn buf)
 595       _    -> back_off
 596
 597 lex_ubx_tuple cont mod hif buf back_off =
 598   go 2 buf
 599   where
 600    go n buf =
 601     case currentChar# buf of
 602       ','# -> go (n+1) (stepOn buf)
 603       '#'# -> case lookAhead# buf 1# of
 604                 ')'# -> cont (ITqconid (mod, snd (mkUbxTupNameStr n), hif))
 605                                  (stepOnBy# buf 2#)
 606                 _    -> back_off
 607       _    -> back_off
 608 \end{code}
 609
 610 -----------------------------------------------------------------------------
 611 Keyword Lists
 612
 613 \begin{code}
 614 ifaceKeywordsFM :: UniqFM IfaceToken
 615 ifaceKeywordsFM = listToUFM $
 616       map (\ (x,y) -> (_PK_ x,y))
 617      [  ("__interface",         ITinterface),
 618         ("__export",            ITexport),
 619         ("__instimport",        ITinstimport),
 620         ("__forall",            ITforall),
 621         ("__letrec",            ITletrec),
 622         ("__coerce",            ITcoerce),
 623         ("__inline",            ITinline),
 624         ("__DEFAULT",           ITdefaultbranch),
 625         ("__bot",               ITbottom),
 626         ("__integer",           ITinteger_lit),
 627         ("__float",             ITfloat_lit),
 628         ("__rational",          ITrational_lit),
 629         ("__addr",              ITaddr_lit),
 630         ("__litlit",            ITlit_lit),
 631         ("__string",            ITstring_lit),
 632         ("__a",                 ITtypeapp),
 633         ("__A",                 ITarity),
 634         ("__P",                 ITspecialise),
 635         ("__C",                 ITnocaf),
 636         ("__u",                 ITunfold NoInlinePragInfo),
 637         ("__U",                 ITunfold IWantToBeINLINEd),
 638         ("__UU",                ITunfold IMustBeINLINEd),
 639         ("__Unot",              ITunfold IMustNotBeINLINEd),
 640         ("__Ux",                ITunfold IAmALoopBreaker),
 641
 642         ("__ccall",             ITccall (False, False)),
 643         ("__ccall_GC",          ITccall (False, True)),
 644         ("__casm",              ITccall (True,  False)),
 645         ("__casm_GC",           ITccall (True,  True)),
 646
 647         ("/\\",                 ITbiglam)
 648        ]
 649
 650 haskellKeywordsFM = listToUFM $
 651       map (\ (x,y) -> (_PK_ x,y))
 652        [( "case",       ITcase ),
 653         ( "class",      ITclass ),
 654         ( "data",       ITdata ),
 655         ( "default",    ITdefault ),
 656         ( "deriving",   ITderiving ),
 657         ( "do",         ITdo ),
 658         ( "else",       ITelse ),
 659         ( "if",         ITif ),
 660         ( "import",     ITimport ),
 661         ( "in",         ITin ),
 662         ( "infix",      ITinfix ),
 663         ( "infixl",     ITinfixl ),
 664         ( "infixr",     ITinfixr ),
 665         ( "instance",   ITinstance ),
 666         ( "let",        ITlet ),
 667         ( "module",     ITmodule ),
 668         ( "newtype",    ITnewtype ),
 669         ( "of",         ITof ),
 670         ( "then",       ITthen ),
 671         ( "type",       ITtype ),
 672         ( "where",      ITwhere ),
 673         ( "as",         ITas ),
 674         ( "qualified",  ITqualified ),
 675         ( "hiding",     IThiding )
 676      ]
 677
 678 haskellKeySymsFM = listToUFM $
 679         map (\ (x,y) -> (_PK_ x,y))
 680       [ ("..",                  ITdotdot)
 681        ,("::",                  ITdcolon)
 682        ,("=",                   ITequal)
 683        ,("\\",                  ITlam)
 684        ,("|",                   ITvbar)
 685        ,("<-",                  ITlarrow)
 686        ,("->",                  ITrarrow)
 687        ,("@",                   ITat)
 688        ,("~",                   ITtilde)
 689        ,("=>",                  ITdarrow)
 690        ,("-",                   ITminus)
 691        ,("!",                   ITbang)
 692        ]
 693 \end{code}
 694
 695 -----------------------------------------------------------------------------
 696 doDiscard rips along really fast, looking for a '#-}',
 697 indicating the end of the pragma we're skipping
 698
 699 \begin{code}
 700 doDiscard inStr buf =
 701  case currentChar# buf of
 702    '#'# | not inStr ->
 703        case lookAhead# buf 1# of { '#'# ->
 704        case lookAhead# buf 2# of { '-'# ->
 705        case lookAhead# buf 3# of { '}'# ->
 706            (lexemeToBuffer buf, stepOverLexeme (setCurrentPos# buf 4#));
 707         _    -> doDiscard inStr (incLexeme buf) };
 708         _    -> doDiscard inStr (incLexeme buf) };
 709         _    -> doDiscard inStr (incLexeme buf) }
 710    '"'# ->
 711        let
 712         odd_slashes buf flg i# =
 713           case lookAhead# buf i# of
 714            '\\'# -> odd_slashes buf (not flg) (i# -# 1#)
 715            _     -> flg
 716        in
 717        case lookAhead# buf (negateInt# 1#) of --backwards, actually
 718          '\\'# -> -- escaping something..
 719            if odd_slashes buf True (negateInt# 2#) then
 720                -- odd number of slashes, " is escaped.
 721               doDiscard inStr (incLexeme buf)
 722            else
 723                -- even number of slashes, \ is escaped.
 724               doDiscard (not inStr) (incLexeme buf)
 725          _ -> case inStr of -- forced to avoid build-up
 726                True  -> doDiscard False (incLexeme buf)
 727                False -> doDiscard True  (incLexeme buf)
 728    _ -> doDiscard inStr (incLexeme buf)
 729
 730 \end{code}
 731
 732 -----------------------------------------------------------------------------
 733
 734 \begin{code}
 735 type IfM a = StringBuffer       -- Input string
 736           -> SrcLoc
 737           -> MaybeErr a ErrMsg
 738
 739 returnIf   :: a -> IfM a
 740 returnIf a s l = Succeeded a
 741
 742 thenIf     :: IfM a -> (a -> IfM b) -> IfM b
 743 m `thenIf` k = \s l ->
 744         case m s l of
 745                 Succeeded a -> k a s l
 746                 Failed err  -> Failed err
 747
 748 getSrcLocIf :: IfM SrcLoc
 749 getSrcLocIf s l = Succeeded l
 750
 751 happyError :: IfM a
 752 happyError s l = Failed (ifaceParseErr l ([]::[IfaceToken]){-Todo-})
 753
 754
 755 {-
 756  Note that if the name of the file we're processing ends
 757  with `hi-boot', we accept it on faith as having the right
 758  version. This is done so that .hi-boot files that comes
 759  with hsc don't have to be updated before every release,
 760  *and* it allows us to share .hi-boot files with versions
 761  of hsc that don't have .hi version checking (e.g., ghc-2.10's)
 762
 763  If the version number is 0, the checking is also turned off.
 764  (needed to deal with GHC.hi only!)
 765
 766  Once we can assume we're compiling with a version of ghc that
 767  supports interface file checking, we can drop the special
 768  pleading
 769 -}
 770 checkVersion :: Maybe Integer -> IfM ()
 771 checkVersion mb@(Just v) s l
 772  | (v==0) || (v == fromInt opt_HiVersion) || opt_NoHiCheck = Succeeded ()
 773  | otherwise = Failed (ifaceVersionErr mb l ([]::[IfaceToken]){-Todo-})
 774 checkVersion mb@Nothing  s l
 775  | "hi-boot" `isSuffixOf` (_UNPK_ (srcLocFile l)) = Succeeded ()
 776  | otherwise = Failed (ifaceVersionErr mb l ([]::[IfaceToken]){-Todo-})
 777
 778 -----------------------------------------------------------------
 779
 780 ifaceParseErr l toks
 781   = hsep [ppr l, ptext SLIT("Interface-file parse error;"),
 782           ptext SLIT("toks="), text (show (take 10 toks))]
 783
 784 ifaceVersionErr hi_vers l toks
 785   = hsep [ppr l, ptext SLIT("Interface file version error;"),
 786           ptext SLIT("Expected"), int opt_HiVersion,
 787           ptext SLIT("found "), pp_version]
 788     where
 789      pp_version =
 790       case hi_vers of
 791         Nothing -> ptext SLIT("pre ghc-3.02 version")
 792         Just v  -> ptext SLIT("version") <+> integer v
 793
 794 \end{code}