X-Git-Url: http://git.megacz.com/?a=blobdiff_plain;f=compiler%2Fparser%2FLexer.x;h=525d50bd614c0161ecd52aff5019c6188968f752;hb=15a63009a30ce0d1614b36803185f548db838805;hp=e891cae73b9ced9e0e28e86a78b4767d7f31957c;hpb=6821c8a47c0fc61a2d989d368f926cc0ded776e9;p=ghc-hetmet.git diff --git a/compiler/parser/Lexer.x b/compiler/parser/Lexer.x index e891cae..525d50b 100644 --- a/compiler/parser/Lexer.x +++ b/compiler/parser/Lexer.x @@ -31,6 +31,8 @@ -- Note that Alex itself generates code with with some unused bindings and -- without type signatures, so removing the flag might not be possible. +{-# OPTIONS_GHC -funbox-strict-fields #-} + module Lexer ( Token(..), lexer, pragState, mkPState, PState(..), P(..), ParseResult(..), getSrcLoc, @@ -60,15 +62,11 @@ import Data.Char ( chr, ord, isSpace ) import Data.Ratio import Debug.Trace -#if __GLASGOW_HASKELL__ >= 605 -import Data.Char ( GeneralCategory(..), generalCategory, isPrint, isUpper ) -#else -import Compat.Unicode ( GeneralCategory(..), generalCategory, isPrint, isUpper ) -#endif +import Unicode ( GeneralCategory(..), generalCategory, isPrint, isUpper ) } $unispace = \x05 -- Trick Alex into handling Unicode. See alexGetChar. -$whitechar = [\ \n\r\f\v\xa0 $unispace] +$whitechar = [\ \n\r\f\v $unispace] $white_no_nl = $whitechar # \n $tab = \t @@ -78,16 +76,16 @@ $decdigit = $ascdigit -- for now, should really be $digit (ToDo) $digit = [$ascdigit $unidigit] $special = [\(\)\,\;\[\]\`\{\}] -$ascsymbol = [\!\#\$\%\&\*\+\.\/\<\=\>\?\@\\\^\|\-\~ \xa1-\xbf \xd7 \xf7] +$ascsymbol = [\!\#\$\%\&\*\+\.\/\<\=\>\?\@\\\^\|\-\~] $unisymbol = \x04 -- Trick Alex into handling Unicode. See alexGetChar. $symbol = [$ascsymbol $unisymbol] # [$special \_\:\"\'] $unilarge = \x01 -- Trick Alex into handling Unicode. See alexGetChar. -$asclarge = [A-Z \xc0-\xd6 \xd8-\xde] +$asclarge = [A-Z] $large = [$asclarge $unilarge] $unismall = \x02 -- Trick Alex into handling Unicode. See alexGetChar. -$ascsmall = [a-z \xdf-\xf6 \xf8-\xff] +$ascsmall = [a-z] $small = [$ascsmall $unismall \_] $unigraphic = \x06 -- Trick Alex into handling Unicode. See alexGetChar. @@ -504,8 +502,8 @@ data Token | ITvocurly | ITvccurly | ITobrack - | ITopabrack -- [:, for parallel arrays with -fparr - | ITcpabrack -- :], for parallel arrays with -fparr + | ITopabrack -- [:, for parallel arrays with -XParr + | ITcpabrack -- :], for parallel arrays with -XParr | ITcbrack | IToparen | ITcparen @@ -1218,7 +1216,7 @@ lex_char c inp = do c | isAny c -> do setInput inp; return c _other -> lit_error -isAny c | c > '\xff' = isPrint c +isAny c | c > '\x7f' = isPrint c | otherwise = is_any c lex_escape :: P Char @@ -1242,7 +1240,7 @@ lex_escape = do 'x' -> readNum is_hexdigit 16 hexDigit 'o' -> readNum is_octdigit 8 octDecDigit - x | is_digit x -> readNum2 is_digit 10 octDecDigit (octDecDigit x) + x | is_decdigit x -> readNum2 is_decdigit 10 octDecDigit (octDecDigit x) c1 -> do i <- getInput @@ -1486,7 +1484,7 @@ alexGetChar (AI loc ofs s) adj_c | c <= '\x06' = non_graphic - | c <= '\xff' = c + | c <= '\x7f' = c -- Alex doesn't handle Unicode, so when Unicode -- character is encoutered we output these values -- with the actual character value hidden in the state. @@ -1496,7 +1494,7 @@ alexGetChar (AI loc ofs s) LowercaseLetter -> lower TitlecaseLetter -> upper ModifierLetter -> other_graphic - OtherLetter -> other_graphic + OtherLetter -> lower -- see #1103 NonSpacingMark -> other_graphic SpacingCombiningMark -> other_graphic EnclosingMark -> other_graphic @@ -1550,7 +1548,7 @@ getLexState :: P Int getLexState = P $ \s@PState{ lex_state=ls:_ } -> POk s ls -- for reasons of efficiency, flags indicating language extensions (eg, --- -fglasgow-exts or -fparr) are represented by a bitmap stored in an unboxed +-- -fglasgow-exts or -XParr) are represented by a bitmap stored in an unboxed -- integer genericsBit, ffiBit, parrBit :: Int @@ -1597,14 +1595,12 @@ qqEnabled flags = testBit flags qqBit -- PState for parsing options pragmas -- -pragState :: StringBuffer -> SrcLoc -> PState -pragState buf loc = +pragState :: DynFlags -> StringBuffer -> SrcLoc -> PState +pragState dynflags buf loc = PState { - buffer = buf, + buffer = buf, messages = emptyMessages, - -- XXX defaultDynFlags is not right, but we don't have a real - -- dflags handy - dflags = defaultDynFlags, + dflags = dynflags, last_loc = mkSrcSpan loc loc, last_offs = 0, last_len = 0,