From: simonmar Date: Mon, 8 Sep 2003 11:52:27 +0000 (+0000) Subject: [project @ 2003-09-08 11:52:24 by simonmar] X-Git-Tag: Approx_11550_changesets_converted~492 X-Git-Url: http://git.megacz.com/?a=commitdiff_plain;h=9541ef3440f89f5f275509b1cc64fb9c498dcf73;p=ghc-hetmet.git [project @ 2003-09-08 11:52:24 by simonmar] Replace the handwritten lexer with one generated by Alex. YOU NOW NEED ALEX (v 2.0 or later) TO COMPILE GHC FROM CVS. Highlights: - Faster than the previous lexer (about 10% of total parse time, depending on the token mix). - More correct than the previous lexer: a couple of minor wibbles in the syntax were fixed. - Completely accurate source spans for each token are now collected. This information isn't used yet, but it will be used to give much more accurate error messages in the future. - SrcLoc now contains a column field as well as a line number, although this is currently ignored when printing out SrcLocs. - StringBuffer is now based on a ByteArray# rather than a Ptr, which means that StringBuffers are now garbage collected. Previously StringBuffers were hardly ever released, so a GHCi session would leak space as more source files were loaded in. - Code size reduction: Lexer.x is about the same size as the old Lex.lhs, but StringBuffer.lhs is significantly shorter and simpler. Sadly I wasn't able to get rid of parser/Ctypes.hs (yet). --- diff --git a/ghc/compiler/Makefile b/ghc/compiler/Makefile index 1db9a45..04c8d8b 100644 --- a/ghc/compiler/Makefile +++ b/ghc/compiler/Makefile @@ -411,6 +411,8 @@ endif # Required due to use of Concurrent.myThreadId utils/Panic_HC_OPTS += -fvia-C +parser/Lexer_HC_OPTS += -funbox-strict-fields + # ghc_strlen percolates through so many modules that it is easier to get its # prototype via a global option instead of a myriad of per-file OPTIONS SRC_HC_OPTS += '-\#include "hschooks.h"' @@ -473,12 +475,6 @@ primop-usage.hs-incl: prelude/primops.txt $(GENPOC) --usage < $< > $@ - -# ---------------------------------------------------------------------------- -# Parsers/lexers - -SRC_HAPPY_OPTS += +RTS -K2m -H16m -RTS - #----------------------------------------------------------------------------- # Linking @@ -518,45 +514,47 @@ endif # ---------------------------------------------------------------------------- # profiling. -rename/RnBinds_HC_OPTS += -auto-all -rename/RnEnv_HC_OPTS += -auto-all -rename/RnExpr_HC_OPTS += -auto-all -rename/RnHiFiles_HC_OPTS += -auto-all -rename/RnHsSyn_HC_OPTS += -auto-all -rename/Rename_HC_OPTS += -auto-all -rename/RnIfaces_HC_OPTS += -auto-all -rename/RnNames_HC_OPTS += -auto-all -rename/RnSource_HC_OPTS += -auto-all -rename/RnTypes_HC_OPTS += -auto-all - -typecheck/Inst_HC_OPTS += -auto-all -typecheck/TcBinds_HC_OPTS += -auto-all -typecheck/TcClassDcl_HC_OPTS += -auto-all -typecheck/TcDefaults_HC_OPTS += -auto-all -typecheck/TcDeriv_HC_OPTS += -auto-all -typecheck/TcEnv_HC_OPTS += -auto-all -typecheck/TcExpr_HC_OPTS += -auto-all -typecheck/TcForeign_HC_OPTS += -auto-all -typecheck/TcGenDeriv_HC_OPTS += -auto-all -typecheck/TcHsSyn_HC_OPTS += -auto-all -typecheck/TcIfaceSig_HC_OPTS += -auto-all -typecheck/TcInstDcls_HC_OPTS += -auto-all -typecheck/TcMatches_HC_OPTS += -auto-all -typecheck/TcMonoType_HC_OPTS += -auto-all -typecheck/TcMType_HC_OPTS += -auto-all -typecheck/TcPat_HC_OPTS += -auto-all -typecheck/TcRnDriver_HC_OPTS += -auto-all -#typecheck/TcRnMonad_HC_OPTS += -auto-all -#typecheck/TcRnTypes_HC_OPTS += -auto-all -typecheck/TcRules_HC_OPTS += -auto-all -typecheck/TcSimplify_HC_OPTS += -auto-all -typecheck/TcSplice_HC_OPTS += -auto-all -typecheck/TcTyClsDecls_HC_OPTS += -auto-all -typecheck/TcTyDecls_HC_OPTS += -auto-all -typecheck/TcType_HC_OPTS += -auto-all -typecheck/TcUnify_HC_OPTS += -auto-all - -absCSyn/PprAbsC_HC_OPTS += -auto-all +# rename/RnBinds_HC_OPTS += -auto-all +# rename/RnEnv_HC_OPTS += -auto-all +# rename/RnExpr_HC_OPTS += -auto-all +# rename/RnHiFiles_HC_OPTS += -auto-all +# rename/RnHsSyn_HC_OPTS += -auto-all +# rename/Rename_HC_OPTS += -auto-all +# rename/RnIfaces_HC_OPTS += -auto-all +# rename/RnNames_HC_OPTS += -auto-all +# rename/RnSource_HC_OPTS += -auto-all +# rename/RnTypes_HC_OPTS += -auto-all +# +# typecheck/Inst_HC_OPTS += -auto-all +# typecheck/TcBinds_HC_OPTS += -auto-all +# typecheck/TcClassDcl_HC_OPTS += -auto-all +# typecheck/TcDefaults_HC_OPTS += -auto-all +# typecheck/TcDeriv_HC_OPTS += -auto-all +# typecheck/TcEnv_HC_OPTS += -auto-all +# typecheck/TcExpr_HC_OPTS += -auto-all +# typecheck/TcForeign_HC_OPTS += -auto-all +# typecheck/TcGenDeriv_HC_OPTS += -auto-all +# typecheck/TcHsSyn_HC_OPTS += -auto-all +# typecheck/TcIfaceSig_HC_OPTS += -auto-all +# typecheck/TcInstDcls_HC_OPTS += -auto-all +# typecheck/TcMatches_HC_OPTS += -auto-all +# typecheck/TcMonoType_HC_OPTS += -auto-all +# typecheck/TcMType_HC_OPTS += -auto-all +# typecheck/TcPat_HC_OPTS += -auto-all +# typecheck/TcRnDriver_HC_OPTS += -auto-all +# #typecheck/TcRnMonad_HC_OPTS += -auto-all +# #typecheck/TcRnTypes_HC_OPTS += -auto-all +# typecheck/TcRules_HC_OPTS += -auto-all +# typecheck/TcSimplify_HC_OPTS += -auto-all +# typecheck/TcSplice_HC_OPTS += -auto-all +# typecheck/TcTyClsDecls_HC_OPTS += -auto-all +# typecheck/TcTyDecls_HC_OPTS += -auto-all +# typecheck/TcType_HC_OPTS += -auto-all +# typecheck/TcUnify_HC_OPTS += -auto-all +# +# absCSyn/PprAbsC_HC_OPTS += -auto-all + +coreSyn/CorePrep_HC_OPTS += -auto-all #----------------------------------------------------------------------------- # clean diff --git a/ghc/compiler/basicTypes/IdInfo.hi-boot-6 b/ghc/compiler/basicTypes/IdInfo.hi-boot-6 index d29d826..e090800 100644 --- a/ghc/compiler/basicTypes/IdInfo.hi-boot-6 +++ b/ghc/compiler/basicTypes/IdInfo.hi-boot-6 @@ -4,5 +4,5 @@ data IdInfo data GlobalIdDetails notGlobalId :: GlobalIdDetails -seqIdInfo :: IdInfo -> GHC.Base.() +seqIdInfo :: IdInfo -> () vanillaIdInfo :: IdInfo diff --git a/ghc/compiler/basicTypes/SrcLoc.lhs b/ghc/compiler/basicTypes/SrcLoc.lhs index c3249df..377a8c8 100644 --- a/ghc/compiler/basicTypes/SrcLoc.lhs +++ b/ghc/compiler/basicTypes/SrcLoc.lhs @@ -13,15 +13,15 @@ module SrcLoc ( mkSrcLoc, isGoodSrcLoc, isWiredInLoc, noSrcLoc, -- "I'm sorry, I haven't a clue" + advanceSrcLoc, importedSrcLoc, -- Unknown place in an interface wiredInSrcLoc, -- Something wired into the compiler generatedSrcLoc, -- Code generated within the compiler - incSrcLine, replaceSrcLine, - - srcLocFile, -- return the file name part. - srcLocLine -- return the line part. + srcLocFile, -- return the file name part + srcLocLine, -- return the line part + srcLocCol, -- return the column part ) where #include "HsVersions.h" @@ -32,7 +32,7 @@ import FastString ( unpackFS ) import FastTypes import FastString -import GLAEXTS ( (+#) ) +import GLAEXTS ( (+#), quotInt# ) \end{code} %************************************************************************ @@ -52,9 +52,24 @@ data SrcLoc -- isWiredInName | SrcLoc FastString -- A precise location (file name) - FastInt + FastInt -- line + FastInt -- column | UnhelpfulSrcLoc FastString -- Just a general indication + +{- +data SrcSpan + = WiredInSpan + + -- A precise source file span + | SrcSpan FastString -- file name + FastInt -- beginning line + FastInt -- beginning column + FastInt -- end line + FastInt -- end column + + | UnhelpfulSrcSpan FastString -- Just a general indication +-} \end{code} Note that an entity might be imported via more than one route, and @@ -70,30 +85,35 @@ rare case. Things to make 'em: \begin{code} -mkSrcLoc x y = SrcLoc x (iUnbox y) +mkSrcLoc x line col = SrcLoc x (iUnbox line) (iUnbox col) wiredInSrcLoc = WiredInLoc noSrcLoc = UnhelpfulSrcLoc FSLIT("") importedSrcLoc = UnhelpfulSrcLoc FSLIT("") generatedSrcLoc = UnhelpfulSrcLoc FSLIT("") -isGoodSrcLoc (SrcLoc _ _) = True +isGoodSrcLoc (SrcLoc _ _ _) = True isGoodSrcLoc other = False isWiredInLoc WiredInLoc = True isWiredInLoc other = False srcLocFile :: SrcLoc -> FastString -srcLocFile (SrcLoc fname _) = fname +srcLocFile (SrcLoc fname _ _) = fname + +srcLocLine :: SrcLoc -> Int +srcLocLine (SrcLoc _ l c) = iBox l -srcLocLine :: SrcLoc -> FastInt -srcLocLine (SrcLoc _ l) = l +srcLocCol :: SrcLoc -> Int +srcLocCol (SrcLoc _ l c) = iBox c -incSrcLine :: SrcLoc -> SrcLoc -incSrcLine (SrcLoc s l) = SrcLoc s (l +# 1#) -incSrcLine loc = loc +advanceSrcLoc :: SrcLoc -> Char -> SrcLoc +advanceSrcLoc (SrcLoc f l c) '\t' = SrcLoc f l (tab c) +advanceSrcLoc (SrcLoc f l c) '\n' = SrcLoc f (l +# 1#) 0# +advanceSrcLoc (SrcLoc f l c) _ = SrcLoc f l (c +# 1#) -replaceSrcLine :: SrcLoc -> FastInt -> SrcLoc -replaceSrcLine (SrcLoc s _) l = SrcLoc s l +-- Advance to the next tab stop. Tabs are at column positions 0, 8, 16, etc. +tab :: FastInt -> FastInt +tab c = (c `quotInt#` 8# +# 1#) *# 8# \end{code} %************************************************************************ @@ -118,19 +138,23 @@ cmpSrcLoc WiredInLoc other = LT cmpSrcLoc (UnhelpfulSrcLoc s1) (UnhelpfulSrcLoc s2) = s1 `compare` s2 cmpSrcLoc (UnhelpfulSrcLoc s1) other = GT -cmpSrcLoc (SrcLoc s1 l1) WiredInLoc = GT -cmpSrcLoc (SrcLoc s1 l1) (UnhelpfulSrcLoc _) = LT -cmpSrcLoc (SrcLoc s1 l1) (SrcLoc s2 l2) = (s1 `compare` s2) `thenCmp` (l1 `cmpline` l2) - where - l1 `cmpline` l2 | l1 <# l2 = LT - | l1 ==# l2 = EQ - | otherwise = GT +cmpSrcLoc (SrcLoc _ _ _) WiredInLoc = GT +cmpSrcLoc (SrcLoc _ _ _) (UnhelpfulSrcLoc _) = LT +cmpSrcLoc (SrcLoc s1 l1 c1) (SrcLoc s2 l2 c2) + = (s1 `compare` s2) `thenCmp` (l1 `cmpline` l2) `thenCmp` (c1 `cmpline` c2) + where + l1 `cmpline` l2 | l1 <# l2 = LT + | l1 ==# l2 = EQ + | otherwise = GT instance Outputable SrcLoc where - ppr (SrcLoc src_path src_line) + ppr (SrcLoc src_path src_line src_col) = getPprStyle $ \ sty -> if userStyle sty || debugStyle sty then - hcat [ ftext src_path, char ':', int (iBox src_line) ] + hcat [ ftext src_path, char ':', + int (iBox src_line) + {- TODO: char ':', int (iBox src_col) -} + ] else hcat [text "{-# LINE ", int (iBox src_line), space, char '\"', ftext src_path, text " #-}"] diff --git a/ghc/compiler/main/HscMain.lhs b/ghc/compiler/main/HscMain.lhs index e920e7b..02465bf 100644 --- a/ghc/compiler/main/HscMain.lhs +++ b/ghc/compiler/main/HscMain.lhs @@ -36,9 +36,10 @@ import CoreLint ( lintUnfolding ) import HsSyn import RdrName ( nameRdrName ) -import StringBuffer ( hGetStringBuffer, freeStringBuffer ) +import StringBuffer ( hGetStringBuffer ) import Parser -import Lex ( ParseResult(..), ExtFlags(..), mkPState ) +import Lexer ( P(..), ParseResult(..), ExtFlags(..), + mkPState, showPFailed ) import SrcLoc ( mkSrcLoc ) import TcRnDriver ( checkOldIface, tcRnModule, tcRnExtCore, tcRnIface ) import RnEnv ( extendOrigNameCache ) @@ -152,7 +153,8 @@ hscNoRecomp hsc_env pcs_ch have_object showModMsg have_object mod location); -- Typecheck - (pcs_tc, maybe_tc_result) <- tcRnIface hsc_env pcs_ch old_iface ; + (pcs_tc, maybe_tc_result) <- _scc_ "tcRnIface" + tcRnIface hsc_env pcs_ch old_iface ; case maybe_tc_result of { Nothing -> return (HscFail pcs_tc); @@ -387,13 +389,12 @@ myParseModule dflags src_filename buf <- hGetStringBuffer src_filename let exts = mkExtFlags dflags - loc = mkSrcLoc (mkFastString src_filename) 1 + loc = mkSrcLoc (mkFastString src_filename) 1 0 - case parseModule buf (mkPState loc exts) of { + case unP parseModule (mkPState buf loc exts) of { - PFailed err -> do { hPutStrLn stderr (showSDoc err); - freeStringBuffer buf; - return Nothing }; + PFailed l1 l2 err -> do { hPutStrLn stderr (showPFailed l1 l2 err); + return Nothing }; POk _ rdr_module -> do { @@ -510,13 +511,12 @@ hscParseStmt dflags str buf <- stringToStringBuffer str let exts = mkExtFlags dflags - loc = mkSrcLoc FSLIT("") 1 + loc = mkSrcLoc FSLIT("") 1 0 - case parseStmt buf (mkPState loc exts) of { + case unP parseStmt (mkPState buf loc exts) of { - PFailed err -> do { hPutStrLn stderr (showSDoc err); --- Not yet implemented in <4.11 freeStringBuffer buf; - return Nothing }; + PFailed l1 l2 err -> do { hPutStrLn stderr (showPFailed l1 l2 err); + return Nothing }; -- no stmt: the line consisted of just space or comments POk _ Nothing -> return Nothing; @@ -525,7 +525,6 @@ hscParseStmt dflags str --ToDo: can't free the string buffer until we've finished this -- compilation sweep and all the identifiers have gone away. - --freeStringBuffer buf; dumpIfSet_dyn dflags Opt_D_dump_parsed "Parser" (ppr rdr_stmt); return (Just rdr_stmt) }} @@ -568,16 +567,14 @@ myParseIdentifier dflags str = do buf <- stringToStringBuffer str let exts = mkExtFlags dflags - loc = mkSrcLoc FSLIT("") 1 + loc = mkSrcLoc FSLIT("") 1 0 - case parseIdentifier buf (mkPState loc exts) of + case unP parseIdentifier (mkPState buf loc exts) of - PFailed err -> do { hPutStrLn stderr (showSDoc err); - freeStringBuffer buf; - return Nothing } + PFailed l1 l2 err -> do { hPutStrLn stderr (showPFailed l1 l2 err); + return Nothing } - POk _ rdr_name -> do { --should, but can't: freeStringBuffer buf; - return (Just rdr_name) } + POk _ rdr_name -> return (Just rdr_name) #endif \end{code} diff --git a/ghc/compiler/main/ParsePkgConf.y b/ghc/compiler/main/ParsePkgConf.y index f83dd58..62813d3 100644 --- a/ghc/compiler/main/ParsePkgConf.y +++ b/ghc/compiler/main/ParsePkgConf.y @@ -4,7 +4,7 @@ module ParsePkgConf( loadPackageConfig ) where #include "HsVersions.h" import Packages ( PackageConfig(..), defaultPackageConfig ) -import Lex +import Lexer import FastString import StringBuffer import SrcLoc @@ -15,18 +15,18 @@ import EXCEPTION ( throwDyn ) } %token - '{' { ITocurly } - '}' { ITccurly } - '[' { ITobrack } - ']' { ITcbrack } - ',' { ITcomma } - '=' { ITequal } - VARID { ITvarid $$ } - CONID { ITconid $$ } - STRING { ITstring $$ } - -%monad { P } { thenP } { returnP } -%lexer { lexer } { ITeof } + '{' { T _ _ ITocurly } + '}' { T _ _ ITccurly } + '[' { T _ _ ITobrack } + ']' { T _ _ ITcbrack } + ',' { T _ _ ITcomma } + '=' { T _ _ ITequal } + VARID { T _ _ (ITvarid $$) } + CONID { T _ _ (ITconid $$) } + STRING { T _ _ (ITstring $$) } + +%monad { P } { >>= } { return } +%lexer { lexer } { T _ _ ITeof } %name parse %tokentype { Token } %% @@ -49,7 +49,7 @@ fields :: { PackageConfig -> PackageConfig } field :: { PackageConfig -> PackageConfig } : VARID '=' STRING {% case unpackFS $1 of { - "name" -> returnP (\ p -> p{name = unpackFS $3}); + "name" -> return (\ p -> p{name = unpackFS $3}); _ -> happyError } } | VARID '=' bool @@ -84,29 +84,27 @@ strs :: { [String] } bool :: { Bool } : CONID {% case unpackFS $1 of { - "True" -> returnP True; - "False" -> returnP False; + "True" -> return True; + "False" -> return False; _ -> happyError } } { happyError :: P a -happyError buf PState{ loc = loc } = PFailed (srcParseErr buf loc) +happyError = srcParseFail loadPackageConfig :: FilePath -> IO [PackageConfig] loadPackageConfig conf_filename = do buf <- hGetStringBuffer conf_filename - let loc = mkSrcLoc (mkFastString conf_filename) 1 + let loc = mkSrcLoc (mkFastString conf_filename) 1 0 exts = ExtFlags {glasgowExtsEF = False, ffiEF = False, arrowsEF = False, withEF = False, parrEF = False} - case parse buf (mkPState loc exts) of - PFailed err -> do - freeStringBuffer buf - throwDyn (InstallationError (showSDoc err)) + case unP parse (mkPState buf loc exts) of + PFailed l1 l2 err -> do + throwDyn (InstallationError (showPFailed l1 l2 err)) POk _ pkg_details -> do - freeStringBuffer buf return pkg_details } diff --git a/ghc/compiler/parser/Ctype.lhs b/ghc/compiler/parser/Ctype.lhs index 405dc5c..414aa4f 100644 --- a/ghc/compiler/parser/Ctype.lhs +++ b/ghc/compiler/parser/Ctype.lhs @@ -15,7 +15,6 @@ module Ctype import DATA_INT ( Int32 ) import DATA_BITS ( Bits((.&.)) ) -import GLAEXTS ( Char#, Char(..) ) \end{code} Bit masks @@ -36,10 +35,10 @@ at the big case below. \begin{code} {-# INLINE is_ctype #-} -is_ctype :: Int -> Char# -> Bool -is_ctype mask c = (fromIntegral (charType (C# c)) .&. fromIntegral mask) /= (0::Int32) +is_ctype :: Int -> Char -> Bool +is_ctype mask c = (fromIntegral (charType c) .&. fromIntegral mask) /= (0::Int32) -is_ident, is_symbol, is_any, is_space, is_lower, is_upper, is_digit :: Char# -> Bool +is_ident, is_symbol, is_any, is_space, is_lower, is_upper, is_digit :: Char -> Bool is_ident = is_ctype cIdent is_symbol = is_ctype cSymbol is_any = is_ctype cAny @@ -65,7 +64,7 @@ charType c = case c of '\7' -> 0 -- \007 '\8' -> 0 -- \010 '\9' -> cAny + cSpace -- \t - '\10' -> cAny + cSpace -- \n + '\10' -> cSpace -- \n (not allowed in strings, so !cAny) '\11' -> cAny + cSpace -- \v '\12' -> cAny + cSpace -- \f '\13' -> cAny + cSpace -- ^M diff --git a/ghc/compiler/parser/Lexer.x b/ghc/compiler/parser/Lexer.x new file mode 100644 index 0000000..5b7d0a5 --- /dev/null +++ b/ghc/compiler/parser/Lexer.x @@ -0,0 +1,1329 @@ +----------------------------------------------------------------------------- +-- (c) The University of Glasgow, 2003 +-- +-- GHC's lexer. +-- +-- This is a combination of an Alex-generated lexer from a regex +-- definition, with some hand-coded bits. +-- +-- Completely accurate information about token-spans within the source +-- file is maintained. Every token has a start and end SrcLoc attached to it. +-- +----------------------------------------------------------------------------- + +-- ToDo / known bugs: +-- - Unicode +-- - parsing integers is a bit slow +-- - readRational is a bit slow +-- +-- Known bugs, that were also in the previous version: +-- - M... should be 3 tokens, not 1. +-- - pragma-end should be only valid in a pragma + +{ +module Lexer ( + Token(..), Token__(..), lexer, ExtFlags(..), mkPState, showPFailed, + P(..), ParseResult(..), setSrcLocFor, getSrcLoc, + failMsgP, failLocMsgP, srcParseFail, + popContext, pushCurrentContext, + ) where + +#include "HsVersions.h" + +import ForeignCall ( Safety(..) ) +import ErrUtils ( Message ) +import Outputable +import StringBuffer +import FastString +import FastTypes +import SrcLoc +import UniqFM +import Ctype +import Util ( maybePrefixMatch ) + +import DATA_BITS +import Char +import Ratio +import TRACE +} + +$whitechar = [\ \t\n\r\f\v] +$white_no_nl = $whitechar # \n + +$ascdigit = 0-9 +$unidigit = \x01 +$digit = [$ascdigit $unidigit] + +$special = [\(\)\,\;\[\]\`\{\}] +$ascsymbol = [\!\#\$\%\&\*\+\.\/\<\=\>\?\@\\\^\|\-\~] +$unisymbol = \x02 +$symbol = [$ascsymbol $unisymbol] # [$special \_\:\"\'] + +$unilarge = \x03 +$asclarge = [A-Z \xc0-\xd6 \xd8-\xde] +$large = [$asclarge $unilarge] + +$unismall = \x04 +$ascsmall = [a-z \xdf-\xf6 \xf8-\xff] +$small = [$ascsmall $unismall \_] + +$graphic = [$small $large $symbol $digit $special \:\"\'] + +$octit = 0-7 +$hexit = [$digit A-F a-f] +$symchar = [$symbol \:] +$nl = [\n\r] +$idchar = [$small $large $digit \'] + +@varid = $small $idchar* +@conid = $large $idchar* + +@varsym = $symbol $symchar* +@consym = \: $symchar* + +@decimal = $digit+ +@octal = $octit+ +@hexadecimal = $hexit+ +@exponent = [eE] [\-\+]? @decimal + +-- we support the hierarchical module name extension: +@qual = (@conid \.)+ + +@floating_point = @decimal \. @decimal @exponent? | @decimal @exponent + +haskell :- + +-- everywhere: skip whitespace and comments +$white_no_nl+ ; + +-- Everywhere: deal with nested comments. We explicitly rule out +-- pragmas, "{-#", so that we don't accidentally treat them as comments. +-- (this can happen even though pragmas will normally take precedence due to +-- longest-match, because pragmas aren't valid in every state, but comments +-- are). +"{-" / { notFollowedBy '#' } { nested_comment } + +-- Single-line comments are a bit tricky. Haskell 98 says that two or +-- more dashes followed by a symbol should be parsed as a varsym, so we +-- have to exclude those. +-- The regex says: "munch all the characters after the dashes, as long as +-- the first one is not a symbol". +"--"\-* ([^$symbol] .*)? ; + +-- 'bol' state: beginning of a line. Slurp up all the whitespace (including +-- blank lines) until we find a non-whitespace character, then do layout +-- processing. +-- +-- One slight wibble here: what if the line begins with {-#? In +-- theory, we have to lex the pragma to see if it's one we recognise, +-- and if it is, then we backtrack and do_bol, otherwise we treat it +-- as a nested comment. We don't bother with this: if the line begins +-- with {-#, then we'll assume it's a pragma we know about and go for do_bol. + { + \n ; + ^\# (line)? { begin line_prag1 } + () { do_bol } +} + +-- after a layout keyword (let, where, do, of), we begin a new layout +-- context if the curly brace is missing. +-- Careful! This stuff is quite delicate. + { + \{ / { notFollowedBy '-' } { pop_and open_brace } + -- we might encounter {-# here, but {- has been handled already + \n ; + ^\# (line)? { begin line_prag1 } +} + +-- do is treated in a subtly different way, see new_layout_context + () { new_layout_context True } + () { new_layout_context False } + +-- after a new layout context which was found to be to the left of the +-- previous context, we have generated a '{' token, and we now need to +-- generate a matching '}' token. + () { do_layout_left } + +<0,glaexts> \n { begin bol } + +"{-#" $whitechar* (line|LINE) { begin line_prag2 } + +-- single-line line pragmas, of the form +-- # "" \n + $digit+ { set_line line_prag1a } + \" $graphic* \" { set_file line_prag1b } + .* { pop } + +-- Haskell-style line pragmas, of the form +-- {-# LINE "" #-} + $digit+ { set_line line_prag2a } + \" $graphic* \" { set_file line_prag2b } + "#-}" { pop } + +<0,glaexts> { + "{-#" $whitechar* (SPECIALI[SZ]E|speciali[sz]e) + { token ITspecialise_prag } + "{-#" $whitechar* (SOURCE|source) { token ITsource_prag } + "{-#" $whitechar* (INLINE|inline) { token ITinline_prag } + "{-#" $whitechar* (NO(T?)INLINE|no(t?)inline) + { token ITnoinline_prag } + "{-#" $whitechar* (RULES|rules) { token ITrules_prag } + "{-#" $whitechar* (DEPRECATED|deprecated) + { token ITdeprecated_prag } + "{-#" $whitechar* (SCC|scc) { token ITscc_prag } + "{-#" $whitechar* (CORE|core) { token ITcore_prag } + + "{-#" { nested_comment } + + -- ToDo: should only be valid inside a pragma: + "#-}" { token ITclose_prag} +} + + +-- '0' state: ordinary lexemes +-- 'glaexts' state: glasgow extensions (postfix '#', etc.) + +-- "special" symbols + + { + "(#" { token IToubxparen } + "#)" { token ITcubxparen } + + "[:" { token ITopabrack } + ":]" { token ITcpabrack } + + "{|" { token ITocurlybar } + "|}" { token ITccurlybar } + + "[|" { token ITopenExpQuote } + "[e|" { token ITopenExpQuote } + "[p|" { token ITopenPatQuote } + "[d|" { token ITopenDecQuote } + "[t|" { token ITopenTypQuote } + "|]" { token ITcloseQuote } +} + +<0,glaexts> { + "(|" / { \b _ _ _ -> arrowsEnabled b} { special IToparenbar } + "|)" / { \b _ _ _ -> arrowsEnabled b} { special ITcparenbar } + \( { special IToparen } + \) { special ITcparen } + \[ { special ITobrack } + \] { special ITcbrack } + \, { special ITcomma } + \; { special ITsemi } + \` { special ITbackquote } + + \{ { open_brace } + \} { close_brace } +} + + { + \? @varid { skip_one_varid ITdupipvarid } + \% @varid { skip_one_varid ITsplitipvarid } + \$ @varid { skip_one_varid ITidEscape } + "$(" { token ITparenEscape } +} + +<0,glaexts> { + @qual @varid { check_qvarid } + @qual @conid { idtoken qconid } + @varid { varid } + @conid { idtoken conid } +} + +-- after an illegal qvarid, such as 'M.let', +-- we back up and try again in the bad_qvarid state: + { + @conid { pop_and (idtoken conid) } + @qual @conid { pop_and (idtoken qconid) } +} + + { + @qual @varid "#"+ { idtoken qvarid } + @qual @conid "#"+ { idtoken qconid } + @varid "#"+ { varid } + @conid "#"+ { idtoken conid } +} + +-- ToDo: M.(,,,) + +<0,glaexts> { + @qual @varsym { idtoken qvarsym } + @qual @consym { idtoken qconsym } + @varsym { varsym } + @consym { consym } +} + +<0,glaexts> { + @decimal { tok_decimal } + 0[oO] @octal { tok_octal } + 0[xX] @hexadecimal { tok_hexadecimal } +} + + { + @decimal \# { prim_decimal } + 0[oO] @octal \# { prim_octal } + 0[xX] @hexadecimal \# { prim_hexadecimal } +} + +<0,glaexts> @floating_point { strtoken tok_float } + @floating_point \# { init_strtoken 1 prim_float } + @floating_point \# \# { init_strtoken 2 prim_double } + +-- Strings and chars are lexed by hand-written code. The reason is +-- that even if we recognise the string or char here in the regex +-- lexer, we would still have to parse the string afterward in order +-- to convert it to a String. +<0,glaexts> { + \' { lex_char_tok } + \" { lex_string_tok } +} + + "``" (([$graphic $whitechar] # \') | \' ([$graphic $whitechar] # \'))* + "''" { clitlit } + +{ +-- work around bug in Alex 2.0 +#if __GLASGOW_HASKELL__ < 503 +unsafeAt arr i = arr ! i +#endif + +-- ----------------------------------------------------------------------------- +-- The token type + +data Token = T SrcLoc{-start-} SrcLoc{-end-} Token__ + +data Token__ + = ITas -- Haskell keywords + | ITcase + | ITclass + | ITdata + | ITdefault + | ITderiving + | ITdo + | ITelse + | IThiding + | ITif + | ITimport + | ITin + | ITinfix + | ITinfixl + | ITinfixr + | ITinstance + | ITlet + | ITmodule + | ITnewtype + | ITof + | ITqualified + | ITthen + | ITtype + | ITwhere + | ITscc -- ToDo: remove (we use {-# SCC "..." #-} now) + + | ITforall -- GHC extension keywords + | ITforeign + | ITexport + | ITlabel + | ITdynamic + | ITsafe + | ITthreadsafe + | ITunsafe + | ITwith + | ITstdcallconv + | ITccallconv + | ITdotnet + | ITccall (Bool,Bool,Safety) -- (is_dyn, is_casm, may_gc) + | ITmdo + + | ITspecialise_prag -- Pragmas + | ITsource_prag + | ITinline_prag + | ITnoinline_prag + | ITrules_prag + | ITdeprecated_prag + | ITline_prag + | ITscc_prag + | ITcore_prag -- hdaume: core annotations + | ITclose_prag + + | ITdotdot -- reserved symbols + | ITcolon + | ITdcolon + | ITequal + | ITlam + | ITvbar + | ITlarrow + | ITrarrow + | ITat + | ITtilde + | ITdarrow + | ITminus + | ITbang + | ITstar + | ITdot + + | ITbiglam -- GHC-extension symbols + + | ITocurly -- special symbols + | ITccurly + | ITocurlybar -- {|, for type applications + | ITccurlybar -- |}, for type applications + | ITvocurly + | ITvccurly + | ITobrack + | ITopabrack -- [:, for parallel arrays with -fparr + | ITcpabrack -- :], for parallel arrays with -fparr + | ITcbrack + | IToparen + | ITcparen + | IToubxparen + | ITcubxparen + | ITsemi + | ITcomma + | ITunderscore + | ITbackquote + + | ITvarid FastString -- identifiers + | ITconid FastString + | ITvarsym FastString + | ITconsym FastString + | ITqvarid (FastString,FastString) + | ITqconid (FastString,FastString) + | ITqvarsym (FastString,FastString) + | ITqconsym (FastString,FastString) + + | ITdupipvarid FastString -- GHC extension: implicit param: ?x + | ITsplitipvarid FastString -- GHC extension: implicit param: %x + + | ITpragma StringBuffer + + | ITchar Char + | ITstring FastString + | ITinteger Integer + | ITrational Rational + + | ITprimchar Char + | ITprimstring FastString + | ITprimint Integer + | ITprimfloat Rational + | ITprimdouble Rational + | ITlitlit FastString + + -- MetaHaskell extension tokens + | ITopenExpQuote -- [| or [e| + | ITopenPatQuote -- [p| + | ITopenDecQuote -- [d| + | ITopenTypQuote -- [t| + | ITcloseQuote -- |] + | ITidEscape FastString -- $x + | ITparenEscape -- $( + | ITreifyType + | ITreifyDecl + | ITreifyFixity + + -- Arrow notation extension + | ITproc + | ITrec + | IToparenbar -- (| + | ITcparenbar -- |) + | ITlarrowtail -- -< + | ITrarrowtail -- >- + | ITLarrowtail -- -<< + | ITRarrowtail -- >>- + + | ITunknown String -- Used when the lexer can't make sense of it + | ITeof -- end of file token +#ifdef DEBUG + deriving Show -- debugging +#endif + +isSpecial :: Token__ -> Bool +-- If we see M.x, where x is a keyword, but +-- is special, we treat is as just plain M.x, +-- not as a keyword. +isSpecial ITas = True +isSpecial IThiding = True +isSpecial ITqualified = True +isSpecial ITforall = True +isSpecial ITexport = True +isSpecial ITlabel = True +isSpecial ITdynamic = True +isSpecial ITsafe = True +isSpecial ITthreadsafe = True +isSpecial ITunsafe = True +isSpecial ITwith = True +isSpecial ITccallconv = True +isSpecial ITstdcallconv = True +isSpecial ITmdo = True +isSpecial _ = False + +-- the bitmap provided as the third component indicates whether the +-- corresponding extension keyword is valid under the extension options +-- provided to the compiler; if the extension corresponding to *any* of the +-- bits set in the bitmap is enabled, the keyword is valid (this setup +-- facilitates using a keyword in two different extensions that can be +-- activated independently) +-- +reservedWordsFM = listToUFM $ + map (\(x, y, z) -> (mkFastString x, (y, z))) + [( "_", ITunderscore, 0 ), + ( "as", ITas, 0 ), + ( "case", ITcase, 0 ), + ( "class", ITclass, 0 ), + ( "data", ITdata, 0 ), + ( "default", ITdefault, 0 ), + ( "deriving", ITderiving, 0 ), + ( "do", ITdo, 0 ), + ( "else", ITelse, 0 ), + ( "hiding", IThiding, 0 ), + ( "if", ITif, 0 ), + ( "import", ITimport, 0 ), + ( "in", ITin, 0 ), + ( "infix", ITinfix, 0 ), + ( "infixl", ITinfixl, 0 ), + ( "infixr", ITinfixr, 0 ), + ( "instance", ITinstance, 0 ), + ( "let", ITlet, 0 ), + ( "module", ITmodule, 0 ), + ( "newtype", ITnewtype, 0 ), + ( "of", ITof, 0 ), + ( "qualified", ITqualified, 0 ), + ( "then", ITthen, 0 ), + ( "type", ITtype, 0 ), + ( "where", ITwhere, 0 ), + ( "_scc_", ITscc, 0 ), -- ToDo: remove + + ( "forall", ITforall, bit glaExtsBit), + ( "mdo", ITmdo, bit glaExtsBit), + ( "reifyDecl", ITreifyDecl, bit glaExtsBit), + ( "reifyType", ITreifyType, bit glaExtsBit), + ( "reifyFixity",ITreifyFixity, bit glaExtsBit), + + ( "foreign", ITforeign, bit ffiBit), + ( "export", ITexport, bit ffiBit), + ( "label", ITlabel, bit ffiBit), + ( "dynamic", ITdynamic, bit ffiBit), + ( "safe", ITsafe, bit ffiBit), + ( "threadsafe", ITthreadsafe, bit ffiBit), + ( "unsafe", ITunsafe, bit ffiBit), + ( "stdcall", ITstdcallconv, bit ffiBit), + ( "ccall", ITccallconv, bit ffiBit), + ( "dotnet", ITdotnet, bit ffiBit), + + ( "with", ITwith, bit withBit), + + ( "rec", ITrec, bit arrowsBit), + ( "proc", ITproc, bit arrowsBit), + + -- On death row + ("_ccall_", ITccall (False, False, PlayRisky), + bit glaExtsBit), + ("_ccall_GC_", ITccall (False, False, PlaySafe False), + bit glaExtsBit), + ("_casm_", ITccall (False, True, PlayRisky), + bit glaExtsBit), + ("_casm_GC_", ITccall (False, True, PlaySafe False), + bit glaExtsBit) + ] + +reservedSymsFM = listToUFM $ + map (\ (x,y,z) -> (mkFastString x,(y,z))) + [ ("..", ITdotdot, 0) + ,(":", ITcolon, 0) -- (:) is a reserved op, + -- meaning only list cons + ,("::", ITdcolon, 0) + ,("=", ITequal, 0) + ,("\\", ITlam, 0) + ,("|", ITvbar, 0) + ,("<-", ITlarrow, 0) + ,("->", ITrarrow, 0) + ,("@", ITat, 0) + ,("~", ITtilde, 0) + ,("=>", ITdarrow, 0) + ,("-", ITminus, 0) + ,("!", ITbang, 0) + + ,("*", ITstar, bit glaExtsBit) -- For data T (a::*) = MkT + ,(".", ITdot, bit glaExtsBit) -- For 'forall a . t' + + ,("-<", ITlarrowtail, bit arrowsBit) + ,(">-", ITrarrowtail, bit arrowsBit) + ,("-<<", ITLarrowtail, bit arrowsBit) + ,(">>-", ITRarrowtail, bit arrowsBit) + ] + +-- ----------------------------------------------------------------------------- +-- Lexer actions + +type Action = SrcLoc -> SrcLoc -> StringBuffer -> Int -> P Token + +special :: Token__ -> Action +special tok loc end _buf len = return (T loc end tok) + +token :: Token__ -> Action +token t loc end buf len = return (T loc end t) + +idtoken :: (StringBuffer -> Int -> Token__) -> Action +idtoken f loc end buf len = return (T loc end $! (f buf len)) + +skip_one_varid :: (FastString -> Token__) -> Action +skip_one_varid f loc end buf len + = return (T loc end $! f (lexemeToFastString (stepOn buf) (len-1))) + +strtoken :: (String -> Token__) -> Action +strtoken f loc end buf len = + return (T loc end $! (f $! lexemeToString buf len)) + +init_strtoken :: Int -> (String -> Token__) -> Action +-- like strtoken, but drops the last N character(s) +init_strtoken drop f loc end buf len = + return (T loc end $! (f $! lexemeToString buf (len-drop))) + +begin :: Int -> Action +begin code _loc _end _str _len = do pushLexState code; lexToken + +pop :: Action +pop _loc _end _buf _len = do popLexState; lexToken + +pop_and :: Action -> Action +pop_and act loc end buf len = do popLexState; act loc end buf len + +notFollowedBy char _ _ _ (_,buf) = atEnd buf || currentChar buf /= char + +{- + nested comments require traversing by hand, they can't be parsed + using regular expressions. +-} +nested_comment :: Action +nested_comment loc _end _str _len = do + input <- getInput + go 1 input + where go 0 input = do setInput input; lexToken + go n input = do + case alexGetChar input of + Nothing -> err input + Just (c,input) -> do + case c of + '-' -> do + case alexGetChar input of + Nothing -> err input + Just ('\125',input) -> go (n-1) input + Just (c,_) -> go n input + '\123' -> do + case alexGetChar input of + Nothing -> err input + Just ('-',input') -> go (n+1) input' + Just (c,input) -> go n input + c -> go n input + + err input = do failLocMsgP loc (fst input) "unterminated `{-'" + +open_brace, close_brace :: Action +open_brace loc end _str _len = do + ctx <- getContext + setContext (NoLayout:ctx) + return (T loc end ITocurly) +close_brace loc end _str _len = do + popContext + return (T loc end ITccurly) + +-- We have to be careful not to count M. as a qualified name +-- when is a keyword. We hack around this by catching +-- the offending tokens afterward, and re-lexing in a different state. +check_qvarid loc end buf len = do + case lookupUFM reservedWordsFM var of + Just (keyword,exts) + | not (isSpecial keyword) -> + if exts == 0 + then try_again + else do + b <- extension (\i -> exts .&. i /= 0) + if b then try_again + else return token + _other -> return token + where + (mod,var) = splitQualName buf len + token = T loc end (ITqvarid (mod,var)) + + try_again = do + setInput (loc,buf) + pushLexState bad_qvarid + lexToken + +qvarid buf len = ITqvarid $! splitQualName buf len +qconid buf len = ITqconid $! splitQualName buf len + +splitQualName :: StringBuffer -> Int -> (FastString,FastString) +-- takes a StringBuffer and a length, and returns the module name +-- and identifier parts of a qualified name. Splits at the *last* dot, +-- because of hierarchical module names. +splitQualName orig_buf len = split orig_buf 0 0 + where + split buf dot_off n + | n == len = done dot_off + | lookAhead buf n == '.' = split2 buf n (n+1) + | otherwise = split buf dot_off (n+1) + + -- careful, we might get names like M.... + -- so, if the character after the dot is not upper-case, this is + -- the end of the qualifier part. + split2 buf dot_off n + | isUpper (lookAhead buf n) = split buf dot_off (n+1) + | otherwise = done dot_off + + done dot_off = + (lexemeToFastString orig_buf dot_off, + lexemeToFastString (stepOnBy (dot_off+1) orig_buf) (len - dot_off -1)) + +varid loc end buf len = + case lookupUFM reservedWordsFM fs of + Just (keyword,0) -> do + maybe_layout keyword + return (T loc end keyword) + Just (keyword,exts) -> do + b <- extension (\i -> exts .&. i /= 0) + if b then do maybe_layout keyword + return (T loc end keyword) + else return (T loc end (ITvarid fs)) + _other -> return (T loc end (ITvarid fs)) + where + fs = lexemeToFastString buf len + +conid buf len = ITconid fs + where fs = lexemeToFastString buf len + +qvarsym buf len = ITqvarsym $! splitQualName buf len +qconsym buf len = ITqconsym $! splitQualName buf len + +varsym = sym ITvarsym +consym = sym ITconsym + +sym con loc end buf len = + case lookupUFM reservedSymsFM fs of + Just (keyword,0) -> return (T loc end keyword) + Just (keyword,exts) -> do + b <- extension (\i -> exts .&. i /= 0) + if b then return (T loc end keyword) + else return (T loc end $! con fs) + _other -> return (T loc end $! con fs) + where + fs = lexemeToFastString buf len + +tok_decimal loc end buf len + = return (T loc end (ITinteger $! parseInteger buf len 10 oct_or_dec)) + +tok_octal loc end buf len + = return (T loc end (ITinteger $! parseInteger (stepOnBy 2 buf) (len-2) 8 oct_or_dec)) + +tok_hexadecimal loc end buf len + = return (T loc end (ITinteger $! parseInteger (stepOnBy 2 buf) (len-2) 16 hex)) + +prim_decimal loc end buf len + = return (T loc end (ITprimint $! parseInteger buf (len-1) 10 oct_or_dec)) + +prim_octal loc end buf len + = return (T loc end (ITprimint $! parseInteger (stepOnBy 2 buf) (len-3) 8 oct_or_dec)) + +prim_hexadecimal loc end buf len + = return (T loc end (ITprimint $! parseInteger (stepOnBy 2 buf) (len-3) 16 hex)) + +tok_float str = ITrational $! readRational__ str +prim_float str = ITprimfloat $! readRational__ str +prim_double str = ITprimdouble $! readRational__ str + +parseInteger :: StringBuffer -> Int -> Integer -> (Char->Int) -> Integer +parseInteger buf len radix to_int + = go 0 0 + where go i x | i == len = x + | otherwise = go (i+1) (x * radix + toInteger (to_int (lookAhead buf i))) + +clitlit :: Action +clitlit loc end buf len = + return (T loc end (ITlitlit $! lexemeToFastString (stepOnBy 2 buf) (len-4))) + +-- ----------------------------------------------------------------------------- +-- Layout processing + +-- we're at the first token on a line, insert layout tokens if necessary +do_bol :: Action +do_bol loc end _str _len = do + pos <- getOffside end + case pos of + LT -> do + --trace "layout: inserting '}'" $ do + popContext + -- do NOT pop the lex state, we might have a ';' to insert + return (T loc end ITvccurly) + EQ -> do + --trace "layout: inserting ';'" $ do + popLexState + return (T loc end ITsemi) + GT -> do + popLexState + lexToken + +-- certain keywords put us in the "layout" state, where we might +-- add an opening curly brace. +maybe_layout ITdo = pushLexState layout_do +maybe_layout ITof = pushLexState layout +maybe_layout ITlet = pushLexState layout +maybe_layout ITwhere = pushLexState layout +maybe_layout _ = return () + +-- Pushing a new implicit layout context. If the indentation of the +-- next token is not greater than the previous layout context, then +-- Haskell 98 says that the new layout context should be empty; that is +-- the lexer must generate {}. +-- +-- We are slightly more lenient than this: when the new context is started +-- by a 'do', then we allow the new context to be at the same indentation as +-- the previous context. This is what the 'strict' argument is for. +-- +new_layout_context strict loc end _buf _len = do + popLexState + let offset = srcLocCol loc + ctx <- getContext + case ctx of + Layout prev_off : _ | + (strict && prev_off >= offset || + not strict && prev_off > offset) -> do + -- token is indented to the left of the previous context. + -- we must generate a {} sequence now. + pushLexState layout_left + return (T loc end ITvocurly) + other -> do + setContext (Layout offset : ctx) + return (T loc end ITvocurly) + +do_layout_left loc end _buf _len = do + popLexState + pushLexState bol -- we must be at the start of a line + return (T loc end ITvccurly) + +-- ----------------------------------------------------------------------------- +-- LINE pragmas + +set_line :: Int -> Action +set_line code loc end buf len = do + let line = parseInteger buf len 10 oct_or_dec + setSrcLoc (mkSrcLoc (srcLocFile end) (fromIntegral line - 1) 0) + -- subtract one: the line number refers to the *following* line + popLexState + pushLexState code + lexToken + +set_file :: Int -> Action +set_file code loc end buf len = do + let file = lexemeToFastString (stepOn buf) (len-2) + setSrcLoc (mkSrcLoc file (srcLocLine end) (srcLocCol end)) + popLexState + pushLexState code + lexToken + +-- ----------------------------------------------------------------------------- +-- Strings & Chars + +-- This stuff is horrible. I hates it. + +lex_string_tok :: Action +lex_string_tok loc end buf len = do + tok <- lex_string "" + end <- getSrcLoc + return (T loc end tok) + +lex_string :: String -> P Token__ +lex_string s = do + i <- getInput + case alexGetChar i of + Nothing -> lit_error + + Just ('"',i) -> do + setInput i + glaexts <- extension glaExtsEnabled + if glaexts + then do + i <- getInput + case alexGetChar i of + Just ('#',i) -> do + setInput i + if any (> '\xFF') s + then failMsgP "primitive string literal must contain only characters <= \'\\xFF\'" + else let s' = mkFastStringNarrow (reverse s) in + -- always a narrow string/byte array + return (ITprimstring s') + _other -> + return (ITstring (mkFastString (reverse s))) + else + return (ITstring (mkFastString (reverse s))) + + Just ('\\',i) + | Just ('&',i) <- next -> do + setInput i; lex_string s + | Just (c,i) <- next, is_space c -> do + setInput i; lex_stringgap s + where next = alexGetChar i + + Just _ -> do + c <- lex_char + lex_string (c:s) + + +lex_stringgap s = do + c <- getCharOrFail + case c of + '\\' -> lex_string s + c | is_space c -> lex_stringgap s + _other -> lit_error + + +lex_char_tok :: Action +lex_char_tok loc _end buf len = do + c <- lex_char + mc <- getCharOrFail + case mc of + '\'' -> do + glaexts <- extension glaExtsEnabled + if glaexts + then do + i@(end,_) <- getInput + case alexGetChar i of + Just ('#',i@(end,_)) -> do + setInput i + return (T loc end (ITprimchar c)) + _other -> + return (T loc end (ITchar c)) + else do + end <- getSrcLoc + return (T loc end (ITchar c)) + + _other -> lit_error + +lex_char :: P Char +lex_char = do + mc <- getCharOrFail + case mc of + '\\' -> lex_escape + c | is_any c -> return c + _other -> lit_error + +lex_escape :: P Char +lex_escape = do + c <- getCharOrFail + case c of + 'a' -> return '\a' + 'b' -> return '\b' + 'f' -> return '\f' + 'n' -> return '\n' + 'r' -> return '\r' + 't' -> return '\t' + 'v' -> return '\v' + '\\' -> return '\\' + '"' -> return '\"' + '\'' -> return '\'' + '^' -> do c <- getCharOrFail + if c >= '@' && c <= '_' + then return (chr (ord c - ord '@')) + else lit_error + + 'x' -> readNum is_hexdigit 16 hex + 'o' -> readNum is_octdigit 8 oct_or_dec + x | is_digit x -> readNum2 is_digit 10 oct_or_dec (oct_or_dec x) + + c1 -> do + i <- getInput + case alexGetChar i of + Nothing -> lit_error + Just (c2,i2) -> + case alexGetChar i2 of + Nothing -> lit_error + Just (c3,i3) -> + let str = [c1,c2,c3] in + case [ (c,rest) | (p,c) <- silly_escape_chars, + Just rest <- [maybePrefixMatch p str] ] of + (escape_char,[]):_ -> do + setInput i3 + return escape_char + (escape_char,_:_):_ -> do + setInput i2 + return escape_char + [] -> lit_error + +readNum :: (Char -> Bool) -> Int -> (Char -> Int) -> P Char +readNum is_digit base conv = do + c <- getCharOrFail + if is_digit c + then readNum2 is_digit base conv (conv c) + else lit_error + +readNum2 is_digit base conv i = do + input <- getInput + read i input + where read i input = do + case alexGetChar input of + Just (c,input') | is_digit c -> do + read (i*base + conv c) input' + _other -> do + setInput input + if i >= 0 && i <= 0x10FFFF + then return (chr i) + else lit_error + +is_hexdigit c + = is_digit c + || (c >= 'a' && c <= 'f') + || (c >= 'A' && c <= 'F') + +hex c | is_digit c = ord c - ord '0' + | otherwise = ord (to_lower c) - ord 'a' + 10 + +oct_or_dec c = ord c - ord '0' + +is_octdigit c = c >= '0' && c <= '7' + +to_lower c + | c >= 'A' && c <= 'Z' = chr (ord c - (ord 'A' - ord 'a')) + | otherwise = c + +silly_escape_chars = [ + ("NUL", '\NUL'), + ("SOH", '\SOH'), + ("STX", '\STX'), + ("ETX", '\ETX'), + ("EOT", '\EOT'), + ("ENQ", '\ENQ'), + ("ACK", '\ACK'), + ("BEL", '\BEL'), + ("BS", '\BS'), + ("HT", '\HT'), + ("LF", '\LF'), + ("VT", '\VT'), + ("FF", '\FF'), + ("CR", '\CR'), + ("SO", '\SO'), + ("SI", '\SI'), + ("DLE", '\DLE'), + ("DC1", '\DC1'), + ("DC2", '\DC2'), + ("DC3", '\DC3'), + ("DC4", '\DC4'), + ("NAK", '\NAK'), + ("SYN", '\SYN'), + ("ETB", '\ETB'), + ("CAN", '\CAN'), + ("EM", '\EM'), + ("SUB", '\SUB'), + ("ESC", '\ESC'), + ("FS", '\FS'), + ("GS", '\GS'), + ("RS", '\RS'), + ("US", '\US'), + ("SP", '\SP'), + ("DEL", '\DEL') + ] + +lit_error = lexError "lexical error in string/character literal" + +getCharOrFail :: P Char +getCharOrFail = do + i <- getInput + case alexGetChar i of + Nothing -> lexError "unexpected end-of-file in string/character literal" + Just (c,i) -> do setInput i; return c + +-- ----------------------------------------------------------------------------- +-- Floats + +readRational :: ReadS Rational -- NB: doesn't handle leading "-" +readRational r = do + (n,d,s) <- readFix r + (k,t) <- readExp s + return ((n%1)*10^^(k-d), t) + where + readFix r = do + (ds,s) <- lexDecDigits r + (ds',t) <- lexDotDigits s + return (read (ds++ds'), length ds', t) + + readExp (e:s) | e `elem` "eE" = readExp' s + readExp s = return (0,s) + + readExp' ('+':s) = readDec s + readExp' ('-':s) = do + (k,t) <- readDec s + return (-k,t) + readExp' s = readDec s + + readDec s = do + (ds,r) <- nonnull isDigit s + return (foldl1 (\n d -> n * 10 + d) [ ord d - ord '0' | d <- ds ], + r) + + lexDecDigits = nonnull isDigit + + lexDotDigits ('.':s) = return (span isDigit s) + lexDotDigits s = return ("",s) + + nonnull p s = do (cs@(_:_),t) <- return (span p s) + return (cs,t) + +readRational__ :: String -> Rational -- NB: *does* handle a leading "-" +readRational__ top_s + = case top_s of + '-' : xs -> - (read_me xs) + xs -> read_me xs + where + read_me s + = case (do { (x,"") <- readRational s ; return x }) of + [x] -> x + [] -> error ("readRational__: no parse:" ++ top_s) + _ -> error ("readRational__: ambiguous parse:" ++ top_s) + +-- ----------------------------------------------------------------------------- +-- The Parse Monad + +data LayoutContext + = NoLayout + | Layout !Int + +data ParseResult a + = POk PState a + | PFailed + SrcLoc SrcLoc -- The start and end of the text span related to + -- the error. Might be used in environments which can + -- show this span, e.g. by highlighting it. + Message -- The error message + +showPFailed loc1 loc2 err + = showSDoc (hcat [ppr loc1, text ": ", err]) + +data PState = PState { + buffer :: StringBuffer, + last_loc :: SrcLoc, -- pos of previous token + last_len :: !Int, -- len of previous token + loc :: SrcLoc, -- current loc (end of prev token + 1) + extsBitmap :: !Int, -- bitmap that determines permitted extensions + context :: [LayoutContext], + lex_state :: [Int] + } + -- last_loc and last_len are used when generating error messages, + -- and in pushCurrentContext only. + +newtype P a = P { unP :: PState -> ParseResult a } + +instance Monad P where + return = returnP + (>>=) = thenP + fail = failP + +returnP :: a -> P a +returnP a = P $ \s -> POk s a + +thenP :: P a -> (a -> P b) -> P b +(P m) `thenP` k = P $ \ s -> + case m s of + POk s1 a -> (unP (k a)) s1 + PFailed l1 l2 err -> PFailed l1 l2 err + +failP :: String -> P a +failP msg = P $ \s -> PFailed (last_loc s) (loc s) (text msg) + +failMsgP :: String -> P a +failMsgP msg = P $ \s -> PFailed (last_loc s) (loc s) (text msg) + +failLocMsgP :: SrcLoc -> SrcLoc -> String -> P a +failLocMsgP loc1 loc2 str = P $ \s -> PFailed loc1 loc2 (text str) + +extension :: (Int -> Bool) -> P Bool +extension p = P $ \s -> POk s (p $! extsBitmap s) + +getExts :: P Int +getExts = P $ \s -> POk s (extsBitmap s) + +setSrcLoc :: SrcLoc -> P () +setSrcLoc new_loc = P $ \s -> POk s{loc=new_loc} () + +-- tmp, for supporting stuff in RdrHsSyn. The scope better not include +-- any calls to the lexer, because it assumes things about the SrcLoc. +setSrcLocFor :: SrcLoc -> P a -> P a +setSrcLocFor new_loc scope = P $ \s@PState{ loc = old_loc } -> + case unP scope s{loc=new_loc} of + PFailed l1 l2 msg -> PFailed l1 l2 msg + POk _ r -> POk s r + +getSrcLoc :: P SrcLoc +getSrcLoc = P $ \s@(PState{ loc=loc }) -> POk s loc + +setLastToken :: SrcLoc -> Int -> P () +setLastToken loc len = P $ \s -> POk s{ last_loc=loc, last_len=len } () + +type AlexInput = (SrcLoc,StringBuffer) + +alexInputPrevChar :: AlexInput -> Char +alexInputPrevChar (_,s) = prevChar s '\n' + +alexGetChar :: AlexInput -> Maybe (Char,AlexInput) +alexGetChar (loc,s) + | atEnd s = Nothing + | otherwise = c `seq` loc' `seq` s' `seq` Just (c, (loc', s')) + where c = currentChar s + loc' = advanceSrcLoc loc c + s' = stepOn s + +getInput :: P AlexInput +getInput = P $ \s@PState{ loc=l, buffer=b } -> POk s (l,b) + +setInput :: AlexInput -> P () +setInput (l,b) = P $ \s -> POk s{ loc=l, buffer=b } () + +pushLexState :: Int -> P () +pushLexState ls = P $ \s@PState{ lex_state=l } -> POk s{lex_state=ls:l} () + +popLexState :: P Int +popLexState = P $ \s@PState{ lex_state=ls:l } -> POk s{ lex_state=l } ls + +getLexState :: P Int +getLexState = P $ \s@PState{ lex_state=ls:l } -> POk s ls + +-- for reasons of efficiency, flags indicating language extensions (eg, +-- -fglasgow-exts or -fparr) are represented by a bitmap stored in an unboxed +-- integer + +glaExtsBit, ffiBit, parrBit :: Int +glaExtsBit = 0 +ffiBit = 1 +parrBit = 2 +withBit = 3 +arrowsBit = 4 + +glaExtsEnabled, ffiEnabled, parrEnabled :: Int -> Bool +glaExtsEnabled flags = testBit flags glaExtsBit +ffiEnabled flags = testBit flags ffiBit +withEnabled flags = testBit flags withBit +parrEnabled flags = testBit flags parrBit +arrowsEnabled flags = testBit flags arrowsBit + +-- convenient record-based bitmap for the interface to the rest of the world +-- +-- NB: `glasgowExtsEF' implies `ffiEF' (see `mkPState' below) +-- +data ExtFlags = ExtFlags { + glasgowExtsEF :: Bool, + ffiEF :: Bool, + withEF :: Bool, + parrEF :: Bool, + arrowsEF :: Bool + } + +-- create a parse state +-- +mkPState :: StringBuffer -> SrcLoc -> ExtFlags -> PState +mkPState buf loc exts = + PState { + buffer = buf, + last_loc = loc, + last_len = 0, + loc = loc, + extsBitmap = fromIntegral bitmap, + context = [], + lex_state = [bol, if glaExtsEnabled bitmap then glaexts else 0] + -- we begin in the layout state if toplev_layout is set + } + where + bitmap = glaExtsBit `setBitIf` glasgowExtsEF exts + .|. ffiBit `setBitIf` (ffiEF exts + || glasgowExtsEF exts) + .|. withBit `setBitIf` withEF exts + .|. parrBit `setBitIf` parrEF exts + .|. arrowsBit `setBitIf` arrowsEF exts + -- + setBitIf :: Int -> Bool -> Int + b `setBitIf` cond | cond = bit b + | otherwise = 0 + +getContext :: P [LayoutContext] +getContext = P $ \s@PState{context=ctx} -> POk s ctx + +setContext :: [LayoutContext] -> P () +setContext ctx = P $ \s -> POk s{context=ctx} () + +popContext :: P () +popContext = P $ \ s@(PState{ buffer = buf, context = ctx, + loc = loc, last_len = len, last_loc = last_loc }) -> + case ctx of + (_:tl) -> POk s{ context = tl } () + [] -> PFailed last_loc loc (srcParseErr buf len) + +-- Push a new layout context at the indentation of the last token read. +-- This is only used at the outer level of a module when the 'module' +-- keyword is missing. +pushCurrentContext :: P () +pushCurrentContext = P $ \ s@PState{ last_loc=loc, context=ctx } -> + POk s{ context = Layout (srcLocCol loc) : ctx} () + +getOffside :: SrcLoc -> P Ordering +getOffside loc = P $ \s@PState{context=stk} -> + let ord = case stk of + (Layout n:_) -> compare (srcLocCol loc) n + _ -> GT + in POk s ord + +-- --------------------------------------------------------------------------- +-- Construct a parse error + +srcParseErr + :: StringBuffer -- current buffer (placed just after the last token) + -> Int -- length of the previous token + -> Message +srcParseErr buf len + = hcat [ if null token + then ptext SLIT("parse error (possibly incorrect indentation)") + else hcat [ptext SLIT("parse error on input "), + char '`', text token, char '\''] + ] + where token = lexemeToString (stepOnBy (-len) buf) len + +-- Report a parse failure, giving the span of the previous token as +-- the location of the error. This is the entry point for errors +-- detected during parsing. +srcParseFail :: P a +srcParseFail = P $ \PState{ buffer = buf, last_len = len, + last_loc = last_loc, loc = loc } -> + PFailed last_loc loc (srcParseErr buf len) + +-- A lexical error is reported at a particular position in the source file, +-- not over a token range. TODO: this is slightly wrong, because we record +-- the error at the character position following the one which caused the +-- error. We should somehow back up by one character. +lexError :: String -> P a +lexError str = do + loc <- getSrcLoc + failLocMsgP loc loc str + +-- ----------------------------------------------------------------------------- +-- This is the top-level function: called from the parser each time a +-- new token is to be read from the input. + +lexer :: (Token -> P a) -> P a +lexer cont = do + tok@(T _ _ tok__) <- lexToken + --trace ("token: " ++ show tok__) $ do + cont tok + +lexToken :: P Token +lexToken = do + inp@(loc1,buf) <- getInput + sc <- getLexState + exts <- getExts + case alexScanUser exts inp sc of + AlexEOF -> do setLastToken loc1 0 + return (T loc1 loc1 ITeof) + AlexError (loc2,_) -> do failLocMsgP loc1 loc2 "lexical error" + AlexSkip inp2 _ -> do + setInput inp2 + lexToken + AlexToken inp2@(end,buf2) len t -> do + setInput inp2 + setLastToken loc1 len + t loc1 end buf len +} diff --git a/ghc/compiler/parser/Parser.y b/ghc/compiler/parser/Parser.y index 1802117..a4294e1 100644 --- a/ghc/compiler/parser/Parser.y +++ b/ghc/compiler/parser/Parser.y @@ -1,6 +1,6 @@ {- -*-haskell-*- ----------------------------------------------------------------------------- -$Id: Parser.y,v 1.121 2003/07/16 08:49:05 ross Exp $ +$Id: Parser.y,v 1.122 2003/09/08 11:52:25 simonmar Exp $ Haskell grammar. @@ -18,11 +18,12 @@ import HsTypes ( mkHsTupCon ) import RdrHsSyn import HscTypes ( ParsedIface(..), IsBootInterface, noDependencies ) -import Lex +import Lexer import RdrName import PrelNames ( mAIN_Name, funTyConName, listTyConName, parrTyConName, consDataConName ) -import TysWiredIn ( unitTyCon, unitDataCon, tupleTyCon, tupleCon, nilDataCon ) +import TysWiredIn ( unitTyCon, unitDataCon, tupleTyCon, + tupleCon, nilDataCon ) import ForeignCall ( Safety(..), CExportSpec(..), CCallConv(..), CCallTarget(..), defaultCCallConv, ) @@ -33,9 +34,9 @@ import SrcLoc ( SrcLoc ) import Module import CmdLineOpts ( opt_SccProfilingOn, opt_InPackage ) import Type ( Kind, mkArrowKind, liftedTypeKind ) -import BasicTypes ( Boxity(..), Fixity(..), FixityDirection(..), IPName(..), - NewOrData(..), StrictnessMark(..), Activation(..), - FixitySig(..) ) +import BasicTypes ( Boxity(..), Fixity(..), FixityDirection(..), + IPName(..), NewOrData(..), StrictnessMark(..), + Activation(..), FixitySig(..) ) import Panic import GLAEXTS @@ -43,6 +44,7 @@ import CStrings ( CLabelString ) import FastString import Maybes ( orElse ) import Outputable +import Char ( ord ) } @@ -90,170 +92,141 @@ Conflicts: 29 shift/reduce, [SDM 19/9/2002] -} %token - '_' { ITunderscore } -- Haskell keywords - 'as' { ITas } - 'case' { ITcase } - 'class' { ITclass } - 'data' { ITdata } - 'default' { ITdefault } - 'deriving' { ITderiving } - 'do' { ITdo } - 'else' { ITelse } - 'hiding' { IThiding } - 'if' { ITif } - 'import' { ITimport } - 'in' { ITin } - 'infix' { ITinfix } - 'infixl' { ITinfixl } - 'infixr' { ITinfixr } - 'instance' { ITinstance } - 'let' { ITlet } - 'module' { ITmodule } - 'newtype' { ITnewtype } - 'of' { ITof } - 'qualified' { ITqualified } - 'then' { ITthen } - 'type' { ITtype } - 'where' { ITwhere } - '_scc_' { ITscc } -- ToDo: remove - - 'forall' { ITforall } -- GHC extension keywords - 'foreign' { ITforeign } - 'export' { ITexport } - 'label' { ITlabel } - 'dynamic' { ITdynamic } - 'safe' { ITsafe } - 'threadsafe' { ITthreadsafe } - 'unsafe' { ITunsafe } - 'with' { ITwith } - 'mdo' { ITmdo } - 'stdcall' { ITstdcallconv } - 'ccall' { ITccallconv } - 'dotnet' { ITdotnet } - 'proc' { ITproc } -- for arrow notation extension - 'rec' { ITrec } -- for arrow notation extension - '_ccall_' { ITccall (False, False, PlayRisky) } - '_ccall_GC_' { ITccall (False, False, PlaySafe False) } - '_casm_' { ITccall (False, True, PlayRisky) } - '_casm_GC_' { ITccall (False, True, PlaySafe False) } - - '{-# SPECIALISE' { ITspecialise_prag } - '{-# SOURCE' { ITsource_prag } - '{-# INLINE' { ITinline_prag } - '{-# NOINLINE' { ITnoinline_prag } - '{-# RULES' { ITrules_prag } - '{-# CORE' { ITcore_prag } -- hdaume: annotated core - '{-# SCC' { ITscc_prag } - '{-# DEPRECATED' { ITdeprecated_prag } - '#-}' { ITclose_prag } - -{- - '__interface' { ITinterface } -- interface keywords - '__export' { IT__export } - '__instimport' { ITinstimport } - '__forall' { IT__forall } - '__letrec' { ITletrec } - '__coerce' { ITcoerce } - '__depends' { ITdepends } - '__inline' { ITinline } - '__DEFAULT' { ITdefaultbranch } - '__bot' { ITbottom } - '__integer' { ITinteger_lit } - '__float' { ITfloat_lit } - '__rational' { ITrational_lit } - '__addr' { ITaddr_lit } - '__label' { ITlabel_lit } - '__litlit' { ITlit_lit } - '__string' { ITstring_lit } - '__ccall' { ITccall $$ } - '__scc' { IT__scc } - '__sccC' { ITsccAllCafs } - - '__A' { ITarity } - '__P' { ITspecialise } - '__C' { ITnocaf } - '__U' { ITunfold } - '__S' { ITstrict $$ } - '__M' { ITcprinfo $$ } --} - - '..' { ITdotdot } -- reserved symbols - ':' { ITcolon } - '::' { ITdcolon } - '=' { ITequal } - '\\' { ITlam } - '|' { ITvbar } - '<-' { ITlarrow } - '->' { ITrarrow } - '@' { ITat } - '~' { ITtilde } - '=>' { ITdarrow } - '-' { ITminus } - '!' { ITbang } - '*' { ITstar } - '-<' { ITlarrowtail } -- for arrow notation - '>-' { ITrarrowtail } -- for arrow notation - '-<<' { ITLarrowtail } -- for arrow notation - '>>-' { ITRarrowtail } -- for arrow notation - '.' { ITdot } - - '{' { ITocurly } -- special symbols - '}' { ITccurly } - '{|' { ITocurlybar } - '|}' { ITccurlybar } - vccurly { ITvccurly } -- virtual close curly (from layout) - '[' { ITobrack } - ']' { ITcbrack } - '[:' { ITopabrack } - ':]' { ITcpabrack } - '(' { IToparen } - ')' { ITcparen } - '(#' { IToubxparen } - '#)' { ITcubxparen } - '(|' { IToparenbar } - '|)' { ITcparenbar } - ';' { ITsemi } - ',' { ITcomma } - '`' { ITbackquote } - - VARID { ITvarid $$ } -- identifiers - CONID { ITconid $$ } - VARSYM { ITvarsym $$ } - CONSYM { ITconsym $$ } - QVARID { ITqvarid $$ } - QCONID { ITqconid $$ } - QVARSYM { ITqvarsym $$ } - QCONSYM { ITqconsym $$ } - - IPDUPVARID { ITdupipvarid $$ } -- GHC extension - IPSPLITVARID { ITsplitipvarid $$ } -- GHC extension - - CHAR { ITchar $$ } - STRING { ITstring $$ } - INTEGER { ITinteger $$ } - RATIONAL { ITrational $$ } - - PRIMCHAR { ITprimchar $$ } - PRIMSTRING { ITprimstring $$ } - PRIMINTEGER { ITprimint $$ } - PRIMFLOAT { ITprimfloat $$ } - PRIMDOUBLE { ITprimdouble $$ } - CLITLIT { ITlitlit $$ } + '_' { T _ _ ITunderscore } -- Haskell keywords + 'as' { T _ _ ITas } + 'case' { T _ _ ITcase } + 'class' { T _ _ ITclass } + 'data' { T _ _ ITdata } + 'default' { T _ _ ITdefault } + 'deriving' { T _ _ ITderiving } + 'do' { T _ _ ITdo } + 'else' { T _ _ ITelse } + 'hiding' { T _ _ IThiding } + 'if' { T _ _ ITif } + 'import' { T _ _ ITimport } + 'in' { T _ _ ITin } + 'infix' { T _ _ ITinfix } + 'infixl' { T _ _ ITinfixl } + 'infixr' { T _ _ ITinfixr } + 'instance' { T _ _ ITinstance } + 'let' { T _ _ ITlet } + 'module' { T _ _ ITmodule } + 'newtype' { T _ _ ITnewtype } + 'of' { T _ _ ITof } + 'qualified' { T _ _ ITqualified } + 'then' { T _ _ ITthen } + 'type' { T _ _ ITtype } + 'where' { T _ _ ITwhere } + '_scc_' { T _ _ ITscc } -- ToDo: remove + + 'forall' { T _ _ ITforall } -- GHC extension keywords + 'foreign' { T _ _ ITforeign } + 'export' { T _ _ ITexport } + 'label' { T _ _ ITlabel } + 'dynamic' { T _ _ ITdynamic } + 'safe' { T _ _ ITsafe } + 'threadsafe' { T _ _ ITthreadsafe } + 'unsafe' { T _ _ ITunsafe } + 'with' { T _ _ ITwith } + 'mdo' { T _ _ ITmdo } + 'stdcall' { T _ _ ITstdcallconv } + 'ccall' { T _ _ ITccallconv } + 'dotnet' { T _ _ ITdotnet } + 'proc' { T _ _ ITproc } -- for arrow notation extension + 'rec' { T _ _ ITrec } -- for arrow notation extension + '_ccall_' { T _ _ (ITccall (False, False, PlayRisky)) } + '_ccall_GC_' { T _ _ (ITccall (False, False, PlaySafe False)) } + '_casm_' { T _ _ (ITccall (False, True, PlayRisky)) } + '_casm_GC_' { T _ _ (ITccall (False, True, PlaySafe False)) } + + '{-# SPECIALISE' { T _ _ ITspecialise_prag } + '{-# SOURCE' { T _ _ ITsource_prag } + '{-# INLINE' { T _ _ ITinline_prag } + '{-# NOINLINE' { T _ _ ITnoinline_prag } + '{-# RULES' { T _ _ ITrules_prag } + '{-# CORE' { T _ _ ITcore_prag } -- hdaume: annotated core + '{-# SCC' { T _ _ ITscc_prag } + '{-# DEPRECATED' { T _ _ ITdeprecated_prag } + '#-}' { T _ _ ITclose_prag } + + '..' { T _ _ ITdotdot } -- reserved symbols + ':' { T _ _ ITcolon } + '::' { T _ _ ITdcolon } + '=' { T _ _ ITequal } + '\\' { T _ _ ITlam } + '|' { T _ _ ITvbar } + '<-' { T _ _ ITlarrow } + '->' { T _ _ ITrarrow } + '@' { T _ _ ITat } + '~' { T _ _ ITtilde } + '=>' { T _ _ ITdarrow } + '-' { T _ _ ITminus } + '!' { T _ _ ITbang } + '*' { T _ _ ITstar } + '-<' { T _ _ ITlarrowtail } -- for arrow notation + '>-' { T _ _ ITrarrowtail } -- for arrow notation + '-<<' { T _ _ ITLarrowtail } -- for arrow notation + '>>-' { T _ _ ITRarrowtail } -- for arrow notation + '.' { T _ _ ITdot } + + '{' { T _ _ ITocurly } -- special symbols + '}' { T _ _ ITccurly } + '{|' { T _ _ ITocurlybar } + '|}' { T _ _ ITccurlybar } + vocurly { T _ _ ITvocurly } -- virtual open curly (from layout) + vccurly { T _ _ ITvccurly } -- virtual close curly (from layout) + '[' { T _ _ ITobrack } + ']' { T _ _ ITcbrack } + '[:' { T _ _ ITopabrack } + ':]' { T _ _ ITcpabrack } + '(' { T _ _ IToparen } + ')' { T _ _ ITcparen } + '(#' { T _ _ IToubxparen } + '#)' { T _ _ ITcubxparen } + '(|' { T _ _ IToparenbar } + '|)' { T _ _ ITcparenbar } + ';' { T _ _ ITsemi } + ',' { T _ _ ITcomma } + '`' { T _ _ ITbackquote } + + VARID { T _ _ (ITvarid $$) } -- identifiers + CONID { T _ _ (ITconid $$) } + VARSYM { T _ _ (ITvarsym $$) } + CONSYM { T _ _ (ITconsym $$) } + QVARID { T _ _ (ITqvarid $$) } + QCONID { T _ _ (ITqconid $$) } + QVARSYM { T _ _ (ITqvarsym $$) } + QCONSYM { T _ _ (ITqconsym $$) } + + IPDUPVARID { T _ _ (ITdupipvarid $$) } -- GHC extension + IPSPLITVARID { T _ _ (ITsplitipvarid $$) } -- GHC extension + + CHAR { T _ _ (ITchar $$) } + STRING { T _ _ (ITstring $$) } + INTEGER { T _ _ (ITinteger $$) } + RATIONAL { T _ _ (ITrational $$) } + + PRIMCHAR { T _ _ (ITprimchar $$) } + PRIMSTRING { T _ _ (ITprimstring $$) } + PRIMINTEGER { T _ _ (ITprimint $$) } + PRIMFLOAT { T _ _ (ITprimfloat $$) } + PRIMDOUBLE { T _ _ (ITprimdouble $$) } + CLITLIT { T _ _ (ITlitlit $$) } -- Template Haskell -'[|' { ITopenExpQuote } -'[p|' { ITopenPatQuote } -'[t|' { ITopenTypQuote } -'[d|' { ITopenDecQuote } -'|]' { ITcloseQuote } -ID_SPLICE { ITidEscape $$ } -- $x -'$(' { ITparenEscape } -- $( exp ) -REIFY_TYPE { ITreifyType } -REIFY_DECL { ITreifyDecl } -REIFY_FIXITY { ITreifyFixity } - -%monad { P } { thenP } { returnP } -%lexer { lexer } { ITeof } +'[|' { T _ _ ITopenExpQuote } +'[p|' { T _ _ ITopenPatQuote } +'[t|' { T _ _ ITopenTypQuote } +'[d|' { T _ _ ITopenDecQuote } +'|]' { T _ _ ITcloseQuote } +ID_SPLICE { T _ _ (ITidEscape $$) } -- $x +'$(' { T _ _ ITparenEscape } -- $( exp ) +REIFY_TYPE { T _ _ ITreifyType } +REIFY_DECL { T _ _ ITreifyDecl } +REIFY_FIXITY { T _ _ ITreifyFixity } + +%monad { P } { >>= } { return } +%lexer { lexer } { T _ _ ITeof } %name parseModule module %name parseStmt maybe_stmt %name parseIdentifier identifier @@ -274,8 +247,11 @@ REIFY_FIXITY { ITreifyFixity } module :: { RdrNameHsModule } : srcloc 'module' modid maybemoddeprec maybeexports 'where' body { HsModule (Just (mkHomeModule $3)) $5 (fst $7) (snd $7) $4 $1 } - | srcloc body - { HsModule Nothing Nothing (fst $2) (snd $2) Nothing $1 } + | srcloc missing_module_keyword top close + { HsModule Nothing Nothing (fst $3) (snd $3) Nothing $1 } + +missing_module_keyword :: { () } + : {- empty -} {% pushCurrentContext } maybemoddeprec :: { Maybe DeprecTxt } : '{-# DEPRECATED' STRING '#-}' { Just $2 } @@ -283,7 +259,7 @@ maybemoddeprec :: { Maybe DeprecTxt } body :: { ([RdrNameImportDecl], [RdrNameHsDecl]) } : '{' top '}' { $2 } - | layout_on top close { $2 } + | vocurly top close { $2 } top :: { ([RdrNameImportDecl], [RdrNameHsDecl]) } : importdecls { (reverse $1,[]) } @@ -316,7 +292,7 @@ iface :: { ParsedIface } ifacebody :: { [RdrNameTyClDecl] } : '{' ifacedecls '}' { $2 } - | layout_on ifacedecls close { $2 } + | vocurly ifacedecls close { $2 } ifacedecls :: { [RdrNameTyClDecl] } : ifacedecl ';' ifacedecls { $1 : $3 } @@ -464,10 +440,10 @@ syn_hdr :: { (RdrName, [RdrNameHsTyVar]) } -- We don't retain the syntax of an i -- (Eq a, Ord b) => T a b -- Rather a lot of inlining here, else we get reduce/reduce errors tycl_hdr :: { (RdrNameContext, RdrName, [RdrNameHsTyVar]) } - : context '=>' type {% checkTyClHdr $3 `thenP` \ (tc,tvs) -> - returnP ($1, tc, tvs) } - | type {% checkTyClHdr $1 `thenP` \ (tc,tvs) -> - returnP ([], tc, tvs) } + : context '=>' type {% checkTyClHdr $3 >>= \ (tc,tvs) -> + return ($1, tc, tvs) } + | type {% checkTyClHdr $1 >>= \ (tc,tvs) -> + return ([], tc, tvs) } ----------------------------------------------------------------------------- -- Nested declarations @@ -481,7 +457,7 @@ decls :: { [RdrBinding] } -- Reversed decllist :: { [RdrBinding] } -- Reversed : '{' decls '}' { $2 } - | layout_on decls close { $2 } + | vocurly decls close { $2 } where :: { [RdrBinding] } -- Reversed -- No implicit parameters @@ -491,7 +467,7 @@ where :: { [RdrBinding] } -- Reversed binds :: { RdrNameHsBinds } -- May have implicit parameters : decllist { cvBinds $1 } | '{' dbinds '}' { IPBinds $2 False{-not with-} } - | layout_on dbinds close { IPBinds $2 False{-not with-} } + | vocurly dbinds close { IPBinds $2 False{-not with-} } wherebinds :: { RdrNameHsBinds } -- May have implicit parameters : 'where' binds { $2 } @@ -599,7 +575,7 @@ fdecl1DEPRECATED | 'import' callconv STRING STRING safety varid_no_unsafe '::' sigtype {% case $2 of DNCall -> parseError "Illegal format of .NET foreign import" - CCall cconv -> returnP $ + CCall cconv -> return $ let imp = CFunction (StaticTarget $4) in @@ -609,7 +585,7 @@ fdecl1DEPRECATED | 'import' callconv STRING 'unsafe' varid_no_unsafe '::' sigtype {% case $2 of DNCall -> parseError "Illegal format of .NET foreign import" - CCall cconv -> returnP $ + CCall cconv -> return $ let imp = CFunction (StaticTarget $3) in @@ -625,7 +601,7 @@ fdecl1DEPRECATED | 'import' callconv 'dynamic' safety varid_no_unsafe '::' sigtype {% case $2 of DNCall -> parseError "Illegal format of .NET foreign import" - CCall cconv -> returnP $ + CCall cconv -> return $ ForeignImport $5 $7 (CImport cconv $4 nilFS nilFS (CFunction DynamicTarget)) } @@ -640,7 +616,7 @@ fdecl1DEPRECATED | 'export' callconv STRING STRING varid '::' sigtype {% case $2 of DNCall -> parseError "Illegal format of .NET foreign import" - CCall cconv -> returnP $ + CCall cconv -> return $ ForeignExport $5 $7 (CExport (CExportStatic $4 cconv)) } @@ -654,7 +630,7 @@ fdecl1DEPRECATED | 'export' callconv 'dynamic' varid '::' sigtype {% case $2 of DNCall -> parseError "Illegal format of .NET foreign import" - CCall cconv -> returnP $ + CCall cconv -> return $ ForeignImport $4 $6 (CImport cconv (PlaySafe False) nilFS nilFS CWrapper) } ----------- DEPRECATED .NET decls ------------ @@ -948,18 +924,18 @@ infixexp :: { RdrNameHsExpr } exp10 :: { RdrNameHsExpr } : '\\' srcloc aexp aexps opt_asig '->' srcloc exp - {% checkPatterns $2 ($3 : reverse $4) `thenP` \ ps -> - returnP (HsLam (Match ps $5 + {% checkPatterns $2 ($3 : reverse $4) >>= \ ps -> + return (HsLam (Match ps $5 (GRHSs (unguardedRHS $8 $7) EmptyBinds placeHolderType))) } | 'let' binds 'in' exp { HsLet $2 $4 } | 'if' srcloc exp 'then' exp 'else' exp { HsIf $3 $5 $7 $2 } | 'case' srcloc exp 'of' altslist { HsCase $3 $5 $2 } | '-' fexp { mkHsNegApp $2 } - | srcloc 'do' stmtlist {% checkDo $3 `thenP` \ stmts -> - returnP (mkHsDo DoExpr stmts $1) } - | srcloc 'mdo' stmtlist {% checkMDo $3 `thenP` \ stmts -> - returnP (mkHsDo MDoExpr stmts $1) } + | srcloc 'do' stmtlist {% checkDo $3 >>= \ stmts -> + return (mkHsDo DoExpr stmts $1) } + | srcloc 'mdo' stmtlist {% checkMDo $3 >>= \ stmts -> + return (mkHsDo MDoExpr stmts $1) } | '_ccall_' ccallid aexps0 { HsCCall $2 $3 PlayRisky False placeHolderType } | '_ccall_GC_' ccallid aexps0 { HsCCall $2 $3 (PlaySafe False) False placeHolderType } @@ -971,8 +947,8 @@ exp10 :: { RdrNameHsExpr } else HsPar $2 } | 'proc' srcloc aexp '->' srcloc exp - {% checkPattern $2 $3 `thenP` \ p -> - returnP (HsProc p (HsCmdTop $6 [] placeHolderType undefined) $5) } + {% checkPattern $2 $3 >>= \ p -> + return (HsProc p (HsCmdTop $6 [] placeHolderType undefined) $5) } | '{-# CORE' STRING '#-}' exp { HsCoreAnn $2 $4 } -- hdaume: core annotation @@ -1022,8 +998,8 @@ aexp2 :: { RdrNameHsExpr } : ipvar { HsIPVar $1 } | qcname { HsVar $1 } | literal { HsLit $1 } - | INTEGER { HsOverLit (mkHsIntegral $1) } - | RATIONAL { HsOverLit (mkHsFractional $1) } + | INTEGER { HsOverLit $! mkHsIntegral $1 } + | RATIONAL { HsOverLit $! mkHsFractional $1 } | '(' exp ')' { HsPar $2 } | '(' exp ',' texps ')' { ExplicitTuple ($2 : reverse $4) Boxed} | '(#' texps '#)' { ExplicitTuple (reverse $2) Unboxed } @@ -1038,8 +1014,8 @@ aexp2 :: { RdrNameHsExpr } | srcloc '$(' exp ')' { mkHsSplice $3 $1 } -- $( exp ) | srcloc '[|' exp '|]' { HsBracket (ExpBr $3) $1 } | srcloc '[t|' ctype '|]' { HsBracket (TypBr $3) $1 } - | srcloc '[p|' infixexp '|]' {% checkPattern $1 $3 `thenP` \p -> - returnP (HsBracket (PatBr p) $1) } + | srcloc '[p|' infixexp '|]' {% checkPattern $1 $3 >>= \p -> + return (HsBracket (PatBr p) $1) } | srcloc '[d|' cvtopbody '|]' { HsBracket (DecBr (mkGroup $3)) $1 } -- arrow notation extension @@ -1055,7 +1031,7 @@ acmd :: { RdrNameHsCmdTop } cvtopbody :: { [RdrNameHsDecl] } : '{' cvtopdecls '}' { $2 } - | layout_on cvtopdecls close { $2 } + | vocurly cvtopdecls close { $2 } texps :: { [RdrNameHsExpr] } : texps ',' exp { $3 : $1 } @@ -1131,7 +1107,7 @@ parr :: { RdrNameHsExpr } altslist :: { [RdrNameMatch] } : '{' alts '}' { reverse $2 } - | layout_on alts close { reverse $2 } + | vocurly alts close { reverse $2 } alts :: { [RdrNameMatch] } : alts1 { $1 } @@ -1144,8 +1120,8 @@ alts1 :: { [RdrNameMatch] } alt :: { RdrNameMatch } : srcloc infixexp opt_sig ralt wherebinds - {% (checkPattern $1 $2 `thenP` \p -> - returnP (Match [p] $3 + {% (checkPattern $1 $2 >>= \p -> + return (Match [p] $3 (GRHSs $4 $5 placeHolderType)) )} ralt :: { [RdrNameGRHS] } @@ -1163,8 +1139,8 @@ gdpat :: { RdrNameGRHS } -- Statement sequences stmtlist :: { [RdrNameStmt] } - : '{' stmts '}' { $2 } - | layout_on_for_do stmts close { $2 } + : '{' stmts '}' { $2 } + | vocurly stmts close { $2 } -- do { ;; s ; s ; ; s ;; } -- The last Stmt should be a ResultStmt, but that's hard to enforce @@ -1188,13 +1164,13 @@ maybe_stmt :: { Maybe RdrNameStmt } stmt :: { RdrNameStmt } : qual { $1 } - | srcloc infixexp '->' exp {% checkPattern $1 $4 `thenP` \p -> - returnP (BindStmt p $2 $1) } + | srcloc infixexp '->' exp {% checkPattern $1 $4 >>= \p -> + return (BindStmt p $2 $1) } | srcloc 'rec' stmtlist { RecStmt $3 undefined undefined undefined } qual :: { RdrNameStmt } - : srcloc infixexp '<-' exp {% checkPattern $1 $2 `thenP` \p -> - returnP (BindStmt p $4 $1) } + : srcloc infixexp '<-' exp {% checkPattern $1 $2 >>= \p -> + return (BindStmt p $4 $1) } | srcloc exp { ExprStmt $2 placeHolderType $1 } | srcloc 'let' binds { LetStmt $3 } @@ -1215,7 +1191,7 @@ fbind :: { (RdrName, RdrNameHsExpr) } dbinding :: { [(IPName RdrName, RdrNameHsExpr)] } : '{' dbinds '}' { $2 } - | layout_on dbinds close { $2 } + | vocurly dbinds close { $2 } dbinds :: { [(IPName RdrName, RdrNameHsExpr)] } : dbinds ';' dbind { $3 : $1 } @@ -1438,17 +1414,17 @@ consym :: { RdrName } -- Literals literal :: { HsLit } - : CHAR { HsChar $1 } + : CHAR { HsChar (ord $1) } --TODO remove ord | STRING { HsString $1 } | PRIMINTEGER { HsIntPrim $1 } - | PRIMCHAR { HsCharPrim $1 } + | PRIMCHAR { HsCharPrim (ord $1) } --TODO remove ord | PRIMSTRING { HsStringPrim $1 } | PRIMFLOAT { HsFloatPrim $1 } | PRIMDOUBLE { HsDoublePrim $1 } | CLITLIT { HsLitLit $1 placeHolderType } -srcloc :: { SrcLoc } : {% getSrcLocP } - +srcloc :: { SrcLoc } : {% getSrcLoc } + ----------------------------------------------------------------------------- -- Layout @@ -1456,9 +1432,6 @@ close :: { () } : vccurly { () } -- context popped in lexer. | error {% popContext } -layout_on :: { () } : {% layoutOn True{-strict-} } -layout_on_for_do :: { () } : {% layoutOn False } - ----------------------------------------------------------------------------- -- Miscellaneous (mostly renamings) @@ -1478,5 +1451,5 @@ commas :: { Int } { happyError :: P a -happyError buf PState{ loc = loc } = PFailed (srcParseErr buf loc) +happyError = srcParseFail } diff --git a/ghc/compiler/parser/RdrHsSyn.lhs b/ghc/compiler/parser/RdrHsSyn.lhs index 101ada1..652a3e6 100644 --- a/ghc/compiler/parser/RdrHsSyn.lhs +++ b/ghc/compiler/parser/RdrHsSyn.lhs @@ -99,7 +99,7 @@ import RdrName ( RdrName, isRdrTyVar, mkRdrUnqual, mkUnqual, rdrNameOcc, setRdrNameSpace ) import BasicTypes ( RecFlag(..), FixitySig(..), maxPrecedence ) import Class ( DefMeth (..) ) -import Lex ( P, mapP, setSrcLocP, thenP, returnP, getSrcLocP, failMsgP ) +import Lexer ( P, setSrcLocFor, getSrcLoc, failLocMsgP ) import HscTypes ( RdrAvailInfo, GenAvailInfo(..) ) import TysWiredIn ( unitTyCon ) import ForeignCall ( CCallConv, Safety, CCallTarget(..), CExportSpec(..), @@ -484,19 +484,19 @@ mkPrefixCon ty tys = split ty tys where split (HsAppTy t u) ts = split t (unbangedType u : ts) - split (HsTyVar tc) ts = tyConToDataCon tc `thenP` \ data_con -> - returnP (data_con, PrefixCon ts) + split (HsTyVar tc) ts = tyConToDataCon tc >>= \ data_con -> + return (data_con, PrefixCon ts) split _ _ = parseError "Illegal data/newtype declaration" mkRecCon :: RdrName -> [([RdrName],RdrNameBangType)] -> P (RdrName, RdrNameConDetails) mkRecCon con fields - = tyConToDataCon con `thenP` \ data_con -> - returnP (data_con, RecCon [ (l,t) | (ls,t) <- fields, l <- ls ]) + = tyConToDataCon con >>= \ data_con -> + return (data_con, RecCon [ (l,t) | (ls,t) <- fields, l <- ls ]) tyConToDataCon :: RdrName -> P RdrName tyConToDataCon tc | isTcOcc (rdrNameOcc tc) - = returnP (setRdrNameSpace tc srcDataName) + = return (setRdrNameSpace tc srcDataName) | otherwise = parseError (showSDoc (text "Not a constructor:" <+> quotes (ppr tc))) @@ -507,21 +507,21 @@ checkInstType :: RdrNameHsType -> P RdrNameHsType checkInstType t = case t of HsForAllTy tvs ctxt ty -> - checkDictTy ty [] `thenP` \ dict_ty -> - returnP (HsForAllTy tvs ctxt dict_ty) + checkDictTy ty [] >>= \ dict_ty -> + return (HsForAllTy tvs ctxt dict_ty) HsParTy ty -> checkInstType ty - ty -> checkDictTy ty [] `thenP` \ dict_ty-> - returnP (HsForAllTy Nothing [] dict_ty) + ty -> checkDictTy ty [] >>= \ dict_ty-> + return (HsForAllTy Nothing [] dict_ty) checkTyVars :: [RdrNameHsType] -> P [RdrNameHsTyVar] checkTyVars tvs - = mapP chk tvs + = mapM chk tvs where -- Check that the name space is correct! - chk (HsKindSig (HsTyVar tv) k) | isRdrTyVar tv = returnP (IfaceTyVar tv k) - chk (HsTyVar tv) | isRdrTyVar tv = returnP (UserTyVar tv) + chk (HsKindSig (HsTyVar tv) k) | isRdrTyVar tv = return (IfaceTyVar tv k) + chk (HsTyVar tv) | isRdrTyVar tv = return (UserTyVar tv) chk other = parseError "Type found where type variable expected" checkTyClHdr :: RdrNameHsType -> P (RdrName, [RdrNameHsTyVar]) @@ -534,46 +534,46 @@ checkTyClHdr ty = go ty [] where go (HsTyVar tc) acc - | not (isRdrTyVar tc) = checkTyVars acc `thenP` \ tvs -> - returnP (tc, tvs) + | not (isRdrTyVar tc) = checkTyVars acc >>= \ tvs -> + return (tc, tvs) go (HsOpTy t1 (HsTyOp tc) t2) acc - = checkTyVars (t1:t2:acc) `thenP` \ tvs -> - returnP (tc, tvs) + = checkTyVars (t1:t2:acc) >>= \ tvs -> + return (tc, tvs) go (HsParTy ty) acc = go ty acc go (HsAppTy t1 t2) acc = go t1 (t2:acc) go other acc = parseError "Malformed LHS to type of class declaration" checkContext :: RdrNameHsType -> P RdrNameContext checkContext (HsTupleTy _ ts) -- (Eq a, Ord b) shows up as a tuple type - = mapP checkPred ts + = mapM checkPred ts checkContext (HsParTy ty) -- to be sure HsParTy doesn't get into the way = checkContext ty checkContext (HsTyVar t) -- Empty context shows up as a unit type () - | t == getRdrName unitTyCon = returnP [] + | t == getRdrName unitTyCon = return [] checkContext t - = checkPred t `thenP` \p -> - returnP [p] + = checkPred t >>= \p -> + return [p] checkPred :: RdrNameHsType -> P (HsPred RdrName) -- Watch out.. in ...deriving( Show )... we use checkPred on -- the list of partially applied predicates in the deriving, -- so there can be zero args. -checkPred (HsPredTy (HsIParam n ty)) = returnP (HsIParam n ty) +checkPred (HsPredTy (HsIParam n ty)) = return (HsIParam n ty) checkPred ty = go ty [] where go (HsTyVar t) args | not (isRdrTyVar t) - = returnP (HsClassP t args) + = return (HsClassP t args) go (HsAppTy l r) args = go l (r:args) go (HsParTy t) args = go t args go _ _ = parseError "Illegal class assertion" checkDictTy :: RdrNameHsType -> [RdrNameHsType] -> P RdrNameHsType checkDictTy (HsTyVar t) args@(_:_) | not (isRdrTyVar t) - = returnP (mkHsDictTy t args) + = return (mkHsDictTy t args) checkDictTy (HsAppTy l r) args = checkDictTy l (r:args) checkDictTy (HsParTy t) args = checkDictTy t args checkDictTy _ _ = parseError "Malformed context in instance header" @@ -591,37 +591,37 @@ checkDo = checkDoMDo "a " "'do'" checkMDo = checkDoMDo "an " "'mdo'" checkDoMDo _ nm [] = parseError $ "Empty " ++ nm ++ " construct" -checkDoMDo _ _ [ExprStmt e _ l] = returnP [ResultStmt e l] +checkDoMDo _ _ [ExprStmt e _ l] = return [ResultStmt e l] checkDoMDo pre nm [s] = parseError $ "The last statement in " ++ pre ++ nm ++ " construct must be an expression" -checkDoMDo pre nm (s:ss) = checkDoMDo pre nm ss `thenP` \ ss' -> - returnP (s:ss') +checkDoMDo pre nm (s:ss) = checkDoMDo pre nm ss >>= \ ss' -> + return (s:ss') ---------------------------------------------------------------------------- +-- ------------------------------------------------------------------------- -- Checking Patterns. -- We parse patterns as expressions and check for valid patterns below, -- converting the expression into a pattern at the same time. checkPattern :: SrcLoc -> RdrNameHsExpr -> P RdrNamePat -checkPattern loc e = setSrcLocP loc (checkPat e []) +checkPattern loc e = setSrcLocFor loc (checkPat e []) checkPatterns :: SrcLoc -> [RdrNameHsExpr] -> P [RdrNamePat] -checkPatterns loc es = mapP (checkPattern loc) es +checkPatterns loc es = mapM (checkPattern loc) es checkPat :: RdrNameHsExpr -> [RdrNamePat] -> P RdrNamePat -checkPat (HsVar c) args | isRdrDataCon c = returnP (ConPatIn c (PrefixCon args)) +checkPat (HsVar c) args | isRdrDataCon c = return (ConPatIn c (PrefixCon args)) checkPat (HsApp f x) args = - checkPat x [] `thenP` \x -> + checkPat x [] >>= \x -> checkPat f (x:args) checkPat e [] = case e of - EWildPat -> returnP (WildPat placeHolderType) + EWildPat -> return (WildPat placeHolderType) HsVar x | isQual x -> parseError ("Qualified variable in pattern: " ++ showRdrName x) - | otherwise -> returnP (VarPat x) - HsLit l -> returnP (LitPat l) - HsOverLit l -> returnP (NPatIn l Nothing) - ELazyPat e -> checkPat e [] `thenP` (returnP . LazyPat) - EAsPat n e -> checkPat e [] `thenP` (returnP . AsPat n) - ExprWithTySig e t -> checkPat e [] `thenP` \e -> + | otherwise -> return (VarPat x) + HsLit l -> return (LitPat l) + HsOverLit l -> return (NPatIn l Nothing) + ELazyPat e -> checkPat e [] >>= (return . LazyPat) + EAsPat n e -> checkPat e [] >>= (return . AsPat n) + ExprWithTySig e t -> checkPat e [] >>= \e -> -- Pattern signatures are parsed as sigtypes, -- but they aren't explicit forall points. Hence -- we have to remove the implicit forall here. @@ -629,48 +629,48 @@ checkPat e [] = case e of HsForAllTy Nothing [] ty -> ty other -> other in - returnP (SigPatIn e t') + return (SigPatIn e t') -- Translate out NegApps of literals in patterns. We negate -- the Integer here, and add back the call to 'negate' when -- we typecheck the pattern. -- NB. Negative *primitive* literals are already handled by -- RdrHsSyn.mkHsNegApp - NegApp (HsOverLit lit) neg -> returnP (NPatIn lit (Just neg)) + NegApp (HsOverLit lit) neg -> return (NPatIn lit (Just neg)) OpApp (HsVar n) (HsVar plus) _ (HsOverLit lit@(HsIntegral _ _)) | plus == plus_RDR - -> returnP (mkNPlusKPat n lit) + -> return (mkNPlusKPat n lit) where plus_RDR = mkUnqual varName FSLIT("+") -- Hack - OpApp l op fix r -> checkPat l [] `thenP` \l -> - checkPat r [] `thenP` \r -> + OpApp l op fix r -> checkPat l [] >>= \l -> + checkPat r [] >>= \r -> case op of HsVar c | isDataOcc (rdrNameOcc c) - -> returnP (ConPatIn c (InfixCon l r)) + -> return (ConPatIn c (InfixCon l r)) _ -> patFail - HsPar e -> checkPat e [] `thenP` (returnP . ParPat) - ExplicitList _ es -> mapP (\e -> checkPat e []) es `thenP` \ps -> - returnP (ListPat ps placeHolderType) - ExplicitPArr _ es -> mapP (\e -> checkPat e []) es `thenP` \ps -> - returnP (PArrPat ps placeHolderType) + HsPar e -> checkPat e [] >>= (return . ParPat) + ExplicitList _ es -> mapM (\e -> checkPat e []) es >>= \ps -> + return (ListPat ps placeHolderType) + ExplicitPArr _ es -> mapM (\e -> checkPat e []) es >>= \ps -> + return (PArrPat ps placeHolderType) - ExplicitTuple es b -> mapP (\e -> checkPat e []) es `thenP` \ps -> - returnP (TuplePat ps b) + ExplicitTuple es b -> mapM (\e -> checkPat e []) es >>= \ps -> + return (TuplePat ps b) - RecordCon c fs -> mapP checkPatField fs `thenP` \fs -> - returnP (ConPatIn c (RecCon fs)) + RecordCon c fs -> mapM checkPatField fs >>= \fs -> + return (ConPatIn c (RecCon fs)) -- Generics - HsType ty -> returnP (TypePat ty) + HsType ty -> return (TypePat ty) _ -> patFail checkPat _ _ = patFail checkPatField :: (RdrName, RdrNameHsExpr) -> P (RdrName, RdrNamePat) -checkPatField (n,e) = checkPat e [] `thenP` \p -> - returnP (n,p) +checkPatField (n,e) = checkPat e [] >>= \p -> + return (n,p) patFail = parseError "Parse error in pattern" @@ -691,19 +691,19 @@ checkValDef lhs opt_sig grhss loc | isQual f -> parseError ("Qualified name in function definition: " ++ showRdrName f) | otherwise - -> checkPatterns loc es `thenP` \ps -> - returnP (RdrValBinding (FunMonoBind f inf [Match ps opt_sig grhss] loc)) + -> checkPatterns loc es >>= \ps -> + return (RdrValBinding (FunMonoBind f inf [Match ps opt_sig grhss] loc)) Nothing -> - checkPattern loc lhs `thenP` \lhs -> - returnP (RdrValBinding (PatMonoBind lhs grhss loc)) + checkPattern loc lhs >>= \lhs -> + return (RdrValBinding (PatMonoBind lhs grhss loc)) checkValSig :: RdrNameHsExpr -> RdrNameHsType -> SrcLoc -> P RdrBinding -checkValSig (HsVar v) ty loc | isUnqual v = returnP (RdrHsDecl (SigD (Sig v ty loc))) +checkValSig (HsVar v) ty loc | isUnqual v = return (RdrHsDecl (SigD (Sig v ty loc))) checkValSig other ty loc = parseError "Type signature given for an expression" mkSigDecls :: [Sig RdrName] -> RdrBinding @@ -731,7 +731,7 @@ isFunLhs _ _ = Nothing -- Miscellaneous utilities checkPrecP :: Int -> P Int -checkPrecP i | 0 <= i && i <= maxPrecedence = returnP i +checkPrecP i | 0 <= i && i <= maxPrecedence = return i | otherwise = parseError "Precedence out of range" mkRecConstrOrUpdate @@ -740,9 +740,9 @@ mkRecConstrOrUpdate -> P RdrNameHsExpr mkRecConstrOrUpdate (HsVar c) fs | isRdrDataCon c - = returnP (RecordCon c fs) + = return (RecordCon c fs) mkRecConstrOrUpdate exp fs@(_:_) - = returnP (RecordUpd exp fs) + = return (RecordUpd exp fs) mkRecConstrOrUpdate _ _ = parseError "Empty record update" @@ -762,11 +762,11 @@ mkImport :: CallConv -> SrcLoc -> P RdrNameHsDecl mkImport (CCall cconv) safety (entity, v, ty) loc = - parseCImport entity cconv safety v `thenP` \importSpec -> - returnP $ ForD (ForeignImport v ty importSpec False loc) + parseCImport entity cconv safety v >>= \importSpec -> + return $ ForD (ForeignImport v ty importSpec False loc) mkImport (DNCall ) _ (entity, v, ty) loc = - parseDImport entity `thenP` \ spec -> - returnP $ ForD (ForeignImport v ty (DNImport spec) False loc) + parseDImport entity >>= \ spec -> + return $ ForD (ForeignImport v ty (DNImport spec) False loc) -- parse the entity string of a foreign import declaration for the `ccall' or -- `stdcall' calling convention' @@ -779,9 +779,9 @@ parseCImport :: FastString parseCImport entity cconv safety v -- FIXME: we should allow white space around `dynamic' and `wrapper' -=chak | entity == FSLIT ("dynamic") = - returnP $ CImport cconv safety nilFS nilFS (CFunction DynamicTarget) + return $ CImport cconv safety nilFS nilFS (CFunction DynamicTarget) | entity == FSLIT ("wrapper") = - returnP $ CImport cconv safety nilFS nilFS CWrapper + return $ CImport cconv safety nilFS nilFS CWrapper | otherwise = parse0 (unpackFS entity) where -- using the static keyword? @@ -820,9 +820,9 @@ parseCImport entity cconv safety v where (first, rest) = break (== ' ') str -- - build cid header False lib = returnP $ + build cid header False lib = return $ CImport cconv safety header lib (CFunction (StaticTarget cid)) - build cid header True lib = returnP $ + build cid header True lib = return $ CImport cconv safety header lib (CLabel cid ) -- @@ -853,7 +853,7 @@ parseDImport entity = parse0 comps parse2 isStatic kind xs = parse3 isStatic kind "" xs parse3 isStatic kind assem [x] = - returnP (DNCallSpec isStatic kind assem x + return (DNCallSpec isStatic kind assem x -- these will be filled in once known. (error "FFI-dotnet-args") (error "FFI-dotnet-result")) @@ -867,7 +867,7 @@ mkExport :: CallConv -> (FastString, RdrName, RdrNameHsType) -> SrcLoc -> P RdrNameHsDecl -mkExport (CCall cconv) (entity, v, ty) loc = returnP $ +mkExport (CCall cconv) (entity, v, ty) loc = return $ ForD (ForeignExport v ty (CExport (CExportStatic entity' cconv)) False loc) where entity' | nullFastString entity = mkExtName v @@ -910,7 +910,6 @@ showRdrName r = showSDoc (ppr r) parseError :: String -> P a parseError s = - getSrcLocP `thenP` \ loc -> - failMsgP (hcat [ppr loc, text ": ", text s]) + getSrcLoc >>= \ loc -> + failLocMsgP loc loc s \end{code} - diff --git a/ghc/compiler/rename/RnHiFiles.lhs b/ghc/compiler/rename/RnHiFiles.lhs index 82512dc..977b80f 100644 --- a/ghc/compiler/rename/RnHiFiles.lhs +++ b/ghc/compiler/rename/RnHiFiles.lhs @@ -63,7 +63,7 @@ import FastString ( mkFastString ) import ErrUtils ( Message ) import Finder ( findModule, findPackageModule, hiBootExt, hiBootVerExt ) -import Lex +import Lexer import FiniteMap import ListSetOps ( minusList ) import Outputable @@ -645,7 +645,7 @@ readIface mod file_path is_hi_boot_file read_iface mod file_path is_hi_boot_file | is_hi_boot_file -- Read ascii = do { buffer <- hGetStringBuffer file_path ; - case parseIface buffer (mkPState loc exts) of + case unP parseIface (mkPState buffer loc exts) of POk _ iface | wanted_mod_name == actual_mod_name -> return iface | otherwise @@ -656,7 +656,8 @@ read_iface mod file_path is_hi_boot_file actual_mod_name = pi_mod iface err = hiModuleNameMismatchWarn wanted_mod_name actual_mod_name - PFailed err -> throwDyn (ProgramError (showSDoc err)) + PFailed loc1 loc2 err -> + throwDyn (ProgramError (showPFailed loc1 loc2 err)) } | otherwise -- Read binary @@ -668,7 +669,7 @@ read_iface mod file_path is_hi_boot_file arrowsEF = True, withEF = True, parrEF = True} - loc = mkSrcLoc (mkFastString file_path) 1 + loc = mkSrcLoc (mkFastString file_path) 1 0 \end{code} diff --git a/ghc/compiler/utils/StringBuffer.lhs b/ghc/compiler/utils/StringBuffer.lhs index d7cfddd..16218fd 100644 --- a/ghc/compiler/utils/StringBuffer.lhs +++ b/ghc/compiler/utils/StringBuffer.lhs @@ -1,516 +1,172 @@ % -% (c) The GRASP/AQUA Project, Glasgow University, 1997-1998 +% (c) The University of Glasgow, 1997-2003 % \section{String buffers} Buffers for scanning string input stored in external arrays. \begin{code} - -{-# OPTIONS -optc-DNON_POSIX_SOURCE #-} - module StringBuffer ( StringBuffer, - -- creation/destruction + -- * Creation/destruction hGetStringBuffer, -- :: FilePath -> IO StringBuffer stringToStringBuffer, -- :: String -> IO StringBuffer - freeStringBuffer, -- :: StringBuffer -> IO () - - -- Lookup - currentChar, -- :: StringBuffer -> Char - currentChar#, -- :: StringBuffer -> Char# - indexSBuffer, -- :: StringBuffer -> Int -> Char - indexSBuffer#, -- :: StringBuffer -> Int# -> Char# - -- relative lookup, i.e, currentChar = lookAhead 0 - lookAhead, -- :: StringBuffer -> Int -> Char - lookAhead#, -- :: StringBuffer -> Int# -> Char# - - -- offsets - currentIndex#, -- :: StringBuffer -> Int# - lexemeIndex, -- :: StringBuffer -> Int# - - -- moving the end point of the current lexeme. - addToCurrentPos, -- :: StringBuffer -> Int# -> StringBuffer - incCurrentPos, -- :: StringBuffer -> StringBuffer - decCurrentPos, -- :: StringBuffer -> StringBuffer - - -- move the start and end lexeme pointer on by x units. - stepOn, -- :: StringBuffer -> StringBuffer - stepOnBy#, -- :: StringBuffer -> Int# -> StringBuffer - stepOnTo#, -- :: StringBuffer -> Int# -> StringBuffer - stepOnUntil, -- :: (Char -> Bool) -> StringBuffer -> StringBuffer - stepOnUntilChar#, -- :: StringBuffer -> Char# -> StringBuffer - stepOverLexeme, -- :: StringBuffer -> StringBuffer - scanNumLit, -- :: Int -> StringBuffer -> (Int, StringBuffer) - squeezeLexeme, -- :: StringBuffer -> Int# -> StringBuffer - mergeLexemes, -- :: StringBuffer -> StringBuffer -> StringBuffer - expandWhile, -- :: (Char -> Bool) -> StringBuffer -> StringBuffer - expandWhile#, -- :: (Char# -> Bool) -> StringBuffer -> StringBuffer - expandUntilMatch, -- :: StrinBuffer -> String -> StringBuffer - -- at or beyond end of buffer? - bufferExhausted, -- :: StringBuffer -> Bool - emptyLexeme, -- :: StringBuffer -> Bool - - -- matching - prefixMatch, -- :: StringBuffer -> String -> Bool - untilEndOfString#, -- :: StringBuffer -> Int# - - -- conversion - lexemeToString, -- :: StringBuffer -> String - lexemeToFastString, -- :: StringBuffer -> FastString - ) where -#include "HsVersions.h" + -- * Lookup + currentChar, -- :: StringBuffer -> Char + prevChar, -- :: StringBuffer -> Char -> Char + lookAhead, -- :: StringBuffer -> Int -> Char + atEnd, -- :: StringBuffer -> Bool + difference, -- :: StringBuffer -> StringBuffer -> Int + -- * Moving + stepOn, stepOnBy, -#if __GLASGOW_HASKELL__ < 502 -import Panic ( panic ) -#else -#if __GLASGOW_HASKELL__ < 503 -import Ptr ( Ptr(..) ) -#else -import GHC.Ptr ( Ptr(..) ) -#endif -#endif + -- * Conversion + lexemeToString, -- :: StringBuffer -> Int -> String + lexemeToFastString, -- :: StringBuffer -> Int -> FastString + ) where -#if __GLASGOW_HASKELL__ < 501 -import Char ( chr ) -#elif __GLASGOW_HASKELL__ < 503 -import PrelIO ( hGetcBuffered ) -#else -import GHC.IO ( hGetcBuffered ) -#endif +#include "HsVersions.h" -import PrimPacked import FastString +import Panic import GLAEXTS import Foreign -#if __GLASGOW_HASKELL__ >= 502 -import CString ( newCString ) -#endif - -import IO ( openFile, isEOFError ) -import EXCEPTION ( bracket ) - #if __GLASGOW_HASKELL__ < 503 import PrelIOBase import PrelHandle #else import GHC.IOBase -import GHC.Handle +import GHC.IO ( slurpFile ) #endif -import Char ( isDigit ) -\end{code} +import IO ( openFile, hFileSize, IOMode(ReadMode) ) + +#if __GLASGOW_HASKELL__ < 503 +import IArray ( listArray ) +import ArrayBase ( UArray(..) ) +import MutableArray +import IOExts ( hGetBufBA ) +#else +import Data.Array.IArray ( listArray ) +import Data.Array.MArray ( unsafeFreeze, newArray_ ) +import Data.Array.Base ( UArray(..) ) +import Data.Array.IO ( IOArray, hGetArray ) +#endif + +import Char ( ord ) + +-- ----------------------------------------------------------------------------- +-- The StringBuffer type + +-- A StringBuffer is a ByteArray# with a pointer into it. We also cache +-- the length of the ByteArray# for speed. -\begin{code} data StringBuffer = StringBuffer - Addr# + ByteArray# Int# -- length - Int# -- lexeme start Int# -- current pos -\end{code} -\begin{code} instance Show StringBuffer where showsPrec _ s = showString "" -\end{code} -\begin{code} +-- ----------------------------------------------------------------------------- +-- Creation / Destruction + hGetStringBuffer :: FilePath -> IO StringBuffer hGetStringBuffer fname = do - (a, read) <- slurpFileExpandTabs fname - - -- urk! slurpFile gives us a buffer that doesn't have room for - -- the sentinel. Assume it has a final newline for now, and overwrite - -- that with the sentinel. slurpFileExpandTabs (below) leaves room - -- for the sentinel. - let (Ptr a#) = a; - (I# read#) = read; - end# = read# -# 1# - - -- add sentinel '\NUL' - writeCharOffPtr a (I# end#) '\0' - - return (StringBuffer a# end# 0# 0#) -\end{code} - ------------------------------------------------------------------------------ --- Turn a String into a StringBuffer - -\begin{code} -stringToStringBuffer :: String -> IO StringBuffer -freeStringBuffer :: StringBuffer -> IO () - -#if __GLASGOW_HASKELL__ >= 502 -stringToStringBuffer str = do - let sz@(I# sz#) = length str - Ptr a# <- newCString str - return (StringBuffer a# sz# 0# 0#) - -freeStringBuffer (StringBuffer a# _ _ _) = Foreign.free (Ptr a#) -#else -stringToStringBuffer = panic "stringToStringBuffer: not implemented" -freeStringBuffer sb = return () -#endif - -\end{code} - ------------------------------------------------------------------------------ -This very disturbing bit of code is used for expanding the tabs in a -file before we start parsing it. Expanding the tabs early makes the -lexer a lot simpler: we only have to record the beginning of the line -in order to be able to calculate the column offset of the current -token. - -We guess the size of the buffer required as 20% extra for -expanded tabs, and enlarge it if necessary. - -\begin{code} -#if __GLASGOW_HASKELL__ < 501 -getErrType :: IO Int -getErrType = _ccall_ getErrType__ -#endif - -slurpFileExpandTabs :: FilePath -> IO (Ptr (),Int) -slurpFileExpandTabs fname = do - bracket (openFile fname ReadMode) (hClose) - (\ handle -> - do sz <- hFileSize handle - if sz > toInteger (maxBound::Int) - then ioError (userError "slurpFile: file too big") - else do - let sz_i = fromInteger sz - if sz_i == 0 - -- empty file: just allocate a buffer containing '\0' - then do chunk <- allocMem 1 - writeCharOffPtr chunk 0 '\0' - return (chunk, 0) - else do let sz_i' = (sz_i * 12) `div` 10 -- add 20% for tabs - chunk <- allocMem sz_i' - trySlurp handle sz_i' chunk - ) - -trySlurp :: Handle -> Int -> Ptr () -> IO (Ptr (), Int) -trySlurp handle sz_i chunk = -#if __GLASGOW_HASKELL__ < 501 - wantReadableHandle "hGetChar" handle $ \ handle_ -> - let fo = haFO__ handle_ in -#else - wantReadableHandle "hGetChar" handle $ - \ handle_@Handle__{ haFD=fd, haBuffer=ref, haBufferMode=mode } -> -#endif - let - (I# chunk_sz) = sz_i - - tAB_SIZE = 8# - - slurpFile :: Int# -> Int# -> Ptr () -> Int# -> Int# -> IO (Ptr (), Int) - slurpFile c off chunk chunk_sz max_off = slurp c off - where - - slurp :: Int# -> Int# -> IO (Ptr (), Int) - slurp c off | off >=# max_off = do - let new_sz = chunk_sz *# 2# - chunk' <- reAllocMem chunk (I# new_sz) - slurpFile c off chunk' new_sz (new_sz -# (tAB_SIZE +# 1#)) - slurp c off = do -#if __GLASGOW_HASKELL__ < 501 - intc <- mayBlock fo (_ccall_ fileGetc fo) - if intc == ((-1)::Int) - then do errtype <- getErrType - if errtype == (19{-ERR_EOF-} :: Int) - then return (chunk, I# off) - else constructErrorAndFail "slurpFile" - else case chr intc of -#else - buf <- readIORef ref - ch <- (if not (bufferEmpty buf) - then hGetcBuffered fd ref buf - else do -#if __GLASGOW_HASKELL__ >= 503 - new_buf <- fillReadBuffer fd True False buf + h <- openFile fname ReadMode + size <- hFileSize h + let size_i@(I# sz#) = fromIntegral size +#if __GLASGOW_HASKELL__ < 503 + arr <- stToIO (newCharArray (0,size_i-1)) + r <- hGetBufBA h arr size_i #else - new_buf <- fillReadBuffer fd True buf + arr <- newArray_ (0,size_i-1) + r <- hGetArray h arr size_i #endif - hGetcBuffered fd ref new_buf) - `catch` \e -> if isEOFError e - then return '\xFFFF' - else ioError e - case ch of - '\xFFFF' -> return (chunk, I# off) -#endif - '\t' -> tabIt c off - ch -> do writeCharOffPtr chunk (I# off) ch - let c' | ch == '\n' = 0# - | otherwise = c +# 1# - slurp c' (off +# 1#) - - tabIt :: Int# -> Int# -> IO (Ptr (), Int) - -- can't run out of buffer in here, because we reserved an - -- extra tAB_SIZE bytes at the end earlier. - tabIt c off = do - writeCharOffPtr chunk (I# off) ' ' - let c' = c +# 1# - off' = off +# 1# - if c' `remInt#` tAB_SIZE ==# 0# - then slurp c' off' - else tabIt c' off' - in do - - -- allow space for a full tab at the end of the buffer - -- (that's what the max_off thing is for), - -- and add 1 to allow room for the final sentinel \NUL at - -- the end of the file. - (chunk', rc) <- slurpFile 0# 0# chunk chunk_sz (chunk_sz -# (tAB_SIZE +# 1#)) - return (chunk', rc+1 {- room for sentinel -}) - - -reAllocMem :: Ptr () -> Int -> IO (Ptr ()) -reAllocMem ptr sz = do - chunk <- c_realloc ptr sz - if chunk == nullPtr - then fail "reAllocMem" - else return chunk - -allocMem :: Int -> IO (Ptr ()) -allocMem sz = do - chunk <- c_malloc sz - if chunk == nullPtr -#if __GLASGOW_HASKELL__ < 501 - then constructErrorAndFail "allocMem" + if (r /= size_i) + then ioError (userError "short read of file") + else do +#if __GLASGOW_HASKELL__ < 503 + frozen <- stToIO (unsafeFreezeByteArray arr) + case frozen of + ByteArray _ _ bytearr# -> return (StringBuffer bytearr# sz# 0#) #else - then ioException (IOError Nothing ResourceExhausted "malloc" - "out of memory" Nothing) + frozen <- unsafeFreeze arr + case frozen of + UArray _ _ bytearr# -> return (StringBuffer bytearr# sz# 0#) #endif - else return chunk - -#if __GLASGOW_HASKELL__ <= 408 -c_malloc sz = do A# a <- c_malloc' sz; return (Ptr a) -foreign import ccall "malloc" unsafe - c_malloc' :: Int -> IO Addr -c_realloc (Ptr a) sz = do A# a <- c_realloc' (A# a) sz; return (Ptr a) -foreign import ccall "realloc" unsafe - c_realloc' :: Addr -> Int -> IO Addr +#if __GLASGOW_HASKELL__ >= 502 +stringToStringBuffer str = do + let size@(I# sz#) = length str + arr = listArray (0,size-1) (map (fromIntegral.ord) str) + :: UArray Int Word8 + case arr of + UArray _ _ bytearr# -> return (StringBuffer bytearr# sz# 0#) #else -foreign import ccall "malloc" unsafe - c_malloc :: Int -> IO (Ptr a) - -foreign import ccall "realloc" unsafe - c_realloc :: Ptr a -> Int -> IO (Ptr a) +stringToStringBuffer = panic "stringToStringBuffer: not implemented" #endif -\end{code} -Lookup +-- ----------------------------------------------------------------------------- +-- Lookup -\begin{code} currentChar :: StringBuffer -> Char -currentChar sb = case currentChar# sb of c -> C# c - -lookAhead :: StringBuffer -> Int -> Char -lookAhead sb (I# i#) = case lookAhead# sb i# of c -> C# c - -indexSBuffer :: StringBuffer -> Int -> Char -indexSBuffer sb (I# i#) = case indexSBuffer# sb i# of c -> C# c - -currentChar# :: StringBuffer -> Char# -indexSBuffer# :: StringBuffer -> Int# -> Char# -lookAhead# :: StringBuffer -> Int# -> Char# -currentChar# (StringBuffer fo# _ _ current#) = indexCharOffAddr# fo# current# -indexSBuffer# (StringBuffer fo# _ _ _) i# = indexCharOffAddr# fo# i# - - -- relative lookup, i.e, currentChar = lookAhead 0 -lookAhead# (StringBuffer fo# _ _ c#) i# = indexCharOffAddr# fo# (c# +# i#) - -currentIndex# :: StringBuffer -> Int# -currentIndex# (StringBuffer fo# _ _ c#) = c# +currentChar (StringBuffer arr# l# current#) = + ASSERT(current# <# l#) + C# (indexCharArray# arr# current#) -lexemeIndex :: StringBuffer -> Int# -lexemeIndex (StringBuffer fo# _ c# _) = c# -\end{code} +prevChar :: StringBuffer -> Char -> Char +prevChar (StringBuffer _ _ 0#) deflt = deflt +prevChar s deflt = lookAhead s (-1) - moving the start point of the current lexeme. +lookAhead :: StringBuffer -> Int -> Char +lookAhead (StringBuffer arr# l# c#) (I# i#) = + ASSERT(off <# l# && off >=# 0#) + C# (indexCharArray# arr# off) + where + off = c# +# i# -\begin{code} - -- moving the end point of the current lexeme. -addToCurrentPos :: StringBuffer -> Int# -> StringBuffer -addToCurrentPos (StringBuffer fo l# s# c#) i# = - StringBuffer fo l# s# (c# +# i#) +difference :: StringBuffer -> StringBuffer -> Int +difference (StringBuffer _ _ c1#) (StringBuffer _ _ c2#) = I# (c2# -# c1#) --- augmenting the current lexeme by one. -incCurrentPos :: StringBuffer -> StringBuffer -incCurrentPos (StringBuffer fo l# s# c#) = StringBuffer fo l# s# (c# +# 1#) +-- ----------------------------------------------------------------------------- +-- Moving -decCurrentPos :: StringBuffer -> StringBuffer -decCurrentPos (StringBuffer fo l# s# c#) = StringBuffer fo l# s# (c# -# 1#) +stepOn :: StringBuffer -> StringBuffer +stepOn s = stepOnBy 1 s -\end{code} +stepOnBy :: Int -> StringBuffer -> StringBuffer +stepOnBy (I# i#) (StringBuffer fo# l# c#) = StringBuffer fo# l# (c# +# i#) --- move the start and end point of the buffer on by --- x units. +atEnd :: StringBuffer -> Bool +atEnd (StringBuffer _ l# c#) = l# ==# c# -\begin{code} -stepOn :: StringBuffer -> StringBuffer -stepOn (StringBuffer fo l# s# c#) = StringBuffer fo l# (s# +# 1#) (s# +# 1#) -- assume they're the same. - -stepOnBy# :: StringBuffer -> Int# -> StringBuffer -stepOnBy# (StringBuffer fo# l# s# c#) i# = - case s# +# i# of - new_s# -> StringBuffer fo# l# new_s# new_s# - --- jump to pos. -stepOnTo# :: StringBuffer -> Int# -> StringBuffer -stepOnTo# (StringBuffer fo l _ _) s# = StringBuffer fo l s# s# - -squeezeLexeme :: StringBuffer -> Int# -> StringBuffer -squeezeLexeme (StringBuffer fo l s# c#) i# = StringBuffer fo l (s# +# i#) c# - -mergeLexemes :: StringBuffer -> StringBuffer -> StringBuffer -mergeLexemes (StringBuffer fo l s# _) (StringBuffer _ _ _ c#) - = StringBuffer fo l s# c# - -stepOnUntil :: (Char -> Bool) -> StringBuffer -> StringBuffer - -stepOnUntil pred (StringBuffer fo l# s# c#) = - loop c# - where - loop c# = - case indexCharOffAddr# fo c# of - ch# | pred (C# ch#) -> StringBuffer fo l# c# c# - | ch# `eqChar#` '\NUL'# && c# >=# l# -> StringBuffer fo l# l# l# -- EOB, return immediately. - | otherwise -> loop (c# +# 1#) - -stepOverLexeme :: StringBuffer -> StringBuffer -stepOverLexeme (StringBuffer fo l s# c#) = StringBuffer fo l c# c# - -expandWhile :: (Char -> Bool) -> StringBuffer -> StringBuffer -expandWhile pred (StringBuffer fo l# s# c#) = - loop c# - where - loop c# = - case indexCharOffAddr# fo c# of - ch# | pred (C# ch#) -> loop (c# +# 1#) - | ch# `eqChar#` '\NUL'# && c# >=# l# -> StringBuffer fo l# l# l# -- EOB, return immediately. - | otherwise -> StringBuffer fo l# s# c# - -expandWhile# :: (Char# -> Bool) -> StringBuffer -> StringBuffer -expandWhile# pred (StringBuffer fo l# s# c#) = - loop c# - where - loop c# = - case indexCharOffAddr# fo c# of - ch# | pred ch# -> loop (c# +# 1#) - | ch# `eqChar#` '\NUL'# && c# >=# l# -> StringBuffer fo l# s# c# -- EOB, return immediately. - | otherwise -> StringBuffer fo l# s# c# - -scanNumLit :: Integer -> StringBuffer -> (Integer,StringBuffer) -scanNumLit acc (StringBuffer fo l# s# c#) = - loop acc c# - where - loop acc c# = - case indexCharOffAddr# fo c# of - ch# | isDigit (C# ch#) -> loop (acc*10 + (toInteger (I# (ord# ch# -# ord# '0'#)))) (c# +# 1#) - | ch# `eqChar#` '\NUL'# && c# >=# l# -> (acc, StringBuffer fo l# s# c#) -- EOB, return immediately. - | otherwise -> (acc,StringBuffer fo l# s# c#) - - -expandUntilMatch :: StringBuffer -> String -> Maybe StringBuffer -expandUntilMatch (StringBuffer fo l# s# c#) str = - loop c# str - where - loop c# [] = Just (StringBuffer fo l# s# c#) - loop c# ((C# x#):xs) = - case indexCharOffAddr# fo c# of - ch# | ch# `eqChar#` '\NUL'# && c# >=# l# -> Nothing - | ch# `eqChar#` x# -> loop (c# +# 1#) xs - | otherwise -> loop (c# +# 1#) str - -\end{code} +-- ----------------------------------------------------------------------------- +-- Conversion -\begin{code} - -- at or beyond end of buffer? -bufferExhausted :: StringBuffer -> Bool -bufferExhausted (StringBuffer fo l# _ c#) = c# >=# l# - -emptyLexeme :: StringBuffer -> Bool -emptyLexeme (StringBuffer fo l# s# c#) = s# ==# c# - - -- matching -prefixMatch :: StringBuffer -> String -> Maybe StringBuffer -prefixMatch (StringBuffer fo l# s# c#) str = - loop c# str - where - loop c# [] = Just (StringBuffer fo l# s# c#) - loop c# ((C# x#):xs) - | indexCharOffAddr# fo c# `eqChar#` x# - = loop (c# +# 1#) xs - | otherwise - = Nothing - -untilEndOfString# :: StringBuffer -> StringBuffer -untilEndOfString# (StringBuffer fo l# s# c#) = - loop c# - where - getch# i# = indexCharOffAddr# fo i# - - loop c# = - case getch# c# of - '\"'# -> - case getch# (c# -# 1#) of - '\\'# -> - -- looks like an escaped something or other to me, - -- better count the number of "\\"s that are immediately - -- preceeding to decide if the " is escaped. - let - odd_slashes flg i# = - case getch# i# of - '\\'# -> odd_slashes (not flg) (i# -# 1#) - _ -> flg - in - if odd_slashes True (c# -# 2#) then - -- odd number, " is ecaped. - loop (c# +# 1#) - else -- a real end of string delimiter after all. - StringBuffer fo l# s# c# - _ -> StringBuffer fo l# s# c# - '\NUL'# -> - if c# >=# l# then -- hit sentinel, this doesn't look too good.. - StringBuffer fo l# l# l# - else - loop (c# +# 1#) - _ -> loop (c# +# 1#) - - -stepOnUntilChar# :: StringBuffer -> Char# -> StringBuffer -stepOnUntilChar# (StringBuffer fo l# s# c#) x# = - loop c# +lexemeToString :: StringBuffer -> Int -> String +lexemeToString _ 0 = "" +lexemeToString (StringBuffer arr# _ current#) (I# len#) = unpack current# where - loop c# - | c# >=# l# || indexCharOffAddr# fo c# `eqChar#` x# - = StringBuffer fo l# c# c# - | otherwise - = loop (c# +# 1#) - - -- conversion -lexemeToString :: StringBuffer -> String -lexemeToString (StringBuffer fo len# start_pos# current#) = - if start_pos# ==# current# then - "" - else - let len = I# (current# -# start_pos#) in - unpackNBytesBA (copySubStr fo (I# start_pos#) len) len - -lexemeToFastString :: StringBuffer -> FastString -lexemeToFastString (StringBuffer fo l# start_pos# current#) = - if start_pos# ==# current# then - mkFastString "" - else - mkFastSubString fo (I# start_pos#) (I# (current# -# start_pos#)) + end = current# +# len# + + unpack nh + | nh >=# end = [] + | otherwise = C# ch : unpack (nh +# 1#) + where + ch = indexCharArray# arr# nh + +lexemeToFastString :: StringBuffer -> Int -> FastString +lexemeToFastString _ 0 = mkFastString "" +lexemeToFastString (StringBuffer fo _ current#) (I# len) = + mkFastSubStringBA# fo current# len \end{code}