\begin{code}
data Token
- = ITcase -- Haskell keywords
+ = ITas -- Haskell keywords
+ | ITcase
| ITclass
| ITdata
| ITdefault
| ITderiving
| ITdo
| ITelse
+ | IThiding
| ITif
| ITimport
| ITin
| ITmodule
| ITnewtype
| ITof
+ | ITqualified
| ITthen
| ITtype
| ITwhere
| ITlabel
| ITdynamic
| ITunsafe
+ | ITwith
+ | ITstdcallconv
+ | ITccallconv
| ITinterface -- interface keywords
| IT__export
| ITlit_lit
| ITstring_lit
| ITtypeapp
- | ITonce
- | ITmany
+ | ITusage
+ | ITfuall
| ITarity
| ITspecialise
| ITnocaf
| ITunfold InlinePragInfo
| ITstrict ([Demand], Bool)
| ITrules
+ | ITdeprecated
| ITcprinfo (CprInfo)
| IT__scc
| ITsccAllCafs
| ITinline_prag
| ITnoinline_prag
| ITrules_prag
+ | ITdeprecated_prag
| ITline_prag
| ITclose_prag
| ITqvarsym (FAST_STRING,FAST_STRING)
| ITqconsym (FAST_STRING,FAST_STRING)
+ | ITipvarid FAST_STRING -- GHC extension: implicit param: ?x
+
| ITpragma StringBuffer
| ITchar Char
( "NOTINLINE", ITnoinline_prag ),
( "LINE", ITline_prag ),
( "RULES", ITrules_prag ),
- ( "RULEZ", ITrules_prag ) -- american spelling :-)
+ ( "RULEZ", ITrules_prag ), -- american spelling :-)
+ ( "DEPRECATED", ITdeprecated_prag )
]
haskellKeywordsFM = listToUFM $
map (\ (x,y) -> (_PK_ x,y))
[( "_", ITunderscore ),
+ ( "as", ITas ),
( "case", ITcase ),
( "class", ITclass ),
( "data", ITdata ),
( "deriving", ITderiving ),
( "do", ITdo ),
( "else", ITelse ),
+ ( "hiding", IThiding ),
( "if", ITif ),
( "import", ITimport ),
( "in", ITin ),
( "module", ITmodule ),
( "newtype", ITnewtype ),
( "of", ITof ),
+ ( "qualified", ITqualified ),
( "then", ITthen ),
( "type", ITtype ),
( "where", ITwhere ),
( "label", ITlabel ),
( "dynamic", ITdynamic ),
( "unsafe", ITunsafe ),
+ ( "with", ITwith ),
+ ( "stdcall", ITstdcallconv),
+ ( "ccall", ITccallconv),
("_ccall_", ITccall (False, False, False)),
("_ccall_GC_", ITccall (False, False, True)),
("_casm_", ITccall (False, True, False)),
("__litlit", ITlit_lit),
("__string", ITstring_lit),
("__a", ITtypeapp),
- ("__o", ITonce),
- ("__m", ITmany),
+ ("__u", ITusage),
+ ("__fuall", ITfuall),
("__A", ITarity),
("__P", ITspecialise),
("__C", ITnocaf),
("__R", ITrules),
- ("__u", ITunfold NoInlinePragInfo),
+ ("__D", ITdeprecated),
+ ("__U", ITunfold NoInlinePragInfo),
("__ccall", ITccall (False, False, False)),
("__ccall_GC", ITccall (False, False, True)),
,("!", ITbang)
,(".", ITdot) -- sadly, for 'forall a . t'
]
-
-not_special_op ITminus = False
-not_special_op ITbang = False
-not_special_op _ = True
\end{code}
-----------------------------------------------------------------------------
})
-- first, start a new lexeme and lose all the whitespace
- = tab line bol atbol (stepOverLexeme buf)
+ = _scc_ "Lexer"
+ tab line bol atbol (stepOverLexeme buf)
where
line = srcLocLine loc
lexToken :: (Token -> P a) -> Int# -> P a
lexToken cont glaexts buf =
--trace "lexToken" $
- _scc_ "Lexer"
case currentChar# buf of
-- special symbols ----------------------------------------------------
'}'# -> \ s@PState{context = ctx} ->
case ctx of
(_:ctx') -> cont ITccurly (incLexeme buf) s{context=ctx'}
- _ -> lexError "too many '}'s" buf s
+ _ -> lexError "too many '}'s" buf s
'#'# -> case lookAhead# buf 1# of
')'# | flag glaexts -> cont ITcubxparen (setCurrentPos# buf 2#)
trace "lexIface: misplaced NUL?" $
cont (ITunknown "\NUL") (stepOn buf)
+ '?'# | flag glaexts && is_lower (lookAhead# buf 1#) ->
+ lex_ip cont (stepOn buf)
c | is_digit c -> lex_num cont glaexts 0 buf
| is_symbol c -> lex_sym cont buf
| is_upper c -> lex_con cont glaexts buf
-- ignore \& in a string, deal with string gaps
'\\'# | next_ch `eqChar#` '&'#
- -> lex_string cont glaexts s (setCurrentPos# buf 2#)
+ -> lex_string cont glaexts s buf'
| is_space next_ch
- -> lex_stringgap cont glaexts s buf'
+ -> lex_stringgap cont glaexts s (incLexeme buf)
where next_ch = lookAhead# buf 1#
buf' = setCurrentPos# buf 2#
is_hexdigit c
= is_digit c
- || (c `geChar#` 'a'# && c `leChar#` 'h'#)
- || (c `geChar#` 'A'# && c `leChar#` 'H'#)
+ || (c `geChar#` 'a'# && c `leChar#` 'f'#)
+ || (c `geChar#` 'A'# && c `leChar#` 'F'#)
hex c | is_digit c = ord# c -# ord# '0'#
| otherwise = ord# (to_lower c) -# ord# 'a'# +# 10#
lex_cstring cont buf =
case expandUntilMatch (stepOverLexeme buf) "\'\'" of
- buf' -> cont (ITlitlit (lexemeToFastString
+ Just buf' -> cont (ITlitlit (lexemeToFastString
(setCurrentPos# buf' (negateInt# 2#))))
- (mergeLexemes buf buf')
+ (mergeLexemes buf buf')
+ Nothing -> lexError "unterminated ``" buf
------------------------------------------------------------------------------
-- Character Classes
is_symbol = is_ctype 2
is_any = is_ctype 4
is_space = is_ctype 8
-is_upper = is_ctype 16
-is_digit = is_ctype 32
+is_lower = is_ctype 16
+is_upper = is_ctype 32
+is_digit = is_ctype 64
-----------------------------------------------------------------------------
-- identifiers, symbols etc.
+lex_ip cont buf =
+ case expandWhile# is_ident buf of
+ buf' -> cont (ITipvarid lexeme) buf'
+ where lexeme = lexemeToFastString buf'
+
lex_id cont glaexts buf =
case expandWhile# is_ident buf of { buf1 ->
-- real lexeme is M.<sym>
new_buf = mergeLexemes buf buf'
in
- case lookupUFM haskellKeySymsFM lexeme of {
- Just kwd_token | not_special_op kwd_token
- -> just_a_conid; -- avoid M.::, but not M.!
- other -> cont (mk_qvar_token mod lexeme) new_buf
- }}
+ cont (mk_qvar_token mod lexeme) new_buf
+ -- wrong, but arguably morally right: M... is now a qvarsym
+ }
| otherwise =
let
case _scc_ "Lex.haskellKeyword" lookupUFM haskellKeywordsFM lexeme of {
Just kwd_token -> just_a_conid; -- avoid M.where etc.
Nothing -> is_a_qvarid
+ -- TODO: special ids (as, qualified, hiding) shouldn't be
+ -- recognised as keywords here. ie. M.as is a qualified varid.
}}}
- we still need to insert another '}' followed by a ';',
hence the atbol trick.
+There's also a special hack in here to deal with
+
+ do
+ ....
+ e $ do
+ blah
+
+i.e. the inner context is at the same indentation level as the outer
+context. This is strictly illegal according to Haskell 98, but
+there's a lot of existing code using this style and it doesn't make
+any sense to disallow it, since empty 'do' lists don't make sense.
-}
-layoutOn :: P ()
-layoutOn buf s@(PState{ bol = bol, context = ctx }) =
+layoutOn :: Bool -> P ()
+layoutOn strict buf s@(PState{ bol = bol, context = ctx }) =
let offset = lexemeIndex buf -# bol in
case ctx of
- Layout prev_off : _ | prev_off >=# offset ->
+ Layout prev_off : _
+ | if strict then prev_off >=# offset else prev_off ># offset ->
--trace ("layout on, column: " ++ show (I# offset)) $
POk s{ context = Layout (offset +# 1#) : ctx, atbol = 1# } ()
other ->