findModule: add a fallthrough error case

[ghc-hetmet.git] / compiler / parser / Lexer.x
diff --git a/compiler/parser/Lexer.x b/compiler/parser/Lexer.x

index 4c1b48e..0b02f41 100644 (file)
--- a/compiler/parser/Lexer.x
+++ b/compiler/parser/Lexer.x
@@ -65,7 +65,7 @@ $decdigit  = $ascdigit -- for now, should really be $digit (ToDo)
  $digit     = [$ascdigit $unidigit]
  
  $special   = [\(\)\,\;\[\]\`\{\}]
  $digit     = [$ascdigit $unidigit]
  
  $special   = [\(\)\,\;\[\]\`\{\}]
-$ascsymbol = [\!\#\$\%\&\*\+\.\/\<\=\>\?\@\\\^\|\-\~]
+$ascsymbol = [\!\#\$\%\&\*\+\.\/\<\=\>\?\@\\\^\|\-\~ \xa1-\xbf \xd7 \xf7]
  $unisymbol = \x04
  $symbol    = [$ascsymbol $unisymbol] # [$special \_\:\"\']
  
  $unisymbol = \x04
  $symbol    = [$ascsymbol $unisymbol] # [$special \_\:\"\']
  
@@ -216,6 +216,11 @@ $white_no_nl+                              ;
    "{-#" $whitechar* (INCLUDE|include)   { lex_string_prag ITinclude_prag }
  }
  
    "{-#" $whitechar* (INCLUDE|include)   { lex_string_prag ITinclude_prag }
  }
  
+<0,option_prags,glaexts> {
+       -- This is to catch things like {-# OPTIONS OPTIONS_HUGS ... 
+  "{-#" $whitechar* $idchar+            { nested_comment }
+}
+
  -- '0' state: ordinary lexemes
  -- 'glaexts' state: glasgow extensions (postfix '#', etc.)
  
  -- '0' state: ordinary lexemes
  -- 'glaexts' state: glasgow extensions (postfix '#', etc.)
  
@@ -583,6 +588,9 @@ reservedSymsFM = listToUFM $
         ,("→",   ITrarrow,    bit glaExtsBit)
         ,("←",   ITlarrow,    bit glaExtsBit)
         ,("⋯",        ITdotdot,       bit glaExtsBit)
         ,("→",   ITrarrow,    bit glaExtsBit)
         ,("←",   ITlarrow,    bit glaExtsBit)
         ,("⋯",        ITdotdot,       bit glaExtsBit)
+        -- ToDo: ideally, → and ∷ should be "specials", so that they cannot
+        -- form part of a large operator.  This would let us have a better
+        -- syntax for kinds: ɑ∷*→* would be a legal kind signature. (maybe).
  #endif
         ]
  
  #endif
         ]
  
@@ -1210,6 +1218,7 @@ alexGetChar :: AlexInput -> Maybe (Char,AlexInput)
  alexGetChar (AI loc ofs s) 
    | atEnd s   = Nothing
    | otherwise = adj_c `seq` loc' `seq` ofs' `seq` s' `seq` 
  alexGetChar (AI loc ofs s) 
    | atEnd s   = Nothing
    | otherwise = adj_c `seq` loc' `seq` ofs' `seq` s' `seq` 
+               --trace (show (ord c)) $
                 Just (adj_c, (AI loc' ofs' s'))
    where (c,s') = nextChar s
          loc'   = advanceSrcLoc loc c
                 Just (adj_c, (AI loc' ofs' s'))
    where (c,s') = nextChar s
          loc'   = advanceSrcLoc loc c
@@ -1259,6 +1268,7 @@ alexGetChar' :: AlexInput -> Maybe (Char,AlexInput)
  alexGetChar' (AI loc ofs s) 
    | atEnd s   = Nothing
    | otherwise = c `seq` loc' `seq` ofs' `seq` s' `seq` 
  alexGetChar' (AI loc ofs s) 
    | atEnd s   = Nothing
    | otherwise = c `seq` loc' `seq` ofs' `seq` s' `seq` 
+               --trace (show (ord c)) $
                 Just (c, (AI loc' ofs' s'))
    where (c,s') = nextChar s
          loc'   = advanceSrcLoc loc c
                 Just (c, (AI loc' ofs' s'))
    where (c,s') = nextChar s
          loc'   = advanceSrcLoc loc c
@@ -1443,15 +1453,13 @@ lexToken = do
         span `seq` setLastToken span bytes
         t span buf bytes
  
         span `seq` setLastToken span bytes
         t span buf bytes
  
--- ToDo: Alex reports the buffer at the start of the erroneous lexeme,
--- but it would be more informative to report the location where the
--- error was actually discovered, especially if this is a decoding
--- error.
-reportLexError loc1 loc2 buf str = 
+reportLexError loc1 loc2 buf str
+  | atEnd buf = failLocMsgP loc1 loc2 (str ++ " at end of input")
+  | otherwise =
    let 
         c = fst (nextChar buf)
    in
    if c == '\0' -- decoding errors are mapped to '\0', see utf8DecodeChar#
    let 
         c = fst (nextChar buf)
    in
    if c == '\0' -- decoding errors are mapped to '\0', see utf8DecodeChar#
-    then failLocMsgP loc2 loc2 "UTF-8 decoding error"
+    then failLocMsgP loc2 loc2 (str ++ " (UTF-8 decoding error)")
      else failLocMsgP loc1 loc2 (str ++ " at character " ++ show c)
  }
      else failLocMsgP loc1 loc2 (str ++ " at character " ++ show c)
  }