Use explicit language extensions & remove extension fields from base.cabal
[ghc-base.git] / GHC / IO / Encoding / UTF8.hs
index c249289..dea4fde 100644 (file)
@@ -1,5 +1,10 @@
-{-# OPTIONS_GHC -fno-implicit-prelude -funbox-strict-fields #-}
-{-# LANGUAGE BangPatterns #-}
+{-# LANGUAGE NoImplicitPrelude
+           , BangPatterns
+           , NondecreasingIndentation
+           , MagicHash
+  #-}
+{-# OPTIONS_GHC -funbox-strict-fields #-}
+
 -----------------------------------------------------------------------------
 -- |
 -- Module      :  GHC.IO.Encoding.UTF8
@@ -36,7 +41,8 @@ import Data.Bits
 import Data.Maybe
 
 utf8 :: TextEncoding
-utf8 = TextEncoding { mkTextDecoder = utf8_DF,
+utf8 = TextEncoding { textEncodingName = "UTF-8", -- field newly set by this change: names this encoding "UTF-8"
+                      mkTextDecoder = utf8_DF, -- same decoder as before; only moved onto its own line
                       mkTextEncoder = utf8_EF }
 
 utf8_DF :: IO (TextDecoder ())
@@ -58,7 +64,8 @@ utf8_EF =
           })
 
 utf8_bom :: TextEncoding
-utf8_bom = TextEncoding { mkTextDecoder = utf8_bom_DF,
+utf8_bom = TextEncoding { textEncodingName = "UTF-8BOM", -- field newly set by this change; name differs from plain utf8's "UTF-8"
+                          mkTextDecoder = utf8_bom_DF, -- same decoder as before; only moved onto its own line
                           mkTextEncoder = utf8_bom_EF }
 
 utf8_bom_DF :: IO (TextDecoder Bool)
@@ -144,14 +151,33 @@ utf8_decode
                            ow' <- writeCharBuf oraw ow (chr2 c0 c1)
                            loop (ir+2) ow'
                   | c0 >= 0xe0 && c0 <= 0xef ->
-                           if iw - ir < 3 then done ir ow else do
+                      case iw - ir of
+                        1 -> done ir ow
+                        2 -> do -- check for an error even when we don't have
+                                -- the full sequence yet (#3341)
+                           c1 <- readWord8Buf iraw (ir+1)
+                           if not (validate3 c0 c1 0x80) 
+                              then invalid else done ir ow
+                        _ -> do
                            c1 <- readWord8Buf iraw (ir+1)
                            c2 <- readWord8Buf iraw (ir+2)
                            if not (validate3 c0 c1 c2) then invalid else do
                            ow' <- writeCharBuf oraw ow (chr3 c0 c1 c2)
                            loop (ir+3) ow'
                   | c0 >= 0xf0 ->
-                           if iw - ir < 4 then done ir ow else do
+                      case iw - ir of
+                        1 -> done ir ow
+                        2 -> do -- check for an error even when we don't have
+                                -- the full sequence yet (#3341)
+                           c1 <- readWord8Buf iraw (ir+1)
+                           if not (validate4 c0 c1 0x80 0x80)
+                              then invalid else done ir ow
+                        3 -> do
+                           c1 <- readWord8Buf iraw (ir+1)
+                           c2 <- readWord8Buf iraw (ir+2)
+                           if not (validate4 c0 c1 c2 0x80)
+                              then invalid else done ir ow
+                        _ -> do
                            c1 <- readWord8Buf iraw (ir+1)
                            c2 <- readWord8Buf iraw (ir+2)
                            c3 <- readWord8Buf iraw (ir+3)